summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorTakashi Iwai <tiwai@suse.de>2023-03-03 16:20:56 +0300
committerTakashi Iwai <tiwai@suse.de>2023-03-03 16:20:56 +0300
commit26ed1d29fc44f3f2f0c396c1392abefac5f0454e (patch)
treeffba9ebddf759f04cbeca8adace5cc8093c58c2d /net
parente97fc9cffbb9f372b53b42c36cd7b20aab44a554 (diff)
parent7933b90b42896f5b6596e6a829bb31c5121fc2a9 (diff)
downloadlinux-26ed1d29fc44f3f2f0c396c1392abefac5f0454e.tar.xz
Merge branch 'for-next' into for-linus
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_xen.c3
-rw-r--r--net/bluetooth/hci_conn.c18
-rw-r--r--net/bluetooth/hci_event.c5
-rw-r--r--net/bluetooth/hci_sync.c19
-rw-r--r--net/bluetooth/iso.c64
-rw-r--r--net/bluetooth/mgmt_util.h2
-rw-r--r--net/bluetooth/rfcomm/sock.c7
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/caif/cfctrl.c6
-rw-r--r--net/can/isotp.c69
-rw-r--r--net/can/j1939/address-claim.c40
-rw-r--r--net/can/j1939/transport.c4
-rw-r--r--net/can/raw.c47
-rw-r--r--net/core/devlink.c9
-rw-r--r--net/core/filter.c7
-rw-r--r--net/core/gro.c14
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/sock.c3
-rw-r--r--net/core/sock_map.c61
-rw-r--r--net/ethtool/ioctl.c107
-rw-r--r--net/ethtool/rss.c11
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c43
-rw-r--r--net/ipv4/inet_hashtables.c25
-rw-r--r--net/ipv4/inet_timewait_sock.c39
-rw-r--r--net/ipv4/metrics.c2
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv4/tcp_ulp.c4
-rw-r--r--net/ipv6/addrconf.c59
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ip6_output.c15
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/l2tp/l2tp_core.c102
-rw-r--r--net/mac80211/agg-tx.c8
-rw-r--r--net/mac80211/cfg.c7
-rw-r--r--net/mac80211/debugfs_sta.c5
-rw-r--r--net/mac80211/driver-ops.c3
-rw-r--r--net/mac80211/driver-ops.h2
-rw-r--r--net/mac80211/ht.c31
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/rx.c225
-rw-r--r--net/mac80211/tx.c34
-rw-r--r--net/mac80211/util.c42
-rw-r--r--net/mac802154/rx.c1
-rw-r--r--net/mctp/af_mctp.c16
-rw-r--r--net/mctp/route.c34
-rw-r--r--net/mptcp/pm.c25
-rw-r--r--net/mptcp/pm_netlink.c10
-rw-r--r--net/mptcp/pm_userspace.c7
-rw-r--r--net/mptcp/protocol.c31
-rw-r--r--net/mptcp/protocol.h10
-rw-r--r--net/mptcp/sockopt.c11
-rw-r--r--net/mptcp/subflow.c40
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c4
-rw-r--r--net/netfilter/ipset/ip_set_core.c7
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c17
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c15
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c23
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c40
-rw-r--r--net/netfilter/nf_conntrack_proto.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c167
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c15
-rw-r--r--net/netfilter/nf_conntrack_standalone.c16
-rw-r--r--net/netfilter/nf_tables_api.c261
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c332
-rw-r--r--net/netlink/af_netlink.c38
-rw-r--r--net/netrom/af_netrom.c5
-rw-r--r--net/netrom/nr_timer.c1
-rw-r--r--net/nfc/llcp_core.c1
-rw-r--r--net/nfc/netlink.c52
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/qrtr/ns.c5
-rw-r--r--net/rds/message.c6
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c27
-rw-r--r--net/rxrpc/ar-internal.h212
-rw-r--r--net/rxrpc/call_accept.c57
-rw-r--r--net/rxrpc/call_event.c86
-rw-r--r--net/rxrpc/call_object.c116
-rw-r--r--net/rxrpc/call_state.c69
-rw-r--r--net/rxrpc/conn_client.c709
-rw-r--r--net/rxrpc/conn_event.c382
-rw-r--r--net/rxrpc/conn_object.c67
-rw-r--r--net/rxrpc/conn_service.c1
-rw-r--r--net/rxrpc/input.c175
-rw-r--r--net/rxrpc/insecure.c20
-rw-r--r--net/rxrpc/io_thread.c204
-rw-r--r--net/rxrpc/local_object.c35
-rw-r--r--net/rxrpc/net_ns.c17
-rw-r--r--net/rxrpc/output.c60
-rw-r--r--net/rxrpc/peer_object.c23
-rw-r--r--net/rxrpc/proc.c17
-rw-r--r--net/rxrpc/recvmsg.c270
-rw-r--r--net/rxrpc/rxkad.c356
-rw-r--r--net/rxrpc/rxperf.c17
-rw-r--r--net/rxrpc/security.c53
-rw-r--r--net/rxrpc/sendmsg.c195
-rw-r--r--net/sched/act_mpls.c8
-rw-r--r--net/sched/cls_tcindex.c12
-rw-r--r--net/sched/sch_api.c5
-rw-r--r--net/sched/sch_atm.c5
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_htb.c40
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sctp/bind_addr.c6
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c19
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svc_xprt.c2
-rw-r--r--net/sunrpc/svcsock.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/x25/af_x25.c6
-rw-r--r--net/xfrm/xfrm_compat.c4
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_interface_core.c54
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--net/xfrm/xfrm_state.c18
134 files changed, 3129 insertions, 2784 deletions
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 9630b1275557..82c7005ede65 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -305,13 +305,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
kfree(priv);
}
-static int xen_9pfs_front_remove(struct xenbus_device *dev)
+static void xen_9pfs_front_remove(struct xenbus_device *dev)
{
struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
dev_set_drvdata(&dev->dev, NULL);
xen_9pfs_front_free(priv);
- return 0;
}
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index d3e542c2fc3e..acf563fbdfd9 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -821,6 +821,7 @@ static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
{
struct iso_list_data *d;
+ int ret;
bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
@@ -831,8 +832,12 @@ static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
d->big = big;
d->bis = bis;
- return hci_cmd_sync_queue(hdev, terminate_big_sync, d,
- terminate_big_destroy);
+ ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d,
+ terminate_big_destroy);
+ if (ret)
+ kfree(d);
+
+ return ret;
}
static int big_terminate_sync(struct hci_dev *hdev, void *data)
@@ -857,6 +862,7 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
{
struct iso_list_data *d;
+ int ret;
bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
@@ -867,8 +873,12 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
d->big = big;
d->sync_handle = sync_handle;
- return hci_cmd_sync_queue(hdev, big_terminate_sync, d,
- terminate_big_destroy);
+ ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
+ terminate_big_destroy);
+ if (ret)
+ kfree(d);
+
+ return ret;
}
/* Cleanup BIS connection
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0594af4e37ca..ad92a4be5851 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3848,8 +3848,11 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
conn->handle, conn->link);
/* Create CIS if LE is already connected */
- if (conn->link && conn->link->state == BT_CONNECTED)
+ if (conn->link && conn->link->state == BT_CONNECTED) {
+ rcu_read_unlock();
hci_le_create_cis(conn->link);
+ rcu_read_lock();
+ }
if (i == rp->num_handles)
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9e2d7e4b850c..117eedb6f709 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3572,7 +3572,7 @@ static const struct hci_init_stage hci_init2[] = {
static int hci_le_read_buffer_size_sync(struct hci_dev *hdev)
{
/* Use Read LE Buffer Size V2 if supported */
- if (hdev->commands[41] & 0x20)
+ if (iso_capable(hdev) && hdev->commands[41] & 0x20)
return __hci_cmd_sync_status(hdev,
HCI_OP_LE_READ_BUFFER_SIZE_V2,
0, NULL, HCI_CMD_TIMEOUT);
@@ -3597,10 +3597,10 @@ static int hci_le_read_supported_states_sync(struct hci_dev *hdev)
/* LE Controller init stage 2 command sequence */
static const struct hci_init_stage le_init2[] = {
- /* HCI_OP_LE_READ_BUFFER_SIZE */
- HCI_INIT(hci_le_read_buffer_size_sync),
/* HCI_OP_LE_READ_LOCAL_FEATURES */
HCI_INIT(hci_le_read_local_features_sync),
+ /* HCI_OP_LE_READ_BUFFER_SIZE */
+ HCI_INIT(hci_le_read_buffer_size_sync),
/* HCI_OP_LE_READ_SUPPORTED_STATES */
HCI_INIT(hci_le_read_supported_states_sync),
{}
@@ -6187,20 +6187,13 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
{
- u8 instance = *(u8 *)data;
-
- kfree(data);
+ u8 instance = PTR_ERR(data);
return hci_update_adv_data_sync(hdev, instance);
}
int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
{
- u8 *inst_ptr = kmalloc(1, GFP_KERNEL);
-
- if (!inst_ptr)
- return -ENOMEM;
-
- *inst_ptr = instance;
- return hci_cmd_sync_queue(hdev, _update_adv_data_sync, inst_ptr, NULL);
+ return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
+ ERR_PTR(instance), NULL);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 035bb5d25f85..24444b502e58 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -289,15 +289,15 @@ static int iso_connect_bis(struct sock *sk)
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ return err;
+
lock_sock(sk);
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
- err = iso_chan_add(conn, sk, NULL);
- if (err)
- goto release;
-
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -306,7 +306,6 @@ static int iso_connect_bis(struct sock *sk)
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
-release:
release_sock(sk);
return err;
@@ -372,15 +371,15 @@ static int iso_connect_cis(struct sock *sk)
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ return err;
+
lock_sock(sk);
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
- err = iso_chan_add(conn, sk, NULL);
- if (err)
- goto release;
-
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -392,7 +391,6 @@ static int iso_connect_cis(struct sock *sk)
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
-release:
release_sock(sk);
return err;
@@ -895,13 +893,10 @@ static int iso_listen_bis(struct sock *sk)
if (!hdev)
return -EHOSTUNREACH;
- hci_dev_lock(hdev);
-
err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
le_addr_type(iso_pi(sk)->dst_type),
iso_pi(sk)->bc_sid);
- hci_dev_unlock(hdev);
hci_dev_put(hdev);
return err;
@@ -1432,33 +1427,29 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *parent;
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev;
+ struct hci_conn *hcon;
BT_DBG("conn %p", conn);
if (sk) {
iso_sock_ready(conn->sk);
} else {
- iso_conn_lock(conn);
-
- if (!conn->hcon) {
- iso_conn_unlock(conn);
+ hcon = conn->hcon;
+ if (!hcon)
return;
- }
- ev = hci_recv_event_data(conn->hcon->hdev,
+ ev = hci_recv_event_data(hcon->hdev,
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
if (ev)
- parent = iso_get_sock_listen(&conn->hcon->src,
- &conn->hcon->dst,
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
iso_match_big, ev);
else
- parent = iso_get_sock_listen(&conn->hcon->src,
+ parent = iso_get_sock_listen(&hcon->src,
BDADDR_ANY, NULL, NULL);
- if (!parent) {
- iso_conn_unlock(conn);
+ if (!parent)
return;
- }
lock_sock(parent);
@@ -1466,30 +1457,29 @@ static void iso_conn_ready(struct iso_conn *conn)
BTPROTO_ISO, GFP_ATOMIC, 0);
if (!sk) {
release_sock(parent);
- iso_conn_unlock(conn);
return;
}
iso_sock_init(sk, parent);
- bacpy(&iso_pi(sk)->src, &conn->hcon->src);
- iso_pi(sk)->src_type = conn->hcon->src_type;
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+ iso_pi(sk)->src_type = hcon->src_type;
/* If hcon has no destination address (BDADDR_ANY) it means it
* was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to
* initialize using the parent socket destination address.
*/
- if (!bacmp(&conn->hcon->dst, BDADDR_ANY)) {
- bacpy(&conn->hcon->dst, &iso_pi(parent)->dst);
- conn->hcon->dst_type = iso_pi(parent)->dst_type;
- conn->hcon->sync_handle = iso_pi(parent)->sync_handle;
+ if (!bacmp(&hcon->dst, BDADDR_ANY)) {
+ bacpy(&hcon->dst, &iso_pi(parent)->dst);
+ hcon->dst_type = iso_pi(parent)->dst_type;
+ hcon->sync_handle = iso_pi(parent)->sync_handle;
}
- bacpy(&iso_pi(sk)->dst, &conn->hcon->dst);
- iso_pi(sk)->dst_type = conn->hcon->dst_type;
+ bacpy(&iso_pi(sk)->dst, &hcon->dst);
+ iso_pi(sk)->dst_type = hcon->dst_type;
- hci_conn_hold(conn->hcon);
- __iso_chan_add(conn, sk, parent);
+ hci_conn_hold(hcon);
+ iso_chan_add(conn, sk, parent);
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
sk->sk_state = BT_CONNECT2;
@@ -1500,8 +1490,6 @@ static void iso_conn_ready(struct iso_conn *conn)
parent->sk_data_ready(parent);
release_sock(parent);
-
- iso_conn_unlock(conn);
}
}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index 6a8b7e84293d..bdf978605d5a 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -27,7 +27,7 @@ struct mgmt_mesh_tx {
struct sock *sk;
u8 handle;
u8 instance;
- u8 param[sizeof(struct mgmt_cp_mesh_send) + 29];
+ u8 param[sizeof(struct mgmt_cp_mesh_send) + 31];
};
struct mgmt_pending_cmd {
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 21e24da4847f..4397e14ff560 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -391,6 +391,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ sock_hold(sk);
lock_sock(sk);
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
@@ -410,14 +411,18 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
d->sec_level = rfcomm_pi(sk)->sec_level;
d->role_switch = rfcomm_pi(sk)->role_switch;
+ /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */
+ release_sock(sk);
err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr,
sa->rc_channel);
- if (!err)
+ lock_sock(sk);
+ if (!err && !sock_flag(sk, SOCK_ZAPPED))
err = bt_sock_wait_state(sk, BT_CONNECTED,
sock_sndtimeo(sk, flags & O_NONBLOCK));
done:
release_sock(sk);
+ sock_put(sk);
return err;
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index f20f4373ff40..9554abcfd5b4 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -871,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv,
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
+ nf_bridge_info_free(skb);
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index cc405d8c7c30..8480684f2762 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
default:
pr_warn("Request setup of bad link type = %d\n",
param->linktype);
+ cfpkt_destroy(pkt);
return -EINVAL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
+ if (!req) {
+ cfpkt_destroy(pkt);
return -ENOMEM;
+ }
+
req->client_layer = user_layer;
req->cmd = CFCTRL_CMD_LINK_SETUP;
req->param = *param;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 608f8c24ae46..fc81d77724a1 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -140,7 +140,7 @@ struct isotp_sock {
canid_t rxid;
ktime_t tx_gap;
ktime_t lastrxcf_tstamp;
- struct hrtimer rxtimer, txtimer;
+ struct hrtimer rxtimer, txtimer, txfrtimer;
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
@@ -871,7 +871,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data)
}
/* start timer to send next consecutive frame with correct delay */
- hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
+ hrtimer_start(&so->txfrtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
@@ -879,49 +879,39 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- switch (so->tx.state) {
- case ISOTP_SENDING:
-
- /* cfecho should be consumed by isotp_rcv_echo() here */
- if (!so->cfecho) {
- /* start timeout for unlikely lost echo skb */
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(ISOTP_ECHO_TIMEOUT, 0)));
- restart = HRTIMER_RESTART;
+ /* don't handle timeouts in IDLE state */
+ if (so->tx.state == ISOTP_IDLE)
+ return HRTIMER_NORESTART;
- /* push out the next consecutive frame */
- isotp_send_cframe(so);
- break;
- }
+ /* we did not get any flow control or echo frame in time */
- /* cfecho has not been cleared in isotp_rcv_echo() */
- pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
- fallthrough;
+ /* report 'communication error on send' */
+ sk->sk_err = ECOMM;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
- case ISOTP_WAIT_FC:
- case ISOTP_WAIT_FIRST_FC:
+ /* reset tx state */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
- /* we did not get any flow control frame in time */
+ return HRTIMER_NORESTART;
+}
- /* report 'communication error on send' */
- sk->sk_err = ECOMM;
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+static enum hrtimer_restart isotp_txfr_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ txfrtimer);
- /* reset tx state */
- so->tx.state = ISOTP_IDLE;
- wake_up_interruptible(&so->wait);
- break;
+ /* start echo timeout handling and cover below protocol error */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
- default:
- WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n",
- so->tx.state, so->cfecho);
- }
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (so->tx.state == ISOTP_SENDING && !so->cfecho)
+ isotp_send_cframe(so);
- return restart;
+ return HRTIMER_NORESTART;
}
static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -1162,6 +1152,10 @@ static int isotp_release(struct socket *sock)
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ /* force state machines to be idle also when a signal occurred */
+ so->tx.state = ISOTP_IDLE;
+ so->rx.state = ISOTP_IDLE;
+
spin_lock(&isotp_notifier_lock);
while (isotp_busy_notifier == so) {
spin_unlock(&isotp_notifier_lock);
@@ -1194,6 +1188,7 @@ static int isotp_release(struct socket *sock)
}
}
+ hrtimer_cancel(&so->txfrtimer);
hrtimer_cancel(&so->txtimer);
hrtimer_cancel(&so->rxtimer);
@@ -1597,6 +1592,8 @@ static int isotp_init(struct sock *sk)
so->rxtimer.function = isotp_rx_timer_handler;
hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
so->txtimer.function = isotp_tx_timer_handler;
+ hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->txfrtimer.function = isotp_txfr_timer_handler;
init_waitqueue_head(&so->wait);
spin_lock_init(&so->rx_lock);
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
index f33c47327927..ca4ad6cdd5cb 100644
--- a/net/can/j1939/address-claim.c
+++ b/net/can/j1939/address-claim.c
@@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
* leaving this function.
*/
ecu = j1939_ecu_get_by_name_locked(priv, name);
+
+ if (ecu && ecu->addr == skcb->addr.sa) {
+ /* The ISO 11783-5 standard, in "4.5.2 - Address claim
+ * requirements", states:
+ * d) No CF shall begin, or resume, transmission on the
+ * network until 250 ms after it has successfully claimed
+ * an address except when responding to a request for
+ * address-claimed.
+ *
+ * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim
+ * prioritization" show that the CF begins the transmission
+ * after 250 ms from the first AC (address-claimed) message
+ * even if it sends another AC message during that time window
+ * to resolve the address contention with another CF.
+ *
+ * As stated in "4.4.2.3 - Address-claimed message":
+ * In order to successfully claim an address, the CF sending
+ * an address claimed message shall not receive a contending
+ * claim from another CF for at least 250 ms.
+ *
+ * As stated in "4.4.3.2 - NAME management (NM) message":
+ * 1) A commanding CF can
+ * d) request that a CF with a specified NAME transmit
+ * the address-claimed message with its current NAME.
+ * 2) A target CF shall
+ * d) send an address-claimed message in response to a
+ * request for a matching NAME
+ *
+ * Taking the above arguments into account, the 250 ms wait is
+ * requested only during network initialization.
+ *
+ * Do not restart the timer on AC message if both the NAME and
+ * the address match and so if the address has already been
+ * claimed (timer has expired) or the AC message has been sent
+ * to resolve the contention with another CF (timer is still
+ * running).
+ */
+ goto out_ecu_put;
+ }
+
if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
ecu = j1939_ecu_create_locked(priv, name);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 5c722b55fe23..fce9b9ebf13f 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1092,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session)
bool active;
j1939_session_list_lock(priv);
- /* This function should be called with a session ref-count of at
- * least 2.
- */
- WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
j1939_session_list_unlock(priv);
diff --git a/net/can/raw.c b/net/can/raw.c
index 81071cdb0301..ba86782ba8bb 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -132,8 +132,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) ||
- (can_is_canxl_skb(oskb) && !ro->xl_frames))
+ if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
+ (!ro->xl_frames && can_is_canxl_skb(oskb)))
return;
/* eliminate multiple filter matches for the same skb */
@@ -670,6 +670,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames && !ro->fd_frames) {
+ ro->fd_frames = ro->xl_frames;
+ return -EINVAL;
+ }
break;
case CAN_RAW_XL_FRAMES:
@@ -679,6 +684,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->xl_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames)
+ ro->fd_frames = ro->xl_frames;
break;
case CAN_RAW_JOIN_FILTERS:
@@ -786,6 +794,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+{
+ /* Classical CAN -> no checks for flags and device capabilities */
+ if (can_is_can_skb(skb))
+ return false;
+
+ /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
+ if (ro->fd_frames && can_is_canfd_skb(skb) &&
+ (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
+ return false;
+
+ /* CAN XL -> needs to be enabled and a CAN XL device */
+ if (ro->xl_frames && can_is_canxl_skb(skb) &&
+ can_is_canxl_dev_mtu(mtu))
+ return false;
+
+ return true;
+}
+
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
@@ -833,20 +860,8 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) {
- /* CAN XL, CAN FD and Classical CAN */
- if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) &&
- !can_is_can_skb(skb))
- goto free_skb;
- } else if (ro->fd_frames && dev->mtu == CANFD_MTU) {
- /* CAN FD and Classical CAN */
- if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb))
- goto free_skb;
- } else {
- /* Classical CAN */
- if (!can_is_can_skb(skb))
- goto free_skb;
- }
+ if (raw_bad_txframe(ro, skb, dev->mtu))
+ goto free_skb;
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 032d6d0a5ce6..909a10e4b0dd 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -9979,7 +9979,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
goto err_xa_alloc;
devlink->netdevice_nb.notifier_call = devlink_netdevice_event;
- ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb);
+ ret = register_netdevice_notifier(&devlink->netdevice_nb);
if (ret)
goto err_register_netdevice_notifier;
@@ -10171,8 +10171,7 @@ void devlink_free(struct devlink *devlink)
xa_destroy(&devlink->snapshot_ids);
xa_destroy(&devlink->ports);
- WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink),
- &devlink->netdevice_nb));
+ WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
xa_erase(&devlinks, devlink->index);
@@ -10503,6 +10502,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
break;
case NETDEV_REGISTER:
case NETDEV_CHANGENAME:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Set the netdev on top of previously set type. Note this
* event happens also during net namespace change so here
* we take into account netdev pointer appearing in this
@@ -10512,6 +10513,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
netdev);
break;
case NETDEV_UNREGISTER:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Clear netdev pointer, but not the type. This event happens
* also during net namespace change so we need to clear
* pointer to netdev that is going to another net namespace.
diff --git a/net/core/filter.c b/net/core/filter.c
index 929358677183..43cc1fe58a2c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3180,15 +3180,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
+ void *old_data;
+
/* skb_ensure_writable() is not needed here, as we're
* already working on an uncloned skb.
*/
if (unlikely(!pskb_may_pull(skb, off + len)))
return -ENOMEM;
- skb_postpull_rcsum(skb, skb->data + off, len);
- memmove(skb->data + len, skb->data, off);
+ old_data = skb->data;
__skb_pull(skb, len);
+ skb_postpull_rcsum(skb, old_data + off, len);
+ memmove(skb->data, old_data, off);
return 0;
}
diff --git a/net/core/gro.c b/net/core/gro.c
index fd8c6a7e8d3e..4bac7ea6e025 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -162,6 +162,15 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
struct sk_buff *lp;
int segs;
+ /* Do not splice page pool based packets w/ non-page pool
+ * packets. This can result in reference count issues as page
+ * pool pages will not decrement the reference count and will
+ * instead be immediately returned to the pool or have frag
+ * count decremented.
+ */
+ if (p->pp_recycle != skb->pp_recycle)
+ return -ETOOMANYREFS;
+
/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -505,8 +514,9 @@ found_ptype:
NAPI_GRO_CB(skb)->count = 1;
if (unlikely(skb_is_gso(skb))) {
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
- /* Only support TCP at the moment. */
- if (!skb_is_gso_tcp(skb))
+ /* Only support TCP and non DODGY users. */
+ if (!skb_is_gso_tcp(skb) ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY))
NAPI_GRO_CB(skb)->flush = 1;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f00a79fc301b..4edd2176e238 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -269,7 +269,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
(n->nud_state == NUD_NOARP) ||
(tbl->is_multicast &&
tbl->is_multicast(n->primary_key)) ||
- time_after(tref, n->updated))
+ !time_in_range(n->updated, tref, jiffies))
remove = true;
write_unlock(&n->lock);
@@ -289,7 +289,17 @@ static int neigh_forced_gc(struct neigh_table *tbl)
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
+ /* Use safe distance from the jiffies - LONG_MAX point while timer
+ * is running in DELAY/PROBE state but still show to user space
+ * large times in the past.
+ */
+ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
+
neigh_hold(n);
+ if (!time_in_range(n->confirmed, mint, jiffies))
+ n->confirmed = mint;
+ if (time_before(n->used, n->confirmed))
+ n->used = n->confirmed;
if (unlikely(mod_timer(&n->timer, when))) {
printk("NEIGH: BUG, double timer add, state is %x\n",
n->nud_state);
@@ -1001,12 +1011,14 @@ static void neigh_periodic_work(struct work_struct *work)
goto next_elt;
}
- if (time_before(n->used, n->confirmed))
+ if (time_before(n->used, n->confirmed) &&
+ time_is_before_eq_jiffies(n->confirmed))
n->used = n->confirmed;
if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ !time_in_range_open(jiffies, n->used,
+ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
neigh_mark_dead(n);
write_unlock(&n->lock);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 5581d22cc191..078a0a420c8a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -137,12 +137,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
return 0;
if (ops->id && ops->size) {
-cleanup:
ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&pernet_ops_rwsem));
ng->ptr[*ops->id] = NULL;
}
+cleanup:
kfree(data);
out:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a0eb5593275..a31ff4d83ecc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4100,7 +4100,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_shinfo(skb)->frag_list = NULL;
- do {
+ while (list_skb) {
nskb = list_skb;
list_skb = list_skb->next;
@@ -4146,8 +4146,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
if (skb_needs_linearize(nskb, features) &&
__skb_linearize(nskb))
goto err_linearize;
-
- } while (list_skb);
+ }
skb->truesize = skb->truesize - delta_truesize;
skb->data_len = skb->data_len - delta_len;
diff --git a/net/core/sock.c b/net/core/sock.c
index f954d5893e79..6f27c24016fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1531,6 +1531,8 @@ set_sndbuf:
ret = -EINVAL;
break;
}
+ if ((u8)val == SOCK_TXREHASH_DEFAULT)
+ val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
/* Paired with READ_ONCE() in tcp_rtx_synack() */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -3451,7 +3453,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
- sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk);
/*
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 22fa2c5bc6ec..a68a7290a3b2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk)
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
+ saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+ } else {
+ saved_unhash = psock->saved_unhash;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
}
-
- saved_unhash = psock->saved_unhash;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
+ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+ return;
+ if (saved_unhash)
+ saved_unhash(sk);
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
@@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk)
psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->destroy)
- sk->sk_prot->destroy(sk);
- return;
+ saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+ } else {
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ sk_psock_put(sk, psock);
}
-
- saved_destroy = psock->saved_destroy;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- sk_psock_put(sk, psock);
- saved_destroy(sk);
+ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+ return;
+ if (saved_destroy)
+ saved_destroy(sk);
}
EXPORT_SYMBOL_GPL(sock_map_destroy);
@@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout)
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+ } else {
+ saved_close = psock->saved_close;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ release_sock(sk);
+ cancel_work_sync(&psock->work);
+ sk_psock_put(sk, psock);
}
-
- saved_close = psock->saved_close;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- release_sock(sk);
- cancel_work_sync(&psock->work);
- sk_psock_put(sk, psock);
+ /* Make sure we do not recurse. This is a bug.
+ * Leak the socket instead of crashing on a stack overflow.
+ */
+ if (WARN_ON_ONCE(saved_close == sock_map_close))
+ return;
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index c2f1a542e6fa..646b3e490c71 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2078,58 +2078,91 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return ret;
}
-static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+static int ethtool_vzalloc_stats_array(int n_stats, u64 **data)
{
+ if (n_stats < 0)
+ return n_stats;
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ if (WARN_ON_ONCE(!n_stats))
+ return -EOPNOTSUPP;
+
+ *data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!*data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ethtool_get_phy_stats_phydev(struct phy_device *phydev,
+ struct ethtool_stats *stats,
+ u64 **data)
+ {
const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
+ int n_stats, ret;
+
+ if (!phy_ops || !phy_ops->get_sset_count || !phy_ops->get_stats)
+ return -EOPNOTSUPP;
+
+ n_stats = phy_ops->get_sset_count(phydev);
+
+ ret = ethtool_vzalloc_stats_array(n_stats, data);
+ if (ret)
+ return ret;
+
+ stats->n_stats = n_stats;
+ return phy_ops->get_stats(phydev, stats, *data);
+}
+
+static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 **data)
+{
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
- struct ethtool_stats stats;
- u64 *data;
- int ret, n_stats;
+ int n_stats, ret;
- if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
+ if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats)
return -EOPNOTSUPP;
- if (phydev && !ops->get_ethtool_phy_stats &&
- phy_ops && phy_ops->get_sset_count)
- n_stats = phy_ops->get_sset_count(phydev);
- else
- n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
- if (n_stats < 0)
- return n_stats;
- if (n_stats > S32_MAX / sizeof(u64))
- return -ENOMEM;
- WARN_ON_ONCE(!n_stats);
+ n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+
+ ret = ethtool_vzalloc_stats_array(n_stats, data);
+ if (ret)
+ return ret;
+
+ stats->n_stats = n_stats;
+ ops->get_ethtool_phy_stats(dev, stats, *data);
+
+ return 0;
+}
+
+static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+{
+ struct phy_device *phydev = dev->phydev;
+ struct ethtool_stats stats;
+ u64 *data = NULL;
+ int ret = -EOPNOTSUPP;
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
- stats.n_stats = n_stats;
+ if (phydev)
+ ret = ethtool_get_phy_stats_phydev(phydev, &stats, &data);
- if (n_stats) {
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (!data)
- return -ENOMEM;
+ if (ret == -EOPNOTSUPP)
+ ret = ethtool_get_phy_stats_ethtool(dev, &stats, &data);
- if (phydev && !ops->get_ethtool_phy_stats &&
- phy_ops && phy_ops->get_stats) {
- ret = phy_ops->get_stats(phydev, &stats, data);
- if (ret < 0)
- goto out;
- } else {
- ops->get_ethtool_phy_stats(dev, &stats, data);
- }
- } else {
- data = NULL;
- }
+ if (ret)
+ goto out;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &stats, sizeof(stats)))
+ if (copy_to_user(useraddr, &stats, sizeof(stats))) {
+ ret = -EFAULT;
goto out;
+ }
+
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
- goto out;
- ret = 0;
+ if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64))))
+ ret = -EFAULT;
out:
vfree(data);
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index ebe6145aed3f..be260ab34e58 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -122,10 +122,13 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
{
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
- if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) ||
- nla_put(skb, ETHTOOL_A_RSS_INDIR,
- sizeof(u32) * data->indir_size, data->indir_table) ||
- nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))
+ if ((data->hfunc &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
+ (data->indir_size &&
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table)) ||
+ (data->hkey_size &&
+ nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
return -EMSGSIZE;
return 0;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ab4a06be489b..cf11f10927e1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -347,6 +347,7 @@ lookup_protocol:
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
inet->mc_loop = 1;
@@ -1665,6 +1666,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
if (rc == 0) {
*sk = sock->sk;
(*sk)->sk_allocation = GFP_ATOMIC;
+ (*sk)->sk_use_task_frag = false;
/*
* Unhash it so that IP input processing does not even see it,
* we do not wish this socket to see incoming packets.
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ce9ff3c62e84..3bb890a40ed7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/netlink.h>
#include <linux/hash.h>
+#include <linux/nospec.h>
#include <net/arp.h>
#include <net/inet_dscp.h>
@@ -1022,6 +1023,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
if (type > RTAX_MAX)
return false;
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
bool ecn_ca = false;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b366ab9148f2..f2c43f67187d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
return false;
}
+static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ return false;
+
+ return inet_bind_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok);
+}
+
static bool inet_bhash2_conflict(const struct sock *sk,
const struct inet_bind2_bucket *tb2,
kuid_t sk_uid,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
+ struct inet_timewait_sock *tw2;
struct sock *sk2;
sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
+
+ twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
+ sk2 = (struct sock *)tw2;
- if (inet_bind_conflict(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
return true;
}
+
return false;
}
@@ -1182,20 +1200,31 @@ void inet_csk_prepare_forced_close(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+static int inet_ulp_can_listen(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
+ return -EINVAL;
+
+ return 0;
+}
+
int inet_csk_listen_start(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
int err;
+ err = inet_ulp_can_listen(sk);
+ if (unlikely(err))
+ return err;
+
reqsk_queue_alloc(&icsk->icsk_accept_queue);
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
- if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
- sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
-
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d039b4e732a3..f58d73888638 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
#endif
tb->rcv_saddr = sk->sk_rcv_saddr;
INIT_HLIST_HEAD(&tb->owners);
+ INIT_HLIST_HEAD(&tb->deathrow);
hlist_add_head(&tb->node, &head->chain);
}
@@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
/* Caller must hold hashbucket lock for this tb with local BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
__hlist_del(&tb->node);
kmem_cache_free(cachep, tb);
}
@@ -649,8 +650,20 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_nulls_del_node_init_rcu(osk);
- } else if (found_dup_sk) {
+ ret = sk_hashed(osk);
+ if (ret) {
+ /* Before deleting the node, we insert a new one to make
+ * sure that the look-up-sk process would not miss either
+ * of them and that at least one node would exist in ehash
+ * table all the time. Otherwise there's a tiny chance
+ * that lookup process could find nothing in ehash table.
+ */
+ __sk_nulls_add_node_tail_rcu(sk, list);
+ sk_nulls_del_node_init_rcu(osk);
+ }
+ goto unlock;
+ }
+ if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -659,6 +672,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
+unlock:
spin_unlock(lock);
return ret;
@@ -1103,15 +1117,16 @@ ok:
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, tb2, port);
- spin_unlock(&head2->lock);
-
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
+
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
+
if (tw)
inet_twsk_deschedule_put(tw);
local_bh_enable();
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 66fc940f9521..beed32fff484 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -29,6 +29,7 @@
void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
+ struct inet_bind2_bucket *tb2 = tw->tw_tb2;
struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb)
@@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ __hlist_del(&tw->tw_bind2_node);
+ tw->tw_tb2 = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+
__sock_put((struct sock *)tw);
}
@@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
spin_lock(lock);
sk_nulls_del_node_init_rcu((struct sock *)tw);
@@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw,
+ twsk_net(tw), tw->tw_num);
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
inet_twsk_bind_unhash(tw, hashinfo);
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
refcount_dec(&tw->tw_dr->tw_refcount);
@@ -81,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
+static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
{
- hlist_nulls_add_head_rcu(&tw->tw_node, list);
+ hlist_nulls_add_tail_rcu(&tw->tw_node, list);
}
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
hlist_add_head(&tw->tw_bind_node, list);
}
+static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
+ struct hlist_head *list)
+{
+ hlist_add_head(&tw->tw_bind2_node, list);
+}
+
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -105,22 +121,33 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
+
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
+
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
+
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+
+ tw->tw_tb2 = icsk->icsk_bind2_hash;
+ WARN_ON(!icsk->icsk_bind2_hash);
+ inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
spin_lock(lock);
- inet_twsk_add_node_rcu(tw, &ehead->chain);
+ inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 7fcfdfd8f9de..0e3ee1532848 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/netlink.h>
+#include <linux/nospec.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>
#include <net/ip.h>
@@ -25,6 +26,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
return -EINVAL;
}
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c567d5e8053e..33f559f491c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -435,6 +435,7 @@ void tcp_init_sock(struct sock *sk)
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
+ tp->rate_app_limited = 1;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
@@ -3178,6 +3179,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->plb_rehash = 0;
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
+ tp->rate_app_limited = 1;
tp->rack.mstamp = 0;
tp->rack.advanced = 0;
tp->rack.reo_wnd_steps = 1;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 94aad3870c5f..cf26d65ca389 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -6,6 +6,7 @@
#include <linux/bpf.h>
#include <linux/init.h>
#include <linux/wait.h>
+#include <linux/util_macros.h>
#include <net/inet_common.h>
#include <net/tls.h>
@@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
*/
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
struct proto *prot = newsk->sk_prot;
- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+ if (is_insidevar(prot, tcp_bpf_prots))
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 9ae50b1bd844..2aa442128630 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
if (sk->sk_socket)
clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ err = -ENOTCONN;
+ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN)
+ goto out_err;
+
err = ulp_ops->init(sk);
if (err)
goto out_err;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f7a84a4acffc..faa47f9ea73a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3127,17 +3127,17 @@ static void add_v4_addrs(struct inet6_dev *idev)
offset = sizeof(struct in6_addr) - 4;
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
- if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
+ } else {
if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
return;
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
- } else {
- scope = IPV6_ADDR_COMPATv4;
- plen = 96;
- pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
@@ -3447,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
+static void addrconf_init_auto_addrs(struct net_device *dev)
+{
+ switch (dev->type) {
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+#endif
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
+ break;
+#endif
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ }
+}
+
static int fixup_permanent_addr(struct net *net,
struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
@@ -3615,26 +3639,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- switch (dev->type) {
-#if IS_ENABLED(CONFIG_IPV6_SIT)
- case ARPHRD_SIT:
- addrconf_sit_config(dev);
- break;
-#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
- case ARPHRD_IP6GRE:
- case ARPHRD_IPGRE:
- addrconf_gre_config(dev);
- break;
-#endif
- case ARPHRD_LOOPBACK:
- init_loopback(dev);
- break;
-
- default:
- addrconf_dev_config(dev);
- break;
- }
+ addrconf_init_auto_addrs(dev);
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
@@ -6397,7 +6402,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6408,7 +6413,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
}
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fee9163382c2..847934763868 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -222,6 +222,7 @@ lookup_protocol:
np->pmtudisc = IPV6_PMTUDISC_WANT;
np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 60fd91bb5171..c314fdde0097 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -547,7 +547,20 @@ int ip6_forward(struct sk_buff *skb)
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
- hdr->hop_limit--;
+ /* It's tempting to decrease the hop limit
+ * here by 1, as we do at the end of the
+ * function too.
+ *
+ * But that would be incorrect, as proxying is
+ * not forwarding. The ip6_input function
+ * will handle this packet locally, and it
+ * depends on the hop limit being unchanged.
+ *
+ * One example is the NDP hop limit, that
+ * always has to stay 255, but other would be
+ * similar checks around RA packets, where the
+ * user can even change the desired limit.
+ */
return ip6_input(skb);
} else if (proxied < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a06a9f847db5..ada087b50541 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -505,6 +505,7 @@ csum_copy_err:
static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
struct raw6_sock *rp)
{
+ struct ipv6_txoptions *opt;
struct sk_buff *skb;
int err = 0;
int offset;
@@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
offset = rp->offset;
total_len = inet_sk(sk)->cork.base.length;
+ opt = inet6_sk(sk)->cork.opt;
+ total_len -= opt ? opt->opt_flen : 0;
+
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 9a1415fe3fa7..03608d3ded4b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq;
/* per-net private data for this module */
static unsigned int l2tp_net_id;
struct l2tp_net {
- struct list_head l2tp_tunnel_list;
- /* Lock for write access to l2tp_tunnel_list */
- spinlock_t l2tp_tunnel_list_lock;
+ /* Lock for write access to l2tp_tunnel_idr */
+ spinlock_t l2tp_tunnel_idr_lock;
+ struct idr l2tp_tunnel_idr;
struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
/* Lock for write access to l2tp_session_hlist */
spinlock_t l2tp_session_hlist_lock;
@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
struct l2tp_tunnel *tunnel;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id &&
- refcount_inc_not_zero(&tunnel->ref_count)) {
- rcu_read_unlock_bh();
-
- return tunnel;
- }
+ tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
+ if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
}
rcu_read_unlock_bh();
@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
{
- const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_net *pn = l2tp_pernet(net);
+ unsigned long tunnel_id, tmp;
struct l2tp_tunnel *tunnel;
int count = 0;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth &&
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel && ++count > nth &&
refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
@@ -1043,7 +1041,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
nf_reset_ct(skb);
- bh_lock_sock(sk);
+ bh_lock_sock_nested(sk);
if (sock_owned_by_user(sk)) {
kfree_skb(skb);
ret = NET_XMIT_DROP;
@@ -1227,6 +1225,15 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
l2tp_tunnel_delete(tunnel);
}
+static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+}
+
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
@@ -1234,7 +1241,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
del_work);
struct sock *sk = tunnel->sock;
struct socket *sock = sk->sk_socket;
- struct l2tp_net *pn;
l2tp_tunnel_closeall(tunnel);
@@ -1248,12 +1254,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
}
}
- /* Remove the tunnel struct from the tunnel list */
- pn = l2tp_pernet(tunnel->l2tp_net);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_del_rcu(&tunnel->list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
+ l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
/* drop initial ref */
l2tp_tunnel_dec_refcount(tunnel);
@@ -1384,8 +1385,6 @@ out:
return err;
}
-static struct lock_class_key l2tp_socket_class;
-
int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
@@ -1455,12 +1454,19 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
struct l2tp_tunnel_cfg *cfg)
{
- struct l2tp_tunnel *tunnel_walk;
- struct l2tp_net *pn;
+ struct l2tp_net *pn = l2tp_pernet(net);
+ u32 tunnel_id = tunnel->tunnel_id;
struct socket *sock;
struct sock *sk;
int ret;
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
+ GFP_ATOMIC);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+ if (ret)
+ return ret == -ENOSPC ? -EEXIST : ret;
+
if (tunnel->fd < 0) {
ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
tunnel->peer_tunnel_id, cfg,
@@ -1474,6 +1480,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
}
sk = sock->sk;
+ lock_sock(sk);
write_lock_bh(&sk->sk_callback_lock);
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
@@ -1481,24 +1488,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
rcu_assign_sk_user_data(sk, tunnel);
write_unlock_bh(&sk->sk_callback_lock);
- tunnel->l2tp_net = net;
- pn = l2tp_pernet(net);
-
- sock_hold(sk);
- tunnel->sock = sk;
-
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
- if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- sock_put(sk);
- ret = -EEXIST;
- goto err_sock;
- }
- }
- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
@@ -1512,9 +1501,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
tunnel->old_sk_destruct = sk->sk_destruct;
sk->sk_destruct = &l2tp_tunnel_destruct;
- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
- "l2tp_sock");
sk->sk_allocation = GFP_ATOMIC;
+ release_sock(sk);
+
+ sock_hold(sk);
+ tunnel->sock = sk;
+ tunnel->l2tp_net = net;
+
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
trace_register_tunnel(tunnel);
@@ -1523,17 +1519,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
return 0;
-err_sock:
- write_lock_bh(&sk->sk_callback_lock);
- rcu_assign_sk_user_data(sk, NULL);
err_inval_sock:
write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
if (tunnel->fd < 0)
sock_release(sock);
else
sockfd_put(sock);
err:
+ l2tp_tunnel_remove(net, tunnel);
return ret;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
@@ -1647,8 +1642,8 @@ static __net_init int l2tp_init_net(struct net *net)
struct l2tp_net *pn = net_generic(net, l2tp_net_id);
int hash;
- INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
- spin_lock_init(&pn->l2tp_tunnel_list_lock);
+ idr_init(&pn->l2tp_tunnel_idr);
+ spin_lock_init(&pn->l2tp_tunnel_idr_lock);
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
@@ -1662,11 +1657,13 @@ static __net_exit void l2tp_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
+ unsigned long tunnel_id, tmp;
int hash;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- l2tp_tunnel_delete(tunnel);
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
@@ -1676,6 +1673,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
+ idr_destroy(&pn->l2tp_tunnel_idr);
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9c40f8d3bce8..f9514bacbd4a 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
{
struct tid_ampdu_tx *tid_tx;
struct ieee80211_local *local = sta->local;
- struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_sub_if_data *sdata;
struct ieee80211_ampdu_params params = {
.sta = &sta->sta,
.action = IEEE80211_AMPDU_TX_START,
@@ -511,8 +511,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
- ieee80211_agg_stop_txq(sta, tid);
-
/*
* Make sure no packets are being processed. This ensures that
* we have a valid starting sequence number and that in-flight
@@ -521,6 +519,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
synchronize_net();
+ sdata = sta->sdata;
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
tid_tx->ssn = params.ssn;
@@ -534,6 +533,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
} else if (ret) {
+ if (!sdata)
+ return;
+
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8f9a2ab502b3..672eff6f5d32 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -147,6 +147,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
link_conf->bssid_index = 0;
link_conf->nontransmitted = false;
link_conf->ema_ap = false;
+ link_conf->bssid_indicator = 0;
if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
return -EINVAL;
@@ -1511,6 +1512,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
kfree(link_conf->ftmr_params);
link_conf->ftmr_params = NULL;
+ sdata->vif.mbssid_tx_vif = NULL;
+ link_conf->bssid_index = 0;
+ link_conf->nontransmitted = false;
+ link_conf->ema_ap = false;
+ link_conf->bssid_indicator = 0;
+
__sta_info_flush(sdata, true);
ieee80211_free_keys(sdata, true);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 7a3d7893e19d..f1914bf39f0e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -167,7 +167,7 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
continue;
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz + buf - p,
- "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -182,7 +182,8 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->flags,
test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
- test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
+ test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "",
+ test_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ? " DIRTY" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index d737db4e07e2..cfb09e4aed4d 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -392,6 +392,9 @@ int drv_ampdu_action(struct ieee80211_local *local,
might_sleep();
+ if (!sdata)
+ return -EIO;
+
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
return -EIO;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 809bad53e15b..5d13a3dfd366 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1199,7 +1199,7 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
/* In reconfig don't transmit now, but mark for waking later */
if (local->in_reconfig) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags);
+ set_bit(IEEE80211_TXQ_DIRTY, &txq->flags);
return;
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 83bc41346ae7..5315ab750280 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -391,6 +391,37 @@ void ieee80211_ba_session_work(struct work_struct *work)
tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
if (!blocked && tid_tx) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
+ struct ieee80211_sub_if_data *sdata =
+ vif_to_sdata(txqi->txq.vif);
+ struct fq *fq = &sdata->local->fq;
+
+ spin_lock_bh(&fq->lock);
+
+ /* Allow only frags to be dequeued */
+ set_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+
+ if (!skb_queue_empty(&txqi->frags)) {
+ /* Fragmented Tx is ongoing, wait for it to
+ * finish. Reschedule worker to retry later.
+ */
+
+ spin_unlock_bh(&fq->lock);
+ spin_unlock_bh(&sta->lock);
+
+ /* Give the task working on the txq a chance
+ * to send out the queued frags
+ */
+ synchronize_net();
+
+ mutex_unlock(&sta->ampdu_mlme.mtx);
+
+ ieee80211_queue_work(&sdata->local->hw, work);
+ return;
+ }
+
+ spin_unlock_bh(&fq->lock);
+
/*
* Assign it over to the normal tid_tx array
* where it "goes live".
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 63ff0d2524b6..d16606e84e22 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -838,7 +838,7 @@ enum txq_info_flags {
IEEE80211_TXQ_STOP,
IEEE80211_TXQ_AMPDU,
IEEE80211_TXQ_NO_AMSDU,
- IEEE80211_TXQ_STOP_NETIF_TX,
+ IEEE80211_TXQ_DIRTY,
};
/**
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d49a5906a943..23ed13f15067 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -364,7 +364,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
/* No support for VLAN with MLO yet */
if (iftype == NL80211_IFTYPE_AP_VLAN &&
- nsdata->wdev.use_4addr)
+ sdata->wdev.use_4addr &&
+ nsdata->vif.type == NL80211_IFTYPE_AP &&
+ nsdata->vif.valid_links)
return -EOPNOTSUPP;
/*
@@ -2195,7 +2197,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = cfg80211_register_netdevice(ndev);
if (ret) {
- ieee80211_if_free(ndev);
free_netdev(ndev);
return ret;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7e3ab6e1b28f..c6562a6d2503 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4049,6 +4049,58 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
#undef CALL_RXH
}
+static bool
+ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
+{
+ if (!sta->mlo)
+ return false;
+
+ return !!(sta->valid_links & BIT(link_id));
+}
+
+static bool ieee80211_rx_data_set_link(struct ieee80211_rx_data *rx,
+ u8 link_id)
+{
+ rx->link_id = link_id;
+ rx->link = rcu_dereference(rx->sdata->link[link_id]);
+
+ if (!rx->sta)
+ return rx->link;
+
+ if (!ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, link_id))
+ return false;
+
+ rx->link_sta = rcu_dereference(rx->sta->link[link_id]);
+
+ return rx->link && rx->link_sta;
+}
+
+static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx,
+ struct ieee80211_sta *pubsta,
+ int link_id)
+{
+ struct sta_info *sta;
+
+ sta = container_of(pubsta, struct sta_info, sta);
+
+ rx->link_id = link_id;
+ rx->sta = sta;
+
+ if (sta) {
+ rx->local = sta->sdata->local;
+ if (!rx->sdata)
+ rx->sdata = sta->sdata;
+ rx->link_sta = &sta->deflink;
+ }
+
+ if (link_id < 0)
+ rx->link = &rx->sdata->deflink;
+ else if (!ieee80211_rx_data_set_link(rx, link_id))
+ return false;
+
+ return true;
+}
+
/*
* This function makes calls into the RX path, therefore
* it has to be invoked under RCU read lock.
@@ -4057,16 +4109,19 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
{
struct sk_buff_head frames;
struct ieee80211_rx_data rx = {
- .sta = sta,
- .sdata = sta->sdata,
- .local = sta->local,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .link_id = -1,
};
struct tid_ampdu_rx *tid_agg_rx;
- u8 link_id;
+ int link_id = -1;
+
+ /* FIXME: statistics won't be right with this */
+ if (sta->sta.valid_links)
+ link_id = ffs(sta->sta.valid_links) - 1;
+
+ if (!ieee80211_rx_data_set_sta(&rx, &sta->sta, link_id))
+ return;
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx)
@@ -4086,10 +4141,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
};
drv_event_callback(rx.local, rx.sdata, &event);
}
- /* FIXME: statistics won't be right with this */
- link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0;
- rx.link = rcu_dereference(sta->sdata->link[link_id]);
- rx.link_sta = rcu_dereference(sta->link[link_id]);
ieee80211_rx_handlers(&rx, &frames);
}
@@ -4105,7 +4156,6 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .link_id = -1,
};
int i, diff;
@@ -4116,10 +4166,8 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
sta = container_of(pubsta, struct sta_info, sta);
- rx.sta = sta;
- rx.sdata = sta->sdata;
- rx.link = &rx.sdata->deflink;
- rx.local = sta->local;
+ if (!ieee80211_rx_data_set_sta(&rx, pubsta, -1))
+ return;
rcu_read_lock();
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
@@ -4506,15 +4554,6 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->sta_mtx);
}
-static bool
-ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
-{
- if (!sta->mlo)
- return false;
-
- return !!(sta->valid_links & BIT(link_id));
-}
-
static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
struct ieee80211_fast_rx *fast_rx,
int orig_len)
@@ -4625,7 +4664,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct sta_info *sta = rx->sta;
int orig_len = skb->len;
int hdrlen = ieee80211_hdrlen(hdr->frame_control);
int snap_offs = hdrlen;
@@ -4637,7 +4675,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 da[ETH_ALEN];
u8 sa[ETH_ALEN];
} addrs __aligned(2);
- struct link_sta_info *link_sta;
struct ieee80211_sta_rx_stats *stats;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
@@ -4740,18 +4777,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
drop:
dev_kfree_skb(skb);
- if (rx->link_id >= 0) {
- link_sta = rcu_dereference(sta->link[rx->link_id]);
- if (!link_sta)
- return true;
- } else {
- link_sta = &sta->deflink;
- }
-
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(link_sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats);
else
- stats = &link_sta->rx_stats;
+ stats = &rx->link_sta->rx_stats;
stats->dropped++;
return true;
@@ -4769,8 +4798,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_hdr *hdr = (void *)skb->data;
- struct link_sta_info *link_sta = NULL;
- struct ieee80211_link_data *link;
+ struct link_sta_info *link_sta = rx->link_sta;
+ struct ieee80211_link_data *link = rx->link;
rx->skb = skb;
@@ -4792,35 +4821,6 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
if (!ieee80211_accept_frame(rx))
return false;
- if (rx->link_id >= 0) {
- link = rcu_dereference(rx->sdata->link[rx->link_id]);
-
- /* we might race link removal */
- if (!link)
- return true;
- rx->link = link;
-
- if (rx->sta) {
- rx->link_sta =
- rcu_dereference(rx->sta->link[rx->link_id]);
- if (!rx->link_sta)
- return true;
- }
- } else {
- if (rx->sta)
- rx->link_sta = &rx->sta->deflink;
-
- rx->link = &sdata->deflink;
- }
-
- if (unlikely(!is_multicast_ether_addr(hdr->addr1) &&
- rx->link_id >= 0 && rx->sta && rx->sta->sta.mlo)) {
- link_sta = rcu_dereference(rx->sta->link[rx->link_id]);
-
- if (WARN_ON_ONCE(!link_sta))
- return true;
- }
-
if (!consume) {
struct skb_shared_hwtstamps *shwt;
@@ -4838,9 +4838,12 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
*/
shwt = skb_hwtstamps(rx->skb);
shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+
+ /* Update the hdr pointer to the new skb for translation below */
+ hdr = (struct ieee80211_hdr *)rx->skb->data;
}
- if (unlikely(link_sta)) {
+ if (unlikely(rx->sta && rx->sta->sta.mlo)) {
/* translate to MLD addresses */
if (ether_addr_equal(link->conf->addr, hdr->addr1))
ether_addr_copy(hdr->addr1, rx->sdata->vif.addr);
@@ -4870,6 +4873,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_fast_rx *fast_rx;
struct ieee80211_rx_data rx;
+ int link_id = -1;
memset(&rx, 0, sizeof(rx));
rx.skb = skb;
@@ -4886,12 +4890,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
if (!pubsta)
goto drop;
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
-
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id))
- goto drop;
+ if (status->link_valid)
+ link_id = status->link_id;
/*
* TODO: Should the frame be dropped if the right link_id is not
@@ -4900,19 +4900,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
* link_id is used only for stats purpose and updating the stats on
* the deflink is fine?
*/
- if (status->link_valid)
- rx.link_id = status->link_id;
-
- if (rx.link_id >= 0) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(rx.sdata->link[rx.link_id]);
- if (!link)
- goto drop;
- rx.link = link;
- } else {
- rx.link = &rx.sdata->deflink;
- }
+ if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id))
+ goto drop;
fast_rx = rcu_dereference(rx.sta->fast_rx);
if (!fast_rx)
@@ -4930,6 +4919,8 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
{
struct link_sta_info *link_sta;
struct ieee80211_hdr *hdr = (void *)skb->data;
+ struct sta_info *sta;
+ int link_id = -1;
/*
* Look up link station first, in case there's a
@@ -4939,24 +4930,19 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
*/
link_sta = link_sta_info_get_bss(rx->sdata, hdr->addr2);
if (link_sta) {
- rx->sta = link_sta->sta;
- rx->link_id = link_sta->link_id;
+ sta = link_sta->sta;
+ link_id = link_sta->link_id;
} else {
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2);
- if (rx->sta) {
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta,
- status->link_id))
- return false;
-
- rx->link_id = status->link_valid ? status->link_id : -1;
- } else {
- rx->link_id = -1;
- }
+ sta = sta_info_get_bss(rx->sdata, hdr->addr2);
+ if (status->link_valid)
+ link_id = status->link_id;
}
+ if (!ieee80211_rx_data_set_sta(rx, &sta->sta, link_id))
+ return false;
+
return ieee80211_prepare_and_rx_handle(rx, skb, consume);
}
@@ -5015,19 +5001,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (ieee80211_is_data(fc)) {
struct sta_info *sta, *prev_sta;
- u8 link_id = status->link_id;
+ int link_id = -1;
- if (pubsta) {
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
+ if (status->link_valid)
+ link_id = status->link_id;
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id))
+ if (pubsta) {
+ if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id))
goto out;
- if (status->link_valid)
- rx.link_id = status->link_id;
-
/*
* In MLO connection, fetch the link_id using addr2
* when the driver does not pass link_id in status.
@@ -5045,7 +5027,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (!link_sta)
goto out;
- rx.link_id = link_sta->link_id;
+ ieee80211_rx_data_set_link(&rx, link_sta->link_id);
}
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
@@ -5061,30 +5043,27 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
continue;
}
- if ((status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
- link_id)) ||
- (!status->link_valid && prev_sta->sta.mlo))
+ rx.sdata = prev_sta->sdata;
+ if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta,
+ link_id))
+ goto out;
+
+ if (!status->link_valid && prev_sta->sta.mlo)
continue;
- rx.link_id = status->link_valid ? link_id : -1;
- rx.sta = prev_sta;
- rx.sdata = prev_sta->sdata;
ieee80211_prepare_and_rx_handle(&rx, skb, false);
prev_sta = sta;
}
if (prev_sta) {
- if ((status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
- link_id)) ||
- (!status->link_valid && prev_sta->sta.mlo))
+ rx.sdata = prev_sta->sdata;
+ if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta,
+ link_id))
goto out;
- rx.link_id = status->link_valid ? link_id : -1;
- rx.sta = prev_sta;
- rx.sdata = prev_sta->sdata;
+ if (!status->link_valid && prev_sta->sta.mlo)
+ goto out;
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
return;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2171cd1ca807..defe97a31724 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1129,7 +1129,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
struct sk_buff *purge_skb = NULL;
if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
reset_agg_timer = true;
} else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
/*
@@ -1161,7 +1160,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
if (!tid_tx) {
/* do nothing, let packet pass through */
} else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
reset_agg_timer = true;
} else {
queued = true;
@@ -3677,8 +3675,7 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
info->band = fast_tx->band;
info->control.vif = &sdata->vif;
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
- IEEE80211_TX_CTL_DONTFRAG |
- (ampdu ? IEEE80211_TX_CTL_AMPDU : 0);
+ IEEE80211_TX_CTL_DONTFRAG;
info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT |
u32_encode_bits(IEEE80211_LINK_UNSPECIFIED,
IEEE80211_TX_CTRL_MLO_LINK);
@@ -3783,6 +3780,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
struct ieee80211_vif *vif = txq->vif;
+ int q = vif->hw_queue[txq->ac];
+ bool q_stopped;
WARN_ON_ONCE(softirq_count() == 0);
@@ -3790,17 +3789,18 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
return NULL;
begin:
- spin_lock_bh(&fq->lock);
-
- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
- test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
- goto out;
+ spin_lock(&local->queue_stop_reason_lock);
+ q_stopped = local->queue_stop_reasons[q];
+ spin_unlock(&local->queue_stop_reason_lock);
- if (vif->txqs_stopped[txq->ac]) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
- goto out;
+ if (unlikely(q_stopped)) {
+ /* mark for waking later */
+ set_bit(IEEE80211_TXQ_DIRTY, &txqi->flags);
+ return NULL;
}
+ spin_lock_bh(&fq->lock);
+
/* Make sure fragments stay together. */
skb = __skb_dequeue(&txqi->frags);
if (unlikely(skb)) {
@@ -3810,6 +3810,9 @@ begin:
IEEE80211_SKB_CB(skb)->control.flags &=
~IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
} else {
+ if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags)))
+ goto out;
+
skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
}
@@ -3860,9 +3863,8 @@ begin:
}
if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
- info->flags |= IEEE80211_TX_CTL_AMPDU;
- else
- info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+ info->flags |= (IEEE80211_TX_CTL_AMPDU |
+ IEEE80211_TX_CTL_DONTFRAG);
if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
@@ -4596,8 +4598,6 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
- if (tid_tx)
- info->flags |= IEEE80211_TX_CTL_AMPDU;
info->hw_queue = sdata->vif.hw_queue[queue];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6f5407038459..261ac667887f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -292,22 +292,12 @@ static void wake_tx_push_queue(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_txq *queue)
{
- int q = sdata->vif.hw_queue[queue->ac];
struct ieee80211_tx_control control = {
.sta = queue->sta,
};
struct sk_buff *skb;
- unsigned long flags;
- bool q_stopped;
while (1) {
- spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- q_stopped = local->queue_stop_reasons[q];
- spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
-
- if (q_stopped)
- break;
-
skb = ieee80211_tx_dequeue(&local->hw, queue);
if (!skb)
break;
@@ -347,8 +337,6 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
local_bh_disable();
spin_lock(&fq->lock);
- sdata->vif.txqs_stopped[ac] = false;
-
if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
goto out;
@@ -370,7 +358,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
if (ac != txq->ac)
continue;
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY,
&txqi->flags))
continue;
@@ -385,7 +373,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
txqi = to_txq_info(vif->txq);
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ||
(ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
goto out;
@@ -517,8 +505,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_sub_if_data *sdata;
- int n_acs = IEEE80211_NUM_ACS;
trace_stop_queue(local, queue, reason);
@@ -530,29 +516,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
else
local->q_stop_reasons[queue][reason]++;
- if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
- return;
-
- if (local->hw.queues < IEEE80211_NUM_ACS)
- n_acs = 1;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- int ac;
-
- if (!sdata->dev)
- continue;
-
- for (ac = 0; ac < n_acs; ac++) {
- if (sdata->vif.hw_queue[ac] == queue ||
- sdata->vif.cab_queue == queue) {
- spin_lock(&local->fq.lock);
- sdata->vif.txqs_stopped[ac] = true;
- spin_unlock(&local->fq.lock);
- }
- }
- }
- rcu_read_unlock();
+ set_bit(reason, &local->queue_stop_reasons[queue]);
}
void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c2aae2a6d6a6..97bb4401dd3e 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -213,7 +213,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
- kfree_skb(skb);
return;
}
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index fc9e728b6333..3150f3f0c872 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -544,9 +544,6 @@ static int mctp_sk_init(struct sock *sk)
static void mctp_sk_close(struct sock *sk, long timeout)
{
- struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
-
- del_timer_sync(&msk->key_expiry);
sk_common_release(sk);
}
@@ -580,7 +577,19 @@ static void mctp_sk_unhash(struct sock *sk)
spin_lock_irqsave(&key->lock, fl2);
__mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
}
+ sock_set_flag(sk, SOCK_DEAD);
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ /* Since there are no more tag allocations (we have removed all of the
+ * keys), stop any pending expiry events. the timer cannot be re-queued
+ * as the sk is no longer observable
+ */
+ del_timer_sync(&msk->key_expiry);
+}
+
+static void mctp_sk_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
}
static struct proto mctp_proto = {
@@ -619,6 +628,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock,
return -ENOMEM;
sock_init_data(sock, sk);
+ sk->sk_destruct = mctp_sk_destruct;
rc = 0;
if (sk->sk_prot->init)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index f9a80b82dc51..f51a05ec7162 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -147,6 +147,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
key->valid = true;
spin_lock_init(&key->lock);
refcount_set(&key->refs, 1);
+ sock_hold(key->sk);
return key;
}
@@ -165,6 +166,7 @@ void mctp_key_unref(struct mctp_sk_key *key)
mctp_dev_release_key(key->dev, key);
spin_unlock_irqrestore(&key->lock, flags);
+ sock_put(key->sk);
kfree(key);
}
@@ -177,6 +179,11 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ if (sock_flag(&msk->sk, SOCK_DEAD)) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
key->tag)) {
@@ -198,6 +205,7 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
hlist_add_head(&key->sklist, &msk->keys);
}
+out_unlock:
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
return rc;
@@ -315,8 +323,8 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
+ struct mctp_sk_key *key, *any_key = NULL;
struct net *net = dev_net(skb->dev);
- struct mctp_sk_key *key;
struct mctp_sock *msk;
struct mctp_hdr *mh;
unsigned long f;
@@ -361,13 +369,11 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* key for reassembly - we'll create a more specific
* one for future packets if required (ie, !EOM).
*/
- key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
- if (key) {
- msk = container_of(key->sk,
+ any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
+ if (any_key) {
+ msk = container_of(any_key->sk,
struct mctp_sock, sk);
- spin_unlock_irqrestore(&key->lock, f);
- mctp_key_unref(key);
- key = NULL;
+ spin_unlock_irqrestore(&any_key->lock, f);
}
}
@@ -419,14 +425,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc) {
- kfree(key);
- } else {
+ if (!rc)
trace_mctp_key_acquire(key);
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
- }
+ /* we don't need to release key->lock on exit, so
+ * clean up here and suppress the unlock via
+ * setting to NULL
+ */
+ mctp_key_unref(key);
key = NULL;
} else {
@@ -473,6 +479,8 @@ out_unlock:
spin_unlock_irqrestore(&key->lock, f);
mctp_key_unref(key);
}
+ if (any_key)
+ mctp_key_unref(any_key);
out:
if (rc)
kfree_skb(skb);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 45e2a48397b9..70f0ced3ca86 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -420,6 +420,31 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
+/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
+ * otherwise allow any matching local/remote pair
+ */
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem)
+{
+ bool mptcp_is_v4 = sk->sk_family == AF_INET;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
+ bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);
+
+ if (mptcp_is_v4)
+ return loc_is_v4 && rem_is_v4;
+
+ if (ipv6_only_sock(sk))
+ return !loc_is_v4 && !rem_is_v4;
+
+ return loc_is_v4 == rem_is_v4;
+#else
+ return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
+#endif
+}
+
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2ea7eae43bdb..10fe9771a852 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -998,8 +998,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
{
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct mptcp_sock *msk;
struct socket *ssock;
+ struct sock *newsk;
int backlog = 1024;
int err;
@@ -1008,11 +1008,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
- msk = mptcp_sk(entry->lsk->sk);
- if (!msk)
+ newsk = entry->lsk->sk;
+ if (!newsk)
return -EINVAL;
- ssock = __mptcp_nmpc_socket(msk);
+ lock_sock(newsk);
+ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ release_sock(newsk);
if (!ssock)
return -EINVAL;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 65dcc55a8ad8..ea6ad9da7493 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -294,6 +294,13 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
}
sk = (struct sock *)msk;
+
+ if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
+ GENL_SET_ERR_MSG(info, "families mismatch");
+ err = -EINVAL;
+ goto create_err;
+ }
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f6f93957275b..bc6c1f62a690 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -98,7 +98,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
struct socket *ssock;
int err;
- err = mptcp_subflow_create_socket(sk, &ssock);
+ err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock);
if (err)
return err;
@@ -1662,6 +1662,8 @@ static void mptcp_set_nospace(struct sock *sk)
set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}
+static int mptcp_disconnect(struct sock *sk, int flags);
+
static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
size_t len, int *copied_syn)
{
@@ -1672,9 +1674,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
- msk->is_sendmsg = 1;
+ msk->fastopening = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
- msk->is_sendmsg = 0;
+ msk->fastopening = 0;
msg->msg_flags = saved_flags;
release_sock(ssk);
@@ -1688,6 +1690,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
*/
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
+ } else if (ret && ret != -EINPROGRESS) {
+ mptcp_disconnect(sk, 0);
}
return ret;
@@ -2353,7 +2357,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(ssk);
+ mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
@@ -2893,6 +2897,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
+ int subflows_alive = 0;
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2918,6 +2923,8 @@ cleanup:
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ subflows_alive += ssk->sk_state != TCP_CLOSE;
+
/* since the close timeout takes precedence on the fail one,
* cancel the latter
*/
@@ -2933,6 +2940,12 @@ cleanup:
}
sock_orphan(sk);
+ /* all the subflows are closed, only timeout can change the msk
+ * state, let's not keep resources busy for no reasons
+ */
+ if (subflows_alive == 0)
+ inet_sk_state_store(sk, TCP_CLOSE);
+
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (msk->token)
@@ -2989,6 +3002,14 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* We are on the fastopen error path. We can't call straight into the
+ * subflows cleanup code due to lock nesting (we are already under
+ * msk->firstsocket lock). Do nothing and leave the cleanup to the
+ * caller.
+ */
+ if (msk->fastopening)
+ return 0;
+
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3532,7 +3553,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->is_sendmsg)
+ if (msk->fastopening)
err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
else
err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 955fb3d88eb3..601469249da8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,7 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- is_sendmsg:1;
+ fastopening:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -628,7 +628,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
-void mptcp_subflow_queue_clean(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
@@ -641,7 +641,8 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
@@ -776,6 +777,9 @@ int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
bool require_family,
struct mptcp_pm_addr_entry *entry);
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4b1e6ec1b36..7f2c3727ab23 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct sock *sk = (struct sock *)msk;
struct socket *sock;
+ int ret = -EINVAL;
/* Limit to first subflow, before the connection establishment */
+ lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
if (!sock)
- return -EINVAL;
+ goto unlock;
- return tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+ ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+
+unlock:
+ release_sock(sk);
+ return ret;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d1d32a66ae3f..32904c76c6a1 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1399,6 +1399,7 @@ void __mptcp_error_report(struct sock *sk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int err = sock_error(ssk);
+ int ssk_state;
if (!err)
continue;
@@ -1409,7 +1410,14 @@ void __mptcp_error_report(struct sock *sk)
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue;
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
sk->sk_err = -err;
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1547,7 +1555,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, &sf);
+ err = mptcp_subflow_create_socket(sk, loc->family, &sf);
if (err)
goto err_out;
@@ -1660,7 +1668,9 @@ static void mptcp_subflow_ops_undo_override(struct sock *ssk)
#endif
ssk->sk_prot = &tcp_prot;
}
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
+
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
struct net *net = sock_net(sk);
@@ -1673,12 +1683,11 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
if (unlikely(!sk->sk_socket))
return -EINVAL;
- err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
- &sf);
+ err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
if (err)
return err;
- lock_sock(sf->sk);
+ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1791,7 +1800,7 @@ static void subflow_state_change(struct sock *sk)
}
}
-void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
@@ -1840,8 +1849,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
do_cancel_work = __mptcp_close(sk, 0);
release_sock(sk);
- if (do_cancel_work)
+ if (do_cancel_work) {
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map,
+ SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+ }
sock_put(sk);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index a8ce04a4bb72..e4fa00abde6a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
- hosts = 2 << (32 - netmask - 1);
- elements = 2 << (netmask - mask_bits - 1);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
}
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e7ba5b6dd2b7..46ebee9400da 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
retried = true;
- } while (ret == -EAGAIN &&
- set->variant->resize &&
- (ret = set->variant->resize(set, retried)) == 0);
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e30513cefd90..c9f4e3859663 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -100,11 +100,11 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip4 *h = set->data;
+ struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, hosts;
+ u32 ip = 0, ip_to = 0, hosts, i = 0;
int ret = 0;
if (tb[IPSET_ATTR_LINENO])
@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- /* 64bit division is not allowed on 32bit */
- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to;) {
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 153de3457423..a22ec1a6f6ec 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -97,11 +97,11 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark4 *h = set->data;
+ struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0;
+ u32 ip, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to; ip++, i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipmark4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 2ffbd0b78a8c..e977b5a9c48d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -112,11 +112,11 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport4 *h = set->data;
+ struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 334fb1ad0e86..39a01934b153 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -108,11 +108,11 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip4 *h = set->data;
+ struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7df94f437f60..5c6de605a9fb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -160,12 +160,12 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet4 *h = set->data;
+ struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip2 = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1422739d9aa2..ce0a9ce5a91f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -136,11 +136,11 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net4 *h = set->data;
+ struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_net4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 9810f5bf63f5..031073286236 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netiface4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index cdfb78c6e0d3..8fbe649c9dd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -166,13 +166,12 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netnet4 *h = set->data;
+ struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
- u64 n = 0, m = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
do {
+ i++;
e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netnet4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09cf72eb37f8..d1a0628df4ef 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -154,12 +154,11 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport4 *h = set->data;
+ struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
- u64 n = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 19bcdb3141f6..005a7ce87217 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
+static u32
+hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ if (from == 0 && to == UINT_MAX) {
+ *cidr = 0;
+ return to;
+ }
+ return ip_set_range_to_cidr(from, to, cidr);
+}
+
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet4 *h = set->data;
+ struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
- u64 n = 0, m = 0;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
int ret;
@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
do {
e.ip[0] = htonl(ip);
- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip[1] = htonl(ip2);
- ip2 = ip_set_range_to_cidr(ip2, ip2_to,
- &e.cidr[1]);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ip2 = hash_netportnet4_range_to_cidr(ip2,
+ ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 99323fb12d0f..ccef340be575 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv,
struct nf_conn *ct;
bool seqadj_needed;
__be16 frag_off;
+ int start;
u8 pnum;
ct = nf_ct_get(skb, &ctinfo);
@@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv,
break;
case NFPROTO_IPV6:
pnum = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
- if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
+ if (start < 0 || (frag_off & htons(~0x7)) != 0)
return nf_conntrack_confirm(skb);
+
+ protoff = start;
break;
default:
return nf_conntrack_confirm(skb);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d88b92a8ffca..011d414038ea 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -27,22 +27,16 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
static const char *const sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
- "HEARTBEAT_SENT",
- "HEARTBEAT_ACKED",
+ [SCTP_CONNTRACK_NONE] = "NONE",
+ [SCTP_CONNTRACK_CLOSED] = "CLOSED",
+ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT",
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED",
+ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED",
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD",
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT",
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT",
};
#define SECS * HZ
@@ -54,13 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_CLOSED] = 10 SECS,
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
- [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
- [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
- [SCTP_CONNTRACK_DATA_SENT] = 30 SECS,
};
#define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1
@@ -74,8 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
-#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
-#define sDS SCTP_CONNTRACK_DATA_SENT
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -98,10 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
-HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK/DATA/SACK in the direction
- opposite to that of the HEARTBEAT/DATA chunk. Secondary connection
- is established.
-DATA_SENT - We have seen a DATA/SACK in a new flow.
*/
/* TODO
@@ -115,38 +101,36 @@ cookie echoed to closed.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][12][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA, sCW},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS, sCL},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA, sCL},
-/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* data/sack */ {sDS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL, sIV},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR, sIV},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA, sIV},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA, sIV},
-/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sHA},
-/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
-/* data/sack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES},
}
};
@@ -158,6 +142,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
}
#endif
+/* do_basic_checks ensures sch->length > 0, do not use before */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
(offset) < (skb)->len && \
@@ -258,11 +243,6 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
pr_debug("SCTP_CID_HEARTBEAT_ACK");
i = 10;
break;
- case SCTP_CID_DATA:
- case SCTP_CID_SACK:
- pr_debug("SCTP_CID_DATA/SACK");
- i = 11;
- break;
default:
/* Other chunks like DATA or SACK do not change the state */
pr_debug("Unknown chunk type, Will stay in %s\n",
@@ -316,9 +296,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
ih->init_tag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
- } else if (sch->type == SCTP_CID_HEARTBEAT ||
- sch->type == SCTP_CID_DATA ||
- sch->type == SCTP_CID_SACK) {
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
@@ -404,19 +382,19 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
if (!sctp_new(ct, skb, sh, dataoff))
return -NF_ACCEPT;
- } else {
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
- !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
- !test_bit(SCTP_CID_ABORT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
- !test_bit(SCTP_CID_HEARTBEAT, map) &&
- !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
- sh->vtag != ct->proto.sctp.vtag[dir]) {
- pr_debug("Verification tag check failed\n");
- goto out;
- }
+ }
+
+ /* Check the verification tag (Sec 8.5) */
+ if (!test_bit(SCTP_CID_INIT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+ !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+ !test_bit(SCTP_CID_ABORT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out;
}
old_state = new_state = SCTP_CONNTRACK_NONE;
@@ -424,22 +402,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
+ /* (A) vtag MUST be zero */
if (sh->vtag != 0)
goto out_unlock;
} else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir])
+ /* (B) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir] &&
- sch->flags & SCTP_CHUNK_FLAG_T)
+ /* (C) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
+ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
@@ -476,11 +461,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
} else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) {
ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
}
- } else if (sch->type == SCTP_CID_DATA || sch->type == SCTP_CID_SACK) {
- if (ct->proto.sctp.vtag[dir] == 0) {
- pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir);
- ct->proto.sctp.vtag[dir] = sh->vtag;
- }
}
old_state = ct->proto.sctp.state;
@@ -518,8 +498,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}
ct->proto.sctp.state = new_state;
- if (old_state != new_state)
+ if (old_state != new_state) {
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ if (new_state == SCTP_CONNTRACK_ESTABLISHED &&
+ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ }
}
spin_unlock_bh(&ct->lock);
@@ -533,14 +517,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
- if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
- dir == IP_CT_DIR_REPLY &&
- new_state == SCTP_CONNTRACK_ESTABLISHED) {
- pr_debug("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_ASSURED, ct);
- }
-
return NF_ACCEPT;
out_unlock:
@@ -701,7 +677,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
- [CTA_TIMEOUT_SCTP_DATA_SENT] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 656631083177..3ac1af6f59fc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1068,6 +1068,13 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags |=
IP_CT_EXP_CHALLENGE_ACK;
}
+
+ /* possible challenge ack reply to syn */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET &&
+ dir == IP_CT_DIR_REPLY)
+ ct->proto.tcp.last_ack = ntohl(th->ack_seq);
+
spin_unlock_bh(&ct->lock);
nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
@@ -1193,6 +1200,14 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
* segments we ignored. */
goto in_window;
}
+
+ /* Reset in response to a challenge-ack we let through earlier */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_REPLY &&
+ ntohl(th->seq) == ct->proto.tcp.last_ack)
+ goto in_window;
+
break;
default:
/* Keep compilers happy. */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0250725e38a4..460294bd4b60 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -601,8 +601,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT,
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
@@ -887,18 +885,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT] = {
- .procname = "nf_conntrack_sctp_timeout_data_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
@@ -1042,8 +1028,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
XASSIGN(SHUTDOWN_RECD, sn);
XASSIGN(SHUTDOWN_ACK_SENT, sn);
XASSIGN(HEARTBEAT_SENT, sn);
- XASSIGN(HEARTBEAT_ACKED, sn);
- XASSIGN(DATA_SENT, sn);
#undef XASSIGN
#endif
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 832b881f7c17..8c09e4d12ac1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
- struct nft_set *set)
+static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set,
+ const struct nft_set_desc *desc)
{
struct nft_trans *trans;
@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
if (trans == NULL)
return -ENOMEM;
- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
+ if (desc) {
+ nft_trans_set_update(trans) = true;
+ nft_trans_set_gc_int(trans) = desc->gc_int;
+ nft_trans_set_timeout(trans) = desc->timeout;
+ }
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ return __nft_trans_set_add(ctx, msg_type, set, NULL);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -3780,8 +3792,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
static const struct nft_set_ops *
nft_select_set_ops(const struct nft_ctx *ctx,
const struct nlattr * const nla[],
- const struct nft_set_desc *desc,
- enum nft_set_policies policy)
+ const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
@@ -3810,7 +3821,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
if (!ops->estimate(desc, flags, &est))
continue;
- switch (policy) {
+ switch (desc->policy) {
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
@@ -4045,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
- struct nlmsghdr *nlh;
+ u64 timeout = READ_ONCE(set->timeout);
+ u32 gc_int = READ_ONCE(set->gc_int);
u32 portid = ctx->portid;
+ struct nlmsghdr *nlh;
struct nlattr *nest;
u32 seq = ctx->seq;
int i;
@@ -4082,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
goto nla_put_failure;
- if (set->timeout &&
+ if (timeout &&
nla_put_be64(skb, NFTA_SET_TIMEOUT,
- nf_jiffies64_to_msecs(set->timeout),
+ nf_jiffies64_to_msecs(timeout),
NFTA_SET_PAD))
goto nla_put_failure;
- if (set->gc_int &&
- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ if (gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
goto nla_put_failure;
if (set->policy != NFT_SET_POL_PERFORMANCE) {
@@ -4389,15 +4402,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
return err;
}
+static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr * const *nla,
+ struct nft_expr **exprs, int *num_exprs,
+ u32 flags)
+{
+ struct nft_expr *expr;
+ int err, i;
+
+ if (nla[NFTA_SET_EXPR]) {
+ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[0] = expr;
+ (*num_exprs)++;
+ } else if (nla[NFTA_SET_EXPRESSIONS]) {
+ struct nlattr *tmp;
+ int left;
+
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+ goto err_set_expr_alloc;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[i++] = expr;
+ (*num_exprs)++;
+ }
+ }
+
+ return 0;
+
+err_set_expr_alloc:
+ for (i = 0; i < *num_exprs; i++)
+ nft_expr_destroy(ctx, exprs[i]);
+
+ return err;
+}
+
+static bool nft_set_is_same(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ struct nft_expr *exprs[], u32 num_exprs, u32 flags)
+{
+ int i;
+
+ if (set->ktype != desc->ktype ||
+ set->dtype != desc->dtype ||
+ set->flags != flags ||
+ set->klen != desc->klen ||
+ set->dlen != desc->dlen ||
+ set->field_count != desc->field_count ||
+ set->num_exprs != num_exprs)
+ return false;
+
+ for (i = 0; i < desc->field_count; i++) {
+ if (set->field_len[i] != desc->field_len[i])
+ return false;
+ }
+
+ for (i = 0; i < num_exprs; i++) {
+ if (set->exprs[i]->ops != exprs[i]->ops)
+ return false;
+ }
+
+ return true;
+}
+
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
- struct nft_expr *expr = NULL;
struct net *net = info->net;
struct nft_set_desc desc;
struct nft_table *table;
@@ -4405,10 +4497,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
struct nft_set *set;
struct nft_ctx ctx;
size_t alloc_size;
- u64 timeout;
+ int num_exprs = 0;
char *name;
int err, i;
u16 udlen;
+ u32 flags;
u64 size;
if (nla[NFTA_SET_TABLE] == NULL ||
@@ -4419,10 +4512,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
memset(&desc, 0, sizeof(desc));
- ktype = NFT_DATA_VALUE;
+ desc.ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
return -EINVAL;
}
@@ -4447,17 +4540,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
}
- dtype = 0;
+ desc.dtype = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
- dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
- dtype != NFT_DATA_VERDICT)
+ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+ desc.dtype != NFT_DATA_VERDICT)
return -EINVAL;
- if (dtype != NFT_DATA_VERDICT) {
+ if (desc.dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
@@ -4472,34 +4565,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_OBJECT))
return -EINVAL;
- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
- if (objtype == NFT_OBJECT_UNSPEC ||
- objtype > NFT_OBJECT_MAX)
+ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+ if (desc.objtype == NFT_OBJECT_UNSPEC ||
+ desc.objtype > NFT_OBJECT_MAX)
return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
- objtype = NFT_OBJECT_UNSPEC;
+ desc.objtype = NFT_OBJECT_UNSPEC;
- timeout = 0;
+ desc.timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
}
- gc_int = 0;
+ desc.gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
- policy = NFT_SET_POL_PERFORMANCE;
+ desc.policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
@@ -4531,6 +4624,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(set);
}
} else {
+ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
@@ -4538,13 +4633,29 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return 0;
+ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
+ if (err < 0)
+ return err;
+
+ err = 0;
+ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ err = -EEXIST;
+ }
+
+ for (i = 0; i < num_exprs; i++)
+ nft_expr_destroy(&ctx, exprs[i]);
+
+ if (err < 0)
+ return err;
+
+ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
}
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc, policy);
+ ops = nft_select_set_ops(&ctx, nla, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -4584,18 +4695,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
- set->ktype = ktype;
+ set->ktype = desc.ktype;
set->klen = desc.klen;
- set->dtype = dtype;
- set->objtype = objtype;
+ set->dtype = desc.dtype;
+ set->objtype = desc.objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
- set->policy = policy;
+ set->policy = desc.policy;
set->udlen = udlen;
set->udata = udata;
- set->timeout = timeout;
- set->gc_int = gc_int;
+ set->timeout = desc.timeout;
+ set->gc_int = desc.gc_int;
set->field_count = desc.field_count;
for (i = 0; i < desc.field_count; i++)
@@ -4605,43 +4716,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_set_init;
- if (nla[NFTA_SET_EXPR]) {
- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[0] = expr;
- set->num_exprs++;
- } else if (nla[NFTA_SET_EXPRESSIONS]) {
- struct nft_expr *expr;
- struct nlattr *tmp;
- int left;
-
- if (!(flags & NFT_SET_EXPR)) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- i = 0;
- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
- if (i == NFT_SET_EXPR_MAX) {
- err = -E2BIG;
- goto err_set_expr_alloc;
- }
- if (nla_type(tmp) != NFTA_LIST_ELEM) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[i++] = expr;
- set->num_exprs++;
- }
- }
+ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
+ if (err < 0)
+ goto err_set_destroy;
+ set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
@@ -4655,7 +4734,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
-
+err_set_destroy:
ops->destroy(set);
err_set_init:
kfree(set->name);
@@ -6008,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = set->timeout;
+ timeout = READ_ONCE(set->timeout);
}
expiration = 0;
@@ -6109,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key_end;
- if (timeout != set->timeout) {
+ if (timeout != READ_ONCE(set->timeout)) {
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
@@ -9031,14 +9110,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_clear(net, nft_trans_set(trans));
- /* This avoids hitting -EBUSY when deleting the table
- * from the transaction.
- */
- if (nft_set_is_anonymous(nft_trans_set(trans)) &&
- !list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ if (nft_trans_set_update(trans)) {
+ struct nft_set *set = nft_trans_set(trans);
+ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
+ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+ } else {
+ nft_clear(net, nft_trans_set(trans));
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+ }
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
@@ -9260,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ if (nft_trans_set_update(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.table->use--;
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 17b418a5a593..3a3c7746e88f 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -63,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
return false;
if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen;
+ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7325bee7d144..19ea4d3c3553 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
return !nft_rbtree_interval_end(rbe);
}
-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
- const struct nft_rbtree_elem *interval)
+static int nft_rbtree_cmp(const struct nft_set *set,
+ const struct nft_rbtree_elem *e1,
+ const struct nft_rbtree_elem *e2)
{
- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
+ set->klen);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
- const void *this;
int d;
parent = rcu_dereference_raw(priv->root.rb_node);
@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- this = nft_set_ext_key(&rbe->ext);
- d = memcmp(this, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
- nft_rbtree_equal(set, this, interval) &&
+ !nft_rbtree_cmp(set, rbe, interval) &&
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
@@ -215,154 +215,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_set_gc_batch *gcb;
+
+ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+ if (!gcb)
+ return -ENOMEM;
+
+ /* search for expired end interval coming before this element. */
+ do {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_end(rbe_prev))
+ break;
+
+ prev = rb_prev(prev);
+ } while (prev != NULL);
+
+ rb_erase(&rbe_prev->node, &priv->root);
+ rb_erase(&rbe->node, &priv->root);
+ atomic_sub(2, &set->nelems);
+
+ nft_set_gc_batch_add(gcb, rbe);
+ nft_set_gc_batch_complete(gcb);
+
+ return 0;
+}
+
+static bool nft_rbtree_update_first(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe,
+ struct rb_node *first)
+{
+ struct nft_rbtree_elem *first_elem;
+
+ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
+ /* this element is closest to where the new element is to be inserted:
+ * update the first element for the node list path.
+ */
+ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
+ return true;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
{
- bool overlap = false, dup_end_left = false, dup_end_right = false;
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_rbtree_elem *rbe;
- struct rb_node *parent, **p;
- int d;
+ int d, err;
- /* Detect overlaps as we descend the tree. Set the flag in these cases:
- *
- * a1. _ _ __>| ?_ _ __| (insert end before existing end)
- * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
- * a3. _ _ ___? >|_ _ __| (insert start before existing end)
- *
- * and clear it later on, as we eventually reach the points indicated by
- * '?' above, in the cases described below. We'll always meet these
- * later, locally, due to tree ordering, and overlaps for the intervals
- * that are the closest together are always evaluated last.
- *
- * b1. _ _ __>| !_ _ __| (insert end before existing start)
- * b2. _ _ ___| !_ _ _>| (insert end after existing start)
- * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf)
- * '--' no nodes falling in this range
- * b4. >|_ _ ! (insert start before existing start)
- *
- * Case a3. resolves to b3.:
- * - if the inserted start element is the leftmost, because the '0'
- * element in the tree serves as end element
- * - otherwise, if an existing end is found immediately to the left. If
- * there are existing nodes in between, we need to further descend the
- * tree before we can conclude the new start isn't causing an overlap
- *
- * or to b4., which, preceded by a3., means we already traversed one or
- * more existing intervals entirely, from the right.
- *
- * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
- * in that order.
- *
- * The flag is also cleared in two special cases:
- *
- * b5. |__ _ _!|<_ _ _ (insert start right before existing end)
- * b6. |__ _ >|!__ _ _ (insert end right after existing start)
- *
- * which always happen as last step and imply that no further
- * overlapping is possible.
- *
- * Another special case comes from the fact that start elements matching
- * an already existing start element are allowed: insertion is not
- * performed but we return -EEXIST in that case, and the error will be
- * cleared by the caller if NLM_F_EXCL is not present in the request.
- * This way, request for insertion of an exact overlap isn't reported as
- * error to userspace if not desired.
- *
- * However, if the existing start matches a pre-existing start, but the
- * end element doesn't match the corresponding pre-existing end element,
- * we need to report a partial overlap. This is a local condition that
- * can be noticed without need for a tracking flag, by checking for a
- * local duplicated end for a corresponding start, from left and right,
- * separately.
+ /* Descend the tree to search for an existing element greater than the
+ * key value to insert that is greater than the new element. This is the
+ * first element to walk the ordered elements to find possible overlap.
*/
-
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext),
- nft_set_ext_key(&new->ext),
- set->klen);
+ d = nft_rbtree_cmp(set, rbe, new);
+
if (d < 0) {
p = &parent->rb_left;
-
- if (nft_rbtree_interval_start(new)) {
- if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext) && !*p)
- overlap = false;
- } else {
- if (dup_end_left && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_right = true;
- continue;
- }
- }
} else if (d > 0) {
- p = &parent->rb_right;
+ if (!first ||
+ nft_rbtree_update_first(set, rbe, first))
+ first = &rbe->node;
- if (nft_rbtree_interval_end(new)) {
- if (dup_end_right && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_left = true;
- continue;
- }
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- overlap = nft_rbtree_interval_end(rbe);
- }
+ p = &parent->rb_right;
} else {
- if (nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_start(new)) {
+ if (nft_rbtree_interval_end(rbe))
p = &parent->rb_left;
-
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_rbtree_interval_start(rbe) &&
- nft_rbtree_interval_end(new)) {
+ else
p = &parent->rb_right;
+ }
+ }
+
+ if (!first)
+ first = rb_first(&priv->root);
+
+ /* Detect overlap by going through the list of valid tree nodes.
+ * Values stored in the tree are in reversed order, starting from
+ * highest to lowest value.
+ */
+ for (node = first; node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- *ext = &rbe->ext;
- return -EEXIST;
- } else {
- overlap = false;
- if (nft_rbtree_interval_end(rbe))
- p = &parent->rb_left;
- else
- p = &parent->rb_right;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* perform garbage collection to avoid bogus overlap reports. */
+ if (nft_set_elem_expired(&rbe->ext)) {
+ err = nft_rbtree_gc_elem(set, priv, rbe);
+ if (err < 0)
+ return err;
+
+ continue;
+ }
+
+ d = nft_rbtree_cmp(set, rbe, new);
+ if (d == 0) {
+ /* Matching end element: no need to look for an
+ * overlapping greater or equal element.
+ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_le = rbe;
+ break;
+ }
+
+ /* first element that is greater or equal to key value. */
+ if (!rbe_ge) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* this is a closer more or equal element, update it. */
+ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* element is equal to key value, make sure flags are
+ * the same, an existing more or equal start element
+ * must not be replaced by more or equal end element.
+ */
+ if ((nft_rbtree_interval_start(new) &&
+ nft_rbtree_interval_start(rbe_ge)) ||
+ (nft_rbtree_interval_end(new) &&
+ nft_rbtree_interval_end(rbe_ge))) {
+ rbe_ge = rbe;
+ continue;
}
+ } else if (d > 0) {
+ /* annotate element greater than the new element. */
+ rbe_ge = rbe;
+ continue;
+ } else if (d < 0) {
+ /* annotate element less than the new element. */
+ rbe_le = rbe;
+ break;
}
+ }
- dup_end_left = dup_end_right = false;
+ /* - new start element matching existing start element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
+ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
+ *ext = &rbe_ge->ext;
+ return -EEXIST;
}
- if (overlap)
+ /* - new end element matching existing end element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
+ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ *ext = &rbe_le->ext;
+ return -EEXIST;
+ }
+
+ /* - new start element with existing closest, less or equal key value
+ * being a start element: partial overlap, reported as -ENOTEMPTY.
+ * Anonymous sets allow for two consecutive start element since they
+ * are constant, skip them to avoid bogus overlap reports.
+ */
+ if (!nft_set_is_anonymous(set) && rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, less or equal key value
+ * being a end element: partial overlap, reported as -ENOTEMPTY.
+ */
+ if (rbe_le &&
+ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
return -ENOTEMPTY;
+ /* - new end element with existing closest, greater or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY
+ */
+ if (rbe_ge &&
+ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* Accepted element: pick insertion point depending on key value */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@@ -501,23 +563,37 @@ static void nft_rbtree_gc(struct work_struct *work)
struct nft_rbtree *priv;
struct rb_node *node;
struct nft_set *set;
+ struct net *net;
+ u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ genmask = nft_genmask_cur(net);
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* elements are reversed in the rbtree for historical reasons,
+ * from highest to lowest value, that is why end element is
+ * always visited before the start element.
+ */
if (nft_rbtree_interval_end(rbe)) {
rbe_end = rbe;
continue;
}
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext))
+
+ if (nft_set_elem_mark_busy(&rbe->ext)) {
+ rbe_end = NULL;
continue;
+ }
if (rbe_prev) {
rb_erase(&rbe_prev->node, &priv->root);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bca2a470ccad..c64277659753 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -580,7 +580,9 @@ static int netlink_insert(struct sock *sk, u32 portid)
if (nlk_sk(sk)->bound)
goto err;
- nlk_sk(sk)->portid = portid;
+ /* portid can be read locklessly from netlink_getname(). */
+ WRITE_ONCE(nlk_sk(sk)->portid, portid);
+
sock_hold(sk);
err = __netlink_insert(table, sk);
@@ -1096,9 +1098,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
if (addr->sa_family == AF_UNSPEC) {
- sk->sk_state = NETLINK_UNCONNECTED;
- nlk->dst_portid = 0;
- nlk->dst_group = 0;
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, 0);
+ WRITE_ONCE(nlk->dst_group, 0);
return 0;
}
if (addr->sa_family != AF_NETLINK)
@@ -1119,9 +1123,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
err = netlink_autobind(sock);
if (err == 0) {
- sk->sk_state = NETLINK_CONNECTED;
- nlk->dst_portid = nladdr->nl_pid;
- nlk->dst_group = ffs(nladdr->nl_groups);
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
+ WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
}
return err;
@@ -1138,10 +1144,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_pad = 0;
if (peer) {
- nladdr->nl_pid = nlk->dst_portid;
- nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
+ nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
} else {
- nladdr->nl_pid = nlk->portid;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ nladdr->nl_pid = READ_ONCE(nlk->portid);
netlink_lock_table();
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
@@ -1168,8 +1176,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
- if (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_portid != nlk_sk(ssk)->portid) {
+ /* dst_portid and sk_state can be changed in netlink_connect() */
+ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
+ READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
@@ -1886,8 +1895,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
netlink_skb_flags |= NETLINK_SKB_DST;
} else {
- dst_portid = nlk->dst_portid;
- dst_group = nlk->dst_group;
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ dst_portid = READ_ONCE(nlk->dst_portid);
+ dst_group = READ_ONCE(nlk->dst_group);
}
/* Paired with WRITE_ONCE() in netlink_insert() */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 6f7f4392cffb..5a4cb796150f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index a8da88db7893..4e7c968cde2d 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
+ sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
goto out;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 3364caabef8b..a27e1842b2a0 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -157,6 +157,7 @@ static void local_cleanup(struct nfc_llcp_local *local)
cancel_work_sync(&local->rx_work);
cancel_work_sync(&local->timeout_work);
kfree_skb(local->rx_pending);
+ local->rx_pending = NULL;
del_timer_sync(&local->sdreq_timer);
cancel_work_sync(&local->sdreq_timeout_work);
nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 9d91087b9399..1fc339084d89 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
u32 dev_idx, se_idx;
u8 *apdu;
size_t apdu_len;
+ int rc;
if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
!info->attrs[NFC_ATTR_SE_INDEX] ||
@@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
if (!dev)
return -ENODEV;
- if (!dev->ops || !dev->ops->se_io)
- return -ENOTSUPP;
+ if (!dev->ops || !dev->ops->se_io) {
+ rc = -EOPNOTSUPP;
+ goto put_dev;
+ }
apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
- if (apdu_len == 0)
- return -EINVAL;
+ if (apdu_len == 0) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
- if (!apdu)
- return -EINVAL;
+ if (!apdu) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto put_dev;
+ }
ctx->dev_idx = dev_idx;
ctx->se_idx = se_idx;
- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+
+put_dev:
+ nfc_put_device(dev);
+ return rc;
}
static int nfc_genl_vendor_cmd(struct sk_buff *skb,
@@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
dev = nfc_get_device(dev_idx);
- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
+ if (!dev)
return -ENODEV;
+ if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
+ err = -ENODEV;
+ goto put_dev;
+ }
+
if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
- if (data_len == 0)
- return -EINVAL;
+ if (data_len == 0) {
+ err = -EINVAL;
+ goto put_dev;
+ }
} else {
data = NULL;
data_len = 0;
@@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
dev->cur_cmd_info = info;
err = cmd->doit(dev, data, data_len);
dev->cur_cmd_info = NULL;
- return err;
+ goto put_dev;
}
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+
+put_dev:
+ nfc_put_device(dev);
+ return err;
}
/* message building helper */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9ca721c9fa71..fcee6012293b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1004,14 +1004,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
error = -ENOMEM;
- goto err_kfree_key;
+ goto err_kfree_flow;
}
ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
@@ -1019,14 +1019,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
key, log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -1126,10 +1126,10 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
err_kfree_key:
kfree(key);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
error:
return error;
}
@@ -1861,7 +1861,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
if (!vport->upcall_stats) {
err = -ENOMEM;
- goto err_destroy_portids;
+ goto err_destroy_vport;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1876,6 +1876,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
+err_destroy_vport:
+ ovs_dp_detach_port(vport);
err_destroy_portids:
kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
@@ -2323,7 +2325,7 @@ restart:
vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
if (!vport->upcall_stats) {
err = -ENOMEM;
- goto exit_unlock_free;
+ goto exit_unlock_free_vport;
}
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
@@ -2343,6 +2345,8 @@ restart:
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
+exit_unlock_free_vport:
+ ovs_dp_detach_port(vport);
exit_unlock_free:
ovs_unlock();
kfree_skb(reply);
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 1990d496fcfc..e595079c2caf 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -83,7 +83,10 @@ static struct qrtr_node *node_get(unsigned int node_id)
node->id = node_id;
- radix_tree_insert(&nodes, node_id, node);
+ if (radix_tree_insert(&nodes, node_id, node)) {
+ kfree(node);
+ return NULL;
+ }
return node;
}
diff --git a/net/rds/message.c b/net/rds/message.c
index b47e4f0a1639..c19c93561227 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_lock_irqsave(&q->lock, flags);
head = &q->zcookie_head;
if (!list_empty(head)) {
- info = list_entry(head, struct rds_msg_zcopy_info,
- rs_zcookie_next);
- if (info && rds_zcookie_add(info, cookie)) {
+ info = list_first_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (rds_zcookie_add(info, cookie)) {
spin_unlock_irqrestore(&q->lock, flags);
kfree(rds_info_from_znotifier(znotif));
/* caller invokes rds_wake_sk_sleep() */
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 36fefc3957d7..ca2b17f32670 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -488,6 +488,12 @@ static int rose_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
struct rose_sock *rose = rose_sk(sk);
@@ -497,8 +503,10 @@ static int rose_listen(struct socket *sock, int backlog)
memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
+ release_sock(sk);
return 0;
}
+ release_sock(sk);
return -EOPNOTSUPP;
}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index e76d3459d78e..ac5caf5a48e1 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ rxrpc-y := \
call_accept.o \
call_event.o \
call_object.o \
+ call_state.o \
conn_client.o \
conn_event.o \
conn_object.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7ea576f6ba4b..ebbd4a1c3f86 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -155,10 +155,10 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
if (service_id) {
write_lock(&local->services_lock);
- if (rcu_access_pointer(local->service))
+ if (local->service)
goto service_in_use;
rx->local = local;
- rcu_assign_pointer(local->service, rx);
+ local->service = rx;
write_unlock(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
@@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
mutex_unlock(&call->user_mutex);
}
- rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p", call);
return call;
}
@@ -374,13 +373,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* @sock: The socket the call is on
* @call: The call to check
*
- * Allow a kernel service to find out whether a call is still alive -
- * ie. whether it has completed.
+ * Allow a kernel service to find out whether a call is still alive - whether
+ * it has completed successfully and all received data has been consumed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
const struct rxrpc_call *call)
{
- return call->state != RXRPC_CALL_COMPLETE;
+ if (!rxrpc_call_is_complete(call))
+ return true;
+ if (call->completion != RXRPC_CALL_SUCCEEDED)
+ return false;
+ return !skb_queue_empty(&call->recvmsg_queue);
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
@@ -872,9 +875,9 @@ static int rxrpc_release_sock(struct sock *sk)
sk->sk_state = RXRPC_CLOSE;
- if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
+ if (rx->local && rx->local->service == rx) {
write_lock(&rx->local->services_lock);
- rcu_assign_pointer(rx->local->service, NULL);
+ rx->local->service = NULL;
write_unlock(&rx->local->services_lock);
}
@@ -957,16 +960,9 @@ static const struct net_proto_family rxrpc_family_ops = {
static int __init af_rxrpc_init(void)
{
int ret = -1;
- unsigned int tmp;
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
- get_random_bytes(&tmp, sizeof(tmp));
- tmp &= 0x3fffffff;
- if (tmp == 0)
- tmp = 1;
- idr_set_cursor(&rxrpc_client_conn_ids, tmp);
-
ret = -ENOMEM;
rxrpc_call_jar = kmem_cache_create(
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
@@ -1062,7 +1058,6 @@ static void __exit af_rxrpc_exit(void)
* are released.
*/
rcu_barrier();
- rxrpc_destroy_client_conn_ids();
destroy_workqueue(rxrpc_workqueue);
rxrpc_exit_security();
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 18092526d3c8..433060cade03 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -38,6 +38,7 @@ struct rxrpc_txbuf;
enum rxrpc_skb_mark {
RXRPC_SKB_MARK_PACKET, /* Received packet */
RXRPC_SKB_MARK_ERROR, /* Error notification */
+ RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
};
@@ -75,13 +76,7 @@ struct rxrpc_net {
bool live;
- bool kill_all_client_conns;
atomic_t nr_client_conns;
- spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
- struct mutex client_conn_discard_lock; /* Prevent multiple discarders */
- struct list_head idle_client_conns;
- struct work_struct client_conn_reaper;
- struct timer_list client_conn_reap_timer;
struct hlist_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
@@ -202,6 +197,7 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
u16 offset; /* Offset of data */
u16 len; /* Length of data */
u8 flags;
@@ -262,13 +258,11 @@ struct rxrpc_security {
/* respond to a challenge */
int (*respond_to_challenge)(struct rxrpc_connection *,
- struct sk_buff *,
- u32 *);
+ struct sk_buff *);
/* verify a response */
int (*verify_response)(struct rxrpc_connection *,
- struct sk_buff *,
- u32 *);
+ struct sk_buff *);
/* clear connection security */
void (*clear)(struct rxrpc_connection *);
@@ -283,22 +277,34 @@ struct rxrpc_local {
struct rcu_head rcu;
atomic_t active_users; /* Number of users of the local endpoint */
refcount_t ref; /* Number of references to the structure */
- struct rxrpc_net *rxnet; /* The network ns in which this resides */
+ struct net *net; /* The network namespace */
+ struct rxrpc_net *rxnet; /* Our bits in the network namespace */
struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
- struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
+ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head rx_queue; /* Received packets */
+ struct list_head conn_attend_q; /* Conns requiring immediate attention */
struct list_head call_attend_q; /* Calls requiring immediate attention */
+
struct rb_root client_bundles; /* Client connection bundles by socket params */
spinlock_t client_bundles_lock; /* Lock for client_bundles */
+ bool kill_all_client_conns;
+ struct list_head idle_client_conns;
+ struct timer_list client_conn_reap_timer;
+ unsigned long client_conn_flags;
+#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */
+
spinlock_t lock; /* access lock */
rwlock_t services_lock; /* lock for services list */
int debug_id; /* debug ID for printks */
bool dead;
bool service_closed; /* Service socket closed */
+ struct idr conn_ids; /* List of connection IDs */
+ struct list_head new_client_calls; /* Newly created client calls need connection */
+ spinlock_t client_call_lock; /* Lock for ->new_client_calls */
struct sockaddr_rxrpc srx; /* local address */
};
@@ -356,7 +362,6 @@ struct rxrpc_conn_proto {
struct rxrpc_conn_parameters {
struct rxrpc_local *local; /* Representation of local endpoint */
- struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
@@ -365,10 +370,21 @@ struct rxrpc_conn_parameters {
};
/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED, /* - Normal termination */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
* Bits in the connection flags.
*/
enum rxrpc_conn_flag {
- RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
@@ -388,6 +404,7 @@ enum rxrpc_conn_flag {
*/
enum rxrpc_conn_event {
RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+ RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */
};
/*
@@ -395,13 +412,13 @@ enum rxrpc_conn_event {
*/
enum rxrpc_conn_proto_state {
RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */
RXRPC_CONN_CLIENT, /* Client connection */
RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */
RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
RXRPC_CONN_SERVICE, /* Service secured connection */
- RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
- RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
+ RXRPC_CONN_ABORTED, /* Conn aborted */
RXRPC_CONN__NR_STATES
};
@@ -412,17 +429,16 @@ struct rxrpc_bundle {
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
+ const struct rxrpc_security *security; /* applied security module */
refcount_t ref;
atomic_t active; /* Number of active users */
unsigned int debug_id;
u32 security_level; /* Security level selected */
u16 service_id; /* Service ID for this connection */
bool try_upgrade; /* True if the bundle is attempting upgrade */
- bool alloc_conn; /* True if someone's getting a conn */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
- short alloc_error; /* Error from last conn allocation */
- spinlock_t channel_lock;
+ unsigned short alloc_error; /* Error from last conn allocation */
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
unsigned long avail_chans; /* Mask of available channels */
@@ -440,6 +456,7 @@ struct rxrpc_connection {
struct rxrpc_peer *peer; /* Remote endpoint */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
struct key *key; /* Security details */
+ struct list_head attend_link; /* Link in local->conn_attend_q */
refcount_t ref;
atomic_t active; /* Active count for service conns */
@@ -449,7 +466,7 @@ struct rxrpc_connection {
unsigned char act_chans; /* Mask of active channels */
struct rxrpc_channel {
unsigned long final_ack_at; /* Time at which to issue final ACK */
- struct rxrpc_call __rcu *call; /* Active call */
+ struct rxrpc_call *call; /* Active call */
unsigned int call_debug_id; /* call->debug_id */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
@@ -470,6 +487,7 @@ struct rxrpc_connection {
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct mutex security_lock; /* Lock for security management */
const struct rxrpc_security *security; /* applied security module */
union {
struct {
@@ -483,7 +501,8 @@ struct rxrpc_connection {
unsigned long idle_timestamp; /* Time at which last became idle */
spinlock_t state_lock; /* state-change lock */
enum rxrpc_conn_proto_state state; /* current state of connection */
- u32 abort_code; /* Abort code of connection abort */
+ enum rxrpc_call_completion completion; /* Completion condition */
+ s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
@@ -527,7 +546,8 @@ enum rxrpc_call_flag {
RXRPC_CALL_KERNEL, /* The call was made by the kernel */
RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
- RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */
+ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */
+ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */
};
/*
@@ -558,18 +578,6 @@ enum rxrpc_call_state {
};
/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
- RXRPC_CALL_SUCCEEDED, /* - Normal termination */
- RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
- RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
- RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
- RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
- NR__RXRPC_CALL_COMPLETIONS
-};
-
-/*
* Call Tx congestion management modes.
*/
enum rxrpc_congest_mode {
@@ -587,6 +595,7 @@ enum rxrpc_congest_mode {
struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_bundle *bundle; /* Connection bundle to use */
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_sock __rcu *socket; /* socket responsible */
@@ -609,7 +618,7 @@ struct rxrpc_call {
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
struct list_head link; /* link in master call list */
- struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */
+ struct list_head wait_link; /* Link in local->new_client_calls */
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* Link in rx->acceptq */
struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
@@ -623,10 +632,13 @@ struct rxrpc_call {
unsigned long flags;
unsigned long events;
spinlock_t notify_lock; /* Kernel notification lock */
- rwlock_t state_lock; /* lock for state transition */
- u32 abort_code; /* Local/remote abort code */
+ unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */
+ s32 send_abort; /* Abort code to be sent */
+ short send_abort_err; /* Error to be associated with the abort */
+ rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */
+ s32 abort_code; /* Local/remote abort code */
int error; /* Local error incurred */
- enum rxrpc_call_state state; /* current state of call */
+ enum rxrpc_call_state _state; /* Current state of call (needs barrier) */
enum rxrpc_call_completion completion; /* Call completion condition */
refcount_t ref;
u8 security_ix; /* Security type */
@@ -812,9 +824,11 @@ extern struct workqueue_struct *rxrpc_workqueue;
*/
int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
void rxrpc_discard_prealloc(struct rxrpc_sock *);
-int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
- struct rxrpc_connection *, struct sockaddr_rxrpc *,
- struct sk_buff *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
@@ -834,7 +848,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long now,
enum rxrpc_timer_trace why);
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
* call_object.c
@@ -851,6 +865,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct sockaddr_rxrpc *,
struct rxrpc_call_params *, gfp_t,
unsigned int);
+void rxrpc_start_call_timer(struct rxrpc_call *call);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
@@ -873,32 +888,88 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
}
/*
+ * call_state.c
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error);
+bool rxrpc_call_completed(struct rxrpc_call *call);
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why);
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error);
+
+static inline void rxrpc_set_call_state(struct rxrpc_call *call,
+ enum rxrpc_call_state state)
+{
+ /* Order write of completion info before write of ->state. */
+ smp_store_release(&call->_state, state);
+ wake_up(&call->waitq);
+}
+
+static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call)
+{
+ return call->_state; /* Only inside I/O thread */
+}
+
+static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call)
+{
+ /* Order read ->state before read of completion info. */
+ return smp_load_acquire(&call->_state);
+}
+
+static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call)
+{
+ return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED;
+}
+
+/*
* conn_client.c
*/
extern unsigned int rxrpc_reap_client_connections;
extern unsigned long rxrpc_conn_idle_client_expiry;
extern unsigned long rxrpc_conn_idle_client_fast_expiry;
-extern struct idr rxrpc_client_conn_ids;
-void rxrpc_destroy_client_conn_ids(void);
+void rxrpc_purge_client_connections(struct rxrpc_local *local);
struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
-int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
- struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
- gfp_t);
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp);
+void rxrpc_connect_client_calls(struct rxrpc_local *local);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
-void rxrpc_discard_expired_client_conns(struct work_struct *);
-void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
/*
* conn_event.c
*/
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int channel);
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why);
void rxrpc_process_connection(struct work_struct *);
void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
+
+static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn)
+{
+ /* Order reading the abort info after the state check. */
+ return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED;
+}
/*
* conn_object.c
@@ -906,6 +977,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
extern unsigned int rxrpc_connection_expiry;
extern unsigned int rxrpc_closed_conn_expiry;
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why);
struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
struct sockaddr_rxrpc *,
@@ -961,12 +1033,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
*/
int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
void rxrpc_error_report(struct sock *);
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err);
int rxrpc_io_thread(void *data);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
{
wake_up_process(local->io_thread);
}
+static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO);
+}
+
/*
* insecure.c
*/
@@ -1048,6 +1127,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
@@ -1063,12 +1143,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *);
*/
struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
- struct sockaddr_rxrpc *, gfp_t);
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
enum rxrpc_peer_trace);
-void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
- struct rxrpc_peer *);
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer);
void rxrpc_destroy_all_peers(struct rxrpc_net *);
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
@@ -1086,33 +1165,22 @@ extern const struct seq_operations rxrpc_local_seq_ops;
* recvmsg.c
*/
void rxrpc_notify_socket(struct rxrpc_call *);
-bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
-bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
-bool __rxrpc_call_completed(struct rxrpc_call *);
-bool rxrpc_call_completed(struct rxrpc_call *);
-bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
-bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
* Abort a call due to a protocol error.
*/
-static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
- struct sk_buff *skb,
- const char *eproto_why,
- const char *why,
- u32 abort_code)
+static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ s32 abort_code,
+ enum rxrpc_abort_reason why)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
- return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+ rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why);
+ return -EPROTO;
}
-#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
- __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
- (abort_why), (abort_code))
-
/*
* rtt.c
*/
@@ -1144,6 +1212,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *,
/*
* sendmsg.c
*/
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why);
int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
/*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index c02401656fa9..3e8689fdc437 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -99,7 +99,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
if (!call)
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
- call->state = RXRPC_CALL_SERVER_PREALLOC;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC);
__set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events);
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
@@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
(peer_tail + 1) &
(RXRPC_BACKLOG_MAX - 1));
- rxrpc_new_incoming_peer(rx, local, peer);
+ rxrpc_new_incoming_peer(local, peer);
}
/* Now allocate and set up the connection */
@@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
* If we want to report an error, we mark the skb with the packet type and
* abort code and return false.
*/
-int rxrpc_new_incoming_call(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct rxrpc_connection *conn,
- struct sockaddr_rxrpc *peer_srx,
- struct sk_buff *skb)
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
{
const struct rxrpc_security *sec = NULL;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -339,18 +339,17 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
_enter("");
- /* Don't set up a call for anything other than the first DATA packet. */
- if (sp->hdr.seq != 1 ||
- sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
- return 0; /* Just discard */
+ /* Don't set up a call for anything other than a DATA packet. */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call);
- rcu_read_lock();
+ read_lock(&local->services_lock);
/* Weed out packets to services we're not offering. Packets that would
* begin a call are explicitly rejected and the rest are just
* discarded.
*/
- rx = rcu_dereference(local->service);
+ rx = local->service;
if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
sp->hdr.serviceId != rx->second_service)
) {
@@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
if (!conn) {
sec = rxrpc_get_incoming_security(rx, skb);
if (!sec)
- goto reject;
+ goto unsupported_security;
}
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
- trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber,
- sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = RX_INVALID_OPERATION;
+ rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
+ RX_INVALID_OPERATION, -ESHUTDOWN);
goto no_call;
}
@@ -402,7 +399,7 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_unlock(&conn->state_lock);
spin_unlock(&rx->incoming_lock);
- rcu_read_unlock();
+ read_unlock(&local->services_lock);
if (hlist_unhashed(&call->error_link)) {
spin_lock(&call->peer->lock);
@@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
_leave(" = %p{%d}", call, call->debug_id);
rxrpc_input_call_event(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return 0;
+ return true;
unsupported_service:
- trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EOPNOTSUPP);
- skb->priority = RX_INVALID_OPERATION;
- goto reject;
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EOPNOTSUPP);
+unsupported_security:
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
no_call:
spin_unlock(&rx->incoming_lock);
-reject:
- rcu_read_unlock();
+ read_unlock(&local->services_lock);
_leave(" = f [%u]", skb->mark);
- return -EPROTO;
+ return false;
discard:
- rcu_read_unlock();
- return 0;
+ read_unlock(&local->services_lock);
+ return true;
}
/*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index b2cf448fb02c..1abdef15debc 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -251,6 +251,41 @@ out:
_leave("");
}
+/*
+ * Start transmitting the reply to a service. This cancels the need to ACK the
+ * request if we haven't yet done so.
+ */
+static void rxrpc_begin_service_reply(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies;
+
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
+ WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
+ if (call->ackr_reason == RXRPC_ACK_DELAY)
+ call->ackr_reason = 0;
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+}
+
+/*
+ * Close the transmission phase. After this point there is no more data to be
+ * transmitted in the call.
+ */
+static void rxrpc_close_tx_phase(struct rxrpc_call *call)
+{
+ _debug("________awaiting reply/ACK__________");
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ break;
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK);
+ break;
+ default:
+ break;
+ }
+}
+
static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
{
unsigned int winsize = min_t(unsigned int, call->tx_winsize,
@@ -270,9 +305,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
{
struct rxrpc_txbuf *txb;
- if (rxrpc_is_client_call(call) &&
- !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ if (list_empty(&call->tx_sendmsg))
+ return;
rxrpc_expose_client_call(call);
+ }
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
@@ -283,6 +320,9 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
call->tx_top = txb->seq;
list_add_tail(&txb->call_link, &call->tx_buffer);
+ if (txb->wire.flags & RXRPC_LAST_PACKET)
+ rxrpc_close_tx_phase(call);
+
rxrpc_transmit_one(call, txb);
if (!rxrpc_tx_window_has_space(call))
@@ -292,16 +332,15 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
static void rxrpc_transmit_some_data(struct rxrpc_call *call)
{
- switch (call->state) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_ACK_REQUEST:
if (list_empty(&call->tx_sendmsg))
return;
+ rxrpc_begin_service_reply(call);
fallthrough;
case RXRPC_CALL_SERVER_SEND_REPLY:
- case RXRPC_CALL_SERVER_AWAIT_ACK:
case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_CLIENT_AWAIT_REPLY:
if (!rxrpc_tx_window_has_space(call))
return;
if (list_empty(&call->tx_sendmsg)) {
@@ -331,21 +370,31 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
/*
* Handle retransmission and deferred ACK/abort generation.
*/
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
+ s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
//printk("\n--------------------\n");
_enter("{%d,%s,%lx}",
- call->debug_id, rxrpc_call_states[call->state], call->events);
+ call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)],
+ call->events);
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call))
goto out;
+ /* Handle abort request locklessly, vs rxrpc_propose_abort(). */
+ abort_code = smp_load_acquire(&call->send_abort);
+ if (abort_code) {
+ rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err,
+ call->send_abort_why);
+ goto out;
+ }
+
if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
goto out;
@@ -358,7 +407,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
}
t = READ_ONCE(call->expect_req_by);
- if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
expired = true;
@@ -429,11 +478,12 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
- rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
} else {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
}
- rxrpc_send_abort_packet(call);
goto out;
}
@@ -441,7 +491,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY)
+ if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
rxrpc_resend(call, NULL);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
@@ -453,7 +503,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_input_data);
/* Make sure the timer is restarted */
- if (call->state != RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
next = call->expect_rx_by;
#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
@@ -474,9 +524,15 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
}
out:
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call)) {
del_timer_sync(&call->timer);
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ rxrpc_disconnect_call(call);
+ if (call->security)
+ call->security->free_call_crypto(call);
+ }
if (call->acks_hard_ack != call->tx_bottom)
rxrpc_shrink_call_tx_buffer(call);
_leave("");
+ return true;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 89dcf60b1158..f3c9f0201c15 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -50,7 +50,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
struct rxrpc_local *local = call->local;
bool busy;
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) {
spin_lock_bh(&local->lock);
busy = !list_empty(&call->attend_link);
trace_rxrpc_poke_call(call, busy, what);
@@ -69,7 +69,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
trace_rxrpc_timer_expired(call, jiffies);
rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
@@ -150,7 +150,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
INIT_WORK(&call->destroyer, rxrpc_destroy_call);
INIT_LIST_HEAD(&call->link);
- INIT_LIST_HEAD(&call->chan_wait_link);
+ INIT_LIST_HEAD(&call->wait_link);
INIT_LIST_HEAD(&call->accept_link);
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
@@ -162,7 +162,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
- rwlock_init(&call->state_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
@@ -211,7 +210,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
- call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
call->dest_srx = *srx;
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
@@ -227,11 +225,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret);
rxrpc_put_call(call, rxrpc_call_put_discard_error);
return ERR_PTR(ret);
}
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN);
+
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
p->user_call_ID, rxrpc_call_new_client);
@@ -242,7 +242,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
/*
* Initiate the call ack/resend/expiry timer.
*/
-static void rxrpc_start_call_timer(struct rxrpc_call *call)
+void rxrpc_start_call_timer(struct rxrpc_call *call)
{
unsigned long now = jiffies;
unsigned long j = now + MAX_JIFFY_OFFSET;
@@ -287,6 +287,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call)
}
/*
+ * Start the process of connecting a call. We obtain a peer and a connection
+ * bundle, but the actual association of a call with a connection is offloaded
+ * to the I/O thread to simplify locking.
+ */
+static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
+{
+ struct rxrpc_local *local = call->local;
+ int ret = -ENOMEM;
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
+ if (!call->peer)
+ goto error;
+
+ ret = rxrpc_look_up_bundle(call, gfp);
+ if (ret < 0)
+ goto error;
+
+ trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call);
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+ spin_lock(&local->client_call_lock);
+ list_add_tail(&call->wait_link, &local->new_client_calls);
+ spin_unlock(&local->client_call_lock);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+
+error:
+ __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ return ret;
+}
+
+/*
* Set up a call for the given parameters.
* - Called with the socket lock held, which it must release.
* - If it returns a call, the call's lock will need releasing by the caller.
@@ -365,14 +398,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
/* Set up or get a connection record and set the protocol parameters,
* including channel number and call ID.
*/
- ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
+ ret = rxrpc_connect_call(call, gfp);
if (ret < 0)
goto error_attached_to_socket;
- rxrpc_see_call(call, rxrpc_call_see_connected);
-
- rxrpc_start_call_timer(call);
-
_leave(" = %p [new]", call);
return call;
@@ -384,27 +413,23 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
error_dup_user_ID:
write_unlock(&rx->call_lock);
release_sock(&rx->sk);
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, -EEXIST);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST);
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
rxrpc_call_see_userid_exists);
- rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_userid_exists);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
/* We got an error, but the call is attached to the socket and is in
- * need of release. However, we might now race with recvmsg() when
- * completing the call queues it. Return 0 from sys_sendmsg() and
+ * need of release. However, we might now race with recvmsg() when it
+ * completion notifies the socket. Return 0 from sys_sendmsg() and
* leave the error to recvmsg() to deal with.
*/
error_attached_to_socket:
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
rxrpc_call_see_connect_failed);
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, ret);
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
_leave(" = c=%08x [err]", call->debug_id);
return call;
}
@@ -427,32 +452,32 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
call->call_id = sp->hdr.callNumber;
call->dest_srx.srx_service = sp->hdr.serviceId;
call->cid = sp->hdr.cid;
- call->state = RXRPC_CALL_SERVER_SECURING;
call->cong_tstamp = skb->tstamp;
+ __set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+
spin_lock(&conn->state_lock);
switch (conn->state) {
case RXRPC_CONN_SERVICE_UNSECURED:
case RXRPC_CONN_SERVICE_CHALLENGING:
- call->state = RXRPC_CALL_SERVER_SECURING;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
break;
case RXRPC_CONN_SERVICE:
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
break;
- case RXRPC_CONN_REMOTELY_ABORTED:
- __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->abort_code, conn->error);
- break;
- case RXRPC_CONN_LOCALLY_ABORTED:
- __rxrpc_abort_call("CON", call, 1,
- conn->abort_code, conn->error);
+ case RXRPC_CONN_ABORTED:
+ rxrpc_set_call_completion(call, conn->completion,
+ conn->abort_code, conn->error);
break;
default:
BUG();
}
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+
/* Set the channel for this call. We don't get channel_lock as we're
* only defending against the data_ready handler (which we're called
* from) and the RESPONSE packet parser (which is only really
@@ -462,7 +487,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
chan = sp->hdr.cid & RXRPC_CHANNELMASK;
conn->channels[chan].call_counter = call->call_id;
conn->channels[chan].call_id = call->call_id;
- rcu_assign_pointer(conn->channels[chan].call, call);
+ conn->channels[chan].call = call;
spin_unlock(&conn->state_lock);
spin_lock(&conn->peer->lock);
@@ -522,20 +547,17 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- bool put = false;
+ bool put = false, putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
call->flags, rxrpc_call_see_release);
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
-
if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
rxrpc_put_call_slot(call);
- del_timer_sync(&call->timer);
/* Make sure we don't get any more notifications */
write_lock(&rx->recvmsg_lock);
@@ -560,7 +582,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
rb_erase(&call->sock_node, &rx->calls);
memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
- rxrpc_put_call(call, rxrpc_call_put_userid_exists);
+ putu = true;
}
list_del(&call->sock_link);
@@ -568,10 +590,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- rxrpc_disconnect_call(call);
- if (call->security)
- call->security->free_call_crypto(call);
+ if (putu)
+ rxrpc_put_call(call, rxrpc_call_put_userid);
+
_leave("");
}
@@ -588,7 +609,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
- rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release_tba);
rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
}
@@ -596,8 +618,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_get_release_sock);
- rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
- rxrpc_send_abort_packet(call);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
@@ -620,7 +642,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
dead = __refcount_dec_and_test(&call->ref, &r);
trace_rxrpc_call(debug_id, r - 1, 0, why);
if (dead) {
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
spin_lock(&rxnet->call_lock);
@@ -669,6 +691,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+ rxrpc_deactivate_bundle(call->bundle);
+ rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call);
rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
rxrpc_put_local(call->local, rxrpc_local_put_call);
call_rcu(&call->rcu, rxrpc_rcu_free_call);
@@ -681,7 +705,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
{
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
del_timer(&call->timer);
@@ -719,7 +743,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
call, refcount_read(&call->ref),
- rxrpc_call_states[call->state],
+ rxrpc_call_states[__rxrpc_call_state(call)],
call->flags, call->events);
spin_unlock(&rxnet->call_lock);
diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c
new file mode 100644
index 000000000000..6afb54373ebb
--- /dev/null
+++ b/net/rxrpc/call_state.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Call state changing functions.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "ar-internal.h"
+
+/*
+ * Transition a call to the complete state.
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE)
+ return false;
+
+ call->abort_code = abort_code;
+ call->error = error;
+ call->completion = compl;
+ /* Allow reader of completion state to operate locklessly */
+ rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE);
+ trace_rxrpc_call_complete(call);
+ wake_up(&call->waitq);
+ rxrpc_notify_socket(call);
+ return true;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+ return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
+{
+ trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
+ abort_code, error);
+ if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+ abort_code, error))
+ return false;
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_send_abort_packet(call);
+ return true;
+}
+
+/*
+ * Record that a call errored out before even getting off the ground, thereby
+ * setting the state to allow it to be destroyed.
+ */
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error)
+{
+ call->abort_code = RX_CALL_DEAD;
+ call->error = error;
+ call->completion = compl;
+ call->_state = RXRPC_CALL_COMPLETE;
+ trace_rxrpc_call_complete(call);
+ WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags));
+}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 87efa0373aed..981ca5b98bcb 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -34,104 +34,59 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
-/*
- * We use machine-unique IDs for our client connections.
- */
-DEFINE_IDR(rxrpc_client_conn_ids);
-static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
-
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
-
-/*
- * Get a connection ID and epoch for a client connection from the global pool.
- * The connection struct pointer is then recorded in the idr radix tree. The
- * epoch doesn't change until the client is rebooted (or, at least, unless the
- * module is unloaded).
- */
-static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
- gfp_t gfp)
+static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_net *rxnet = conn->rxnet;
- int id;
-
- _enter("");
-
- idr_preload(gfp);
- spin_lock(&rxrpc_conn_id_lock);
-
- id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn,
- 1, 0x40000000, GFP_NOWAIT);
- if (id < 0)
- goto error;
-
- spin_unlock(&rxrpc_conn_id_lock);
- idr_preload_end();
-
- conn->proto.epoch = rxnet->epoch;
- conn->proto.cid = id << RXRPC_CIDSHIFT;
- set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
- _leave(" [CID %x]", conn->proto.cid);
- return 0;
-
-error:
- spin_unlock(&rxrpc_conn_id_lock);
- idr_preload_end();
- _leave(" = %d", id);
- return id;
+ atomic_inc(&bundle->active);
}
/*
- * Release a connection ID for a client connection from the global pool.
+ * Release a connection ID for a client connection.
*/
-static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_local *local,
+ struct rxrpc_connection *conn)
{
- if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
- spin_lock(&rxrpc_conn_id_lock);
- idr_remove(&rxrpc_client_conn_ids,
- conn->proto.cid >> RXRPC_CIDSHIFT);
- spin_unlock(&rxrpc_conn_id_lock);
- }
+ idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT);
}
/*
* Destroy the client connection ID tree.
*/
-void rxrpc_destroy_client_conn_ids(void)
+static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
{
struct rxrpc_connection *conn;
int id;
- if (!idr_is_empty(&rxrpc_client_conn_ids)) {
- idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+ if (!idr_is_empty(&local->conn_ids)) {
+ idr_for_each_entry(&local->conn_ids, conn, id) {
pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
conn, refcount_read(&conn->ref));
}
BUG();
}
- idr_destroy(&rxrpc_client_conn_ids);
+ idr_destroy(&local->conn_ids);
}
/*
* Allocate a connection bundle.
*/
-static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
+static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
gfp_t gfp)
{
struct rxrpc_bundle *bundle;
bundle = kzalloc(sizeof(*bundle), gfp);
if (bundle) {
- bundle->local = cp->local;
- bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle);
- bundle->key = cp->key;
- bundle->exclusive = cp->exclusive;
- bundle->upgrade = cp->upgrade;
- bundle->service_id = cp->service_id;
- bundle->security_level = cp->security_level;
+ bundle->local = call->local;
+ bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle);
+ bundle->key = key_get(call->key);
+ bundle->security = call->security;
+ bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+ bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ bundle->service_id = call->dest_srx.srx_service;
+ bundle->security_level = call->security_level;
refcount_set(&bundle->ref, 1);
atomic_set(&bundle->active, 1);
- spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
}
@@ -152,84 +107,87 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
+ key_put(bundle->key);
kfree(bundle);
}
void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
{
- unsigned int id = bundle->debug_id;
+ unsigned int id;
bool dead;
int r;
- dead = __refcount_dec_and_test(&bundle->ref, &r);
- trace_rxrpc_bundle(id, r - 1, why);
- if (dead)
- rxrpc_free_bundle(bundle);
+ if (bundle) {
+ id = bundle->debug_id;
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+ trace_rxrpc_bundle(id, r - 1, why);
+ if (dead)
+ rxrpc_free_bundle(bundle);
+ }
+}
+
+/*
+ * Get rid of outstanding client connection preallocations when a local
+ * endpoint is destroyed.
+ */
+void rxrpc_purge_client_connections(struct rxrpc_local *local)
+{
+ rxrpc_destroy_client_conn_ids(local);
}
/*
* Allocate a client connection.
*/
static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
+rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet = bundle->local->rxnet;
- int ret;
+ struct rxrpc_local *local = bundle->local;
+ struct rxrpc_net *rxnet = local->rxnet;
+ int id;
_enter("");
- conn = rxrpc_alloc_connection(rxnet, gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
+ conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN);
+ if (!conn)
return ERR_PTR(-ENOMEM);
+
+ id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (id < 0) {
+ kfree(conn);
+ return ERR_PTR(id);
}
refcount_set(&conn->ref, 1);
- conn->bundle = bundle;
- conn->local = bundle->local;
- conn->peer = bundle->peer;
- conn->key = bundle->key;
+ conn->proto.cid = id << RXRPC_CIDSHIFT;
+ conn->proto.epoch = local->rxnet->epoch;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+ conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn);
+ conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn);
+ conn->key = key_get(bundle->key);
+ conn->security = bundle->security;
conn->exclusive = bundle->exclusive;
conn->upgrade = bundle->upgrade;
conn->orig_service_id = bundle->service_id;
conn->security_level = bundle->security_level;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->state = RXRPC_CONN_CLIENT;
+ conn->state = RXRPC_CONN_CLIENT_UNSECURED;
conn->service_id = conn->orig_service_id;
- ret = rxrpc_get_client_connection_id(conn, gfp);
- if (ret < 0)
- goto error_0;
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0)
- goto error_1;
+ if (conn->security == &rxrpc_no_security)
+ conn->state = RXRPC_CONN_CLIENT;
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
- rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn);
- rxrpc_get_local(conn->local, rxrpc_local_get_client_conn);
- key_get(conn->key);
-
- trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
- rxrpc_conn_new_client);
+ rxrpc_see_connection(conn, rxrpc_conn_new_client);
atomic_inc(&rxnet->nr_client_conns);
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
- _leave(" = %p", conn);
return conn;
-
-error_1:
- rxrpc_put_client_connection_id(conn);
-error_0:
- kfree(conn);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
}
/*
@@ -247,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
goto dont_reuse;
- if (conn->state != RXRPC_CONN_CLIENT ||
+ if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED &&
+ conn->state != RXRPC_CONN_CLIENT) ||
conn->proto.epoch != rxnet->epoch)
goto mark_dont_reuse;
@@ -257,7 +216,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
* times the maximum number of client conns away from the current
* allocation point to try and keep the IDs concentrated.
*/
- id_cursor = idr_get_cursor(&rxrpc_client_conn_ids);
+ id_cursor = idr_get_cursor(&conn->local->conn_ids);
id = conn->proto.cid >> RXRPC_CIDSHIFT;
distance = id - id_cursor;
if (distance < 0)
@@ -278,20 +237,23 @@ dont_reuse:
* Look up the conn bundle that matches the connection parameters, adding it if
* it doesn't yet exist.
*/
-static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp,
- gfp_t gfp)
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
{
static atomic_t rxrpc_bundle_id;
struct rxrpc_bundle *bundle, *candidate;
- struct rxrpc_local *local = cp->local;
+ struct rxrpc_local *local = call->local;
struct rb_node *p, **pp, *parent;
long diff;
+ bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
_enter("{%px,%x,%u,%u}",
- cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade);
+ call->peer, key_serial(call->key), call->security_level,
+ upgrade);
- if (cp->exclusive)
- return rxrpc_alloc_bundle(cp, gfp);
+ if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) {
+ call->bundle = rxrpc_alloc_bundle(call, gfp);
+ return call->bundle ? 0 : -ENOMEM;
+ }
/* First, see if the bundle is already there. */
_debug("search 1");
@@ -300,11 +262,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
while (p) {
bundle = rb_entry(p, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level) ?:
- cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
#undef cmp
if (diff < 0)
p = p->rb_left;
@@ -317,9 +279,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
_debug("not found");
/* It wasn't. We need to add one. */
- candidate = rxrpc_alloc_bundle(cp, gfp);
+ candidate = rxrpc_alloc_bundle(call, gfp);
if (!candidate)
- return NULL;
+ return -ENOMEM;
_debug("search 2");
spin_lock(&local->client_bundles_lock);
@@ -329,11 +291,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
parent = *pp;
bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level) ?:
- cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
#undef cmp
if (diff < 0)
pp = &(*pp)->rb_left;
@@ -347,178 +309,89 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
rb_link_node(&candidate->local_node, parent, pp);
rb_insert_color(&candidate->local_node, &local->client_bundles);
- rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
+ call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
spin_unlock(&local->client_bundles_lock);
- _leave(" = %u [new]", candidate->debug_id);
- return candidate;
+ _leave(" = B=%u [new]", call->bundle->debug_id);
+ return 0;
found_bundle_free:
rxrpc_free_bundle(candidate);
found_bundle:
- rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
- atomic_inc(&bundle->active);
+ call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
+ rxrpc_activate_bundle(bundle);
spin_unlock(&local->client_bundles_lock);
- _leave(" = %u [found]", bundle->debug_id);
- return bundle;
-}
-
-/*
- * Create or find a client bundle to use for a call.
- *
- * If we return with a connection, the call will be on its waiting list. It's
- * left to the caller to assign a channel and wake up the call.
- */
-static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_bundle *bundle;
-
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
- cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp);
- if (!cp->peer)
- goto error;
-
- call->tx_last_sent = ktime_get_real();
- call->cong_ssthresh = cp->peer->cong_ssthresh;
- if (call->cong_cwnd >= call->cong_ssthresh)
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
- else
- call->cong_mode = RXRPC_CALL_SLOW_START;
- if (cp->upgrade)
- __set_bit(RXRPC_CALL_UPGRADE, &call->flags);
-
- /* Find the client connection bundle. */
- bundle = rxrpc_look_up_bundle(cp, gfp);
- if (!bundle)
- goto error;
-
- /* Get this call queued. Someone else may activate it whilst we're
- * lining up a new connection, but that's fine.
- */
- spin_lock(&bundle->channel_lock);
- list_add_tail(&call->chan_wait_link, &bundle->waiting_calls);
- spin_unlock(&bundle->channel_lock);
-
- _leave(" = [B=%x]", bundle->debug_id);
- return bundle;
-
-error:
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _leave(" = B=%u [found]", call->bundle->debug_id);
+ return 0;
}
/*
* Allocate a new connection and add it into a bundle.
*/
-static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
- __releases(bundle->channel_lock)
+static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
+ unsigned int slot)
{
- struct rxrpc_connection *candidate = NULL, *old = NULL;
- bool conflict;
- int i;
-
- _enter("");
-
- conflict = bundle->alloc_conn;
- if (!conflict)
- bundle->alloc_conn = true;
- spin_unlock(&bundle->channel_lock);
- if (conflict) {
- _leave(" [conf]");
- return;
- }
-
- candidate = rxrpc_alloc_client_connection(bundle, gfp);
-
- spin_lock(&bundle->channel_lock);
- bundle->alloc_conn = false;
-
- if (IS_ERR(candidate)) {
- bundle->alloc_error = PTR_ERR(candidate);
- spin_unlock(&bundle->channel_lock);
- _leave(" [err %ld]", PTR_ERR(candidate));
- return;
- }
-
- bundle->alloc_error = 0;
-
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
- unsigned int shift = i * RXRPC_MAXCALLS;
- int j;
-
- old = bundle->conns[i];
- if (!rxrpc_may_reuse_conn(old)) {
- if (old)
- trace_rxrpc_client(old, -1, rxrpc_client_replace);
- candidate->bundle_shift = shift;
- atomic_inc(&bundle->active);
- bundle->conns[i] = candidate;
- for (j = 0; j < RXRPC_MAXCALLS; j++)
- set_bit(shift + j, &bundle->avail_chans);
- candidate = NULL;
- break;
- }
+ struct rxrpc_connection *conn, *old;
+ unsigned int shift = slot * RXRPC_MAXCALLS;
+ unsigned int i;
- old = NULL;
+ old = bundle->conns[slot];
+ if (old) {
+ bundle->conns[slot] = NULL;
+ trace_rxrpc_client(old, -1, rxrpc_client_replace);
+ rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
}
- spin_unlock(&bundle->channel_lock);
-
- if (candidate) {
- _debug("discard C=%x", candidate->debug_id);
- trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
- rxrpc_put_connection(candidate, rxrpc_conn_put_discard);
+ conn = rxrpc_alloc_client_connection(bundle);
+ if (IS_ERR(conn)) {
+ bundle->alloc_error = PTR_ERR(conn);
+ return false;
}
- rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
- _leave("");
+ rxrpc_activate_bundle(bundle);
+ conn->bundle_shift = shift;
+ bundle->conns[slot] = conn;
+ for (i = 0; i < RXRPC_MAXCALLS; i++)
+ set_bit(shift + i, &bundle->avail_chans);
+ return true;
}
/*
* Add a connection to a bundle if there are no usable connections or we have
* connections waiting for extra capacity.
*/
-static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp)
+static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle)
{
- struct rxrpc_call *call;
- int i, usable;
+ int slot = -1, i, usable;
_enter("");
- spin_lock(&bundle->channel_lock);
+ bundle->alloc_error = 0;
/* See if there are any usable connections. */
usable = 0;
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
+ for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
if (rxrpc_may_reuse_conn(bundle->conns[i]))
usable++;
-
- if (!usable && !list_empty(&bundle->waiting_calls)) {
- call = list_first_entry(&bundle->waiting_calls,
- struct rxrpc_call, chan_wait_link);
- if (test_bit(RXRPC_CALL_UPGRADE, &call->flags))
- bundle->try_upgrade = true;
+ else if (slot == -1)
+ slot = i;
}
+ if (!usable && bundle->upgrade)
+ bundle->try_upgrade = true;
+
if (!usable)
goto alloc_conn;
if (!bundle->avail_chans &&
!bundle->try_upgrade &&
- !list_empty(&bundle->waiting_calls) &&
usable < ARRAY_SIZE(bundle->conns))
goto alloc_conn;
- spin_unlock(&bundle->channel_lock);
_leave("");
- return;
+ return usable;
alloc_conn:
- return rxrpc_add_conn_to_bundle(bundle, gfp);
+ return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false;
}
/*
@@ -532,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
struct rxrpc_channel *chan = &conn->channels[channel];
struct rxrpc_bundle *bundle = conn->bundle;
struct rxrpc_call *call = list_entry(bundle->waiting_calls.next,
- struct rxrpc_call, chan_wait_link);
+ struct rxrpc_call, wait_link);
u32 call_id = chan->call_counter + 1;
_enter("C=%x,%u", conn->debug_id, channel);
+ list_del_init(&call->wait_link);
+
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
/* Cancel the final ACK on the previous call if it hasn't been sent yet
@@ -546,68 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
rxrpc_see_call(call, rxrpc_call_see_activate_client);
- list_del_init(&call->chan_wait_link);
- call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call);
call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
call->dest_srx.srx_service = conn->service_id;
-
- trace_rxrpc_connect_call(call);
-
- write_lock(&call->state_lock);
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- write_unlock(&call->state_lock);
-
- /* Paired with the read barrier in rxrpc_connect_call(). This orders
- * cid and epoch in the connection wrt to call_id without the need to
- * take the channel_lock.
- *
- * We provisionally assign a callNumber at this point, but we don't
- * confirm it until the call is about to be exposed.
- *
- * TODO: Pair with a barrier in the data_ready handler when that looks
- * at the call ID through a connection channel.
- */
- smp_wmb();
+ call->cong_ssthresh = call->peer->cong_ssthresh;
+ if (call->cong_cwnd >= call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ else
+ call->cong_mode = RXRPC_CALL_SLOW_START;
chan->call_id = call_id;
chan->call_debug_id = call->debug_id;
- rcu_assign_pointer(chan->call, call);
+ chan->call = call;
+
+ rxrpc_see_call(call, rxrpc_call_see_connected);
+ trace_rxrpc_connect_call(call);
+ call->tx_last_sent = ktime_get_real();
+ rxrpc_start_call_timer(call);
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST);
wake_up(&call->waitq);
}
/*
* Remove a connection from the idle list if it's on it.
*/
-static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn)
+static void rxrpc_unidle_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet = bundle->local->rxnet;
- bool drop_ref;
-
if (!list_empty(&conn->cache_link)) {
- drop_ref = false;
- spin_lock(&rxnet->client_conn_cache_lock);
- if (!list_empty(&conn->cache_link)) {
- list_del_init(&conn->cache_link);
- drop_ref = true;
- }
- spin_unlock(&rxnet->client_conn_cache_lock);
- if (drop_ref)
- rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
+ list_del_init(&conn->cache_link);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
}
}
/*
- * Assign channels and callNumbers to waiting calls with channel_lock
- * held by caller.
+ * Assign channels and callNumbers to waiting calls.
*/
-static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
+static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
{
struct rxrpc_connection *conn;
unsigned long avail, mask;
unsigned int channel, slot;
+ trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
+
if (bundle->try_upgrade)
mask = 1;
else
@@ -627,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
if (bundle->try_upgrade)
set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
- rxrpc_unidle_conn(bundle, conn);
+ rxrpc_unidle_conn(conn);
channel &= (RXRPC_MAXCALLS - 1);
conn->act_chans |= 1 << channel;
@@ -636,132 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
}
/*
- * Assign channels and callNumbers to waiting calls.
- */
-static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
-{
- _enter("B=%x", bundle->debug_id);
-
- trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
-
- if (!bundle->avail_chans)
- return;
-
- spin_lock(&bundle->channel_lock);
- rxrpc_activate_channels_locked(bundle);
- spin_unlock(&bundle->channel_lock);
- _leave("");
-}
-
-/*
- * Wait for a callNumber and a channel to be granted to a call.
- */
-static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle,
- struct rxrpc_call *call, gfp_t gfp)
-{
- DECLARE_WAITQUEUE(myself, current);
- int ret = 0;
-
- _enter("%d", call->debug_id);
-
- if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_maybe_add_conn(bundle, gfp);
- rxrpc_activate_channels(bundle);
- ret = bundle->alloc_error ?: -EAGAIN;
- goto out;
- }
-
- add_wait_queue_exclusive(&call->waitq, &myself);
- for (;;) {
- rxrpc_maybe_add_conn(bundle, gfp);
- rxrpc_activate_channels(bundle);
- ret = bundle->alloc_error;
- if (ret < 0)
- break;
-
- switch (call->interruptibility) {
- case RXRPC_INTERRUPTIBLE:
- case RXRPC_PREINTERRUPTIBLE:
- set_current_state(TASK_INTERRUPTIBLE);
- break;
- case RXRPC_UNINTERRUPTIBLE:
- default:
- set_current_state(TASK_UNINTERRUPTIBLE);
- break;
- }
- if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN)
- break;
- if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
- call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
- signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
- schedule();
- }
- remove_wait_queue(&call->waitq, &myself);
- __set_current_state(TASK_RUNNING);
-
-out:
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Connect waiting channels (called from the I/O thread).
*/
-int rxrpc_connect_call(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+void rxrpc_connect_client_calls(struct rxrpc_local *local)
{
- struct rxrpc_bundle *bundle;
- struct rxrpc_net *rxnet = cp->local->rxnet;
- int ret = 0;
-
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
- rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
+ struct rxrpc_call *call;
- bundle = rxrpc_prep_call(rx, call, cp, srx, gfp);
- if (IS_ERR(bundle)) {
- ret = PTR_ERR(bundle);
- goto out;
- }
+ while ((call = list_first_entry_or_null(&local->new_client_calls,
+ struct rxrpc_call, wait_link))
+ ) {
+ struct rxrpc_bundle *bundle = call->bundle;
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) {
- ret = rxrpc_wait_for_channel(bundle, call, gfp);
- if (ret < 0)
- goto wait_failed;
- }
+ spin_lock(&local->client_call_lock);
+ list_move_tail(&call->wait_link, &bundle->waiting_calls);
+ spin_unlock(&local->client_call_lock);
-granted_channel:
- /* Paired with the write barrier in rxrpc_activate_one_channel(). */
- smp_rmb();
-
-out_put_bundle:
- rxrpc_deactivate_bundle(bundle);
- rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call);
-out:
- _leave(" = %d", ret);
- return ret;
-
-wait_failed:
- spin_lock(&bundle->channel_lock);
- list_del_init(&call->chan_wait_link);
- spin_unlock(&bundle->channel_lock);
-
- if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) {
- ret = 0;
- goto granted_channel;
+ if (rxrpc_bundle_has_space(bundle))
+ rxrpc_activate_channels(bundle);
}
-
- trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
- rxrpc_disconnect_client_call(bundle, call);
- goto out_put_bundle;
}
/*
@@ -794,14 +543,14 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
/*
* Set the reap timer.
*/
-static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
+static void rxrpc_set_client_reap_timer(struct rxrpc_local *local)
{
- if (!rxnet->kill_all_client_conns) {
+ if (!local->kill_all_client_conns) {
unsigned long now = jiffies;
unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
- if (rxnet->live)
- timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
+ if (local->rxnet->live)
+ timer_reduce(&local->client_conn_reap_timer, reap_at);
}
}
@@ -812,16 +561,13 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
{
struct rxrpc_connection *conn;
struct rxrpc_channel *chan = NULL;
- struct rxrpc_net *rxnet = bundle->local->rxnet;
+ struct rxrpc_local *local = bundle->local;
unsigned int channel;
bool may_reuse;
u32 cid;
_enter("c=%x", call->debug_id);
- spin_lock(&bundle->channel_lock);
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
-
/* Calls that have never actually been assigned a channel can simply be
* discarded.
*/
@@ -830,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
_debug("call is waiting");
ASSERTCMP(call->call_id, ==, 0);
ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
- list_del_init(&call->chan_wait_link);
- goto out;
+ list_del_init(&call->wait_link);
+ return;
}
cid = call->cid;
@@ -839,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
chan = &conn->channels[channel];
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
- if (rcu_access_pointer(chan->call) != call) {
- spin_unlock(&bundle->channel_lock);
- BUG();
- }
+ if (WARN_ON(chan->call != call))
+ return;
may_reuse = rxrpc_may_reuse_conn(conn);
@@ -863,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
bundle->try_upgrade = false;
if (may_reuse)
- rxrpc_activate_channels_locked(bundle);
+ rxrpc_activate_channels(bundle);
}
-
}
/* See if we can pass the channel directly to another call. */
if (may_reuse && !list_empty(&bundle->waiting_calls)) {
trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
rxrpc_activate_one_channel(conn, channel);
- goto out;
+ return;
}
/* Schedule the final ACK to be transmitted in a short while so that it
@@ -890,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
}
/* Deactivate the channel. */
- rcu_assign_pointer(chan->call, NULL);
+ chan->call = NULL;
set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans);
conn->act_chans &= ~(1 << channel);
@@ -903,17 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
conn->idle_timestamp = jiffies;
rxrpc_get_connection(conn, rxrpc_conn_get_idle);
- spin_lock(&rxnet->client_conn_cache_lock);
- list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
- spin_unlock(&rxnet->client_conn_cache_lock);
+ list_move_tail(&conn->cache_link, &local->idle_client_conns);
- rxrpc_set_client_reap_timer(rxnet);
+ rxrpc_set_client_reap_timer(local);
}
-
-out:
- spin_unlock(&bundle->channel_lock);
- _leave("");
- return;
}
/*
@@ -923,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
{
struct rxrpc_bundle *bundle = conn->bundle;
unsigned int bindex;
- bool need_drop = false;
int i;
_enter("C=%x", conn->debug_id);
@@ -931,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
rxrpc_process_delayed_final_acks(conn, true);
- spin_lock(&bundle->channel_lock);
bindex = conn->bundle_shift / RXRPC_MAXCALLS;
if (bundle->conns[bindex] == conn) {
_debug("clear slot %u", bindex);
bundle->conns[bindex] = NULL;
for (i = 0; i < RXRPC_MAXCALLS; i++)
clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
- need_drop = true;
- }
- spin_unlock(&bundle->channel_lock);
-
- if (need_drop) {
+ rxrpc_put_client_connection_id(bundle->local, conn);
rxrpc_deactivate_bundle(bundle);
rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
}
@@ -951,11 +681,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
/*
* Drop the active count on a bundle.
*/
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_local *local = bundle->local;
+ struct rxrpc_local *local;
bool need_put = false;
+ if (!bundle)
+ return;
+
+ local = bundle->local;
if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
if (!bundle->exclusive) {
_debug("erase bundle");
@@ -982,7 +716,7 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
atomic_dec(&rxnet->nr_client_conns);
- rxrpc_put_client_connection_id(conn);
+ rxrpc_put_client_connection_id(local, conn);
}
/*
@@ -992,42 +726,26 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
* This may be called from conn setup or from a work item so cannot be
* considered non-reentrant.
*/
-void rxrpc_discard_expired_client_conns(struct work_struct *work)
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet =
- container_of(work, struct rxrpc_net, client_conn_reaper);
unsigned long expiry, conn_expires_at, now;
unsigned int nr_conns;
_enter("");
- if (list_empty(&rxnet->idle_client_conns)) {
- _leave(" [empty]");
- return;
- }
-
- /* Don't double up on the discarding */
- if (!mutex_trylock(&rxnet->client_conn_discard_lock)) {
- _leave(" [already]");
- return;
- }
-
/* We keep an estimate of what the number of conns ought to be after
* we've discarded some so that we don't overdo the discarding.
*/
- nr_conns = atomic_read(&rxnet->nr_client_conns);
+ nr_conns = atomic_read(&local->rxnet->nr_client_conns);
next:
- spin_lock(&rxnet->client_conn_cache_lock);
-
- if (list_empty(&rxnet->idle_client_conns))
- goto out;
-
- conn = list_entry(rxnet->idle_client_conns.next,
- struct rxrpc_connection, cache_link);
+ conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link);
+ if (!conn)
+ return;
- if (!rxnet->kill_all_client_conns) {
+ if (!local->kill_all_client_conns) {
/* If the number of connections is over the reap limit, we
* expedite discard by reducing the expiry timeout. We must,
* however, have at least a short grace period to be able to do
@@ -1050,8 +768,6 @@ next:
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_del_init(&conn->cache_link);
- spin_unlock(&rxnet->client_conn_cache_lock);
-
rxrpc_unbundle_conn(conn);
/* Drop the ->cache_link ref */
rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
@@ -1068,31 +784,8 @@ not_yet_expired:
* then things get messier.
*/
_debug("not yet");
- if (!rxnet->kill_all_client_conns)
- timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at);
-
-out:
- spin_unlock(&rxnet->client_conn_cache_lock);
- mutex_unlock(&rxnet->client_conn_discard_lock);
- _leave("");
-}
-
-/*
- * Preemptively destroy all the client connection records rather than waiting
- * for them to time out
- */
-void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
-{
- _enter("");
-
- spin_lock(&rxnet->client_conn_cache_lock);
- rxnet->kill_all_client_conns = true;
- spin_unlock(&rxnet->client_conn_cache_lock);
-
- del_timer_sync(&rxnet->client_conn_reap_timer);
-
- if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
- _debug("destroy: queue failed");
+ if (!local->kill_all_client_conns)
+ timer_reduce(&local->client_conn_reap_timer, conn_expires_at);
_leave("");
}
@@ -1102,29 +795,19 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
*/
void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
{
- struct rxrpc_connection *conn, *tmp;
- struct rxrpc_net *rxnet = local->rxnet;
- LIST_HEAD(graveyard);
+ struct rxrpc_connection *conn;
_enter("");
- spin_lock(&rxnet->client_conn_cache_lock);
-
- list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
- cache_link) {
- if (conn->local == local) {
- atomic_dec(&conn->active);
- trace_rxrpc_client(conn, -1, rxrpc_client_discard);
- list_move(&conn->cache_link, &graveyard);
- }
- }
+ local->kill_all_client_conns = true;
- spin_unlock(&rxnet->client_conn_cache_lock);
+ del_timer_sync(&local->client_conn_reap_timer);
- while (!list_empty(&graveyard)) {
- conn = list_entry(graveyard.next,
- struct rxrpc_connection, cache_link);
+ while ((conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link))) {
list_del_init(&conn->cache_link);
+ atomic_dec(&conn->active);
+ trace_rxrpc_client(conn, -1, rxrpc_client_discard);
rxrpc_unbundle_conn(conn);
rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 480364bcbf85..44414e724415 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -17,11 +17,65 @@
#include "ar-internal.h"
/*
+ * Set the completion state on an aborted connection.
+ */
+static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err,
+ enum rxrpc_call_completion compl)
+{
+ bool aborted = false;
+
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ spin_lock(&conn->state_lock);
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ conn->abort_code = abort_code;
+ conn->error = err;
+ conn->completion = compl;
+ /* Order the abort info before the state change. */
+ smp_store_release(&conn->state, RXRPC_CONN_ABORTED);
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events);
+ aborted = true;
+ }
+ spin_unlock(&conn->state_lock);
+ }
+
+ return aborted;
+}
+
+/*
+ * Mark a socket buffer to indicate that the connection it's on should be aborted.
+ */
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (rxrpc_set_conn_aborted(conn, skb, abort_code, err,
+ RXRPC_CALL_LOCALLY_ABORTED)) {
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber,
+ sp->hdr.seq, abort_code, err);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort);
+ }
+ return -EPROTO;
+}
+
+/*
+ * Mark a connection as being remotely aborted.
+ */
+static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ RXRPC_CALL_REMOTELY_ABORTED);
+}
+
+/*
* Retransmit terminal ACK or ABORT of the previous call.
*/
-static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- unsigned int channel)
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ unsigned int channel)
{
struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
struct rxrpc_channel *chan;
@@ -46,9 +100,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
/* If the last call got moved on whilst we were waiting to run, just
* ignore this packet.
*/
- call_id = READ_ONCE(chan->last_call);
- /* Sync with __rxrpc_disconnect_call() */
- smp_rmb();
+ call_id = chan->last_call;
if (skb && call_id != sp->hdr.callNumber)
return;
@@ -65,9 +117,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
+ serial = atomic_inc_return(&conn->serial);
+
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
pkt.whdr.callNumber = htonl(call_id);
+ pkt.whdr.serial = htonl(serial);
pkt.whdr.seq = 0;
pkt.whdr.type = chan->last_type;
pkt.whdr.flags = conn->out_clientflag;
@@ -104,31 +159,15 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[0].iov_len += sizeof(pkt.ack);
len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
ioc = 3;
- break;
-
- default:
- return;
- }
-
- /* Resync with __rxrpc_disconnect_call() and check that the last call
- * didn't get advanced whilst we were filling out the packets.
- */
- smp_rmb();
- if (READ_ONCE(chan->last_call) != call_id)
- return;
-
- serial = atomic_inc_return(&conn->serial);
- pkt.whdr.serial = htonl(serial);
- switch (chan->last_type) {
- case RXRPC_PACKET_TYPE_ABORT:
- break;
- case RXRPC_PACKET_TYPE_ACK:
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
pkt.ack.reason, 0);
break;
+
+ default:
+ return;
}
ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
@@ -146,131 +185,34 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
/*
* pass a connection-level abort onto all calls on that connection
*/
-static void rxrpc_abort_calls(struct rxrpc_connection *conn,
- enum rxrpc_call_completion compl,
- rxrpc_serial_t serial)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn)
{
struct rxrpc_call *call;
int i;
_enter("{%d},%x", conn->debug_id, conn->abort_code);
- spin_lock(&conn->bundle->channel_lock);
-
for (i = 0; i < RXRPC_MAXCALLS; i++) {
- call = rcu_dereference_protected(
- conn->channels[i].call,
- lockdep_is_held(&conn->bundle->channel_lock));
- if (call) {
- if (compl == RXRPC_CALL_LOCALLY_ABORTED)
- trace_rxrpc_abort(call->debug_id,
- "CON", call->cid,
- call->call_id, 0,
+ call = conn->channels[i].call;
+ if (call)
+ rxrpc_set_call_completion(call,
+ conn->completion,
conn->abort_code,
conn->error);
- else
- trace_rxrpc_rx_abort(call, serial,
- conn->abort_code);
- rxrpc_set_call_completion(call, compl,
- conn->abort_code,
- conn->error);
- }
}
- spin_unlock(&conn->bundle->channel_lock);
_leave("");
}
/*
- * generate a connection-level abort
- */
-static int rxrpc_abort_connection(struct rxrpc_connection *conn,
- int error, u32 abort_code)
-{
- struct rxrpc_wire_header whdr;
- struct msghdr msg;
- struct kvec iov[2];
- __be32 word;
- size_t len;
- u32 serial;
- int ret;
-
- _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
-
- /* generate a connection-level abort */
- spin_lock(&conn->state_lock);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- spin_unlock(&conn->state_lock);
- _leave(" = 0 [already dead]");
- return 0;
- }
-
- conn->error = error;
- conn->abort_code = abort_code;
- conn->state = RXRPC_CONN_LOCALLY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- spin_unlock(&conn->state_lock);
-
- msg.msg_name = &conn->peer->srx.transport;
- msg.msg_namelen = conn->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- whdr.epoch = htonl(conn->proto.epoch);
- whdr.cid = htonl(conn->proto.cid);
- whdr.callNumber = 0;
- whdr.seq = 0;
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
- whdr.flags = conn->out_clientflag;
- whdr.userStatus = 0;
- whdr.securityIndex = conn->security_ix;
- whdr._rsvd = 0;
- whdr.serviceId = htons(conn->service_id);
-
- word = htonl(conn->abort_code);
-
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = &word;
- iov[1].iov_len = sizeof(word);
-
- len = iov[0].iov_len + iov[1].iov_len;
-
- serial = atomic_inc_return(&conn->serial);
- rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
- whdr.serial = htonl(serial);
-
- ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
- if (ret < 0) {
- trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
- rxrpc_tx_point_conn_abort);
- _debug("sendmsg failed: %d", ret);
- return -EAGAIN;
- }
-
- trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
-
- conn->peer->last_tx_at = ktime_get_seconds();
-
- _leave(" = 0");
- return 0;
-}
-
-/*
* mark a call as being on a now-secured channel
* - must be called with BH's disabled.
*/
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
- _enter("%p", call);
- if (call) {
- write_lock(&call->state_lock);
- if (call->state == RXRPC_CALL_SERVER_SECURING) {
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
- rxrpc_notify_socket(call);
- }
- write_unlock(&call->state_lock);
+ if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ rxrpc_notify_socket(call);
}
}
@@ -278,44 +220,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
* connection-level Rx packet processor
*/
static int rxrpc_process_event(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- int loop, ret;
+ int ret;
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- _leave(" = -ECONNABORTED [%u]", conn->state);
+ if (conn->state == RXRPC_CONN_ABORTED)
return -ECONNABORTED;
- }
_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_DATA:
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb,
- sp->hdr.cid & RXRPC_CHANNELMASK);
- return 0;
-
- case RXRPC_PACKET_TYPE_BUSY:
- /* Just ignore BUSY packets for now. */
- return 0;
-
- case RXRPC_PACKET_TYPE_ABORT:
- conn->error = -ECONNABORTED;
- conn->abort_code = skb->priority;
- conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
- return -ECONNABORTED;
-
case RXRPC_PACKET_TYPE_CHALLENGE:
- return conn->security->respond_to_challenge(conn, skb,
- _abort_code);
+ return conn->security->respond_to_challenge(conn, skb);
case RXRPC_PACKET_TYPE_RESPONSE:
- ret = conn->security->verify_response(conn, skb, _abort_code);
+ ret = conn->security->verify_response(conn, skb);
if (ret < 0)
return ret;
@@ -324,27 +244,25 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- spin_lock(&conn->bundle->channel_lock);
spin_lock(&conn->state_lock);
-
- if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING)
conn->state = RXRPC_CONN_SERVICE;
- spin_unlock(&conn->state_lock);
- for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(
- rcu_dereference_protected(
- conn->channels[loop].call,
- lockdep_is_held(&conn->bundle->channel_lock)));
- } else {
- spin_unlock(&conn->state_lock);
- }
+ spin_unlock(&conn->state_lock);
- spin_unlock(&conn->bundle->channel_lock);
+ if (conn->state == RXRPC_CONN_SERVICE) {
+ /* Offload call state flipping to the I/O thread. As
+ * we've already received the packet, put it on the
+ * front of the queue.
+ */
+ skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+ skb_queue_head(&conn->local->rx_queue, skb);
+ rxrpc_wake_up_io_thread(conn->local);
+ }
return 0;
default:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_conn_pkt"));
+ WARN_ON_ONCE(1);
return -EPROTO;
}
}
@@ -354,26 +272,9 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
*/
static void rxrpc_secure_connection(struct rxrpc_connection *conn)
{
- u32 abort_code;
- int ret;
-
- _enter("{%d}", conn->debug_id);
-
- ASSERT(conn->security_ix != 0);
-
- if (conn->security->issue_challenge(conn) < 0) {
- abort_code = RX_CALL_DEAD;
- ret = -ENOMEM;
- goto abort;
- }
-
- _leave("");
- return;
-
-abort:
- _debug("abort %d, %d", ret, abort_code);
- rxrpc_abort_connection(conn, ret, abort_code);
- _leave(" [aborted]");
+ if (conn->security->issue_challenge(conn) < 0)
+ rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM,
+ rxrpc_abort_nomem);
}
/*
@@ -395,9 +296,7 @@ again:
if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
continue;
- smp_rmb(); /* vs rxrpc_disconnect_client_call */
- ack_at = READ_ONCE(chan->final_ack_at);
-
+ ack_at = chan->final_ack_at;
if (time_before(j, ack_at) && !force) {
if (time_before(ack_at, next_j)) {
next_j = ack_at;
@@ -424,47 +323,27 @@ again:
static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
{
struct sk_buff *skb;
- u32 abort_code = RX_PROTOCOL_ERROR;
int ret;
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
- /* Process delayed ACKs whose time has come. */
- if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
- rxrpc_process_delayed_final_acks(conn, false);
-
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
- ret = rxrpc_process_event(conn, skb, &abort_code);
+ ret = rxrpc_process_event(conn, skb);
switch (ret) {
- case -EPROTO:
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- goto protocol_error;
case -ENOMEM:
case -EAGAIN:
- goto requeue_and_leave;
- case -ECONNABORTED:
+ skb_queue_head(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work);
+ break;
default:
rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
break;
}
}
-
- return;
-
-requeue_and_leave:
- skb_queue_head(&conn->rx_queue, skb);
- return;
-
-protocol_error:
- if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
- goto requeue_and_leave;
- rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
- return;
}
void rxrpc_process_connection(struct work_struct *work)
@@ -498,44 +377,59 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
/*
* Input a connection-level packet.
*/
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- _leave(" = -ECONNABORTED [%u]", conn->state);
- return -ECONNABORTED;
- }
-
- _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
-
switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_DATA:
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb,
- sp->hdr.cid & RXRPC_CHANNELMASK);
- return 0;
-
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets for now. */
- return 0;
+ return true;
case RXRPC_PACKET_TYPE_ABORT:
- conn->error = -ECONNABORTED;
- conn->abort_code = skb->priority;
- conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
- return -ECONNABORTED;
+ if (rxrpc_is_conn_aborted(conn))
+ return true;
+ rxrpc_input_conn_abort(conn, skb);
+ rxrpc_abort_calls(conn);
+ return true;
case RXRPC_PACKET_TYPE_CHALLENGE:
case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_is_conn_aborted(conn)) {
+ if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
+ rxrpc_send_conn_abort(conn);
+ return true;
+ }
rxrpc_post_packet_to_conn(conn, skb);
- return 0;
+ return true;
default:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_conn_pkt"));
- return -EPROTO;
+ WARN_ON_ONCE(1);
+ return true;
}
}
+
+/*
+ * Input a connection event.
+ */
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ unsigned int loop;
+
+ if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+ rxrpc_abort_calls(conn);
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ if (conn->state != RXRPC_CONN_SERVICE)
+ break;
+
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop].call);
+ break;
+ }
+
+ /* Process delayed ACKs whose time has come. */
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn, false);
+}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 3c8f83dacb2b..ac85d4644a3c 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work);
static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
unsigned long reap_at);
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
+{
+ struct rxrpc_local *local = conn->local;
+ bool busy;
+
+ if (WARN_ON_ONCE(!local))
+ return;
+
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&conn->attend_link);
+ if (!busy) {
+ rxrpc_get_connection(conn, why);
+ list_add_tail(&conn->attend_link, &local->conn_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ rxrpc_wake_up_io_thread(local);
+}
+
static void rxrpc_connection_timer(struct timer_list *timer)
{
struct rxrpc_connection *conn =
container_of(timer, struct rxrpc_connection, timer);
- rxrpc_queue_conn(conn, rxrpc_conn_queue_timer);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer);
}
/*
@@ -49,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
+ mutex_init(&conn->security_lock);
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
@@ -82,10 +101,10 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- /* Look up client connections by connection ID alone as their IDs are
- * unique for this machine.
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
*/
- conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
+ conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
if (!conn || refcount_read(&conn->ref) == 0) {
_debug("no conn");
goto not_found;
@@ -139,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
_enter("%d,%x", conn->debug_id, call->cid);
- if (rcu_access_pointer(chan->call) == call) {
+ if (chan->call == call) {
/* Save the result of the call so that we can repeat it if necessary
* through the channel, whilst disposing of the actual call record.
*/
@@ -159,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
break;
}
- /* Sync with rxrpc_conn_retransmit(). */
- smp_wmb();
chan->last_call = chan->call_id;
chan->call_id = chan->call_counter;
-
- rcu_assign_pointer(chan->call, NULL);
+ chan->call = NULL;
}
_leave("");
@@ -178,6 +194,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ rxrpc_see_call(call, rxrpc_call_see_disconnected);
+
call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
@@ -186,18 +205,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
spin_unlock(&call->peer->lock);
}
- if (rxrpc_is_client_call(call))
- return rxrpc_disconnect_client_call(conn->bundle, call);
-
- spin_lock(&conn->bundle->channel_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&conn->bundle->channel_lock);
+ if (rxrpc_is_client_call(call)) {
+ rxrpc_disconnect_client_call(call->bundle, call);
+ } else {
+ __rxrpc_disconnect_call(conn, call);
+ conn->idle_timestamp = jiffies;
+ if (atomic_dec_and_test(&conn->active))
+ rxrpc_set_service_reap_timer(conn->rxnet,
+ jiffies + rxrpc_connection_expiry);
+ }
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
- conn->idle_timestamp = jiffies;
- if (atomic_dec_and_test(&conn->active))
- rxrpc_set_service_reap_timer(conn->rxnet,
- jiffies + rxrpc_connection_expiry);
+ rxrpc_put_call(call, rxrpc_call_put_io_thread);
}
/*
@@ -293,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
container_of(work, struct rxrpc_connection, destructor);
struct rxrpc_net *rxnet = conn->rxnet;
- ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
- !rcu_access_pointer(conn->channels[1].call) &&
- !rcu_access_pointer(conn->channels[2].call) &&
- !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(!conn->channels[0].call &&
+ !conn->channels[1].call &&
+ !conn->channels[2].call &&
+ !conn->channels[3].call);
ASSERT(list_empty(&conn->cache_link));
del_timer_sync(&conn->timer);
@@ -447,7 +465,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
_enter("");
atomic_dec(&rxnet->nr_conns);
- rxrpc_destroy_all_client_connections(rxnet);
del_timer_sync(&rxnet->service_conn_reap_timer);
rxrpc_queue_work(&rxnet->service_conn_reaper);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 2a55a88b2a5b..f30323de82bd 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -11,7 +11,6 @@
static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
.ref = REFCOUNT_INIT(1),
.debug_id = UINT_MAX,
- .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
};
/*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d0e20e946e48..367927a99881 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,11 +9,10 @@
#include "ar-internal.h"
-static void rxrpc_proto_abort(const char *why,
- struct rxrpc_call *call, rxrpc_seq_t seq)
+static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
+ enum rxrpc_abort_reason why)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG))
- rxrpc_send_abort_packet(call);
+ rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why);
}
/*
@@ -185,7 +184,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
if (call->cong_mode != RXRPC_CALL_SLOW_START &&
call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
return;
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY)
return;
rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
@@ -250,47 +249,34 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
* This occurs when we get an ACKALL packet, the first DATA packet of a reply,
* or a final ACK packet.
*/
-static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
- const char *abort_why)
+static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+ enum rxrpc_abort_reason abort_why)
{
- unsigned int state;
-
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
- write_lock(&call->state_lock);
-
- state = call->state;
- switch (state) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
- if (reply_begun)
- call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY;
- else
- call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ if (reply_begun) {
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
+ break;
+ }
+
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
break;
case RXRPC_CALL_SERVER_AWAIT_ACK:
- __rxrpc_call_completed(call);
- state = call->state;
+ rxrpc_call_completed(call);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
break;
default:
- goto bad_state;
+ kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]);
+ rxrpc_proto_abort(call, call->tx_top, abort_why);
+ break;
}
-
- write_unlock(&call->state_lock);
- if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
- trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
- else
- trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
- _leave(" = ok");
- return true;
-
-bad_state:
- write_unlock(&call->state_lock);
- kdebug("end_tx %s", rxrpc_call_states[call->state]);
- rxrpc_proto_abort(abort_why, call, call->tx_top);
- return false;
}
/*
@@ -305,18 +291,48 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
if (call->ackr_reason) {
now = jiffies;
timo = now + MAX_JIFFY_OFFSET;
- WRITE_ONCE(call->resend_at, timo);
+
WRITE_ONCE(call->delay_ack_at, timo);
trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
if (!rxrpc_rotate_tx_window(call, top, &summary)) {
- rxrpc_proto_abort("TXL", call, top);
+ rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
return false;
}
}
- return rxrpc_end_tx_phase(call, true, "ETD");
+
+ rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply);
+ return true;
+}
+
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
+{
+ rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
+
+ _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]);
+
+ trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
+ rxrpc_call_completed(call);
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
+ call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
+ break;
+
+ default:
+ break;
+ }
}
static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
@@ -337,8 +353,9 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
__skb_queue_tail(&call->recvmsg_queue, skb);
rxrpc_input_update_ack_window(call, window, wtop);
-
trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
+ if (last)
+ rxrpc_end_rx_phase(call, sp->hdr.serial);
}
/*
@@ -366,17 +383,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
if (last) {
if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- seq + 1 != wtop) {
- rxrpc_proto_abort("LSN", call, seq);
- return;
- }
+ seq + 1 != wtop)
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last);
} else {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
after_eq(seq, wtop)) {
pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
call->debug_id, seq, window, wtop, wlimit);
- rxrpc_proto_abort("LSA", call, seq);
- return;
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last);
}
}
@@ -550,7 +564,6 @@ protocol_error:
static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- enum rxrpc_call_state state;
rxrpc_serial_t serial = sp->hdr.serial;
rxrpc_seq_t seq0 = sp->hdr.seq;
@@ -558,11 +571,20 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
atomic64_read(&call->ackr_window), call->rx_highest_seq,
skb->len, seq0);
- state = READ_ONCE(call->state);
- if (state >= RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call))
return;
- if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ /* Received data implicitly ACKs all of the request
+ * packets we sent when we're acting as a client.
+ */
+ if (!rxrpc_receiving_reply(call))
+ goto out_notify;
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST: {
unsigned long timo = READ_ONCE(call->next_req_timo);
unsigned long now, expect_req_by;
@@ -573,18 +595,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_reduce_call_timer(call, expect_req_by, now,
rxrpc_timer_set_for_idle);
}
+ break;
}
- /* Received data implicitly ACKs all of the request packets we sent
- * when we're acting as a client.
- */
- if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
- state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
- !rxrpc_receiving_reply(call))
- goto out_notify;
+ default:
+ break;
+ }
if (!rxrpc_input_split_jumbo(call, skb)) {
- rxrpc_proto_abort("VLD", call, sp->hdr.seq);
+ rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo);
goto out_notify;
}
skb = NULL;
@@ -765,7 +784,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
offset = sizeof(struct rxrpc_wire_header);
if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
- return rxrpc_proto_abort("XAK", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
offset += sizeof(ack);
ack_serial = sp->hdr.serial;
@@ -845,7 +864,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
ioffset = offset + nr_acks + 3;
if (skb->len >= ioffset + sizeof(info) &&
skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
- return rxrpc_proto_abort("XAI", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
if (nr_acks > 0)
skb_condense(skb);
@@ -868,10 +887,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_input_ackinfo(call, skb, &info);
if (first_soft_ack == 0)
- return rxrpc_proto_abort("AK0", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
/* Ignore ACKs unless we are or have just been transmitting. */
- switch (READ_ONCE(call->state)) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -883,20 +902,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (before(hard_ack, call->acks_hard_ack) ||
after(hard_ack, call->tx_top))
- return rxrpc_proto_abort("AKW", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window);
if (nr_acks > call->tx_top - hard_ack)
- return rxrpc_proto_abort("AKN", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
- rxrpc_end_tx_phase(call, false, "ETA");
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
return;
}
}
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
- return rxrpc_proto_abort("XSA", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
nr_acks, &summary);
}
@@ -918,7 +937,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_ack_summary summary = { 0 };
if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
- rxrpc_end_tx_phase(call, false, "ETL");
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
}
/*
@@ -963,27 +982,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
- rxrpc_input_data(call, skb);
- break;
+ return rxrpc_input_data(call, skb);
case RXRPC_PACKET_TYPE_ACK:
- rxrpc_input_ack(call, skb);
- break;
+ return rxrpc_input_ack(call, skb);
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets from the server; the retry and
* lifespan timers will take care of business. BUSY packets
* from the client don't make sense.
*/
- break;
+ return;
case RXRPC_PACKET_TYPE_ABORT:
- rxrpc_input_abort(call, skb);
- break;
+ return rxrpc_input_abort(call, skb);
case RXRPC_PACKET_TYPE_ACKALL:
- rxrpc_input_ackall(call, skb);
- break;
+ return rxrpc_input_ackall(call, skb);
default:
break;
@@ -998,24 +1013,18 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
*/
void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_connection *conn = call->conn;
-
- switch (READ_ONCE(call->state)) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
fallthrough;
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN))
- rxrpc_send_abort_packet(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN,
+ rxrpc_eproto_improper_term);
trace_rxrpc_improper_term(call);
break;
}
rxrpc_input_call_event(call, skb);
-
- spin_lock(&conn->bundle->channel_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&conn->bundle->channel_lock);
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 0eb8471bfc53..34353b6e584b 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -43,25 +43,17 @@ static void none_free_call_crypto(struct rxrpc_call *call)
}
static int none_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("chall_none"));
- return -EPROTO;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_challenge);
}
static int none_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("resp_none"));
- return -EPROTO;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_response);
}
static void none_clear(struct rxrpc_connection *conn)
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 1ad067d66fb6..9e9dfb2fc559 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -67,9 +67,31 @@ void rxrpc_error_report(struct sock *sk)
}
/*
+ * Directly produce an abort from a packet.
+ */
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ abort_code, err);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = abort_code;
+ return false;
+}
+
+static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
+}
+
+#define just_discard true
+
+/*
* Process event packets targeted at a local endpoint.
*/
-static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
char v;
@@ -81,22 +103,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
if (v == 0)
rxrpc_send_version_request(local, &sp->hdr, skb);
}
+
+ return true;
}
/*
* Extract the wire header from a packet and translate the byte order.
*/
-static noinline
-int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
+static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
+ struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
/* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_hdr"));
- return -EBADMSG;
- }
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr);
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
@@ -110,7 +131,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
- return 0;
+ return true;
}
/*
@@ -130,28 +151,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb)
/*
* Process packets received on the local endpoint
*/
-static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
{
struct rxrpc_connection *conn;
struct sockaddr_rxrpc peer_srx;
struct rxrpc_skb_priv *sp;
struct rxrpc_peer *peer = NULL;
struct sk_buff *skb = *_skb;
- int ret = 0;
+ bool ret = false;
skb_pull(skb, sizeof(struct udphdr));
sp = rxrpc_skb(skb);
/* dig out the RxRPC connection details */
- if (rxrpc_extract_header(sp, skb) < 0)
- goto bad_message;
+ if (!rxrpc_extract_header(sp, skb))
+ return just_discard;
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
trace_rxrpc_rx_lose(sp);
- return 0;
+ return just_discard;
}
}
@@ -160,28 +181,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_VERSION:
if (rxrpc_to_client(sp))
- return 0;
- rxrpc_input_version(local, skb);
- return 0;
+ return just_discard;
+ return rxrpc_input_version(local, skb);
case RXRPC_PACKET_TYPE_BUSY:
if (rxrpc_to_server(sp))
- return 0;
+ return just_discard;
fallthrough;
case RXRPC_PACKET_TYPE_ACK:
case RXRPC_PACKET_TYPE_ACKALL:
if (sp->hdr.callNumber == 0)
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
break;
case RXRPC_PACKET_TYPE_ABORT:
if (!rxrpc_extract_abort(skb))
- return 0; /* Just discard if malformed */
+ return just_discard; /* Just discard if malformed */
break;
case RXRPC_PACKET_TYPE_DATA:
- if (sp->hdr.callNumber == 0 ||
- sp->hdr.seq == 0)
- goto bad_message;
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
+ if (sp->hdr.seq == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq);
/* Unshare the packet so that it can be modified for in-place
* decryption.
@@ -191,7 +212,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
if (!skb) {
rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
*_skb = NULL;
- return 0;
+ return just_discard;
}
if (skb != *_skb) {
@@ -205,28 +226,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
case RXRPC_PACKET_TYPE_CHALLENGE:
if (rxrpc_to_server(sp))
- return 0;
+ return just_discard;
break;
case RXRPC_PACKET_TYPE_RESPONSE:
if (rxrpc_to_client(sp))
- return 0;
+ return just_discard;
break;
/* Packet types 9-11 should just be ignored. */
case RXRPC_PACKET_TYPE_PARAMS:
case RXRPC_PACKET_TYPE_10:
case RXRPC_PACKET_TYPE_11:
- return 0;
+ return just_discard;
default:
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet);
}
if (sp->hdr.serviceId == 0)
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service);
if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
- return true; /* Unsupported address type - discard. */
+ return just_discard; /* Unsupported address type. */
if (peer_srx.transport.family != local->srx.transport.family &&
(peer_srx.transport.family == AF_INET &&
@@ -234,7 +255,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
peer_srx.transport.family,
local->srx.transport.family);
- return true; /* Wrong address type - discard. */
+ return just_discard; /* Wrong address type. */
}
if (rxrpc_to_client(sp)) {
@@ -242,12 +263,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
rcu_read_unlock();
- if (!conn) {
- trace_rxrpc_abort(0, "NCC", sp->hdr.cid,
- sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- goto protocol_error;
- }
+ if (!conn)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn);
ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
@@ -280,19 +297,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
rxrpc_put_peer(peer, rxrpc_peer_put_input);
- if (ret < 0)
- goto reject_packet;
- return 0;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- rxrpc_reject_packet(local, skb);
- return 0;
+ return ret;
}
/*
@@ -306,21 +311,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
unsigned int channel;
+ bool ret;
if (sp->hdr.securityIndex != conn->security_ix)
- goto wrong_security;
+ return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security,
+ RXKADINCONSISTENCY, -EBADMSG);
if (sp->hdr.serviceId != conn->service_id) {
int old_id;
if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
- goto reupgrade;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade);
+
old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
sp->hdr.serviceId);
-
if (old_id != conn->orig_service_id &&
old_id != sp->hdr.serviceId)
- goto reupgrade;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade);
}
if (after(sp->hdr.serial, conn->hi_serial))
@@ -336,19 +343,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
/* Ignore really old calls */
if (sp->hdr.callNumber < chan->last_call)
- return 0;
+ return just_discard;
if (sp->hdr.callNumber == chan->last_call) {
if (chan->call ||
sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
- return 0;
+ return just_discard;
/* For the previous service call, if completed successfully, we
* discard all further packets.
*/
if (rxrpc_conn_is_service(conn) &&
chan->last_type == RXRPC_PACKET_TYPE_ACK)
- return 0;
+ return just_discard;
/* But otherwise we need to retransmit the final packet from
* data cached in the connection record.
@@ -358,19 +365,17 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
sp->hdr.seq,
sp->hdr.serial,
sp->hdr.flags);
- rxrpc_input_conn_packet(conn, skb);
- return 0;
+ rxrpc_conn_retransmit_call(conn, skb, channel);
+ return just_discard;
}
- rcu_read_lock();
- call = rxrpc_try_get_call(rcu_dereference(chan->call),
- rxrpc_call_get_input);
- rcu_read_unlock();
+ call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input);
if (sp->hdr.callNumber > chan->call_id) {
if (rxrpc_to_client(sp)) {
rxrpc_put_call(call, rxrpc_call_put_input);
- goto reject_packet;
+ return rxrpc_protocol_error(skb,
+ rxrpc_eproto_unexpected_implicit_end);
}
if (call) {
@@ -382,38 +387,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
if (!call) {
if (rxrpc_to_client(sp))
- goto bad_message;
- if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
- peer_srx, skb) == 0)
- return 0;
- goto reject_packet;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call);
+ return rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+ peer_srx, skb);
}
- rxrpc_input_call_event(call, skb);
+ ret = rxrpc_input_call_event(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return 0;
-
-wrong_security:
- trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- skb->priority = RXKADINCONSISTENCY;
- goto post_abort;
-
-reupgrade:
- trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
- goto protocol_error;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
-post_abort:
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- rxrpc_reject_packet(conn->local, skb);
- return 0;
+ return ret;
}
/*
@@ -421,6 +402,7 @@ reject_packet:
*/
int rxrpc_io_thread(void *data)
{
+ struct rxrpc_connection *conn;
struct sk_buff_head rx_queue;
struct rxrpc_local *local = data;
struct rxrpc_call *call;
@@ -436,6 +418,24 @@ int rxrpc_io_thread(void *data)
for (;;) {
rxrpc_inc_stat(local->rxnet, stat_io_loop);
+ /* Deal with connections that want immediate attention. */
+ conn = list_first_entry_or_null(&local->conn_attend_q,
+ struct rxrpc_connection,
+ attend_link);
+ if (conn) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&conn->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ rxrpc_input_conn_event(conn, NULL);
+ rxrpc_put_connection(conn, rxrpc_conn_put_poke);
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags))
+ rxrpc_discard_expired_client_conns(local);
+
/* Deal with calls that want immediate attention. */
if ((call = list_first_entry_or_null(&local->call_attend_q,
struct rxrpc_call,
@@ -450,12 +450,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ if (!list_empty(&local->new_client_calls))
+ rxrpc_connect_client_calls(local);
+
/* Process received packets and errors. */
if ((skb = __skb_dequeue(&rx_queue))) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
switch (skb->mark) {
case RXRPC_SKB_MARK_PACKET:
skb->priority = 0;
- rxrpc_input_packet(local, &skb);
+ if (!rxrpc_input_packet(local, &skb))
+ rxrpc_reject_packet(local, skb);
trace_rxrpc_rx_done(skb->mark, skb->priority);
rxrpc_free_skb(skb, rxrpc_skb_put_input);
break;
@@ -463,6 +468,11 @@ int rxrpc_io_thread(void *data)
rxrpc_input_error(local, skb);
rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
break;
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ rxrpc_input_conn_event(sp->conn, skb);
+ rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured);
+ break;
default:
WARN_ON_ONCE(1);
rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
@@ -481,7 +491,11 @@ int rxrpc_io_thread(void *data)
set_current_state(TASK_INTERRUPTIBLE);
should_stop = kthread_should_stop();
if (!skb_queue_empty(&local->rx_queue) ||
- !list_empty(&local->call_attend_q)) {
+ !list_empty(&local->call_attend_q) ||
+ !list_empty(&local->conn_attend_q) ||
+ !list_empty(&local->new_client_calls) ||
+ test_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags)) {
__set_current_state(TASK_RUNNING);
continue;
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 270b63d8f37a..b8eaca5d9f22 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -82,31 +82,59 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
}
}
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_local *local =
+ container_of(timer, struct rxrpc_local, client_conn_reap_timer);
+
+ if (local->kill_all_client_conns &&
+ test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags))
+ rxrpc_wake_up_io_thread(local);
+}
+
/*
* Allocate a new local endpoint.
*/
-static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
+static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
const struct sockaddr_rxrpc *srx)
{
struct rxrpc_local *local;
+ u32 tmp;
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
refcount_set(&local->ref, 1);
atomic_set(&local->active_users, 1);
- local->rxnet = rxnet;
+ local->net = net;
+ local->rxnet = rxrpc_net(net);
INIT_HLIST_NODE(&local->link);
init_rwsem(&local->defrag_sem);
init_completion(&local->io_thread_ready);
skb_queue_head_init(&local->rx_queue);
+ INIT_LIST_HEAD(&local->conn_attend_q);
INIT_LIST_HEAD(&local->call_attend_q);
+
local->client_bundles = RB_ROOT;
spin_lock_init(&local->client_bundles_lock);
+ local->kill_all_client_conns = false;
+ INIT_LIST_HEAD(&local->idle_client_conns);
+ timer_setup(&local->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
+
spin_lock_init(&local->lock);
rwlock_init(&local->services_lock);
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
+ idr_init(&local->conn_ids);
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp &= 0x3fffffff;
+ if (tmp == 0)
+ tmp = 1;
+ idr_set_cursor(&local->conn_ids, tmp);
+ INIT_LIST_HEAD(&local->new_client_calls);
+ spin_lock_init(&local->client_call_lock);
+
trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
}
@@ -248,7 +276,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
goto found;
}
- local = rxrpc_alloc_local(rxnet, srx);
+ local = rxrpc_alloc_local(net, srx);
if (!local)
goto nomem;
@@ -407,6 +435,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
* local endpoint.
*/
rxrpc_purge_queue(&local->rx_queue);
+ rxrpc_purge_client_connections(local);
}
/*
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 5905530e2f33..a0319c040c25 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -10,15 +10,6 @@
unsigned int rxrpc_net_id;
-static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
-{
- struct rxrpc_net *rxnet =
- container_of(timer, struct rxrpc_net, client_conn_reap_timer);
-
- if (rxnet->live)
- rxrpc_queue_work(&rxnet->client_conn_reaper);
-}
-
static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
{
struct rxrpc_net *rxnet =
@@ -63,14 +54,6 @@ static __net_init int rxrpc_init_net(struct net *net)
rxrpc_service_conn_reap_timeout, 0);
atomic_set(&rxnet->nr_client_conns, 0);
- rxnet->kill_all_client_conns = false;
- spin_lock_init(&rxnet->client_conn_cache_lock);
- mutex_init(&rxnet->client_conn_discard_lock);
- INIT_LIST_HEAD(&rxnet->idle_client_conns);
- INIT_WORK(&rxnet->client_conn_reaper,
- rxrpc_discard_expired_client_conns);
- timer_setup(&rxnet->client_conn_reap_timer,
- rxrpc_client_conn_reap_timeout, 0);
INIT_HLIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 3d8c9f830ee0..a9746be29634 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -261,7 +261,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_tx_point_call_ack);
rxrpc_tx_backoff(call, ret);
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
if (ret < 0)
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
rxrpc_set_keepalive(call);
@@ -545,6 +545,62 @@ send_fragmentable:
}
/*
+ * Transmit a connection-level abort.
+ */
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ __be32 word;
+ size_t len;
+ u32 serial;
+ int ret;
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ whdr.flags = conn->out_clientflag;
+ whdr.userStatus = 0;
+ whdr.securityIndex = conn->security_ix;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(conn->service_id);
+
+ word = htonl(conn->abort_code);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &word;
+ iov[1].iov_len = sizeof(word);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ whdr.serial = htonl(serial);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_conn_abort);
+ _debug("sendmsg failed: %d", ret);
+ return;
+ }
+
+ trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+}
+
+/*
* Reject a packet through the local endpoint.
*/
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
@@ -667,7 +723,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
static inline void rxrpc_instant_resend(struct rxrpc_call *call,
struct rxrpc_txbuf *txb)
{
- if (call->state < RXRPC_CALL_COMPLETE)
+ if (!__rxrpc_call_is_complete(call))
kdebug("resend");
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 4eecea2be307..8d7a715a0bb1 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -147,10 +147,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
* assess the MTU size for the network interface through which this peer is
* reached
*/
-static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
+static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
struct rxrpc_peer *peer)
{
- struct net *net = sock_net(&rx->sk);
+ struct net *net = local->net;
struct dst_entry *dst;
struct rtable *rt;
struct flowi fl;
@@ -236,11 +236,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
/*
* Initialise peer record.
*/
-static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
+static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
unsigned long hash_key)
{
peer->hash_key = hash_key;
- rxrpc_assess_MTU_size(rx, peer);
+ rxrpc_assess_MTU_size(local, peer);
peer->mtu = peer->if_mtu;
peer->rtt_last_req = ktime_get_real();
@@ -272,8 +272,7 @@ static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
/*
* Set up a new peer.
*/
-static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
- struct rxrpc_local *local,
+static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx,
unsigned long hash_key,
gfp_t gfp)
@@ -285,7 +284,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client);
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
- rxrpc_init_peer(rx, peer, hash_key);
+ rxrpc_init_peer(local, peer, hash_key);
}
_leave(" = %p", peer);
@@ -304,14 +303,13 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer)
* since we've already done a search in the list from the non-reentrant context
* (the data_ready handler) that is the only place we can add new peers.
*/
-void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
- struct rxrpc_peer *peer)
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
{
struct rxrpc_net *rxnet = local->rxnet;
unsigned long hash_key;
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
- rxrpc_init_peer(rx, peer, hash_key);
+ rxrpc_init_peer(local, peer, hash_key);
spin_lock(&rxnet->peer_hash_lock);
hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
@@ -322,8 +320,7 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
/*
* obtain a remote transport endpoint for the specified address
*/
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
- struct rxrpc_local *local,
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx, gfp_t gfp)
{
struct rxrpc_peer *peer, *candidate;
@@ -343,7 +340,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/* The peer is not yet present in hash - create a candidate
* for a new record and then redo the search.
*/
- candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp);
+ candidate = rxrpc_create_peer(local, srx, hash_key, gfp);
if (!candidate) {
_leave(" = NULL [nomem]");
return NULL;
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 3a59591ec061..750158a085cd 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -12,13 +12,13 @@
static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
[RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ",
[RXRPC_CONN_CLIENT] = "Client ",
[RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc",
[RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
[RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
[RXRPC_CONN_SERVICE] = "SvSecure",
- [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
- [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_ABORTED] = "Aborted ",
};
/*
@@ -51,6 +51,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_local *local;
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ enum rxrpc_call_state state;
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
@@ -75,7 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
- if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
+ state = rxrpc_call_state(call);
+ if (state != RXRPC_CALL_SERVER_PREALLOC) {
timeout = READ_ONCE(call->expect_rx_by);
timeout -= jiffies;
}
@@ -92,7 +94,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
refcount_read(&call->ref),
- rxrpc_call_states[call->state],
+ rxrpc_call_states[state],
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
@@ -143,6 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ const char *state;
char lbuff[50], rbuff[50];
if (v == &rxnet->conn_proc_list) {
@@ -163,9 +166,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
}
sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
-
sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
print:
+ state = rxrpc_is_conn_aborted(conn) ?
+ rxrpc_call_completions[conn->completion] :
+ rxrpc_conn_states[conn->state];
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %s %3u %3d"
" %s %08x %08x %08x %08x %08x %08x %08x\n",
@@ -176,7 +181,7 @@ print:
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
refcount_read(&conn->ref),
atomic_read(&conn->active),
- rxrpc_conn_states[conn->state],
+ state,
key_serial(conn->key),
atomic_read(&conn->serial),
conn->hi_serial,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 36b25d003cf0..dd54ceee7bcc 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -59,85 +59,6 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
}
/*
- * Transition a call to the complete state.
- */
-bool __rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->abort_code = abort_code;
- call->error = error;
- call->completion = compl;
- call->state = RXRPC_CALL_COMPLETE;
- trace_rxrpc_call_complete(call);
- wake_up(&call->waitq);
- rxrpc_notify_socket(call);
- return true;
- }
- return false;
-}
-
-bool rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- bool ret = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock(&call->state_lock);
- ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock(&call->state_lock);
- }
- return ret;
-}
-
-/*
- * Record that a call successfully completed.
- */
-bool __rxrpc_call_completed(struct rxrpc_call *call)
-{
- return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
-}
-
-bool rxrpc_call_completed(struct rxrpc_call *call)
-{
- bool ret = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock(&call->state_lock);
- ret = __rxrpc_call_completed(call);
- write_unlock(&call->state_lock);
- }
- return ret;
-}
-
-/*
- * Record that a call is locally aborted.
- */
-bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
- abort_code, error);
- return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
- abort_code, error);
-}
-
-bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- bool ret;
-
- write_lock(&call->state_lock);
- ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock(&call->state_lock);
- return ret;
-}
-
-/*
* Pass a call terminating message to userspace.
*/
static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
@@ -168,7 +89,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
break;
default:
- pr_err("Invalid terminal call state %u\n", call->state);
+ pr_err("Invalid terminal call state %u\n", call->completion);
BUG();
break;
}
@@ -180,41 +101,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
}
/*
- * End the packet reception phase.
- */
-static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
-{
- rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
-
- _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
-
- trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
-
- if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY)
- rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
-
- write_lock(&call->state_lock);
-
- switch (call->state) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- __rxrpc_call_completed(call);
- write_unlock(&call->state_lock);
- break;
-
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
- call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
- write_unlock(&call->state_lock);
- rxrpc_propose_delay_ACK(call, serial,
- rxrpc_propose_ack_processing_op);
- break;
- default:
- write_unlock(&call->state_lock);
- break;
- }
-}
-
-/*
* Discard a packet we've used up and advance the Rx window by one.
*/
static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
@@ -244,10 +130,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
serial, call->rx_consumed);
- if (last) {
- rxrpc_end_rx_phase(call, serial);
- return;
- }
+
+ if (last)
+ set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags);
/* Check to see if there's an ACK that needs sending. */
acked = atomic_add_return(call->rx_consumed - old_consumed,
@@ -272,7 +157,8 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
/*
* Deliver messages to a call. This keeps processing packets until the buffer
* is filled and we find either more DATA (returns 0) or the end of the DATA
- * (returns 1). If more packets are required, it returns -EAGAIN.
+ * (returns 1). If more packets are required, it returns -EAGAIN and if the
+ * call has failed it returns -EIO.
*/
static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, struct iov_iter *iter,
@@ -288,7 +174,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_offset = call->rx_pkt_offset;
rx_pkt_len = call->rx_pkt_len;
- if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
+ if (rxrpc_call_has_failed(call)) {
+ seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ ret = -EIO;
+ goto done;
+ }
+
+ if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
ret = 1;
goto done;
@@ -312,14 +204,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (rx_pkt_offset == 0) {
ret2 = rxrpc_verify_data(call, skb);
- rx_pkt_offset = sp->offset;
- rx_pkt_len = sp->len;
trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
- rx_pkt_offset, rx_pkt_len, ret2);
+ sp->offset, sp->len, ret2);
if (ret2 < 0) {
+ kdebug("verify = %d", ret2);
ret = ret2;
goto out;
}
+ rx_pkt_offset = sp->offset;
+ rx_pkt_len = sp->len;
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -388,13 +281,14 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
struct list_head *l;
+ unsigned int call_debug_id = 0;
size_t copied = 0;
long timeo;
int ret;
DEFINE_WAIT(wait);
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0);
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0);
if (flags & (MSG_OOB | MSG_TRUNC))
return -EOPNOTSUPP;
@@ -431,7 +325,7 @@ try_again:
if (list_empty(&rx->recvmsg_q)) {
if (signal_pending(current))
goto wait_interrupted;
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0);
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
timeo = schedule_timeout(timeo);
}
finish_wait(sk_sleep(&rx->sk), &wait);
@@ -450,7 +344,8 @@ try_again:
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
write_unlock(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
+ call_debug_id = call->debug_id;
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
/* We're going to drop the socket lock, so we need to lock the call
* against interference by sendmsg.
@@ -492,36 +387,36 @@ try_again:
msg->msg_namelen = len;
}
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
- flags, &copied);
- if (ret == -EAGAIN)
- ret = 0;
-
- if (!skb_queue_empty(&call->recvmsg_queue))
- rxrpc_notify_socket(call);
- break;
- default:
+ ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+ flags, &copied);
+ if (ret == -EAGAIN)
ret = 0;
- break;
- }
-
+ if (ret == -EIO)
+ goto call_failed;
if (ret < 0)
goto error_unlock_call;
- if (call->state == RXRPC_CALL_COMPLETE) {
- ret = rxrpc_recvmsg_term(call, msg);
- if (ret < 0)
- goto error_unlock_call;
- if (!(flags & MSG_PEEK))
- rxrpc_release_call(rx, call);
- msg->msg_flags |= MSG_EOR;
- ret = 1;
- }
+ if (rxrpc_call_is_complete(call) &&
+ skb_queue_empty(&call->recvmsg_queue))
+ goto call_complete;
+ if (rxrpc_call_has_failed(call))
+ goto call_failed;
+ rxrpc_notify_socket(call);
+ goto not_yet_complete;
+
+call_failed:
+ rxrpc_purge_queue(&call->recvmsg_queue);
+call_complete:
+ ret = rxrpc_recvmsg_term(call, msg);
+ if (ret < 0)
+ goto error_unlock_call;
+ if (!(flags & MSG_PEEK))
+ rxrpc_release_call(rx, call);
+ msg->msg_flags |= MSG_EOR;
+ ret = 1;
+
+not_yet_complete:
if (ret == 0)
msg->msg_flags |= MSG_MORE;
else
@@ -531,7 +426,7 @@ try_again:
error_unlock_call:
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
return ret;
error_requeue_call:
@@ -539,14 +434,14 @@ error_requeue_call:
write_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
write_unlock(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
}
error_no_call:
release_sock(&rx->sk);
error_trace:
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
return ret;
wait_interrupted:
@@ -584,49 +479,34 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
size_t offset = 0;
int ret;
- _enter("{%d,%s},%zu,%d",
- call->debug_id, rxrpc_call_states[call->state],
- *_len, want_more);
-
- ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING);
+ _enter("{%d},%zu,%d", call->debug_id, *_len, want_more);
mutex_lock(&call->user_mutex);
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- ret = rxrpc_recvmsg_data(sock, call, NULL, iter,
- *_len, 0, &offset);
- *_len -= offset;
- if (ret < 0)
- goto out;
-
- /* We can only reach here with a partially full buffer if we
- * have reached the end of the data. We must otherwise have a
- * full buffer or have been given -EAGAIN.
- */
- if (ret == 1) {
- if (iov_iter_count(iter) > 0)
- goto short_data;
- if (!want_more)
- goto read_phase_complete;
- ret = 0;
- goto out;
- }
-
- if (!want_more)
- goto excess_data;
+ ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset);
+ *_len -= offset;
+ if (ret == -EIO)
+ goto call_failed;
+ if (ret < 0)
goto out;
- case RXRPC_CALL_COMPLETE:
- goto call_complete;
-
- default:
- ret = -EINPROGRESS;
+ /* We can only reach here with a partially full buffer if we have
+ * reached the end of the data. We must otherwise have a full buffer
+ * or have been given -EAGAIN.
+ */
+ if (ret == 1) {
+ if (iov_iter_count(iter) > 0)
+ goto short_data;
+ if (!want_more)
+ goto read_phase_complete;
+ ret = 0;
goto out;
}
+ if (!want_more)
+ goto excess_data;
+ goto out;
+
read_phase_complete:
ret = 1;
out:
@@ -637,14 +517,18 @@ out:
return ret;
short_data:
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EBADMSG);
ret = -EBADMSG;
goto out;
excess_data:
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EMSGSIZE);
ret = -EMSGSIZE;
goto out;
-call_complete:
+call_failed:
*_abort = call->abort_code;
ret = call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d1233720e05f..1bf571a66e02 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
- bool aborted;
u32 data_size, buf;
u16 check;
int ret;
_enter("");
- if (sp->len < 8) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
@@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_zero(req);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_encdata);
sp->offset += sizeof(sechdr);
sp->len -= sizeof(sechdr);
@@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
check = buf >> 16;
check ^= seq ^ call->call_id;
check &= 0xffff;
- if (check != 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
-
- if (data_size > sp->len) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_check);
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_data);
sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
}
/*
@@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
- bool aborted;
u32 data_size, buf;
u16 check;
int nsg, ret;
_enter(",{%d}", sp->len);
- if (sp->len < 8) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
@@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
} else {
sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
if (!sg)
- goto nomem;
+ return -ENOMEM;
}
sg_init_table(sg, nsg);
@@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
kfree(sg);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_len);
sp->offset += sizeof(sechdr);
sp->len -= sizeof(sechdr);
@@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
check = buf >> 16;
check ^= seq ^ call->call_id;
check &= 0xffff;
- if (check != 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_check);
- if (data_size > sp->len) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_data);
sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
-
-nomem:
- _leave(" = -ENOMEM");
- return -ENOMEM;
}
/*
@@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
__be32 buf[2];
} crypto __aligned(8);
rxrpc_seq_t seq = sp->hdr.seq;
- bool aborted;
int ret;
u16 cksum;
u32 x, y;
@@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
cksum = 1; /* zero checksums are not permitted */
if (cksum != sp->hdr.cksum) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
- RXKADSEALEDINCON);
- goto protocol_error;
+ ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_bad_checksum);
+ goto out;
}
switch (call->conn->security_level) {
@@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
break;
}
+out:
skcipher_request_free(req);
return ret;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
}
/*
@@ -821,34 +783,30 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn,
* respond to a challenge packet
*/
static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
const struct rxrpc_key_token *token;
struct rxkad_challenge challenge;
struct rxkad_response *resp;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- const char *eproto;
- u32 version, nonce, min_level, abort_code;
- int ret;
+ u32 version, nonce, min_level;
+ int ret = -EPROTO;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
- eproto = tracepoint_string("chall_no_key");
- abort_code = RX_PROTOCOL_ERROR;
if (!conn->key)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxkad_abort_chall_no_key);
- abort_code = RXKADEXPIRED;
ret = key_validate(conn->key);
if (ret < 0)
- goto other_error;
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_chall_key_expired);
- eproto = tracepoint_string("chall_short");
- abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&challenge, sizeof(challenge)) < 0)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_chall_short);
version = ntohl(challenge.version);
nonce = ntohl(challenge.nonce);
@@ -856,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
- eproto = tracepoint_string("chall_ver");
- abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_chall_version);
- abort_code = RXKADLEVELFAIL;
- ret = -EACCES;
if (conn->security_level < min_level)
- goto other_error;
+ return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
+ rxkad_abort_chall_level);
token = conn->key->payload.data[0];
@@ -893,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
kfree(resp);
return ret;
-
-protocol_error:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
- ret = -EPROTO;
-other_error:
- *_abort_code = abort_code;
- return ret;
}
/*
@@ -910,20 +859,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
- time64_t *_expiry,
- u32 *_abort_code)
+ time64_t *_expiry)
{
struct skcipher_request *req;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
unsigned int life;
- const char *eproto;
time64_t issue, now;
bool little_endian;
- int ret;
- u32 abort_code;
u8 *p, *q, *name, *end;
_enter("{%d},{%x}", conn->debug_id, key_serial(server_key));
@@ -935,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
- ret = -ENOMEM;
req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS);
if (!req)
- goto temporary_error;
+ return -ENOMEM;
sg_init_one(&sg[0], ticket, ticket_len);
skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -949,18 +892,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p = ticket;
end = p + ticket_len;
-#define Z(field) \
- ({ \
- u8 *__str = p; \
- eproto = tracepoint_string("rxkad_bad_"#field); \
- q = memchr(p, 0, end - p); \
- if (!q || q - p > (field##_SZ)) \
- goto bad_ticket; \
- for (; p < q; p++) \
- if (!isprint(*p)) \
- goto bad_ticket; \
- p++; \
- __str; \
+#define Z(field, fieldl) \
+ ({ \
+ u8 *__str = p; \
+ q = memchr(p, 0, end - p); \
+ if (!q || q - p > field##_SZ) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ for (; p < q; p++) \
+ if (!isprint(*p)) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ p++; \
+ __str; \
})
/* extract the ticket flags */
@@ -969,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p++;
/* extract the authentication name */
- name = Z(ANAME);
+ name = Z(ANAME, aname);
_debug("KIV ANAME: %s", name);
/* extract the principal's instance */
- name = Z(INST);
+ name = Z(INST, inst);
_debug("KIV INST : %s", name);
/* extract the principal's authentication domain */
- name = Z(REALM);
+ name = Z(REALM, realm);
_debug("KIV REALM: %s", name);
- eproto = tracepoint_string("rxkad_bad_len");
if (end - p < 4 + 8 + 4 + 2)
- goto bad_ticket;
+ return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO,
+ rxkad_abort_resp_tkt_short);
/* get the IPv4 address of the entity that requested the ticket */
memcpy(&addr, p, sizeof(addr));
@@ -1014,38 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
_debug("KIV ISSUE: %llx [%llx]", issue, now);
/* check the ticket is in date */
- if (issue > now) {
- abort_code = RXKADNOAUTH;
- ret = -EKEYREJECTED;
- goto other_error;
- }
-
- if (issue < now - life) {
- abort_code = RXKADEXPIRED;
- ret = -EKEYEXPIRED;
- goto other_error;
- }
+ if (issue > now)
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED,
+ rxkad_abort_resp_tkt_future);
+ if (issue < now - life)
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED,
+ rxkad_abort_resp_tkt_expired);
*_expiry = issue + life;
/* get the service name */
- name = Z(SNAME);
+ name = Z(SNAME, sname);
_debug("KIV SNAME: %s", name);
/* get the service instance name */
- name = Z(INST);
+ name = Z(INST, sinst);
_debug("KIV SINST: %s", name);
return 0;
-
-bad_ticket:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
- abort_code = RXKADBADTICKET;
- ret = -EPROTO;
-other_error:
- *_abort_code = abort_code;
- return ret;
-temporary_error:
- return ret;
}
/*
@@ -1086,17 +1017,15 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
* verify a response
*/
static int rxkad_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
struct rxkad_response *response;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
struct key *server_key;
- const char *eproto;
time64_t expiry;
void *ticket;
- u32 abort_code, version, kvno, ticket_len, level;
+ u32 version, kvno, ticket_len, level;
__be32 csum;
int ret, i;
@@ -1104,22 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
server_key = rxrpc_look_up_server_security(conn, skb, 0, 0);
if (IS_ERR(server_key)) {
- switch (PTR_ERR(server_key)) {
+ ret = PTR_ERR(server_key);
+ switch (ret) {
case -ENOKEY:
- abort_code = RXKADUNKNOWNKEY;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret,
+ rxkad_abort_resp_nokey);
case -EKEYEXPIRED:
- abort_code = RXKADEXPIRED;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_resp_key_expired);
default:
- abort_code = RXKADNOAUTH;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret,
+ rxkad_abort_resp_key_rejected);
}
- trace_rxrpc_abort(0, "SVK",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- abort_code, PTR_ERR(server_key));
- *_abort_code = abort_code;
- return -EPROTO;
}
ret = -ENOMEM;
@@ -1127,11 +1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (!response)
goto temporary_error;
- eproto = tracepoint_string("rxkad_rsp_short");
- abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- response, sizeof(*response)) < 0)
+ response, sizeof(*response)) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short);
goto protocol_error;
+ }
version = ntohl(response->version);
ticket_len = ntohl(response->ticket_len);
@@ -1139,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
- eproto = tracepoint_string("rxkad_rsp_ver");
- abort_code = RXKADINCONSISTENCY;
- if (version != RXKAD_VERSION)
+ if (version != RXKAD_VERSION) {
+ rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_resp_version);
goto protocol_error;
+ }
- eproto = tracepoint_string("rxkad_rsp_tktlen");
- abort_code = RXKADTICKETLEN;
- if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+ if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) {
+ rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO,
+ rxkad_abort_resp_tkt_len);
goto protocol_error;
+ }
- eproto = tracepoint_string("rxkad_rsp_unkkey");
- abort_code = RXKADUNKNOWNKEY;
- if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+ if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) {
+ rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO,
+ rxkad_abort_resp_unknown_tkt);
goto protocol_error;
+ }
/* extract the kerberos ticket and decrypt and decode it */
ret = -ENOMEM;
@@ -1160,15 +1089,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (!ticket)
goto temporary_error_free_resp;
- eproto = tracepoint_string("rxkad_tkt_short");
- abort_code = RXKADPACKETSHORT;
- ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
- ticket, ticket_len);
- if (ret < 0)
- goto temporary_error_free_ticket;
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
+ ticket, ticket_len) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short_tkt);
+ goto protocol_error;
+ }
ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
- &session_key, &expiry, _abort_code);
+ &session_key, &expiry);
if (ret < 0)
goto temporary_error_free_ticket;
@@ -1176,56 +1105,61 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* response */
rxkad_decrypt_response(conn, response, &session_key);
- eproto = tracepoint_string("rxkad_rsp_param");
- abort_code = RXKADSEALEDINCON;
- if (ntohl(response->encrypted.epoch) != conn->proto.epoch)
- goto protocol_error_free;
- if (ntohl(response->encrypted.cid) != conn->proto.cid)
- goto protocol_error_free;
- if (ntohl(response->encrypted.securityIndex) != conn->security_ix)
+ if (ntohl(response->encrypted.epoch) != conn->proto.epoch ||
+ ntohl(response->encrypted.cid) != conn->proto.cid ||
+ ntohl(response->encrypted.securityIndex) != conn->security_ix) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_param);
goto protocol_error_free;
+ }
+
csum = response->encrypted.checksum;
response->encrypted.checksum = 0;
rxkad_calc_response_checksum(response);
- eproto = tracepoint_string("rxkad_rsp_csum");
- if (response->encrypted.checksum != csum)
+ if (response->encrypted.checksum != csum) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_checksum);
goto protocol_error_free;
+ }
- spin_lock(&conn->bundle->channel_lock);
for (i = 0; i < RXRPC_MAXCALLS; i++) {
- struct rxrpc_call *call;
u32 call_id = ntohl(response->encrypted.call_id[i]);
+ u32 counter = READ_ONCE(conn->channels[i].call_counter);
+
+ if (call_id > INT_MAX) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_callid);
+ goto protocol_error_free;
+ }
- eproto = tracepoint_string("rxkad_rsp_callid");
- if (call_id > INT_MAX)
- goto protocol_error_unlock;
-
- eproto = tracepoint_string("rxkad_rsp_callctr");
- if (call_id < conn->channels[i].call_counter)
- goto protocol_error_unlock;
-
- eproto = tracepoint_string("rxkad_rsp_callst");
- if (call_id > conn->channels[i].call_counter) {
- call = rcu_dereference_protected(
- conn->channels[i].call,
- lockdep_is_held(&conn->bundle->channel_lock));
- if (call && call->state < RXRPC_CALL_COMPLETE)
- goto protocol_error_unlock;
+ if (call_id < counter) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_ctr);
+ goto protocol_error_free;
+ }
+
+ if (call_id > counter) {
+ if (conn->channels[i].call) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_state);
+ goto protocol_error_free;
+ }
conn->channels[i].call_counter = call_id;
}
}
- spin_unlock(&conn->bundle->channel_lock);
- eproto = tracepoint_string("rxkad_rsp_seq");
- abort_code = RXKADOUTOFSEQUENCE;
- if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1)
+ if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) {
+ rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO,
+ rxkad_abort_resp_ooseq);
goto protocol_error_free;
+ }
- eproto = tracepoint_string("rxkad_rsp_level");
- abort_code = RXKADLEVELFAIL;
level = ntohl(response->encrypted.level);
- if (level > RXRPC_SECURITY_ENCRYPT)
+ if (level > RXRPC_SECURITY_ENCRYPT) {
+ rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO,
+ rxkad_abort_resp_level);
goto protocol_error_free;
+ }
conn->security_level = level;
/* create a key to hold the security data and expiration time - after
@@ -1240,15 +1174,11 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_leave(" = 0");
return 0;
-protocol_error_unlock:
- spin_unlock(&conn->bundle->channel_lock);
protocol_error_free:
kfree(ticket);
protocol_error:
kfree(response);
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
key_put(server_key);
- *_abort_code = abort_code;
return -EPROTO;
temporary_error_free_ticket:
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index d33a109e846c..16dcabb71ebe 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -10,6 +10,8 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
MODULE_DESCRIPTION("rxperf test server (afs)");
MODULE_AUTHOR("Red Hat, Inc.");
@@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work)
case -EOPNOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- abort_code, ret, "GOP");
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
goto call_complete;
case -ENOTSUPP:
abort_code = RX_USER_ABORT;
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- abort_code, ret, "GUA");
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
goto call_complete;
case -EIO:
pr_err("Call %u in bad state %u\n",
@@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work)
case -ENOMEM:
case -EFAULT:
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RXGEN_SS_UNMARSHAL, ret, "GUM");
+ RXGEN_SS_UNMARSHAL, ret,
+ rxperf_abort_unmarshal_error);
goto call_complete;
default:
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RX_CALL_DEAD, ret, "GER");
+ RX_CALL_DEAD, ret,
+ rxperf_abort_general_error);
goto call_complete;
}
}
@@ -523,7 +529,8 @@ static int rxperf_process_call(struct rxperf_call *call)
if (n == -ENOMEM)
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "GOM");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ rxperf_abort_oom);
return n;
}
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index ab968f65a490..cb8dd1d3b1d4 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -97,38 +97,31 @@ found:
*/
int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
- const struct rxrpc_security *sec;
struct rxrpc_key_token *token;
struct key *key = conn->key;
- int ret;
+ int ret = 0;
_enter("{%d},{%x}", conn->debug_id, key_serial(key));
- if (!key)
- return 0;
-
- ret = key_validate(key);
- if (ret < 0)
- return ret;
-
for (token = key->payload.data[0]; token; token = token->next) {
- sec = rxrpc_security_lookup(token->security_index);
- if (sec)
+ if (token->security_index == conn->security->security_index)
goto found;
}
return -EKEYREJECTED;
found:
- conn->security = sec;
-
- ret = conn->security->init_connection_security(conn, token);
- if (ret < 0) {
- conn->security = &rxrpc_no_security;
- return ret;
+ mutex_lock(&conn->security_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = conn->security->init_connection_security(conn, token);
+ if (ret == 0) {
+ spin_lock(&conn->state_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
+ conn->state = RXRPC_CONN_CLIENT;
+ spin_unlock(&conn->state_lock);
+ }
}
-
- _leave(" = 0");
- return 0;
+ mutex_unlock(&conn->security_lock);
+ return ret;
}
/*
@@ -144,21 +137,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
sec = rxrpc_security_lookup(sp->hdr.securityIndex);
if (!sec) {
- trace_rxrpc_abort(0, "SVS",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EKEYREJECTED);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = RX_INVALID_OPERATION;
+ rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
return NULL;
}
if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
!rx->securities) {
- trace_rxrpc_abort(0, "SVR",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EKEYREJECTED);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = sec->no_key_abort;
+ rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
+ sec->no_key_abort, -EKEYREJECTED);
return NULL;
}
@@ -191,9 +178,9 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
sprintf(kdesc, "%u:%u",
sp->hdr.serviceId, sp->hdr.securityIndex);
- rcu_read_lock();
+ read_lock(&conn->local->services_lock);
- rx = rcu_dereference(conn->local->service);
+ rx = conn->local->service;
if (!rx)
goto out;
@@ -215,6 +202,6 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
}
out:
- rcu_read_unlock();
+ read_unlock(&conn->local->services_lock);
return key;
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index cde1e65f16b4..da49fcf1c456 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -18,6 +18,81 @@
#include "ar-internal.h"
/*
+ * Propose an abort to be made in the I/O thread.
+ */
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why)
+{
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
+
+ if (!call->send_abort && !rxrpc_call_is_complete(call)) {
+ call->send_abort_why = why;
+ call->send_abort_err = error;
+ call->send_abort_seq = 0;
+ /* Request abort locklessly vs rxrpc_input_call_event(). */
+ smp_store_release(&call->send_abort, abort_code);
+ rxrpc_poke_call(call, rxrpc_call_poke_abort);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Wait for a call to become connected. Interruption here doesn't cause the
+ * call to be aborted.
+ */
+static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret = 0;
+
+ _enter("%d", call->debug_id);
+
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
+ return call->error;
+
+ add_wait_queue_exclusive(&call->waitq, &myself);
+
+ for (;;) {
+ ret = call->error;
+ if (ret < 0)
+ break;
+
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
+ set_current_state(TASK_INTERRUPTIBLE);
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) {
+ ret = call->error;
+ break;
+ }
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
+ signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+ *timeo = schedule_timeout(*timeo);
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
+
+ if (ret == 0 && rxrpc_call_is_complete(call))
+ ret = call->error;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* Return true if there's sufficient Tx queue space.
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
@@ -39,7 +114,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, NULL))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
if (signal_pending(current))
@@ -74,7 +149,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, &tx_win))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
if (timeout == 0 &&
@@ -103,7 +178,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, NULL))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
@@ -168,7 +243,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
rxrpc_notify_end_tx_t notify_end_tx)
{
- unsigned long now;
rxrpc_seq_t seq = txb->seq;
bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
@@ -191,36 +265,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
poke = list_empty(&call->tx_sendmsg);
list_add_tail(&txb->call_link, &call->tx_sendmsg);
call->tx_prepared = seq;
+ if (last)
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
spin_unlock(&call->tx_lock);
- if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
- _debug("________awaiting reply/ACK__________");
- write_lock(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
- rxrpc_notify_end_tx(rx, call, notify_end_tx);
- break;
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- call->state = RXRPC_CALL_SERVER_SEND_REPLY;
- now = jiffies;
- WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
- if (call->ackr_reason == RXRPC_ACK_DELAY)
- call->ackr_reason = 0;
- trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
- if (!last)
- break;
- fallthrough;
- case RXRPC_CALL_SERVER_SEND_REPLY:
- call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
- rxrpc_notify_end_tx(rx, call, notify_end_tx);
- break;
- default:
- break;
- }
- write_unlock(&call->state_lock);
- }
-
if (poke)
rxrpc_poke_call(call, rxrpc_call_poke_start);
}
@@ -245,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ ret = rxrpc_wait_to_be_connected(call, &timeo);
+ if (ret < 0)
+ return ret;
+
+ if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = rxrpc_init_client_conn_security(call->conn);
+ if (ret < 0)
+ return ret;
+ }
+
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -252,15 +310,20 @@ reload:
ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto maybe_error;
- state = READ_ONCE(call->state);
+ state = rxrpc_call_state(call);
ret = -ESHUTDOWN;
if (state >= RXRPC_CALL_COMPLETE)
goto maybe_error;
ret = -EPROTO;
if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY)
+ state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Request phase complete for this client call */
+ trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EPROTO);
goto maybe_error;
+ }
ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
@@ -329,7 +392,7 @@ reload:
/* check for the far side aborting the call or a network error
* occurring */
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
goto call_terminated;
/* add the packet to the send queue if it's now full */
@@ -354,12 +417,9 @@ reload:
success:
ret = copied;
- if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
- read_lock(&call->state_lock);
- if (call->error < 0)
- ret = call->error;
- read_unlock(&call->state_lock);
- }
+ if (rxrpc_call_is_complete(call) &&
+ call->error < 0)
+ ret = call->error;
out:
call->tx_pending = txb;
_leave(" = %d", ret);
@@ -543,7 +603,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
- rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p\n", call);
return call;
}
@@ -556,7 +615,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
- enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
bool dropped_lock = false;
@@ -598,10 +656,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
return PTR_ERR(call);
/* ... and we have the call lock. */
ret = 0;
- if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
goto out_put_unlock;
} else {
- switch (READ_ONCE(call->state)) {
+ switch (rxrpc_call_state(call)) {
case RXRPC_CALL_UNINITIALISED:
case RXRPC_CALL_CLIENT_AWAIT_CONN:
case RXRPC_CALL_SERVER_PREALLOC:
@@ -655,17 +713,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
break;
}
- state = READ_ONCE(call->state);
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, state, call->conn);
-
- if (state >= RXRPC_CALL_COMPLETE) {
+ if (rxrpc_call_is_complete(call)) {
/* it's too late for this call */
ret = -ESHUTDOWN;
} else if (p.command == RXRPC_CMD_SEND_ABORT) {
+ rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED,
+ rxrpc_abort_call_sendmsg);
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED))
- ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else {
@@ -705,34 +759,17 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
bool dropped_lock = false;
int ret;
- _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+ _enter("{%d},", call->debug_id);
ASSERTCMP(msg->msg_name, ==, NULL);
ASSERTCMP(msg->msg_control, ==, NULL);
mutex_lock(&call->user_mutex);
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
-
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- case RXRPC_CALL_SERVER_SEND_REPLY:
- ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx, &dropped_lock);
- break;
- case RXRPC_CALL_COMPLETE:
- read_lock(&call->state_lock);
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+ notify_end_tx, &dropped_lock);
+ if (ret == -ESHUTDOWN)
ret = call->error;
- read_unlock(&call->state_lock);
- break;
- default:
- /* Request phase complete for this client call */
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
- ret = -EPROTO;
- break;
- }
if (!dropped_lock)
mutex_unlock(&call->user_mutex);
@@ -747,24 +784,20 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @call: The call to be aborted
* @abort_code: The abort code to stick into the ABORT packet
* @error: Local error value
- * @why: 3-char string indicating why.
+ * @why: Indication as to why.
*
* Allow a kernel service to abort a call, if it's still in an abortable state
* and return true if the call was aborted, false if it was already complete.
*/
bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
- u32 abort_code, int error, const char *why)
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
{
bool aborted;
- _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
mutex_lock(&call->user_mutex);
-
- aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
- if (aborted)
- rxrpc_send_abort_packet(call);
-
+ aborted = rxrpc_propose_abort(call, abort_code, error, why);
mutex_unlock(&call->user_mutex);
return aborted;
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index ff47ce4d3968..6b26bdb999d7 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -134,6 +134,11 @@ static int valid_label(const struct nlattr *attr,
{
const u32 *label = nla_data(attr);
+ if (nla_len(attr) != sizeof(*label)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length");
+ return -EINVAL;
+ }
+
if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) {
NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range");
return -EINVAL;
@@ -145,7 +150,8 @@ static int valid_label(const struct nlattr *attr,
static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
[TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
[TCA_MPLS_PROTO] = { .type = NLA_U16 },
- [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
+ [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ valid_label),
[TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7),
[TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
[TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index eb0e9458e722..ee2a050c887b 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -333,7 +333,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter_result *r, struct nlattr **tb,
struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
{
- struct tcindex_filter_result new_filter_result, *old_r = r;
+ struct tcindex_filter_result new_filter_result;
struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_result cr = {};
@@ -402,7 +402,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcindex_filter_result_init(&new_filter_result, cp, net);
if (err < 0)
goto errout_alloc;
- if (old_r)
+ if (r)
cr = r->res;
err = -EBUSY;
@@ -479,14 +479,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
tcf_bind_filter(tp, &cr, base);
}
- if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r, cp, net);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
oldp = p;
r->res = cr;
tcf_exts_change(&r->exts, &e);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2317db02c764..72d2c204d5f3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1133,6 +1133,11 @@ skip:
return -ENOENT;
}
+ if (new && new->ops == &noqueue_qdisc_ops) {
+ NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
+ return -EINVAL;
+ }
+
err = cops->graft(parent, cl, new, &old, extack);
if (err)
return err;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index f52255fea652..4a981ca90b0b 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -393,10 +393,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
result = tcf_classify(skb, NULL, fl, &res, true);
if (result < 0)
continue;
+ if (result == TC_ACT_SHOT)
+ goto done;
+
flow = (struct atm_flow_data *)res.class;
if (!flow)
flow = lookup_flow(sch, res.classid);
- goto done;
+ goto drop;
}
}
flow = NULL;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6568e17c4c63..36db5f6782f2 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
result = tcf_classify(skb, NULL, fl, &res, true);
if (!fl || result < 0)
goto fallback;
+ if (result == TC_ACT_SHOT)
+ return NULL;
cl = (void *)res.class;
if (!cl) {
@@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
fallthrough;
- case TC_ACT_SHOT:
- return NULL;
case TC_ACT_RECLASSIFY:
return cbq_reclassify(skb, cl);
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a661b062cca8..872d127c9db4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -377,6 +377,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
/* Even if driver returns failure adjust the stats - in case offload
* ended but driver still wants to adjust the values.
*/
+ sch_tree_lock(sch);
for (i = 0; i < MAX_DPs; i++) {
if (!table->tab[i])
continue;
@@ -393,6 +394,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
}
_bstats_update(&sch->bstats, bytes, packets);
+ sch_tree_unlock(sch);
kfree(hw_stats);
return ret;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index e5b4bbf3ce3d..92f2975b6a82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -199,8 +199,14 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
return (unsigned long)htb_find(handle, sch);
}
+
+#define HTB_DIRECT ((struct htb_class *)-1L)
+
/**
* htb_classify - classify a packet into class
+ * @skb: the socket buffer
+ * @sch: the active queue discipline
+ * @qerr: pointer for returned status code
*
* It returns NULL if the packet should be dropped or -1 if the packet
* should be passed directly thru. In all other cases leaf class is returned.
@@ -211,8 +217,6 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle)
* have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
* then finish and return direct queue.
*/
-#define HTB_DIRECT ((struct htb_class *)-1L)
-
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
@@ -427,7 +431,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
m = mask;
while (m) {
- int prio = ffz(~m);
+ unsigned int prio = ffz(~m);
+
+ if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
+ break;
m &= ~(1 << prio);
if (p->inner.clprio[prio].feed.rb_node)
@@ -1545,7 +1552,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
struct tc_htb_qopt_offload offload_opt;
struct netdev_queue *dev_queue;
struct Qdisc *q = cl->leaf.q;
- struct Qdisc *old = NULL;
+ struct Qdisc *old;
int err;
if (cl->level)
@@ -1553,14 +1560,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
WARN_ON(!q);
dev_queue = htb_offload_get_queue(cl);
- old = htb_graft_helper(dev_queue, NULL);
- if (destroying)
- /* Before HTB is destroyed, the kernel grafts noop_qdisc to
- * all queues.
+ /* When destroying, caller qdisc_graft grafts the new qdisc and invokes
+ * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
+ * does not need to graft or qdisc_put the qdisc being destroyed.
+ */
+ if (!destroying) {
+ old = htb_graft_helper(dev_queue, NULL);
+ /* Last qdisc grafted should be the same as cl->leaf.q when
+ * calling htb_delete.
*/
- WARN_ON(!(old->flags & TCQ_F_BUILTIN));
- else
WARN_ON(old != q);
+ }
if (cl->parent) {
_bstats_update(&cl->parent->bstats_bias,
@@ -1577,10 +1587,12 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
};
err = htb_offload(qdisc_dev(sch), &offload_opt);
- if (!err || destroying)
- qdisc_put(old);
- else
- htb_graft_helper(dev_queue, old);
+ if (!destroying) {
+ if (!err)
+ qdisc_put(old);
+ else
+ htb_graft_helper(dev_queue, old);
+ }
if (last_child)
return err;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 570389f6cdd7..c322a61eaeea 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1700,6 +1700,7 @@ static void taprio_reset(struct Qdisc *sch)
int i;
hrtimer_cancel(&q->advance_timer);
+
if (q->qdiscs) {
for (i = 0; i < dev->num_tx_queues; i++)
if (q->qdiscs[i])
@@ -1720,6 +1721,7 @@ static void taprio_destroy(struct Qdisc *sch)
* happens in qdisc_create(), after taprio_init() has been called.
*/
hrtimer_cancel(&q->advance_timer);
+ qdisc_synchronize(sch);
taprio_disable_offload(dev, q, NULL);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 59e653b528b1..6b95d3ba8fe1 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
}
}
+ /* If somehow no addresses were found that can be used with this
+ * scope, it's an error.
+ */
+ if (list_empty(&dest->address_list))
+ error = -ENETUNREACH;
+
out:
if (error)
sctp_bind_addr_clean(dest);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ca1eba95c293..2f66a2006517 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -196,9 +196,7 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
/* When a data chunk is sent, reset the heartbeat interval. */
expires = jiffies + sctp_transport_timeout(transport);
- if ((time_before(transport->hb_timer.expires, expires) ||
- !timer_pending(&transport->hb_timer)) &&
- !mod_timer(&transport->hb_timer,
+ if (!mod_timer(&transport->hb_timer,
expires + get_random_u32_below(transport->rto)))
sctp_transport_hold(transport);
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 7bb247c51e2f..2d7b1e03110a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
list_for_each_entry(pos, &pipe->in_downcall, list) {
if (!uid_eq(pos->uid, uid))
continue;
- if (auth && pos->auth->service != auth->service)
+ if (pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
return pos;
@@ -686,6 +686,21 @@ out:
return err;
}
+static struct gss_upcall_msg *
+gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
+{
+ struct gss_upcall_msg *pos;
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
+ if (!rpc_msg_is_inflight(&pos->msg))
+ continue;
+ refcount_inc(&pos->count);
+ return pos;
+ }
+ return NULL;
+}
+
#define MSG_BUF_MAXSIZE 1024
static ssize_t
@@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
err = -ENOENT;
/* Find a matching upcall */
spin_lock(&pipe->lock);
- gss_msg = __gss_find_upcall(pipe, uid, NULL);
+ gss_msg = gss_find_downcall(pipe, uid);
if (gss_msg == NULL) {
spin_unlock(&pipe->lock);
goto err_put_ctx;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 148bb0a7fa5b..acb822b23af1 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -923,7 +923,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
* rejecting the server-computed MIC in this somewhat rare case,
* do not use splice with the GSS integrity service.
*/
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
@@ -990,7 +990,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
int pad, remaining_len, offset;
u32 rseqno;
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85f0c3cfc877..f06622814a95 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1243,10 +1243,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off by GSS integrity and privacy services */
- __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
- __clear_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_DROPME, &rqstp->rq_flags);
svc_putu32(resv, rqstp->rq_xid);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2106003645a7..c2ce12538008 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1238,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 015714398007..815baf308236 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
if (svc_port_is_privileged(svc_addr(rqstp)))
- __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
else
- __clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
- __set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ set_bit(RQ_LOCAL, &rqstp->rq_flags);
else
- __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+ clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 199fa012f18a..94b20fb47135 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 49ddc484c4fe..5e000fde8067 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
bool addr_match = false;
bool sign_match = false;
bool link_up = false;
+ bool link_is_reset = false;
bool accept_addr = false;
- bool reset = true;
+ bool reset = false;
char *if_name;
unsigned long intv;
u16 session;
@@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr,
/* Prepare to validate requesting node's signature and media address */
l = le->link;
link_up = l && tipc_link_is_up(l);
+ link_is_reset = l && tipc_link_is_reset(l);
addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
sign_match = (signature == n->signature);
/* These three flags give us eight permutations: */
if (sign_match && addr_match && link_up) {
- /* All is fine. Do nothing. */
- reset = false;
+ /* All is fine. Ignore requests. */
/* Peer node is not a container/local namespace */
if (!n->peer_hash_mix)
n->peer_hash_mix = hash_mixes;
@@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*/
accept_addr = true;
*respond = true;
+ reset = true;
} else if (!sign_match && addr_match && link_up) {
/* Peer node rebooted. Two possibilities:
* - Delayed re-discovery; this link endpoint has already
@@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
n->signature = signature;
accept_addr = true;
*respond = true;
+ reset = true;
}
if (!accept_addr)
@@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
tipc_link_fsm_evt(l, LINK_RESET_EVT);
if (n->state == NODE_FAILINGOVER)
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ link_is_reset = tipc_link_is_reset(l);
le->link = l;
n->link_cnt++;
tipc_node_calculate_timer(n, l);
@@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
tipc_node_write_unlock(n);
- if (reset && l && !tipc_link_is_reset(l))
+ if (reset && !link_is_reset)
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9ed978634125..a83d2b4275fa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2427,7 +2427,7 @@ static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
{
struct tls_rec *rec;
- rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
+ rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
if (!rec)
return false;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3b55502b2965..5c7ad301d742 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -482,6 +482,12 @@ static int x25_listen(struct socket *sock, int backlog)
int rc = -EOPNOTSUPP;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ rc = -EINVAL;
+ release_sock(sk);
+ return rc;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index a0f62fa02e06..8cbf45a8bcdc 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -5,6 +5,7 @@
* Based on code and translator idea by: Florian Westphal <fw@strlen.de>
*/
#include <linux/compat.h>
+#include <linux/nospec.h>
#include <linux/xfrm.h>
#include <net/xfrm.h>
@@ -302,7 +303,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
nla_for_each_attr(nla, attrs, len, remaining) {
int err;
- switch (type) {
+ switch (nlh_src->nlmsg_type) {
case XFRM_MSG_NEWSPDINFO:
err = xfrm_nla_cpy(dst, nla, nla_len(nla));
break;
@@ -437,6 +438,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
+ type = array_index_nospec(type, XFRMA_MAX + 1);
if (nla_len(nla) < compat_policy[type].len) {
NL_SET_ERR_MSG(extack, "Attribute bad length");
return -EOPNOTSUPP;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c06e54a10540..436d29640ac2 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -279,8 +279,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
goto out;
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
- ipipv6_hdr(skb));
+ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..35279c220bd7 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
skb->mark = 0;
}
+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type, unsigned short family)
+{
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (sp && (sp->len || sp->olen) &&
+ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ goto discard;
+
+ XFRM_SPI_SKB_CB(skb)->family = family;
+ if (family == AF_INET) {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ } else {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ }
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
const struct xfrm_mode *inner_mode;
@@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = {
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
- .handler = xfrm6_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi6_rcv,
+ .input_handler = xfrmi6_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
#endif
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi4_rcv,
+ .input_handler = xfrmi4_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e9eb82c5457d..5c61ec04b839 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -336,7 +336,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.hard_use_expires_seconds) {
time64_t tmo = xp->lft.hard_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -354,7 +354,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.soft_use_expires_seconds) {
time64_t tmo = xp->lft.soft_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
tmo = XFRM_KM_TIMEOUT;
@@ -3661,7 +3661,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = ktime_get_real_seconds();
+ /* This lockless write can happen from different cpus. */
+ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds());
pols[0] = pol;
npols++;
@@ -3676,7 +3677,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
xfrm_pol_put(pols[0]);
return 0;
}
- pols[1]->curlft.use_time = ktime_get_real_seconds();
+ /* This write can happen from different cpus. */
+ WRITE_ONCE(pols[1]->curlft.use_time,
+ ktime_get_real_seconds());
npols++;
}
}
@@ -3742,6 +3745,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
+ if (if_id)
+ secpath_reset(skb);
+
xfrm_pols_put(pols, npols);
return 1;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 89c731f4f0c7..00afe831c71c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -577,7 +577,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.state == XFRM_STATE_EXPIRED)
goto expired;
if (x->lft.hard_add_expires_seconds) {
- long tmo = x->lft.hard_add_expires_seconds +
+ time64_t tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
if (x->xflags & XFRM_SOFT_EXPIRE) {
@@ -594,8 +594,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
next = tmo;
}
if (x->lft.hard_use_expires_seconds) {
- long tmo = x->lft.hard_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.hard_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -604,7 +604,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.dying)
goto resched;
if (x->lft.soft_add_expires_seconds) {
- long tmo = x->lft.soft_add_expires_seconds +
+ time64_t tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
warn = 1;
@@ -616,8 +616,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
}
}
if (x->lft.soft_use_expires_seconds) {
- long tmo = x->lft.soft_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.soft_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
warn = 1;
else if (tmo < next)
@@ -1906,7 +1906,7 @@ out:
hrtimer_start(&x1->mtimer, ktime_set(1, 0),
HRTIMER_MODE_REL_SOFT);
- if (x1->curlft.use_time)
+ if (READ_ONCE(x1->curlft.use_time))
xfrm_state_check_expire(x1);
if (x->props.smark.m || x->props.smark.v || x->if_id) {
@@ -1940,8 +1940,8 @@ int xfrm_state_check_expire(struct xfrm_state *x)
{
xfrm_dev_state_update_curlft(x);
- if (!x->curlft.use_time)
- x->curlft.use_time = ktime_get_real_seconds();
+ if (!READ_ONCE(x->curlft.use_time))
+ WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {