[PATCH RFC net-next 0/2] improve MPTCP fallback
by Davide Caratti
- patch 1 refactors fallback code to allow infinite mapping
- patch 2 fixes a race between recvmsg() and shutdown(), it was there
since earlier implementations but it became almost systematic
in case of fallback, after patch 1 was applied
Davide Caratti (1):
net: mptcp: improve fallback to TCP
Paolo Abeni (1):
mptcp: fix races between shutdown and recvmsg.
net/mptcp/options.c | 9 +++-
net/mptcp/protocol.c | 125 ++++++++++++++++---------------------------
net/mptcp/protocol.h | 14 +++++
net/mptcp/subflow.c | 63 +++++++++++++++-------
4 files changed, 112 insertions(+), 99 deletions(-)
--
2.26.2
1 year, 11 months
[PATCH v2 mptcp-next] mptcp: add receive buffer auto-tuning
by Florian Westphal
When mptcp is used, userspace doesn't read from the tcp (subflow)
socket but from the parent (mptcp) socket receive queue.
skbs are moved from the subflow socket to the mptcp rx queue either from
'data_ready' callback (if mptcp socket can be locked), a work queue, or
the socket receive function.
This means tcp_rcv_space_adjust() is never called and thus no receive
buffer size auto-tuning is done.
An earlier (not merged) patch added tcp_rcv_space_adjust() calls to the
function that moves skbs from subflow to mptcp socket.
While this enabled autotuning, it also meant tuning was done even if
userspace was reading the mptcp socket very slowly.
This adds mptcp_rcv_space_adjust() and calls it after userspace has
read data from the mptcp socket rx queue.
It's very similar to tcp_rcv_space_adjust, with two differences:
1. The rtt estimate is the largest one observed on a subflow
2. The rcvbuf size and window clamp of all subflows is adjusted
to the mptcp-level rcvbuf.
Otherwise, we get spurious drops at tcp (subflow) socket level if
the skbs are not moved to the mptcp socket fast enough and reduced
throughput.
Before:
time mptcp_connect.sh -t -f $((4*1024*1024)) -d 300 -l 0.01% -r 0 -e "" -m mmap
[..]
ns4 MPTCP -> ns3 (10.0.3.2:10108 ) MPTCP (duration 40562ms) [ OK ]
ns4 MPTCP -> ns3 (10.0.3.2:10109 ) TCP (duration 5415ms) [ OK ]
ns4 TCP -> ns3 (10.0.3.2:10110 ) MPTCP (duration 5413ms) [ OK ]
ns4 MPTCP -> ns3 (dead:beef:3::2:10111) MPTCP (duration 41331ms) [ OK ]
ns4 MPTCP -> ns3 (dead:beef:3::2:10112) TCP (duration 5415ms) [ OK ]
ns4 TCP -> ns3 (dead:beef:3::2:10113) MPTCP (duration 5714ms) [ OK ]
Time: 846 seconds
After:
ns4 MPTCP -> ns3 (10.0.3.2:10108 ) MPTCP (duration 5417ms) [ OK ]
ns4 MPTCP -> ns3 (10.0.3.2:10109 ) TCP (duration 5429ms) [ OK ]
ns4 TCP -> ns3 (10.0.3.2:10110 ) MPTCP (duration 5418ms) [ OK ]
ns4 MPTCP -> ns3 (dead:beef:3::2:10111) MPTCP (duration 5423ms) [ OK ]
ns4 MPTCP -> ns3 (dead:beef:3::2:10112) TCP (duration 5715ms) [ OK ]
ns4 TCP -> ns3 (dead:beef:3::2:10113) MPTCP (duration 5415ms) [ OK ]
Time: 275 seconds
Signed-off-by: Florian Westphal <fw(a)strlen.de>
---
changes in v2:
- cache last rtt_us value used
- don't store seq value
- reset 'copied' to 0 when starting
new measurement to simplify adjust function.
- make sure space.space is not inited to 0, else div-by-0 occurs
net/mptcp/protocol.c | 124 ++++++++++++++++++++++++++++++++++++++++---
net/mptcp/protocol.h | 6 +++
2 files changed, 123 insertions(+), 7 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b2c8b57e7942..3827e4004877 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -207,13 +207,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
return false;
}
- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf);
-
- if (rcvbuf > sk->sk_rcvbuf)
- sk->sk_rcvbuf = rcvbuf;
- }
-
tp = tcp_sk(ssk);
do {
u32 map_remaining, offset;
@@ -928,6 +921,100 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
return copied;
}
+/* receive buffer autotuning. See tcp_rcv_space_adjust for more information.
+ *
+ * Only difference: Use highest rtt estimate of the subflows in use.
+ */
+static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ u32 time, advmss = 1;
+ u64 rtt_us, mstamp;
+
+ sock_owned_by_me(sk);
+
+ if (copied <= 0)
+ return;
+
+ msk->rcvq_space.copied += copied;
+
+ mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+ time = tcp_stamp_us_delta(mstamp, msk->rcvq_space.time);
+
+ rtt_us = msk->rcvq_space.rtt_us;
+ if (rtt_us && time < (rtt_us >> 3))
+ return;
+
+ rtt_us = 0;
+ mptcp_for_each_subflow(msk, subflow) {
+ const struct tcp_sock *tp;
+ u64 sf_rtt_us;
+ u32 sf_advmss;
+
+ tp = tcp_sk(mptcp_subflow_tcp_sock(subflow));
+
+ sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us);
+ sf_advmss = READ_ONCE(tp->advmss);
+
+ rtt_us = max(sf_rtt_us, rtt_us);
+ advmss = max(sf_advmss, advmss);
+ }
+
+ msk->rcvq_space.rtt_us = rtt_us;
+ if (time < (rtt_us >> 3) || rtt_us == 0)
+ return;
+
+ if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+ goto new_measure;
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+
+ rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
+
+ grow = rcvwin *(msk->rcvq_space.copied - msk->rcvq_space.space);
+
+ do_div(grow, msk->rcvq_space.space);
+ rcvwin += (grow << 1);
+
+ rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
+ while (tcp_win_from_space(sk, rcvmem) < advmss)
+ rcvmem += 128;
+
+ do_div(rcvwin, advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+
+ if (rcvbuf > sk->sk_rcvbuf) {
+ u32 window_clamp;
+
+ window_clamp = tcp_win_from_space(sk, rcvbuf);
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+
+ /* Make subflows follow along. If we do not do this, we
+ * get drops at subflow level if skbs can't be moved to
+ * the mptcp rx queue fast enough (announced rcv_win can
+ * exceed ssk->sk_rcvbuf).
+ */
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
+ tcp_sk(ssk)->window_clamp = window_clamp;
+ }
+ }
+ }
+
+ msk->rcvq_space.space = msk->rcvq_space.copied;
+new_measure:
+ msk->rcvq_space.copied = 0;
+ msk->rcvq_space.time = mstamp;
+}
+
static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
unsigned int moved = 0;
@@ -1050,6 +1137,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
set_bit(MPTCP_DATA_READY, &msk->flags);
}
out_err:
+ mptcp_rcv_space_adjust(msk, copied);
+
release_sock(sk);
return copied;
}
@@ -1280,6 +1369,7 @@ static int mptcp_init_sock(struct sock *sk)
return ret;
sk_sockets_allocated_inc(sk);
+ sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2];
return 0;
@@ -1475,6 +1565,23 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
return nsk;
}
+static void mptcp_rcv_space_init(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct tcp_sock *tp = tcp_sk(ssk);
+
+ msk->rcvq_space.copied = 0;
+ msk->rcvq_space.rtt_us = 0;
+
+ tcp_mstamp_refresh(tp);
+ msk->rcvq_space.time = tp->tcp_mstamp;
+
+ /* initial rcv_space offering made to peer */
+ msk->rcvq_space.space = min_t(u32, tp->rcv_wnd,
+ TCP_INIT_CWND * tp->advmss);
+ if (msk->rcvq_space.space == 0)
+ msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
+}
+
static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
bool kern)
{
@@ -1524,6 +1631,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
list_add(&subflow->node, &msk->conn_list);
inet_sk_state_store(newsk, TCP_ESTABLISHED);
+ mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(new_mptcp_sock);
__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
@@ -1678,6 +1786,8 @@ void mptcp_finish_connect(struct sock *ssk)
atomic64_set(&msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, 0);
+
+ mptcp_rcv_space_init(msk, ssk);
}
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 809687d3f410..5ac157e007e9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -210,6 +210,12 @@ struct mptcp_sock {
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
struct mptcp_pm_data pm;
+ struct {
+ u32 space; /* bytes copied in last measurement window */
+ u32 copied; /* bytes copied in this measurement window */
+ u64 time; /* start time of measurement window */
+ u64 rtt_us; /* last maximum rtt of subflows */
+ } rcvq_space;
};
#define mptcp_for_each_subflow(__msk, __subflow) \
--
2.26.2
1 year, 11 months
Crashers on netnext with apache-benchmark
by Christoph Paasch
Hello,
I have a test where I use apachebenchmark with 100 concurrent clients (-c 100),
on a small file (1KB). This test stresses connection establishment,... a
lot, because often you will have a SYN+MP_JOIN in-flight while the
connection is already in the process of being torn down.
In this test, the client is out-of-tree and server is netnext d52caf0404e6
+ a patch to force-enable MPTCP on all connections.
I am not getting very far and have different kinds of panics:
[ 142.001017] ------------[ cut here ]------------
[ 142.002079] refcount_t: saturated; leaking memory.
[ 142.002226] WARNING: CPU: 0 PID: 1400 at lib/refcount.c:22 refcount_warn_saturate+0x65/0x110
[ 142.003085] refcount_t: addition on 0; use-after-free.
[...]
[ 142.004121] RIP: 0010:refcount_warn_saturate+0x65/0x110
[ 142.004125] Code: 00 0f 84 b1 00 00 00 5b 5d c3 85 db 74 40 80 3d 50 02 8d 01 00 75 f0 48 c7 c7 20 62 39 82 c6 05 40 02 8d 01 01 e8 d0 64 aa ff <0f> 0b eb d9 80 3d 2f 02 8d 01 00 75 d0 48 c7 c7 c0 62 39 82 c6 05
[ 142.004130] RSP: 0018:ffff88810d26fb78 EFLAGS: 00010282
[ 142.004138] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
[ 142.004141] RDX: 00000000fffffff8 RSI: 0000000000000004 RDI: ffffed1021a4df61
[ 142.004143] RBP: ffff8880aac11740 R08: ffffffff8120b958 R09: ffffed10236843c9
[ 142.004146] R10: ffff88811b421e43 R11: ffffed10236843c8 R12: ffff8880a1cc0d00
[ 142.004149] R13: ffff88810c273100 R14: ffff8880aac11740 R15: ffff88810669b458
[ 142.004178] mptcp_accept+0x2ca/0x300
[ 142.004213] inet_accept+0xaa/0x3b0
[ 142.004256] mptcp_stream_accept+0x124/0x350
[ 142.004272] __sys_accept4_file+0x260/0x330
[ 142.004324] __sys_accept4+0x6d/0xb0
[ 142.004343] __x64_sys_accept4+0x4b/0x60
[ 142.004353] do_syscall_64+0xc1/0xa10
[ 142.004381] entry_SYSCALL_64_after_hwframe+0x49/0xb3
And another one:
[ 62.586401] ==================================================================
[ 62.588813] BUG: KASAN: use-after-free in inet_twsk_bind_unhash+0x5f/0xe0
[ 62.589975] Write of size 8 at addr ffff88810f155a20 by task ksoftirqd/2/21
[ 62.591194]
[ 62.591485] CPU: 2 PID: 21 Comm: ksoftirqd/2 Kdump: loaded Not tainted 5.7.0-rc6.mptcp #36
[ 62.593067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[ 62.595268] Call Trace:
[ 62.595775] dump_stack+0x76/0xa0
[ 62.596448] print_address_description.constprop.0+0x3a/0x60
[ 62.600581] __kasan_report.cold+0x20/0x3b
[ 62.602968] kasan_report+0x38/0x50
[ 62.603561] inet_twsk_bind_unhash+0x5f/0xe0
[ 62.604282] inet_twsk_kill+0x195/0x200
[ 62.604945] inet_twsk_deschedule_put+0x25/0x30
[ 62.605731] tcp_v4_rcv+0xa79/0x15e0
[ 62.607139] ip_protocol_deliver_rcu+0x37/0x270
[ 62.607980] ip_local_deliver_finish+0xb0/0xd0
[ 62.608758] ip_local_deliver+0x1c9/0x1e0
[ 62.611162] ip_sublist_rcv_finish+0x84/0xa0
[ 62.611894] ip_sublist_rcv+0x22c/0x320
[ 62.616143] ip_list_rcv+0x1e4/0x225
[ 62.619427] __netif_receive_skb_list_core+0x439/0x460
[ 62.622771] netif_receive_skb_list_internal+0x3ea/0x570
[ 62.625320] gro_normal_list.part.0+0x14/0x50
[ 62.626088] napi_gro_receive+0x6a/0xb0
[ 62.626787] receive_buf+0x371/0x1d50
[ 62.632092] virtnet_poll+0x2be/0x5b0
[ 62.634099] net_rx_action+0x1ec/0x4c0
[ 62.636132] __do_softirq+0xfc/0x29c
[ 62.638180] run_ksoftirqd+0x15/0x30
[ 62.638787] smpboot_thread_fn+0x1fc/0x380
[ 62.642009] kthread+0x1f1/0x210
[ 62.643478] ret_from_fork+0x35/0x40
[ 62.644094]
[ 62.644371] Allocated by task 1355:
[ 62.644980] save_stack+0x1b/0x40
[ 62.645539] __kasan_kmalloc.constprop.0+0xc2/0xd0
[ 62.646347] kmem_cache_alloc+0xb8/0x190
[ 62.647006] getname_flags+0x6b/0x2b0
[ 62.647627] user_path_at_empty+0x1b/0x40
[ 62.648306] vfs_statx+0xba/0x140
[ 62.648875] __do_sys_newstat+0x8c/0xf0
[ 62.649518] do_syscall_64+0xbc/0x790
[ 62.650199] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 62.651091]
[ 62.651360] Freed by task 1355:
[ 62.651903] save_stack+0x1b/0x40
[ 62.652460] __kasan_slab_free+0x12f/0x180
[ 62.653147] kmem_cache_free+0x87/0x240
[ 62.653795] filename_lookup+0x183/0x250
[ 62.654447] vfs_statx+0xba/0x140
[ 62.655001] __do_sys_newstat+0x8c/0xf0
[ 62.655640] do_syscall_64+0xbc/0x790
[ 62.656246] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 62.657089]
[ 62.657351] The buggy address belongs to the object at ffff88810f155500
which belongs to the cache names_cache of size 4096
[ 62.659420] The buggy address is located 1312 bytes inside of
4096-byte region [ffff88810f155500, ffff88810f156500)
[ 62.661358] The buggy address belongs to the page:
[ 62.662175] page:ffffea00043c5400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 head:ffffea00043c5400 order:3 compound_mapcount:0 compound_pincount:0
[ 62.664523] flags: 0x8000000000010200(slab|head)
[ 62.665342] raw: 8000000000010200 0000000000000000 0000000400000001 ffff88811ac772c0
[ 62.666713] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
[ 62.667984] page dumped because: kasan: bad access detected
[ 62.668904]
[ 62.669171] Memory state around the buggy address:
[ 62.669975] ffff88810f155900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 62.671163] ffff88810f155980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 62.672363] >ffff88810f155a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 62.673559] ^
[ 62.674349] ffff88810f155a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 62.675531] ffff88810f155b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 62.676723] ==================================================================
Another thing that was very surprising (looks like a use-after-free-and-realloc):
[ 59.697222] MPTCP: MP_JOIN bad option size
I will try to dig more into these tomorrow, but if anyone has an idea, let
me know :)
Christoph
1 year, 11 months
[PATCH net-next] mptcp: fix NULL ptr dereference in MP_JOIN error path
by Paolo Abeni
When token lookup on MP_JOIN 3rd ack fails, the server
socket closes with a reset the incoming child. Such socket
has the 'is_mptcp' flag set, but no msk socket associated
- due to the failed lookup.
While crafting the reset packet mptcp_established_options_mp()
will try to dereference the child's master socket, causing
a NULL ptr dereference.
This change addresses the issue with explicit fallback to
TCP in such error path.
Fixes: 729cd6436f35 ("mptcp: cope better with MP_JOIN failure")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
net/mptcp/subflow.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index f3c06b8af92d..493b98a0825c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -413,6 +413,20 @@ static void subflow_ulp_fallback(struct sock *sk,
tcp_sk(sk)->is_mptcp = 0;
}
+static void subflow_drop_ctx(struct sock *ssk)
+{
+ struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
+
+ if (!ctx)
+ return;
+
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+
+ kfree_rcu(ctx, rcu);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -485,10 +499,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (fallback_is_fatal)
goto dispose_child;
- if (ctx) {
- subflow_ulp_fallback(child, ctx);
- kfree_rcu(ctx, rcu);
- }
+ subflow_drop_ctx(child);
goto out;
}
@@ -537,6 +548,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;
dispose_child:
+ subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
--
2.21.3
1 year, 11 months
[PATCH v3 0/4] mptcp: token container refactor
by Paolo Abeni
This series supersedes the RTC patch for token refactor. The relevant patch
is unchanged, but this additionally brings in some related minor cleanup
(patch 1/4) and KUNIT (!!!) self-tests (4/4). To make things cleaner it also
moves the existing in-kernel tests (crypto) to KUNIT.
only changes from v2 are some comments in patch 2/4 and patch 3/4 rebased.
Paolo Abeni (4):
mptcp: add __init annotation on setup functions
mptcp: refactor token container.
mptcp: move crypto test to KUNIT
mptcp: introduce token KUNIT self-tests
net/mptcp/Kconfig | 19 ++-
net/mptcp/Makefile | 4 +
net/mptcp/crypto.c | 63 +---------
net/mptcp/crypto_test.c | 72 +++++++++++
net/mptcp/pm.c | 2 +-
net/mptcp/pm_netlink.c | 2 +-
net/mptcp/protocol.c | 39 +++---
net/mptcp/protocol.h | 15 ++-
net/mptcp/subflow.c | 12 +-
net/mptcp/token.c | 263 +++++++++++++++++++++++++++++-----------
net/mptcp/token_test.c | 138 +++++++++++++++++++++
11 files changed, 458 insertions(+), 171 deletions(-)
create mode 100644 net/mptcp/crypto_test.c
create mode 100644 net/mptcp/token_test.c
--
2.21.3
1 year, 11 months
[PATCH net-next v3] mptcp: fix NULL ptr dereference in MP_JOIN error path
by Paolo Abeni
When token lookup on MP_JOIN 3rd ack fails, the server
socket closes with a reset the incoming child. Such socket
has the 'is_mptcp' flag set, but no msk socket associated
- due to the failed lookup.
While crafting the reset packet mptcp_established_options_mp()
will try to dereference the child's master socket, causing
a NULL ptr dereference.
This change addresses the issue with explicit fallback to
TCP in such error path.
Fixes: 729cd6436f35 ("mptcp: cope better with MP_JOIN failure")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
v2 -> v3:
- fix msk leak
- targeting net-next to avoid merge conflict
---
net/mptcp/subflow.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0a01b6e8f2dc..10b4770a1419 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -413,6 +413,20 @@ static void subflow_ulp_fallback(struct sock *sk,
tcp_sk(sk)->is_mptcp = 0;
}
+static void subflow_drop_ctx(struct sock *ssk)
+{
+ struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
+
+ if (!ctx)
+ return;
+
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+
+ kfree_rcu(ctx, rcu);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -485,10 +499,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (fallback_is_fatal)
goto dispose_child;
- if (ctx) {
- subflow_ulp_fallback(child, ctx);
- kfree_rcu(ctx, rcu);
- }
+ subflow_drop_ctx(child);
goto out;
}
@@ -537,6 +548,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;
dispose_child:
+ subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
--
2.21.3
1 year, 11 months
[PATCH net] mptcp: remove msk from the token container at destruction time.
by Paolo Abeni
Currently we remove the msk from the token container only
via mptcp_close(). The MPTCP master socket can be destroyed
also via other paths (e.g. if not yet accepted, when shutting
down the listener socket). When we hit the latter scenario,
dangling msk references are left into the token container,
leading to memory corruption and/or UaF.
This change addresses the issue by moving the token removal
into the msk destructor.
Fixes: 79c0949e9a09 ("mptcp: Add key generation and token tree")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
net/mptcp/protocol.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0b9368c30311..2867160a9524 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1346,7 +1346,6 @@ static void mptcp_close(struct sock *sk, long timeout)
lock_sock(sk);
- mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);
/* be sure to always acquire the join list lock, to sync vs
@@ -1532,6 +1531,7 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ mptcp_token_destroy(msk->token);
if (msk->cached_ext)
__skb_ext_put(msk->cached_ext);
--
2.21.3
1 year, 11 months
[PATCH net-next] net: mptcp: improve fallback to TCP
by Davide Caratti
keep using MPTCP sockets and a "dummy mapping" in case of fallback to
regular TCP. Skip adding DSS option on send, if TCP fallback has been
done earlier.
Notes: I'm unsure about what to do in mptcp_clean_una() to do a one-time
flush of the retransmit queue, as per Mat's suggestion. Any advice?
Changes since RFC v2:
- use a bit in msk->flags, rather than a dedicated boolean in struct
msk. This bit is going to be used in combination with another one,
TCP_FALLBACK_ALLOWED, that is 1 at the first subflow creation
and gets cleared once TCP fallback is no more allowed.
- separate code that adds support for "infinite mapping", and use
the term "dummy" instead of "infinite". Suggested by Mat
- remove inappropriate call to __mptcp_do_fallback() in
mptcp_accept() (Paolo)
Changes since RFC v1:
- use a dedicated member of struct msk to indicate that a fallback
has happened, use it in case of infinite mapping
- don't delete skb_ext in case of infinite mapping (Mat)
- test the value of pm.subflows on reception of an infinite map to
ensure that no other subflow is currently opened (Mat)
- in mptcp_established_options(), avoid adding TCP options in case
of fallback indication; simplify sendmsg()/recvmsg()/poll() to
keep using the MPTCP socket in case of TCP fallback. Set the
fallback indication in case subflow is not mp_capable after
successful 3-way handshake, instead of flipping 'is_mptcp'
(Paolo/Mat)
- remove deadcode in mptcp_finish_connect, and increment
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK in subflow_finish_connect
(Paolo)
BugLink: https://github.com/multipath-tcp/mptcp_net-next/issues/11
BugLink: https://github.com/multipath-tcp/mptcp_net-next/issues/22
Signed-off-by: Davide Caratti <dcaratti(a)redhat.com>
---
net/mptcp/options.c | 9 +++++-
net/mptcp/protocol.c | 77 +++++++++++---------------------------------
net/mptcp/protocol.h | 34 +++++++++++++++++++
net/mptcp/subflow.c | 46 +++++++++++++++++---------
4 files changed, 90 insertions(+), 76 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 01f1f4cf4902a..cf0b59ead1e43 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -624,6 +624,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
+ if (unlikely(mptcp_check_fallback(sk)))
+ return false;
+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@@ -714,7 +717,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
*/
if (!mp_opt->mp_capable) {
subflow->mp_capable = 0;
- tcp_sk(sk)->is_mptcp = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
return false;
}
@@ -814,6 +818,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
+ if (__mptcp_check_fallback(msk))
+ return;
+
mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b2c8b57e7942a..c2786c661f2fb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -52,11 +52,6 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}
-static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
-{
- return msk->first && !sk_is_mptcp(msk->first);
-}
-
static struct socket *mptcp_is_tcpsk(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
@@ -94,7 +89,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
if (unlikely(sock))
return sock;
- if (likely(!__mptcp_needs_tcp_fallback(msk)))
+ if (likely(!__mptcp_check_fallback(msk)))
return NULL;
return msk->subflow;
@@ -229,6 +224,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
if (!skb)
break;
+ if (__mptcp_check_fallback(msk)) {
+ /* if we are running under the workqueue, TCP could have
+ * collapsed skbs between dummy map creation and now
+ * be sure to adjust the size
+ */
+ map_remaining = skb->len;
+ subflow->map_data_len = skb->len;
+ }
+
offset = seq - TCP_SKB_CB(skb)->seq;
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (fin) {
@@ -445,8 +449,15 @@ static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- u64 snd_una = atomic64_read(&msk->snd_una);
bool cleaned = false;
+ u64 snd_una;
+
+ /* on fallback we just need to ignore snd_una, as this is really
+ * plain TCP
+ */
+ if (__mptcp_check_fallback(msk))
+ atomic64_set(&msk->snd_una, msk->write_seq);
+ snd_una = atomic64_read(&msk->snd_una);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -719,7 +730,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
bool tx_ok;
@@ -738,15 +748,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
-fallback:
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
- release_sock(sk);
- pr_debug("fallback passthrough");
- ret = sock_sendmsg(ssock, msg);
- return ret >= 0 ? ret + copied : (copied ? copied : ret);
- }
-
pfrag = sk_page_frag(sk);
restart:
mptcp_clean_una(sk);
@@ -798,17 +799,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
break;
}
- if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
- /* Can happen for passive sockets:
- * 3WHS negotiated MPTCP, but first packet after is
- * plain TCP (e.g. due to middlebox filtering unknown
- * options).
- *
- * Fall back to TCP.
- */
- release_sock(ssk);
- goto fallback;
- }
copied += ret;
@@ -951,7 +941,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int copied = 0;
int target;
long timeo;
@@ -960,16 +949,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EOPNOTSUPP;
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
-fallback:
- release_sock(sk);
- pr_debug("fallback-read subflow=%p",
- mptcp_subflow_ctx(ssock->sk));
- copied = sock_recvmsg(ssock, msg, flags);
- return copied;
- }
-
timeo = sock_rcvtimeo(sk, nonblock);
len = min_t(size_t, len, INT_MAX);
@@ -1032,8 +1011,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
- if (unlikely(__mptcp_tcp_fallback(msk)))
- goto fallback;
}
if (skb_queue_empty(&sk->sk_receive_queue)) {
@@ -1652,12 +1629,6 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);
- if (!subflow->mp_capable) {
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- return;
- }
-
pr_debug("msk=%p, token=%u", sk, subflow->token);
mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
@@ -1933,21 +1904,9 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
- struct socket *ssock;
__poll_t mask = 0;
msk = mptcp_sk(sk);
- lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (!ssock)
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock) {
- mask = ssock->ops->poll(file, ssock, wait);
- release_sock(sk);
- return mask;
- }
-
- release_sock(sk);
sock_poll_wait(file, sock, wait);
lock_sock(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 809687d3f4100..efc6436052e2d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -89,6 +89,7 @@
#define MPTCP_SEND_SPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
+#define MPTCP_FALLBACK_DONE 4
struct mptcp_options_received {
u64 sndr_key;
@@ -458,4 +459,37 @@ static inline bool before64(__u64 seq1, __u64 seq2)
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
+static inline bool __mptcp_check_fallback(struct mptcp_sock *msk)
+{
+ return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline bool mptcp_check_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ return __mptcp_check_fallback(msk);
+}
+
+static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+{
+ if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
+ pr_debug("TCP fallback already done (msk=%p)", msk);
+ return;
+ }
+ set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline void mptcp_do_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ __mptcp_do_fallback(msk);
+}
+
+#define pr_fallback(a) do { pr_debug("%s:fallback to TCP (msk=%p)",\
+ __FUNCTION__, a); } while (0)
+
#endif /* __MPTCP_PROTOCOL_H */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index f3c06b8af92de..59c6de6fac3fb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -223,7 +223,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
- struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -237,6 +236,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;
subflow->conn_finished = 1;
+ subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
+ pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp && mp_opt.mp_capable) {
@@ -252,21 +253,19 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
} else if (subflow->request_mptcp) {
- tp->is_mptcp = 0;
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(subflow->conn));
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
}
- if (!tp->is_mptcp)
+ if (mptcp_check_fallback(sk))
return;
if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
-
- if (skb) {
- pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
- }
} else if (subflow->mp_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -286,9 +285,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (skb)
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
-
if (!mptcp_finish_join(sk))
goto do_reset;
@@ -552,7 +548,8 @@ enum mapping_status {
MAPPING_OK,
MAPPING_INVALID,
MAPPING_EMPTY,
- MAPPING_DATA_FIN
+ MAPPING_DATA_FIN,
+ MAPPING_DUMMY
};
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
@@ -616,6 +613,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (!skb)
return MAPPING_EMPTY;
+ if (mptcp_check_fallback(ssk))
+ return MAPPING_DUMMY;
+
mpext = mptcp_get_ext(skb);
if (!mpext || !mpext->use_map) {
if (!subflow->map_valid && !skb->len) {
@@ -757,6 +757,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
ssk->sk_err = EBADMSG;
goto fatal;
}
+ if (status == MAPPING_DUMMY) {
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
+ subflow->ssn_offset;
+ return true;
+ }
if (status != MAPPING_OK)
return false;
@@ -880,14 +890,18 @@ static void subflow_data_ready(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
- if (!subflow->mp_capable && !subflow->mp_join) {
- subflow->tcp_data_ready(sk);
-
+ msk = mptcp_sk(parent);
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join);
+
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
}
@@ -1110,7 +1124,7 @@ static void subflow_state_change(struct sock *sk)
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
*/
- if (subflow->mp_capable && mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
--
2.26.2
1 year, 11 months
[PATCH net] mptcp: fix race between MP_JOIN and close
by Paolo Abeni
If an MP_JOIN subflow completes the 3whs while another
CPU is closing the master msk, we can hit the
following race:
CPU1 CPU2
close()
mptcp_close
subflow_syn_recv_sock
mptcp_token_get_sock
mptcp_finish_join
inet_sk_state_load
mptcp_token_destroy
inet_sk_state_store(TCP_CLOSE)
__mptcp_flush_join_list()
mptcp_sock_graft
list_add_tail
sk_common_release
sock_orphan()
<socket free>
The MP_JOIN socket will be leaked. Additionally we can hit
UaF for the msk 'struct socket' referenced via the 'conn'
field.
This change tries to address the issue by introducing some
synchronization between the MP_JOIN 3whs and mptcp_close
via the join_list spinlock. If we detect that the msk is closing,
the MP_JOIN socket is closed, too.
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
net/mptcp/protocol.c | 42 +++++++++++++++++++++++++++---------------
1 file changed, 27 insertions(+), 15 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0f244c0f422e..0b9368c30311 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1349,8 +1349,12 @@ static void mptcp_close(struct sock *sk, long timeout)
mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);
- __mptcp_flush_join_list(msk);
-
+ /* be sure to always acquire the join list lock, to sync vs
+ * mptcp_finish_join().
+ */
+ spin_lock_bh(&msk->join_list_lock);
+ list_splice_tail_init(&msk->join_list, &msk->conn_list);
+ spin_unlock_bh(&msk->join_list_lock);
list_splice_init(&msk->conn_list, &conn_list);
data_fin_tx_seq = msk->write_seq;
@@ -1707,22 +1711,30 @@ bool mptcp_finish_join(struct sock *sk)
if (!msk->pm.server_side)
return true;
- /* passive connection, attach to msk socket */
+ if (!mptcp_pm_allow_new_subflow(msk))
+ return false;
+
+ /* active connections are already on conn_list, and we can't acquire
+ * msk lock here.
+ * use the join list lock as synchronization point and double-check
+ * msk status to avoid racing with mptcp_close()
+ */
+ spin_lock_bh(&msk->join_list_lock);
+ ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
+ if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node)))
+ list_add_tail(&subflow->node, &msk->join_list);
+ spin_unlock_bh(&msk->join_list_lock);
+ if (!ret)
+ return false;
+
+ /* attach to msk socket only after we are sure he will deal with us
+ * at close time
+ */
parent_sock = READ_ONCE(parent->sk_socket);
if (parent_sock && !sk->sk_socket)
mptcp_sock_graft(sk, parent_sock);
-
- ret = mptcp_pm_allow_new_subflow(msk);
- if (ret) {
- subflow->map_seq = msk->ack_seq;
-
- /* active connections are already on conn_list */
- spin_lock_bh(&msk->join_list_lock);
- if (!WARN_ON_ONCE(!list_empty(&subflow->node)))
- list_add_tail(&subflow->node, &msk->join_list);
- spin_unlock_bh(&msk->join_list_lock);
- }
- return ret;
+ subflow->map_seq = msk->ack_seq;
+ return true;
}
static bool mptcp_memory_free(const struct sock *sk, int wake)
--
2.21.3
1 year, 11 months
[PATCH net v2] mptcp: fix NULL ptr dereference in MP_JOIN error path
by Paolo Abeni
When the token lookup on the MP_JOIN 3rd ack fails, the server
socket closes the incoming child with a reset. Such a socket
has the 'is_mptcp' flag set, but no msk socket associated
- due to the failed lookup.
While crafting the reset packet, mptcp_established_options_mp()
will try to dereference the child's master socket, causing
a NULL ptr dereference.
This change addresses the issue with an explicit fallback to
TCP in such an error path.
Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
v1 -> v2:
- dropped unneeded dbg messages
---
net/mptcp/subflow.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0a01b6e8f2dc..5b06a5910aeb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -413,6 +413,17 @@ static void subflow_ulp_fallback(struct sock *sk,
tcp_sk(sk)->is_mptcp = 0;
}
+static void subflow_drop_ctx(struct sock *ssk)
+{
+ struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
+
+ if (!ctx)
+ return;
+
+ subflow_ulp_fallback(ssk, ctx);
+ kfree_rcu(ctx, rcu);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -485,10 +496,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (fallback_is_fatal)
goto dispose_child;
- if (ctx) {
- subflow_ulp_fallback(child, ctx);
- kfree_rcu(ctx, rcu);
- }
+ subflow_drop_ctx(child);
goto out;
}
@@ -537,6 +545,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;
dispose_child:
+ subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
--
2.21.3
1 year, 11 months