[PATCH net] mptcp: don't leak msk in token container
by Paolo Abeni
If a listening MPTCP socket has unaccepted sockets at close
time, the related msks are freed via mptcp_sock_destruct(),
which in turn does not invoke the proto->destroy() method
nor the mptcp_token_destroy() function.
Due to the above, the child msk socket is not removed from
the token container, leading to later UaF.
Address the issue explicitly removing the token even in the
above error path.
Fixes: 79c0949e9a09 ("mptcp: Add key generation and token tree")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
net/mptcp/subflow.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 493b98a0825c..bf132575040d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -393,6 +393,7 @@ static void mptcp_sock_destruct(struct sock *sk)
sock_orphan(sk);
}
+ mptcp_token_destroy(mptcp_sk(sk)->token);
inet_sock_destruct(sk);
}
--
2.21.3
2 years, 2 months
[PATCH net] mptcp: fix races between shutdown and recvmsg
by Paolo Abeni
The msk sk_shutdown flag is set by a workqueue, possibly
introducing some delay in user-space notification. If the last
subflow carries some data with the fin packet, the user space
can wake-up before RCV_SHUTDOWN is set. If it executes unblocking
recvmsg(), it may return with an error instead of eof.
Address the issue explicitly checking for eof in recvmsg(), when
no data is found.
Fixes: 59832e246515 ("mptcp: subflow: check parent mptcp socket on subflow state change")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
net/mptcp/protocol.c | 45 +++++++++++++++++++++++---------------------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 14b253d10ccf..3980fbb6f31e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -374,6 +374,27 @@ void mptcp_subflow_eof(struct sock *sk)
sock_hold(sk);
}
+static void mptcp_check_for_eof(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int receivers = 0;
+
+ mptcp_for_each_subflow(msk, subflow)
+ receivers += !subflow->rx_eof;
+
+ if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+ /* hopefully temporary hack: propagate shutdown status
+ * to msk, when all subflows agree on it
+ */
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+
+ smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk->sk_data_ready(sk);
+ }
+}
+
static void mptcp_stop_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1011,6 +1032,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
break;
}
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
@@ -1148,27 +1172,6 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
-
- if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- sk->sk_shutdown |= RCV_SHUTDOWN;
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
- sk->sk_data_ready(sk);
- }
-}
-
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
--
2.21.3
2 years, 2 months
[RFC net] mptcp: fallback in case of simultaneous connect
by Davide Caratti
when a MPTCP client tries to connect to itself, tcp_finish_connect() is
never reached. Because of this, depending on ther socket current state,
multiple faults can be observed:
1) hitting a WARN_ON() in subflow_data_ready()
WARNING: CPU: 2 PID: 882 at net/mptcp/subflow.c:911 subflow_data_ready+0x18b/0x230
[...]
CPU: 2 PID: 882 Comm: gh35 Not tainted 5.7.0+ #187
[...]
RIP: 0010:subflow_data_ready+0x18b/0x230
[...]
Call Trace:
tcp_data_queue+0xd2f/0x4250
tcp_rcv_state_process+0xb1c/0x49d3
tcp_v4_do_rcv+0x2bc/0x790
__release_sock+0x153/0x2d0
release_sock+0x4f/0x170
mptcp_shutdown+0x167/0x4e0
__sys_shutdown+0xe6/0x180
__x64_sys_shutdown+0x50/0x70
do_syscall_64+0x9a/0x370
entry_SYSCALL_64_after_hwframe+0x44/0xa9
2) being stucked forever in mptcp_sendmsg() because the socket is not
TCP_ESTABLISHED
crash> bt 4847
PID: 4847 TASK: ffff88814b2fb100 CPU: 1 COMMAND: "gh35"
#0 [ffff8881376ff680] __schedule at ffffffff97248da4
#1 [ffff8881376ff778] schedule at ffffffff9724a34f
#2 [ffff8881376ff7a0] schedule_timeout at ffffffff97252ba0
#3 [ffff8881376ff8a8] wait_woken at ffffffff958ab4ba
#4 [ffff8881376ff940] sk_stream_wait_connect at ffffffff96c2d859
#5 [ffff8881376ffa28] mptcp_sendmsg at ffffffff97207fca
#6 [ffff8881376ffbc0] sock_sendmsg at ffffffff96be1b5b
#7 [ffff8881376ffbe8] sock_write_iter at ffffffff96be1daa
#8 [ffff8881376ffce8] new_sync_write at ffffffff95e5cb52
#9 [ffff8881376ffe50] vfs_write at ffffffff95e6547f
#10 [ffff8881376ffe90] ksys_write at ffffffff95e65d26
#11 [ffff8881376fff28] do_syscall_64 at ffffffff956088ba
#12 [ffff8881376fff50] entry_SYSCALL_64_after_hwframe at ffffffff9740008c
RIP: 00007f126f6956ed RSP: 00007ffc2a320278 RFLAGS: 00000217
RAX: ffffffffffffffda RBX: 0000000020000044 RCX: 00007f126f6956ed
RDX: 0000000000000004 RSI: 00000000004007b8 RDI: 0000000000000003
RBP: 00007ffc2a3202a0 R8: 0000000000400720 R9: 0000000000400720
R10: 0000000000400720 R11: 0000000000000217 R12: 00000000004004b0
R13: 00007ffc2a320380 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
3) tcpdump captures showing DSS exchange even when MP_CAPABLE handshake
didn't complete.
$ tcpdump -tnnr bad.pcap
IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [S], seq 3208913911, win 65483, options [mss 65495,sackOK,TS val 3291706876 ecr 3291694721,nop,wscale 7,mptcp capable v1], length 0
IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [S.], seq 3208913911, ack 3208913912, win 65483, options [mss 65495,sackOK,TS val 3291706876 ecr 3291706876,nop,wscale 7,mptcp capable v1], length 0
IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [.], ack 1, win 512, options [nop,nop,TS val 3291706876 ecr 3291706876], length 0
IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [F.], seq 1, ack 1, win 512, options [nop,nop,TS val 3291707876 ecr 3291706876,mptcp dss fin seq 0 subseq 0 len 1,nop,nop], length 0
IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [.], ack 2, win 512, options [nop,nop,TS val 3291707876 ecr 3291707876], length 0
force a fallback to TCP in these cases, and adjust the main socket
state to avoid hanging in sendmsg().
BugLink: https://github.com/multipath-tcp/mptcp_net-next/issues/35
Reported-by: Christoph Paasch <cpaasch(a)apple.com>
Suggested-by: Paolo Abeni <pabeni(a)redhat.com>
Signed-off-by: Davide Caratti <dcaratti(a)redhat.com>
---
net/mptcp/protocol.h | 10 ++++++++++
net/mptcp/subflow.c | 9 +++++++++
2 files changed, 19 insertions(+)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e4341b278464..cdd0eaeebf0c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -500,4 +500,14 @@ static inline void mptcp_do_fallback(struct sock *sk)
#define pr_fallback(a) do { pr_debug("%s:fallback to TCP (msk=%p)",\
__FUNCTION__, a); } while (0)
+static inline bool subflow_simultaneous_connect(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct sock *parent = subflow->conn;
+
+ return sk->sk_state == TCP_ESTABLISHED &&
+ !mptcp_sk(parent)->pm.server_side &&
+ !subflow->conn_finished;
+}
+
#endif /* __MPTCP_PROTOCOL_H */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 439170f798c8..eccc19e1adbe 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1128,6 +1128,15 @@ static void subflow_state_change(struct sock *sk)
__subflow_state_change(sk);
+ if (subflow_simultaneous_connect(sk)) {
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(parent));
+ if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
+ inet_sk_state_store(parent, TCP_ESTABLISHED);
+ parent->sk_state_change(parent);
+ }
+ }
+
/* as recvmsg() does not acquire the subflow socket for ssk selection
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
--
2.18.1
2 years, 2 months
[PATCH v2 0/3] mptcp: more token follow-up
by Paolo Abeni
The patch 1 addesses a race on close which may possibly fix
issues/36 && issues/29, the 2nd patch is resolve conflicts and the 3rd one
is a follow-up cleanup.
Patch 3 should be inserted just after "mptcp: refactor token container."
v1 -> v2:
- hopefully includes the correct patches
protocol.c | 9 ++++---
subflow.c | 69 ++++++++++++++++++++++---------------------------------------
2 files changed, 30 insertions(+), 48 deletions(-)
2 years, 2 months
[PATCH net v2] mptcp: bugfix for RM_ADDR option parsing
by Geliang Tang
In MPTCPOPT_RM_ADDR option parsing, the pointer "ptr" pointed to the
"Subtype" octet, the pointer "ptr+1" pointed to the "Address ID" octet:
+-------+-------+---------------+
|Subtype|(resvd)| Address ID |
+-------+-------+---------------+
| |
ptr ptr+1
We should set mp_opt->rm_id to the value of "ptr+1", not "ptr". This patch
will fix this bug.
Fixes: 3df523ab582c ("mptcp: Add ADD_ADDR handling")
Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
---
Changes in v2:
- Add "-net" subject and "Fixes" tag as Matt suggested.
---
net/mptcp/options.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 01f1f4cf4902..490b92534afc 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -273,6 +273,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
break;
+ ptr++;
+
mp_opt->rm_addr = 1;
mp_opt->rm_id = *ptr++;
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
--
2.17.1
2 years, 2 months
[PATCH net-next 0/2] mptcp: more token follow-up
by Paolo Abeni
The patch 1 addesses a race on close which may possibly fix
issues/36 && issues/29, the 2nd patch is a follow-up cleanup.
Patch 2 should be inserted just after "mptcp: refactor token container."
Note: I'm observing frequent (1 every a few runs) self-tests failure on top
of export branch due to timeout, but looks unrelated from series (happens
on unpatched tree)
Paolo Abeni (2):
mptcp: do nonce initialization at subflow creation time
dbg
net/core/sock.c | 16 +------------
net/mptcp/protocol.c | 23 +++++++++++++++++++
net/mptcp/protocol.h | 7 ++++++
net/mptcp/subflow.c | 54 ++++++++++++++++----------------------------
net/mptcp/token.c | 3 +++
5 files changed, 54 insertions(+), 49 deletions(-)
--
2.21.3
2 years, 2 months
[kbuild] [mptcp:export 5/10] net/mptcp/subflow.c:512 subflow_syn_recv_sock() error: uninitialized symbol 'subflow_req'.
by Dan Carpenter
tree: https://github.com/multipath-tcp/mptcp_net-next.git export
head: e28f6a7575a0006d0d49de297e23dd915c59ba39
commit: 962c918a1ca2553fac7db45c2a2f9b84fa30a6d9 [5/10] mptcp: refactor token container.
config: x86_64-randconfig-m001-20200605 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-13) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
smatch warnings:
net/mptcp/subflow.c:512 subflow_syn_recv_sock() error: uninitialized symbol 'subflow_req'.
# https://github.com/multipath-tcp/mptcp_net-next/commit/962c918a1ca2553fac...
git remote add mptcp https://github.com/multipath-tcp/mptcp_net-next.git
git remote update mptcp
git checkout 962c918a1ca2553fac7db45c2a2f9b84fa30a6d9
vim +/subflow_req +512 net/mptcp/subflow.c
cec37a6e41aae7 Peter Krystad 2020-01-21 430 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
cec37a6e41aae7 Peter Krystad 2020-01-21 431 struct sk_buff *skb,
cec37a6e41aae7 Peter Krystad 2020-01-21 432 struct request_sock *req,
cec37a6e41aae7 Peter Krystad 2020-01-21 433 struct dst_entry *dst,
cec37a6e41aae7 Peter Krystad 2020-01-21 434 struct request_sock *req_unhash,
cec37a6e41aae7 Peter Krystad 2020-01-21 435 bool *own_req)
cec37a6e41aae7 Peter Krystad 2020-01-21 436 {
cec37a6e41aae7 Peter Krystad 2020-01-21 437 struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
cc7972ea193233 Christoph Paasch 2020-01-21 438 struct mptcp_subflow_request_sock *subflow_req;
^^^^^^^^^^^
cfde141ea3faa3 Paolo Abeni 2020-04-30 439 struct mptcp_options_received mp_opt;
f296234c98a8fc Peter Krystad 2020-03-27 440 bool fallback_is_fatal = false;
58b09919626bf9 Paolo Abeni 2020-03-13 441 struct sock *new_msk = NULL;
4c8941de781cf7 Paolo Abeni 2020-04-20 442 bool fallback = false;
cec37a6e41aae7 Peter Krystad 2020-01-21 443 struct sock *child;
cec37a6e41aae7 Peter Krystad 2020-01-21 444
cec37a6e41aae7 Peter Krystad 2020-01-21 445 pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
cec37a6e41aae7 Peter Krystad 2020-01-21 446
cfde141ea3faa3 Paolo Abeni 2020-04-30 447 /* we need later a valid 'mp_capable' value even when options are not
cfde141ea3faa3 Paolo Abeni 2020-04-30 448 * parsed
cfde141ea3faa3 Paolo Abeni 2020-04-30 449 */
cfde141ea3faa3 Paolo Abeni 2020-04-30 450 mp_opt.mp_capable = 0;
ae2dd7164943e0 Florian Westphal 2020-01-29 451 if (tcp_rsk(req)->is_mptcp == 0)
ae2dd7164943e0 Florian Westphal 2020-01-29 452 goto create_child;
^^^^^^^^^^^^^^^^^
ae2dd7164943e0 Florian Westphal 2020-01-29 453
d22f4988ffecbe Christoph Paasch 2020-01-21 454 /* if the sk is MP_CAPABLE, we try to fetch the client key */
cc7972ea193233 Christoph Paasch 2020-01-21 455 subflow_req = mptcp_subflow_rsk(req);
cc7972ea193233 Christoph Paasch 2020-01-21 456 if (subflow_req->mp_capable) {
d22f4988ffecbe Christoph Paasch 2020-01-21 457 if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
d22f4988ffecbe Christoph Paasch 2020-01-21 458 /* here we can receive and accept an in-window,
d22f4988ffecbe Christoph Paasch 2020-01-21 459 * out-of-order pkt, which will not carry the MP_CAPABLE
d22f4988ffecbe Christoph Paasch 2020-01-21 460 * opt even on mptcp enabled paths
d22f4988ffecbe Christoph Paasch 2020-01-21 461 */
58b09919626bf9 Paolo Abeni 2020-03-13 462 goto create_msk;
d22f4988ffecbe Christoph Paasch 2020-01-21 463 }
d22f4988ffecbe Christoph Paasch 2020-01-21 464
cfde141ea3faa3 Paolo Abeni 2020-04-30 465 mptcp_get_options(skb, &mp_opt);
cfde141ea3faa3 Paolo Abeni 2020-04-30 466 if (!mp_opt.mp_capable) {
4c8941de781cf7 Paolo Abeni 2020-04-20 467 fallback = true;
58b09919626bf9 Paolo Abeni 2020-03-13 468 goto create_child;
d22f4988ffecbe Christoph Paasch 2020-01-21 469 }
58b09919626bf9 Paolo Abeni 2020-03-13 470
58b09919626bf9 Paolo Abeni 2020-03-13 471 create_msk:
cfde141ea3faa3 Paolo Abeni 2020-04-30 472 new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
58b09919626bf9 Paolo Abeni 2020-03-13 473 if (!new_msk)
4c8941de781cf7 Paolo Abeni 2020-04-20 474 fallback = true;
f296234c98a8fc Peter Krystad 2020-03-27 475 } else if (subflow_req->mp_join) {
f296234c98a8fc Peter Krystad 2020-03-27 476 fallback_is_fatal = true;
cfde141ea3faa3 Paolo Abeni 2020-04-30 477 mptcp_get_options(skb, &mp_opt);
cfde141ea3faa3 Paolo Abeni 2020-04-30 478 if (!mp_opt.mp_join ||
cfde141ea3faa3 Paolo Abeni 2020-04-30 479 !subflow_hmac_valid(req, &mp_opt)) {
fc518953bc9c8d Florian Westphal 2020-03-27 480 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
f296234c98a8fc Peter Krystad 2020-03-27 481 return NULL;
cc7972ea193233 Christoph Paasch 2020-01-21 482 }
fc518953bc9c8d Florian Westphal 2020-03-27 483 }
cec37a6e41aae7 Peter Krystad 2020-01-21 484
d22f4988ffecbe Christoph Paasch 2020-01-21 485 create_child:
^^^^^^^^^^^^
cec37a6e41aae7 Peter Krystad 2020-01-21 486 child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
cec37a6e41aae7 Peter Krystad 2020-01-21 487 req_unhash, own_req);
cec37a6e41aae7 Peter Krystad 2020-01-21 488
cec37a6e41aae7 Peter Krystad 2020-01-21 489 if (child && *own_req) {
79c0949e9a09f6 Peter Krystad 2020-01-21 490 struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
79c0949e9a09f6 Peter Krystad 2020-01-21 491
90bf45134d55d6 Paolo Abeni 2020-05-15 492 tcp_rsk(req)->drop_req = false;
90bf45134d55d6 Paolo Abeni 2020-05-15 493
4c8941de781cf7 Paolo Abeni 2020-04-20 494 /* we need to fallback on ctx allocation failure and on pre-reqs
4c8941de781cf7 Paolo Abeni 2020-04-20 495 * checking above. In the latter scenario we additionally need
4c8941de781cf7 Paolo Abeni 2020-04-20 496 * to reset the context to non MPTCP status.
79c0949e9a09f6 Peter Krystad 2020-01-21 497 */
4c8941de781cf7 Paolo Abeni 2020-04-20 498 if (!ctx || fallback) {
f296234c98a8fc Peter Krystad 2020-03-27 499 if (fallback_is_fatal)
729cd6436f359b Paolo Abeni 2020-05-15 500 goto dispose_child;
4c8941de781cf7 Paolo Abeni 2020-04-20 501
39884604b11692 Paolo Abeni 2020-05-29 502 subflow_drop_ctx(child);
58b09919626bf9 Paolo Abeni 2020-03-13 503 goto out;
f296234c98a8fc Peter Krystad 2020-03-27 504 }
79c0949e9a09f6 Peter Krystad 2020-01-21 505
79c0949e9a09f6 Peter Krystad 2020-01-21 506 if (ctx->mp_capable) {
58b09919626bf9 Paolo Abeni 2020-03-13 507 /* new mpc subflow takes ownership of the newly
58b09919626bf9 Paolo Abeni 2020-03-13 508 * created mptcp socket
58b09919626bf9 Paolo Abeni 2020-03-13 509 */
df1036da90108b Florian Westphal 2020-04-17 510 new_msk->sk_destruct = mptcp_sock_destruct;
1b1c7a0ef7f323 Peter Krystad 2020-03-27 511 mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
962c918a1ca255 Paolo Abeni 2020-06-05 @512 mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
^^^^^^^^^^^
Potential uninitialized?
58b09919626bf9 Paolo Abeni 2020-03-13 513 ctx->conn = new_msk;
58b09919626bf9 Paolo Abeni 2020-03-13 514 new_msk = NULL;
fca5c82c086ea3 Paolo Abeni 2020-04-20 515
fca5c82c086ea3 Paolo Abeni 2020-04-20 516 /* with OoO packets we can reach here without ingress
fca5c82c086ea3 Paolo Abeni 2020-04-20 517 * mpc option
fca5c82c086ea3 Paolo Abeni 2020-04-20 518 */
cfde141ea3faa3 Paolo Abeni 2020-04-30 519 ctx->remote_key = mp_opt.sndr_key;
cfde141ea3faa3 Paolo Abeni 2020-04-30 520 ctx->fully_established = mp_opt.mp_capable;
cfde141ea3faa3 Paolo Abeni 2020-04-30 521 ctx->can_ack = mp_opt.mp_capable;
f296234c98a8fc Peter Krystad 2020-03-27 522 } else if (ctx->mp_join) {
f296234c98a8fc Peter Krystad 2020-03-27 523 struct mptcp_sock *owner;
f296234c98a8fc Peter Krystad 2020-03-27 524
f296234c98a8fc Peter Krystad 2020-03-27 525 owner = mptcp_token_get_sock(ctx->token);
f296234c98a8fc Peter Krystad 2020-03-27 526 if (!owner)
729cd6436f359b Paolo Abeni 2020-05-15 527 goto dispose_child;
f296234c98a8fc Peter Krystad 2020-03-27 528
f296234c98a8fc Peter Krystad 2020-03-27 529 ctx->conn = (struct sock *)owner;
f296234c98a8fc Peter Krystad 2020-03-27 530 if (!mptcp_finish_join(child))
729cd6436f359b Paolo Abeni 2020-05-15 531 goto dispose_child;
fc518953bc9c8d Florian Westphal 2020-03-27 532
fc518953bc9c8d Florian Westphal 2020-03-27 533 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
90bf45134d55d6 Paolo Abeni 2020-05-15 534 tcp_rsk(req)->drop_req = true;
cec37a6e41aae7 Peter Krystad 2020-01-21 535 }
cec37a6e41aae7 Peter Krystad 2020-01-21 536 }
cec37a6e41aae7 Peter Krystad 2020-01-21 537
58b09919626bf9 Paolo Abeni 2020-03-13 538 out:
58b09919626bf9 Paolo Abeni 2020-03-13 539 /* dispose of the left over mptcp master, if any */
58b09919626bf9 Paolo Abeni 2020-03-13 540 if (unlikely(new_msk))
9f5ca6a59816b4 Florian Westphal 2020-04-17 541 mptcp_force_close(new_msk);
4c8941de781cf7 Paolo Abeni 2020-04-20 542
4c8941de781cf7 Paolo Abeni 2020-04-20 543 /* check for expected invariant - should never trigger, just help
4c8941de781cf7 Paolo Abeni 2020-04-20 544 * catching eariler subtle bugs
4c8941de781cf7 Paolo Abeni 2020-04-20 545 */
ac2b47fb92c506 Paolo Abeni 2020-04-30 546 WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
4c8941de781cf7 Paolo Abeni 2020-04-20 547 (!mptcp_subflow_ctx(child) ||
4c8941de781cf7 Paolo Abeni 2020-04-20 548 !mptcp_subflow_ctx(child)->conn));
cec37a6e41aae7 Peter Krystad 2020-01-21 549 return child;
f296234c98a8fc Peter Krystad 2020-03-27 550
729cd6436f359b Paolo Abeni 2020-05-15 551 dispose_child:
39884604b11692 Paolo Abeni 2020-05-29 552 subflow_drop_ctx(child);
729cd6436f359b Paolo Abeni 2020-05-15 553 tcp_rsk(req)->drop_req = true;
f296234c98a8fc Peter Krystad 2020-03-27 554 tcp_send_active_reset(child, GFP_ATOMIC);
729cd6436f359b Paolo Abeni 2020-05-15 555 inet_csk_prepare_for_destroy_sock(child);
f296234c98a8fc Peter Krystad 2020-03-27 556 tcp_done(child);
729cd6436f359b Paolo Abeni 2020-05-15 557
729cd6436f359b Paolo Abeni 2020-05-15 558 /* The last child reference will be released by the caller */
729cd6436f359b Paolo Abeni 2020-05-15 559 return child;
cec37a6e41aae7 Peter Krystad 2020-01-21 560 }
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
_______________________________________________
kbuild mailing list -- kbuild(a)lists.01.org
To unsubscribe send an email to kbuild-leave(a)lists.01.org
2 years, 2 months
[PATCH mptcp-next v2] net: mptcp: improve fallback to TCP
by Davide Caratti
keep using MPTCP sockets and a "dummy mapping" in case of fallback to
regular TCP. Skip adding DSS option on send, if TCP fallback has been
done earlier.
Notes: I'm unsure on what to do in mptcp_clean_una() to do a one-time
flush of the retransmit queue, as per Mat's suggestion. Any advice?
Changes since v1
- rebase on top of Paolo's fix for NULL dereference in mptcp_recvmsg()
Changes since RFC v2:
- use a bit in msk->flags, rather than a dedicated boolean in struct
msk. This bit is going to be used in combination with another one,
TCP_FALLBACK_ALLOWED, that is 1 at the first subflow creation
and gets cleared once TCP fallback is no more allowed.
- separate code that adds support for "infinite mapping", and use
the term "dummy" instead of "infinite". Suggested by Mat
- remove inappropriate call to __mptcp_do_fallback() in
mptcp_accept() (Paolo)
Changes since RFC v1:
- use a dedicated member of struct msk to indicate that a fallback
ha happened, use it in case of infinite mapping
- don't delete skb_ext in case of infinite mapping (Mat)
- test the value of pm.subflows on reception of an infinite map to
ensure that no other subflow is currently opened (Mat)
- in mptcp_established_options(), avoid adding TCP options in case
of fallback indication; simplify sendmsg()/recvmsg()/poll() to
keep using the MPTCP socket in case of TCP fallback. Set the
fallback indication in case subflow is not mp_capable after
successful 3-way handshake, instead of flipping 'is_mptcp'
(Paolo/Mat)
- remove deadcode in mptcp_finish_connect, and increment
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK in subflow_finish_connect
(Paolo)
BugLink: https://github.com/multipath-tcp/mptcp_net-next/issues/11
BugLink: https://github.com/multipath-tcp/mptcp_net-next/issues/22
Signed-off-by: Davide Caratti <dcaratti(a)redhat.com>
---
net/mptcp/options.c | 9 ++++-
net/mptcp/protocol.c | 78 ++++++++++----------------------------------
net/mptcp/protocol.h | 34 +++++++++++++++++++
net/mptcp/subflow.c | 46 +++++++++++++++++---------
4 files changed, 90 insertions(+), 77 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 01f1f4cf4902..cf0b59ead1e4 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -624,6 +624,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
+ if (unlikely(mptcp_check_fallback(sk)))
+ return false;
+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@@ -714,7 +717,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
*/
if (!mp_opt->mp_capable) {
subflow->mp_capable = 0;
- tcp_sk(sk)->is_mptcp = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
return false;
}
@@ -814,6 +818,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
+ if (__mptcp_check_fallback(msk))
+ return;
+
mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5d7f4cd2c8e6..1d907519c223 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -52,11 +52,6 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}
-static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
-{
- return msk->first && !sk_is_mptcp(msk->first);
-}
-
static struct socket *mptcp_is_tcpsk(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
@@ -94,7 +89,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
if (unlikely(sock))
return sock;
- if (likely(!__mptcp_needs_tcp_fallback(msk)))
+ if (likely(!__mptcp_check_fallback(msk)))
return NULL;
return msk->subflow;
@@ -229,6 +224,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
if (!skb)
break;
+ if (__mptcp_check_fallback(msk)) {
+ /* if we are running under the workqueue, TCP could have
+ * collapsed skbs between dummy map creation and now
+ * be sure to adjust the size
+ */
+ map_remaining = skb->len;
+ subflow->map_data_len = skb->len;
+ }
+
offset = seq - TCP_SKB_CB(skb)->seq;
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (fin) {
@@ -445,8 +449,15 @@ static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- u64 snd_una = atomic64_read(&msk->snd_una);
bool cleaned = false;
+ u64 snd_una;
+
+ /* on fallback we just need to ignore snd_una, as this is really
+ * plain TCP
+ */
+ if (__mptcp_check_fallback(msk))
+ atomic64_set(&msk->snd_una, msk->write_seq);
+ snd_una = atomic64_read(&msk->snd_una);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -719,7 +730,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
bool tx_ok;
@@ -738,15 +748,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
-fallback:
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
- release_sock(sk);
- pr_debug("fallback passthrough");
- ret = sock_sendmsg(ssock, msg);
- return ret >= 0 ? ret + copied : (copied ? copied : ret);
- }
-
pfrag = sk_page_frag(sk);
restart:
mptcp_clean_una(sk);
@@ -798,17 +799,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
break;
}
- if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
- /* Can happen for passive sockets:
- * 3WHS negotiated MPTCP, but first packet after is
- * plain TCP (e.g. due to middlebox filtering unknown
- * options).
- *
- * Fall back to TCP.
- */
- release_sock(ssk);
- goto fallback;
- }
copied += ret;
@@ -951,7 +941,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int copied = 0;
int target;
long timeo;
@@ -960,16 +949,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EOPNOTSUPP;
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
-fallback:
- release_sock(sk);
- pr_debug("fallback-read subflow=%p",
- mptcp_subflow_ctx(ssock->sk));
- copied = sock_recvmsg(ssock, msg, flags);
- return copied;
- }
-
timeo = sock_rcvtimeo(sk, nonblock);
len = min_t(size_t, len, INT_MAX);
@@ -1032,9 +1011,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock))
- goto fallback;
}
if (skb_queue_empty(&sk->sk_receive_queue)) {
@@ -1659,12 +1635,6 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);
- if (!subflow->mp_capable) {
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- return;
- }
-
pr_debug("msk=%p, token=%u", sk, subflow->token);
mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
@@ -1964,21 +1934,9 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
- struct socket *ssock;
__poll_t mask = 0;
msk = mptcp_sk(sk);
- lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (!ssock)
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock) {
- mask = ssock->ops->poll(file, ssock, wait);
- release_sock(sk);
- return mask;
- }
-
- release_sock(sk);
sock_poll_wait(file, sock, wait);
lock_sock(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0592fec3418b..2b39d5344d95 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -89,6 +89,7 @@
#define MPTCP_SEND_SPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
+#define MPTCP_FALLBACK_DONE 4
struct mptcp_options_received {
u64 sndr_key;
@@ -461,4 +462,37 @@ static inline bool before64(__u64 seq1, __u64 seq2)
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
+static inline bool __mptcp_check_fallback(struct mptcp_sock *msk)
+{
+ return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline bool mptcp_check_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ return __mptcp_check_fallback(msk);
+}
+
+static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+{
+ if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
+ pr_debug("TCP fallback already done (msk=%p)", msk);
+ return;
+ }
+ set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline void mptcp_do_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ __mptcp_do_fallback(msk);
+}
+
+#define pr_fallback(a) do { pr_debug("%s:fallback to TCP (msk=%p)",\
+ __FUNCTION__, a); } while (0)
+
#endif /* __MPTCP_PROTOCOL_H */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 10b4770a1419..8245395f8354 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -223,7 +223,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
- struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -237,6 +236,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;
subflow->conn_finished = 1;
+ subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
+ pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp && mp_opt.mp_capable) {
@@ -252,21 +253,19 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
} else if (subflow->request_mptcp) {
- tp->is_mptcp = 0;
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(subflow->conn));
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
}
- if (!tp->is_mptcp)
+ if (mptcp_check_fallback(sk))
return;
if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
-
- if (skb) {
- pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
- }
} else if (subflow->mp_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -286,9 +285,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (skb)
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
-
if (!mptcp_finish_join(sk))
goto do_reset;
@@ -564,7 +560,8 @@ enum mapping_status {
MAPPING_OK,
MAPPING_INVALID,
MAPPING_EMPTY,
- MAPPING_DATA_FIN
+ MAPPING_DATA_FIN,
+ MAPPING_DUMMY
};
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
@@ -628,6 +625,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (!skb)
return MAPPING_EMPTY;
+ if (mptcp_check_fallback(ssk))
+ return MAPPING_DUMMY;
+
mpext = mptcp_get_ext(skb);
if (!mpext || !mpext->use_map) {
if (!subflow->map_valid && !skb->len) {
@@ -769,6 +769,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
ssk->sk_err = EBADMSG;
goto fatal;
}
+ if (status == MAPPING_DUMMY) {
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
+ subflow->ssn_offset;
+ return true;
+ }
if (status != MAPPING_OK)
return false;
@@ -892,14 +902,18 @@ static void subflow_data_ready(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
- if (!subflow->mp_capable && !subflow->mp_join) {
- subflow->tcp_data_ready(sk);
-
+ msk = mptcp_sk(parent);
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join);
+
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
}
@@ -1122,7 +1136,7 @@ static void subflow_state_change(struct sock *sk)
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
*/
- if (subflow->mp_capable && mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
--
2.18.1
2 years, 2 months
[PATCH] mptcp: bugfix for RM_ADDR option parsing
by Geliang Tang
In MPTCPOPT_RM_ADDR option parsing, the pointer "ptr" pointed to the
"Subtype" octet, the pointer "ptr+1" pointed to the "Address ID" octet:
+-------+-------+---------------+
|Subtype|(resvd)| Address ID |
+-------+-------+---------------+
| |
ptr ptr+1
We should set mp_opt->rm_id to the value of "ptr+1", not "ptr". This patch
will fix this bug.
Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
---
net/mptcp/options.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 01f1f4cf4902..490b92534afc 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -273,6 +273,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
break;
+ ptr++;
+
mp_opt->rm_addr = 1;
mp_opt->rm_id = *ptr++;
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
--
2.17.1
2 years, 2 months