[PATCH] mptcp: avoid validating MP_CAPABLE keys on 3way HS handling
by Paolo Abeni
The server side already stored the key on syn, and 3way HS ack
reception is fragile: the ack can be lost, and the following
data pkts processed instead.
This fixes the pending self-test issue in my testbed.
Suggested-by: Peter Krystad <peter.krystad(a)linux.intel.com>
Suggested-by: Christoph Paasch <cpaasch(a)apple.com>
Fixes: af550dd282ca ("mptcp: Create SUBFLOW socket for incoming connections")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
--
I'm ok with squashing into the Fixes commit, but that will likely cause
conflicts.
We can possible have [in the future] a similar issue with MP_JOIN, as we
validate the token on 3way HS ack reception, and that is fragile ??!
---
net/mptcp/subflow.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fdc5aecab897..d91d817b8779 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -143,14 +143,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- if (subflow_req->mp_capable) {
- opt_rx.mptcp.mp_capable = 0;
- mptcp_get_options(skb, &opt_rx);
- if (!opt_rx.mptcp.mp_capable ||
- subflow_req->local_key != opt_rx.mptcp.rcvr_key ||
- subflow_req->remote_key != opt_rx.mptcp.sndr_key)
- return NULL;
- } else if (subflow_req->mp_join) {
+ /* if the sk is MP_CAPABLE, we already received the client key */
+ if (!subflow_req->mp_capable && subflow_req->mp_join) {
opt_rx.mptcp.mp_join = 0;
mptcp_get_options(skb, &opt_rx);
if (!opt_rx.mptcp.mp_join || token_join_valid(req, &opt_rx))
--
2.20.1
1 year, 5 months
[PATCH v2] mptcp: harmonize locking on all socket operations.
by Paolo Abeni
The locking schema implied by sendmsg(), recvmsg(), etc.
requires acquiring the msk's socket lock before manipulating
the msk internal status.
Additionally, we can't acquire the msk->subflow socket lock while holding
the msk lock, due to mptcp_finish_connect().
Many socket operations do not enforce the required locking, e.g. we have
several patterns alike:
if (msk->subflow)
// do something with msk->subflow
or:
if (!msk->subflow)
// allocate msk->subflow
all without any lock acquired.
They can race with each other and with mptcp_finish_connect() causing
UAF, null ptr dereference and/or memory leaks.
This patch ensures that all mptcp socket operations access and manipulate
msk->subflow under the msk socket lock. To avoid breaking the locking
assumption introduced by mptcp_finish_connect(), while avoiding UAF
issues, we acquire a reference to the msk->subflow, where needed.
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
v1 -> v2:
- fix msk->subflow misusage in mptcp_getsockopt()
rfc -> v1:
- rename *mptcp_socket_get_ref() as *mptcp_fallback_get_ref()
- use subflow_create_socket() in mptcp_socket_create_get() instead
of open-codying it.
- use mptcp_fallback_get_ref() instead of mptcp_socket_create_get() in
mptcp_stream_accept()
---
net/mptcp/protocol.c | 191 +++++++++++++++++++++++++++++++------------
1 file changed, 137 insertions(+), 54 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 774ed25d3b6d..d31f5b48a566 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -24,6 +24,28 @@ static inline bool before64(__u64 seq1, __u64 seq2)
#define after64(seq2, seq1) before64(seq1, seq2)
+static struct socket *__mptcp_fallback_get_ref(const struct mptcp_sock *msk)
+{
+ sock_owned_by_me((const struct sock *)msk);
+
+ if (!msk->subflow)
+ return NULL;
+
+ sock_hold(msk->subflow->sk);
+ return msk->subflow;
+}
+
+static struct socket *mptcp_fallback_get_ref(const struct mptcp_sock *msk)
+{
+ struct socket *ssock;
+
+ lock_sock((struct sock *)msk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ release_sock((struct sock *)msk);
+
+ return ssock;
+}
+
static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
{
struct subflow_context *subflow;
@@ -158,17 +180,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
long timeo;
pr_debug("msk=%p", msk);
- if (msk->subflow) {
+ lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sk);
pr_debug("fallback passthrough");
- return sock_sendmsg(msk->subflow, msg);
+ ret = sock_sendmsg(ssock, msg);
+ sock_put(ssock->sk);
+ return ret;
}
- lock_sock(sk);
ssk = mptcp_subflow_get_ref(msk);
if (!ssk) {
release_sock(sk);
@@ -364,18 +391,22 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct subflow_context *subflow;
struct mptcp_read_arg arg;
read_descriptor_t desc;
+ struct socket *ssock;
struct tcp_sock *tp;
struct sock *ssk;
int copied = 0;
long timeo;
- if (msk->subflow) {
- pr_debug("fallback-read subflow=%p",
- subflow_ctx(msk->subflow->sk));
- return sock_recvmsg(msk->subflow, msg, flags);
+ lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sk);
+ pr_debug("fallback-read subflow=%p", subflow_ctx(ssock->sk));
+ copied = sock_recvmsg(ssock, msg, flags);
+ sock_put(ssock->sk);
+ return copied;
}
- lock_sock(sk);
ssk = mptcp_subflow_get_ref(msk);
if (!ssk) {
release_sock(sk);
@@ -673,15 +704,19 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
{
struct mptcp_sock *msk = mptcp_sk(sk);
char __kernel *optval;
+ struct socket *ssock;
+ int ret;
/* will be treated as __user in tcp_setsockopt */
optval = (char __kernel __force *)uoptval;
pr_debug("msk=%p", msk);
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return kernel_setsockopt(msk->subflow, level, optname, optval,
- optlen);
+ ssock = mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ pr_debug("subflow=%p", ssock->sk);
+ ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
+ sock_put(ssock->sk);
+ return ret;
}
/* @@ the meaning of setsockopt() when the socket is connected and
@@ -696,16 +731,20 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
struct mptcp_sock *msk = mptcp_sk(sk);
char __kernel *optval;
int __kernel *option;
+ struct socket *ssock;
+ int ret;
/* will be treated as __user in tcp_getsockopt */
optval = (char __kernel __force *)uoptval;
option = (int __kernel __force *)uoption;
pr_debug("msk=%p", msk);
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return kernel_getsockopt(msk->subflow, level, optname, optval,
- option);
+ ssock = mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ pr_debug("subflow=%p", ssock->sk);
+ ret = kernel_getsockopt(ssock, level, optname, optval, option);
+ sock_put(ssock->sk);
+ return ret;
}
/* @@ the meaning of setsockopt() when the socket is connected and
@@ -817,9 +856,35 @@ static struct proto mptcp_prot = {
.no_autobind = 1,
};
+static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct socket *ssock;
+ int err;
+
+ lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock)
+ goto release;
+
+ err = subflow_create_socket(sk, &ssock);
+ if (err) {
+ ssock = ERR_PTR(err);
+ goto release;
+ }
+
+ msk->subflow = ssock;
+ sock_hold(ssock->sk);
+
+release:
+ release_sock(sk);
+ return ssock;
+}
+
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
int err = -ENOTSUPP;
pr_debug("msk=%p", msk);
@@ -827,18 +892,20 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (uaddr->sa_family != AF_INET) // @@ allow only IPv4 for now
return err;
- if (!msk->subflow) {
- err = subflow_create_socket(sock->sk, &msk->subflow);
- if (err)
- return err;
- }
- return inet_bind(msk->subflow, uaddr, addr_len);
+ ssock = mptcp_socket_create_get(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+
+ err = inet_bind(ssock, uaddr, addr_len);
+ sock_put(ssock->sk);
+ return err;
}
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
int err = -ENOTSUPP;
pr_debug("msk=%p", msk);
@@ -846,19 +913,20 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (uaddr->sa_family != AF_INET) // @@ allow only IPv4 for now
return err;
- if (!msk->subflow) {
- err = subflow_create_socket(sock->sk, &msk->subflow);
- if (err)
- return err;
- }
+ ssock = mptcp_socket_create_get(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
- return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
+ err = inet_stream_connect(ssock, uaddr, addr_len, flags);
+ sock_put(ssock->sk);
+ return err;
}
static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
int peer)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
struct sock *ssk;
int ret;
@@ -876,16 +944,20 @@ static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
return inet_getname(sock, uaddr, peer);
}
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return inet_getname(msk->subflow, uaddr, peer);
+ lock_sock(sock->sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sock->sk);
+ pr_debug("subflow=%p", ssock->sk);
+ ret = inet_getname(ssock, uaddr, peer);
+ sock_put(ssock->sk);
+ return ret;
}
/* @@ the meaning of getname() for the remote peer when the socket
* is connected and there are multiple subflows is not defined.
* For now just use the first subflow on the list.
*/
- lock_sock(sock->sk);
ssk = mptcp_subflow_get_ref(msk);
if (!ssk) {
release_sock(sock->sk);
@@ -901,29 +973,36 @@ static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
int err;
pr_debug("msk=%p", msk);
- if (!msk->subflow) {
- err = subflow_create_socket(sock->sk, &msk->subflow);
- if (err)
- return err;
- }
- return inet_listen(msk->subflow, backlog);
+ ssock = mptcp_socket_create_get(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+
+ err = inet_listen(ssock, backlog);
+ sock_put(ssock->sk);
+ return err;
}
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
+ int err;
pr_debug("msk=%p", msk);
- if (!msk->subflow)
+ ssock = mptcp_fallback_get_ref(msk);
+ if (!ssock)
return -EINVAL;
- return inet_accept(sock, newsock, flags, kern);
+ err = inet_accept(sock, newsock, flags, kern);
+ sock_put(ssock->sk);
+ return err;
}
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
@@ -932,13 +1011,19 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct subflow_context *subflow;
const struct mptcp_sock *msk;
struct sock *sk = sock->sk;
+ struct socket *ssock;
__poll_t ret = 0;
msk = mptcp_sk(sk);
- if (msk->subflow)
- return tcp_poll(file, msk->subflow, wait);
-
lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sk);
+ ret = tcp_poll(file, ssock, wait);
+ sock_put(ssock->sk);
+ return ret;
+ }
+
mptcp_for_each_subflow(msk, subflow) {
struct socket *tcp_sock;
@@ -954,23 +1039,21 @@ static int mptcp_shutdown(struct socket *sock, int how)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct subflow_context *subflow;
+ struct socket *ssock;
int ret = 0;
pr_debug("sk=%p, how=%d", msk, how);
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return kernel_sock_shutdown(msk->subflow, how);
+ lock_sock(sock->sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sock->sk);
+ pr_debug("subflow=%p", ssock->sk);
+ ret = kernel_sock_shutdown(ssock, how);
+ sock_put(ssock->sk);
+ return ret;
}
- /* protect against concurrent mptcp_close(), so that nobody can
- * remove entries from the conn list and walking the list
- * is still safe.
- *
- * We can't use MPTCP socket lock to protect conn_list changes,
- * as we need to update it from the BH via the mptcp_finish_connect()
- */
- lock_sock(sock->sk);
mptcp_for_each_subflow(msk, subflow) {
struct socket *tcp_socket;
--
2.20.1
1 year, 5 months
[PATCH 0/2] mptcp: simplify crypto.c
by Davide Caratti
his series wants to simplify, and hopefully improve, random generation
of keys and nonces in crypto.c, While at it, I removed some unneeded
#includes on top of the file. Any feedbacks are welcome!
Note:
in case you think the code is ok, I will propose to squash it into
commit 34f09d53931a ("mptcp: Add key generation and token tree")
Changes since RFC:
- use get_random_bytes() instead of siphash, thanks to Florian / Mat
Davide Caratti (2):
net: mptcp: remove useless #includes in crypto.c
net: mptcp: randomness improvements for crypto.c
net/mptcp/crypto.c | 77 +-------------------------------------------
net/mptcp/protocol.c | 1 -
net/mptcp/protocol.h | 11 -------
net/mptcp/token.c | 49 +++-------------------------
4 files changed, 5 insertions(+), 133 deletions(-)
--
2.20.1
1 year, 5 months
[Weekly meetings] MoM - 18th of July 2019
by Matthieu Baerts
Hello,
Yesterday, we had our 58th meeting with Peter and Ossama (Intel OTC),
Davide (Red Hat) and myself (Tessares).
Thanks again for this new good meeting!
Here are the minutes of the meeting:
Accepted patches:
- mptcp: selftests: fix .gitignore for mptcp selftests:
- by Davide
- Accepted by Matth
- "Squashed" in "mptcp: selftests: Add capture option"
Pending patches:
- mptcp: Make MPTCP socket block/wakeup ignore sk_receive_queue :
- by Mat
- commented by Paolo
- Waiting for Mat to be back to comment on Paolo's (crazy???)
alternative
- mptcp: harmonize locking on all socket operations:
- by Paolo
- RFC and v1 sent
- RFC/v1 has been commented by Mat, Peter and Florian
- v2 has been shared
- waiting for approval
- Peter will have a look
- mptcp: simplify crypto.c:
- by Davide
- new version
- waiting for approval
- Davide: using the random bit has it is now done can be an
issue if another namespace also create connections at the same time, we
could get the same info.
- maybe we can mention that in a comment (+comment about hash)
and that might be enough for a v1 (for MPTCP)
- *Davide* will send a new version
- mptcp: new sysctl to control the activation per NS:
- by Matthieu
- RFC
- commented by Ossama and Peter
- some questions are still opened:
- see notes:
- Is it OK to reserve space per ns via the
"pernet_operations" structure? Because MPTCP would not be compiled as a
module, we could directly store stuff in the net structure as other
parts of the code do but maybe better to keep MPTCP code on the side as
done here.
- In mptcp.org, all sysctls are prepended with 'mptcp_',
e.g. 'net.mptcp.mptcp_enabled'. Do we need this? Is it better to keep
the same names if possible or better to differentiate? In this version,
'mptcp_' is not prepended.
- This sysctl will only block new sockets to be created.
Is it enough?
- ENOPROTOOPT is returned, maybe something else to
return? EPERM is maybe too generic? ENOTSUPP is not translated by perror().
- Should we start the documentation now for the sysctl?
- A simple test has been added, because it is not linked
to the rest, I put separated as first test.
- Of course do not hesitate to comment. Even on the idea
of having a sysctl for this purpose.
- security:
- sysctl from net tree can be modified by root or
CAP_NET_ADMIN. Should we require both?
→ a comment will be added in the code, the same
behavior will be kept, same as tcp_fastopen and most of the ones from "net".
- new file -- ctrl.c -- to control MPTCP (sysctl, sockopt,
etc.) → and also to init the protocol?
- mptcp: explicitly retransmit MP_CAPABLE 3whs ack:
- mptcp: avoid validating MP_CAPABLE keys on 3way HS handling:
- by Paolo
- should fix self-tests failures
- suggested by Christoph/Peter
- 2nd one can be applied, 1st one can be discarded
- Implement outgoing MP_JOIN:
- patch-set (9 patches) by Peter
- commented by Paolo
- negotiate the connection fine, found bug on the receiving side
- Peter uses the mptcp.org implementation for the other side,
didn't try to use mptcp upstream on both ends.
- one patch will conflict with Davide's changes, that's fine
roadmap proposal/discussion:
- see: https://lists.01.org/pipermail/mptcp/2019-July/001505.html
- could be good to talk about the locking issues with Paolo in
another meeting, maybe next week
- seems that Peter is not working on the mentioned topics, maybe Mat is
Feedback from netdev (RFC patch):
- /
Feedback from LPC:
- /
Next meeting:
- We propose to have it next Thursday, the 25th of July.
- Usual time: 16:00 UTC (9am PDT, 6pm CEST)
- Still open to everyone!
- https://annuel2.framapad.org/p/mptcp_upstreaming_20190725
Feel free to comment on these points and propose new ones for the next
meeting!
Talk to you next week,
Matthieu
--
Matthieu Baerts | R&D Engineer
matthieu.baerts(a)tessares.net
Tessares SA | Hybrid Access Solutions
www.tessares.net
1 Avenue Jean Monnet, 1348 Louvain-la-Neuve, Belgium
1 year, 6 months
about self-tests failures
by Paolo Abeni
Hi all,
I spent some time debugging the self-test failures, and I think I got
the root cause.
The tests fails when, due to netem, the 3 way handshake final ack
packet is dropped.
When that happen, the sender never emit the MP_CAPABLE option in later
retrasmission (has the '!subflow->fourth_ack' condition in
mptcp_established_options_mp() is always false).
As a result, on the server side, subflow_syn_recv_sock() always returns
NULL - the session keys check fails - the connection is never
established, and the server's select() never returns.
I'm trying to address the issue forcing MP_CAPABLE option in
retransmitted 3 way handshake ack packets, but I'm facing several
issues, as the TCP stack tries hard to avoid retransmitting 'pure ack',
preferring the following data packet, instead.
We can't attach the MP_CAPABLE opt to the next data segment, or such
segment will lack the require DSS - unless we place both DSS and
MP_CAPABLE in the same pkt.
@Christoph: can you please describe how this issue (3way hs ack drop)
is handled from the mptcp.org implementation?
Thanks,
Paolo
1 year, 6 months
[RFC PATCH] mptcp: explicitly retransmit MP_CAPABLE 3whs ack
by Paolo Abeni
If the ack concluding the 3way handshake is lost (e.g. due to
lossy network connection), the mptcp listener will hang on accept.
That happens because the client does not provide the required
rcvr_key/sndr_key info via the MP_CAPABLE option in the following
packets.
>From client perspective the socket is already into established status:
later pkts will carry a DSS option instead.
This patch addresses the issue, forcing the transmission of a forged,
duplicated, 3way handshake ack with MP_CAPABLE option if the retransmission
of a data packet is detected before the peer send back any MPTCP-related
data packet - demonstrating that the MPTCP handshake is completed
successfully.
This apprently fixes the pending self-tests issue in my setup.
Fixes: af061d2af1c2 ("mptcp: Write MPTCP DSS headers to outgoing data packets")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
This can possibly looks like a nasty hack and likelly it is ;)
I'd like to share the code to demostrate the issue and discuss possible solutions
Note that the self test failure can be reproduce in a deterministic way adding:
ip netns exec ns2 iptables -I OUTPUT -o ns2eth3 -p tcp --dport 10000 -m connbytes \
--connbytes-mode packets --connbytes-dir original --connbytes 2:3 -j DROP
at setup time.
Other alternative approaches would be:
- let the server reset the connection if detect the lack of MP_CAPABLE in the 3th
pkts (ack lost, received a later data packet)
- let the server fall back to plain TCP in the above conditions
Both should have a simpler/cleaner implementation but both would make mptcp behavior
less deterministic.
Othewise, is there any other better way to check for the 'this is a 3way HS ack rentransmission'
condition? Any better way to push ack pkts?
---
net/mptcp/options.c | 41 ++++++++++++++++++++++++++++++++++++++---
net/mptcp/protocol.h | 3 +++
net/mptcp/subflow.c | 3 +++
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 58215f19829a..d3a890e842e1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -316,13 +316,46 @@ void mptcp_rcv_synsent(struct sock *sk)
}
}
-static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size,
+static bool mptcp_is_twhs_ack(struct sock *sk,
+ const struct sk_buff *skb)
+{
+ struct subflow_context *subflow = subflow_ctx(sk);
+
+ if (!subflow->fourth_ack) {
+ if (skb) {
+ subflow->twhs_seq = TCP_SKB_CB(skb)->seq;
+ subflow->twhs_ack = TCP_SKB_CB(skb)->ack_seq;
+ }
+ return true;
+ }
+
+ if (!skb)
+ return false;
+
+ if (!subflow->fourth_ack_received &&
+ subflow->twhs_ack == TCP_SKB_CB(skb)->ack_seq &&
+ !after(TCP_SKB_CB(skb)->end_seq, tcp_sk(sk)->snd_nxt)) {
+ /* this is a retransmission and no prof MP_CAPABLE landed
+ * on the peer, force an ack for it
+ */
+ if (skb->len) {
+ tcp_send_ack(sk);
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+ unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
struct subflow_context *subflow = subflow_ctx(sk);
- if (!subflow->fourth_ack && remaining >= TCPOLEN_MPTCP_MPC_ACK) {
+ if (mptcp_is_twhs_ack(sk, skb) &&
+ remaining >= TCPOLEN_MPTCP_MPC_ACK) {
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
@@ -429,7 +462,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
opts->suboptions = 0;
- if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) {
+ if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
ret = true;
@@ -479,6 +512,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
void mptcp_attach_dss(struct sock *sk, struct sk_buff *skb,
struct tcp_options_received *opt_rx)
{
+ struct subflow_context *subflow = subflow_ctx(sk);
struct mptcp_options_received *mp_opt;
struct mptcp_ext *mpext;
@@ -487,6 +521,7 @@ void mptcp_attach_dss(struct sock *sk, struct sk_buff *skb,
if (!mp_opt->dss)
return;
+ subflow->fourth_ack_received = 1;
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
return;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7f15f6aab93d..bd9540515334 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -147,12 +147,15 @@ struct subflow_context {
u64 map_seq;
u32 map_subflow_seq;
u32 ssn_offset;
+ u32 twhs_seq;
+ u32 twhs_ack;
u16 map_data_len;
u16 request_mptcp : 1, /* send MP_CAPABLE */
request_cksum : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
fourth_ack : 1, /* send initial DSS */
+ fourth_ack_received : 1,
version : 4,
conn_finished : 1,
use_checksum : 1,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fdc5aecab897..ccbf77f7a451 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -271,6 +271,7 @@ static int subflow_ulp_init(struct sock *sk)
icsk->icsk_af_ops = &subflow_specific;
ctx->tcp_sk_data_ready = sk->sk_data_ready;
ctx->use_checksum = 0;
+ ctx->fourth_ack_received = 0;
sk->sk_data_ready = subflow_data_ready;
out:
return err;
@@ -311,6 +312,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
if (subflow_req->mp_capable) {
new_ctx->mp_capable = 1;
new_ctx->fourth_ack = 1;
+ new_ctx->fourth_ack_received = 1;
new_ctx->remote_key = subflow_req->remote_key;
new_ctx->local_key = subflow_req->local_key;
new_ctx->token = subflow_req->token;
@@ -320,6 +322,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
} else if (subflow_req->mp_join) {
new_ctx->mp_join = 1;
new_ctx->fourth_ack = 1;
+ new_ctx->fourth_ack_received = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->local_id = subflow_req->local_id;
new_ctx->token = subflow_req->token;
--
2.20.1
1 year, 6 months
[PATCH] mptcp: selftests: fix .gitignore for mptcp selftests
by Davide Caratti
since mptcp_connect.sh can generate pcap traces when the '-c' argument is
used, add *.pcap to .gitignore, to avoid untracked files in the output of
"git status".
[root@rhel7 mptcp]# git status
# HEAD detached at origin/mptcp-prova
# Untracked files:
# (use "git add <file>..." to include in what will be committed)
#
# ns1-ns1-MPTCP-MPTCP-10.0.1.1.pcap
# ns1-ns1-MPTCP-TCP-10.0.1.1.pcap
# ns1-ns1-TCP-MPTCP-10.0.1.1.pcap
[...]
Fixes: 4dc10c542166 ("mptcp: selftests: Add capture option")
Signed-off-by: Davide Caratti <dcaratti(a)redhat.com>
---
tools/testing/selftests/net/mptcp/.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 3143fb05a511..d72f07642738 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1 +1,2 @@
mptcp_connect
+*.pcap
--
2.20.1
1 year, 6 months
[Weekly meetings] MoM - 27th of June 2019
by Matthieu Baerts
Hello,
Yesterday, we had our 57th meeting with Peter (Intel OTC), Paolo and
Davide (Red Hat) and myself (Tessares).
Thanks again for this new good meeting!
Here are the minutes of the meeting:
Accepted patches:
- Change sock->sk_protocol to a 16-bit value:
- by Mat
- accepted at the last meeting
- 2 new topic, 1 "squashed" commit.
Pending patches:
- mptcp: Make MPTCP socket block/wakeup ignore sk_receive_queue :
- by Mat
- commented by Paolo
- Paolo is looking at the different other possibilities, hard to
reuse the existing helpers, maybe use the wait event API. Paolo is
investigating that. Paolo will reply later on the ML.
- mptcp: harmonize locking on all socket operations:
- by Paolo
- RFC and v1 sent
- commented by Mat, Peter and Florian
- v2 has been sent just after the meeting
- mptcp: simplify crypto.c:
- by Davide
- reviewed by Mat, Florian and Christoph
- a new version has been sent just after the meeting
Error kselftest:
- looks similar to the previous ones we had:
- Paolo was testing with the pending patches but it seems the
problem is somewhere else (not fixed with these patches)
Sync with net-next:
- got a conflict
- easy to resolve
- tests are still OK
Feedback from netdev:
- nobody at the meeting receives any feedback
Feedback from LPC:
- abstract has been sent by Mat
- who would like to participate (if we are selected)?
- Peter would be glad to help (presentation and/or paper)
- Paolo would be able to help for the paper, not sure yet if he
will be able to go on site.
- no feedback yet.
sysctl to disable MPTCP:
- for security reason
- could be turned off by default, only admin can turn on MPTCP
- Matth has a prototype
- Matth has sent that just after the meeting
icsk->icsk_ulp_data is probably going to be RCU-ified:
- some (small) fix to annotation will probably be needed to fix
sparse warning
- affect only kTLS because it can be compiled as a module
- Jakub Kicinski from Netronome has a patch for that
- after that, we will need to add an annotation in MPTCP code (ULP)
to avoid sparse warning
- could be good to execute sparse on the CI job → *@Matth*
- anybody can propose updates for the scripts in the "scripts"
branch ;-)
- anybody is testing MPTCP with Sparse?
- Not from someone at the meeting
- *@Davide* will do that this week
About complex patches:
- recreating them from scratch will be complex (to re-create the
"natural" evolution)
- rebasing/squashing should be the way to go
- when to do that?:
- We can do that "just" before the submission
- If the refs are set in the commit message, Matth can do this
job sooner, especially if we already know where it needs to go.
- but in some cases, that might be good to dedicate a commit to
show an important decision we took.
Next meeting:
- We propose to have it next Thursday, the 18th of July.
- Usual time: 16:00 UTC (9am PDT, 6pm CEST)
- Still open to everyone!
- https://annuel2.framapad.org/p/mptcp_upstreaming_20190718
Feel free to comment on these points and propose new ones for the next
meeting!
Talk to you next week,
Matthieu
--
Matthieu Baerts | R&D Engineer
matthieu.baerts(a)tessares.net
Tessares SA | Hybrid Access Solutions
www.tessares.net
1 Avenue Jean Monnet, 1348 Louvain-la-Neuve, Belgium
1 year, 6 months
[PATCH] mptcp: harmonize locking on all socket operations.
by Paolo Abeni
The locking schema implied by sendmsg(), recvmsg(), etc.
requires acquiring the msk's socket lock before manipulating
the msk internal status.
Additionally, we can't acquire the msk->subflow socket lock while holding
the msk lock, due to mptcp_finish_connect().
Many socket operations do not enforce the required locking, e.g. we have
several patterns alike:
if (msk->subflow)
// do something with msk->subflow
or:
if (!msk->subflow)
// allocate msk->subflow
all without any lock acquired.
They can race with each other and with mptcp_finish_connect() causing
UAF, null ptr dereference and/or memory leaks.
This patch ensures that all mptcp socket operations access and manipulate
msk->subflow under the msk socket lock. To avoid breaking the locking
assumption introduced by mptcp_finish_connect(), while avoiding UAF
issues, we acquire a reference to the msk->subflow, where needed.
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
rfc -> v1:
- rename *mptcp_socket_get_ref() as *mptcp_fallback_get_ref()
- use subflow_create_socket() in mptcp_socket_create_get() instead
of open-codying it.
- use mptcp_fallback_get_ref() instead of mptcp_socket_create_get() in
mptcp_stream_accept()
---
net/mptcp/protocol.c | 189 +++++++++++++++++++++++++++++++------------
1 file changed, 136 insertions(+), 53 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 774ed25d3b6d..619ac2a5022f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -24,6 +24,28 @@ static inline bool before64(__u64 seq1, __u64 seq2)
#define after64(seq2, seq1) before64(seq1, seq2)
+static struct socket *__mptcp_fallback_get_ref(const struct mptcp_sock *msk)
+{
+ sock_owned_by_me((const struct sock *)msk);
+
+ if (!msk->subflow)
+ return NULL;
+
+ sock_hold(msk->subflow->sk);
+ return msk->subflow;
+}
+
+static struct socket *mptcp_fallback_get_ref(const struct mptcp_sock *msk)
+{
+ struct socket *ssock;
+
+ lock_sock((struct sock *)msk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ release_sock((struct sock *)msk);
+
+ return ssock;
+}
+
static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
{
struct subflow_context *subflow;
@@ -158,17 +180,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
long timeo;
pr_debug("msk=%p", msk);
- if (msk->subflow) {
+ lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sk);
pr_debug("fallback passthrough");
- return sock_sendmsg(msk->subflow, msg);
+ ret = sock_sendmsg(ssock, msg);
+ sock_put(ssock->sk);
+ return ret;
}
- lock_sock(sk);
ssk = mptcp_subflow_get_ref(msk);
if (!ssk) {
release_sock(sk);
@@ -364,18 +391,22 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct subflow_context *subflow;
struct mptcp_read_arg arg;
read_descriptor_t desc;
+ struct socket *ssock;
struct tcp_sock *tp;
struct sock *ssk;
int copied = 0;
long timeo;
- if (msk->subflow) {
- pr_debug("fallback-read subflow=%p",
- subflow_ctx(msk->subflow->sk));
- return sock_recvmsg(msk->subflow, msg, flags);
+ lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sk);
+ pr_debug("fallback-read subflow=%p", subflow_ctx(ssock->sk));
+ copied = sock_recvmsg(ssock, msg, flags);
+ sock_put(ssock->sk);
+ return copied;
}
- lock_sock(sk);
ssk = mptcp_subflow_get_ref(msk);
if (!ssk) {
release_sock(sk);
@@ -673,15 +704,19 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
{
struct mptcp_sock *msk = mptcp_sk(sk);
char __kernel *optval;
+ struct socket *ssock;
+ int ret;
/* will be treated as __user in tcp_setsockopt */
optval = (char __kernel __force *)uoptval;
pr_debug("msk=%p", msk);
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return kernel_setsockopt(msk->subflow, level, optname, optval,
- optlen);
+ ssock = mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ pr_debug("subflow=%p", ssock->sk);
+ ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
+ sock_put(ssock->sk);
+ return ret;
}
/* @@ the meaning of setsockopt() when the socket is connected and
@@ -696,16 +731,20 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
struct mptcp_sock *msk = mptcp_sk(sk);
char __kernel *optval;
int __kernel *option;
+ struct socket *ssock;
+ int ret;
/* will be treated as __user in tcp_getsockopt */
optval = (char __kernel __force *)uoptval;
option = (int __kernel __force *)uoption;
pr_debug("msk=%p", msk);
+ ssock = mptcp_fallback_get_ref(msk);
if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return kernel_getsockopt(msk->subflow, level, optname, optval,
- option);
+ pr_debug("subflow=%p", ssock->sk);
+ ret = kernel_getsockopt(ssock, level, optname, optval, option);
+ sock_put(ssock->sk);
+ return ret;
}
/* @@ the meaning of setsockopt() when the socket is connected and
@@ -817,9 +856,35 @@ static struct proto mptcp_prot = {
.no_autobind = 1,
};
+static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct socket *ssock;
+ int err;
+
+ lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock)
+ goto release;
+
+ err = subflow_create_socket(sk, &ssock);
+ if (err) {
+ ssock = ERR_PTR(err);
+ goto release;
+ }
+
+ msk->subflow = ssock;
+ sock_hold(ssock->sk);
+
+release:
+ release_sock(sk);
+ return ssock;
+}
+
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
int err = -ENOTSUPP;
pr_debug("msk=%p", msk);
@@ -827,18 +892,20 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (uaddr->sa_family != AF_INET) // @@ allow only IPv4 for now
return err;
- if (!msk->subflow) {
- err = subflow_create_socket(sock->sk, &msk->subflow);
- if (err)
- return err;
- }
- return inet_bind(msk->subflow, uaddr, addr_len);
+ ssock = mptcp_socket_create_get(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+
+ err = inet_bind(ssock, uaddr, addr_len);
+ sock_put(ssock->sk);
+ return err;
}
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
int err = -ENOTSUPP;
pr_debug("msk=%p", msk);
@@ -846,19 +913,20 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (uaddr->sa_family != AF_INET) // @@ allow only IPv4 for now
return err;
- if (!msk->subflow) {
- err = subflow_create_socket(sock->sk, &msk->subflow);
- if (err)
- return err;
- }
+ ssock = mptcp_socket_create_get(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
- return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
+ err = inet_stream_connect(ssock, uaddr, addr_len, flags);
+ sock_put(ssock->sk);
+ return err;
}
static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
int peer)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
struct sock *ssk;
int ret;
@@ -876,16 +944,20 @@ static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
return inet_getname(sock, uaddr, peer);
}
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return inet_getname(msk->subflow, uaddr, peer);
+ lock_sock(sock->sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sock->sk);
+ pr_debug("subflow=%p", ssock->sk);
+ ret = inet_getname(ssock, uaddr, peer);
+ sock_put(ssock->sk);
+ return ret;
}
/* @@ the meaning of getname() for the remote peer when the socket
* is connected and there are multiple subflows is not defined.
* For now just use the first subflow on the list.
*/
- lock_sock(sock->sk);
ssk = mptcp_subflow_get_ref(msk);
if (!ssk) {
release_sock(sock->sk);
@@ -901,29 +973,36 @@ static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
int err;
pr_debug("msk=%p", msk);
- if (!msk->subflow) {
- err = subflow_create_socket(sock->sk, &msk->subflow);
- if (err)
- return err;
- }
- return inet_listen(msk->subflow, backlog);
+ ssock = mptcp_socket_create_get(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+
+ err = inet_listen(ssock, backlog);
+ sock_put(ssock->sk);
+ return err;
}
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct socket *ssock;
+ int err;
pr_debug("msk=%p", msk);
- if (!msk->subflow)
+ ssock = mptcp_fallback_get_ref(msk);
+ if (!ssock)
return -EINVAL;
- return inet_accept(sock, newsock, flags, kern);
+ err = inet_accept(sock, newsock, flags, kern);
+ sock_put(ssock->sk);
+ return err;
}
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
@@ -932,13 +1011,19 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct subflow_context *subflow;
const struct mptcp_sock *msk;
struct sock *sk = sock->sk;
+ struct socket *ssock;
__poll_t ret = 0;
msk = mptcp_sk(sk);
- if (msk->subflow)
- return tcp_poll(file, msk->subflow, wait);
-
lock_sock(sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sk);
+ ret = tcp_poll(file, ssock, wait);
+ sock_put(ssock->sk);
+ return ret;
+ }
+
mptcp_for_each_subflow(msk, subflow) {
struct socket *tcp_sock;
@@ -954,23 +1039,21 @@ static int mptcp_shutdown(struct socket *sock, int how)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct subflow_context *subflow;
+ struct socket *ssock;
int ret = 0;
pr_debug("sk=%p, how=%d", msk, how);
- if (msk->subflow) {
- pr_debug("subflow=%p", msk->subflow->sk);
- return kernel_sock_shutdown(msk->subflow, how);
+ lock_sock(sock->sk);
+ ssock = __mptcp_fallback_get_ref(msk);
+ if (ssock) {
+ release_sock(sock->sk);
+ pr_debug("subflow=%p", ssock->sk);
+ ret = kernel_sock_shutdown(ssock, how);
+ sock_put(ssock->sk);
+ return ret;
}
- /* protect against concurrent mptcp_close(), so that nobody can
- * remove entries from the conn list and walking the list
- * is still safe.
- *
- * We can't use MPTCP socket lock to protect conn_list changes,
- * as we need to update it from the BH via the mptcp_finish_connect()
- */
- lock_sock(sock->sk);
mptcp_for_each_subflow(msk, subflow) {
struct socket *tcp_socket;
--
2.20.1
1 year, 6 months