[PATCH] mptcp: cope with later TCP fallback.
by Paolo Abeni
With v1, passive connections can fall back to TCP after the
subflow becomes established:
syn+ MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connections.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes TCP fallback sockets as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
RFC -> v1:
- fixed WARN_ON splat on fallback
- added a few more comments about locking
- tested on top of both the v1 patches and the current export
branch
Note: should be applied just after "mptcp: process MP_CAPABLE data option."
---
net/mptcp/protocol.c | 99 +++++++++++++++++++++++++++++++++++++-------
net/mptcp/protocol.h | 1 +
2 files changed, 84 insertions(+), 16 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c15232461a61..4f591f3d74ec 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -24,6 +24,40 @@
#define MPTCP_SAME_STATE TCP_MAX_STATES
+static void __mptcp_close(struct sock *sk, long timeout);
+
+/* MP_CAPABLE handshake failed, convert msk to plain tcp, replacing
+ * socket->sk and stream ops and destroying msk
+ * return the msk socket, as we can't access msk anymore after this function
+ * completes
+ * Called with msk lock held, releases such lock before returning
+ */
+static struct socket *__mptcp_fallback_to_tcp(struct mptcp_sock *msk,
+ struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct socket *sock;
+ struct sock *sk;
+
+ sk = (struct sock *)msk;
+ sock = sk->sk_socket;
+ subflow = mptcp_subflow_ctx(ssk);
+
+ /* detach the msk socket */
+ list_del_init(&subflow->node);
+ sock_orphan(sk);
+ sock->sk = NULL;
+
+ /* socket is now TCP */
+ sock_graft(ssk, sock);
+ sock->ops = sk->sk_family == AF_INET6 ? &inet6_stream_ops :
+ &inet_stream_ops;
+
+ /* destroy the left-over msk sock */
+ __mptcp_close(sk, 0);
+ return sock;
+}
+
/* if msk has a single subflow socket, and the mp_capable handshake is not
* completed yet or has failed - that is, the socket is Not MP Capable,
* returns it.
@@ -37,25 +71,37 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}
-/* if msk has a single subflow, and the mp_capable handshake is failed,
+static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
+{
+ return msk->first && !tcp_sk(msk->first)->is_mptcp;
+}
+
+/* if the mp_capable handshake is failed, return a tcp socket
* return it.
* Otherwise returns NULL
*/
-static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
+static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
- struct socket *ssock = __mptcp_nmpc_socket(msk);
-
sock_owned_by_me((const struct sock *)msk);
- if (!ssock || tcp_sk(ssock->sk)->is_mptcp)
+ if (likely(!__mptcp_needs_tcp_fallback(msk)))
return NULL;
- return ssock;
+ if (msk->subflow) {
+ /* the first subflow is an active connection, discart the
+ * paired socket
+ */
+ msk->subflow->sk = NULL;
+ sock_release(msk->subflow);
+ msk->subflow = NULL;
+ }
+
+ return __mptcp_fallback_to_tcp(msk, msk->first);
}
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
{
- return ((struct sock *)msk)->sk_state == TCP_CLOSE;
+ return !msk->first;
}
static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
@@ -76,6 +122,7 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
if (err)
return ERR_PTR(err);
+ msk->first = ssock->sk;
msk->subflow = ssock;
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
@@ -155,6 +202,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
ret = sk_stream_wait_memory(ssk, timeo);
if (ret)
return ret;
+ if (unlikely(__mptcp_needs_tcp_fallback(msk)))
+ return 0;
}
/* compute copy limit */
@@ -265,11 +314,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
- if (ssock) {
+ if (unlikely(ssock)) {
+fallback:
pr_debug("fallback passthrough");
ret = sock_sendmsg(ssock, msg);
- release_sock(sk);
- return ret;
+ return ret >= 0 ? ret + copied : (copied ? copied : ret);
}
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -288,6 +337,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
&size_goal);
if (ret < 0)
break;
+ if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
+ release_sock(ssk);
+ ssock = __mptcp_tcp_fallback(msk);
+ goto fallback;
+ }
copied += ret;
}
@@ -367,11 +421,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
- if (ssock) {
+ if (unlikely(ssock)) {
+fallback:
pr_debug("fallback-read subflow=%p",
mptcp_subflow_ctx(ssock->sk));
copied = sock_recvmsg(ssock, msg, flags);
- release_sock(sk);
return copied;
}
@@ -475,6 +529,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("block timeout %ld", timeo);
wait_data = true;
mptcp_wait_data(sk, &timeo);
+ if (unlikely(__mptcp_tcp_fallback(msk)))
+ goto fallback;
}
if (more_data_avail) {
@@ -527,6 +583,8 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->conn_list);
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
+ msk->first = NULL;
+
return 0;
}
@@ -561,7 +619,8 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how)
release_sock(ssk);
}
-static void mptcp_close(struct sock *sk, long timeout)
+/* Called with msk lock held, releases such lock before returning */
+static void __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -569,8 +628,6 @@ static void mptcp_close(struct sock *sk, long timeout)
mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);
- lock_sock(sk);
-
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -583,6 +640,12 @@ static void mptcp_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
+static void mptcp_close(struct sock *sk, long timeout)
+{
+ lock_sock(sk);
+ __mptcp_close(sk, timeout);
+}
+
static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -652,6 +715,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
msk->local_key = subflow->local_key;
msk->token = subflow->token;
msk->subflow = NULL;
+ msk->first = newsk;
mptcp_token_update_accept(newsk, new_mptcp_sock);
@@ -1007,8 +1071,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
- const struct mptcp_sock *msk;
struct sock *sk = sock->sk;
+ struct mptcp_sock *msk;
struct socket *ssock;
__poll_t mask = 0;
@@ -1024,6 +1088,9 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
release_sock(sk);
sock_poll_wait(file, sock, wait);
lock_sock(sk);
+ ssock = __mptcp_tcp_fallback(msk);
+ if (unlikely(ssock))
+ return ssock->ops->poll(file, ssock, NULL);
if (test_bit(MPTCP_DATA_READY, &msk->flags))
mask = EPOLLIN | EPOLLRDNORM;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0e5ba5ac1443..b4cf88258b6b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -73,6 +73,7 @@ struct mptcp_sock {
struct list_head conn_list;
struct skb_ext *cached_ext; /* for the next sendmsg */
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
+ struct sock *first;
};
#define mptcp_for_each_subflow(__msk, __subflow) \
--
2.21.0
2 years, 6 months
[PATCH] Squash-to: tcp: Check for filled TCP option space before SACK
by Paolo Abeni
Less invasive checks, so that the number of branches when hitting
the sack code path decreases compared to the current vanilla tree.
--
Some additional check in tcp_established_options() is needed, otherwise
the tcp header will be corrupted when mptcp_established_options() fully
consumes the TCP option space. This condition can be reached without
MPTCP, so not sure if the resulting patch is still worthy for -net ?!?
---
net/ipv4/tcp_output.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 710ab45badfa..e797ca6c6d7d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -748,19 +748,20 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
- if (size + TCPOLEN_SACK_BASE_ALIGNED >= MAX_TCP_OPTION_SPACE)
- return size;
-
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+ if (unlikely(remaining < TCPOLEN_SACK_BASE_ALIGNED +
+ TCPOLEN_SACK_PERBLOCK))
+ return size;
+
opts->num_sack_blocks =
min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
- if (likely(opts->num_sack_blocks))
- size += TCPOLEN_SACK_BASE_ALIGNED +
- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+
+ size += TCPOLEN_SACK_BASE_ALIGNED +
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
}
return size;
--
2.21.0
2 years, 6 months
[PATCH net-next 00/11] Multipath TCP: Prerequisites
by Mat Martineau
The MPTCP upstreaming community has been collaborating on an
upstreamable MPTCP implementation that complies with RFC 8684. A minimal
set of features to comply with the specification involves a sizeable set
of code changes, so David requested that we split this work into
multiple, smaller patch sets to build up MPTCP infrastructure.
The minimal MPTCP feature set we are proposing for review in the v5.6
timeframe begins with these three parts:
Part 1 (this patch set): MPTCP prerequisites. Introduce some MPTCP
definitions, additional ULP and skb extension features, TCP option space
checking, and a few exported symbols.
Part 2: Single subflow implementation and self tests.
Part 3: Switch from MPTCP v0 (RFC 6824) to MPTCP v1 (new RFC 8684,
publication expected in the next few days).
We plan to send those over the next week. Additional patches for
multiple subflow support, path management, active backup, and other
features are in the pipeline for submission after making progress with
the above reviews.
Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v1-part1)
Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v1-part1
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream
Mat Martineau (9):
net: Make sock protocol value checks more specific
sock: Make sk_protocol a 16-bit value
tcp: Define IPPROTO_MPTCP
tcp: Add MPTCP option number
tcp, ulp: Add clone operation to tcp_ulp_ops
mptcp: Add MPTCP to skb extensions
tcp: Prevent coalesce/collapse when skb has MPTCP extensions
tcp: Export TCP functions and ops struct
tcp: Check for filled TCP option space before SACK
Paolo Abeni (2):
tcp: clean ext on tx recycle
skb: add helpers to allocate ext independently from sk_buff
MAINTAINERS | 10 ++++++++
include/linux/skbuff.h | 6 +++++
include/net/mptcp.h | 43 +++++++++++++++++++++++++++++++++
include/net/sock.h | 6 ++---
include/net/tcp.h | 22 +++++++++++++++++
include/trace/events/sock.h | 5 ++--
include/uapi/linux/in.h | 2 ++
net/ax25/af_ax25.c | 2 +-
net/core/skbuff.c | 42 ++++++++++++++++++++++++++++++--
net/decnet/af_decnet.c | 2 +-
net/ipv4/inet_connection_sock.c | 2 ++
net/ipv4/tcp.c | 6 ++---
net/ipv4/tcp_input.c | 10 ++++++--
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv4/tcp_output.c | 5 +++-
net/ipv4/tcp_ulp.c | 12 +++++++++
net/ipv6/tcp_ipv6.c | 6 ++---
tools/include/uapi/linux/in.h | 2 ++
18 files changed, 166 insertions(+), 19 deletions(-)
create mode 100644 include/net/mptcp.h
--
2.24.1
2 years, 6 months
[Weekly meetings] MoM - 12th of December 2019
by Matthieu Baerts
Hello,
On Thursday, we had our 79th meeting with Mat, Peter and Ossama (Intel
OTC), Christoph (Apple), Paolo, Florian and Davide (RedHat) and myself
(Tessares).
Thanks again for this new good meeting!
Here are the minutes of the meeting:
Accepted patches:
- The list of accepted patches can be seen on PatchWork:
https://patchwork.ozlabs.org/project/mptcp/list/?state=3
1205721 [22/48] mptcp: add subflow write space signalling and mptcp_poll
1205720 [0/1] proposed export branch rebase
1205201 Re: [PATCH v2 9/9] don't flag parent socket as RCV_SHUTDOWN if
one on...
1205182 Re: [PATCH v4] mptcp: fix option length of mp_capable syn/ack
1204880 mptcp: Remove dss_flags field from struct mptcp_options_received
1204879 mptcp: Remove flags field from struct mptcp_options_received
1204810 [v3,3/3] mptcp: process MP_CAPABLE data option.
1204808 [v3,2/3] mptcp: parse and emit MP_CAPABLE option according to
v1 spec.
1204807 [v3,1/3] mptcp: move from sha1 (v0) to sha256 (v1)
1204700 [v4] mptcp: fix option length of mp_capable syn/ack
1204204 [v3,3/3] mptcp: properly detect skb collapsing
1204203 [v3,2/3] mptcp: avoid data stream corruption on allocation failure.
1204201 [v3,1/3] skb: add helpers to allocate ext independently from
sk_buff
1203258 [v2,9/9] don't flag parent socket as RCV_SHUTDOWN if one one
subflow ...
1202766 [8/9] subflow: place further subflows on new 'join_list'
1202765 [7/9] pass subflow socket to mptcp_finish_connect
1202764 [6/9] make accept not allocate kernel socket struct
1202763 [5/9] add mptcp_subflow_shutdown
1202762 [4/9] store tcp_sk, not socket
1202761 [3/9] copy connection id from first subflow to mptcp socket
1202760 [2/9] mptcp: fix race from mptcp_close/mptcp join
1202758 [1/9] token: handle join-before-accept-completion case
1202759 [0/9] mptcp: support mptcp joins
Pending patches:
- The list of pending patches can be seen on PatchWork:
https://patchwork.ozlabs.org/project/mptcp/list/?state=*
1194592: Changes Requested: [RFC,1/1] mptcp: Optimize struct
mptcp_received_options.:
- remaining changes are only for after what we want to send to net-next
1196109: RFC: [10/10,RFC] selftests:mptcp: decrease timeout to 100 sec:
- please see the discussion below: kselftests timeout
1205319: Awaiting Upstream: mptcp: Fix dd command line option:
- should be squashed [APPLIED]
1205323: New: [v2,1/4] MAINTAINERS: Add MPTCP maintainers [APPLIED]
1205322: New: [v2,2/4] MAINTAINERS: Add net/mptcp/ files to MPTCP
section [APPLIED]
1205321: New: [v2,3/4] MAINTAINERS: Add MPTCP selftests to MPTCP section
[APPLIED]
1205320: New: [v2,4/4] MAINTAINERS: Add uapi header to MPTCP section:
[APPLIED]
- please see the discussion below: MAINTAINERS file
1207144: New: subflow: process pending data on state changes, too: [APPLIED]
- looks good for everybody?
1207190: Awaiting Upstream: [v2,1/4] mptcp: clear 'is_tcp' socket flag
when the MP_CAPABLE handshake fails [APPLIED]
1207193: Awaiting Upstream: [v2,2/4] mptcp: cleanup fallback handling
[APPLIED]
1207192: Awaiting Upstream: [v2,3/4] mptcp: add subflow to conn_list
early [APPLIED]
1207206: Awaiting Upstream: [v3,4/4] mptcp: avoid acquiring the msk lock
in mptcp_finisch_connect(): [APPLIED]
- please see below
1208166: New: mptcp: Remove request_version field from
subflow_request_sock [APPLIED]
1208167: New: mptcp: Remove version field from subflow_request_sock
[APPLIED]
1208169: New: [1/2] mptcp: Move struct mptcp_options_received [APPLIED]
1208168: New: [2/2] mptcp: Remove version field from
mptcp_options_received: [APPLIED]
- Paolo just reviewed that, all 4 patches are OK
1208571: New: mptcp: only orphan partially subflow at close time:
- one question/suggestion from Florian
- testing the new suggestion, v2 will be sent if suggestion is OK
FYI: Current Roadmap:
- Part 1 (mainly TCP changes, will be sent with Part 2):
- MAINTAINERS file [to be applied] [APPLIED]
- Part 2 (minimum set for MPTCP, up to KSelftests, one subflow):
- opti in TCP options? [Done]
- Send DATA_FIN, no corner cases [Done]
- IPv6 support [Done]
- if the peer never sends MPTCP-level ACK, a lot of memory will
be used [to be rebased but more important for part 3]
- Part 3:
- MPTCPv1 support [Done]
- Part 4 (after the KSelftests, to be sent ideally before the end
of the year)
- Full DATA_FIN support [WIP]
- Shared recv window (drop data received on other subflows) [TODO]
- Active backup support [WIP]
- Part 5:
- Shared recv window (full support)
- IPv6 - IPv4 mapped support
- not dropping MPTCP options (ADD_ADDR, etc.)
- FAST_CLOSE
- full MPTCP v1 support (reliable add_addr, etc.)
- Part 6:
- opti/perfs
- TFO
Items for the initial submission: Update:
- MPTCP v1 support:
- Done but fallback issues to be applied
- optimisation of options in TCP "struct mptcp_options_received":
- Done for part 1→3
- MAINTAINERS file:
- please see below
New tree:
- Paolo has proposed a branch for that (with some "squash-to")
- Davide has applied fixes on top:
https://paste.centos.org/view/6097ee81#L83
- Can we directly re-use Davide's branch?
- The suggestion here is:
- take Paolo's branch (v4) to recreate the tree
- extract Davide's fixes and apply them later (but soon) →
Davide will do that [DONE]
KSelftest timeout:
- Because there is still an issue when no re-ordering is used,
better to keep the 450 seconds
- But this patch is not applied upstream yet:
https://patchwork.kernel.org/patch/11204935/
- But we need it to support timeout of > 45 seconds (default value)
- That's why it is in the commit introducing the kselftests
- Should we remove it?
- Yes we should
- *Matth* will do that [DONE]
MAINTAINERS file:
- Possible other lines include:
- L: mptcp(a)lists.01.org :
- The issue with including this ML is that it currently
rejects messages from non-subscribers.
- Mat can reconfigure the ML, the tradeoff is in forwarding
spam to subscribers
- Matth: that's what we have on netdev ML
- Mat has just changed that
- Q: https://patchwork.ozlabs.org/project/mptcp/list/ :
- If all MPTCP patches go to Netdev ML directly, it might
be confusing for patches to end up in two patchworks (netdev and mptcp)
- T: git git://github.com/multipath-tcp/mptcp_net-next.git :
- Mat has seen two main models for net subsystem management:
- netdev-based (TCP, TLS): Patches are sent to netdev,
approved by subsystem maintainers, and approved and applied by David Miller.
- subtree (netfilter, BPF, bluetooth, wireless):
Messages are sent to netdev (netfilter/BPF) or a separate ML (bluetooth,
wireless) and are approved and merged to separate git repositories by
subsystem maintainers. Those maintainers then send pull requests for
batches of commits to netdev for David Miller to merge directly in git.
- Mat's view: The scope of MPTCP seems more like the other
netdev-based projects, so for changes going directly upstream I've
proposed MAINTAINERS entries that match that development model. If we
opt to use our own git repo to merge upstream changes and then send pull
requests, then we would include the "T:" entry for our github repo.
- Matth's view: should we not start as a subtree because it
is quite big and it will continue to grow quite quickly in the near future?
- Pull requests will be difficult for the moment (really
linked to TCP + new)
- But we will continue like before
- in conclusion, we only add a link to the ML, anybody can send
emails there.
- our patchwork and git repository are "internal"
- *Matth*: add the L line when applying the patches [DONE]
Other WIP items:
- Active backup support:
- Florian is working on it, patches have been shared
- DATA_FIN:
- Mat is working on it
Net-dev:
- suggestion by Paolo:
- once everything is merged (Paolo suggested Matth to work
during the night to be ready tomorrow morning) [DONE]
- we send the series to mptcp ML → Matth can do that [DONE]
- we send part 1 as a non RFC: [DONE]
- we need the checkpatch fixes
- and signed-off
- could be sent on Friday
- we send part 2 a bit later: [DONE]
- we need the checkpatch fixes
- and signed-off
- we can send part 2 once the tests are done
- we send part 3 "just after":
- we need the checkpatch fixes
- and signed-off
- Mat needs to notify Christoph
- → goal is to send everything on "Friday" (US time)
- we can cc: mptcp ML, no need to add everybody in cc
Signed-off:
- we need to add Mat or Christoph ones before sending
- *Matth* can do that [DONE]
Commit messages:
- first pass already done by some
- could be good to read the draft patches that will be sent to
mptcp ML [DONE]
IRC:
- a channel? Yes: #MPTCPUpstream on freenode
- not to move the ML there
- useful for last minute coordination, questions when applying
patches, small questions, etc.
Ideas:
-
https://opensource.googleblog.com/2019/12/announcing-google-summer-of-cod...
- could be good to start PM work, e.g. for Android, etc.
- but one important thing is that we need someone to help the
student, it can take time
- feedback from the field: some mobile networks are not nice for MPTCP:
- what has been seen is the SYN with MPTCP is received by the
server, SYN+ACK always dropped, with or without MPTCP
- we should put in place a safeguard like they do with TFO: after a
few attempts we fallback to TCP
- TODO for the roadmap Matth [DONE]
Next meeting:
- We propose to have the next meeting on Thursday, the 19th of
December.
- Usual time: 17:00 UTC (9am PST, 6pm CET)
- Still open to everyone!
- https://annuel2.framapad.org/p/mptcp_upstreaming_20191219
Feel free to comment on these points and propose new ones for the next
meeting!
Talk to you on Thursday,
Matt
--
Matthieu Baerts | R&D Engineer
matthieu.baerts(a)tessares.net
Tessares SA | Hybrid Access Solutions
www.tessares.net
1 Avenue Jean Monnet, 1348 Louvain-la-Neuve, Belgium
2 years, 6 months
[PATCH 00/15] Multipath TCP part 2: Single subflow
by Matthieu Baerts
These patches depend on the "Multipath TCP part 1: Prerequisites" patch
set sent earlier.
This set adds MPTCP connection establishment, writing & reading MPTCP
options on data packets, a sysctl to allow MPTCP per-namespace, and self
tests. This is sufficient to establish and maintain a connection with a
MPTCP peer, but will not yet allow or initiate establishment of
additional MPTCP subflows.
Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v1-part2)
Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v1-part2
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream
Florian Westphal (2):
mptcp: add subflow write space signalling and mptcp_poll
mptcp: add basic kselftest for mptcp
Mat Martineau (3):
mptcp: Add MPTCP socket stubs
mptcp: Write MPTCP DSS headers to outgoing data packets
mptcp: Implement MPTCP receive path
Matthieu Baerts (1):
mptcp: new sysctl to control the activation per NS
Paolo Abeni (2):
mptcp: recvmsg() can drain data from multiple subflows
mptcp: allow collapsing consecutive sendpages on the same substream
Peter Krystad (7):
mptcp: Handle MPTCP TCP options
mptcp: Associate MPTCP context with TCP socket
mptcp: Handle MP_CAPABLE options for outgoing connections
mptcp: Create SUBFLOW socket for incoming connections
mptcp: Add key generation and token tree
mptcp: Add shutdown() socket operation
mptcp: Add setsockopt()/getsockopt() socket operations
MAINTAINERS | 2 +
include/linux/tcp.h | 34 +
include/net/mptcp.h | 98 ++
net/Kconfig | 1 +
net/Makefile | 1 +
net/ipv4/tcp.c | 2 +
net/ipv4/tcp_input.c | 19 +-
net/ipv4/tcp_output.c | 57 +
net/ipv6/tcp_ipv6.c | 7 +
net/mptcp/Kconfig | 16 +
net/mptcp/Makefile | 4 +
net/mptcp/crypto.c | 122 ++
net/mptcp/ctrl.c | 130 ++
net/mptcp/options.c | 520 ++++++++
net/mptcp/protocol.c | 1160 +++++++++++++++++
net/mptcp/protocol.h | 220 ++++
net/mptcp/subflow.c | 763 +++++++++++
net/mptcp/token.c | 195 +++
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/net/mptcp/.gitignore | 2 +
tools/testing/selftests/net/mptcp/Makefile | 13 +
tools/testing/selftests/net/mptcp/config | 2 +
.../selftests/net/mptcp/mptcp_connect.c | 832 ++++++++++++
.../selftests/net/mptcp/mptcp_connect.sh | 595 +++++++++
tools/testing/selftests/net/mptcp/settings | 1 +
25 files changed, 4796 insertions(+), 1 deletion(-)
create mode 100644 net/mptcp/Kconfig
create mode 100644 net/mptcp/Makefile
create mode 100644 net/mptcp/crypto.c
create mode 100644 net/mptcp/ctrl.c
create mode 100644 net/mptcp/options.c
create mode 100644 net/mptcp/protocol.c
create mode 100644 net/mptcp/protocol.h
create mode 100644 net/mptcp/subflow.c
create mode 100644 net/mptcp/token.c
create mode 100644 tools/testing/selftests/net/mptcp/.gitignore
create mode 100644 tools/testing/selftests/net/mptcp/Makefile
create mode 100644 tools/testing/selftests/net/mptcp/config
create mode 100644 tools/testing/selftests/net/mptcp/mptcp_connect.c
create mode 100755 tools/testing/selftests/net/mptcp/mptcp_connect.sh
create mode 100644 tools/testing/selftests/net/mptcp/settings
--
2.24.0
2 years, 6 months
[PATCH net-next 00/15] Multipath TCP part 2: Single subflow
by Mat Martineau
These patches depend on the "Multipath TCP part 1: Prerequisites" patch
set sent earlier.
This set adds MPTCP connection establishment, writing & reading MPTCP
options on data packets, a sysctl to allow MPTCP per-namespace, and self
tests. This is sufficient to establish and maintain a connection with a
MPTCP peer, but will not yet allow or initiate establishment of
additional MPTCP subflows.
Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v1-part2)
Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v1-part2
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream
Florian Westphal (2):
mptcp: add subflow write space signalling and mptcp_poll
mptcp: add basic kselftest for mptcp
Mat Martineau (3):
mptcp: Add MPTCP socket stubs
mptcp: Write MPTCP DSS headers to outgoing data packets
mptcp: Implement MPTCP receive path
Matthieu Baerts (1):
mptcp: new sysctl to control the activation per NS
Paolo Abeni (2):
mptcp: recvmsg() can drain data from multiple subflows
mptcp: allow collapsing consecutive sendpages on the same substream
Peter Krystad (7):
mptcp: Handle MPTCP TCP options
mptcp: Associate MPTCP context with TCP socket
mptcp: Handle MP_CAPABLE options for outgoing connections
mptcp: Create SUBFLOW socket for incoming connections
mptcp: Add key generation and token tree
mptcp: Add shutdown() socket operation
mptcp: Add setsockopt()/getsockopt() socket operations
MAINTAINERS | 2 +
include/linux/tcp.h | 34 +
include/net/mptcp.h | 98 ++
net/Kconfig | 1 +
net/Makefile | 1 +
net/ipv4/tcp.c | 2 +
net/ipv4/tcp_input.c | 19 +-
net/ipv4/tcp_output.c | 57 +
net/ipv6/tcp_ipv6.c | 7 +
net/mptcp/Kconfig | 16 +
net/mptcp/Makefile | 4 +
net/mptcp/crypto.c | 122 ++
net/mptcp/ctrl.c | 130 ++
net/mptcp/options.c | 520 ++++++++
net/mptcp/protocol.c | 1160 +++++++++++++++++
net/mptcp/protocol.h | 220 ++++
net/mptcp/subflow.c | 763 +++++++++++
net/mptcp/token.c | 195 +++
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/net/mptcp/.gitignore | 2 +
tools/testing/selftests/net/mptcp/Makefile | 13 +
tools/testing/selftests/net/mptcp/config | 2 +
.../selftests/net/mptcp/mptcp_connect.c | 832 ++++++++++++
.../selftests/net/mptcp/mptcp_connect.sh | 595 +++++++++
tools/testing/selftests/net/mptcp/settings | 1 +
25 files changed, 4796 insertions(+), 1 deletion(-)
create mode 100644 net/mptcp/Kconfig
create mode 100644 net/mptcp/Makefile
create mode 100644 net/mptcp/crypto.c
create mode 100644 net/mptcp/ctrl.c
create mode 100644 net/mptcp/options.c
create mode 100644 net/mptcp/protocol.c
create mode 100644 net/mptcp/protocol.h
create mode 100644 net/mptcp/subflow.c
create mode 100644 net/mptcp/token.c
create mode 100644 tools/testing/selftests/net/mptcp/.gitignore
create mode 100644 tools/testing/selftests/net/mptcp/Makefile
create mode 100644 tools/testing/selftests/net/mptcp/config
create mode 100644 tools/testing/selftests/net/mptcp/mptcp_connect.c
create mode 100755 tools/testing/selftests/net/mptcp/mptcp_connect.sh
create mode 100644 tools/testing/selftests/net/mptcp/settings
--
2.24.1
2 years, 6 months
[PATCH 0/4] Multipath TCP part 3: MPTCPv1 (RFC 8684) support
by Matthieu Baerts
These patches depend on the "Multipath TCP part 2: Single subflow"
patch set sent earlier.
In this set we add the necessary code for the RFC8684-style handshake.
RFC8684 obsoletes the experimental RFC6824 and makes MPTCP move on to
version 1.
The MPTCP patchset exclusively supports RFC 8684. Although all MPTCP
deployments are currently based on RFC 6824, future deployments will be
migrating to MPTCP version 1. 3GPP's 5G standardization also solely supports
RFC 8684. Also, we believe that this initial submission of MPTCP will be
cleaner by solely supporting RFC 8684. If later on support for the old
MPTCP-version is required it can always be added in the future.
The major difference between RFC 8684 and RFC 6824 is that RFC 8684 has
better support for servers using TCP SYN-cookies by reliably retransmitting
the MP_CAPABLE option.
Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v1-part3)
Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v1-part3
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream.
Christoph Paasch (2):
mptcp: parse and emit MP_CAPABLE option according to v1 spec
mptcp: process MP_CAPABLE data option
Paolo Abeni (2):
mptcp: move from sha1 (v0) to sha256 (v1)
mptcp: cope with later TCP fallback
include/linux/tcp.h | 3 +-
include/net/mptcp.h | 17 ++--
net/ipv4/tcp_input.c | 2 +-
net/ipv4/tcp_output.c | 2 +-
net/mptcp/Kconfig | 10 ++
net/mptcp/crypto.c | 139 +++++++++++++++-----------
net/mptcp/options.c | 220 +++++++++++++++++++++++++++++++++---------
net/mptcp/protocol.c | 117 +++++++++++++++++-----
net/mptcp/protocol.h | 21 ++--
net/mptcp/subflow.c | 46 ++++++++-
10 files changed, 434 insertions(+), 143 deletions(-)
--
2.24.0
2 years, 6 months
[PATCH 00/11] Multipath TCP part 1: Prerequisites
by Matthieu Baerts
The MPTCP upstreaming community has been collaborating on an
upstreamable MPTCP implementation that complies with RFC 8684. A minimal
set of features to comply with the specification involves a sizeable set
of code changes, so David requested that we split this work into
multiple, smaller patch sets to build up MPTCP infrastructure.
The minimal MPTCP feature set we are proposing for the v5.6 timeframe is
split in to these parts for review:
Part 1 (this patch set): MPTCP prerequisites. Introduce some MPTCP
definitions, additional ULP and skb extension features, TCP option space
checking, and a few exported symbols.
Part 2: Single subflow implementation and self tests.
Part 3: Switch from MPTCP v0 (RFC 6824) to MPTCP v1 (new RFC 8684,
publication expected in the next few days).
Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v1-part1)
Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v1-part1
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream
Mat Martineau (9):
net: Make sock protocol value checks more specific
sock: Make sk_protocol a 16-bit value
tcp: Define IPPROTO_MPTCP
tcp: Add MPTCP option number
tcp, ulp: Add clone operation to tcp_ulp_ops
mptcp: Add MPTCP to skb extensions
tcp: Prevent coalesce/collapse when skb has MPTCP extensions
tcp: Export TCP functions and ops struct
tcp: Check for filled TCP option space before SACK
Paolo Abeni (2):
tcp: clean ext on tx recycle
skb: add helpers to allocate ext independently from sk_buff
MAINTAINERS | 10 ++++++++
include/linux/skbuff.h | 6 +++++
include/net/mptcp.h | 43 +++++++++++++++++++++++++++++++++
include/net/sock.h | 6 ++---
include/net/tcp.h | 22 +++++++++++++++++
include/trace/events/sock.h | 5 ++--
include/uapi/linux/in.h | 2 ++
net/ax25/af_ax25.c | 2 +-
net/core/skbuff.c | 42 ++++++++++++++++++++++++++++++--
net/decnet/af_decnet.c | 2 +-
net/ipv4/inet_connection_sock.c | 2 ++
net/ipv4/tcp.c | 6 ++---
net/ipv4/tcp_input.c | 10 ++++++--
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv4/tcp_output.c | 5 +++-
net/ipv4/tcp_ulp.c | 12 +++++++++
net/ipv6/tcp_ipv6.c | 6 ++---
tools/include/uapi/linux/in.h | 2 ++
18 files changed, 166 insertions(+), 19 deletions(-)
create mode 100644 include/net/mptcp.h
--
2.24.0
2 years, 6 months
Cover letters for part1/part2 netdev patchsets
by Mat Martineau
Hello everyone -
As we discussed earlier today, here are draft cover letters for patch set
parts 1 & 2. Feedback appreciated!
------
Part 1
Subject: [PATCH net-next 0/0] Multipath TCP part 1: Prerequisites
The MPTCP upstreaming community has been collaborating on an
upstreamable MPTCP implementation. David requested that we split this
work into multiple, smaller patch sets to build up MPTCP
infrastructure. The scope of this first patch set is limited to
prerequisite TCP core changes so we can get focused feedback in these
areas.
In this patch set we introduce some MPTCP definitions, additional ULP
and skb extension features, TCP option space checking, and a few
exported symbols.
<git tree references at github>
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream
------
Part 2
Subject: [PATCH net-next 0/0] Multipath TCP part 2: Single subflow
These patches depend on the "Multipath TCP part 1: Prerequisites" patch
set sent earlier.
This set adds MPTCP connection establishment, writing & reading MPTCP
options on data packets, a sysctl to allow MPTCP per-namespace, and self
tests. This is sufficient to establish and maintain a connection with a
MPTCP peer, but will not yet allow or initiate establishment of
additional MPTCP subflows.
<git tree references at github>
Thank you for your review. You can find us at mptcp(a)lists.01.org and
https://is.gd/mptcp_upstream
--
Mat Martineau
Intel
2 years, 6 months
Re: [multipathtcp] MPTCP implementation feedback for RFC6824bis
by Christoph Paasch
Hello,
> On Dec 10, 2019, at 12:04 PM, V Anil Kumar <anil(a)csir4pi.in> wrote:
>
> Hi Alan,
>
> Please see inline.
>
> On 12/06/19 09:28 PM, Alan Ford <alan.ford(a)gmail.com <mailto:alan.ford@gmail.com>> wrote:
>>
>> Hi all,
>>
>>
>> Following on from the discussion of implementation feedback with Christoph, I propose the following edits to RFC6824bis - which is currently in AUTH48 - as clarifications.
>>
>> ADs, please can you confirm you consider these edits sufficiently editorial to fit into AUTH48.
>>
>> WG participants, please speak up if you have any concerns.
>>
>>
>> Edit 1, clarifying reliability of MP_CAPABLE
>>
>> Change the sentence reading:
>>
>> The SYN with MP_CAPABLE occupies the first octet of data sequence space, although this does not need to be acknowledged at the connection level until the first data is sent (see Section 3.3).
>>
>> To:
>>
>> The SYN with MP_CAPABLE occupies the first octet of data sequence space, and this MUST be acknowledged at the connection level at or before the time the first data is sent or received (see Section 3.3).
>>
>>
>> Change the sentence reading:
>>
>> If B has data to send first, then the reliable delivery of the ACK + MP_CAPABLE can be inferred by the receipt of this data with an MPTCP Data Sequence Signal (DSS) option (Section 3.3).
>>
>> To:
>>
>> If B has data to send first, then the reliable delivery of the ACK + MP_CAPABLE is ensured by the receipt of this data with an MPTCP Data Sequence Signal (DSS) option (Section 3.3) containing a DATA_ACK for the MP_CAPABLE (which is the first octet of the data sequence space).
>>
>>
>> In my personal opinion either one of these edits would be sufficient for making the point, however clearly this has caused some confusion amongst the implementor community so making both these changes should make it absolutely clear as to the expected behaviour here.
>>
>>
>> Edit 2, mapping constraint
>>
>> Change the sentence reading:
>>
>> A Data Sequence Mapping does not need to be included in every MPTCP packet, as long as the subflow sequence space in that packet is covered by a mapping known at the receiver.
>>
>> To:
>>
>> A Data Sequence Mapping MUST appear on a TCP segment which is covered by the mapping. It does not need to be included in every MPTCP packet, as long as the subflow sequence space in that packet is covered by a mapping known at the receiver.
>>
> As far as I understand, the proposed change introduces a “MUST” to insist that the map in a segment must cover at least some data in the segment. But the document does not say anything about the rationale behind it. I guess it is purely an
> ease of implementation?
For two reasons:
1. Ease of implementation
2. If an implementation tries to "remember" early mappings, it is not clear how many of these an implementation can hold. Thus, the sender does not know how many early mappings he can send. So, it is hard for a sender to do the right thing.
> I think the design/format of the Data Sequence Mapping permits the map to stand independent of the data being carried in a segment. So, as long as an implementation is willing to deal with the complexity of storing and processing late and early mappings (with respect to the data arrival), it could be permitted provided that the received map is for in-window data.
What is the concrete use-case for such early mappings? What are the benefits of it? I think that if we want to enable such implementation-complexity, we need a compelling use-case with a big benefit.
That's the reason why we (the MPTCP-upstreaming community) vouch to have this case restricted.
Cheers,
Christoph
>
> Anil
>>
>>
>>
>> Best regards,
>> Alan
>>
2 years, 6 months