On Thu, 2018-02-22 at 15:50 -0800, rao.shoaib(a)oracle.com wrote:
From: Rao Shoaib <rao.shoaib(a)oracle.com>
Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
---
include/net/inet_common.h | 2 +
include/net/tcp.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/af_inet.c | 3 +-
net/ipv4/tcp.c | 59 ++++++++++++++------------
net/ipv4/tcp_input.c | 89 +++++++++++++++++++++++---------------
net/ipv4/tcp_ipv4.c | 42 ++++++++++++------
net/ipv4/tcp_output.c | 55 +++++++++++++-----------
net/ipv4/tcp_timer.c | 9 ++--
net/ipv6/af_inet6.c | 4 +-
net/ipv6/tcp_ipv6.c | 63 ++++++++++++++-------------
10 files changed, 293 insertions(+), 139 deletions(-)
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 500f813..7b919c7 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -14,6 +14,8 @@ struct sock;
struct sockaddr;
struct socket;
+int inet_create(struct net *net, struct socket *sock, int protocol, int kern);
+int inet6_create(struct net *net, struct socket *sock, int protocol, int kern);
int inet_release(struct socket *sock);
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f5d748a..3344b1d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -314,6 +314,12 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
return false;
}
+static inline void tcp_drop(struct sock *sk, struct sk_buff *skb)
+{
+ sk_drops_add(sk, skb);
+ __kfree_skb(skb);
+}
+
bool tcp_check_oom(struct sock *sk, int shift);
extern struct proto tcp_prot;
@@ -2273,6 +2279,106 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
+/* MPTCP */
+unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
+ int large_allowed);
+ssize_t tcp_sendpages_xmit(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
+ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
+int tcp_close_state(struct sock *sk);
+void tcp_enter_quickack_mode(struct sock *sk);
+void tcp_sndbuf_expand_impl(struct sock *sk, u32 nr_segs);
+bool tcp_check_rtt(struct sock *sk);
+u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
+void tcp_ack_probe(struct sock *sk);
+bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
+ const u32 ack_seq, const u32 nwin);
+bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
+ struct tcp_fastopen_cookie *cookie);
+bool tcp_rcv_fastopen_synack_impl(struct sock *sk, struct sk_buff *synack,
+ struct tcp_fastopen_cookie *cookie,
+ bool rexmit);
+void tcp_enter_quickack_mode(struct sock *sk);
+void tcp_check_space(struct sock *sk);
+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
+ bool *fragstolen);
+void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
+void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
+bool tcp_urg_mode(const struct tcp_sock *tp);
+int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ gfp_t gfp_mask);
+void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
+void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now);
+void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
+int pskb_trim_head(struct sk_buff *skb, int len);
+void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
+ const struct sk_buff *skb);
+unsigned int tcp_mss_split_point(const struct sock *sk,
+ const struct sk_buff *skb,
+ unsigned int mss_now,
+ unsigned int max_segs,
+ int nonagle);
+unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+ const struct sk_buff *skb);
+int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now);
+bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
+ unsigned int cur_mss, int nonagle);
+bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
+ unsigned int cur_mss);
+int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
+void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb);
+void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req);
+void tcp_v4_reqsk_destructor(struct request_sock *req);
+struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb);
+void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+ const struct tcphdr *th);
+void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
+ const struct tcphdr *th);
+void tcp_write_err(struct sock *sk);
+bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
+ unsigned int timeout);
+int tcp_write_timeout(struct sock *sk);
+struct request_sock *tcp_cookie_req_alloc(struct sock *sk,
+ struct sk_buff *skb,
+ struct tcp_options_received *tcp_opts,
+ __u32 cookie, int mss);
+void inet_twsk_free(struct inet_timewait_sock *tw);
+#if IS_ENABLED(CONFIG_IPV6)
+void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
+void tcp_v6_mtu_reduced(struct sock *sk);
+void tcp_v6_reqsk_destructor(struct request_sock *req);
+void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req);
+struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb);
+int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb);
+struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req);
+int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+#endif
+
+static inline void tcp_data_snd_check(struct sock *sk)
+{
+ tcp_push_pending_frames(sk);
+ tcp_check_space(sk);
+}
+
+/* These states need RST on ABORT according to RFC793 */
+
+static inline bool tcp_need_reset(int state)
+{
+ return (1 << state) &
+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
+ TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
+}
+
+/* END MPTCP */
+
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e8c7fad..5d8ea09 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -243,8 +243,7 @@ EXPORT_SYMBOL(inet_listen);
* Create an inet socket.
*/
-static int inet_create(struct net *net, struct socket *sock, int protocol,
- int kern)
+int inet_create(struct net *net, struct socket *sock, int protocol, int kern)
{
struct sock *sk;
struct inet_protosw *answer;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ea89a41..20a69eb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,6 +429,7 @@ static const struct tcp_operational_ops __tcp_default_op_ops = {
.sndbuf_expand = tcp_sndbuf_expand,
.shift_skb_data = tcp_shift_skb_data,
.grow_window = tcp_grow_window,
+ .check_rtt = tcp_check_rtt,
.try_coalesce = tcp_try_coalesce,
.try_rmem_schedule = tcp_try_rmem_schedule,
.collapse_one = tcp_collapse_one,
@@ -963,8 +964,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
return NULL;
}
-static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
- int large_allowed)
+unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
@@ -998,8 +998,8 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
return mss_now;
}
-ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
- size_t size, int flags)
+ssize_t tcp_sendpages_xmit(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
{
struct tcp_sock *tp = tcp_sk(sk);
int mss_now, size_goal;
@@ -1007,25 +1007,12 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
ssize_t copied;
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /* Wait for a connection to finish. One exception is TCP Fast Open
- * (passive side) where data is allowed to be sent before a connection
- * is fully established.
- */
- if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
- !tcp_passive_fastopen(sk)) {
- err = sk_stream_wait_connect(sk, &timeo);
- if (err != 0)
- goto out_err;
- }
-
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tp->op_ops->send_mss(sk, &size_goal, flags);
copied = 0;
err = -EPIPE;
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
- goto out_err;
while (size > 0) {
struct sk_buff *skb = tcp_write_queue_tail(sk);
@@ -1120,6 +1107,33 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
do_error:
if (copied)
goto out;
+ return err;
+}
+EXPORT_SYMBOL_GPL(tcp_sendpages_xmit);
+
+ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ int err;
+ long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+ /* Wait for a connection to finish. One exception is TCP Fast Open
+ * (passive side) where data is allowed to be sent before a connection
+ * is fully established.
+ */
+ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+ !tcp_passive_fastopen(sk)) {
+ err = sk_stream_wait_connect(sk, &timeo);
+ if (err != 0)
+ goto out_err;
+ }
+
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ goto out_err;
+
+ err = tcp_sendpages_xmit(sk, page, offset, size, flags);
+ if (err >= 0)
+ return(err);
out_err:
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -2193,7 +2207,7 @@ static const unsigned char new_state[16] = {
[TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
};
-static int tcp_close_state(struct sock *sk)
+int tcp_close_state(struct sock *sk)
{
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
@@ -2419,15 +2433,6 @@ void tcp_close(struct sock *sk, long timeout)
}
EXPORT_SYMBOL(tcp_close);
-/* These states need RST on ABORT according to RFC793 */
-
-static inline bool tcp_need_reset(int state)
-{
- return (1 << state) &
- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
- TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
-}
-
static void tcp_rtx_queue_purge(struct sock *sk)
{
struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8cc48bb..398505e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -195,7 +195,7 @@ static void tcp_incr_quickack(struct sock *sk)
icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}
-static void tcp_enter_quickack_mode(struct sock *sk)
+void tcp_enter_quickack_mode(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_incr_quickack(sk);
@@ -293,12 +293,11 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
*/
-void tcp_sndbuf_expand(struct sock *sk)
+void tcp_sndbuf_expand_impl(struct sock *sk, u32 nr_segs)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
int sndmem, per_mss;
- u32 nr_segs;
/* Worst case is non GSO/TSO : each frame consumes one skb
* and skb->head is kmalloced using power of two area of memory
@@ -310,8 +309,10 @@ void tcp_sndbuf_expand(struct sock *sk)
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
- nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+ if (nr_segs <= 0) {
+ nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+ nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+ }
/* Fast Recovery (RFC 5681 3.2) :
* Cubic needs 1.7 factor, rounded to 2 to include
@@ -324,6 +325,11 @@ void tcp_sndbuf_expand(struct sock *sk)
sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}
+void tcp_sndbuf_expand(struct sock *sk)
+{
+ tcp_sndbuf_expand_impl(sk, 0);
+}
+
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
*
* All tcp_full_space() is split to two parts: "network" buffer, allocated
@@ -572,6 +578,17 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
}
}
+bool tcp_check_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int time;
+
+ time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
+
+ if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
+ return (true);
+ return false;
+}
/*
* This function should be called every time data is copied to user space.
* It calculates the appropriate TCP receive buffer space.
@@ -580,11 +597,9 @@ void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 copied;
- int time;
tcp_mstamp_refresh(tp);
- time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
- if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us ==
0)
+ if (tp->op_ops->check_rtt(sk))
return;
/* Number of bytes copied to user in last RTT */
@@ -2966,7 +2981,7 @@ static void tcp_set_xmit_timer(struct sock *sk)
}
/* If we get here, the whole TSO packet has not been acked. */
-static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
+u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 packets_acked;
@@ -3201,7 +3216,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
return flag;
}
-static void tcp_ack_probe(struct sock *sk)
+void tcp_ack_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *head = tcp_send_head(sk);
@@ -3273,7 +3288,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
/* Check that window update is acceptable.
* The function assumes that snd_una<=ack<=snd_next.
*/
-static inline bool tcp_may_update_window(const struct tcp_sock *tp,
+inline bool tcp_may_update_window(const struct tcp_sock *tp,
const u32 ack, const u32 ack_seq,
const u32 nwin)
{
@@ -4290,12 +4305,6 @@ bool tcp_try_coalesce(struct sock *sk,
return true;
}
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
-{
- sk_drops_add(sk, skb);
- __kfree_skb(skb);
-}
-
/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
@@ -4505,8 +4514,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
}
}
-static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
- bool *fragstolen)
+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
+ bool *fragstolen)
{
int eaten;
struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
@@ -4580,7 +4589,11 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
int eaten;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
- __kfree_skb(skb);
+ /* options that a layer above might be interested in */
+ if (unlikely(tp->op_ops->ack_only))
+ tp->op_ops->ack_only(sk, skb);
+ else
+ __kfree_skb(skb);
If there is a possibility that ack_only() does not call __kfree_skb(skb),
what is the consequence? I don't have enough context here.
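My reading is that the old behaviour is only preserved if ack_only() always
consumes the skb, either by freeing it or by queueing it somewhere that will
eventually free it. To make the question concrete, this is roughly the
contract I would expect such a callback to follow (just a sketch on my part;
the mptcp_ack_only() name is made up and not part of this patch):

/* hypothetical callback, not from this patch */
static void mptcp_ack_only(struct sock *sk, struct sk_buff *skb)
{
        /* Look at whatever options the upper layer cares about ... */

        /* ... but the skb still has to be consumed here. tcp_data_queue()
         * no longer calls __kfree_skb() on this path, so an skb that the
         * callback neither frees nor queues would simply be leaked.
         */
        __kfree_skb(skb);
}

If the intent is instead that the callback may keep the skb and free it
later, it would be good to spell that ownership rule out in a comment next
to the op_ops hook, since nothing else on this path will free it.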
Peter.
> return;
> }
> skb_dst_drop(skb);
> @@ -4995,7 +5008,7 @@ static void tcp_new_space(struct sock *sk)
> sk->sk_write_space(sk);
> }
>
> -static void tcp_check_space(struct sock *sk)
> +void tcp_check_space(struct sock *sk)
> {
> if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
> sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
> @@ -5010,12 +5023,6 @@ static void tcp_check_space(struct sock *sk)
> }
> }
>
> -static inline void tcp_data_snd_check(struct sock *sk)
> -{
> - tcp_push_pending_frames(sk);
> - tcp_check_space(sk);
> -}
> -
> /*
> * Check if sending an ack is needed.
> */
> @@ -5504,8 +5511,9 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
> tp->pred_flags = 0;
> }
>
> -bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
> - struct tcp_fastopen_cookie *cookie)
> +bool tcp_rcv_fastopen_synack_impl(struct sock *sk, struct sk_buff *synack,
> + struct tcp_fastopen_cookie *cookie,
> + bool rexmit)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
> @@ -5542,7 +5550,7 @@ bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
>
> tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
>
> - if (data) { /* Retransmit unacked data in SYN */
> + if (data && rexmit) { /* Retransmit unacked data in SYN */
> skb_rbtree_walk_from(data) {
> if (__tcp_retransmit_skb(sk, data, 1))
> break;
> @@ -5562,6 +5570,12 @@ bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
> return false;
> }
>
> +bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
> + struct tcp_fastopen_cookie *cookie)
> +{
> + return tcp_rcv_fastopen_synack_impl(sk, synack, cookie, true);
> +}
> +
> static void smc_check_reset_syn(struct tcp_sock *tp)
> {
> #if IS_ENABLED(CONFIG_SMC)
> @@ -5581,6 +5595,9 @@ int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
> int saved_clamp = tp->rx_opt.mss_clamp;
> bool fastopen_fail;
>
> + tp->rx_opt.saw_tstamp = 0;
> + tcp_mstamp_refresh(tp);
> +
> tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
> if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
> tp->rx_opt.rcv_tsecr -= tp->tsoffset;
> @@ -5682,7 +5699,7 @@ int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
> tcp_finish_connect(sk, skb);
>
> fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
> - tcp_rcv_fastopen_synack(sk, skb, &foc);
> + tp->op_ops->fastopen_synack(sk, skb, &foc);
>
> if (!sock_flag(sk, SOCK_DEAD)) {
> sk->sk_state_change(sk);
> @@ -5842,9 +5859,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
> goto discard;
>
> case TCP_SYN_SENT:
> - tp->rx_opt.saw_tstamp = 0;
> - tcp_mstamp_refresh(tp);
> - queued = tcp_rcv_synsent_state_process(sk, skb, th);
> + queued = tp->state_ops->synsent(sk, skb, th);
> if (queued >= 0)
> return queued;
>
> @@ -6052,8 +6067,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
> }
>
> if (!queued) {
> + /* options that a layer above might be interested in */
> + if (unlikely(tp->op_ops && tp->op_ops->ack_only))
> + tp->op_ops->ack_only(sk, skb);
> + else
> discard:
> - tcp_drop(sk, skb);
> + tcp_drop(sk, skb);
> }
> return 0;
> }
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 482ca15..95d4c1f 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -595,7 +595,7 @@ EXPORT_SYMBOL(tcp_v4_send_check);
> * Exception: precedence violation. We do not implement it in any case.
> */
>
> -static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
> +void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
> {
> const struct tcphdr *th = tcp_hdr(skb);
> struct {
> @@ -829,8 +829,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
> inet_twsk_put(tw);
> }
>
> -static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
> - struct request_sock *req)
> +void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
> + struct request_sock *req)
> {
> /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
> * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
> @@ -892,7 +892,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
> /*
> * IPv4 request_sock destructor.
> */
> -static void tcp_v4_reqsk_destructor(struct request_sock *req)
> +void tcp_v4_reqsk_destructor(struct request_sock *req)
> {
> kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
> }
> @@ -1431,7 +1431,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
> }
> EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
>
> -static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
> +struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
> {
> #ifdef CONFIG_SYN_COOKIES
> const struct tcphdr *th = tcp_hdr(skb);
> @@ -1598,8 +1598,8 @@ static void tcp_v4_restore_cb(struct sk_buff *skb)
> sizeof(struct inet_skb_parm));
> }
>
> -static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
> - const struct tcphdr *th)
> +void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
> + const struct tcphdr *th)
> {
> /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
> * barrier() makes sure compiler wont play fool^Waliasing games.
> @@ -1620,6 +1620,9 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
> skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
> }
>
> +process_unclaimed tcp_process_unclaimed;
> +EXPORT_SYMBOL(tcp_process_unclaimed);
> +
> /*
> * From tcp_input.c
> */
> @@ -1750,13 +1753,16 @@ int tcp_v4_rcv(struct sk_buff *skb)
>
> sk_incoming_cpu_update(sk);
>
> - bh_lock_sock_nested(sk);
> - tcp_segs_in(tcp_sk(sk), skb);
> - ret = 0;
> - if (!sock_owned_by_user(sk)) {
> - ret = tcp_v4_do_rcv(sk, skb);
> - } else if (tcp_add_backlog(sk, skb)) {
> - goto discard_and_relse;
> + if (likely(!tcp_sk(sk)->op_ops->rx)) {
> + bh_lock_sock_nested(sk);
> + tcp_segs_in(tcp_sk(sk), skb);
> + ret = 0;
> + if (!sock_owned_by_user(sk))
> + ret = tcp_v4_do_rcv(sk, skb);
> + else if (tcp_add_backlog(sk, skb))
> + goto discard_and_relse;
> + } else {
> + return(tcp_sk(sk)->op_ops->rx(sk, skb, refcounted));
> }
> bh_unlock_sock(sk);
>
> @@ -1778,6 +1784,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
> bad_packet:
> __TCP_INC_STATS(net, TCP_MIB_INERRS);
> } else {
> + if (unlikely(tcp_process_unclaimed)) {
> + if (tcp_process_unclaimed(sk, skb))
> + return (0);
> + }
> tcp_v4_send_reset(NULL, skb);
> }
>
> @@ -1820,6 +1830,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
> refcounted = false;
> goto process;
> }
> + if (unlikely(tcp_process_unclaimed)) {
> + if (tcp_process_unclaimed(sk, skb))
> + return 0;
> + }
> }
> /* to ACK */
> /* fall through */
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 2fa5c05..72b494a 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -46,7 +46,7 @@
> #include <trace/events/tcp.h>
>
> /* Account for new data that has been sent to the network. */
> -static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
> +void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
> {
> struct inet_connection_sock *icsk = inet_csk(sk);
> struct tcp_sock *tp = tcp_sk(sk);
> @@ -375,7 +375,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
> /* Constructs common control bits of non-data skb. If SYN/FIN is present,
> * auto increment end seqno.
> */
> -static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
> +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
> {
> skb->ip_summed = CHECKSUM_PARTIAL;
>
> @@ -390,7 +390,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
> TCP_SKB_CB(skb)->end_seq = seq;
> }
>
> -static inline bool tcp_urg_mode(const struct tcp_sock *tp)
> +inline bool tcp_urg_mode(const struct tcp_sock *tp)
> {
> return tp->snd_una != tp->snd_up;
> }
> @@ -1031,8 +1031,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
> * We are working here with either a clone of the original
> * SKB, or a fresh unique copy made by the retransmit engine.
> */
> -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
> - gfp_t gfp_mask)
> +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
> + gfp_t gfp_mask)
> {
> const struct inet_connection_sock *icsk = inet_csk(sk);
> struct inet_sock *inet;
> @@ -1193,7 +1193,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
> * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
> * otherwise socket can stall.
> */
> -static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
> +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
> {
> struct tcp_sock *tp = tcp_sk(sk);
>
> @@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
> }
>
> /* Initialize TSO segments for a packet. */
> -static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
> +void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
> {
> if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
> /* Avoid the costly divide in the normal
> @@ -1223,7 +1223,7 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
> /* Pcount in the middle of the write queue got changed, we need to do various
> * tweaks to fix counters
> */
> -static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
> +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
> {
> struct tcp_sock *tp = tcp_sk(sk);
>
> @@ -1426,6 +1426,11 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
> return len;
> }
>
> +int pskb_trim_head(struct sk_buff *skb, int len)
> +{
> + return __pskb_trim_head(skb, len);
> +}
> +
> /* Remove acked data from a packet in the transmit queue. */
> int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
> {
> @@ -1434,7 +1439,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
> if (skb_unclone(skb, GFP_ATOMIC))
> return -ENOMEM;
>
> - delta_truesize = __pskb_trim_head(skb, len);
> + delta_truesize = pskb_trim_head(skb, len);
>
> TCP_SKB_CB(skb)->seq += len;
> skb->ip_summed = CHECKSUM_PARTIAL;
> @@ -1693,8 +1698,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp)
> * But we can avoid doing the divide again given we already have
> * skb_pcount = skb->len / mss_now
> */
> -static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
> - const struct sk_buff *skb)
> +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
> + const struct sk_buff *skb)
> {
> if (skb->len < tcp_skb_pcount(skb) * mss_now)
> tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
> @@ -1751,11 +1756,11 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
> }
>
> /* Returns the portion of skb which can be sent right away */
> -static unsigned int tcp_mss_split_point(const struct sock *sk,
> - const struct sk_buff *skb,
> - unsigned int mss_now,
> - unsigned int max_segs,
> - int nonagle)
> +unsigned int tcp_mss_split_point(const struct sock *sk,
> + const struct sk_buff *skb,
> + unsigned int mss_now,
> + unsigned int max_segs,
> + int nonagle)
> {
> const struct tcp_sock *tp = tcp_sk(sk);
> u32 partial, needed, window, max_len;
> @@ -1785,7 +1790,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
> /* Can at least one segment of SKB be sent right now, according to the
> * congestion window rules? If so, return how many segments are allowed.
> */
> -static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
> +inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
> const struct sk_buff *skb)
> {
> u32 in_flight, cwnd, halfcwnd;
> @@ -1811,7 +1816,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
> * This must be invoked the first time we consider transmitting
> * SKB onto the wire.
> */
> -static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
> +int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
> {
> int tso_segs = tcp_skb_pcount(skb);
>
> @@ -1826,8 +1831,8 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
> /* Return true if the Nagle test allows this packet to be
> * sent now.
> */
> -static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
> - unsigned int cur_mss, int nonagle)
> +inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
> + unsigned int cur_mss, int nonagle)
> {
> /* Nagle rule does not apply to frames, which sit in the middle of the
> * write_queue (they have no chances to get new data).
> @@ -1849,9 +1854,9 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
> }
>
> /* Does at least the first segment of SKB fit into the send window? */
> -static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
> - const struct sk_buff *skb,
> - unsigned int cur_mss)
> +bool tcp_snd_wnd_test(const struct tcp_sock *tp,
> + const struct sk_buff *skb,
> + unsigned int cur_mss)
> {
> u32 end_seq = TCP_SKB_CB(skb)->end_seq;
>
> @@ -2148,7 +2153,7 @@ int tcp_mtu_probe(struct sock *sk)
> skb->csum = csum_partial(skb->data,
> skb->len, 0);
> } else {
> - __pskb_trim_head(skb, copy);
> + pskb_trim_head(skb, copy);
> tcp_set_skb_tso_segs(skb, mss_now);
> }
> TCP_SKB_CB(skb)->seq += copy;
> @@ -3639,7 +3644,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack);
> * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
> * out-of-date with SND.UNA-1 to probe window.
> */
> -static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
> +int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> struct sk_buff *skb;
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index beaba7a..dbf284d 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -29,7 +29,7 @@
> * Returns: Nothing (void)
> */
>
> -static void tcp_write_err(struct sock *sk)
> +void tcp_write_err(struct sock *sk)
> {
> sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
> sk->sk_error_report(sk);
> @@ -155,9 +155,8 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
> * after "boundary" unsuccessful, exponentially backed-off
> * retransmissions with an initial RTO of TCP_RTO_MIN.
> */
> -static bool retransmits_timed_out(struct sock *sk,
> - unsigned int boundary,
> - unsigned int timeout)
> +bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
> + unsigned int timeout)
> {
> const unsigned int rto_base = TCP_RTO_MIN;
> unsigned int linear_backoff_thresh, start_ts;
> @@ -187,7 +186,7 @@ static bool retransmits_timed_out(struct sock *sk,
> }
>
> /* A write timeout has occurred. Process the after effects. */
> -static int tcp_write_timeout(struct sock *sk)
> +int tcp_write_timeout(struct sock *sk)
> {
> struct inet_connection_sock *icsk = inet_csk(sk);
> struct tcp_sock *tp = tcp_sk(sk);
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index c1e292d..9a242a5 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -107,8 +107,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
> return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
> }
>
> -static int inet6_create(struct net *net, struct socket *sock, int protocol,
> - int kern)
> +int inet6_create(struct net *net, struct socket *sock, int protocol,
> + int kern)
> {
> struct inet_sock *inet;
> struct ipv6_pinfo *np;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 293bdc8..c226cf6 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -71,12 +71,6 @@
>
> #include <trace/events/tcp.h>
>
> -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
> -static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
> - struct request_sock *req);
> -
> -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
> -
> #ifdef CONFIG_TCP_MD5SIG
> static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
> static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
> @@ -88,7 +82,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
> }
> #endif
>
> -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
> +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
> {
> struct dst_entry *dst = skb_dst(skb);
>
> @@ -315,7 +309,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
> return err;
> }
>
> -static void tcp_v6_mtu_reduced(struct sock *sk)
> +void tcp_v6_mtu_reduced(struct sock *sk)
> {
> struct dst_entry *dst;
>
> @@ -495,7 +489,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
> }
>
>
> -static void tcp_v6_reqsk_destructor(struct request_sock *req)
> +void tcp_v6_reqsk_destructor(struct request_sock *req)
> {
> kfree(inet_rsk(req)->ipv6_opt);
> kfree_skb(inet_rsk(req)->pktopts);
> @@ -877,7 +871,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
> kfree_skb(buff);
> }
>
> -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
> +void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
> {
> const struct tcphdr *th = tcp_hdr(skb);
> u32 seq = 0, ack_seq = 0;
> @@ -975,8 +969,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
> inet_twsk_put(tw);
> }
>
> -static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
> - struct request_sock *req)
> +void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
> + struct request_sock *req)
> {
> /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
> * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
> @@ -997,7 +991,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
> }
>
>
> -static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
> +struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
> {
> #ifdef CONFIG_SYN_COOKIES
> const struct tcphdr *th = tcp_hdr(skb);
> @@ -1008,7 +1002,7 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
> return sk;
> }
>
> -static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
> +int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
> {
> if (skb->protocol == htons(ETH_P_IP))
> return tcp_v4_conn_request(sk, skb);
> @@ -1034,11 +1028,11 @@ static void tcp_v6_restore_cb(struct sk_buff *skb)
> sizeof(struct inet6_skb_parm));
> }
>
> -static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
> - struct request_sock *req,
> - struct dst_entry *dst,
> - struct request_sock *req_unhash,
> - bool *own_req)
> +struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
> + struct request_sock *req,
> + struct dst_entry *dst,
> + struct request_sock *req_unhash,
> + bool *own_req)
> {
> struct inet_request_sock *ireq;
> struct ipv6_pinfo *newnp;
> @@ -1250,7 +1244,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
> * This is because we cannot sleep with the original spinlock
> * held.
> */
> -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
> +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
> {
> struct ipv6_pinfo *np = inet6_sk(sk);
> struct tcp_sock *tp;
> @@ -1378,8 +1372,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
> return 0;
> }
>
> -static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
> - const struct tcphdr *th)
> +void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
> + const struct tcphdr *th)
> {
> /* This is tricky: we move IP6CB at its correct location into
> * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
> @@ -1522,13 +1516,16 @@ static int tcp_v6_rcv(struct sk_buff *skb)
>
> sk_incoming_cpu_update(sk);
>
> - bh_lock_sock_nested(sk);
> - tcp_segs_in(tcp_sk(sk), skb);
> - ret = 0;
> - if (!sock_owned_by_user(sk)) {
> - ret = tcp_v6_do_rcv(sk, skb);
> - } else if (tcp_add_backlog(sk, skb)) {
> - goto discard_and_relse;
> + if (likely(!tcp_sk(sk)->op_ops->rx)) {
> + bh_lock_sock_nested(sk);
> + tcp_segs_in(tcp_sk(sk), skb);
> + ret = 0;
> + if (!sock_owned_by_user(sk))
> + ret = tcp_v6_do_rcv(sk, skb);
> + else if (tcp_add_backlog(sk, skb))
> + goto discard_and_relse;
> + } else {
> + return(tcp_sk(sk)->op_ops->rx(sk, skb, refcounted));
> }
> bh_unlock_sock(sk);
>
> @@ -1549,6 +1546,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
> bad_packet:
> __TCP_INC_STATS(net, TCP_MIB_INERRS);
> } else {
> + if (unlikely(tcp_process_unclaimed)) {
> + if (tcp_process_unclaimed(sk, skb))
> + return(0);
> + }
> tcp_v6_send_reset(NULL, skb);
> }
>
> @@ -1594,6 +1595,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
> refcounted = false;
> goto process;
> }
> + if (unlikely(tcp_process_unclaimed)) {
> + if (tcp_process_unclaimed(sk, skb))
> + return 0;
> + }
> }
> /* to ACK */
> /* fall through */