tcp: factor out tcp_build_frag()

Will be needed by the next patch, as MPTCP needs to handle the
error/memory-allocation-needed path directly.

No functional changes intended.

Additionally let MPTCP code access the tcp_remove_empty_skb()
helper.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Author:    Paolo Abeni <pabeni@redhat.com> (2020-11-16 10:48:02 +01:00)
Committer: Jakub Kicinski <kuba@kernel.org>
Parent:    c0a645a7f9
Commit:    b796d04bd0
2 files changed, 70 insertions(+), 52 deletions(-)
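
To illustrate the motivation, the sketch below shows how a caller such as
MPTCP might drive the newly exported helpers and observe the
memory-allocation-needed path itself, instead of relying on
do_tcp_sendpages()'s internal wait_for_space handling. This snippet is not
part of the patch; the function name and the back-off policy are assumptions.

#include <net/tcp.h>

/* Illustrative only: queue one page fragment, letting the caller decide
 * how to react to memory pressure. example_sendpage_frag() is a made-up
 * name, not an API from this series.
 */
static int example_sendpage_frag(struct sock *sk, struct page *page,
				 int offset, size_t *size, int flags)
{
	struct sk_buff *skb;
	size_t copy = *size;
	int size_goal;

	/* Refresh mss/size_goal bookkeeping, as do_tcp_sendpages() does. */
	tcp_send_mss(sk, &size_goal, flags);

	skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy);
	if (!skb) {
		/* Allocation failure or no send-buffer space: drop an empty
		 * tail skb and report back instead of blocking here.
		 */
		tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
		return -ENOMEM;
	}

	*size = copy;	/* bytes actually queued */
	return 0;
}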

diff --git a/include/net/tcp.h b/include/net/tcp.h
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -322,6 +322,7 @@ void tcp_shutdown(struct sock *sk, int how);
 int tcp_v4_early_demux(struct sk_buff *skb);
 int tcp_v4_rcv(struct sk_buff *skb);
 
+void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb);
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
@@ -329,6 +330,8 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags);
+struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
+			       struct page *page, int offset, size_t *size);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 			 size_t size, int flags);
 int tcp_send_mss(struct sock *sk, int *size_goal, int flags);

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -954,7 +954,7 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
  * importantly be able to generate EPOLLOUT for Edge Trigger epoll()
  * users.
  */
-static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
 {
 	if (skb && !skb->len) {
 		tcp_unlink_write_queue(skb, sk);
@@ -964,6 +964,68 @@ static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
+struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
+			       struct page *page, int offset, size_t *size)
+{
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	bool can_coalesce;
+	int copy, i;
+
+	if (!skb || (copy = size_goal - skb->len) <= 0 ||
+	    !tcp_skb_can_collapse_to(skb)) {
+new_segment:
+		if (!sk_stream_memory_free(sk))
+			return NULL;
+
+		skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+					  tcp_rtx_and_write_queues_empty(sk));
+		if (!skb)
+			return NULL;
+
+#ifdef CONFIG_TLS_DEVICE
+		skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
+		skb_entail(sk, skb);
+		copy = size_goal;
+	}
+
+	if (copy > *size)
+		copy = *size;
+
+	i = skb_shinfo(skb)->nr_frags;
+	can_coalesce = skb_can_coalesce(skb, i, page, offset);
+	if (!can_coalesce && i >= sysctl_max_skb_frags) {
+		tcp_mark_push(tp, skb);
+		goto new_segment;
+	}
+	if (!sk_wmem_schedule(sk, copy))
+		return NULL;
+
+	if (can_coalesce) {
+		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+	} else {
+		get_page(page);
+		skb_fill_page_desc(skb, i, page, offset, copy);
+	}
+
+	if (!(flags & MSG_NO_SHARED_FRAGS))
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+	skb->len += copy;
+	skb->data_len += copy;
+	skb->truesize += copy;
+	sk_wmem_queued_add(sk, copy);
+	sk_mem_charge(sk, copy);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
+	TCP_SKB_CB(skb)->end_seq += copy;
+	tcp_skb_pcount_set(skb, 0);
+
+	*size = copy;
+	return skb;
+}
+
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 			 size_t size, int flags)
 {
@@ -999,60 +1061,13 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		goto out_err;
 
 	while (size > 0) {
-		struct sk_buff *skb = tcp_write_queue_tail(sk);
-		int copy, i;
-		bool can_coalesce;
+		struct sk_buff *skb;
+		size_t copy = size;
 
-		if (!skb || (copy = size_goal - skb->len) <= 0 ||
-		    !tcp_skb_can_collapse_to(skb)) {
-new_segment:
-			if (!sk_stream_memory_free(sk))
-				goto wait_for_space;
-
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-					tcp_rtx_and_write_queues_empty(sk));
-			if (!skb)
-				goto wait_for_space;
-
-#ifdef CONFIG_TLS_DEVICE
-			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
-#endif
-			skb_entail(sk, skb);
-			copy = size_goal;
-		}
-
-		if (copy > size)
-			copy = size;
-
-		i = skb_shinfo(skb)->nr_frags;
-		can_coalesce = skb_can_coalesce(skb, i, page, offset);
-		if (!can_coalesce && i >= sysctl_max_skb_frags) {
-			tcp_mark_push(tp, skb);
-			goto new_segment;
-		}
-		if (!sk_wmem_schedule(sk, copy))
+		skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy);
+		if (!skb)
 			goto wait_for_space;
 
-		if (can_coalesce) {
-			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
-		} else {
-			get_page(page);
-			skb_fill_page_desc(skb, i, page, offset, copy);
-		}
-
-		if (!(flags & MSG_NO_SHARED_FRAGS))
-			skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-
-		skb->len += copy;
-		skb->data_len += copy;
-		skb->truesize += copy;
-		sk_wmem_queued_add(sk, copy);
-		sk_mem_charge(sk, copy);
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
-		TCP_SKB_CB(skb)->end_seq += copy;
-		tcp_skb_pcount_set(skb, 0);
-
 		if (!copied)
 			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;