2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/msg.c: TIPC message header routines
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2015-03-14 04:08:06 +08:00
|
|
|
* Copyright (c) 2000-2006, 2014-2015, Ericsson AB
|
2011-01-26 02:33:31 +08:00
|
|
|
* Copyright (c) 2005, 2010-2011, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2015-01-09 15:27:04 +08:00
|
|
|
#include <net/sock.h>
|
2006-01-03 02:04:38 +08:00
|
|
|
#include "core.h"
|
|
|
|
#include "msg.h"
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
result in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
#include "addr.h"
|
|
|
|
#include "name_table.h"
|
2019-11-08 13:05:11 +08:00
|
|
|
#include "crypto.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2021-06-28 14:37:45 +08:00
|
|
|
#define BUF_ALIGN(x) ALIGN(x, 4)
|
2014-06-26 09:41:35 +08:00
|
|
|
#define MAX_FORWARD_SIZE 1024
|
2019-11-08 13:05:11 +08:00
|
|
|
#ifdef CONFIG_TIPC_CRYPTO
|
|
|
|
#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
|
2021-06-28 14:37:44 +08:00
|
|
|
#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
|
2019-11-08 13:05:11 +08:00
|
|
|
#else
|
tipc: unclone unbundled buffers before forwarding
When extracting an individual message from a received "bundle" buffer,
we just create a clone of the base buffer, and adjust it to point into
the right position of the linearized data area of the latter. This works
well for regular message reception, but during periods of extremely high
load it may happen that an extracted buffer, e.g, a connection probe, is
reversed and forwarded through an external interface while the preceding
extracted message is still unhandled. When this happens, the header or
data area of the preceding message will be partially overwritten by a
MAC header, leading to unpredicatable consequences, such as a link
reset.
We now fix this by ensuring that the msg_reverse() function never
returns a cloned buffer, and that the returned buffer always contains
sufficient valid head and tail room to be forwarded.
Reported-by: Erik Hugne <erik.hugne@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-20 21:20:46 +08:00
|
|
|
#define BUF_HEADROOM (LL_MAX_HEADER + 48)
|
2021-06-28 14:37:44 +08:00
|
|
|
#define BUF_OVERHEAD BUF_HEADROOM
|
2019-11-08 13:05:11 +08:00
|
|
|
#endif
|
2014-06-26 09:41:35 +08:00
|
|
|
|
2021-06-28 14:37:44 +08:00
|
|
|
const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
|
|
|
|
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
|
|
|
|
2015-01-09 15:27:01 +08:00
|
|
|
/**
|
|
|
|
* tipc_buf_acquire - creates a TIPC message buffer
|
|
|
|
* @size: message size (including TIPC header)
|
2020-11-30 02:32:50 +08:00
|
|
|
* @gfp: memory allocation flags
|
2015-01-09 15:27:01 +08:00
|
|
|
*
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: a new buffer with data pointers set to the specified size.
|
2015-01-09 15:27:01 +08:00
|
|
|
*
|
2020-11-30 02:32:50 +08:00
|
|
|
* NOTE:
|
|
|
|
* Headroom is reserved to allow prepending of a data link header.
|
|
|
|
* There may also be unrequested tailroom present at the buffer's end.
|
2015-01-09 15:27:01 +08:00
|
|
|
*/
|
2017-01-13 22:46:25 +08:00
|
|
|
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
|
2015-01-09 15:27:01 +08:00
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2021-06-28 14:37:44 +08:00
|
|
|
skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
|
2015-01-09 15:27:01 +08:00
|
|
|
if (skb) {
|
|
|
|
skb_reserve(skb, BUF_HEADROOM);
|
|
|
|
skb_put(skb, size);
|
|
|
|
skb->next = NULL;
|
|
|
|
}
|
|
|
|
return skb;
|
|
|
|
}
|
|
|
|
|
2015-02-05 21:36:36 +08:00
|
|
|
/**
 * tipc_msg_init - initialize a TIPC message header
 * @own_node: stored as previous node and, for headers larger than
 *            SHORT_H_SIZE, also as originating node
 * @m: header to initialize; its first @hsize bytes are zeroed first
 * @user: value for the user field
 * @type: value for the type field
 * @hsize: header size; also used as the initial message size
 * @dnode: destination node, stored only when @hsize exceeds SHORT_H_SIZE
 */
void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type,
		   u32 hsize, u32 dnode)
{
	memset(m, 0, hsize);
	msg_set_version(m);
	msg_set_user(m, user);
	msg_set_hdr_sz(m, hsize);
	msg_set_size(m, hsize);
	msg_set_prevnode(m, own_node);
	msg_set_type(m, type);

	/* Origin/destination node are only set for headers above SHORT_H_SIZE */
	if (hsize <= SHORT_H_SIZE)
		return;

	msg_set_orignode(m, own_node);
	msg_set_destnode(m, dnode);
}
|
|
|
|
|
2015-02-05 21:36:36 +08:00
|
|
|
/**
 * tipc_msg_create - acquire a buffer and fill in its message header
 * @user: value for the user field
 * @type: value for the type field
 * @hdr_sz: header size
 * @data_sz: size of the data area following the header
 * @dnode: destination node
 * @onode: own node, passed on to tipc_msg_init()
 * @dport: destination port
 * @oport: originating port
 * @errcode: value for the error code field
 *
 * The buffer is acquired with GFP_ATOMIC and sized to @hdr_sz + @data_sz.
 * Only the header is initialized here.
 *
 * Return: the new buffer, or NULL if no buffer could be acquired.
 */
struct sk_buff *tipc_msg_create(uint user, uint type,
				uint hdr_sz, uint data_sz, u32 dnode,
				u32 onode, u32 dport, u32 oport, int errcode)
{
	uint msg_sz = hdr_sz + data_sz;
	struct sk_buff *skb = tipc_buf_acquire(msg_sz, GFP_ATOMIC);
	struct tipc_msg *hdr;

	if (unlikely(!skb))
		return NULL;

	hdr = buf_msg(skb);
	tipc_msg_init(onode, hdr, user, type, hdr_sz, dnode);
	msg_set_size(hdr, msg_sz);
	msg_set_origport(hdr, oport);
	msg_set_destport(hdr, dport);
	msg_set_errcode(hdr, errcode);

	return skb;
}
|
|
|
|
|
2014-05-14 17:39:12 +08:00
|
|
|
/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
|
2014-07-06 01:44:13 +08:00
|
|
|
* @*headbuf: in: NULL for first frag, otherwise value returned from prev call
|
|
|
|
* out: set when successful non-complete reassembly, otherwise NULL
|
|
|
|
* @*buf: in: the buffer to append. Always defined
|
2014-10-30 13:58:51 +08:00
|
|
|
* out: head buf after successful complete reassembly, otherwise NULL
|
2014-07-06 01:44:13 +08:00
|
|
|
* Returns 1 when reassembly complete, otherwise 0
|
2014-05-14 17:39:12 +08:00
|
|
|
*/
|
|
|
|
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
|
|
|
|
{
|
|
|
|
struct sk_buff *head = *headbuf;
|
|
|
|
struct sk_buff *frag = *buf;
|
2015-10-19 23:33:00 +08:00
|
|
|
struct sk_buff *tail = NULL;
|
2014-07-26 02:48:09 +08:00
|
|
|
struct tipc_msg *msg;
|
|
|
|
u32 fragid;
|
2014-05-14 17:39:12 +08:00
|
|
|
int delta;
|
2014-07-26 02:48:09 +08:00
|
|
|
bool headstolen;
|
2014-05-14 17:39:12 +08:00
|
|
|
|
2014-07-26 02:48:09 +08:00
|
|
|
if (!frag)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
msg = buf_msg(frag);
|
|
|
|
fragid = msg_type(msg);
|
|
|
|
frag->next = NULL;
|
2014-05-14 17:39:12 +08:00
|
|
|
skb_pull(frag, msg_hdr_sz(msg));
|
|
|
|
|
|
|
|
if (fragid == FIRST_FRAGMENT) {
|
2014-07-26 02:48:09 +08:00
|
|
|
if (unlikely(head))
|
|
|
|
goto err;
|
2021-05-08 03:57:03 +08:00
|
|
|
if (skb_has_frag_list(frag) && __skb_linearize(frag))
|
|
|
|
goto err;
|
2024-04-30 22:03:38 +08:00
|
|
|
*buf = NULL;
|
2020-10-27 11:24:03 +08:00
|
|
|
frag = skb_unshare(frag, GFP_ATOMIC);
|
2020-09-13 19:37:31 +08:00
|
|
|
if (unlikely(!frag))
|
2014-07-26 02:48:09 +08:00
|
|
|
goto err;
|
2014-05-14 17:39:12 +08:00
|
|
|
head = *headbuf = frag;
|
2015-10-19 23:33:00 +08:00
|
|
|
TIPC_SKB_CB(head)->tail = NULL;
|
2014-05-14 17:39:12 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2014-07-26 02:48:09 +08:00
|
|
|
|
2014-05-14 17:39:12 +08:00
|
|
|
if (!head)
|
2014-07-26 02:48:09 +08:00
|
|
|
goto err;
|
|
|
|
|
2024-04-30 21:53:37 +08:00
|
|
|
/* Either the input skb ownership is transferred to headskb
|
|
|
|
* or the input skb is freed, clear the reference to avoid
|
|
|
|
* bad access on error path.
|
|
|
|
*/
|
|
|
|
*buf = NULL;
|
2014-05-14 17:39:12 +08:00
|
|
|
if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
|
|
|
|
kfree_skb_partial(frag, headstolen);
|
|
|
|
} else {
|
2014-07-26 02:48:09 +08:00
|
|
|
tail = TIPC_SKB_CB(head)->tail;
|
2014-05-14 17:39:12 +08:00
|
|
|
if (!skb_has_frag_list(head))
|
|
|
|
skb_shinfo(head)->frag_list = frag;
|
|
|
|
else
|
|
|
|
tail->next = frag;
|
|
|
|
head->truesize += frag->truesize;
|
|
|
|
head->data_len += frag->len;
|
|
|
|
head->len += frag->len;
|
|
|
|
TIPC_SKB_CB(head)->tail = frag;
|
|
|
|
}
|
2014-07-26 02:48:09 +08:00
|
|
|
|
2014-05-14 17:39:12 +08:00
|
|
|
if (fragid == LAST_FRAGMENT) {
|
2019-11-08 13:05:11 +08:00
|
|
|
TIPC_SKB_CB(head)->validated = 0;
|
2017-11-16 04:23:56 +08:00
|
|
|
if (unlikely(!tipc_msg_validate(&head)))
|
2015-03-14 04:08:07 +08:00
|
|
|
goto err;
|
2014-05-14 17:39:12 +08:00
|
|
|
*buf = head;
|
|
|
|
TIPC_SKB_CB(head)->tail = NULL;
|
|
|
|
*headbuf = NULL;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
2014-07-26 02:48:09 +08:00
|
|
|
err:
|
2014-05-14 17:39:12 +08:00
|
|
|
kfree_skb(*buf);
|
2014-07-06 01:44:13 +08:00
|
|
|
kfree_skb(*headbuf);
|
|
|
|
*buf = *headbuf = NULL;
|
2014-05-14 17:39:12 +08:00
|
|
|
return 0;
|
|
|
|
}
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
/**
|
|
|
|
* tipc_msg_append(): Append data to tail of an existing buffer queue
|
2020-07-13 07:15:14 +08:00
|
|
|
* @_hdr: header to be used
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
* @m: the data to be appended
|
|
|
|
* @mss: max allowable size of buffer
|
|
|
|
* @dlen: size of data to be appended
|
2020-11-30 02:32:48 +08:00
|
|
|
* @txq: queue to append to
|
|
|
|
*
|
|
|
|
* Return: the number of 1k blocks appended or errno value
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
*/
|
|
|
|
int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
|
|
|
|
int mss, struct sk_buff_head *txq)
|
|
|
|
{
|
2020-05-28 15:43:59 +08:00
|
|
|
struct sk_buff *skb;
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
int accounted, total, curr;
|
|
|
|
int mlen, cpy, rem = dlen;
|
|
|
|
struct tipc_msg *hdr;
|
|
|
|
|
|
|
|
skb = skb_peek_tail(txq);
|
|
|
|
accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
|
|
|
|
total = accounted;
|
|
|
|
|
2020-06-03 13:06:01 +08:00
|
|
|
do {
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
if (!skb || skb->len >= mss) {
|
|
|
|
skb = tipc_buf_acquire(mss, GFP_KERNEL);
|
|
|
|
if (unlikely(!skb))
|
|
|
|
return -ENOMEM;
|
|
|
|
skb_orphan(skb);
|
|
|
|
skb_trim(skb, MIN_H_SIZE);
|
|
|
|
hdr = buf_msg(skb);
|
|
|
|
skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE);
|
|
|
|
msg_set_hdr_sz(hdr, MIN_H_SIZE);
|
|
|
|
msg_set_size(hdr, MIN_H_SIZE);
|
|
|
|
__skb_queue_tail(txq, skb);
|
|
|
|
total += 1;
|
|
|
|
}
|
|
|
|
hdr = buf_msg(skb);
|
|
|
|
curr = msg_blocks(hdr);
|
|
|
|
mlen = msg_size(hdr);
|
2020-06-11 18:07:35 +08:00
|
|
|
cpy = min_t(size_t, rem, mss - mlen);
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
|
|
|
|
return -EFAULT;
|
|
|
|
msg_set_size(hdr, mlen + cpy);
|
|
|
|
skb_put(skb, cpy);
|
|
|
|
rem -= cpy;
|
|
|
|
total += msg_blocks(hdr) - curr;
|
2020-06-11 18:07:35 +08:00
|
|
|
} while (rem > 0);
|
tipc: add smart nagle feature
We introduce a feature that works like a combination of TCP_NAGLE and
TCP_CORK, but without some of the weaknesses of those. In particular,
we will not observe long delivery delays because of delayed acks, since
the algorithm itself decides if and when acks are to be sent from the
receiving peer.
- The nagle property as such is determined by manipulating a new
'maxnagle' field in struct tipc_sock. If certain conditions are met,
'maxnagle' will define max size of the messages which can be bundled.
If it is set to zero no messages are ever bundled, implying that the
nagle property is disabled.
- A socket with the nagle property enabled enters nagle mode when more
than 4 messages have been sent out without receiving any data message
from the peer.
- A socket leaves nagle mode whenever it receives a data message from
the peer.
In nagle mode, messages smaller than 'maxnagle' are accumulated in the
socket write queue. The last buffer in the queue is marked with a new
'ack_required' bit, which forces the receiving peer to send a CONN_ACK
message back to the sender upon reception.
The accumulated contents of the write queue is transmitted when one of
the following events or conditions occur.
- A CONN_ACK message is received from the peer.
- A data message is received from the peer.
- A SOCK_WAKEUP pseudo message is received from the link level.
- The write queue contains more than 64 1k blocks of data.
- The connection is being shut down.
- There is no CONN_ACK message to expect. I.e., there is currently
no outstanding message where the 'ack_required' bit was set. As a
consequence, the first message added after we enter nagle mode
is always sent directly with this bit set.
This new feature gives a 50-100% improvement of throughput for small
(i.e., less than MTU size) messages, while it might add up to one RTT
to latency time when the socket is in nagle mode.
Acked-by: Ying Xue <ying.xue@windreiver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-30 21:00:41 +08:00
|
|
|
return total - accounted;
|
|
|
|
}
|
|
|
|
|
2015-03-14 04:08:06 +08:00
|
|
|
/* tipc_msg_validate - validate basic format of received message
|
|
|
|
*
|
|
|
|
* This routine ensures a TIPC message has an acceptable header, and at least
|
|
|
|
* as much data as the header indicates it should. The routine also ensures
|
|
|
|
* that the entire message header is stored in the main fragment of the message
|
|
|
|
* buffer, to simplify future access to message header fields.
|
|
|
|
*
|
|
|
|
* Note: Having extra info present in the message header or data areas is OK.
|
|
|
|
* TIPC will ignore the excess, under the assumption that it is optional info
|
|
|
|
* introduced by a later release of the protocol.
|
|
|
|
*/
|
2017-11-16 04:23:56 +08:00
|
|
|
bool tipc_msg_validate(struct sk_buff **_skb)
|
2015-03-14 04:08:06 +08:00
|
|
|
{
|
2017-11-16 04:23:56 +08:00
|
|
|
struct sk_buff *skb = *_skb;
|
|
|
|
struct tipc_msg *hdr;
|
2015-03-14 04:08:06 +08:00
|
|
|
int msz, hsz;
|
|
|
|
|
2017-11-16 04:23:56 +08:00
|
|
|
/* Ensure that flow control ratio condition is satisfied */
|
2018-02-09 00:16:25 +08:00
|
|
|
if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) {
|
|
|
|
skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC);
|
2017-11-16 04:23:56 +08:00
|
|
|
if (!skb)
|
|
|
|
return false;
|
|
|
|
kfree_skb(*_skb);
|
|
|
|
*_skb = skb;
|
|
|
|
}
|
|
|
|
|
2015-03-14 04:08:06 +08:00
|
|
|
if (unlikely(TIPC_SKB_CB(skb)->validated))
|
|
|
|
return true;
|
2019-11-08 13:05:11 +08:00
|
|
|
|
2015-03-14 04:08:06 +08:00
|
|
|
if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
hsz = msg_hdr_sz(buf_msg(skb));
|
|
|
|
if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE))
|
|
|
|
return false;
|
|
|
|
if (unlikely(!pskb_may_pull(skb, hsz)))
|
|
|
|
return false;
|
|
|
|
|
2017-11-16 04:23:56 +08:00
|
|
|
hdr = buf_msg(skb);
|
|
|
|
if (unlikely(msg_version(hdr) != TIPC_VERSION))
|
2015-03-14 04:08:06 +08:00
|
|
|
return false;
|
|
|
|
|
2017-11-16 04:23:56 +08:00
|
|
|
msz = msg_size(hdr);
|
2015-03-14 04:08:06 +08:00
|
|
|
if (unlikely(msz < hsz))
|
|
|
|
return false;
|
|
|
|
if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
|
|
|
|
return false;
|
|
|
|
if (unlikely(skb->len < msz))
|
|
|
|
return false;
|
|
|
|
|
2019-11-08 13:05:11 +08:00
|
|
|
TIPC_SKB_CB(skb)->validated = 1;
|
2015-03-14 04:08:06 +08:00
|
|
|
return true;
|
|
|
|
}
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
|
tipc: fix changeover issues due to large packet
In conjunction with changing the interfaces' MTU (e.g. especially in
the case of a bonding) where the TIPC links are brought up and down
in a short time, a couple of issues were detected with the current link
changeover mechanism:
1) When one link is up but immediately forced down again, the failover
procedure will be carried out in order to failover all the messages in
the link's transmq queue onto the other working link. The link and node
state is also set to FAILINGOVER as part of the process. The message
will be transmitted in the form of a FAILOVER_MSG, so its size grows by 40
bytes (= the message header size). There is no problem if the original
message size is not larger than the link's MTU - 40, and indeed this is
the max size of a normal payload messages. However, in the situation
above, because the link has just been up, the messages in the link's
transmq are mostly SYNCH_MSGs which had been generated by the link
synching procedure, then their size might reach the max value already!
When the FAILOVER_MSG is built on the top of such a SYNCH_MSG, its size
will exceed the link's MTU. As a result, the messages are dropped
silently and the failover procedure will never finish; the link will
not be able to exit the FAILINGOVER state, so cannot be re-established.
2) The same scenario above can happen more easily in case the MTU of
the links is set differently or when changing. In that case, as long as
a large message in the failure link's transmq queue was built and
fragmented with its link's MTU > the other link's one, the issue will
happen (there is no need of a link synching in advance).
3) The link synching procedure also faces the same issue, but since
the link synching is only started upon receipt of a SYNCH_MSG, dropping
the message will not result in a state deadlock; it is still not the
intended behavior.
The 1) & 3) issues are resolved by the last commit that only a dummy
SYNCH_MSG (i.e. without data) is generated at the link synching, so the
size of a FAILOVER_MSG if any then will never exceed the link's MTU.
For the 2) issue, the only solution is trying to fragment the messages
in the failure link's transmq queue according to the working link's MTU
so they can be failovered then. A new function is made to accomplish
this, it will still be a TUNNEL PROTOCOL/FAILOVER MSG but if the
original message size is too large, it will be fragmented & reassembled
at the receiving side.
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-24 09:56:12 +08:00
|
|
|
/**
|
|
|
|
* tipc_msg_fragment - build a fragment skb list for TIPC message
|
|
|
|
*
|
|
|
|
* @skb: TIPC message skb
|
|
|
|
* @hdr: internal msg header to be put on the top of the fragments
|
|
|
|
* @pktmax: max size of a fragment incl. the header
|
|
|
|
* @frags: returned fragment skb list
|
|
|
|
*
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: 0 if the fragmentation is successful, otherwise: -EINVAL
|
tipc: fix changeover issues due to large packet
In conjunction with changing the interfaces' MTU (e.g. especially in
the case of a bonding) where the TIPC links are brought up and down
in a short time, a couple of issues were detected with the current link
changeover mechanism:
1) When one link is up but immediately forced down again, the failover
procedure will be carried out in order to failover all the messages in
the link's transmq queue onto the other working link. The link and node
state is also set to FAILINGOVER as part of the process. The message
will be transmited in form of a FAILOVER_MSG, so its size is plus of 40
bytes (= the message header size). There is no problem if the original
message size is not larger than the link's MTU - 40, and indeed this is
the max size of a normal payload messages. However, in the situation
above, because the link has just been up, the messages in the link's
transmq are almost SYNCH_MSGs which had been generated by the link
synching procedure, then their size might reach the max value already!
When the FAILOVER_MSG is built on the top of such a SYNCH_MSG, its size
will exceed the link's MTU. As a result, the messages are dropped
silently and the failover procedure will never end up, the link will
not be able to exit the FAILINGOVER state, so cannot be re-established.
2) The same scenario above can happen more easily in case the MTU of
the links is set differently or when changing. In that case, as long as
a large message in the failure link's transmq queue was built and
fragmented with its link's MTU > the other link's one, the issue will
happen (there is no need of a link synching in advance).
3) The link synching procedure also faces with the same issue but since
the link synching is only started upon receipt of a SYNCH_MSG, dropping
the message will not result in a state deadlock, but it is not expected
as design.
The 1) & 3) issues are resolved by the last commit that only a dummy
SYNCH_MSG (i.e. without data) is generated at the link synching, so the
size of a FAILOVER_MSG if any then will never exceed the link's MTU.
For the 2) issue, the only solution is trying to fragment the messages
in the failure link's transmq queue according to the working link's MTU
so they can be failovered then. A new function is made to accomplish
this, it will still be a TUNNEL PROTOCOL/FAILOVER MSG but if the
original message size is too large, it will be fragmented & reassembled
at the receiving side.
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-24 09:56:12 +08:00
|
|
|
* or -ENOMEM
|
|
|
|
*/
|
|
|
|
int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
|
|
|
|
int pktmax, struct sk_buff_head *frags)
|
|
|
|
{
|
|
|
|
int pktno, nof_fragms, dsz, dmax, eat;
|
|
|
|
struct tipc_msg *_hdr;
|
|
|
|
struct sk_buff *_skb;
|
|
|
|
u8 *data;
|
|
|
|
|
|
|
|
/* Non-linear buffer? */
|
|
|
|
if (skb_linearize(skb))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
data = (u8 *)skb->data;
|
|
|
|
dsz = msg_size(buf_msg(skb));
|
|
|
|
dmax = pktmax - INT_H_SIZE;
|
|
|
|
if (dsz <= dmax || !dmax)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
nof_fragms = dsz / dmax + 1;
|
|
|
|
for (pktno = 1; pktno <= nof_fragms; pktno++) {
|
|
|
|
if (pktno < nof_fragms)
|
|
|
|
eat = dmax;
|
|
|
|
else
|
|
|
|
eat = dsz % dmax;
|
|
|
|
/* Allocate a new fragment */
|
|
|
|
_skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC);
|
|
|
|
if (!_skb)
|
|
|
|
goto error;
|
|
|
|
skb_orphan(_skb);
|
|
|
|
__skb_queue_tail(frags, _skb);
|
|
|
|
/* Copy header & data to the fragment */
|
|
|
|
skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE);
|
|
|
|
skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat);
|
|
|
|
data += eat;
|
|
|
|
/* Update the fragment's header */
|
|
|
|
_hdr = buf_msg(_skb);
|
|
|
|
msg_set_fragm_no(_hdr, pktno);
|
|
|
|
msg_set_nof_fragms(_hdr, nof_fragms);
|
|
|
|
msg_set_size(_hdr, INT_H_SIZE + eat);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
__skb_queue_purge(frags);
|
|
|
|
__skb_queue_head_init(frags);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
/**
|
2014-07-17 08:41:03 +08:00
|
|
|
* tipc_msg_build - create buffer chain containing specified header and data
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
* @mhdr: Message header, to be prepended to data
|
2014-11-15 14:16:27 +08:00
|
|
|
* @m: User message
|
2020-11-30 02:32:50 +08:00
|
|
|
* @offset: buffer offset for fragmented messages (FIXME)
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
* @dsz: Total length of user data
|
|
|
|
* @pktmax: Max packet size that can be used
|
2014-11-26 11:41:55 +08:00
|
|
|
* @list: Buffer or chain of buffers to be returned to caller
|
|
|
|
*
|
2017-11-30 23:47:25 +08:00
|
|
|
* Note that the recursive call we are making here is safe, since it can
|
|
|
|
* logically go only one further level down.
|
|
|
|
*
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: message data size or errno: -ENOMEM, -EFAULT
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
*/
|
2017-11-30 23:47:25 +08:00
|
|
|
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
|
|
|
|
int dsz, int pktmax, struct sk_buff_head *list)
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
{
|
|
|
|
int mhsz = msg_hdr_sz(mhdr);
|
2017-11-30 23:47:25 +08:00
|
|
|
struct tipc_msg pkthdr;
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
int msz = mhsz + dsz;
|
|
|
|
int pktrem = pktmax;
|
2014-11-26 11:41:55 +08:00
|
|
|
struct sk_buff *skb;
|
2017-11-30 23:47:25 +08:00
|
|
|
int drem = dsz;
|
|
|
|
int pktno = 1;
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
char *pktpos;
|
2017-11-30 23:47:25 +08:00
|
|
|
int pktsz;
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
int rc;
|
2014-11-26 11:41:55 +08:00
|
|
|
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
msg_set_size(mhdr, msz);
|
|
|
|
|
|
|
|
/* No fragmentation needed? */
|
|
|
|
if (likely(msz <= pktmax)) {
|
2017-01-13 22:46:25 +08:00
|
|
|
skb = tipc_buf_acquire(msz, GFP_KERNEL);
|
2017-11-30 23:47:25 +08:00
|
|
|
|
|
|
|
/* Fall back to smaller MTU if node local message */
|
|
|
|
if (unlikely(!skb)) {
|
|
|
|
if (pktmax != MAX_MSG_SIZE)
|
|
|
|
return -ENOMEM;
|
2021-06-28 14:37:44 +08:00
|
|
|
rc = tipc_msg_build(mhdr, m, offset, dsz,
|
|
|
|
one_page_mtu, list);
|
2017-11-30 23:47:25 +08:00
|
|
|
if (rc != dsz)
|
|
|
|
return rc;
|
|
|
|
if (tipc_msg_assemble(list))
|
|
|
|
return dsz;
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
return -ENOMEM;
|
2017-11-30 23:47:25 +08:00
|
|
|
}
|
2015-01-09 15:27:04 +08:00
|
|
|
skb_orphan(skb);
|
2014-11-26 11:41:55 +08:00
|
|
|
__skb_queue_tail(list, skb);
|
|
|
|
skb_copy_to_linear_data(skb, mhdr, mhsz);
|
|
|
|
pktpos = skb->data + mhsz;
|
2016-11-02 10:09:04 +08:00
|
|
|
if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
return dsz;
|
|
|
|
rc = -EFAULT;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Prepare reusable fragment header */
|
2015-02-05 21:36:36 +08:00
|
|
|
tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER,
|
|
|
|
FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr));
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
msg_set_size(&pkthdr, pktmax);
|
|
|
|
msg_set_fragm_no(&pkthdr, pktno);
|
tipc: clean up handling of message priorities
Messages transferred by TIPC are assigned an "importance priority", -an
integer value indicating how to treat the message when there is link or
destination socket congestion.
There is no separate header field for this value. Instead, the message
user values have been chosen in ascending order according to perceived
importance, so that the message user field can be used for this.
This is not a good solution. First, we have many more users than the
needed priority levels, so we end up with treating more priority
levels than necessary. Second, the user field cannot always
accurately reflect the priority of the message. E.g., a message
fragment packet should really have the priority of the enveloped
user data message, and not the priority of the MSG_FRAGMENTER user.
Until now, we have been working around this problem in different ways,
but it is now time to implement a consistent way of handling such
priorities, although still within the constraint that we cannot
allocate any more bits in the regular data message header for this.
In this commit, we define a new priority level, TIPC_SYSTEM_IMPORTANCE,
that will be the only one used apart from the four (lower) user data
levels. All non-data messages map down to this priority. Furthermore,
we take some free bits from the MSG_FRAGMENTER header and allocate
them to store the priority of the enveloped message. We then adjust
the functions msg_importance()/msg_set_importance() so that they
read/set the correct header fields depending on user type.
This small protocol change is fully compatible, because the code at
the receiving end of a link currently reads the importance level
only from user data messages, where there is no change.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-14 04:08:11 +08:00
|
|
|
msg_set_importance(&pkthdr, msg_importance(mhdr));
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
|
|
|
|
/* Prepare first fragment */
|
2017-01-13 22:46:25 +08:00
|
|
|
skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
|
2014-11-26 11:41:55 +08:00
|
|
|
if (!skb)
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
return -ENOMEM;
|
2015-01-09 15:27:04 +08:00
|
|
|
skb_orphan(skb);
|
2014-11-26 11:41:55 +08:00
|
|
|
__skb_queue_tail(list, skb);
|
|
|
|
pktpos = skb->data;
|
|
|
|
skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
pktpos += INT_H_SIZE;
|
|
|
|
pktrem -= INT_H_SIZE;
|
2014-11-26 11:41:55 +08:00
|
|
|
skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz);
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
pktpos += mhsz;
|
|
|
|
pktrem -= mhsz;
|
|
|
|
|
|
|
|
do {
|
|
|
|
if (drem < pktrem)
|
|
|
|
pktrem = drem;
|
|
|
|
|
2016-11-02 10:09:04 +08:00
|
|
|
if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
rc = -EFAULT;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
drem -= pktrem;
|
|
|
|
|
|
|
|
if (!drem)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Prepare new fragment: */
|
|
|
|
if (drem < (pktmax - INT_H_SIZE))
|
|
|
|
pktsz = drem + INT_H_SIZE;
|
|
|
|
else
|
|
|
|
pktsz = pktmax;
|
2017-01-13 22:46:25 +08:00
|
|
|
skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
|
2014-11-26 11:41:55 +08:00
|
|
|
if (!skb) {
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto error;
|
|
|
|
}
|
2015-01-09 15:27:04 +08:00
|
|
|
skb_orphan(skb);
|
2014-11-26 11:41:55 +08:00
|
|
|
__skb_queue_tail(list, skb);
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
msg_set_type(&pkthdr, FRAGMENT);
|
|
|
|
msg_set_size(&pkthdr, pktsz);
|
|
|
|
msg_set_fragm_no(&pkthdr, ++pktno);
|
2014-11-26 11:41:55 +08:00
|
|
|
skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
|
|
|
|
pktpos = skb->data + INT_H_SIZE;
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
pktrem = pktsz - INT_H_SIZE;
|
|
|
|
|
|
|
|
} while (1);
|
2014-11-26 11:41:55 +08:00
|
|
|
msg_set_type(buf_msg(skb), LAST_FRAGMENT);
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
return dsz;
|
|
|
|
error:
|
2014-11-26 11:41:55 +08:00
|
|
|
__skb_queue_purge(list);
|
|
|
|
__skb_queue_head_init(list);
|
tipc: introduce direct iovec to buffer chain fragmentation function
Fragmentation at message sending is currently performed in two
places in link.c, depending on whether data to be transmitted
is delivered in the form of an iovec or as a big sk_buff. Those
functions are also tightly entangled with the send functions
that are using them.
We now introduce a re-entrant, standalone function, tipc_msg_build2(),
that builds a packet chain directly from an iovec. Each fragment is
sized according to the MTU value given by the caller, and is prepended
with a correctly built fragment header, when needed. The function is
independent from who is calling and where the chain will be delivered,
as long as the caller is able to indicate a correct MTU.
The function is tested, but not called by anybody yet. Since it is
incompatible with the existing tipc_msg_build(), and we cannot yet
remove that function, we have given it a temporary name.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:34 +08:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
/**
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
* tipc_msg_bundle - Append contents of a buffer to tail of an existing one
|
|
|
|
* @bskb: the bundle buffer to append to
|
|
|
|
* @msg: message to be appended
|
|
|
|
* @max: max allowable size for the bundle buffer
|
|
|
|
*
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: "true" if bundling has been performed, otherwise "false"
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
*/
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
|
|
|
|
u32 max)
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
{
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
struct tipc_msg *bmsg = buf_msg(bskb);
|
|
|
|
u32 msz, bsz, offset, pad;
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
msz = msg_size(msg);
|
2015-03-14 04:08:10 +08:00
|
|
|
bsz = msg_size(bmsg);
|
2021-06-28 14:37:45 +08:00
|
|
|
offset = BUF_ALIGN(bsz);
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
pad = offset - bsz;
|
2015-03-14 04:08:10 +08:00
|
|
|
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
if (unlikely(skb_tailroom(bskb) < (pad + msz)))
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return false;
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
if (unlikely(max < (offset + msz)))
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return false;
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
that can generate bundles of only one payload message, that causes
unnecessary overheads for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function, make bundle of a very large
(size ~ MSS) and small messages which is not with the current algorithm
e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
|
|
|
|
skb_put(bskb, pad + msz);
|
|
|
|
skb_copy_to_linear_data_offset(bskb, offset, msg, msz);
|
|
|
|
msg_set_size(bmsg, offset + msz);
|
|
|
|
msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* tipc_msg_try_bundle - Try to bundle a new message to the last one
|
|
|
|
* @tskb: the last/target message to which the new one will be appended
|
|
|
|
* @skb: the new message skb pointer
|
|
|
|
* @mss: max message size (header inclusive)
|
|
|
|
* @dnode: destination node for the message
|
|
|
|
* @new_bundle: if this call made a new bundle or not
|
|
|
|
*
|
|
|
|
* Return: "true" if the new message skb is a candidate for bundling this time or
|
|
|
|
* later; if a bundling has been done this time, the skb is consumed
|
|
|
|
* (the skb pointer = NULL).
|
|
|
|
* Otherwise, "false" if the skb cannot be bundled at all.
|
|
|
|
*/
|
|
|
|
bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
|
|
|
|
u32 dnode, bool *new_bundle)
|
|
|
|
{
|
|
|
|
struct tipc_msg *msg, *inner, *outer;
|
|
|
|
u32 tsz;
|
|
|
|
|
|
|
|
/* First, check if the new buffer is suitable for bundling */
|
|
|
|
msg = buf_msg(*skb);
|
|
|
|
if (msg_user(msg) == MSG_FRAGMENTER)
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return false;
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
in that it can generate bundles of only one payload message, which causes
unnecessary overhead for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function to make a bundle of a very large
(size ~ MSS) message and a small one, which is not possible with the
current algorithm, e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
if (msg_user(msg) == TUNNEL_PROTOCOL)
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return false;
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
in that it can generate bundles of only one payload message, which causes
unnecessary overhead for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function to make a bundle of a very large
(size ~ MSS) message and a small one, which is not possible with the
current algorithm, e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
if (msg_user(msg) == BCAST_PROTOCOL)
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return false;
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
in that it can generate bundles of only one payload message, which causes
unnecessary overhead for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function to make a bundle of a very large
(size ~ MSS) message and a small one, which is not possible with the
current algorithm, e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
if (mss <= INT_H_SIZE + msg_size(msg))
|
tipc: improve link congestion algorithm
The link congestion algorithm used until now implies two problems.
- It is too generous towards lower-level messages in situations of high
load by giving "absolute" bandwidth guarantees to the different
priority levels. LOW traffic is guaranteed 10%, MEDIUM is guaranteed
20%, HIGH is guaranteed 30%, and CRITICAL is guaranteed 40% of the
available bandwidth. But, in the absence of higher level traffic, the
ratio between two distinct levels becomes unreasonable. E.g. if there
is only LOW and MEDIUM traffic on a system, the former is guaranteed
1/3 of the bandwidth, and the latter 2/3. This again means that if
there is e.g. one LOW user and 10 MEDIUM users, the former will have
33.3% of the bandwidth, and the others will have to compete for the
remainder, i.e. each will end up with 6.7% of the capacity.
- Packets of type MSG_BUNDLER are created at SYSTEM importance level,
but only after the packets bundled into it have passed the congestion
test for their own respective levels. Since bundled packets don't
result in incrementing the level counter for their own importance,
only occasionally for the SYSTEM level counter, they do in practice
obtain SYSTEM level importance. Hence, the current implementation
provides a gap in the congestion algorithm that in the worst case
may lead to a link reset.
We now refine the congestion algorithm as follows:
- A message is accepted to the link backlog only if its own level
counter, and all superior level counters, permit it.
- The importance of a created bundle packet is set according to its
contents. A bundle packet created from messages at levels LOW to
CRITICAL is given importance level CRITICAL, while a bundle created
from a SYSTEM level message is given importance SYSTEM. In the latter
case only subsequent SYSTEM level messages are allowed to be bundled
into it.
This solves the first problem described above, by making the bandwidth
guarantee relative to the total number of users at all levels; only
the upper limit for each level remains absolute. In the example
described above, the single LOW user would use 1/11th of the bandwidth,
the same as each of the ten MEDIUM users, but he still has the same
guarantee against starvation as the latter ones.
The fix also solves the second problem. If the CRITICAL level is filled
up by bundle packets of that level, no lower level packets will be
accepted any more.
Suggested-by: Gergely Kiss <gergely.kiss@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-14 22:46:17 +08:00
|
|
|
return false;
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
in that it can generate bundles of only one payload message, which causes
unnecessary overhead for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function to make a bundle of a very large
(size ~ MSS) message and a small one, which is not possible with the
current algorithm, e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
/* Ok, but the last/target buffer can be empty? */
|
|
|
|
if (unlikely(!tskb))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* Is it a bundle already? Try to bundle the new message to it */
|
|
|
|
if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) {
|
|
|
|
*new_bundle = false;
|
|
|
|
goto bundle;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Make a new bundle of the two messages if possible */
|
|
|
|
tsz = msg_size(buf_msg(tskb));
|
2021-06-28 14:37:45 +08:00
|
|
|
if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg)))
|
tipc: improve message bundling algorithm
As mentioned in commit e95584a889e1 ("tipc: fix unlimited bundling of
small messages"), the current message bundling algorithm is inefficient
in that it can generate bundles of only one payload message, which causes
unnecessary overhead for both the sender and receiver.
This commit re-designs the 'tipc_msg_make_bundle()' function (now named
as 'tipc_msg_try_bundle()'), so that when a message comes at the first
place, we will just check & keep a reference to it if the message is
suitable for bundling. The message buffer will be put into the link
backlog queue and processed as normal. Later on, when another one comes
we will make a bundle with the first message if possible and so on...
This way, a bundle if really needed will always consist of at least two
payload messages. Otherwise, we let the first buffer go its way without
any need of bundling, so reduce the overheads to zero.
Moreover, since now we have both the messages in hand, we can even
optimize the 'tipc_msg_bundle()' function to make a bundle of a very large
(size ~ MSS) message and a small one, which is not possible with the
current algorithm, e.g. [1400-byte message] + [10-byte message] (MTU = 1500).
Acked-by: Ying Xue <ying.xue@windreiver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-01 10:58:57 +08:00
|
|
|
return true;
|
|
|
|
if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
|
|
|
|
GFP_ATOMIC)))
|
|
|
|
return true;
|
|
|
|
inner = buf_msg(tskb);
|
|
|
|
skb_push(tskb, INT_H_SIZE);
|
|
|
|
outer = buf_msg(tskb);
|
|
|
|
tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE,
|
|
|
|
dnode);
|
|
|
|
msg_set_importance(outer, msg_importance(inner));
|
|
|
|
msg_set_size(outer, INT_H_SIZE + tsz);
|
|
|
|
msg_set_msgcnt(outer, 1);
|
|
|
|
*new_bundle = true;
|
|
|
|
|
|
|
|
bundle:
|
|
|
|
if (likely(tipc_msg_bundle(tskb, msg, mss))) {
|
|
|
|
consume_skb(*skb);
|
|
|
|
*skb = NULL;
|
|
|
|
}
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
/**
|
|
|
|
* tipc_msg_extract(): extract bundled inner packet from buffer
|
2015-03-14 04:08:08 +08:00
|
|
|
* @skb: buffer to be extracted from.
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
* @iskb: extracted inner buffer, to be returned
|
2015-03-14 04:08:08 +08:00
|
|
|
* @pos: position in outer message of msg to be extracted.
|
2020-11-30 02:32:50 +08:00
|
|
|
* Returns position of next msg.
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
* Consumes outer buffer when last packet extracted
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: true when there is an extracted buffer, otherwise false
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
*/
|
|
|
|
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
|
|
|
|
{
|
2018-06-29 04:25:04 +08:00
|
|
|
struct tipc_msg *hdr, *ihdr;
|
|
|
|
int imsz;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
|
2015-03-14 04:08:08 +08:00
|
|
|
*iskb = NULL;
|
2015-03-14 04:08:07 +08:00
|
|
|
if (unlikely(skb_linearize(skb)))
|
2015-03-14 04:08:08 +08:00
|
|
|
goto none;
|
|
|
|
|
2018-06-29 04:25:04 +08:00
|
|
|
hdr = buf_msg(skb);
|
|
|
|
if (unlikely(*pos > (msg_data_sz(hdr) - MIN_H_SIZE)))
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
goto none;
|
|
|
|
|
2018-06-29 04:25:04 +08:00
|
|
|
ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos);
|
|
|
|
imsz = msg_size(ihdr);
|
|
|
|
|
|
|
|
if ((*pos + imsz) > msg_data_sz(hdr))
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
goto none;
|
2018-06-29 04:25:04 +08:00
|
|
|
|
|
|
|
*iskb = tipc_buf_acquire(imsz, GFP_ATOMIC);
|
|
|
|
if (!*iskb)
|
|
|
|
goto none;
|
|
|
|
|
|
|
|
skb_copy_to_linear_data(*iskb, ihdr, imsz);
|
2017-11-16 04:23:56 +08:00
|
|
|
if (unlikely(!tipc_msg_validate(iskb)))
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
goto none;
|
2018-06-29 04:25:04 +08:00
|
|
|
|
2021-06-28 14:37:45 +08:00
|
|
|
*pos += BUF_ALIGN(imsz);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
return true;
|
|
|
|
none:
|
|
|
|
kfree_skb(skb);
|
2015-03-14 04:08:08 +08:00
|
|
|
kfree_skb(*iskb);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
*iskb = NULL;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-06-26 09:41:35 +08:00
|
|
|
/**
|
|
|
|
* tipc_msg_reverse(): swap source and destination addresses and add error code
|
2015-07-22 22:11:18 +08:00
|
|
|
* @own_node: originating node id for reversed message
|
2018-09-29 02:23:18 +08:00
|
|
|
* @skb: buffer containing message to be reversed; will be consumed
|
2015-07-22 22:11:18 +08:00
|
|
|
* @err: error code to be set in message, if any
|
2018-09-29 02:23:18 +08:00
|
|
|
* Replaces consumed buffer with new one when successful
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: true if success, otherwise false
|
2014-06-26 09:41:35 +08:00
|
|
|
*/
|
tipc: introduce new tipc_sk_respond() function
Currently, we use the code sequence
if (msg_reverse())
tipc_link_xmit_skb()
at numerous locations in socket.c. The preparation of arguments
for these calls, as well as the sequence itself, makes the code
unecessarily complex.
In this commit, we introduce a new function, tipc_sk_respond(),
that performs this call combination. We also replace some, but not
yet all, of these explicit call sequences with calls to the new
function. Notably, we let the function tipc_sk_proto_rcv() use
the new function to directly send out PROBE_REPLY messages,
instead of deferring this to the calling tipc_sk_rcv() function,
as we do now.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-22 22:11:19 +08:00
|
|
|
bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
|
2014-06-26 09:41:35 +08:00
|
|
|
{
|
2015-07-22 22:11:18 +08:00
|
|
|
struct sk_buff *_skb = *skb;
|
2018-09-29 02:23:18 +08:00
|
|
|
struct tipc_msg *_hdr, *hdr;
|
|
|
|
int hlen, dlen;
|
2014-06-26 09:41:35 +08:00
|
|
|
|
2015-07-22 22:11:18 +08:00
|
|
|
if (skb_linearize(_skb))
|
2014-06-26 09:41:35 +08:00
|
|
|
goto exit;
|
2018-09-29 02:23:18 +08:00
|
|
|
_hdr = buf_msg(_skb);
|
|
|
|
dlen = min_t(uint, msg_data_sz(_hdr), MAX_FORWARD_SIZE);
|
|
|
|
hlen = msg_hdr_sz(_hdr);
|
|
|
|
|
|
|
|
if (msg_dest_droppable(_hdr))
|
2014-06-26 09:41:41 +08:00
|
|
|
goto exit;
|
2018-09-29 02:23:18 +08:00
|
|
|
if (msg_errcode(_hdr))
|
2014-06-26 09:41:35 +08:00
|
|
|
goto exit;
|
2015-07-22 22:11:18 +08:00
|
|
|
|
2018-09-29 02:23:18 +08:00
|
|
|
/* Never return SHORT header */
|
|
|
|
if (hlen == SHORT_H_SIZE)
|
|
|
|
hlen = BASIC_H_SIZE;
|
2015-07-22 22:11:18 +08:00
|
|
|
|
2018-09-29 02:23:22 +08:00
|
|
|
/* Don't return data along with SYN+, - sender has a clone */
|
|
|
|
if (msg_is_syn(_hdr) && err == TIPC_ERR_OVERLOAD)
|
|
|
|
dlen = 0;
|
|
|
|
|
2018-09-29 02:23:18 +08:00
|
|
|
/* Allocate new buffer to return */
|
|
|
|
*skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC);
|
|
|
|
if (!*skb)
|
|
|
|
goto exit;
|
|
|
|
memcpy((*skb)->data, _skb->data, msg_hdr_sz(_hdr));
|
|
|
|
memcpy((*skb)->data + hlen, msg_data(_hdr), dlen);
|
|
|
|
|
|
|
|
/* Build reverse header in new buffer */
|
|
|
|
hdr = buf_msg(*skb);
|
|
|
|
msg_set_hdr_sz(hdr, hlen);
|
2015-07-22 22:11:18 +08:00
|
|
|
msg_set_errcode(hdr, err);
|
2017-08-15 00:28:49 +08:00
|
|
|
msg_set_non_seq(hdr, 0);
|
2018-09-29 02:23:18 +08:00
|
|
|
msg_set_origport(hdr, msg_destport(_hdr));
|
|
|
|
msg_set_destport(hdr, msg_origport(_hdr));
|
|
|
|
msg_set_destnode(hdr, msg_prevnode(_hdr));
|
2015-07-22 22:11:18 +08:00
|
|
|
msg_set_prevnode(hdr, own_node);
|
|
|
|
msg_set_orignode(hdr, own_node);
|
2018-09-29 02:23:18 +08:00
|
|
|
msg_set_size(hdr, hlen + dlen);
|
2015-07-22 22:11:18 +08:00
|
|
|
skb_orphan(_skb);
|
2018-09-29 02:23:18 +08:00
|
|
|
kfree_skb(_skb);
|
2014-06-26 09:41:35 +08:00
|
|
|
return true;
|
|
|
|
exit:
|
2015-07-22 22:11:18 +08:00
|
|
|
kfree_skb(_skb);
|
|
|
|
*skb = NULL;
|
2014-06-26 09:41:35 +08:00
|
|
|
return false;
|
|
|
|
}
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
result in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
|
2018-09-29 02:23:22 +08:00
|
|
|
bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb, *_skb;
|
|
|
|
|
|
|
|
skb_queue_walk(msg, skb) {
|
|
|
|
_skb = skb_clone(skb, GFP_ATOMIC);
|
|
|
|
if (!_skb) {
|
|
|
|
__skb_queue_purge(cpy);
|
|
|
|
pr_err_ratelimited("Failed to clone buffer chain\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
__skb_queue_tail(cpy, _skb);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
result in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
/**
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
* tipc_msg_lookup_dest(): try to find new destination for named message
|
2020-11-30 02:32:50 +08:00
|
|
|
* @net: pointer to associated network namespace
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
* @skb: the buffer containing the message.
|
2015-07-22 22:11:20 +08:00
|
|
|
* @err: error code to be used by caller if lookup fails
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
results in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
* Does not consume buffer
|
2020-11-30 02:32:48 +08:00
|
|
|
* Return: true if a destination is found, false otherwise
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
results in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
*/
|
2015-07-22 22:11:20 +08:00
|
|
|
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
results in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
{
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
struct tipc_msg *msg = buf_msg(skb);
|
2021-03-17 10:06:15 +08:00
|
|
|
u32 scope = msg_lookup_scope(msg);
|
|
|
|
u32 self = tipc_own_addr(net);
|
|
|
|
u32 inst = msg_nameinst(msg);
|
|
|
|
struct tipc_socket_addr sk;
|
|
|
|
struct tipc_uaddr ua;
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
results in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
if (!msg_isdata(msg))
|
|
|
|
return false;
|
|
|
|
if (!msg_named(msg))
|
|
|
|
return false;
|
tipc: fix two bugs in secondary destination lookup
A message sent to a node after a successful name table lookup may still
find that the destination socket has disappeared, because distribution
of name table updates is non-atomic. If so, the message will be rejected
back to the sender with error code TIPC_ERR_NO_PORT. If the source
socket of the message has disappeared in the meantime, the message
should be dropped.
However, in the currrent code, the message will instead be subject to an
unwanted tertiary lookup, because the function tipc_msg_lookup_dest()
doesn't check if there is an error code present in the message before
performing the lookup. In the worst case, the message may now find the
old destination again, and be redirected once more, instead of being
dropped directly as it should be.
A second bug in this function is that the "prev_node" field in the message
is not updated after successful lookup, something that may have
unpredictable consequences.
The problems arising from those bugs occur very infrequently.
The third change in this function; the test on msg_reroute_msg_cnt() is
purely cosmetic, reflecting that the returned value never can be negative.
This commit corrects the two bugs described above.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-27 22:19:19 +08:00
|
|
|
if (msg_errcode(msg))
|
|
|
|
return false;
|
2017-09-29 16:02:54 +08:00
|
|
|
*err = TIPC_ERR_NO_NAME;
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
if (skb_linearize(skb))
|
|
|
|
return false;
|
2015-09-18 16:46:31 +08:00
|
|
|
msg = buf_msg(skb);
|
tipc: fix two bugs in secondary destination lookup
A message sent to a node after a successful name table lookup may still
find that the destination socket has disappeared, because distribution
of name table updates is non-atomic. If so, the message will be rejected
back to the sender with error code TIPC_ERR_NO_PORT. If the source
socket of the message has disappeared in the meantime, the message
should be dropped.
However, in the currrent code, the message will instead be subject to an
unwanted tertiary lookup, because the function tipc_msg_lookup_dest()
doesn't check if there is an error code present in the message before
performing the lookup. In the worst case, the message may now find the
old destination again, and be redirected once more, instead of being
dropped directly as it should be.
A second bug in this function is that the "prev_node" field in the message
is not updated after successful lookup, something that may have
unpredictable consequences.
The problems arising from those bugs occur very infrequently.
The third change in this function; the test on msg_reroute_msg_cnt() is
purely cosmetic, reflecting that the returned value never can be negative.
This commit corrects the two bugs described above.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-27 22:19:19 +08:00
|
|
|
if (msg_reroute_cnt(msg))
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
return false;
|
2021-03-17 10:06:15 +08:00
|
|
|
tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope,
|
|
|
|
msg_nametype(msg), inst, inst);
|
|
|
|
sk.node = tipc_scope2node(net, scope);
|
|
|
|
if (!tipc_nametbl_lookup_anycast(net, &ua, &sk))
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
return false;
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
results in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
msg_incr_reroute_cnt(msg);
|
2021-03-17 10:06:15 +08:00
|
|
|
if (sk.node != self)
|
|
|
|
msg_set_prevnode(msg, self);
|
|
|
|
msg_set_destnode(msg, sk.node);
|
|
|
|
msg_set_destport(msg, sk.ref);
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
*err = TIPC_OK;
|
2017-10-07 21:07:20 +08:00
|
|
|
|
tipc: split up function tipc_msg_eval()
The function tipc_msg_eval() is in reality doing two related, but
different tasks. First it tries to find a new destination for named
messages, in case there was no first lookup, or if the first lookup
failed. Second, it does what its name suggests, evaluating the validity
of the message and its destination, and returning an appropriate error
code depending on the result.
This is confusing, and in this commit we choose to break it up into two
functions. A new function, tipc_msg_lookup_dest(), first attempts to find
a new destination, if the message is of the right type. If this lookup
fails, or if the message should not be subject to a second lookup, the
already existing tipc_msg_reverse() is called. This function performs
prepares the message for rejection, if applicable.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:39 +08:00
|
|
|
return true;
|
tipc: introduce message evaluation function
When a message arrives in a node and finds no destination
socket, we may need to drop it, reject it, or forward it after
a secondary destination lookup. The latter two cases currently
results in a code path that is perceived as complex, because it
follows a deep call chain via obscure functions such as
net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the
decision about whether such a message should be rejected or
forwarded, but leaves it to the caller to actually perform
the indicated action.
If the decision is 'reject', it is still the task of the recently
introduced function tipc_msg_reverse() to take the final decision
about whether the message is rejectable or not. In the latter case
it drops the message.
As a result of this change, we can finally eliminate the function
net_route_named_msg(), and hence become independent of net_route_msg().
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:36 +08:00
|
|
|
}
|
2014-07-17 08:41:00 +08:00
|
|
|
|
2017-11-30 23:47:25 +08:00
|
|
|
/* tipc_msg_assemble() - assemble chain of fragments into one message
|
|
|
|
*/
|
|
|
|
bool tipc_msg_assemble(struct sk_buff_head *list)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb, *tmp = NULL;
|
|
|
|
|
|
|
|
if (skb_queue_len(list) == 1)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
while ((skb = __skb_dequeue(list))) {
|
|
|
|
skb->next = NULL;
|
|
|
|
if (tipc_buf_append(&tmp, &skb)) {
|
|
|
|
__skb_queue_tail(list, skb);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (!tmp)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
__skb_queue_purge(list);
|
|
|
|
__skb_queue_head_init(list);
|
|
|
|
pr_warn("Failed do assemble buffer\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-07-17 08:41:00 +08:00
|
|
|
/* tipc_msg_reassemble() - clone a buffer chain of fragments and
|
|
|
|
* reassemble the clones into one message
|
|
|
|
*/
|
2015-10-22 20:51:39 +08:00
|
|
|
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq)
|
2014-07-17 08:41:00 +08:00
|
|
|
{
|
2015-10-22 20:51:39 +08:00
|
|
|
struct sk_buff *skb, *_skb;
|
2014-11-26 11:41:55 +08:00
|
|
|
struct sk_buff *frag = NULL;
|
2014-07-17 08:41:00 +08:00
|
|
|
struct sk_buff *head = NULL;
|
2015-10-22 20:51:39 +08:00
|
|
|
int hdr_len;
|
2014-07-17 08:41:00 +08:00
|
|
|
|
|
|
|
/* Copy header if single buffer */
|
2014-11-26 11:41:55 +08:00
|
|
|
if (skb_queue_len(list) == 1) {
|
|
|
|
skb = skb_peek(list);
|
2015-10-22 20:51:39 +08:00
|
|
|
hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
|
|
|
|
_skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC);
|
|
|
|
if (!_skb)
|
|
|
|
return false;
|
|
|
|
__skb_queue_tail(rcvq, _skb);
|
|
|
|
return true;
|
2014-07-17 08:41:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Clone all fragments and reassemble */
|
2014-11-26 11:41:55 +08:00
|
|
|
skb_queue_walk(list, skb) {
|
|
|
|
frag = skb_clone(skb, GFP_ATOMIC);
|
2014-07-17 08:41:00 +08:00
|
|
|
if (!frag)
|
|
|
|
goto error;
|
|
|
|
frag->next = NULL;
|
|
|
|
if (tipc_buf_append(&head, &frag))
|
|
|
|
break;
|
|
|
|
if (!head)
|
|
|
|
goto error;
|
|
|
|
}
|
2015-10-22 20:51:39 +08:00
|
|
|
__skb_queue_tail(rcvq, frag);
|
|
|
|
return true;
|
2014-07-17 08:41:00 +08:00
|
|
|
error:
|
|
|
|
pr_warn("Failed do clone local mcast rcv buffer\n");
|
|
|
|
kfree_skb(head);
|
2015-10-22 20:51:39 +08:00
|
|
|
return false;
|
2014-07-17 08:41:00 +08:00
|
|
|
}
|
2015-10-16 02:52:43 +08:00
|
|
|
|
2017-01-19 02:50:52 +08:00
|
|
|
/* tipc_msg_pskb_copy() - copy a buffer queue and set the destination node
 * @dst: value written as destination node into each copied message header
 * @msg: queue of buffers to copy; it is only walked here
 * @cpy: queue that receives the copies, in the same order as @msg
 *
 * Each buffer on @msg is duplicated with pskb_copy(). If any copy fails,
 * everything on @cpy is purged.
 *
 * Return: true if all buffers were copied, false otherwise
 */
bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
			struct sk_buff_head *cpy)
{
	struct sk_buff *orig;
	struct sk_buff *dup;

	skb_queue_walk(msg, orig) {
		dup = pskb_copy(orig, GFP_ATOMIC);
		if (!dup)
			goto fail;

		msg_set_destnode(buf_msg(dup), dst);
		__skb_queue_tail(cpy, dup);
	}
	return true;

fail:
	/* Drop whatever is on the copy queue */
	__skb_queue_purge(cpy);
	return false;
}
|
|
|
|
|
2015-10-16 02:52:43 +08:00
|
|
|
/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
|
|
|
|
* @list: list to be appended to
|
|
|
|
* @seqno: sequence number of buffer to add
|
|
|
|
* @skb: buffer to add
|
|
|
|
*/
|
2020-05-26 17:38:37 +08:00
|
|
|
bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
|
2015-10-16 02:52:43 +08:00
|
|
|
struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct sk_buff *_skb, *tmp;
|
|
|
|
|
|
|
|
if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
|
|
|
|
__skb_queue_head(list, skb);
|
2020-05-26 17:38:37 +08:00
|
|
|
return true;
|
2015-10-16 02:52:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
|
|
|
|
__skb_queue_tail(list, skb);
|
2020-05-26 17:38:37 +08:00
|
|
|
return true;
|
2015-10-16 02:52:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
skb_queue_walk_safe(list, _skb, tmp) {
|
|
|
|
if (more(seqno, buf_seqno(_skb)))
|
|
|
|
continue;
|
|
|
|
if (seqno == buf_seqno(_skb))
|
|
|
|
break;
|
|
|
|
__skb_queue_before(list, _skb, skb);
|
2020-05-26 17:38:37 +08:00
|
|
|
return true;
|
2015-10-16 02:52:43 +08:00
|
|
|
}
|
|
|
|
kfree_skb(skb);
|
2020-05-26 17:38:37 +08:00
|
|
|
return false;
|
2015-10-16 02:52:43 +08:00
|
|
|
}
|
2017-10-13 17:04:20 +08:00
|
|
|
|
|
|
|
/* tipc_skb_reject() - reverse a buffer and queue it for transmission
 * @net: namespace whose own address is passed to tipc_msg_reverse()
 * @err: error code passed to tipc_msg_reverse()
 * @skb: buffer to reverse; tipc_msg_reverse() receives a pointer to it
 * @xmitq: queue the buffer is appended to if the reversal succeeds
 *
 * Nothing is queued when tipc_msg_reverse() reports failure.
 */
void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
		     struct sk_buff_head *xmitq)
{
	if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
		return;

	__skb_queue_tail(xmitq, skb);
}
|