2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/link.h: Include file for TIPC link code
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2014-08-23 06:09:07 +08:00
|
|
|
* Copyright (c) 1995-2006, 2013-2014, Ericsson AB
|
2011-01-08 00:43:40 +08:00
|
|
|
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _TIPC_LINK_H
|
|
|
|
#define _TIPC_LINK_H
|
|
|
|
|
2014-11-20 17:29:07 +08:00
|
|
|
#include <net/genetlink.h>
|
2006-01-03 02:04:38 +08:00
|
|
|
#include "msg.h"
|
|
|
|
#include "node.h"
|
|
|
|
|
2015-01-09 15:27:01 +08:00
|
|
|
/* TIPC-specific error codes
|
|
|
|
*/
|
|
|
|
#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */
|
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
/* Link FSM events:
|
2014-02-14 06:29:08 +08:00
|
|
|
*/
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
enum {
|
2015-07-31 06:24:21 +08:00
|
|
|
LINK_ESTABLISH_EVT = 0xec1ab1e,
|
|
|
|
LINK_PEER_RESET_EVT = 0x9eed0e,
|
|
|
|
LINK_FAILURE_EVT = 0xfa110e,
|
|
|
|
LINK_RESET_EVT = 0x10ca1d0e,
|
|
|
|
LINK_FAILOVER_BEGIN_EVT = 0xfa110bee,
|
|
|
|
LINK_FAILOVER_END_EVT = 0xfa110ede,
|
|
|
|
LINK_SYNCH_BEGIN_EVT = 0xc1ccbee,
|
|
|
|
LINK_SYNCH_END_EVT = 0xc1ccede
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
};
|
2014-02-14 06:29:08 +08:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/* Events returned from link at packet reception or at timeout
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
*/
|
|
|
|
enum {
|
|
|
|
TIPC_LINK_UP_EVT = 1,
|
2015-10-22 20:51:41 +08:00
|
|
|
TIPC_LINK_DOWN_EVT = (1 << 1),
|
2016-09-02 01:52:49 +08:00
|
|
|
TIPC_LINK_SND_STATE = (1 << 2)
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
};
|
|
|
|
|
2014-02-14 06:29:08 +08:00
|
|
|
/* Starting value for maximum packet size negotiation on unicast links
|
2006-01-03 02:04:38 +08:00
|
|
|
* (unless bearer MTU is less)
|
|
|
|
*/
|
|
|
|
#define MAX_PKT_DEFAULT 1500
|
|
|
|
|
2015-10-22 20:51:46 +08:00
|
|
|
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
|
2015-10-22 20:51:36 +08:00
|
|
|
int tolerance, char net_plane, u32 mtu, int priority,
|
tipc: introduce variable window congestion control
We introduce a simple variable window congestion control for links.
The algorithm is inspired by the Reno algorithm, covering both 'slow
start', 'congestion avoidance', and 'fast recovery' modes.
- We introduce hard lower and upper window limits per link, still
different and configurable per bearer type.
- We introduce a 'slow start theshold' variable, initially set to
the maximum window size.
- We let a link start at the minimum congestion window, i.e. in slow
start mode, and then let is grow rapidly (+1 per rceived ACK) until
it reaches the slow start threshold and enters congestion avoidance
mode.
- In congestion avoidance mode we increment the congestion window for
each window-size number of acked packets, up to a possible maximum
equal to the configured maximum window.
- For each non-duplicate NACK received, we drop back to fast recovery
mode, by setting the both the slow start threshold to and the
congestion window to (current_congestion_window / 2).
- If the timeout handler finds that the transmit queue has not moved
since the previous timeout, it drops the link back to slow start
and forces a probe containing the last sent sequence number to the
sent to the peer, so that this can discover the stale situation.
This change does in reality have effect only on unicast ethernet
transport, as we have seen that there is no room whatsoever for
increasing the window max size for the UDP bearer.
For now, we also choose to keep the limits for the broadcast link
unchanged and equal.
This algorithm seems to give a 50-100% throughput improvement for
messages larger than MTU.
Suggested-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-10 07:52:46 +08:00
|
|
|
u32 min_win, u32 max_win, u32 session, u32 ownnode,
|
tipc: handle collisions of 32-bit node address hash values
When a 32-bit node address is generated from a 128-bit identifier,
there is a risk of collisions which must be discovered and handled.
We do this as follows:
- We don't apply the generated address immediately to the node, but do
instead initiate a 1 sec trial period to allow other cluster members
to discover and handle such collisions.
- During the trial period the node periodically sends out a new type
of message, DSC_TRIAL_MSG, using broadcast or emulated broadcast,
to all the other nodes in the cluster.
- When a node is receiving such a message, it must check that the
presented 32-bit identifier either is unused, or was used by the very
same peer in a previous session. In both cases it accepts the request
by not responding to it.
- If it finds that the same node has been up before using a different
address, it responds with a DSC_TRIAL_FAIL_MSG containing that
address.
- If it finds that the address has already been taken by some other
node, it generates a new, unused address and returns it to the
requester.
- During the trial period the requesting node must always be prepared
to accept a failure message, i.e., a message where a peer suggests a
different (or equal) address to the one tried. In those cases it
must apply the suggested value as trial address and restart the trial
period.
This algorithm ensures that in the vast majority of cases a node will
have the same address before and after a reboot. If a legacy user
configures the address explicitly, there will be no trial period and
messages, so this protocol addition is completely backwards compatible.
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-23 03:42:51 +08:00
|
|
|
u32 peer, u8 *peer_id, u16 peer_caps,
|
2015-10-22 20:51:41 +08:00
|
|
|
struct tipc_link *bc_sndlink,
|
|
|
|
struct tipc_link *bc_rcvlink,
|
|
|
|
struct sk_buff_head *inputq,
|
|
|
|
struct sk_buff_head *namedq,
|
2015-07-31 06:24:26 +08:00
|
|
|
struct tipc_link **link);
|
2015-10-22 20:51:46 +08:00
|
|
|
bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
|
tipc: introduce variable window congestion control
We introduce a simple variable window congestion control for links.
The algorithm is inspired by the Reno algorithm, covering both 'slow
start', 'congestion avoidance', and 'fast recovery' modes.
- We introduce hard lower and upper window limits per link, still
different and configurable per bearer type.
- We introduce a 'slow start theshold' variable, initially set to
the maximum window size.
- We let a link start at the minimum congestion window, i.e. in slow
start mode, and then let is grow rapidly (+1 per rceived ACK) until
it reaches the slow start threshold and enters congestion avoidance
mode.
- In congestion avoidance mode we increment the congestion window for
each window-size number of acked packets, up to a possible maximum
equal to the configured maximum window.
- For each non-duplicate NACK received, we drop back to fast recovery
mode, by setting the both the slow start threshold to and the
congestion window to (current_congestion_window / 2).
- If the timeout handler finds that the transmit queue has not moved
since the previous timeout, it drops the link back to slow start
and forces a probe containing the last sent sequence number to the
sent to the peer, so that this can discover the stale situation.
This change does in reality have effect only on unicast ethernet
transport, as we have seen that there is no room whatsoever for
increasing the window max size for the UDP bearer.
For now, we also choose to keep the limits for the broadcast link
unchanged and equal.
This algorithm seems to give a 50-100% throughput improvement for
messages larger than MTU.
Suggested-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-10 07:52:46 +08:00
|
|
|
int mtu, u32 min_win, u32 max_win, u16 peer_caps,
|
2015-10-22 20:51:41 +08:00
|
|
|
struct sk_buff_head *inputq,
|
2015-10-22 20:51:37 +08:00
|
|
|
struct sk_buff_head *namedq,
|
2015-10-22 20:51:41 +08:00
|
|
|
struct tipc_link *bc_sndlink,
|
2015-10-22 20:51:37 +08:00
|
|
|
struct tipc_link **link);
|
2015-07-31 06:24:19 +08:00
|
|
|
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
|
|
|
|
int mtyp, struct sk_buff_head *xmitq);
|
2018-09-27 03:00:54 +08:00
|
|
|
void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl,
|
|
|
|
struct sk_buff_head *xmitq);
|
tipc: fix missing Name entries due to half-failover
TIPC link can temporarily fall into "half-establish" that only one of
the link endpoints is ESTABLISHED and starts to send traffic, PROTOCOL
messages, whereas the other link endpoint is not up (e.g. immediately
when the endpoint receives ACTIVATE_MSG, the network interface goes
down...).
This is a normal situation and will be settled because the link
endpoint will be eventually brought down after the link tolerance time.
However, the situation will become worse when the second link is
established before the first link endpoint goes down,
For example:
1. Both links <1A-2A>, <1B-2B> down
2. Link endpoint 2A up, but 1A still down (e.g. due to network
disturbance, wrong session, etc.)
3. Link <1B-2B> up
4. Link endpoint 2A down (e.g. due to link tolerance timeout)
5. Node B starts failover onto link <1B-2B>
==> Node A does never start link failover.
When the "half-failover" situation happens, two consequences have been
observed:
a) Peer link/node gets stuck in FAILINGOVER state;
b) Traffic or user messages that peer node is trying to failover onto
the second link can be partially or completely dropped by this node.
The consequence a) was actually solved by commit c140eb166d68 ("tipc:
fix failover problem"), but that commit didn't cover the b). It's due
to the fact that the tunnel link endpoint has never been prepared for a
failover, so the 'l->drop_point' (and the other data...) is not set
correctly. When a TUNNEL_MSG from peer node arrives on the link,
depending on the inner message's seqno and the current 'l->drop_point'
value, the message can be dropped (- treated as a duplicate message) or
processed.
At this early stage, the traffic messages from peer are likely to be
NAME_DISTRIBUTORs, this means some name table entries will be missed on
the node forever!
The commit resolves the issue by starting the FAILOVER process on this
node as well. Another benefit from this solution is that we ensure the
link will not be re-established until the failover ends.
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 18:23:23 +08:00
|
|
|
void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
|
|
|
|
struct sk_buff_head *xmitq);
|
2015-10-16 02:52:45 +08:00
|
|
|
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
|
2015-07-31 06:24:21 +08:00
|
|
|
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
|
|
|
|
bool tipc_link_is_up(struct tipc_link *l);
|
2015-10-16 02:52:46 +08:00
|
|
|
bool tipc_link_peer_is_down(struct tipc_link *l);
|
2015-07-31 06:24:21 +08:00
|
|
|
bool tipc_link_is_reset(struct tipc_link *l);
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
bool tipc_link_is_establishing(struct tipc_link *l);
|
2015-07-31 06:24:21 +08:00
|
|
|
bool tipc_link_is_synching(struct tipc_link *l);
|
|
|
|
bool tipc_link_is_failingover(struct tipc_link *l);
|
|
|
|
bool tipc_link_is_blocked(struct tipc_link *l);
|
2015-10-22 20:51:46 +08:00
|
|
|
void tipc_link_set_active(struct tipc_link *l, bool active);
|
2015-11-20 03:30:47 +08:00
|
|
|
void tipc_link_reset(struct tipc_link *l);
|
2015-11-20 03:30:46 +08:00
|
|
|
void tipc_link_reset_stats(struct tipc_link *l);
|
|
|
|
int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
|
2015-07-17 04:54:24 +08:00
|
|
|
struct sk_buff_head *xmitq);
|
2015-11-20 03:30:46 +08:00
|
|
|
struct sk_buff_head *tipc_link_inputq(struct tipc_link *l);
|
|
|
|
u16 tipc_link_rcv_nxt(struct tipc_link *l);
|
|
|
|
u16 tipc_link_acked(struct tipc_link *l);
|
|
|
|
u32 tipc_link_id(struct tipc_link *l);
|
|
|
|
char *tipc_link_name(struct tipc_link *l);
|
tipc: enable tracepoints in tipc
As for the sake of debugging/tracing, the commit enables tracepoints in
TIPC along with some general trace_events as shown below. It also
defines some 'tipc_*_dump()' functions that allow to dump TIPC object
data whenever needed, that is, for general debug purposes, ie. not just
for the trace_events.
The following trace_events are now available:
- trace_tipc_skb_dump(): allows to trace and dump TIPC msg & skb data,
e.g. message type, user, droppable, skb truesize, cloned skb, etc.
- trace_tipc_list_dump(): allows to trace and dump any TIPC buffers or
queues, e.g. TIPC link transmq, socket receive queue, etc.
- trace_tipc_sk_dump(): allows to trace and dump TIPC socket data, e.g.
sk state, sk type, connection type, rmem_alloc, socket queues, etc.
- trace_tipc_link_dump(): allows to trace and dump TIPC link data, e.g.
link state, silent_intv_cnt, gap, bc_gap, link queues, etc.
- trace_tipc_node_dump(): allows to trace and dump TIPC node data, e.g.
node state, active links, capabilities, link entries, etc.
How to use:
Put the trace functions at any places where we want to dump TIPC data
or events.
Note:
a) The dump functions will generate raw data only, that is, to offload
the trace event's processing, it can require a tool or script to parse
the data but this should be simple.
b) The trace_tipc_*_dump() should be reserved for a failure cases only
(e.g. the retransmission failure case) or where we do not expect to
happen too often, then we can consider enabling these events by default
since they will almost not take any effects under normal conditions,
but once the rare condition or failure occurs, we get the dumped data
fully for post-analysis.
For other trace purposes, we can reuse these trace classes as template
but different events.
c) A trace_event is only effective when we enable it. To enable the
TIPC trace_events, echo 1 to 'enable' files in the events/tipc/
directory in the 'debugfs' file system. Normally, they are located at:
/sys/kernel/debug/tracing/events/tipc/
For example:
To enable the tipc_link_dump event:
echo 1 > /sys/kernel/debug/tracing/events/tipc/tipc_link_dump/enable
To enable all the TIPC trace_events:
echo 1 > /sys/kernel/debug/tracing/events/tipc/enable
To collect the trace data:
cat trace
or
cat trace_pipe > /trace.out &
To disable all the TIPC trace_events:
echo 0 > /sys/kernel/debug/tracing/events/tipc/enable
To clear the trace buffer:
echo > trace
d) Like the other trace_events, the feature like 'filter' or 'trigger'
is also usable for the tipc trace_events.
For more details, have a look at:
Documentation/trace/ftrace.txt
MAINTAINERS | add two new files 'trace.h' & 'trace.c' in tipc
Acked-by: Ying Xue <ying.xue@windriver.com>
Tested-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-12-19 10:17:56 +08:00
|
|
|
char *tipc_link_name_ext(struct tipc_link *l, char *buf);
|
2018-09-27 03:00:54 +08:00
|
|
|
u32 tipc_link_state(struct tipc_link *l);
|
2015-11-20 03:30:46 +08:00
|
|
|
char tipc_link_plane(struct tipc_link *l);
|
|
|
|
int tipc_link_prio(struct tipc_link *l);
|
tipc: introduce variable window congestion control
We introduce a simple variable window congestion control for links.
The algorithm is inspired by the Reno algorithm, covering both 'slow
start', 'congestion avoidance', and 'fast recovery' modes.
- We introduce hard lower and upper window limits per link, still
different and configurable per bearer type.
- We introduce a 'slow start theshold' variable, initially set to
the maximum window size.
- We let a link start at the minimum congestion window, i.e. in slow
start mode, and then let is grow rapidly (+1 per rceived ACK) until
it reaches the slow start threshold and enters congestion avoidance
mode.
- In congestion avoidance mode we increment the congestion window for
each window-size number of acked packets, up to a possible maximum
equal to the configured maximum window.
- For each non-duplicate NACK received, we drop back to fast recovery
mode, by setting the both the slow start threshold to and the
congestion window to (current_congestion_window / 2).
- If the timeout handler finds that the transmit queue has not moved
since the previous timeout, it drops the link back to slow start
and forces a probe containing the last sent sequence number to the
sent to the peer, so that this can discover the stale situation.
This change does in reality have effect only on unicast ethernet
transport, as we have seen that there is no room whatsoever for
increasing the window max size for the UDP bearer.
For now, we also choose to keep the limits for the broadcast link
unchanged and equal.
This algorithm seems to give a 50-100% throughput improvement for
messages larger than MTU.
Suggested-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-10 07:52:46 +08:00
|
|
|
int tipc_link_min_win(struct tipc_link *l);
|
|
|
|
int tipc_link_max_win(struct tipc_link *l);
|
2018-07-10 07:07:35 +08:00
|
|
|
void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
|
2018-07-10 07:07:36 +08:00
|
|
|
bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
|
2015-11-20 03:30:46 +08:00
|
|
|
unsigned long tipc_link_tolerance(struct tipc_link *l);
|
2016-02-01 15:19:56 +08:00
|
|
|
void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
|
|
|
|
struct sk_buff_head *xmitq);
|
|
|
|
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
|
|
|
|
struct sk_buff_head *xmitq);
|
2015-11-20 03:30:46 +08:00
|
|
|
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
|
tipc: introduce variable window congestion control
We introduce a simple variable window congestion control for links.
The algorithm is inspired by the Reno algorithm, covering both 'slow
start', 'congestion avoidance', and 'fast recovery' modes.
- We introduce hard lower and upper window limits per link, still
different and configurable per bearer type.
- We introduce a 'slow start theshold' variable, initially set to
the maximum window size.
- We let a link start at the minimum congestion window, i.e. in slow
start mode, and then let is grow rapidly (+1 per rceived ACK) until
it reaches the slow start threshold and enters congestion avoidance
mode.
- In congestion avoidance mode we increment the congestion window for
each window-size number of acked packets, up to a possible maximum
equal to the configured maximum window.
- For each non-duplicate NACK received, we drop back to fast recovery
mode, by setting the both the slow start threshold to and the
congestion window to (current_congestion_window / 2).
- If the timeout handler finds that the transmit queue has not moved
since the previous timeout, it drops the link back to slow start
and forces a probe containing the last sent sequence number to the
sent to the peer, so that this can discover the stale situation.
This change does in reality have effect only on unicast ethernet
transport, as we have seen that there is no room whatsoever for
increasing the window max size for the UDP bearer.
For now, we also choose to keep the limits for the broadcast link
unchanged and equal.
This algorithm seems to give a 50-100% throughput improvement for
messages larger than MTU.
Suggested-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-10 07:52:46 +08:00
|
|
|
void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win);
|
2015-11-20 03:30:45 +08:00
|
|
|
int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
|
|
|
|
struct tipc_link *link, int nlflags);
|
2014-11-20 17:29:07 +08:00
|
|
|
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
|
2015-07-17 04:54:28 +08:00
|
|
|
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *xmitq);
|
2016-04-16 01:33:07 +08:00
|
|
|
int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
|
2015-10-22 20:51:41 +08:00
|
|
|
void tipc_link_add_bc_peer(struct tipc_link *snd_l,
|
|
|
|
struct tipc_link *uc_l,
|
|
|
|
struct sk_buff_head *xmitq);
|
|
|
|
void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
|
|
|
|
struct tipc_link *rcv_l,
|
|
|
|
struct sk_buff_head *xmitq);
|
2015-10-22 20:51:39 +08:00
|
|
|
int tipc_link_bc_peers(struct tipc_link *l);
|
2015-10-22 20:51:43 +08:00
|
|
|
void tipc_link_set_mtu(struct tipc_link *l, int mtu);
|
|
|
|
int tipc_link_mtu(struct tipc_link *l);
|
2019-11-08 13:05:11 +08:00
|
|
|
int tipc_link_mss(struct tipc_link *l);
|
2020-05-26 17:38:34 +08:00
|
|
|
u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
|
|
|
|
struct tipc_msg *hdr, bool uc);
|
|
|
|
int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
|
|
|
|
struct tipc_gap_ack_blks *ga,
|
2020-05-26 17:38:36 +08:00
|
|
|
struct sk_buff_head *xmitq,
|
|
|
|
struct sk_buff_head *retrq);
|
2015-10-22 20:51:41 +08:00
|
|
|
void tipc_link_build_bc_sync_msg(struct tipc_link *l,
|
|
|
|
struct sk_buff_head *xmitq);
|
|
|
|
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
|
2016-09-02 01:52:49 +08:00
|
|
|
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
|
|
|
|
struct sk_buff_head *xmitq);
|
2015-10-22 20:51:41 +08:00
|
|
|
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *xmitq);
|
2018-12-19 10:17:57 +08:00
|
|
|
bool tipc_link_too_silent(struct tipc_link *l);
|
2006-01-03 02:04:38 +08:00
|
|
|
#endif
|