2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/link.c: TIPC link code
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2015-03-14 04:08:08 +08:00
|
|
|
* Copyright (c) 1996-2007, 2012-2015, Ericsson AB
|
2013-06-17 22:54:42 +08:00
|
|
|
* Copyright (c) 2004-2007, 2010-2013, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "core.h"
|
tipc: clean up handling of message priorities
Messages transferred by TIPC are assigned an "importance priority", -an
integer value indicating how to treat the message when there is link or
destination socket congestion.
There is no separate header field for this value. Instead, the message
user values have been chosen in ascending order according to perceived
importance, so that the message user field can be used for this.
This is not a good solution. First, we have many more users than the
needed priority levels, so we end up with treating more priority
levels than necessary. Second, the user field cannot always
accurately reflect the priority of the message. E.g., a message
fragment packet should really have the priority of the enveloped
user data message, and not the priority of the MSG_FRAGMENTER user.
Until now, we have been working around this problem in different ways,
but it is now time to implement a consistent way of handling such
priorities, although still within the constraint that we cannot
allocate any more bits in the regular data message header for this.
In this commit, we define a new priority level, TIPC_SYSTEM_IMPORTANCE,
that will be the only one used apart from the four (lower) user data
levels. All non-data messages map down to this priority. Furthermore,
we take some free bits from the MSG_FRAGMENTER header and allocate
them to store the priority of the enveloped message. We then adjust
the functions msg_importance()/msg_set_importance() so that they
read/set the correct header fields depending on user type.
This small protocol change is fully compatible, because the code at
the receiving end of a link currently reads the importance level
only from user data messages, where there is no change.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-14 04:08:11 +08:00
|
|
|
#include "subscr.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
#include "link.h"
|
2014-11-20 17:29:12 +08:00
|
|
|
#include "bcast.h"
|
2014-05-14 17:39:15 +08:00
|
|
|
#include "socket.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
#include "name_distr.h"
|
|
|
|
#include "discover.h"
|
2014-11-20 17:29:07 +08:00
|
|
|
#include "netlink.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2013-06-17 22:54:48 +08:00
|
|
|
#include <linux/pkt_sched.h>
|
|
|
|
|
2012-06-29 12:16:37 +08:00
|
|
|
/*
|
|
|
|
* Error message prefixes
|
|
|
|
*/
|
2015-07-31 06:24:19 +08:00
|
|
|
static const char *link_co_err = "Link tunneling error, ";
|
2012-06-29 12:16:37 +08:00
|
|
|
static const char *link_rst_msg = "Resetting link ";
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2014-11-20 17:29:12 +08:00
|
|
|
static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
|
|
|
|
[TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
|
|
|
|
[TIPC_NLA_LINK_NAME] = {
|
|
|
|
.type = NLA_STRING,
|
|
|
|
.len = TIPC_MAX_LINK_NAME
|
|
|
|
},
|
|
|
|
[TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
|
|
|
|
[TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
|
|
|
|
[TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
|
|
|
|
[TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
|
|
|
|
[TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
|
|
|
|
[TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
|
|
|
|
[TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
|
|
|
|
[TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
|
|
|
|
};
|
|
|
|
|
2014-11-20 17:29:07 +08:00
|
|
|
/* Properties valid for media, bearar and link */
|
|
|
|
static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
|
|
|
|
[TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
|
|
|
|
[TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
|
|
|
|
[TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
|
|
|
|
[TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
|
|
|
|
};
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/*
|
|
|
|
* Interval between NACKs when packets arrive out of order
|
|
|
|
*/
|
|
|
|
#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
|
2008-06-05 08:29:39 +08:00
|
|
|
/*
|
|
|
|
* Out-of-range value for link session numbers
|
|
|
|
*/
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
#define WILDCARD_SESSION 0x10000
|
2008-06-05 08:29:39 +08:00
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
/* Link FSM states:
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
enum {
|
2015-07-31 06:24:21 +08:00
|
|
|
LINK_ESTABLISHED = 0xe,
|
|
|
|
LINK_ESTABLISHING = 0xe << 4,
|
|
|
|
LINK_RESET = 0x1 << 8,
|
|
|
|
LINK_RESETTING = 0x2 << 12,
|
|
|
|
LINK_PEER_RESET = 0xd << 16,
|
|
|
|
LINK_FAILINGOVER = 0xf << 20,
|
|
|
|
LINK_SYNCHING = 0xc << 24
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Link FSM state checking routines
|
|
|
|
*/
|
2015-07-31 06:24:21 +08:00
|
|
|
static int link_is_up(struct tipc_link *l)
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
{
|
2015-07-31 06:24:21 +08:00
|
|
|
return l->state & (LINK_ESTABLISHED | LINK_SYNCHING);
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:25 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *xmitq);
|
2015-07-17 04:54:26 +08:00
|
|
|
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
|
|
|
|
u16 rcvgap, int tolerance, int priority,
|
|
|
|
struct sk_buff_head *xmitq);
|
2011-12-30 09:58:42 +08:00
|
|
|
static void link_reset_statistics(struct tipc_link *l_ptr);
|
|
|
|
static void link_print(struct tipc_link *l_ptr, const char *str);
|
2014-02-18 16:06:46 +08:00
|
|
|
static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
|
2015-03-26 00:07:26 +08:00
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/*
|
2006-03-21 14:37:04 +08:00
|
|
|
* Simple non-static link routines (i.e. referenced outside this file)
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2015-07-31 06:24:21 +08:00
|
|
|
bool tipc_link_is_up(struct tipc_link *l)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-07-31 06:24:21 +08:00
|
|
|
return link_is_up(l);
|
|
|
|
}
|
|
|
|
|
2015-10-16 02:52:46 +08:00
|
|
|
bool tipc_link_peer_is_down(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return l->state == LINK_PEER_RESET;
|
|
|
|
}
|
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
bool tipc_link_is_reset(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING);
|
|
|
|
}
|
|
|
|
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
bool tipc_link_is_establishing(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return l->state == LINK_ESTABLISHING;
|
|
|
|
}
|
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
bool tipc_link_is_synching(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return l->state == LINK_SYNCHING;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool tipc_link_is_failingover(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return l->state == LINK_FAILINGOVER;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool tipc_link_is_blocked(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
int tipc_link_is_active(struct tipc_link *l)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-07-17 04:54:19 +08:00
|
|
|
struct tipc_node *n = l->owner;
|
|
|
|
|
|
|
|
return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-07-31 06:24:26 +08:00
|
|
|
static u32 link_own_addr(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
return msg_prevnode(l->pmsg);
|
|
|
|
}
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/**
|
2006-01-18 07:38:21 +08:00
|
|
|
* tipc_link_create - create a new link
|
2015-07-31 06:24:26 +08:00
|
|
|
* @n: pointer to associated node
|
2015-10-22 20:51:36 +08:00
|
|
|
* @if_name: associated interface name
|
|
|
|
* @bearer_id: id (index) of associated bearer
|
|
|
|
* @tolerance: link tolerance to be used by link
|
|
|
|
* @net_plane: network plane (A,B,c..) this link belongs to
|
|
|
|
* @mtu: mtu to be advertised by link
|
|
|
|
* @priority: priority to be used by link
|
|
|
|
* @window: send window to be used by link
|
|
|
|
* @session: session to be used by link
|
2015-07-31 06:24:26 +08:00
|
|
|
* @ownnode: identity of own node
|
2015-10-22 20:51:36 +08:00
|
|
|
* @peer: node id of peer node
|
2015-07-31 06:24:26 +08:00
|
|
|
* @maddr: media address to be used
|
|
|
|
* @inputq: queue to put messages ready for delivery
|
|
|
|
* @namedq: queue to put binding table update messages ready for delivery
|
|
|
|
* @link: return value, pointer to put the created link
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2015-07-31 06:24:26 +08:00
|
|
|
* Returns true if link was created, otherwise false
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2015-10-22 20:51:36 +08:00
|
|
|
bool tipc_link_create(struct tipc_node *n, char *if_name, int bearer_id,
|
|
|
|
int tolerance, char net_plane, u32 mtu, int priority,
|
|
|
|
int window, u32 session, u32 ownnode, u32 peer,
|
|
|
|
struct tipc_media_addr *maddr,
|
2015-07-31 06:24:26 +08:00
|
|
|
struct sk_buff_head *inputq, struct sk_buff_head *namedq,
|
|
|
|
struct tipc_link **link)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-07-31 06:24:26 +08:00
|
|
|
struct tipc_link *l;
|
|
|
|
struct tipc_msg *hdr;
|
2011-03-01 00:32:27 +08:00
|
|
|
|
2015-07-31 06:24:26 +08:00
|
|
|
l = kzalloc(sizeof(*l), GFP_ATOMIC);
|
|
|
|
if (!l)
|
|
|
|
return false;
|
|
|
|
*link = l;
|
2015-10-22 20:51:36 +08:00
|
|
|
l->pmsg = (struct tipc_msg *)&l->proto_msg;
|
|
|
|
hdr = l->pmsg;
|
|
|
|
tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
|
|
|
|
msg_set_size(hdr, sizeof(l->proto_msg));
|
|
|
|
msg_set_session(hdr, session);
|
|
|
|
msg_set_bearer_id(hdr, l->bearer_id);
|
2011-03-01 00:32:27 +08:00
|
|
|
|
2015-07-31 06:24:26 +08:00
|
|
|
/* Note: peer i/f name is completed by reset/activate message */
|
|
|
|
sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
|
|
|
|
tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
|
|
|
|
if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
|
2015-10-22 20:51:36 +08:00
|
|
|
strcpy((char *)msg_data(hdr), if_name);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-31 06:24:26 +08:00
|
|
|
l->addr = peer;
|
|
|
|
l->media_addr = maddr;
|
|
|
|
l->owner = n;
|
|
|
|
l->peer_session = WILDCARD_SESSION;
|
2015-10-22 20:51:36 +08:00
|
|
|
l->bearer_id = bearer_id;
|
|
|
|
l->tolerance = tolerance;
|
|
|
|
l->net_plane = net_plane;
|
|
|
|
l->advertised_mtu = mtu;
|
|
|
|
l->mtu = mtu;
|
|
|
|
l->priority = priority;
|
|
|
|
tipc_link_set_queue_limits(l, window);
|
2015-07-31 06:24:26 +08:00
|
|
|
l->inputq = inputq;
|
|
|
|
l->namedq = namedq;
|
|
|
|
l->state = LINK_RESETTING;
|
|
|
|
__skb_queue_head_init(&l->transmq);
|
|
|
|
__skb_queue_head_init(&l->backlogq);
|
|
|
|
__skb_queue_head_init(&l->deferdq);
|
|
|
|
skb_queue_head_init(&l->wakeupq);
|
|
|
|
skb_queue_head_init(l->inputq);
|
|
|
|
return true;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints.
|
|
|
|
*
|
|
|
|
* Give a newly added peer node the sequence number where it should
|
|
|
|
* start receiving and acking broadcast packets.
|
|
|
|
*/
|
2015-07-31 06:24:20 +08:00
|
|
|
void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
|
|
|
|
struct sk_buff_head *xmitq)
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
struct sk_buff_head list;
|
2015-07-30 06:28:01 +08:00
|
|
|
u16 last_sent;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
|
|
|
|
skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
|
|
|
|
0, l->addr, link_own_addr(l), 0, 0, 0);
|
|
|
|
if (!skb)
|
|
|
|
return;
|
2015-07-30 06:28:01 +08:00
|
|
|
last_sent = tipc_bclink_get_last_sent(l->owner->net);
|
|
|
|
msg_set_last_bcast(buf_msg(skb), last_sent);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
__skb_queue_head_init(&list);
|
|
|
|
__skb_queue_tail(&list, skb);
|
|
|
|
tipc_link_xmit(l, &list, xmitq);
|
|
|
|
}
|
|
|
|
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
/**
|
|
|
|
* tipc_link_fsm_evt - link finite state machine
|
|
|
|
* @l: pointer to link
|
|
|
|
* @evt: state machine event to be processed
|
|
|
|
*/
|
2015-07-31 06:24:21 +08:00
|
|
|
int tipc_link_fsm_evt(struct tipc_link *l, int evt)
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
{
|
2015-07-31 06:24:20 +08:00
|
|
|
int rc = 0;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
|
|
|
|
switch (l->state) {
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_RESETTING:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
switch (evt) {
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_PEER_RESET_EVT:
|
|
|
|
l->state = LINK_PEER_RESET;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_RESET_EVT:
|
|
|
|
l->state = LINK_RESET;
|
|
|
|
break;
|
|
|
|
case LINK_FAILURE_EVT:
|
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
case LINK_ESTABLISH_EVT:
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
|
|
|
case LINK_SYNCH_END_EVT:
|
|
|
|
default:
|
|
|
|
goto illegal_evt;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LINK_RESET:
|
|
|
|
switch (evt) {
|
|
|
|
case LINK_PEER_RESET_EVT:
|
|
|
|
l->state = LINK_ESTABLISHING;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
l->state = LINK_FAILINGOVER;
|
|
|
|
case LINK_FAILURE_EVT:
|
|
|
|
case LINK_RESET_EVT:
|
|
|
|
case LINK_ESTABLISH_EVT:
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
|
|
|
case LINK_SYNCH_END_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
default:
|
2015-07-31 06:24:21 +08:00
|
|
|
goto illegal_evt;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
}
|
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_PEER_RESET:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
switch (evt) {
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_RESET_EVT:
|
|
|
|
l->state = LINK_ESTABLISHING;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_PEER_RESET_EVT:
|
|
|
|
case LINK_ESTABLISH_EVT:
|
|
|
|
case LINK_FAILURE_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
|
|
|
case LINK_SYNCH_END_EVT:
|
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
|
|
|
default:
|
|
|
|
goto illegal_evt;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LINK_FAILINGOVER:
|
|
|
|
switch (evt) {
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
|
|
|
l->state = LINK_RESET;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_PEER_RESET_EVT:
|
|
|
|
case LINK_RESET_EVT:
|
|
|
|
case LINK_ESTABLISH_EVT:
|
|
|
|
case LINK_FAILURE_EVT:
|
|
|
|
break;
|
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
|
|
|
case LINK_SYNCH_END_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
default:
|
2015-07-31 06:24:21 +08:00
|
|
|
goto illegal_evt;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
}
|
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_ESTABLISHING:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
switch (evt) {
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_ESTABLISH_EVT:
|
|
|
|
l->state = LINK_ESTABLISHED;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
l->state = LINK_FAILINGOVER;
|
|
|
|
break;
|
|
|
|
case LINK_RESET_EVT:
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
l->state = LINK_RESET;
|
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_FAILURE_EVT:
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
case LINK_PEER_RESET_EVT:
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
|
|
|
break;
|
|
|
|
case LINK_SYNCH_END_EVT:
|
|
|
|
default:
|
|
|
|
goto illegal_evt;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LINK_ESTABLISHED:
|
|
|
|
switch (evt) {
|
|
|
|
case LINK_PEER_RESET_EVT:
|
|
|
|
l->state = LINK_PEER_RESET;
|
|
|
|
rc |= TIPC_LINK_DOWN_EVT;
|
|
|
|
break;
|
|
|
|
case LINK_FAILURE_EVT:
|
|
|
|
l->state = LINK_RESETTING;
|
|
|
|
rc |= TIPC_LINK_DOWN_EVT;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_RESET_EVT:
|
|
|
|
l->state = LINK_RESET;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_ESTABLISH_EVT:
|
2015-08-20 14:12:55 +08:00
|
|
|
case LINK_SYNCH_END_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
|
|
|
l->state = LINK_SYNCHING;
|
|
|
|
break;
|
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
default:
|
2015-07-31 06:24:21 +08:00
|
|
|
goto illegal_evt;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
}
|
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_SYNCHING:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
switch (evt) {
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_PEER_RESET_EVT:
|
|
|
|
l->state = LINK_PEER_RESET;
|
|
|
|
rc |= TIPC_LINK_DOWN_EVT;
|
|
|
|
break;
|
|
|
|
case LINK_FAILURE_EVT:
|
|
|
|
l->state = LINK_RESETTING;
|
|
|
|
rc |= TIPC_LINK_DOWN_EVT;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_RESET_EVT:
|
|
|
|
l->state = LINK_RESET;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_ESTABLISH_EVT:
|
|
|
|
case LINK_SYNCH_BEGIN_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_SYNCH_END_EVT:
|
|
|
|
l->state = LINK_ESTABLISHED;
|
|
|
|
break;
|
|
|
|
case LINK_FAILOVER_BEGIN_EVT:
|
|
|
|
case LINK_FAILOVER_END_EVT:
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
default:
|
2015-07-31 06:24:21 +08:00
|
|
|
goto illegal_evt;
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2015-07-31 06:24:21 +08:00
|
|
|
pr_err("Unknown FSM state %x in %s\n", l->state, l->name);
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
}
|
2015-07-31 06:24:21 +08:00
|
|
|
return rc;
|
|
|
|
illegal_evt:
|
|
|
|
pr_err("Illegal FSM event %x in state %x on link %s\n",
|
|
|
|
evt, l->state, l->name);
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:27 +08:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2015-07-17 04:54:28 +08:00
|
|
|
/* link_profile_stats - update statistical profiling of traffic
|
|
|
|
*/
|
|
|
|
static void link_profile_stats(struct tipc_link *l)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
struct tipc_msg *msg;
|
|
|
|
int length;
|
|
|
|
|
|
|
|
/* Update counters used in statistical profiling of send traffic */
|
|
|
|
l->stats.accu_queue_sz += skb_queue_len(&l->transmq);
|
|
|
|
l->stats.queue_sz_counts++;
|
|
|
|
|
|
|
|
skb = skb_peek(&l->transmq);
|
|
|
|
if (!skb)
|
|
|
|
return;
|
|
|
|
msg = buf_msg(skb);
|
|
|
|
length = msg_size(msg);
|
|
|
|
|
|
|
|
if (msg_user(msg) == MSG_FRAGMENTER) {
|
|
|
|
if (msg_type(msg) != FIRST_FRAGMENT)
|
|
|
|
return;
|
|
|
|
length = msg_size(msg_get_wrapped(msg));
|
|
|
|
}
|
|
|
|
l->stats.msg_lengths_total += length;
|
|
|
|
l->stats.msg_length_counts++;
|
|
|
|
if (length <= 64)
|
|
|
|
l->stats.msg_length_profile[0]++;
|
|
|
|
else if (length <= 256)
|
|
|
|
l->stats.msg_length_profile[1]++;
|
|
|
|
else if (length <= 1024)
|
|
|
|
l->stats.msg_length_profile[2]++;
|
|
|
|
else if (length <= 4096)
|
|
|
|
l->stats.msg_length_profile[3]++;
|
|
|
|
else if (length <= 16384)
|
|
|
|
l->stats.msg_length_profile[4]++;
|
|
|
|
else if (length <= 32768)
|
|
|
|
l->stats.msg_length_profile[5]++;
|
|
|
|
else
|
|
|
|
l->stats.msg_length_profile[6]++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* tipc_link_timeout - perform periodic task as instructed from node timeout
|
|
|
|
*/
|
|
|
|
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
int rc = 0;
|
2015-07-31 06:24:20 +08:00
|
|
|
int mtyp = STATE_MSG;
|
|
|
|
bool xmit = false;
|
|
|
|
bool prb = false;
|
2015-07-17 04:54:28 +08:00
|
|
|
|
|
|
|
link_profile_stats(l);
|
2015-07-31 06:24:20 +08:00
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
switch (l->state) {
|
|
|
|
case LINK_ESTABLISHED:
|
|
|
|
case LINK_SYNCHING:
|
2015-07-31 06:24:20 +08:00
|
|
|
if (!l->silent_intv_cnt) {
|
|
|
|
if (tipc_bclink_acks_missing(l->owner))
|
|
|
|
xmit = true;
|
|
|
|
} else if (l->silent_intv_cnt <= l->abort_limit) {
|
|
|
|
xmit = true;
|
|
|
|
prb = true;
|
|
|
|
} else {
|
2015-07-31 06:24:21 +08:00
|
|
|
rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
|
2015-07-31 06:24:20 +08:00
|
|
|
}
|
|
|
|
l->silent_intv_cnt++;
|
2015-07-31 06:24:21 +08:00
|
|
|
break;
|
|
|
|
case LINK_RESET:
|
2015-07-31 06:24:20 +08:00
|
|
|
xmit = true;
|
|
|
|
mtyp = RESET_MSG;
|
2015-07-31 06:24:21 +08:00
|
|
|
break;
|
|
|
|
case LINK_ESTABLISHING:
|
2015-07-31 06:24:20 +08:00
|
|
|
xmit = true;
|
|
|
|
mtyp = ACTIVATE_MSG;
|
2015-07-31 06:24:21 +08:00
|
|
|
break;
|
|
|
|
case LINK_PEER_RESET:
|
2015-07-31 06:24:23 +08:00
|
|
|
case LINK_RESETTING:
|
2015-07-31 06:24:21 +08:00
|
|
|
case LINK_FAILINGOVER:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
2015-07-31 06:24:20 +08:00
|
|
|
}
|
2015-07-31 06:24:21 +08:00
|
|
|
|
2015-07-31 06:24:20 +08:00
|
|
|
if (xmit)
|
|
|
|
tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq);
|
|
|
|
|
2015-07-17 04:54:28 +08:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/**
|
2015-03-26 00:07:25 +08:00
|
|
|
* link_schedule_user - schedule a message sender for wakeup after congestion
|
2014-08-23 06:09:07 +08:00
|
|
|
* @link: congested link
|
2015-03-26 00:07:25 +08:00
|
|
|
* @list: message that was attempted sent
|
2014-08-23 06:09:07 +08:00
|
|
|
* Create pseudo msg to send back to user when congestion abates
|
2015-07-17 04:54:23 +08:00
|
|
|
* Does not consume buffer list
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2015-03-26 00:07:25 +08:00
|
|
|
static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-03-26 00:07:25 +08:00
|
|
|
struct tipc_msg *msg = buf_msg(skb_peek(list));
|
|
|
|
int imp = msg_importance(msg);
|
|
|
|
u32 oport = msg_origport(msg);
|
|
|
|
u32 addr = link_own_addr(link);
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
|
|
|
/* This really cannot happen... */
|
|
|
|
if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) {
|
|
|
|
pr_warn("%s<%s>, send queue full", link_rst_msg, link->name);
|
2015-07-17 04:54:23 +08:00
|
|
|
return -ENOBUFS;
|
2015-03-26 00:07:25 +08:00
|
|
|
}
|
|
|
|
/* Non-blocking sender: */
|
|
|
|
if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending)
|
|
|
|
return -ELINKCONG;
|
|
|
|
|
|
|
|
/* Create and schedule wakeup pseudo message */
|
|
|
|
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
|
|
|
|
addr, addr, oport, 0, 0);
|
|
|
|
if (!skb)
|
2015-07-17 04:54:23 +08:00
|
|
|
return -ENOBUFS;
|
2015-03-26 00:07:25 +08:00
|
|
|
TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list);
|
|
|
|
TIPC_SKB_CB(skb)->chain_imp = imp;
|
|
|
|
skb_queue_tail(&link->wakeupq, skb);
|
2014-08-23 06:09:07 +08:00
|
|
|
link->stats.link_congs++;
|
2015-03-26 00:07:25 +08:00
|
|
|
return -ELINKCONG;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2014-08-23 06:09:07 +08:00
|
|
|
/**
|
|
|
|
* link_prepare_wakeup - prepare users for wakeup after congestion
|
|
|
|
* @link: congested link
|
|
|
|
* Move a number of waiting users, as permitted by available space in
|
|
|
|
* the send queue, from link wait queue to node wait queue for wakeup
|
|
|
|
*/
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
void link_prepare_wakeup(struct tipc_link *l)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,};
|
|
|
|
int imp, lim;
|
2014-11-26 11:41:51 +08:00
|
|
|
struct sk_buff *skb, *tmp;
|
2014-08-23 06:09:07 +08:00
|
|
|
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
|
|
|
|
imp = TIPC_SKB_CB(skb)->chain_imp;
|
|
|
|
lim = l->window + l->backlog[imp].limit;
|
|
|
|
pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
|
|
|
|
if ((pnd[imp] + l->backlog[imp].len) >= lim)
|
2006-01-03 02:04:38 +08:00
|
|
|
break;
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
skb_unlink(skb, &l->wakeupq);
|
2015-07-17 04:54:21 +08:00
|
|
|
skb_queue_tail(l->inputq, skb);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2006-01-18 07:38:21 +08:00
|
|
|
* tipc_link_reset_fragments - purge link's inbound message fragments queue
|
2006-01-03 02:04:38 +08:00
|
|
|
* @l_ptr: pointer to link
|
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_link_reset_fragments(struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2014-05-14 17:39:12 +08:00
|
|
|
kfree_skb(l_ptr->reasm_buf);
|
|
|
|
l_ptr->reasm_buf = NULL;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-06-28 21:44:44 +08:00
|
|
|
void tipc_link_purge_backlog(struct tipc_link *l)
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
{
|
|
|
|
__skb_queue_purge(&l->backlogq);
|
|
|
|
l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
|
|
|
|
l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
|
|
|
|
l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
|
|
|
|
l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
|
|
|
|
l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
|
|
|
|
}
|
|
|
|
|
2007-02-09 22:25:21 +08:00
|
|
|
/**
|
2014-01-08 06:02:44 +08:00
|
|
|
* tipc_link_purge_queues - purge all pkt queues associated with link
|
2006-01-03 02:04:38 +08:00
|
|
|
* @l_ptr: pointer to link
|
|
|
|
*/
|
2014-01-08 06:02:44 +08:00
|
|
|
void tipc_link_purge_queues(struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-03-14 04:08:10 +08:00
|
|
|
__skb_queue_purge(&l_ptr->deferdq);
|
|
|
|
__skb_queue_purge(&l_ptr->transmq);
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
tipc_link_purge_backlog(l_ptr);
|
2006-01-18 07:38:21 +08:00
|
|
|
tipc_link_reset_fragments(l_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
void tipc_link_reset(struct tipc_link *l)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2008-06-05 08:29:39 +08:00
|
|
|
/* Link is down, accept any session */
|
2015-07-31 06:24:19 +08:00
|
|
|
l->peer_session = WILDCARD_SESSION;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
/* If peer is up, it only accepts an incremented session number */
|
|
|
|
msg_set_session(l->pmsg, msg_session(l->pmsg) + 1);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
/* Prepare for renewed mtu size negotiation */
|
|
|
|
l->mtu = l->advertised_mtu;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-31 06:24:24 +08:00
|
|
|
/* Clean up all queues: */
|
2015-07-31 06:24:19 +08:00
|
|
|
__skb_queue_purge(&l->transmq);
|
|
|
|
__skb_queue_purge(&l->deferdq);
|
2015-07-31 06:24:24 +08:00
|
|
|
skb_queue_splice_init(&l->wakeupq, l->inputq);
|
2015-07-31 06:24:19 +08:00
|
|
|
|
|
|
|
tipc_link_purge_backlog(l);
|
|
|
|
kfree_skb(l->reasm_buf);
|
|
|
|
kfree_skb(l->failover_reasm_skb);
|
|
|
|
l->reasm_buf = NULL;
|
|
|
|
l->failover_reasm_skb = NULL;
|
|
|
|
l->rcv_unacked = 0;
|
|
|
|
l->snd_nxt = 1;
|
|
|
|
l->rcv_nxt = 1;
|
|
|
|
l->silent_intv_cnt = 0;
|
|
|
|
l->stats.recv_info = 0;
|
|
|
|
l->stale_count = 0;
|
|
|
|
link_reset_statistics(l);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
/**
|
2014-07-17 08:41:03 +08:00
|
|
|
* __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
* @link: link to use
|
2014-11-26 11:41:55 +08:00
|
|
|
* @list: chain of buffers containing message
|
|
|
|
*
|
2015-07-17 04:54:23 +08:00
|
|
|
* Consumes the buffer chain, except when returning an error code,
|
2015-03-26 00:07:25 +08:00
|
|
|
* Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
|
|
|
|
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
*/
|
2015-01-09 15:27:06 +08:00
|
|
|
int __tipc_link_xmit(struct net *net, struct tipc_link *link,
|
|
|
|
struct sk_buff_head *list)
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
{
|
2014-11-26 11:41:55 +08:00
|
|
|
struct tipc_msg *msg = buf_msg(skb_peek(list));
|
2015-03-14 04:08:10 +08:00
|
|
|
unsigned int maxwin = link->window;
|
tipc: improve link congestion algorithm
The link congestion algorithm used until now implies two problems.
- It is too generous towards lower-level messages in situations of high
load by giving "absolute" bandwidth guarantees to the different
priority levels. LOW traffic is guaranteed 10%, MEDIUM is guaranted
20%, HIGH is guaranteed 30%, and CRITICAL is guaranteed 40% of the
available bandwidth. But, in the absence of higher level traffic, the
ratio between two distinct levels becomes unreasonable. E.g. if there
is only LOW and MEDIUM traffic on a system, the former is guaranteed
1/3 of the bandwidth, and the latter 2/3. This again means that if
there is e.g. one LOW user and 10 MEDIUM users, the former will have
33.3% of the bandwidth, and the others will have to compete for the
remainder, i.e. each will end up with 6.7% of the capacity.
- Packets of type MSG_BUNDLER are created at SYSTEM importance level,
but only after the packets bundled into it have passed the congestion
test for their own respective levels. Since bundled packets don't
result in incrementing the level counter for their own importance,
only occasionally for the SYSTEM level counter, they do in practice
obtain SYSTEM level importance. Hence, the current implementation
provides a gap in the congestion algorithm that in the worst case
may lead to a link reset.
We now refine the congestion algorithm as follows:
- A message is accepted to the link backlog only if its own level
counter, and all superior level counters, permit it.
- The importance of a created bundle packet is set according to its
contents. A bundle packet created from messges at levels LOW to
CRITICAL is given importance level CRITICAL, while a bundle created
from a SYSTEM level message is given importance SYSTEM. In the latter
case only subsequent SYSTEM level messages are allowed to be bundled
into it.
This solves the first problem described above, by making the bandwidth
guarantee relative to the total number of users at all levels; only
the upper limit for each level remains absolute. In the example
described above, the single LOW user would use 1/11th of the bandwidth,
the same as each of the ten MEDIUM users, but he still has the same
guarantee against starvation as the latter ones.
The fix also solves the second problem. If the CRITICAL level is filled
up by bundle packets of that level, no lower level packets will be
accepted any more.
Suggested-by: Gergely Kiss <gergely.kiss@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-14 22:46:17 +08:00
|
|
|
unsigned int i, imp = msg_importance(msg);
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:02 +08:00
|
|
|
uint mtu = link->mtu;
|
2015-05-14 22:46:15 +08:00
|
|
|
u16 ack = mod(link->rcv_nxt - 1);
|
|
|
|
u16 seqno = link->snd_nxt;
|
2015-05-14 22:46:14 +08:00
|
|
|
u16 bc_last_in = link->owner->bclink.last_in;
|
2015-07-31 06:24:26 +08:00
|
|
|
struct tipc_media_addr *addr = link->media_addr;
|
2015-03-14 04:08:10 +08:00
|
|
|
struct sk_buff_head *transmq = &link->transmq;
|
|
|
|
struct sk_buff_head *backlogq = &link->backlogq;
|
2015-05-14 22:46:18 +08:00
|
|
|
struct sk_buff *skb, *bskb;
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
|
tipc: improve link congestion algorithm
The link congestion algorithm used until now implies two problems.
- It is too generous towards lower-level messages in situations of high
load by giving "absolute" bandwidth guarantees to the different
priority levels. LOW traffic is guaranteed 10%, MEDIUM is guaranted
20%, HIGH is guaranteed 30%, and CRITICAL is guaranteed 40% of the
available bandwidth. But, in the absence of higher level traffic, the
ratio between two distinct levels becomes unreasonable. E.g. if there
is only LOW and MEDIUM traffic on a system, the former is guaranteed
1/3 of the bandwidth, and the latter 2/3. This again means that if
there is e.g. one LOW user and 10 MEDIUM users, the former will have
33.3% of the bandwidth, and the others will have to compete for the
remainder, i.e. each will end up with 6.7% of the capacity.
- Packets of type MSG_BUNDLER are created at SYSTEM importance level,
but only after the packets bundled into it have passed the congestion
test for their own respective levels. Since bundled packets don't
result in incrementing the level counter for their own importance,
only occasionally for the SYSTEM level counter, they do in practice
obtain SYSTEM level importance. Hence, the current implementation
provides a gap in the congestion algorithm that in the worst case
may lead to a link reset.
We now refine the congestion algorithm as follows:
- A message is accepted to the link backlog only if its own level
counter, and all superior level counters, permit it.
- The importance of a created bundle packet is set according to its
contents. A bundle packet created from messges at levels LOW to
CRITICAL is given importance level CRITICAL, while a bundle created
from a SYSTEM level message is given importance SYSTEM. In the latter
case only subsequent SYSTEM level messages are allowed to be bundled
into it.
This solves the first problem described above, by making the bandwidth
guarantee relative to the total number of users at all levels; only
the upper limit for each level remains absolute. In the example
described above, the single LOW user would use 1/11th of the bandwidth,
the same as each of the ten MEDIUM users, but he still has the same
guarantee against starvation as the latter ones.
The fix also solves the second problem. If the CRITICAL level is filled
up by bundle packets of that level, no lower level packets will be
accepted any more.
Suggested-by: Gergely Kiss <gergely.kiss@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-14 22:46:17 +08:00
|
|
|
/* Match msg importance against this and all higher backlog limits: */
|
|
|
|
for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
|
|
|
|
if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
|
|
|
|
return link_schedule_user(link, list);
|
|
|
|
}
|
2015-07-17 04:54:23 +08:00
|
|
|
if (unlikely(msg_size(msg) > mtu))
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return -EMSGSIZE;
|
2015-07-17 04:54:23 +08:00
|
|
|
|
2015-03-14 04:08:10 +08:00
|
|
|
/* Prepare each packet for sending, and add to relevant queue: */
|
2015-05-14 22:46:18 +08:00
|
|
|
while (skb_queue_len(list)) {
|
|
|
|
skb = skb_peek(list);
|
2014-11-26 11:41:52 +08:00
|
|
|
msg = buf_msg(skb);
|
2015-03-14 04:08:10 +08:00
|
|
|
msg_set_seqno(msg, seqno);
|
|
|
|
msg_set_ack(msg, ack);
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
msg_set_bcast_ack(msg, bc_last_in);
|
|
|
|
|
2015-03-14 04:08:10 +08:00
|
|
|
if (likely(skb_queue_len(transmq) < maxwin)) {
|
2015-05-14 22:46:18 +08:00
|
|
|
__skb_dequeue(list);
|
2015-03-14 04:08:10 +08:00
|
|
|
__skb_queue_tail(transmq, skb);
|
|
|
|
tipc_bearer_send(net, link->bearer_id, skb, addr);
|
|
|
|
link->rcv_unacked = 0;
|
|
|
|
seqno++;
|
|
|
|
continue;
|
|
|
|
}
|
2015-05-14 22:46:18 +08:00
|
|
|
if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) {
|
|
|
|
kfree_skb(__skb_dequeue(list));
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
link->stats.sent_bundled++;
|
|
|
|
continue;
|
2015-03-14 04:08:10 +08:00
|
|
|
}
|
2015-05-14 22:46:18 +08:00
|
|
|
if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) {
|
|
|
|
kfree_skb(__skb_dequeue(list));
|
|
|
|
__skb_queue_tail(backlogq, bskb);
|
|
|
|
link->backlog[msg_importance(buf_msg(bskb))].len++;
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
link->stats.sent_bundled++;
|
|
|
|
link->stats.sent_bundles++;
|
2015-05-14 22:46:18 +08:00
|
|
|
continue;
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
}
|
2015-05-14 22:46:18 +08:00
|
|
|
link->backlog[imp].len += skb_queue_len(list);
|
|
|
|
skb_queue_splice_tail_init(list, backlogq);
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
}
|
2015-05-14 22:46:15 +08:00
|
|
|
link->snd_nxt = seqno;
|
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 09:41:32 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-17 04:54:24 +08:00
|
|
|
/**
|
|
|
|
* tipc_link_xmit(): enqueue buffer list according to queue situation
|
|
|
|
* @link: link to use
|
|
|
|
* @list: chain of buffers containing message
|
|
|
|
* @xmitq: returned list of packets to be sent by caller
|
|
|
|
*
|
|
|
|
* Consumes the buffer chain, except when returning -ELINKCONG,
|
|
|
|
* since the caller then may want to make more send attempts.
|
|
|
|
* Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
|
|
|
|
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
|
|
|
|
*/
|
|
|
|
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
|
|
|
|
struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
struct tipc_msg *hdr = buf_msg(skb_peek(list));
|
|
|
|
unsigned int maxwin = l->window;
|
|
|
|
unsigned int i, imp = msg_importance(hdr);
|
|
|
|
unsigned int mtu = l->mtu;
|
|
|
|
u16 ack = l->rcv_nxt - 1;
|
|
|
|
u16 seqno = l->snd_nxt;
|
|
|
|
u16 bc_last_in = l->owner->bclink.last_in;
|
|
|
|
struct sk_buff_head *transmq = &l->transmq;
|
|
|
|
struct sk_buff_head *backlogq = &l->backlogq;
|
|
|
|
struct sk_buff *skb, *_skb, *bskb;
|
|
|
|
|
|
|
|
/* Match msg importance against this and all higher backlog limits: */
|
|
|
|
for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
|
|
|
|
if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
|
|
|
|
return link_schedule_user(l, list);
|
|
|
|
}
|
|
|
|
if (unlikely(msg_size(hdr) > mtu))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
/* Prepare each packet for sending, and add to relevant queue: */
|
|
|
|
while (skb_queue_len(list)) {
|
|
|
|
skb = skb_peek(list);
|
|
|
|
hdr = buf_msg(skb);
|
|
|
|
msg_set_seqno(hdr, seqno);
|
|
|
|
msg_set_ack(hdr, ack);
|
|
|
|
msg_set_bcast_ack(hdr, bc_last_in);
|
|
|
|
|
|
|
|
if (likely(skb_queue_len(transmq) < maxwin)) {
|
|
|
|
_skb = skb_clone(skb, GFP_ATOMIC);
|
|
|
|
if (!_skb)
|
|
|
|
return -ENOBUFS;
|
|
|
|
__skb_dequeue(list);
|
|
|
|
__skb_queue_tail(transmq, skb);
|
|
|
|
__skb_queue_tail(xmitq, _skb);
|
|
|
|
l->rcv_unacked = 0;
|
|
|
|
seqno++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) {
|
|
|
|
kfree_skb(__skb_dequeue(list));
|
|
|
|
l->stats.sent_bundled++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) {
|
|
|
|
kfree_skb(__skb_dequeue(list));
|
|
|
|
__skb_queue_tail(backlogq, bskb);
|
|
|
|
l->backlog[msg_importance(buf_msg(bskb))].len++;
|
|
|
|
l->stats.sent_bundled++;
|
|
|
|
l->stats.sent_bundles++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
l->backlog[imp].len += skb_queue_len(list);
|
|
|
|
skb_queue_splice_tail_init(list, backlogq);
|
|
|
|
}
|
|
|
|
l->snd_nxt = seqno;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 13:51:31 +08:00
|
|
|
/*
|
2014-02-18 16:06:46 +08:00
|
|
|
* tipc_link_sync_rcv - synchronize broadcast link endpoints.
|
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 13:51:31 +08:00
|
|
|
* Receive the sequence number where we should start receiving and
|
|
|
|
* acking broadcast packets from a newly added peer node, and open
|
|
|
|
* up for reception of such packets.
|
|
|
|
*
|
|
|
|
* Called with node locked
|
|
|
|
*/
|
2014-02-18 16:06:46 +08:00
|
|
|
static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf)
|
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 13:51:31 +08:00
|
|
|
{
|
|
|
|
struct tipc_msg *msg = buf_msg(buf);
|
|
|
|
|
|
|
|
n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg);
|
|
|
|
n->bclink.recv_permitted = true;
|
|
|
|
kfree_skb(buf);
|
|
|
|
}
|
|
|
|
|
2007-02-09 22:25:21 +08:00
|
|
|
/*
|
2014-11-26 11:41:48 +08:00
|
|
|
* tipc_link_push_packets - push unsent packets to bearer
|
|
|
|
*
|
|
|
|
* Push out the unsent messages of a link where congestion
|
|
|
|
* has abated. Node is locked.
|
|
|
|
*
|
|
|
|
* Called with node locked
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2015-03-14 04:08:10 +08:00
|
|
|
void tipc_link_push_packets(struct tipc_link *link)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-03-14 04:08:10 +08:00
|
|
|
struct sk_buff *skb;
|
2014-11-26 11:41:48 +08:00
|
|
|
struct tipc_msg *msg;
|
2015-05-14 22:46:18 +08:00
|
|
|
u16 seqno = link->snd_nxt;
|
2015-05-14 22:46:15 +08:00
|
|
|
u16 ack = mod(link->rcv_nxt - 1);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-03-14 04:08:10 +08:00
|
|
|
while (skb_queue_len(&link->transmq) < link->window) {
|
|
|
|
skb = __skb_dequeue(&link->backlogq);
|
|
|
|
if (!skb)
|
2014-11-26 11:41:48 +08:00
|
|
|
break;
|
2015-03-14 04:08:10 +08:00
|
|
|
msg = buf_msg(skb);
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
link->backlog[msg_importance(msg)].len--;
|
2015-03-14 04:08:10 +08:00
|
|
|
msg_set_ack(msg, ack);
|
2015-05-14 22:46:18 +08:00
|
|
|
msg_set_seqno(msg, seqno);
|
|
|
|
seqno = mod(seqno + 1);
|
2015-03-14 04:08:10 +08:00
|
|
|
msg_set_bcast_ack(msg, link->owner->bclink.last_in);
|
|
|
|
link->rcv_unacked = 0;
|
|
|
|
__skb_queue_tail(&link->transmq, skb);
|
|
|
|
tipc_bearer_send(link->owner->net, link->bearer_id,
|
2015-07-31 06:24:26 +08:00
|
|
|
skb, link->media_addr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2015-05-14 22:46:18 +08:00
|
|
|
link->snd_nxt = seqno;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb, *_skb;
|
|
|
|
struct tipc_msg *hdr;
|
|
|
|
u16 seqno = l->snd_nxt;
|
|
|
|
u16 ack = l->rcv_nxt - 1;
|
|
|
|
|
|
|
|
while (skb_queue_len(&l->transmq) < l->window) {
|
|
|
|
skb = skb_peek(&l->backlogq);
|
|
|
|
if (!skb)
|
|
|
|
break;
|
|
|
|
_skb = skb_clone(skb, GFP_ATOMIC);
|
|
|
|
if (!_skb)
|
|
|
|
break;
|
|
|
|
__skb_dequeue(&l->backlogq);
|
|
|
|
hdr = buf_msg(skb);
|
|
|
|
l->backlog[msg_importance(hdr)].len--;
|
|
|
|
__skb_queue_tail(&l->transmq, skb);
|
|
|
|
__skb_queue_tail(xmitq, _skb);
|
|
|
|
msg_set_ack(hdr, ack);
|
|
|
|
msg_set_seqno(hdr, seqno);
|
|
|
|
msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
|
|
|
|
l->rcv_unacked = 0;
|
|
|
|
seqno++;
|
|
|
|
}
|
|
|
|
l->snd_nxt = seqno;
|
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
static void link_retransmit_failure(struct tipc_link *l_ptr,
|
2013-06-17 22:54:47 +08:00
|
|
|
struct sk_buff *buf)
|
2006-06-26 14:40:01 +08:00
|
|
|
{
|
|
|
|
struct tipc_msg *msg = buf_msg(buf);
|
2015-01-09 15:27:07 +08:00
|
|
|
struct net *net = l_ptr->owner->net;
|
2006-06-26 14:40:01 +08:00
|
|
|
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_warn("Retransmission failure on link <%s>\n", l_ptr->name);
|
2006-06-26 14:40:01 +08:00
|
|
|
|
|
|
|
if (l_ptr->addr) {
|
|
|
|
/* Handle failure on standard link */
|
2015-07-17 04:54:30 +08:00
|
|
|
link_print(l_ptr, "Resetting link ");
|
|
|
|
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
|
|
|
|
msg_user(msg), msg_type(msg), msg_size(msg),
|
|
|
|
msg_errcode(msg));
|
|
|
|
pr_info("sqno %u, prev: %x, src: %x\n",
|
|
|
|
msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
|
2006-06-26 14:40:01 +08:00
|
|
|
} else {
|
|
|
|
/* Handle failure on broadcast link */
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *n_ptr;
|
2006-06-26 14:40:01 +08:00
|
|
|
char addr_string[16];
|
|
|
|
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Msg seq number: %u, ", msg_seqno(msg));
|
|
|
|
pr_cont("Outstanding acks: %lu\n",
|
|
|
|
(unsigned long) TIPC_SKB_CB(buf)->handle);
|
2006-10-04 07:25:34 +08:00
|
|
|
|
2015-01-09 15:27:07 +08:00
|
|
|
n_ptr = tipc_bclink_retransmit_to(net);
|
2006-06-26 14:40:01 +08:00
|
|
|
|
2010-05-11 22:30:12 +08:00
|
|
|
tipc_addr_string_fill(addr_string, n_ptr->addr);
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Broadcast link info for %s\n", addr_string);
|
2012-11-16 13:51:30 +08:00
|
|
|
pr_info("Reception permitted: %d, Acked: %u\n",
|
|
|
|
n_ptr->bclink.recv_permitted,
|
2012-06-29 12:16:37 +08:00
|
|
|
n_ptr->bclink.acked);
|
|
|
|
pr_info("Last in: %u, Oos state: %u, Last sent: %u\n",
|
|
|
|
n_ptr->bclink.last_in,
|
|
|
|
n_ptr->bclink.oos_state,
|
|
|
|
n_ptr->bclink.last_sent);
|
2006-06-26 14:40:01 +08:00
|
|
|
|
tipc: fix potential deadlock when all links are reset
[ 60.988363] ======================================================
[ 60.988754] [ INFO: possible circular locking dependency detected ]
[ 60.989152] 3.19.0+ #194 Not tainted
[ 60.989377] -------------------------------------------------------
[ 60.989781] swapper/3/0 is trying to acquire lock:
[ 60.990079] (&(&n_ptr->lock)->rlock){+.-...}, at: [<ffffffffa0006dca>] tipc_link_retransmit+0x1aa/0x240 [tipc]
[ 60.990743]
[ 60.990743] but task is already holding lock:
[ 60.991106] (&(&bclink->lock)->rlock){+.-...}, at: [<ffffffffa00004be>] tipc_bclink_lock+0x8e/0xa0 [tipc]
[ 60.991738]
[ 60.991738] which lock already depends on the new lock.
[ 60.991738]
[ 60.992174]
[ 60.992174] the existing dependency chain (in reverse order) is:
[ 60.992174]
-> #1 (&(&bclink->lock)->rlock){+.-...}:
[ 60.992174] [<ffffffff810a9c0c>] lock_acquire+0x9c/0x140
[ 60.992174] [<ffffffff8179c41f>] _raw_spin_lock_bh+0x3f/0x50
[ 60.992174] [<ffffffffa00004be>] tipc_bclink_lock+0x8e/0xa0 [tipc]
[ 60.992174] [<ffffffffa0000f57>] tipc_bclink_add_node+0x97/0xf0 [tipc]
[ 60.992174] [<ffffffffa0011815>] tipc_node_link_up+0xf5/0x110 [tipc]
[ 60.992174] [<ffffffffa0007783>] link_state_event+0x2b3/0x4f0 [tipc]
[ 60.992174] [<ffffffffa00193c0>] tipc_link_proto_rcv+0x24c/0x418 [tipc]
[ 60.992174] [<ffffffffa0008857>] tipc_rcv+0x827/0xac0 [tipc]
[ 60.992174] [<ffffffffa0002ca3>] tipc_l2_rcv_msg+0x73/0xd0 [tipc]
[ 60.992174] [<ffffffff81646e66>] __netif_receive_skb_core+0x746/0x980
[ 60.992174] [<ffffffff816470c1>] __netif_receive_skb+0x21/0x70
[ 60.992174] [<ffffffff81647295>] netif_receive_skb_internal+0x35/0x130
[ 60.992174] [<ffffffff81648218>] napi_gro_receive+0x158/0x1d0
[ 60.992174] [<ffffffff81559e05>] e1000_clean_rx_irq+0x155/0x490
[ 60.992174] [<ffffffff8155c1b7>] e1000_clean+0x267/0x990
[ 60.992174] [<ffffffff81647b60>] net_rx_action+0x150/0x360
[ 60.992174] [<ffffffff8105ec43>] __do_softirq+0x123/0x360
[ 60.992174] [<ffffffff8105f12e>] irq_exit+0x8e/0xb0
[ 60.992174] [<ffffffff8179f9f5>] do_IRQ+0x65/0x110
[ 60.992174] [<ffffffff8179da6f>] ret_from_intr+0x0/0x13
[ 60.992174] [<ffffffff8100de9f>] arch_cpu_idle+0xf/0x20
[ 60.992174] [<ffffffff8109dfa6>] cpu_startup_entry+0x2f6/0x3f0
[ 60.992174] [<ffffffff81033cda>] start_secondary+0x13a/0x150
[ 60.992174]
-> #0 (&(&n_ptr->lock)->rlock){+.-...}:
[ 60.992174] [<ffffffff810a8f7d>] __lock_acquire+0x163d/0x1ca0
[ 60.992174] [<ffffffff810a9c0c>] lock_acquire+0x9c/0x140
[ 60.992174] [<ffffffff8179c41f>] _raw_spin_lock_bh+0x3f/0x50
[ 60.992174] [<ffffffffa0006dca>] tipc_link_retransmit+0x1aa/0x240 [tipc]
[ 60.992174] [<ffffffffa0001e11>] tipc_bclink_rcv+0x611/0x640 [tipc]
[ 60.992174] [<ffffffffa0008646>] tipc_rcv+0x616/0xac0 [tipc]
[ 60.992174] [<ffffffffa0002ca3>] tipc_l2_rcv_msg+0x73/0xd0 [tipc]
[ 60.992174] [<ffffffff81646e66>] __netif_receive_skb_core+0x746/0x980
[ 60.992174] [<ffffffff816470c1>] __netif_receive_skb+0x21/0x70
[ 60.992174] [<ffffffff81647295>] netif_receive_skb_internal+0x35/0x130
[ 60.992174] [<ffffffff81648218>] napi_gro_receive+0x158/0x1d0
[ 60.992174] [<ffffffff81559e05>] e1000_clean_rx_irq+0x155/0x490
[ 60.992174] [<ffffffff8155c1b7>] e1000_clean+0x267/0x990
[ 60.992174] [<ffffffff81647b60>] net_rx_action+0x150/0x360
[ 60.992174] [<ffffffff8105ec43>] __do_softirq+0x123/0x360
[ 60.992174] [<ffffffff8105f12e>] irq_exit+0x8e/0xb0
[ 60.992174] [<ffffffff8179f9f5>] do_IRQ+0x65/0x110
[ 60.992174] [<ffffffff8179da6f>] ret_from_intr+0x0/0x13
[ 60.992174] [<ffffffff8100de9f>] arch_cpu_idle+0xf/0x20
[ 60.992174] [<ffffffff8109dfa6>] cpu_startup_entry+0x2f6/0x3f0
[ 60.992174] [<ffffffff81033cda>] start_secondary+0x13a/0x150
[ 60.992174]
[ 60.992174] other info that might help us debug this:
[ 60.992174]
[ 60.992174] Possible unsafe locking scenario:
[ 60.992174]
[ 60.992174] CPU0 CPU1
[ 60.992174] ---- ----
[ 60.992174] lock(&(&bclink->lock)->rlock);
[ 60.992174] lock(&(&n_ptr->lock)->rlock);
[ 60.992174] lock(&(&bclink->lock)->rlock);
[ 60.992174] lock(&(&n_ptr->lock)->rlock);
[ 60.992174]
[ 60.992174] *** DEADLOCK ***
[ 60.992174]
[ 60.992174] 3 locks held by swapper/3/0:
[ 60.992174] #0: (rcu_read_lock){......}, at: [<ffffffff81646791>] __netif_receive_skb_core+0x71/0x980
[ 60.992174] #1: (rcu_read_lock){......}, at: [<ffffffffa0002c35>] tipc_l2_rcv_msg+0x5/0xd0 [tipc]
[ 60.992174] #2: (&(&bclink->lock)->rlock){+.-...}, at: [<ffffffffa00004be>] tipc_bclink_lock+0x8e/0xa0 [tipc]
[ 60.992174]
The correct the sequence of grabbing n_ptr->lock and bclink->lock
should be that the former is first held and the latter is then taken,
which exactly happened on CPU1. But especially when the retransmission
of broadcast link is failed, bclink->lock is first held in
tipc_bclink_rcv(), and n_ptr->lock is taken in link_retransmit_failure()
called by tipc_link_retransmit() subsequently, which is demonstrated on
CPU0. As a result, deadlock occurs.
If the order of holding the two locks happening on CPU0 is reversed, the
deadlock risk will be relieved. Therefore, the node lock taken in
link_retransmit_failure() originally is moved to tipc_bclink_rcv()
so that it's obtained before bclink lock. But the precondition of
the adjustment of node lock is that responding to bclink reset event
must be moved from tipc_bclink_unlock() to tipc_node_unlock().
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 18:10:23 +08:00
|
|
|
n_ptr->action_flags |= TIPC_BCAST_RESET;
|
2006-06-26 14:40:01 +08:00
|
|
|
l_ptr->stale_count = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-26 11:41:52 +08:00
|
|
|
void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
|
2006-01-18 07:38:21 +08:00
|
|
|
u32 retransmits)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
|
|
|
struct tipc_msg *msg;
|
|
|
|
|
2014-11-26 11:41:52 +08:00
|
|
|
if (!skb)
|
2006-06-26 14:40:01 +08:00
|
|
|
return;
|
|
|
|
|
2014-11-26 11:41:52 +08:00
|
|
|
msg = buf_msg(skb);
|
2007-02-09 22:25:21 +08:00
|
|
|
|
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 23:08:00 +08:00
|
|
|
/* Detect repeated retransmit failures */
|
2015-05-14 22:46:15 +08:00
|
|
|
if (l_ptr->last_retransm == msg_seqno(msg)) {
|
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 23:08:00 +08:00
|
|
|
if (++l_ptr->stale_count > 100) {
|
2014-11-26 11:41:52 +08:00
|
|
|
link_retransmit_failure(l_ptr, skb);
|
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 23:08:00 +08:00
|
|
|
return;
|
2006-06-26 14:40:01 +08:00
|
|
|
}
|
|
|
|
} else {
|
2015-05-14 22:46:15 +08:00
|
|
|
l_ptr->last_retransm = msg_seqno(msg);
|
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 23:08:00 +08:00
|
|
|
l_ptr->stale_count = 1;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2006-06-26 14:40:01 +08:00
|
|
|
|
2015-03-14 04:08:10 +08:00
|
|
|
skb_queue_walk_from(&l_ptr->transmq, skb) {
|
|
|
|
if (!retransmits)
|
2014-11-26 11:41:52 +08:00
|
|
|
break;
|
|
|
|
msg = buf_msg(skb);
|
2015-05-14 22:46:15 +08:00
|
|
|
msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
|
2007-02-09 22:25:21 +08:00
|
|
|
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
|
2015-01-09 15:27:06 +08:00
|
|
|
tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb,
|
2015-07-31 06:24:26 +08:00
|
|
|
l_ptr->media_addr);
|
2012-11-15 11:34:45 +08:00
|
|
|
retransmits--;
|
|
|
|
l_ptr->stats.retransmitted++;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
static int tipc_link_retransm(struct tipc_link *l, int retransm,
|
|
|
|
struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
|
|
|
|
struct tipc_msg *hdr;
|
|
|
|
|
|
|
|
if (!skb)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Detect repeated retransmit failures on same packet */
|
|
|
|
if (likely(l->last_retransm != buf_seqno(skb))) {
|
|
|
|
l->last_retransm = buf_seqno(skb);
|
|
|
|
l->stale_count = 1;
|
|
|
|
} else if (++l->stale_count > 100) {
|
|
|
|
link_retransmit_failure(l, skb);
|
2015-07-31 06:24:21 +08:00
|
|
|
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
}
|
|
|
|
skb_queue_walk(&l->transmq, skb) {
|
|
|
|
if (!retransm)
|
|
|
|
return 0;
|
|
|
|
hdr = buf_msg(skb);
|
|
|
|
_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
|
|
|
|
if (!_skb)
|
|
|
|
return 0;
|
|
|
|
hdr = buf_msg(_skb);
|
|
|
|
msg_set_ack(hdr, l->rcv_nxt - 1);
|
|
|
|
msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
|
|
|
|
_skb->priority = TC_PRIO_CONTROL;
|
|
|
|
__skb_queue_tail(xmitq, _skb);
|
|
|
|
retransm--;
|
|
|
|
l->stats.retransmitted++;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
/* tipc_data_input - deliver data and name distr msgs to upper layer
|
2014-07-01 16:22:40 +08:00
|
|
|
*
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
* Consumes buffer if message is of right type
|
2014-07-01 16:22:40 +08:00
|
|
|
* Node lock must be held
|
|
|
|
*/
|
2015-07-31 06:24:25 +08:00
|
|
|
static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *inputq)
|
2014-07-01 16:22:40 +08:00
|
|
|
{
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
struct tipc_node *node = link->owner;
|
2014-07-01 16:22:40 +08:00
|
|
|
|
2015-07-31 06:24:25 +08:00
|
|
|
switch (msg_user(buf_msg(skb))) {
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
case TIPC_LOW_IMPORTANCE:
|
|
|
|
case TIPC_MEDIUM_IMPORTANCE:
|
|
|
|
case TIPC_HIGH_IMPORTANCE:
|
|
|
|
case TIPC_CRITICAL_IMPORTANCE:
|
|
|
|
case CONN_MANAGER:
|
2015-10-16 02:52:40 +08:00
|
|
|
skb_queue_tail(inputq, skb);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
return true;
|
2014-07-01 16:22:40 +08:00
|
|
|
case NAME_DISTRIBUTOR:
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
node->bclink.recv_permitted = true;
|
2015-07-17 04:54:21 +08:00
|
|
|
skb_queue_tail(link->namedq, skb);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
return true;
|
|
|
|
case MSG_BUNDLER:
|
tipc: eliminate delayed link deletion at link failover
When a bearer is disabled manually, all its links have to be reset
and deleted. However, if there is a remaining, parallel link ready
to take over a deleted link's traffic, we currently delay the delete
of the removed link until the failover procedure is finished. This
is because the remaining link needs to access state from the reset
link, such as the last received packet number, and any partially
reassembled buffer, in order to perform a successful failover.
In this commit, we do instead move the state data over to the new
link, so that it can fulfill the procedure autonomously, without
accessing any data on the old link. This means that we can now
proceed and delete all pertaining links immediately when a bearer
is disabled. This saves us from some unnecessary complexity in such
situations.
We also choose to change the confusing definitions CHANGEOVER_PROTOCOL,
ORIGINAL_MSG and DUPLICATE_MSG to the more descriptive TUNNEL_PROTOCOL,
FAILOVER_MSG and SYNCH_MSG respectively.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:01 +08:00
|
|
|
case TUNNEL_PROTOCOL:
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
case MSG_FRAGMENTER:
|
2014-07-01 16:22:40 +08:00
|
|
|
case BCAST_PROTOCOL:
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
return false;
|
2014-07-01 16:22:40 +08:00
|
|
|
default:
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
pr_warn("Dropping received illegal msg type\n");
|
|
|
|
kfree_skb(skb);
|
|
|
|
return false;
|
|
|
|
};
|
2014-07-01 16:22:40 +08:00
|
|
|
}
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
|
|
|
|
/* tipc_link_input - process packet that has passed link protocol check
|
|
|
|
*
|
|
|
|
* Consumes buffer
|
2014-07-01 16:22:40 +08:00
|
|
|
*/
|
2015-07-31 06:24:25 +08:00
|
|
|
static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *inputq)
|
2014-07-01 16:22:40 +08:00
|
|
|
{
|
2015-07-31 06:24:19 +08:00
|
|
|
struct tipc_node *node = l->owner;
|
|
|
|
struct tipc_msg *hdr = buf_msg(skb);
|
|
|
|
struct sk_buff **reasm_skb = &l->reasm_buf;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
struct sk_buff *iskb;
|
2015-10-16 02:52:40 +08:00
|
|
|
struct sk_buff_head tmpq;
|
2015-07-31 06:24:19 +08:00
|
|
|
int usr = msg_user(hdr);
|
2015-07-31 06:24:16 +08:00
|
|
|
int rc = 0;
|
2015-07-31 06:24:19 +08:00
|
|
|
int pos = 0;
|
|
|
|
int ipos = 0;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
if (unlikely(usr == TUNNEL_PROTOCOL)) {
|
|
|
|
if (msg_type(hdr) == SYNCH_MSG) {
|
|
|
|
__skb_queue_purge(&l->deferdq);
|
|
|
|
goto drop;
|
2015-03-26 00:07:26 +08:00
|
|
|
}
|
2015-07-31 06:24:19 +08:00
|
|
|
if (!tipc_msg_extract(skb, &iskb, &ipos))
|
|
|
|
return rc;
|
|
|
|
kfree_skb(skb);
|
|
|
|
skb = iskb;
|
|
|
|
hdr = buf_msg(skb);
|
|
|
|
if (less(msg_seqno(hdr), l->drop_point))
|
|
|
|
goto drop;
|
2015-07-31 06:24:25 +08:00
|
|
|
if (tipc_data_input(l, skb, inputq))
|
2015-07-31 06:24:19 +08:00
|
|
|
return rc;
|
|
|
|
usr = msg_user(hdr);
|
|
|
|
reasm_skb = &l->failover_reasm_skb;
|
|
|
|
}
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
if (usr == MSG_BUNDLER) {
|
2015-10-16 02:52:40 +08:00
|
|
|
skb_queue_head_init(&tmpq);
|
2015-07-31 06:24:19 +08:00
|
|
|
l->stats.recv_bundles++;
|
|
|
|
l->stats.recv_bundled += msg_msgcnt(hdr);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
while (tipc_msg_extract(skb, &iskb, &pos))
|
2015-10-16 02:52:40 +08:00
|
|
|
tipc_data_input(l, iskb, &tmpq);
|
|
|
|
tipc_skb_queue_splice_tail(&tmpq, inputq);
|
2015-07-31 06:24:21 +08:00
|
|
|
return 0;
|
2015-07-31 06:24:19 +08:00
|
|
|
} else if (usr == MSG_FRAGMENTER) {
|
|
|
|
l->stats.recv_fragments++;
|
|
|
|
if (tipc_buf_append(reasm_skb, &skb)) {
|
|
|
|
l->stats.recv_fragmented++;
|
2015-07-31 06:24:25 +08:00
|
|
|
tipc_data_input(l, skb, inputq);
|
2015-07-31 06:24:19 +08:00
|
|
|
} else if (!*reasm_skb) {
|
2015-07-31 06:24:21 +08:00
|
|
|
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
}
|
2015-07-31 06:24:21 +08:00
|
|
|
return 0;
|
2015-07-31 06:24:19 +08:00
|
|
|
} else if (usr == BCAST_PROTOCOL) {
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
tipc_link_sync_rcv(node, skb);
|
2015-07-31 06:24:21 +08:00
|
|
|
return 0;
|
2015-07-31 06:24:19 +08:00
|
|
|
}
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
2015-07-31 06:24:21 +08:00
|
|
|
return 0;
|
2014-07-01 16:22:40 +08:00
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
|
|
|
|
{
|
|
|
|
bool released = false;
|
|
|
|
struct sk_buff *skb, *tmp;
|
|
|
|
|
|
|
|
skb_queue_walk_safe(&l->transmq, skb, tmp) {
|
|
|
|
if (more(buf_seqno(skb), acked))
|
|
|
|
break;
|
|
|
|
__skb_unlink(skb, &l->transmq);
|
|
|
|
kfree_skb(skb);
|
|
|
|
released = true;
|
|
|
|
}
|
|
|
|
return released;
|
|
|
|
}
|
|
|
|
|
2015-10-16 02:52:41 +08:00
|
|
|
/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission
|
|
|
|
*/
|
|
|
|
void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
l->rcv_unacked = 0;
|
|
|
|
l->stats.sent_acks++;
|
|
|
|
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
|
|
|
|
}
|
|
|
|
|
2015-10-16 02:52:45 +08:00
|
|
|
/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message
|
|
|
|
*/
|
|
|
|
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
int mtyp = RESET_MSG;
|
|
|
|
|
|
|
|
if (l->state == LINK_ESTABLISHING)
|
|
|
|
mtyp = ACTIVATE_MSG;
|
|
|
|
|
|
|
|
tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
|
|
|
|
}
|
|
|
|
|
2015-10-16 02:52:41 +08:00
|
|
|
/* tipc_link_build_nack_msg: prepare link nack message for transmission
|
|
|
|
*/
|
|
|
|
static void tipc_link_build_nack_msg(struct tipc_link *l,
|
|
|
|
struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
u32 def_cnt = ++l->stats.deferred_recv;
|
|
|
|
|
|
|
|
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
|
|
|
|
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
|
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
|
2015-10-16 02:52:41 +08:00
|
|
|
* @l: the link that should handle the message
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
* @skb: TIPC packet
|
|
|
|
* @xmitq: queue to place packets to be sent after this call
|
|
|
|
*/
|
|
|
|
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *xmitq)
|
|
|
|
{
|
2015-10-16 02:52:41 +08:00
|
|
|
struct sk_buff_head *defq = &l->deferdq;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
struct tipc_msg *hdr;
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:42 +08:00
|
|
|
u16 seqno, rcv_nxt, win_lim;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
int rc = 0;
|
|
|
|
|
2015-10-16 02:52:41 +08:00
|
|
|
do {
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
hdr = buf_msg(skb);
|
2015-10-16 02:52:41 +08:00
|
|
|
seqno = msg_seqno(hdr);
|
|
|
|
rcv_nxt = l->rcv_nxt;
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:42 +08:00
|
|
|
win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
|
|
|
|
/* Verify and update link state */
|
2015-10-16 02:52:41 +08:00
|
|
|
if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
|
|
|
|
return tipc_link_proto_rcv(l, skb, xmitq);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
if (unlikely(!link_is_up(l))) {
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
if (l->state == LINK_ESTABLISHING)
|
|
|
|
rc = TIPC_LINK_UP_EVT;
|
|
|
|
goto drop;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
}
|
|
|
|
|
2015-10-16 02:52:41 +08:00
|
|
|
/* Don't send probe at next timeout expiration */
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
l->silent_intv_cnt = 0;
|
|
|
|
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:42 +08:00
|
|
|
/* Drop if outside receive window */
|
|
|
|
if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
|
|
|
|
l->stats.duplicates++;
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/* Forward queues and wake up waiting users */
|
|
|
|
if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
|
|
|
|
tipc_link_advance_backlog(l, xmitq);
|
|
|
|
if (unlikely(!skb_queue_empty(&l->wakeupq)))
|
|
|
|
link_prepare_wakeup(l);
|
|
|
|
}
|
|
|
|
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:42 +08:00
|
|
|
/* Defer delivery if sequence gap */
|
|
|
|
if (unlikely(seqno != rcv_nxt)) {
|
2015-10-16 02:52:43 +08:00
|
|
|
__tipc_skb_queue_sorted(defq, seqno, skb);
|
2015-10-16 02:52:41 +08:00
|
|
|
tipc_link_build_nack_msg(l, xmitq);
|
|
|
|
break;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
}
|
|
|
|
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:42 +08:00
|
|
|
/* Deliver packet */
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
l->rcv_nxt++;
|
|
|
|
l->stats.recv_info++;
|
2015-10-16 02:52:41 +08:00
|
|
|
if (!tipc_data_input(l, skb, l->inputq))
|
2015-10-16 02:52:40 +08:00
|
|
|
rc = tipc_link_input(l, skb, l->inputq);
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:42 +08:00
|
|
|
if (unlikely(rc))
|
2015-10-16 02:52:41 +08:00
|
|
|
break;
|
|
|
|
if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
|
|
|
|
tipc_link_build_ack_msg(l, xmitq);
|
|
|
|
|
|
|
|
} while ((skb = __skb_dequeue(defq)));
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2012-07-10 18:55:09 +08:00
|
|
|
/**
|
2011-10-25 22:44:35 +08:00
|
|
|
* tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue
|
|
|
|
*
|
|
|
|
* Returns increase in queue length (i.e. 0 or 1)
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2014-11-26 11:41:53 +08:00
|
|
|
u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2014-11-26 11:41:53 +08:00
|
|
|
struct sk_buff *skb1;
|
2015-05-14 22:46:14 +08:00
|
|
|
u16 seq_no = buf_seqno(skb);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Empty queue ? */
|
2014-11-26 11:41:53 +08:00
|
|
|
if (skb_queue_empty(list)) {
|
|
|
|
__skb_queue_tail(list, skb);
|
2006-01-03 02:04:38 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Last ? */
|
2014-11-26 11:41:53 +08:00
|
|
|
if (less(buf_seqno(skb_peek_tail(list)), seq_no)) {
|
|
|
|
__skb_queue_tail(list, skb);
|
2006-01-03 02:04:38 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2011-10-25 22:44:35 +08:00
|
|
|
/* Locate insertion point in queue, then insert; discard if duplicate */
|
2014-11-26 11:41:53 +08:00
|
|
|
skb_queue_walk(list, skb1) {
|
2015-05-14 22:46:14 +08:00
|
|
|
u16 curr_seqno = buf_seqno(skb1);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-10-25 22:44:35 +08:00
|
|
|
if (seq_no == curr_seqno) {
|
2014-11-26 11:41:53 +08:00
|
|
|
kfree_skb(skb);
|
2011-10-25 22:44:35 +08:00
|
|
|
return 0;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2011-10-25 22:44:35 +08:00
|
|
|
|
|
|
|
if (less(seq_no, curr_seqno))
|
2006-01-03 02:04:38 +08:00
|
|
|
break;
|
2011-10-25 22:44:35 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2014-11-26 11:41:53 +08:00
|
|
|
__skb_queue_before(list, skb1, skb);
|
2011-10-25 22:44:35 +08:00
|
|
|
return 1;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Send protocol message to the other endpoint.
|
|
|
|
*/
|
2015-07-17 04:54:26 +08:00
|
|
|
void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg,
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:02 +08:00
|
|
|
u32 gap, u32 tolerance, u32 priority)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-07-17 04:54:26 +08:00
|
|
|
struct sk_buff *skb = NULL;
|
|
|
|
struct sk_buff_head xmitq;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-17 04:54:26 +08:00
|
|
|
__skb_queue_head_init(&xmitq);
|
|
|
|
tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
|
|
|
|
tolerance, priority, &xmitq);
|
|
|
|
skb = __skb_dequeue(&xmitq);
|
|
|
|
if (!skb)
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
2015-07-31 06:24:26 +08:00
|
|
|
tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr);
|
2015-07-17 04:54:26 +08:00
|
|
|
l->rcv_unacked = 0;
|
|
|
|
kfree_skb(skb);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-07-17 04:54:26 +08:00
|
|
|
/* tipc_link_build_proto_msg: prepare link protocol message for transmission
|
|
|
|
*/
|
|
|
|
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
|
|
|
|
u16 rcvgap, int tolerance, int priority,
|
|
|
|
struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb = NULL;
|
|
|
|
struct tipc_msg *hdr = l->pmsg;
|
|
|
|
u16 snd_nxt = l->snd_nxt;
|
|
|
|
u16 rcv_nxt = l->rcv_nxt;
|
|
|
|
u16 rcv_last = rcv_nxt - 1;
|
|
|
|
int node_up = l->owner->bclink.recv_permitted;
|
|
|
|
|
|
|
|
/* Don't send protocol message during reset or link failover */
|
2015-07-31 06:24:21 +08:00
|
|
|
if (tipc_link_is_blocked(l))
|
2015-07-17 04:54:26 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
msg_set_type(hdr, mtyp);
|
|
|
|
msg_set_net_plane(hdr, l->net_plane);
|
|
|
|
msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
|
|
|
|
msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net));
|
|
|
|
msg_set_link_tolerance(hdr, tolerance);
|
|
|
|
msg_set_linkprio(hdr, priority);
|
|
|
|
msg_set_redundant_link(hdr, node_up);
|
|
|
|
msg_set_seq_gap(hdr, 0);
|
|
|
|
|
|
|
|
/* Compatibility: created msg must not be in sequence with pkt flow */
|
|
|
|
msg_set_seqno(hdr, snd_nxt + U16_MAX / 2);
|
|
|
|
|
|
|
|
if (mtyp == STATE_MSG) {
|
|
|
|
if (!tipc_link_is_up(l))
|
|
|
|
return;
|
|
|
|
msg_set_next_sent(hdr, snd_nxt);
|
|
|
|
|
|
|
|
/* Override rcvgap if there are packets in deferred queue */
|
|
|
|
if (!skb_queue_empty(&l->deferdq))
|
|
|
|
rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt;
|
|
|
|
if (rcvgap) {
|
|
|
|
msg_set_seq_gap(hdr, rcvgap);
|
|
|
|
l->stats.sent_nacks++;
|
|
|
|
}
|
|
|
|
msg_set_ack(hdr, rcv_last);
|
|
|
|
msg_set_probe(hdr, probe);
|
|
|
|
if (probe)
|
|
|
|
l->stats.sent_probes++;
|
|
|
|
l->stats.sent_states++;
|
|
|
|
} else {
|
|
|
|
/* RESET_MSG or ACTIVATE_MSG */
|
|
|
|
msg_set_max_pkt(hdr, l->advertised_mtu);
|
2015-07-31 06:24:19 +08:00
|
|
|
msg_set_ack(hdr, l->rcv_nxt - 1);
|
2015-07-17 04:54:26 +08:00
|
|
|
msg_set_next_sent(hdr, 1);
|
|
|
|
}
|
|
|
|
skb = tipc_buf_acquire(msg_size(hdr));
|
|
|
|
if (!skb)
|
|
|
|
return;
|
|
|
|
skb_copy_to_linear_data(skb, hdr, msg_size(hdr));
|
|
|
|
skb->priority = TC_PRIO_CONTROL;
|
2015-07-31 06:24:19 +08:00
|
|
|
__skb_queue_tail(xmitq, skb);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
|
2015-10-16 02:52:41 +08:00
|
|
|
* with contents of the link's transmit and backlog queues.
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2015-07-31 06:24:19 +08:00
|
|
|
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
|
|
|
|
int mtyp, struct sk_buff_head *xmitq)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-07-31 06:24:19 +08:00
|
|
|
struct sk_buff *skb, *tnlskb;
|
|
|
|
struct tipc_msg *hdr, tnlhdr;
|
|
|
|
struct sk_buff_head *queue = &l->transmq;
|
|
|
|
struct sk_buff_head tmpxq, tnlq;
|
|
|
|
u16 pktlen, pktcnt, seqno = l->snd_nxt;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
if (!tnl)
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
skb_queue_head_init(&tnlq);
|
|
|
|
skb_queue_head_init(&tmpxq);
|
2015-05-14 22:46:18 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
/* At least one packet required for safe algorithm => add dummy */
|
|
|
|
skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
|
|
|
|
BASIC_H_SIZE, 0, l->addr, link_own_addr(l),
|
|
|
|
0, 0, TIPC_ERR_NO_PORT);
|
|
|
|
if (!skb) {
|
|
|
|
pr_warn("%sunable to create tunnel packet\n", link_co_err);
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2015-07-31 06:24:19 +08:00
|
|
|
skb_queue_tail(&tnlq, skb);
|
|
|
|
tipc_link_xmit(l, &tnlq, &tmpxq);
|
|
|
|
__skb_queue_purge(&tmpxq);
|
|
|
|
|
|
|
|
/* Initialize reusable tunnel packet header */
|
|
|
|
tipc_msg_init(link_own_addr(l), &tnlhdr, TUNNEL_PROTOCOL,
|
|
|
|
mtyp, INT_H_SIZE, l->addr);
|
|
|
|
pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq);
|
|
|
|
msg_set_msgcnt(&tnlhdr, pktcnt);
|
|
|
|
msg_set_bearer_id(&tnlhdr, l->peer_bearer_id);
|
|
|
|
tnl:
|
|
|
|
/* Wrap each packet into a tunnel packet */
|
2015-03-14 04:08:10 +08:00
|
|
|
skb_queue_walk(queue, skb) {
|
2015-07-31 06:24:19 +08:00
|
|
|
hdr = buf_msg(skb);
|
|
|
|
if (queue == &l->backlogq)
|
|
|
|
msg_set_seqno(hdr, seqno++);
|
|
|
|
pktlen = msg_size(hdr);
|
|
|
|
msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
|
|
|
|
tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE);
|
|
|
|
if (!tnlskb) {
|
|
|
|
pr_warn("%sunable to send packet\n", link_co_err);
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2015-07-31 06:24:19 +08:00
|
|
|
skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE);
|
|
|
|
skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen);
|
|
|
|
__skb_queue_tail(&tnlq, tnlskb);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2015-07-31 06:24:19 +08:00
|
|
|
if (queue != &l->backlogq) {
|
|
|
|
queue = &l->backlogq;
|
|
|
|
goto tnl;
|
tipc: change reception of tunnelled failover packets
When a link is reset, and there is a redundant link available, all
sender sockets will steer their subsequent traffic through the
remaining link. In order to guarantee preserved packet order and
cardinality during the transition, we tunnel the failing link's send
queue through the remaining link before we allow any sockets to use it.
In this commit, we change the algorithm for receiving failover
("ORIGINAL_MSG") packets in tipc_link_tunnel_rcv(), at the same time
delegating it to a new subfuncton, tipc_link_failover_rcv(). Instead
of directly returning an extracted inner packet to the packet reception
loop in tipc_rcv(), we first check if it is a message fragment, in which
case we append it to the reset link's fragment chain. If the fragment
chain is complete, we return the whole chain instead of the individual
buffer, eliminating any need for the tipc_rcv() loop to do reassembly of
tunneled packets.
This change makes it possible to further simplify tipc_link_tunnel_rcv(),
as well as the calling tipc_rcv() loop. We will do that in later
commits. It also makes it possible to identify a single spot in the code
where we can tell that a failover procedure is finished, something that
is useful when we are deleting links after a failover. This will also
be done in a later commit.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:11 +08:00
|
|
|
}
|
tipc: change reception of tunnelled duplicate packets
When a second link to a destination comes up, some sender sockets will
steer their subsequent traffic through the new link. In order to
guarantee preserved packet order and cardinality for those sockets, we
tunnel a duplicate of the old link's send queue through the new link
before we open it for regular traffic. The last arriving packet copy,
on whichever link, will be dropped at the receiving end based on the
original sequence number, to ensure that only one copy is delivered to
the end receiver.
In this commit, we change the algorithm for receiving DUPLICATE_MSG
packets, at the same time delegating it to a new subfunction,
tipc_link_dup_rcv(). Instead of returning an extracted inner packet to
the packet reception loop in tipc_rcv(), we just add it to the receiving
(new) link's deferred packet queue. The packet will then be processed by
that link when it receives its first non-tunneled packet, i.e., at
latest when the changeover procedure is finished.
Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all
packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit
testing for this. This in turn means that the current conditional jump
to the label 'protocol_check' becomes redundant, and we can remove that
label.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:10 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
tipc_link_xmit(tnl, &tnlq, xmitq);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-07-31 06:24:19 +08:00
|
|
|
if (mtyp == FAILOVER_MSG) {
|
|
|
|
tnl->drop_point = l->rcv_nxt;
|
|
|
|
tnl->failover_reasm_skb = l->reasm_buf;
|
|
|
|
l->reasm_buf = NULL;
|
2015-04-02 21:33:00 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/* tipc_link_proto_rcv(): receive link level protocol message :
|
|
|
|
* Note that network plane id propagates through the network, and may
|
|
|
|
* change at any time. The node with lowest numerical id determines
|
|
|
|
* network plane
|
|
|
|
*/
|
|
|
|
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *xmitq)
|
|
|
|
{
|
|
|
|
struct tipc_msg *hdr = buf_msg(skb);
|
|
|
|
u16 rcvgap = 0;
|
|
|
|
u16 nacked_gap = msg_seq_gap(hdr);
|
|
|
|
u16 peers_snd_nxt = msg_next_sent(hdr);
|
|
|
|
u16 peers_tol = msg_link_tolerance(hdr);
|
|
|
|
u16 peers_prio = msg_linkprio(hdr);
|
tipc: fix stale link problem during synchronization
Recent changes to the link synchronization means that we can now just
drop packets arriving on the synchronizing link before the synch point
is reached. This has lead to significant simplifications to the
implementation, but also turns out to have a flip side that we need
to consider.
Under unlucky circumstances, the two endpoints may end up
repeatedly dropping each other's packets, while immediately
asking for retransmission of the same packets, just to drop
them once more. This pattern will eventually be broken when
the synch point is reached on the other link, but before that,
the endpoints may have arrived at the retransmission limit
(stale counter) that indicates that the link should be broken.
We see this happen at rare occasions.
The fix for this is to not ask for retransmissions when a link is in
state LINK_SYNCHING. The fact that the link has reached this state
means that it has already received the first SYNCH packet, and that it
knows the synch point. Hence, it doesn't need any more packets until the
other link has reached the synch point, whereafter it can go ahead and
ask for the missing packets.
However, because of the reduced traffic on the synching link that
follows this change, it may now take longer to discover that the
synch point has been reached. We compensate for this by letting all
packets, on any of the links, trig a check for synchronization
termination. This is possible because the packets themselves don't
contain any information that is needed for discovering this condition.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 14:12:56 +08:00
|
|
|
u16 rcv_nxt = l->rcv_nxt;
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
int mtyp = msg_type(hdr);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
char *if_name;
|
|
|
|
int rc = 0;
|
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
if (tipc_link_is_blocked(l))
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
goto exit;
|
|
|
|
|
|
|
|
if (link_own_addr(l) > msg_prevnode(hdr))
|
|
|
|
l->net_plane = msg_net_plane(hdr);
|
|
|
|
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
switch (mtyp) {
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
case RESET_MSG:
|
|
|
|
|
|
|
|
/* Ignore duplicate RESET with old session number */
|
|
|
|
if ((less_eq(msg_session(hdr), l->peer_session)) &&
|
|
|
|
(l->peer_session != WILDCARD_SESSION))
|
|
|
|
break;
|
|
|
|
/* fall thru' */
|
2015-07-31 06:24:21 +08:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
case ACTIVATE_MSG:
|
|
|
|
|
|
|
|
/* Complete own link name with peer's interface name */
|
|
|
|
if_name = strrchr(l->name, ':') + 1;
|
|
|
|
if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
|
|
|
|
break;
|
|
|
|
if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
|
|
|
|
break;
|
|
|
|
strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME);
|
|
|
|
|
|
|
|
/* Update own tolerance if peer indicates a non-zero value */
|
|
|
|
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
|
|
|
|
l->tolerance = peers_tol;
|
|
|
|
|
|
|
|
/* Update own priority if peer's priority is higher */
|
|
|
|
if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
|
|
|
|
l->priority = peers_prio;
|
|
|
|
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
|
|
|
|
if ((mtyp == RESET_MSG) || !link_is_up(l))
|
|
|
|
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
|
|
|
|
|
|
|
|
/* ACTIVATE_MSG takes up link if it was already locally reset */
|
|
|
|
if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
|
|
|
|
rc = TIPC_LINK_UP_EVT;
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
l->peer_session = msg_session(hdr);
|
|
|
|
l->peer_bearer_id = msg_bearer_id(hdr);
|
|
|
|
if (l->mtu > msg_max_pkt(hdr))
|
|
|
|
l->mtu = msg_max_pkt(hdr);
|
|
|
|
break;
|
2015-07-31 06:24:21 +08:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
case STATE_MSG:
|
2015-07-31 06:24:21 +08:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
/* Update own tolerance if peer indicates a non-zero value */
|
|
|
|
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
|
|
|
|
l->tolerance = peers_tol;
|
|
|
|
|
|
|
|
l->silent_intv_cnt = 0;
|
|
|
|
l->stats.recv_states++;
|
|
|
|
if (msg_probe(hdr))
|
|
|
|
l->stats.recv_probes++;
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
|
|
|
|
if (!link_is_up(l)) {
|
|
|
|
if (l->state == LINK_ESTABLISHING)
|
|
|
|
rc = TIPC_LINK_UP_EVT;
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
break;
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-16 02:52:44 +08:00
|
|
|
}
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
/* Send NACK if peer has sent pkts we haven't received yet */
|
tipc: fix stale link problem during synchronization
Recent changes to the link synchronization means that we can now just
drop packets arriving on the synchronizing link before the synch point
is reached. This has lead to significant simplifications to the
implementation, but also turns out to have a flip side that we need
to consider.
Under unlucky circumstances, the two endpoints may end up
repeatedly dropping each other's packets, while immediately
asking for retransmission of the same packets, just to drop
them once more. This pattern will eventually be broken when
the synch point is reached on the other link, but before that,
the endpoints may have arrived at the retransmission limit
(stale counter) that indicates that the link should be broken.
We see this happen at rare occasions.
The fix for this is to not ask for retransmissions when a link is in
state LINK_SYNCHING. The fact that the link has reached this state
means that it has already received the first SYNCH packet, and that it
knows the synch point. Hence, it doesn't need any more packets until the
other link has reached the synch point, whereafter it can go ahead and
ask for the missing packets.
However, because of the reduced traffic on the synching link that
follows this change, it may now take longer to discover that the
synch point has been reached. We compensate for this by letting all
packets, on any of the links, trig a check for synchronization
termination. This is possible because the packets themselves don't
contain any information that is needed for discovering this condition.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 14:12:56 +08:00
|
|
|
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
rcvgap = peers_snd_nxt - l->rcv_nxt;
|
|
|
|
if (rcvgap || (msg_probe(hdr)))
|
|
|
|
tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
|
2015-07-21 18:42:28 +08:00
|
|
|
0, 0, xmitq);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
tipc_link_release_pkts(l, msg_ack(hdr));
|
|
|
|
|
|
|
|
/* If NACK, retransmit will now start at right position */
|
|
|
|
if (nacked_gap) {
|
2015-07-31 06:24:19 +08:00
|
|
|
rc = tipc_link_retransm(l, nacked_gap, xmitq);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
l->stats.recv_nacks++;
|
|
|
|
}
|
2015-07-31 06:24:21 +08:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 04:54:31 +08:00
|
|
|
tipc_link_advance_backlog(l, xmitq);
|
|
|
|
if (unlikely(!skb_queue_empty(&l->wakeupq)))
|
|
|
|
link_prepare_wakeup(l);
|
|
|
|
}
|
|
|
|
exit:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
tipc: clean up handling of message priorities
Messages transferred by TIPC are assigned an "importance priority", -an
integer value indicating how to treat the message when there is link or
destination socket congestion.
There is no separate header field for this value. Instead, the message
user values have been chosen in ascending order according to perceived
importance, so that the message user field can be used for this.
This is not a good solution. First, we have many more users than the
needed priority levels, so we end up with treating more priority
levels than necessary. Second, the user field cannot always
accurately reflect the priority of the message. E.g., a message
fragment packet should really have the priority of the enveloped
user data message, and not the priority of the MSG_FRAGMENTER user.
Until now, we have been working around this problem in different ways,
but it is now time to implement a consistent way of handling such
priorities, although still within the constraint that we cannot
allocate any more bits in the regular data message header for this.
In this commit, we define a new priority level, TIPC_SYSTEM_IMPORTANCE,
that will be the only one used apart from the four (lower) user data
levels. All non-data messages map down to this priority. Furthermore,
we take some free bits from the MSG_FRAGMENTER header and allocate
them to store the priority of the enveloped message. We then adjust
the functions msg_importance()/msg_set_importance() so that they
read/set the correct header fields depending on user type.
This small protocol change is fully compatible, because the code at
the receiving end of a link currently reads the importance level
only from user data messages, where there is no change.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-14 04:08:11 +08:00
|
|
|
void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:02 +08:00
|
|
|
int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
|
tipc: clean up handling of message priorities
Messages transferred by TIPC are assigned an "importance priority", -an
integer value indicating how to treat the message when there is link or
destination socket congestion.
There is no separate header field for this value. Instead, the message
user values have been chosen in ascending order according to perceived
importance, so that the message user field can be used for this.
This is not a good solution. First, we have many more users than the
needed priority levels, so we end up with treating more priority
levels than necessary. Second, the user field cannot always
accurately reflect the priority of the message. E.g., a message
fragment packet should really have the priority of the enveloped
user data message, and not the priority of the MSG_FRAGMENTER user.
Until now, we have been working around this problem in different ways,
but it is now time to implement a consistent way of handling such
priorities, although still within the constraint that we cannot
allocate any more bits in the regular data message header for this.
In this commit, we define a new priority level, TIPC_SYSTEM_IMPORTANCE,
that will be the only one used apart from the four (lower) user data
levels. All non-data messages map down to this priority. Furthermore,
we take some free bits from the MSG_FRAGMENTER header and allocate
them to store the priority of the enveloped message. We then adjust
the functions msg_importance()/msg_set_importance() so that they
read/set the correct header fields depending on user type.
This small protocol change is fully compatible, because the code at
the receiving end of a link currently reads the importance level
only from user data messages, where there is no change.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-14 04:08:11 +08:00
|
|
|
|
|
|
|
l->window = win;
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2;
|
|
|
|
l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win;
|
|
|
|
l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3;
|
|
|
|
l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2;
|
|
|
|
l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2014-02-14 06:29:18 +08:00
|
|
|
/* tipc_link_find_owner - locate owner node of link by link's name
|
2015-01-09 15:27:05 +08:00
|
|
|
* @net: the applicable net namespace
|
2014-02-14 06:29:18 +08:00
|
|
|
* @name: pointer to link name string
|
|
|
|
* @bearer_id: pointer to index in 'node->links' array where the link was found.
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2014-02-14 06:29:18 +08:00
|
|
|
* Returns pointer to node owning the link, or 0 if no matching link is found.
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2015-01-09 15:27:05 +08:00
|
|
|
static struct tipc_node *tipc_link_find_owner(struct net *net,
|
|
|
|
const char *link_name,
|
2014-02-14 06:29:18 +08:00
|
|
|
unsigned int *bearer_id)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link *l_ptr;
|
2013-10-18 13:23:21 +08:00
|
|
|
struct tipc_node *n_ptr;
|
2014-12-25 19:05:50 +08:00
|
|
|
struct tipc_node *found_node = NULL;
|
2013-10-18 13:23:21 +08:00
|
|
|
int i;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2014-02-14 06:29:18 +08:00
|
|
|
*bearer_id = 0;
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_lock();
|
2015-01-09 15:27:05 +08:00
|
|
|
list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
|
2014-02-15 05:40:44 +08:00
|
|
|
tipc_node_lock(n_ptr);
|
2013-10-18 13:23:21 +08:00
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2015-07-17 04:54:19 +08:00
|
|
|
l_ptr = n_ptr->links[i].link;
|
2014-02-14 06:29:18 +08:00
|
|
|
if (l_ptr && !strcmp(l_ptr->name, link_name)) {
|
|
|
|
*bearer_id = i;
|
|
|
|
found_node = n_ptr;
|
|
|
|
break;
|
|
|
|
}
|
2013-10-18 13:23:21 +08:00
|
|
|
}
|
2014-02-15 05:40:44 +08:00
|
|
|
tipc_node_unlock(n_ptr);
|
2014-02-14 06:29:18 +08:00
|
|
|
if (found_node)
|
|
|
|
break;
|
2013-10-18 13:23:21 +08:00
|
|
|
}
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
|
2014-02-14 06:29:18 +08:00
|
|
|
return found_node;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* link_reset_statistics - reset link statistics
|
|
|
|
* @l_ptr: pointer to link
|
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
static void link_reset_statistics(struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
|
|
|
memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
|
2015-05-14 22:46:15 +08:00
|
|
|
l_ptr->stats.sent_info = l_ptr->snd_nxt;
|
|
|
|
l_ptr->stats.recv_info = l_ptr->rcv_nxt;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-07-17 04:54:30 +08:00
|
|
|
static void link_print(struct tipc_link *l, const char *str)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-07-17 04:54:30 +08:00
|
|
|
struct sk_buff *hskb = skb_peek(&l->transmq);
|
|
|
|
u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt;
|
|
|
|
u16 tail = l->snd_nxt - 1;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 10:55:46 +08:00
|
|
|
|
2015-07-31 06:24:21 +08:00
|
|
|
pr_info("%s Link <%s> state %x\n", str, l->name, l->state);
|
2015-07-17 04:54:30 +08:00
|
|
|
pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n",
|
|
|
|
skb_queue_len(&l->transmq), head, tail,
|
|
|
|
skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2014-11-20 17:29:07 +08:00
|
|
|
|
|
|
|
/* Parse and validate nested (link) properties valid for media, bearer and link
|
|
|
|
*/
|
|
|
|
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop,
|
|
|
|
tipc_nl_prop_policy);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (props[TIPC_NLA_PROP_PRIO]) {
|
|
|
|
u32 prio;
|
|
|
|
|
|
|
|
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
|
|
|
|
if (prio > TIPC_MAX_LINK_PRI)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (props[TIPC_NLA_PROP_TOL]) {
|
|
|
|
u32 tol;
|
|
|
|
|
|
|
|
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
|
|
|
|
if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL))
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (props[TIPC_NLA_PROP_WIN]) {
|
|
|
|
u32 win;
|
|
|
|
|
|
|
|
win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
|
|
|
|
if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN))
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2014-11-20 17:29:13 +08:00
|
|
|
int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
int res = 0;
|
|
|
|
int bearer_id;
|
|
|
|
char *name;
|
|
|
|
struct tipc_link *link;
|
|
|
|
struct tipc_node *node;
|
|
|
|
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
|
2015-02-09 16:50:08 +08:00
|
|
|
struct net *net = sock_net(skb->sk);
|
2014-11-20 17:29:13 +08:00
|
|
|
|
|
|
|
if (!info->attrs[TIPC_NLA_LINK])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
|
|
|
|
info->attrs[TIPC_NLA_LINK],
|
|
|
|
tipc_nl_link_policy);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (!attrs[TIPC_NLA_LINK_NAME])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
|
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
if (strcmp(name, tipc_bclink_name) == 0)
|
|
|
|
return tipc_nl_bc_link_set(net, attrs);
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
node = tipc_link_find_owner(net, name, &bearer_id);
|
2014-11-20 17:29:13 +08:00
|
|
|
if (!node)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
tipc_node_lock(node);
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
link = node->links[bearer_id].link;
|
2014-11-20 17:29:13 +08:00
|
|
|
if (!link) {
|
|
|
|
res = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (attrs[TIPC_NLA_LINK_PROP]) {
|
|
|
|
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
|
|
|
|
|
|
|
|
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP],
|
|
|
|
props);
|
|
|
|
if (err) {
|
|
|
|
res = err;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (props[TIPC_NLA_PROP_TOL]) {
|
|
|
|
u32 tol;
|
|
|
|
|
|
|
|
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
|
2015-07-17 04:54:29 +08:00
|
|
|
link->tolerance = tol;
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:02 +08:00
|
|
|
tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0);
|
2014-11-20 17:29:13 +08:00
|
|
|
}
|
|
|
|
if (props[TIPC_NLA_PROP_PRIO]) {
|
|
|
|
u32 prio;
|
|
|
|
|
|
|
|
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
|
|
|
|
link->priority = prio;
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:02 +08:00
|
|
|
tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio);
|
2014-11-20 17:29:13 +08:00
|
|
|
}
|
|
|
|
if (props[TIPC_NLA_PROP_WIN]) {
|
|
|
|
u32 win;
|
|
|
|
|
|
|
|
win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
|
|
|
|
tipc_link_set_queue_limits(link, win);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
2014-11-24 18:10:29 +08:00
|
|
|
|
|
|
|
static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s)
|
2014-11-20 17:29:12 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct nlattr *stats;
|
|
|
|
|
|
|
|
struct nla_map {
|
|
|
|
u32 key;
|
|
|
|
u32 val;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct nla_map map[] = {
|
|
|
|
{TIPC_NLA_STATS_RX_INFO, s->recv_info},
|
|
|
|
{TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments},
|
|
|
|
{TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented},
|
|
|
|
{TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles},
|
|
|
|
{TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled},
|
|
|
|
{TIPC_NLA_STATS_TX_INFO, s->sent_info},
|
|
|
|
{TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments},
|
|
|
|
{TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented},
|
|
|
|
{TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles},
|
|
|
|
{TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled},
|
|
|
|
{TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ?
|
|
|
|
s->msg_length_counts : 1},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]},
|
|
|
|
{TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]},
|
|
|
|
{TIPC_NLA_STATS_RX_STATES, s->recv_states},
|
|
|
|
{TIPC_NLA_STATS_RX_PROBES, s->recv_probes},
|
|
|
|
{TIPC_NLA_STATS_RX_NACKS, s->recv_nacks},
|
|
|
|
{TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv},
|
|
|
|
{TIPC_NLA_STATS_TX_STATES, s->sent_states},
|
|
|
|
{TIPC_NLA_STATS_TX_PROBES, s->sent_probes},
|
|
|
|
{TIPC_NLA_STATS_TX_NACKS, s->sent_nacks},
|
|
|
|
{TIPC_NLA_STATS_TX_ACKS, s->sent_acks},
|
|
|
|
{TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted},
|
|
|
|
{TIPC_NLA_STATS_DUPLICATES, s->duplicates},
|
|
|
|
{TIPC_NLA_STATS_LINK_CONGS, s->link_congs},
|
|
|
|
{TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz},
|
|
|
|
{TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ?
|
|
|
|
(s->accu_queue_sz / s->queue_sz_counts) : 0}
|
|
|
|
};
|
|
|
|
|
|
|
|
stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS);
|
|
|
|
if (!stats)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(map); i++)
|
|
|
|
if (nla_put_u32(skb, map[i].key, map[i].val))
|
|
|
|
goto msg_full;
|
|
|
|
|
|
|
|
nla_nest_end(skb, stats);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
msg_full:
|
|
|
|
nla_nest_cancel(skb, stats);
|
|
|
|
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Caller should hold appropriate locks to protect the link */
|
2015-01-09 15:27:10 +08:00
|
|
|
static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
|
2015-04-29 00:33:50 +08:00
|
|
|
struct tipc_link *link, int nlflags)
|
2014-11-20 17:29:12 +08:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
void *hdr;
|
|
|
|
struct nlattr *attrs;
|
|
|
|
struct nlattr *prop;
|
2015-01-09 15:27:10 +08:00
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2015-02-09 16:50:03 +08:00
|
|
|
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
|
2015-04-29 00:33:50 +08:00
|
|
|
nlflags, TIPC_NL_LINK_GET);
|
2014-11-20 17:29:12 +08:00
|
|
|
if (!hdr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
|
|
|
|
if (!attrs)
|
|
|
|
goto msg_full;
|
|
|
|
|
|
|
|
if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name))
|
|
|
|
goto attr_msg_full;
|
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST,
|
2015-01-09 15:27:10 +08:00
|
|
|
tipc_cluster_mask(tn->own_addr)))
|
2014-11-20 17:29:12 +08:00
|
|
|
goto attr_msg_full;
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 21:33:02 +08:00
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
|
2014-11-20 17:29:12 +08:00
|
|
|
goto attr_msg_full;
|
2015-05-14 22:46:15 +08:00
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->rcv_nxt))
|
2014-11-20 17:29:12 +08:00
|
|
|
goto attr_msg_full;
|
2015-05-14 22:46:15 +08:00
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->snd_nxt))
|
2014-11-20 17:29:12 +08:00
|
|
|
goto attr_msg_full;
|
|
|
|
|
|
|
|
if (tipc_link_is_up(link))
|
|
|
|
if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
|
|
|
|
goto attr_msg_full;
|
|
|
|
if (tipc_link_is_active(link))
|
|
|
|
if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE))
|
|
|
|
goto attr_msg_full;
|
|
|
|
|
|
|
|
prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
|
|
|
|
if (!prop)
|
|
|
|
goto attr_msg_full;
|
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority))
|
|
|
|
goto prop_msg_full;
|
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance))
|
|
|
|
goto prop_msg_full;
|
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN,
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 00:07:24 +08:00
|
|
|
link->window))
|
2014-11-20 17:29:12 +08:00
|
|
|
goto prop_msg_full;
|
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority))
|
|
|
|
goto prop_msg_full;
|
|
|
|
nla_nest_end(msg->skb, prop);
|
|
|
|
|
|
|
|
err = __tipc_nl_add_stats(msg->skb, &link->stats);
|
|
|
|
if (err)
|
|
|
|
goto attr_msg_full;
|
|
|
|
|
|
|
|
nla_nest_end(msg->skb, attrs);
|
|
|
|
genlmsg_end(msg->skb, hdr);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
prop_msg_full:
|
|
|
|
nla_nest_cancel(msg->skb, prop);
|
|
|
|
attr_msg_full:
|
|
|
|
nla_nest_cancel(msg->skb, attrs);
|
|
|
|
msg_full:
|
|
|
|
genlmsg_cancel(msg->skb, hdr);
|
|
|
|
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Caller should hold node lock */
|
2015-01-09 15:27:10 +08:00
|
|
|
static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
|
|
|
|
struct tipc_node *node, u32 *prev_link)
|
2014-11-20 17:29:12 +08:00
|
|
|
{
|
|
|
|
u32 i;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
for (i = *prev_link; i < MAX_BEARERS; i++) {
|
|
|
|
*prev_link = i;
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
if (!node->links[i].link)
|
2014-11-20 17:29:12 +08:00
|
|
|
continue;
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
err = __tipc_nl_add_link(net, msg,
|
|
|
|
node->links[i].link, NLM_F_MULTI);
|
2014-11-20 17:29:12 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
*prev_link = 0;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
|
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct net *net = sock_net(skb->sk);
|
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2014-11-20 17:29:12 +08:00
|
|
|
struct tipc_node *node;
|
|
|
|
struct tipc_nl_msg msg;
|
|
|
|
u32 prev_node = cb->args[0];
|
|
|
|
u32 prev_link = cb->args[1];
|
|
|
|
int done = cb->args[2];
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (done)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
msg.skb = skb;
|
|
|
|
msg.portid = NETLINK_CB(cb->skb).portid;
|
|
|
|
msg.seq = cb->nlh->nlmsg_seq;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
if (prev_node) {
|
2015-01-09 15:27:05 +08:00
|
|
|
node = tipc_node_find(net, prev_node);
|
2014-11-20 17:29:12 +08:00
|
|
|
if (!node) {
|
|
|
|
/* We never set seq or call nl_dump_check_consistent()
|
|
|
|
* this means that setting prev_seq here will cause the
|
|
|
|
* consistence check to fail in the netlink callback
|
|
|
|
* handler. Resulting in the last NLMSG_DONE message
|
|
|
|
* having the NLM_F_DUMP_INTR flag set.
|
|
|
|
*/
|
|
|
|
cb->prev_seq = 1;
|
|
|
|
goto out;
|
|
|
|
}
|
2015-03-26 18:10:24 +08:00
|
|
|
tipc_node_put(node);
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
list_for_each_entry_continue_rcu(node, &tn->node_list,
|
|
|
|
list) {
|
2014-11-20 17:29:12 +08:00
|
|
|
tipc_node_lock(node);
|
2015-01-09 15:27:10 +08:00
|
|
|
err = __tipc_nl_add_node_links(net, &msg, node,
|
|
|
|
&prev_link);
|
2014-11-20 17:29:12 +08:00
|
|
|
tipc_node_unlock(node);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
prev_node = node->addr;
|
|
|
|
}
|
|
|
|
} else {
|
2015-01-09 15:27:07 +08:00
|
|
|
err = tipc_nl_add_bc_link(net, &msg);
|
2014-11-20 17:29:12 +08:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
list_for_each_entry_rcu(node, &tn->node_list, list) {
|
2014-11-20 17:29:12 +08:00
|
|
|
tipc_node_lock(node);
|
2015-01-09 15:27:10 +08:00
|
|
|
err = __tipc_nl_add_node_links(net, &msg, node,
|
|
|
|
&prev_link);
|
2014-11-20 17:29:12 +08:00
|
|
|
tipc_node_unlock(node);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
prev_node = node->addr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
done = 1;
|
|
|
|
out:
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
cb->args[0] = prev_node;
|
|
|
|
cb->args[1] = prev_link;
|
|
|
|
cb->args[2] = done;
|
|
|
|
|
|
|
|
return skb->len;
|
|
|
|
}
|
|
|
|
|
|
|
|
int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info)
|
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct net *net = genl_info_net(info);
|
2014-11-20 17:29:12 +08:00
|
|
|
struct tipc_nl_msg msg;
|
|
|
|
char *name;
|
|
|
|
int err;
|
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
msg.portid = info->snd_portid;
|
|
|
|
msg.seq = info->snd_seq;
|
|
|
|
|
2014-11-20 17:29:12 +08:00
|
|
|
if (!info->attrs[TIPC_NLA_LINK_NAME])
|
|
|
|
return -EINVAL;
|
|
|
|
name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
|
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
|
|
|
|
if (!msg.skb)
|
2014-11-20 17:29:12 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
if (strcmp(name, tipc_bclink_name) == 0) {
|
|
|
|
err = tipc_nl_add_bc_link(net, &msg);
|
|
|
|
if (err) {
|
|
|
|
nlmsg_free(msg.skb);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
int bearer_id;
|
|
|
|
struct tipc_node *node;
|
|
|
|
struct tipc_link *link;
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
node = tipc_link_find_owner(net, name, &bearer_id);
|
|
|
|
if (!node)
|
|
|
|
return -EINVAL;
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
tipc_node_lock(node);
|
2015-07-17 04:54:19 +08:00
|
|
|
link = node->links[bearer_id].link;
|
2015-05-06 19:58:55 +08:00
|
|
|
if (!link) {
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
nlmsg_free(msg.skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
err = __tipc_nl_add_link(net, &msg, link, 0);
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
if (err) {
|
|
|
|
nlmsg_free(msg.skb);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
2014-11-20 17:29:12 +08:00
|
|
|
|
2015-05-06 19:58:55 +08:00
|
|
|
return genlmsg_reply(msg.skb, info);
|
2014-11-20 17:29:12 +08:00
|
|
|
}
|
2014-11-20 17:29:14 +08:00
|
|
|
|
|
|
|
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
char *link_name;
|
|
|
|
unsigned int bearer_id;
|
|
|
|
struct tipc_link *link;
|
|
|
|
struct tipc_node *node;
|
|
|
|
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
|
2015-02-09 16:50:09 +08:00
|
|
|
struct net *net = sock_net(skb->sk);
|
2014-11-20 17:29:14 +08:00
|
|
|
|
|
|
|
if (!info->attrs[TIPC_NLA_LINK])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
|
|
|
|
info->attrs[TIPC_NLA_LINK],
|
|
|
|
tipc_nl_link_policy);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (!attrs[TIPC_NLA_LINK_NAME])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
|
|
|
|
|
|
|
|
if (strcmp(link_name, tipc_bclink_name) == 0) {
|
2015-01-09 15:27:07 +08:00
|
|
|
err = tipc_bclink_reset_stats(net);
|
2014-11-20 17:29:14 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
node = tipc_link_find_owner(net, link_name, &bearer_id);
|
2014-11-20 17:29:14 +08:00
|
|
|
if (!node)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
tipc_node_lock(node);
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
link = node->links[bearer_id].link;
|
2014-11-20 17:29:14 +08:00
|
|
|
if (!link) {
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
link_reset_statistics(link);
|
|
|
|
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|