Merge branch 'sfc-TXQ-refactor'

Edward Cree says:

====================
sfc: TXQ refactor

Refactor and unify partner-TXQ handling in the EF100 and legacy drivers.

The main thrust of this series is to remove from the legacy (Siena/EF10)
 driver the assumption that a netdev TX queue has precisely two hardware
 TXQs (checksummed and unchecksummed) associated with it, so that in
 future we can have more (e.g. for handling inner-header checksums) or
 fewer (e.g. to free up hardware queues for XDP usage).

Changes from v1:
 * better explain patch #1 in the commit message, and rename
   xmit_more_available to xmit_pending
 * add new patch #2 applying the same approach to ef100, for consistency
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2020-09-05 12:25:17 -07:00
commit 447a851bdb
8 changed files with 104 additions and 105 deletions

View File

@ -2367,7 +2367,7 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
unsigned int write_ptr;
efx_qword_t *txd;
tx_queue->xmit_more_available = false;
tx_queue->xmit_pending = false;
if (unlikely(tx_queue->write_count == tx_queue->insert_count))
return;

View File

@ -117,11 +117,13 @@ static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int in
return NULL;
}
void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue)
static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue)
{
unsigned int write_ptr;
efx_dword_t reg;
tx_queue->xmit_pending = false;
if (unlikely(tx_queue->notify_count == tx_queue->write_count))
return;
@ -131,7 +133,6 @@ void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue)
efx_writed_page(tx_queue->efx, &reg,
ER_GZ_TX_RING_DOORBELL, tx_queue->queue);
tx_queue->notify_count = tx_queue->write_count;
tx_queue->xmit_more_available = false;
}
static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue)
@ -359,28 +360,31 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
goto err;
ef100_tx_make_descriptors(tx_queue, skb, segments);
fill_level = efx_channel_tx_fill_level(tx_queue->channel);
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
if (fill_level > efx->txq_stop_thresh) {
struct efx_tx_queue *txq2;
netif_tx_stop_queue(tx_queue->core_txq);
/* Re-read after a memory barrier in case we've raced with
* the completion path. Otherwise there's a danger we'll never
* restart the queue if all completions have just happened.
*/
smp_mb();
fill_level = efx_channel_tx_fill_level(tx_queue->channel);
efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
txq2->old_read_count = READ_ONCE(txq2->read_count);
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
if (fill_level < efx->txq_stop_thresh)
netif_tx_start_queue(tx_queue->core_txq);
}
if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more))
tx_queue->xmit_more_available = false; /* push doorbell */
else if (tx_queue->write_count - tx_queue->notify_count > 255)
/* Ensure we never push more than 256 packets at once */
tx_queue->xmit_more_available = false; /* push */
else
tx_queue->xmit_more_available = true; /* don't push yet */
tx_queue->xmit_pending = true;
if (!tx_queue->xmit_more_available)
/* If xmit_more then we don't need to push the doorbell, unless there
* are 256 descriptors already queued in which case we have to push to
* ensure we never push more than 256 at once.
*/
if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
tx_queue->write_count - tx_queue->notify_count > 255)
ef100_tx_push_buffers(tx_queue);
if (segments) {
@ -399,10 +403,10 @@ err:
/* If we're not expecting another transmit and we had something to push
* on this queue then we need to push here to get the previous packets
* out. We only enter this branch from before the 'Update BQL' section
* above, so xmit_more_available still refers to the old state.
* out. We only enter this branch from before the xmit_more handling
* above, so xmit_pending still refers to the old state.
*/
if (tx_queue->xmit_more_available && !xmit_more)
if (tx_queue->xmit_pending && !xmit_more)
ef100_tx_push_buffers(tx_queue);
return rc;
}

View File

@ -17,7 +17,6 @@
int ef100_tx_probe(struct efx_tx_queue *tx_queue);
void ef100_tx_init(struct efx_tx_queue *tx_queue);
void ef100_tx_write(struct efx_tx_queue *tx_queue);
void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue);
unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx);
void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event);

View File

@ -320,7 +320,7 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
unsigned write_ptr;
unsigned old_write_count = tx_queue->write_count;
tx_queue->xmit_more_available = false;
tx_queue->xmit_pending = false;
if (unlikely(tx_queue->write_count == tx_queue->insert_count))
return;

View File

@ -244,7 +244,7 @@ struct efx_tx_buffer {
* @tso_fallbacks: Number of times TSO fallback used
* @pushes: Number of times the TX push feature has been used
* @pio_packets: Number of times the TX PIO feature has been used
* @xmit_more_available: Are any packets waiting to be pushed to the NIC
* @xmit_pending: Are any packets waiting to be pushed to the NIC
* @cb_packets: Number of times the TX copybreak feature has been used
* @notify_count: Count of notified descriptors to the NIC
* @empty_read_count: If the completion path has seen the queue as empty
@ -292,7 +292,7 @@ struct efx_tx_queue {
unsigned int tso_fallbacks;
unsigned int pushes;
unsigned int pio_packets;
bool xmit_more_available;
bool xmit_pending;
unsigned int cb_packets;
unsigned int notify_count;
/* Statistics to supplement MAC stats */
@ -1681,10 +1681,6 @@ efx_channel_tx_fill_level(struct efx_channel *channel)
struct efx_tx_queue *tx_queue;
unsigned int fill_level = 0;
/* This function is currently only used by EF100, which maybe
* could do something simpler and just compute the fill level
* of the single TXQ that's really in use.
*/
efx_for_each_channel_tx_queue(tx_queue, channel)
fill_level = max(fill_level,
tx_queue->insert_count - tx_queue->read_count);
@ -1692,6 +1688,20 @@ efx_channel_tx_fill_level(struct efx_channel *channel)
return fill_level;
}
/* Conservative approximation of efx_channel_tx_fill_level using cached value */
static inline unsigned int
efx_channel_tx_old_fill_level(struct efx_channel *channel)
{
struct efx_tx_queue *tx_queue;
unsigned int fill_level = 0;
efx_for_each_channel_tx_queue(tx_queue, channel)
fill_level = max(fill_level,
tx_queue->insert_count - tx_queue->old_read_count);
return fill_level;
}
/* Get all supported features.
* If a feature is not fixed, it is present in hw_features.
* If a feature is fixed, it does not present in hw_features, but

View File

@ -65,8 +65,7 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
/* Report whether this TX queue would be empty for the given write_count.
* May return false negative.
*/
static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue,
unsigned int write_count)
static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count)
{
unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
@ -76,41 +75,6 @@ static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue,
return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
}
/* Report whether the NIC considers this TX queue empty, using
* packet_write_count (the write count recorded for the last completable
* doorbell push). May return false negative. EF10 only, which is OK
* because only EF10 supports PIO.
*/
static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue)
{
EFX_WARN_ON_ONCE_PARANOID(!tx_queue->efx->type->option_descriptors);
return __efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count);
}
/* Get partner of a TX queue, seen as part of the same net core queue */
/* XXX is this a thing on EF100? */
static inline struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
{
if (tx_queue->label & EFX_TXQ_TYPE_OFFLOAD)
return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
else
return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
}
/* Decide whether we can use TX PIO, ie. write packet data directly into
* a buffer on the device. This can reduce latency at the expense of
* throughput, so we only do this if both hardware and software TX rings
* are empty. This also ensures that only one packet at a time can be
* using the PIO buffer.
*/
static inline bool efx_nic_may_tx_pio(struct efx_tx_queue *tx_queue)
{
struct efx_tx_queue *partner = efx_tx_queue_partner(tx_queue);
return tx_queue->piobuf && efx_nic_tx_is_empty(tx_queue) &&
efx_nic_tx_is_empty(partner);
}
int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
bool *data_mapped);
@ -125,7 +89,7 @@ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
bool was_empty = __efx_nic_tx_is_empty(tx_queue, write_count);
bool was_empty = efx_nic_tx_is_empty(tx_queue, write_count);
tx_queue->empty_read_count = 0;
return was_empty && tx_queue->write_count - write_count == 1;

View File

@ -59,13 +59,12 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
/* We need to consider both queues that the net core sees as one */
struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
/* We need to consider all queues that the net core sees as one */
struct efx_nic *efx = txq1->efx;
struct efx_tx_queue *txq2;
unsigned int fill_level;
fill_level = max(txq1->insert_count - txq1->old_read_count,
txq2->insert_count - txq2->old_read_count);
fill_level = efx_channel_tx_old_fill_level(txq1->channel);
if (likely(fill_level < efx->txq_stop_thresh))
return;
@ -85,11 +84,10 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
*/
netif_tx_stop_queue(txq1->core_txq);
smp_mb();
txq1->old_read_count = READ_ONCE(txq1->read_count);
txq2->old_read_count = READ_ONCE(txq2->read_count);
efx_for_each_channel_tx_queue(txq2, txq1->channel)
txq2->old_read_count = READ_ONCE(txq2->read_count);
fill_level = max(txq1->insert_count - txq1->old_read_count,
txq2->insert_count - txq2->old_read_count);
fill_level = efx_channel_tx_old_fill_level(txq1->channel);
EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries);
if (likely(fill_level < efx->txq_stop_thresh)) {
smp_mb();
@ -266,8 +264,45 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
++tx_queue->insert_count;
return 0;
}
/* Decide whether we can use TX PIO, ie. write packet data directly into
* a buffer on the device. This can reduce latency at the expense of
* throughput, so we only do this if both hardware and software TX rings
* are empty, including all queues for the channel. This also ensures that
* only one packet at a time can be using the PIO buffer. If the xmit_more
* flag is set then we don't use this - there'll be another packet along
* shortly and we want to hold off the doorbell.
*/
static bool efx_tx_may_pio(struct efx_tx_queue *tx_queue)
{
struct efx_channel *channel = tx_queue->channel;
if (!tx_queue->piobuf)
return false;
EFX_WARN_ON_ONCE_PARANOID(!channel->efx->type->option_descriptors);
efx_for_each_channel_tx_queue(tx_queue, channel)
if (!efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count))
return false;
return true;
}
#endif /* EFX_USE_PIO */
/* Send any pending traffic for a channel. xmit_more is shared across all
* queues for a channel, so we must check all of them.
*/
static void efx_tx_send_pending(struct efx_channel *channel)
{
struct efx_tx_queue *q;
efx_for_each_channel_tx_queue(q, channel) {
if (q->xmit_pending)
efx_nic_push_buffers(q);
}
}
/*
* Add a socket buffer to a TX queue
*
@ -315,7 +350,7 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb
goto err;
#ifdef EFX_USE_PIO
} else if (skb_len <= efx_piobuf_size && !xmit_more &&
efx_nic_may_tx_pio(tx_queue)) {
efx_tx_may_pio(tx_queue)) {
/* Use PIO for short packets with an empty queue. */
if (efx_enqueue_skb_pio(tx_queue, skb))
goto err;
@ -336,21 +371,11 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb
efx_tx_maybe_stop_queue(tx_queue);
tx_queue->xmit_pending = true;
/* Pass off to hardware */
if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more)) {
struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
/* There could be packets left on the partner queue if
* xmit_more was set. If we do not push those they
* could be left for a long time and cause a netdev watchdog.
*/
if (txq2->xmit_more_available)
efx_nic_push_buffers(txq2);
efx_nic_push_buffers(tx_queue);
} else {
tx_queue->xmit_more_available = xmit_more;
}
if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more))
efx_tx_send_pending(tx_queue->channel);
if (segments) {
tx_queue->tso_bursts++;
@ -371,14 +396,8 @@ err:
* on this queue or a partner queue then we need to push here to get the
* previous packets out.
*/
if (!xmit_more) {
struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
if (txq2->xmit_more_available)
efx_nic_push_buffers(txq2);
efx_nic_push_buffers(tx_queue);
}
if (!xmit_more)
efx_tx_send_pending(tx_queue->channel);
return NETDEV_TX_OK;
}
@ -489,18 +508,24 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));
/* PTP "event" packet */
if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
return efx_ptp_tx(efx, skb);
}
index = skb_get_queue_mapping(skb);
type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
if (index >= efx->n_tx_channels) {
index -= efx->n_tx_channels;
type |= EFX_TXQ_TYPE_HIGHPRI;
}
/* PTP "event" packet */
if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
/* There may be existing transmits on the channel that are
* waiting for this packet to trigger the doorbell write.
* We need to send the packets at this point.
*/
efx_tx_send_pending(efx_get_tx_channel(efx, index));
return efx_ptp_tx(efx, skb);
}
tx_queue = efx_get_tx_queue(efx, index, type);
return __efx_enqueue_skb(tx_queue, skb);

View File

@ -78,7 +78,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->read_count = 0;
tx_queue->old_read_count = 0;
tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
tx_queue->xmit_more_available = false;
tx_queue->xmit_pending = false;
tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
tx_queue->channel == efx_ptp_channel(efx));
tx_queue->completed_timestamp_major = 0;
@ -116,7 +116,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
++tx_queue->read_count;
}
tx_queue->xmit_more_available = false;
tx_queue->xmit_pending = false;
netdev_tx_reset_queue(tx_queue->core_txq);
}
@ -242,7 +242,6 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
struct efx_nic *efx = tx_queue->efx;
struct efx_tx_queue *txq2;
EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
@ -261,9 +260,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
likely(efx->port_enabled) &&
likely(netif_device_present(efx->net_dev))) {
txq2 = efx_tx_queue_partner(tx_queue);
fill_level = max(tx_queue->insert_count - tx_queue->read_count,
txq2->insert_count - txq2->read_count);
fill_level = efx_channel_tx_fill_level(tx_queue->channel);
if (fill_level <= efx->txq_wake_thresh)
netif_tx_wake_queue(tx_queue->core_txq);
}