Merge branch 'ibmvnic-Failover-hardening'

Thomas Falcon says:

====================
ibmvnic: Failover hardening

Introduce additional transport event hardening to handle
events during device reset. In the driver's current state,
if a transport event is received during device reset, it can
cause the device to become unresponsive as invalid operations
are processed as the backing device context changes. After
a transport event, the device expects a request to begin the
initialization process. If the driver is still processing
a previously queued device reset in this state, it is likely
to fail as firmware will reject any commands other than the
one to initialize the client driver's Command-Response Queue.

Instead of failing and becoming dormant, the driver will make
one more attempt to recover and continue operation. This is
achieved by setting a state flag, which if true will direct
the driver to clean up all allocated resources and perform
a hard reset in an attempt to bring the driver back to an
operational state.
====================
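
In brief, the hardening rests on three cooperating pieces, all visible in the
diff below: the CRQ event handler records that the queue went down and, if a
reset is already in flight, raises the recovery flag; the send path rejects
every command except CRQ initialization while the queue is inactive; and the
reset worker escalates to a hard reset when the flag is set. A condensed
sketch of that control flow (locking and error paths omitted; the full hunks
below are authoritative):

    /* ibmvnic_handle_crq(): a transport event deactivates the CRQ; if a
     * reset is already in progress, request the hard-reset path.
     */
    case IBMVNIC_CRQ_XPORT_EVENT:
            netif_carrier_off(netdev);
            adapter->crq.active = false;
            if (adapter->resetting)
                    adapter->force_reset_recovery = true;
            ...

    /* ibmvnic_send_crq(): while the CRQ is inactive, firmware rejects
     * everything but the init command, so fail other requests up front.
     */
    if (!adapter->crq.active &&
        crq->generic.first != IBMVNIC_CRQ_INIT_CMD)
            return -EINVAL;

    /* __ibmvnic_reset(): a flagged transport event turns the next queued
     * reset into a full teardown and re-initialization.
     */
    if (adapter->force_reset_recovery) {
            adapter->force_reset_recovery = false;
            rc = do_hard_reset(adapter, rwi, reset_state);
    } else {
            rc = do_reset(adapter, rwi, reset_state);
    }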

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller <davem@davemloft.net> on 2018-05-24 22:19:26 -04:00
commit 49a473f5b5
2 changed files with 202 additions and 23 deletions

drivers/net/ethernet/ibm/ibmvnic.c

@@ -109,13 +109,14 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
                                        struct ibmvnic_sub_crq_queue *);
 static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
-static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
-static void send_request_unmap(struct ibmvnic_adapter *, u8);
+static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
+static int send_request_unmap(struct ibmvnic_adapter *, u8);
 static int send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
+static int ibmvnic_reset_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
 static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
 static int init_crq_queue(struct ibmvnic_adapter *adapter);
@@ -172,6 +173,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb, int size)
 {
        struct device *dev = &adapter->vdev->dev;
+       int rc;
 
        ltb->size = size;
        ltb->buff = dma_alloc_coherent(dev, ltb->size, &ltb->addr,
@@ -185,8 +187,12 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
        adapter->map_id++;
 
        init_completion(&adapter->fw_done);
-       send_request_map(adapter, ltb->addr,
-                        ltb->size, ltb->map_id);
+       rc = send_request_map(adapter, ltb->addr,
+                             ltb->size, ltb->map_id);
+       if (rc) {
+               dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+               return rc;
+       }
        wait_for_completion(&adapter->fw_done);
 
        if (adapter->fw_done_rc) {
@@ -215,10 +221,14 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 static int reset_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb)
 {
+       int rc;
+
        memset(ltb->buff, 0, ltb->size);
 
        init_completion(&adapter->fw_done);
-       send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+       rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+       if (rc)
+               return rc;
        wait_for_completion(&adapter->fw_done);
 
        if (adapter->fw_done_rc) {
@@ -789,6 +799,7 @@ static void release_napi(struct ibmvnic_adapter *adapter)
        kfree(adapter->napi);
        adapter->napi = NULL;
        adapter->num_active_rx_napi = 0;
+       adapter->napi_enabled = false;
 }
 
 static int ibmvnic_login(struct net_device *netdev)
@@ -919,6 +930,10 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
                        /* Partial success, delay and re-send */
                        mdelay(1000);
                        resend = true;
+               } else if (adapter->init_done_rc) {
+                       netdev_warn(netdev, "Unable to set link state, rc=%d\n",
+                                   adapter->init_done_rc);
+                       return adapter->init_done_rc;
                }
        } while (resend);
 
@@ -951,6 +966,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
        struct device *dev = &adapter->vdev->dev;
        union ibmvnic_crq crq;
        int len = 0;
+       int rc;
 
        if (adapter->vpd->buff)
                len = adapter->vpd->len;
@@ -958,7 +974,9 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
        init_completion(&adapter->fw_done);
        crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
        crq.get_vpd_size.cmd = GET_VPD_SIZE;
-       ibmvnic_send_crq(adapter, &crq);
+       rc = ibmvnic_send_crq(adapter, &crq);
+       if (rc)
+               return rc;
        wait_for_completion(&adapter->fw_done);
 
        if (!adapter->vpd->len)
@@ -991,7 +1009,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
        crq.get_vpd.cmd = GET_VPD;
        crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
        crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
-       ibmvnic_send_crq(adapter, &crq);
+       rc = ibmvnic_send_crq(adapter, &crq);
+       if (rc) {
+               kfree(adapter->vpd->buff);
+               adapter->vpd->buff = NULL;
+               return rc;
+       }
        wait_for_completion(&adapter->fw_done);
 
        return 0;
@@ -1690,6 +1713,7 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        struct sockaddr *addr = p;
        union ibmvnic_crq crq;
+       int rc;
 
        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
@@ -1700,7 +1724,9 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
        ether_addr_copy(&crq.change_mac_addr.mac_addr[0], addr->sa_data);
 
        init_completion(&adapter->fw_done);
-       ibmvnic_send_crq(adapter, &crq);
+       rc = ibmvnic_send_crq(adapter, &crq);
+       if (rc)
+               return rc;
        wait_for_completion(&adapter->fw_done);
        /* netdev->dev_addr is changed in handle_change_mac_rsp function */
        return adapter->fw_done_rc ? -EIO : 0;
@@ -1782,7 +1808,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                        return rc;
        }
 
-       rc = ibmvnic_init(adapter);
+       rc = ibmvnic_reset_init(adapter);
        if (rc)
                return IBMVNIC_INIT_FAILED;
 
@@ -1852,6 +1878,85 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        return 0;
 }
 
+static int do_hard_reset(struct ibmvnic_adapter *adapter,
+                        struct ibmvnic_rwi *rwi, u32 reset_state)
+{
+       struct net_device *netdev = adapter->netdev;
+       int rc;
+
+       netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n",
+                  rwi->reset_reason);
+
+       netif_carrier_off(netdev);
+       adapter->reset_reason = rwi->reset_reason;
+
+       ibmvnic_cleanup(netdev);
+       release_resources(adapter);
+       release_sub_crqs(adapter, 0);
+       release_crq_queue(adapter);
+
+       /* remove the closed state so when we call open it appears
+        * we are coming from the probed state.
+        */
+       adapter->state = VNIC_PROBED;
+
+       rc = init_crq_queue(adapter);
+       if (rc) {
+               netdev_err(adapter->netdev,
+                          "Couldn't initialize crq. rc=%d\n", rc);
+               return rc;
+       }
+
+       rc = ibmvnic_init(adapter);
+       if (rc)
+               return rc;
+
+       /* If the adapter was in PROBE state prior to the reset,
+        * exit here.
+        */
+       if (reset_state == VNIC_PROBED)
+               return 0;
+
+       rc = ibmvnic_login(netdev);
+       if (rc) {
+               adapter->state = VNIC_PROBED;
+               return 0;
+       }
+       /* netif_set_real_num_xx_queues needs to take rtnl lock here
+        * unless wait_for_reset is set, in which case the rtnl lock
+        * has already been taken before initializing the reset
+        */
+       if (!adapter->wait_for_reset) {
+               rtnl_lock();
+               rc = init_resources(adapter);
+               rtnl_unlock();
+       } else {
+               rc = init_resources(adapter);
+       }
+       if (rc)
+               return rc;
+
+       ibmvnic_disable_irqs(adapter);
+       adapter->state = VNIC_CLOSED;
+
+       if (reset_state == VNIC_CLOSED)
+               return 0;
+
+       rc = __ibmvnic_open(netdev);
+       if (rc) {
+               if (list_empty(&adapter->rwi_list))
+                       adapter->state = VNIC_CLOSED;
+               else
+                       adapter->state = reset_state;
+
+               return 0;
+       }
+
+       netif_carrier_on(netdev);
+
+       return 0;
+}
+
 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_rwi *rwi;
@@ -1893,14 +1998,19 @@ static void __ibmvnic_reset(struct work_struct *work)
        netdev = adapter->netdev;
 
        mutex_lock(&adapter->reset_lock);
-       adapter->resetting = true;
        reset_state = adapter->state;
 
        rwi = get_next_rwi(adapter);
        while (rwi) {
-               rc = do_reset(adapter, rwi, reset_state);
+               if (adapter->force_reset_recovery) {
+                       adapter->force_reset_recovery = false;
+                       rc = do_hard_reset(adapter, rwi, reset_state);
+               } else {
+                       rc = do_reset(adapter, rwi, reset_state);
+               }
                kfree(rwi);
-               if (rc && rc != IBMVNIC_INIT_FAILED)
+               if (rc && rc != IBMVNIC_INIT_FAILED &&
+                   !adapter->force_reset_recovery)
                        break;
 
                rwi = get_next_rwi(adapter);
@@ -1926,9 +2036,9 @@ static void __ibmvnic_reset(struct work_struct *work)
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
                         enum ibmvnic_reset_reason reason)
 {
+       struct list_head *entry, *tmp_entry;
        struct ibmvnic_rwi *rwi, *tmp;
        struct net_device *netdev = adapter->netdev;
-       struct list_head *entry;
        int ret;
 
        if (adapter->state == VNIC_REMOVING ||
@@ -1964,11 +2074,17 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
                ret = ENOMEM;
                goto err;
        }
-
+       /* if we just received a transport event,
+        * flush reset queue and process this reset
+        */
+       if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
+               list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
+                       list_del(entry);
+       }
        rwi->reset_reason = reason;
        list_add_tail(&rwi->list, &adapter->rwi_list);
        mutex_unlock(&adapter->rwi_lock);
-
+       adapter->resetting = true;
        netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
        schedule_work(&adapter->ibmvnic_reset);
 
@@ -2364,6 +2480,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
        struct ibmvnic_adapter *adapter = netdev_priv(dev);
        union ibmvnic_crq crq;
        int i, j;
+       int rc;
 
        memset(&crq, 0, sizeof(crq));
        crq.request_statistics.first = IBMVNIC_CRQ_CMD;
@@ -2374,7 +2491,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
 
        /* Wait for data to be written */
        init_completion(&adapter->stats_done);
-       ibmvnic_send_crq(adapter, &crq);
+       rc = ibmvnic_send_crq(adapter, &crq);
+       if (rc)
+               return;
        wait_for_completion(&adapter->stats_done);
 
        for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
@@ -3146,6 +3265,12 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
                   (unsigned long int)cpu_to_be64(u64_crq[0]),
                   (unsigned long int)cpu_to_be64(u64_crq[1]));
 
+       if (!adapter->crq.active &&
+           crq->generic.first != IBMVNIC_CRQ_INIT_CMD) {
+               dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n");
+               return -EINVAL;
+       }
+
        /* Make sure the hypervisor sees the complete request */
        mb();
@@ -3370,8 +3495,8 @@ buf_alloc_failed:
        return -1;
 }
 
-static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
-                            u32 len, u8 map_id)
+static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
+                           u32 len, u8 map_id)
 {
        union ibmvnic_crq crq;
@@ -3381,10 +3506,10 @@ static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
        crq.request_map.map_id = map_id;
        crq.request_map.ioba = cpu_to_be32(addr);
        crq.request_map.len = cpu_to_be32(len);
-       ibmvnic_send_crq(adapter, &crq);
+       return ibmvnic_send_crq(adapter, &crq);
 }
 
-static void send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
+static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
 {
        union ibmvnic_crq crq;
@@ -3392,7 +3517,7 @@ static void send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
        crq.request_unmap.first = IBMVNIC_CRQ_CMD;
        crq.request_unmap.cmd = REQUEST_UNMAP;
        crq.request_unmap.map_id = map_id;
-       ibmvnic_send_crq(adapter, &crq);
+       return ibmvnic_send_crq(adapter, &crq);
 }
 
 static void send_map_query(struct ibmvnic_adapter *adapter)
@@ -4219,11 +4344,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                        dev_info(dev, "Partner initialized\n");
                        adapter->from_passive_init = true;
                        adapter->failover_pending = false;
-                       complete(&adapter->init_done);
+                       if (!completion_done(&adapter->init_done)) {
+                               complete(&adapter->init_done);
+                               adapter->init_done_rc = -EIO;
+                       }
                        ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
                        break;
                case IBMVNIC_CRQ_INIT_COMPLETE:
                        dev_info(dev, "Partner initialization complete\n");
+                       adapter->crq.active = true;
                        send_version_xchg(adapter);
                        break;
                default:
@@ -4232,6 +4361,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                return;
        case IBMVNIC_CRQ_XPORT_EVENT:
                netif_carrier_off(netdev);
+               adapter->crq.active = false;
+               if (adapter->resetting)
+                       adapter->force_reset_recovery = true;
                if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
                        dev_info(dev, "Migrated, re-enabling adapter\n");
                        ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
@@ -4419,6 +4551,7 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter)
        /* Clean out the queue */
        memset(crq->msgs, 0, PAGE_SIZE);
        crq->cur = 0;
+       crq->active = false;
 
        /* And re-open it again */
        rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
@@ -4453,6 +4586,7 @@ static void release_crq_queue(struct ibmvnic_adapter *adapter)
                         DMA_BIDIRECTIONAL);
        free_page((unsigned long)crq->msgs);
        crq->msgs = NULL;
+       crq->active = false;
 }
 
 static int init_crq_queue(struct ibmvnic_adapter *adapter)
@@ -4530,7 +4664,7 @@ map_failed:
        return retrc;
 }
 
-static int ibmvnic_init(struct ibmvnic_adapter *adapter)
+static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
 {
        struct device *dev = &adapter->vdev->dev;
        unsigned long timeout = msecs_to_jiffies(30000);
@@ -4589,6 +4723,49 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
        return rc;
 }
 
+static int ibmvnic_init(struct ibmvnic_adapter *adapter)
+{
+       struct device *dev = &adapter->vdev->dev;
+       unsigned long timeout = msecs_to_jiffies(30000);
+       int rc;
+
+       adapter->from_passive_init = false;
+
+       init_completion(&adapter->init_done);
+       adapter->init_done_rc = 0;
+       ibmvnic_send_crq_init(adapter);
+       if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+               dev_err(dev, "Initialization sequence timed out\n");
+               return -1;
+       }
+
+       if (adapter->init_done_rc) {
+               release_crq_queue(adapter);
+               return adapter->init_done_rc;
+       }
+
+       if (adapter->from_passive_init) {
+               adapter->state = VNIC_OPEN;
+               adapter->from_passive_init = false;
+               return -1;
+       }
+
+       rc = init_sub_crqs(adapter);
+       if (rc) {
+               dev_err(dev, "Initialization of sub crqs failed\n");
+               release_crq_queue(adapter);
+               return rc;
+       }
+
+       rc = init_sub_crq_irqs(adapter);
+       if (rc) {
+               dev_err(dev, "Failed to initialize sub crq irqs\n");
+               release_crq_queue(adapter);
+       }
+
+       return rc;
+}
+
 static struct device_attribute dev_attr_failover;
 
 static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)

drivers/net/ethernet/ibm/ibmvnic.h

@@ -865,6 +865,7 @@ struct ibmvnic_crq_queue {
        int size, cur;
        dma_addr_t msg_token;
        spinlock_t lock;
+       bool active;
 };
 
 union sub_crq {
@@ -1108,6 +1109,7 @@ struct ibmvnic_adapter {
        bool mac_change_pending;
        bool failover_pending;
+       bool force_reset_recovery;
 
        struct ibmvnic_tunables desired;
        struct ibmvnic_tunables fallback;