cdc-wdm: fix "out-of-sync" due to missing notifications

The driver enforces a strict one-to-one relationship between the received RESPONSE_AVAILABLE notifications and messages read from the device. At the same time, it will cancel the interrupt URB when there is no client holding the character device open. Many devices do not cope well with this behaviour. They maintain a FIFO queue of messages, and send notifications on a best effort basis. Messages are queued regardless of whether the notification is successful or not. So if the driver loses a single notification, which can easily happen when the interrupt URB is cancelled, then the device and driver becomes out-of-sync. New messages end up at the end of the queue, while the associated notification makes the driver read only the first message from the queue. This state is permanent from a user point of view. There is no no way to flush the device queue without resetting the device or using another driver. The problem is easy to hit with current QMI and MBIM command line tools, which typically close the character device after seeing the reply they expect. Any pending unsolicited messages from the device will then trigger the driver bug. Fix by always reading all queued messages from the device when the notification URB is first submitted. This is expected to end with an -EPIPE status when there are no more pending messages, so demote the printk associated with -EPIPE to debug level. The workaround has been tested on a large number of different MBIM and QMI devices, as well as the Ericsson F5521gw and H5321gw modems with real Device Management functions. Signed-off-by: Bjørn Mork <bjorn@mork.no> Acked-by: Oliver Neukum <oneukum@suse.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-12-27 14:43:58 +08:00 · 2016-07-10 17:45:14 +02:00 · 2016-07-10 17:45:14 +02:00 · 833415a3e7
commit 833415a3e7
parent ec7b126851
1 changed files with 99 additions and 4 deletions
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@ -58,6 +58,7 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
 #define WDM_SUSPENDING		8
 #define WDM_RESETTING		9
 #define WDM_OVERFLOW		10
+#define WDM_DRAIN_ON_OPEN	11

 #define WDM_MAX			16

@ -178,7 +179,7 @@ static void wdm_in_callback(struct urb *urb)
 				"nonzero urb status received: -ESHUTDOWN");
 			goto skip_error;
 		case -EPIPE:
-			dev_err(&desc->intf->dev,
+			dev_dbg(&desc->intf->dev,
 				"nonzero urb status received: -EPIPE\n");
 			break;
 		default:
@ -200,10 +201,30 @@ static void wdm_in_callback(struct urb *urb)
 			desc->reslength = length;
 		}
 	}
+
+	/*
+	 * Handling devices with the WDM_DRAIN_ON_OPEN flag set:
+	 * If desc->resp_count is unset, then the urb was submitted
+	 * without a prior notification.  If the device returned any
+	 * data, then this implies that it had messages queued without
+	 * notifying us.  Continue reading until that queue is flushed.
+	 */
+	if (!desc->resp_count) {
+		if (!length) {
+			/* do not propagate the expected -EPIPE */
+			desc->rerr = 0;
+			goto unlock;
+		}
+		dev_dbg(&desc->intf->dev, "got %d bytes without notification\n", length);
+		set_bit(WDM_RESPONDING, &desc->flags);
+		usb_submit_urb(desc->response, GFP_ATOMIC);
+	}
+
 skip_error:
 	wake_up(&desc->wait);

 	set_bit(WDM_READ, &desc->flags);
+unlock:
 	spin_unlock(&desc->iuspin);
 }

@ -647,6 +668,17 @@ static int wdm_open(struct inode *inode, struct file *file)
 			dev_err(&desc->intf->dev,
 				"Error submitting int urb - %d\n", rv);
 			rv = usb_translate_errors(rv);
+		} else if (test_bit(WDM_DRAIN_ON_OPEN, &desc->flags)) {
+			/*
+			 * Some devices keep pending messages queued
+			 * without resending notifications.  We must
+			 * flush the message queue before we can
+			 * assume a one-to-one relationship between
+			 * notifications and messages in the queue
+			 */
+			dev_dbg(&desc->intf->dev, "draining queued data\n");
+			set_bit(WDM_RESPONDING, &desc->flags);
+			rv = usb_submit_urb(desc->response, GFP_KERNEL);
 		}
 	} else {
 		rv = 0;
@ -753,7 +785,8 @@ static void wdm_rxwork(struct work_struct *work)
 /* --- hotplug --- */

 static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep,
-		u16 bufsize, int (*manage_power)(struct usb_interface *, int))
+		u16 bufsize, int (*manage_power)(struct usb_interface *, int),
+		bool drain_on_open)
 {
 	int rv = -ENOMEM;
 	struct wdm_device *desc;
@ -840,6 +873,68 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor

 	desc->manage_power = manage_power;

+	/*
+	 * "drain_on_open" enables a hack to work around a firmware
+	 * issue observed on network functions, in particular MBIM
+	 * functions.
+	 *
+	 * Quoting section 7 of the CDC-WMC r1.1 specification:
+	 *
+	 *  "The firmware shall interpret GetEncapsulatedResponse as a
+	 *   request to read response bytes. The firmware shall send
+	 *   the next wLength bytes from the response. The firmware
+	 *   shall allow the host to retrieve data using any number of
+	 *   GetEncapsulatedResponse requests. The firmware shall
+	 *   return a zero- length reply if there are no data bytes
+	 *   available.
+	 *
+	 *   The firmware shall send ResponseAvailable notifications
+	 *   periodically, using any appropriate algorithm, to inform
+	 *   the host that there is data available in the reply
+	 *   buffer. The firmware is allowed to send ResponseAvailable
+	 *   notifications even if there is no data available, but
+	 *   this will obviously reduce overall performance."
+	 *
+	 * These requirements, although they make equally sense, are
+	 * often not implemented by network functions. Some firmwares
+	 * will queue data indefinitely, without ever resending a
+	 * notification. The result is that the driver and firmware
+	 * loses "syncronization" if the driver ever fails to respond
+	 * to a single notification, something which easily can happen
+	 * on release(). When this happens, the driver will appear to
+	 * never receive notifications for the most current data. Each
+	 * notification will only cause a single read, which returns
+	 * the oldest data in the firmware's queue.
+	 *
+	 * The "drain_on_open" hack resolves the situation by draining
+	 * data from the firmware until none is returned, without a
+	 * prior notification.
+	 *
+	 * This will inevitably race with the firmware, risking that
+	 * we read data from the device before handling the associated
+	 * notification. To make things worse, some of the devices
+	 * needing the hack do not implement the "return zero if no
+	 * data is available" requirement either. Instead they return
+	 * an error on the subsequent read in this case.  This means
+	 * that "winning" the race can cause an unexpected EIO to
+	 * userspace.
+	 *
+	 * "winning" the race is more likely on resume() than on
+	 * open(), and the unexpected error is more harmful in the
+	 * middle of an open session. The hack is therefore only
+	 * applied on open(), and not on resume() where it logically
+	 * would be equally necessary. So we define open() as the only
+	 * driver <-> device "syncronization point".  Should we happen
+	 * to lose a notification after open(), then syncronization
+	 * will be lost until release()
+	 *
+	 * The hack should not be enabled for CDC WDM devices
+	 * conforming to the CDC-WMC r1.1 specification.  This is
+	 * ensured by setting drain_on_open to false in wdm_probe().
+	 */
+	if (drain_on_open)
+		set_bit(WDM_DRAIN_ON_OPEN, &desc->flags);
+
 	spin_lock(&wdm_device_list_lock);
 	list_add(&desc->device_list, &wdm_device_list);
 	spin_unlock(&wdm_device_list_lock);
@ -893,7 +988,7 @@ static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id)
 		goto err;
 	ep = &iface->endpoint[0].desc;

-	rv = wdm_create(intf, ep, maxcom, &wdm_manage_power);
+	rv = wdm_create(intf, ep, maxcom, &wdm_manage_power, false);

 err:
 	return rv;
@ -925,7 +1020,7 @@ struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf,
 {
 	int rv = -EINVAL;

-	rv = wdm_create(intf, ep, bufsize, manage_power);
+	rv = wdm_create(intf, ep, bufsize, manage_power, true);
 	if (rv < 0)
 		goto err;