A slightly quieter cycle for documentation this time around.

Three more DocBook template files have been converted to RST; only 21 to go. There are various build improvements and the usual array of documentation improvements and fixes. -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJYriFXAAoJEI3ONVYwIuV6iTMP/iV7ownq9IK1f8askcXKM76i NoRdj4/JywAPQ73vLhOSDVELGdVJNRBjdyOdBRzxPgsqAhFmm79lVYV2eLIffQ2k 7LcVbEQR77I+4z9SwqIVbIWNCBry7Hu8aWh7moDL3I6yeuay408yr5YW2lIlsqHZ V/LZgkTWDe+iQPeXNA4Djzylx0lcRlAy4yMSLjN1+gb9/uBnXb9J0eGJzgfZfrL8 fiIhymg3bv8vB99l6LMR5vT343QLWXf1yS31A7rPQvwkDo6zFehUJA0XNfIsl2dw VQYsvl9vp9wy3e6Y0qKXPn1XhAhCrm64P3crBxK31MMvcKZVCfeRSZ78wrvpvewy MVLlXdqop1bHPHowtRfA5jwxr1NqcYp+Jg0+YGX3iXpPi1Jfk36DNUy9iWvtvIzr lWgQcIKsdCwwYUcvPR8Kt8T/3q/AHbYlI6mimWlkmbZwncQcgCrH5xSG+c2BIPfV fn3W6eLHBn8RyVsxlaXlA0Y9TNtI/Cm85b3Ri10pFvhl868ppWfJxXHi7UtcbU58 sQzahISCTXOH/NQwkkh7kFMtczbB43rAcChvF7EUYpazVBpJ4P4HxKFg3eIzIdc6 VlBSaMu1hxUGoYxNNYuKr/nYstuczLOKzK7q4j/JOExY3RgTWP+T3bF02wgubvoa D/9WfScewkgCJRoA7i17 =C5nd -----END PGP SIGNATURE----- Merge tag 'docs-4.11' of git://git.lwn.net/linux Pull documentation updates from Jonathan Corbet: "A slightly quieter cycle for documentation this time around. Three more DocBook template files have been converted to RST; only 21 to go. 
There are various build improvements and the usual array of documentation improvements and fixes" * tag 'docs-4.11' of git://git.lwn.net/linux: (44 commits) docs / driver-api: Fix structure references in device_link.rst PM / docs: Fix structure references in device.rst Add a target to check broken external links in the Documentation Documentation: Fix linux-api list typo Documentation: DocBook/Makefile comment typo Improve sparse documentation Documentation: make Makefile.sphinx no-ops quieter Documentation: DMA-ISA-LPC.txt Documentation: input: fix path to input code definitions docs: Remove the copyright year from conf.py docs: Fix a warning in the Korean HOWTO.rst translation PM / sleep / docs: Convert PM notifiers document to reST PM / core / docs: Convert sleep states API document to reST PM / core: Update kerneldoc comments in pm.h doc-rst: Fix recursive make invocation from macros doc-rst: Delete output of failed dot-SVG conversion doc-rst: Break shell command sequences on failure Documentation/sphinx: make targets independent of Sphinx work for HAVE_SPHINX=0 doc-rst: fixed cleandoc target when used with O=dir Documentation/sphinx: prevent generation of .pyc files in the source tree ...
2024-11-16 22:54:39 +08:00 · 2017-02-22 18:51:29 -08:00 · 2017-02-22 18:51:29 -08:00 · c1aac62f36
commit c1aac62f36
parent fd7e9a8834 bd8562626c
56 changed files with 3288 additions and 3510 deletions
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt
@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc.
 Unfortunately the memory available for ISA DMA is scarce so unless you
 allocate the memory during boot-up it's a good idea to also pass
-__GFP_REPEAT and __GFP_NOWARN to make the allocater try a bit harder.
+__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
 (This scarcity also means that you should allocate the buffer as
 early as possible and not release it until the driver is unloaded.)
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@ -13,7 +13,7 @@ DOCBOOKS := z8530book.xml  \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml scsi.xml \
 	    sh.xml regulator.xml w1.xml \
-	    writing_musb_glue_layer.xml iio.xml
+	    writing_musb_glue_layer.xml
 ifeq ($(DOCBOOKS),)
@ -71,6 +71,7 @@ installmandocs: mandocs
 # no-op for the DocBook toolchain
 epubdocs:
 latexdocs:
 linkcheckdocs:
 ###
 #External programs used
@ -272,6 +273,6 @@ cleandocs:
 	$(Q)rm -rf $(call objectify, $(clean-dirs))
 # Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable se we can use it in if_changed and friends.
+# information in a variable so we can use it in if_changed and friends.
 .PHONY: $(PHONY)
--- a/Documentation/DocBook/deviceiobook.tmpl
+++ b/Documentation/DocBook/deviceiobook.tmpl
@ -1,323 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
 	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
 <book id="DoingIO">
 <bookinfo>
  <title>Bus-Independent Device Accesses</title>
  <authorgroup>
   <author>
    <firstname>Matthew</firstname>
    <surname>Wilcox</surname>
    <affiliation>
     <address>
      <email>matthew@wil.cx</email>
     </address>
    </affiliation>
   </author>
  </authorgroup>
  <authorgroup>
   <author>
    <firstname>Alan</firstname>
    <surname>Cox</surname>
    <affiliation>
     <address>
      <email>alan@lxorguk.ukuu.org.uk</email>
     </address>
    </affiliation>
   </author>
  </authorgroup>
  <copyright>
   <year>2001</year>
   <holder>Matthew Wilcox</holder>
  </copyright>
  <legalnotice>
   <para>
     This documentation is free software; you can redistribute
     it and/or modify it under the terms of the GNU General Public
     License as published by the Free Software Foundation; either
     version 2 of the License, or (at your option) any later
     version.
   </para>
   <para>
     This program is distributed in the hope that it will be
     useful, but WITHOUT ANY WARRANTY; without even the implied
     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     See the GNU General Public License for more details.
   </para>
   <para>
     You should have received a copy of the GNU General Public
     License along with this program; if not, write to the Free
     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
     MA 02111-1307 USA
   </para>
   <para>
     For more details see the file COPYING in the source
     distribution of Linux.
   </para>
  </legalnotice>
 </bookinfo>
 <toc></toc>
  <chapter id="intro">
      <title>Introduction</title>
  <para>
 	Linux provides an API which abstracts performing IO across all busses
 	and devices, allowing device drivers to be written independently of
 	bus type.
  </para>
  </chapter>
  <chapter id="bugs">
     <title>Known Bugs And Assumptions</title>
  <para>
 	None.	
  </para>
  </chapter>
  <chapter id="mmio">
    <title>Memory Mapped IO</title>
    <sect1 id="getting_access_to_the_device">
      <title>Getting Access to the Device</title>
      <para>
 	The most widely supported form of IO is memory mapped IO.
 	That is, a part of the CPU's address space is interpreted
 	not as accesses to memory, but as accesses to a device.  Some
 	architectures define devices to be at a fixed address, but most
 	have some method of discovering devices.  The PCI bus walk is a
 	good example of such a scheme.	This document does not cover how
 	to receive such an address, but assumes you are starting with one.
 	Physical addresses are of type unsigned long. 
      </para>
      <para>
 	This address should not be used directly.  Instead, to get an
 	address suitable for passing to the accessor functions described
 	below, you should call <function>ioremap</function>.
 	An address suitable for accessing the device will be returned to you.
      </para>
      <para>
 	After you've finished using the device (say, in your module's
 	exit routine), call <function>iounmap</function> in order to return
 	the address space to the kernel.  Most architectures allocate new
 	address space each time you call <function>ioremap</function>, and
 	they can run out unless you call <function>iounmap</function>.
      </para>
    </sect1>
    <sect1 id="accessing_the_device">
      <title>Accessing the device</title>
      <para>
 	The part of the interface most used by drivers is reading and
 	writing memory-mapped registers on the device.	Linux provides
 	interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit
 	quantities.  Due to a historical accident, these are named byte,
 	word, long and quad accesses.  Both read and write accesses are
 	supported; there is no prefetch support at this time.
      </para>
      <para>
 	The functions are named <function>readb</function>,
 	<function>readw</function>, <function>readl</function>,
 	<function>readq</function>, <function>readb_relaxed</function>,
 	<function>readw_relaxed</function>, <function>readl_relaxed</function>,
 	<function>readq_relaxed</function>, <function>writeb</function>,
 	<function>writew</function>, <function>writel</function> and
 	<function>writeq</function>.
      </para>
      <para>
 	Some devices (such as framebuffers) would like to use larger
 	transfers than 8 bytes at a time.  For these devices, the
 	<function>memcpy_toio</function>, <function>memcpy_fromio</function>
 	and <function>memset_io</function> functions are provided.
 	Do not use memset or memcpy on IO addresses; they
 	are not guaranteed to copy data in order.
      </para>
      <para>
 	The read and write functions are defined to be ordered. That is, the
 	compiler is not permitted to reorder the I/O sequence. When the
 	ordering can be compiler optimised, you can use
 	<function>__readb</function> and friends to indicate the relaxed
 	ordering. Use this with care.
      </para>
      <para>
 	While the basic functions are defined to be synchronous with respect
 	to each other and ordered with respect to each other, the busses the
 	devices sit on may themselves have asynchronicity. In particular, many
 	authors are burned by the fact that PCI bus writes are posted
 	asynchronously. A driver author must issue a read from the same
 	device to ensure that writes have occurred in the specific cases the
 	author cares about. This kind of property cannot be hidden from driver
 	writers in the API.  In some cases, the read used to flush the device
 	may be expected to fail (if the card is resetting, for example).  In
 	that case, the read should be done from config space, which is
 	guaranteed to soft-fail if the card doesn't respond.
      </para>
      <para>
 	The following is an example of flushing a write to a device when
 	the driver would like to ensure the write's effects are visible prior
 	to continuing execution.
      </para>
 <programlisting>
 static inline void
 qla1280_disable_intrs(struct scsi_qla_host *ha)
 {
 	struct device_reg *reg;
 	reg = ha->iobase;
 	/* disable risc and host interrupts */
 	WRT_REG_WORD(&amp;reg->ictrl, 0);
 	/*
 	 * The following read will ensure that the above write
 	 * has been received by the device before we return from this
 	 * function.
 	 */
 	RD_REG_WORD(&amp;reg->ictrl);
 	ha->flags.ints_enabled = 0;
 }
 </programlisting>
      <para>
 	In addition to write posting, on some large multiprocessing systems
 	(e.g. SGI Challenge, Origin and Altix machines) posted writes won't
 	be strongly ordered coming from different CPUs.  Thus it's important
 	to properly protect parts of your driver that do memory-mapped writes
 	with locks and use the <function>mmiowb</function> to make sure they
 	arrive in the order intended.  Issuing a regular <function>readX
 	</function> will also ensure write ordering, but should only be used
 	when the driver has to be sure that the write has actually arrived
 	at the device (not that it's simply ordered with respect to other
 	writes), since a full <function>readX</function> is a relatively
 	expensive operation.
      </para>
      <para>
 	Generally, one should use <function>mmiowb</function> prior to
 	releasing a spinlock that protects regions using <function>writeb
 	</function> or similar functions that aren't surrounded by <function>
 	readb</function> calls, which will ensure ordering and flushing.  The
 	following pseudocode illustrates what might occur if write ordering
 	isn't guaranteed via <function>mmiowb</function> or one of the
 	<function>readX</function> functions.
      </para>
 <programlisting>
 CPU A:  spin_lock_irqsave(&amp;dev_lock, flags)
 CPU A:  ...
 CPU A:  writel(newval, ring_ptr);
 CPU A:  spin_unlock_irqrestore(&amp;dev_lock, flags)
        ...
 CPU B:  spin_lock_irqsave(&amp;dev_lock, flags)
 CPU B:  writel(newval2, ring_ptr);
 CPU B:  ...
 CPU B:  spin_unlock_irqrestore(&amp;dev_lock, flags)
 </programlisting>
      <para>
 	In the case above, newval2 could be written to ring_ptr before
 	newval.  Fixing it is easy though:
      </para>
 <programlisting>
 CPU A:  spin_lock_irqsave(&amp;dev_lock, flags)
 CPU A:  ...
 CPU A:  writel(newval, ring_ptr);
 CPU A:  mmiowb(); /* ensure no other writes beat us to the device */
 CPU A:  spin_unlock_irqrestore(&amp;dev_lock, flags)
        ...
 CPU B:  spin_lock_irqsave(&amp;dev_lock, flags)
 CPU B:  writel(newval2, ring_ptr);
 CPU B:  ...
 CPU B:  mmiowb();
 CPU B:  spin_unlock_irqrestore(&amp;dev_lock, flags)
 </programlisting>
      <para>
 	See tg3.c for a real world example of how to use
 	<function>mmiowb</function>.
      </para>
      <para>
 	PCI ordering rules also guarantee that PIO read responses arrive
 	after any outstanding DMA writes from that bus, since for some devices
 	the result of a <function>readb</function> call may signal to the
 	driver that a DMA transaction is complete.  In many cases, however,
 	the driver may want to indicate that the next
 	<function>readb</function> call has no relation to any previous DMA
 	writes performed by the device.  The driver can use
 	<function>readb_relaxed</function> for these cases, although only
 	some platforms will honor the relaxed semantics.  Using the relaxed
 	read functions will provide significant performance benefits on
 	platforms that support it.  The qla2xxx driver provides examples
 	of how to use <function>readX_relaxed</function>.  In many cases,
 	a majority of the driver's <function>readX</function> calls can
 	safely be converted to <function>readX_relaxed</function> calls, since
 	only a few will indicate or depend on DMA completion.
      </para>
    </sect1>
  </chapter>
  <chapter id="port_space_accesses">
    <title>Port Space Accesses</title>
    <sect1 id="port_space_explained">
      <title>Port Space Explained</title>
      <para>
 	Another form of IO commonly supported is Port Space.  This is a
 	range of addresses separate to the normal memory address space.
 	Access to these addresses is generally not as fast as accesses
 	to the memory mapped addresses, and it also has a potentially
 	smaller address space.
      </para>
      <para>
 	Unlike memory mapped IO, no preparation is required
 	to access port space.
      </para>
    </sect1>
    <sect1 id="accessing_port_space">
      <title>Accessing Port Space</title>
      <para>
 	Accesses to this space are provided through a set of functions
 	which allow 8-bit, 16-bit and 32-bit accesses; also
 	known as byte, word and long.  These functions are
 	<function>inb</function>, <function>inw</function>,
 	<function>inl</function>, <function>outb</function>,
 	<function>outw</function> and <function>outl</function>.
      </para>
      <para>
 	Some variants are provided for these functions.  Some devices
 	require that accesses to their ports are slowed down.  This
 	functionality is provided by appending a <function>_p</function>
 	to the end of the function.  There are also equivalents to memcpy.
 	The <function>ins</function> and <function>outs</function>
 	functions copy bytes, words or longs to the given port.
      </para>
    </sect1>
  </chapter>
  <chapter id="pubfunctions">
     <title>Public Functions Provided</title>
 !Iarch/x86/include/asm/io.h
 !Elib/pci_iomap.c
  </chapter>
 </book>
--- a/Documentation/DocBook/iio.tmpl
+++ b/Documentation/DocBook/iio.tmpl
@ -1,697 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
 	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
 <book id="iioid">
  <bookinfo>
    <title>Industrial I/O driver developer's guide </title>
    <authorgroup>
      <author>
        <firstname>Daniel</firstname>
        <surname>Baluta</surname>
        <affiliation>
          <address>
            <email>daniel.baluta@intel.com</email>
          </address>
        </affiliation>
      </author>
    </authorgroup>
    <copyright>
      <year>2015</year>
      <holder>Intel Corporation</holder>
    </copyright>
    <legalnotice>
      <para>
        This documentation is free software; you can redistribute
        it and/or modify it under the terms of the GNU General Public
        License version 2.
      </para>
    </legalnotice>
  </bookinfo>
  <toc></toc>
  <chapter id="intro">
    <title>Introduction</title>
    <para>
      The main purpose of the Industrial I/O subsystem (IIO) is to provide
      support for devices that in some sense perform either analog-to-digital
      conversion (ADC) or digital-to-analog conversion (DAC) or both. The aim
      is to fill the gap between the somewhat similar hwmon and input
      subsystems.
      Hwmon is directed at low sample rate sensors used to monitor and
      control the system itself, like fan speed control or temperature
      measurement. Input is, as its name suggests, focused on human interaction
      input devices (keyboard, mouse, touchscreen). In some cases there is
      considerable overlap between these and IIO.
  </para>
  <para>
    Devices that fall into this category include:
    <itemizedlist>
      <listitem>
        analog to digital converters (ADCs)
      </listitem>
      <listitem>
        accelerometers
      </listitem>
      <listitem>
        capacitance to digital converters (CDCs)
      </listitem>
      <listitem>
        digital to analog converters (DACs)
      </listitem>
      <listitem>
        gyroscopes
      </listitem>
      <listitem>
        inertial measurement units (IMUs)
      </listitem>
      <listitem>
        color and light sensors
      </listitem>
      <listitem>
        magnetometers
      </listitem>
      <listitem>
        pressure sensors
      </listitem>
      <listitem>
        proximity sensors
      </listitem>
      <listitem>
        temperature sensors
      </listitem>
    </itemizedlist>
    Usually these sensors are connected via SPI or I2C. A common use case of the
    sensor devices is to have combined functionality (e.g. light plus proximity
    sensor).
  </para>
  </chapter>
  <chapter id='iiosubsys'>
    <title>Industrial I/O core</title>
    <para>
      The Industrial I/O core offers:
      <itemizedlist>
        <listitem>
         a unified framework for writing drivers for many different types of
         embedded sensors.
        </listitem>
        <listitem>
         a standard interface to user space applications manipulating sensors.
        </listitem>
      </itemizedlist>
      The implementation can be found under <filename>
      drivers/iio/industrialio-*</filename>
  </para>
  <sect1 id="iiodevice">
    <title> Industrial I/O devices </title>
 !Finclude/linux/iio/iio.h iio_dev
 !Fdrivers/iio/industrialio-core.c iio_device_alloc
 !Fdrivers/iio/industrialio-core.c iio_device_free
 !Fdrivers/iio/industrialio-core.c iio_device_register
 !Fdrivers/iio/industrialio-core.c iio_device_unregister
    <para>
      An IIO device usually corresponds to a single hardware sensor and it
      provides all the information needed by a driver handling a device.
      Let's first have a look at the functionality embedded in an IIO
      device then we will show how a device driver makes use of an IIO
      device.
    </para>
    <para>
        There are two ways for a user space application to interact
        with an IIO driver.
      <itemizedlist>
        <listitem>
          <filename>/sys/bus/iio/iio:deviceX/</filename>, this
          represents a hardware sensor and groups together the data
          channels of the same chip.
        </listitem>
        <listitem>
          <filename>/dev/iio:deviceX</filename>, character device node
          interface used for buffered data transfer and for events information
          retrieval.
        </listitem>
      </itemizedlist>
    </para>
    A typical IIO driver will register itself as an I2C or SPI driver and will
    create two routines, <function> probe </function> and <function> remove
    </function>. At <function>probe</function>:
    <itemizedlist>
    <listitem>call <function>iio_device_alloc</function>, which allocates memory
      for an IIO device.
    </listitem>
    <listitem> initialize IIO device fields with driver specific information
              (e.g. device name, device channels).
    </listitem>
    <listitem>call <function> iio_device_register</function>, this registers the
      device with the IIO core. After this call the device is ready to accept
      requests from user space applications.
    </listitem>
    </itemizedlist>
      At <function>remove</function>, we free the resources allocated in
      <function>probe</function> in reverse order:
    <itemizedlist>
    <listitem><function>iio_device_unregister</function>, unregister the device
      from the IIO core.
    </listitem>
    <listitem><function>iio_device_free</function>, free the memory allocated
      for the IIO device.
    </listitem>
    </itemizedlist>
    <sect2 id="iioattr"> <title> IIO device sysfs interface </title>
      <para>
        Attributes are sysfs files used to expose chip info and also to allow
        applications to set various configuration parameters. For a device
        with index X, attributes can be found under
        <filename>/sys/bus/iio/iio:deviceX/ </filename> directory.
        Common attributes are:
        <itemizedlist>
          <listitem><filename>name</filename>, description of the physical
            chip.
          </listitem>
          <listitem><filename>dev</filename>, shows the major:minor pair
            associated with <filename>/dev/iio:deviceX</filename> node.
          </listitem>
          <listitem><filename>sampling_frequency_available</filename>,
            available discrete set of sampling frequency values for
            device.
          </listitem>
      </itemizedlist>
      Available standard attributes for IIO devices are described in the
      <filename>Documentation/ABI/testing/sysfs-bus-iio </filename> file
      in the Linux kernel sources.
      </para>
    </sect2>
    <sect2 id="iiochannel"> <title> IIO device channels </title>
 !Finclude/linux/iio/iio.h iio_chan_spec structure.
      <para>
        An IIO device channel is a representation of a data channel. An
        IIO device can have one or multiple channels. For example:
        <itemizedlist>
          <listitem>
          a thermometer sensor has one channel representing the
          temperature measurement.
          </listitem>
          <listitem>
          a light sensor with two channels indicating the measurements in
          the visible and infrared spectrum.
          </listitem>
          <listitem>
          an accelerometer can have up to 3 channels representing
          acceleration on X, Y and Z axes.
          </listitem>
        </itemizedlist>
      An IIO channel is described by the <type> struct iio_chan_spec
      </type>. A thermometer driver for the temperature sensor in the
      example above would have to describe its channel as follows:
      <programlisting>
      static const struct iio_chan_spec temp_channel[] = {
          {
              .type = IIO_TEMP,
              .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
          },
      };
      </programlisting>
      Channel sysfs attributes exposed to userspace are specified in
      the form of <emphasis>bitmasks</emphasis>. Depending on their
      shared info, attributes can be set in one of the following masks:
      <itemizedlist>
      <listitem><emphasis>info_mask_separate</emphasis>, attributes will
        be specific to this channel</listitem>
      <listitem><emphasis>info_mask_shared_by_type</emphasis>,
        attributes are shared by all channels of the same type</listitem>
      <listitem><emphasis>info_mask_shared_by_dir</emphasis>, attributes
        are shared by all channels of the same direction </listitem>
      <listitem><emphasis>info_mask_shared_by_all</emphasis>,
        attributes are shared by all channels</listitem>
      </itemizedlist>
      When there are multiple data channels per channel type we have two
      ways to distinguish between them:
      <itemizedlist>
      <listitem> set <emphasis> .modified</emphasis> field of <type>
        iio_chan_spec</type> to 1. Modifiers are specified using
        <emphasis>.channel2</emphasis> field of the same
        <type>iio_chan_spec</type> structure and are used to indicate a
        physically unique characteristic of the channel such as its direction
        or spectral response. For example, a light sensor can have two channels,
        one for infrared light and one for both infrared and visible light.
      </listitem>
      <listitem> set <emphasis>.indexed </emphasis> field of
        <type>iio_chan_spec</type> to 1. In this case the channel is
        simply another instance with an index specified by the
        <emphasis>.channel</emphasis> field.
      </listitem>
      </itemizedlist>
      Here is how we can make use of the channel's modifiers:
      <programlisting>
      static const struct iio_chan_spec light_channels[] = {
          {
              .type = IIO_INTENSITY,
              .modified = 1,
              .channel2 = IIO_MOD_LIGHT_IR,
              .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
              .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
          },
          {
              .type = IIO_INTENSITY,
              .modified = 1,
              .channel2 = IIO_MOD_LIGHT_BOTH,
              .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
              .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
          },
          {
              .type = IIO_LIGHT,
              .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
              .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
          },
      };
      </programlisting>
      This channel's definition will generate two separate sysfs files
      for raw data retrieval:
      <itemizedlist>
      <listitem>
      <filename>/sys/bus/iio/iio:deviceX/in_intensity_ir_raw</filename>
      </listitem>
      <listitem>
      <filename>/sys/bus/iio/iio:deviceX/in_intensity_both_raw</filename>
      </listitem>
      </itemizedlist>
      one file for processed data:
      <itemizedlist>
      <listitem>
      <filename>/sys/bus/iio/iio:deviceX/in_illuminance_input
      </filename>
      </listitem>
      </itemizedlist>
      and one shared sysfs file for sampling frequency:
      <itemizedlist>
      <listitem>
      <filename>/sys/bus/iio/iio:deviceX/sampling_frequency</filename>.
      </listitem>
      </itemizedlist>
      </para>
      <para>
      Here is how we can make use of the channel's indexing:
      <programlisting>
      static const struct iio_chan_spec light_channels[] = {
          {
              .type = IIO_VOLTAGE,
              .indexed = 1,
              .channel = 0,
              .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
          },
          {
              .type = IIO_VOLTAGE,
              .indexed = 1,
              .channel = 1,
              .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
          },
      }
      </programlisting>
      This will generate two separate attributes files for raw data
      retrieval:
      <itemizedlist>
      <listitem>
        <filename>/sys/bus/iio/devices/iio:deviceX/in_voltage0_raw</filename>,
          representing voltage measurement for channel 0.
      </listitem>
      <listitem>
        <filename>/sys/bus/iio/devices/iio:deviceX/in_voltage1_raw</filename>,
          representing voltage measurement for channel 1.
      </listitem>
      </itemizedlist>
      </para>
    </sect2>
  </sect1>
  <sect1 id="iiobuffer"> <title> Industrial I/O buffers </title>
 !Finclude/linux/iio/buffer.h iio_buffer
 !Edrivers/iio/industrialio-buffer.c
    <para>
    The Industrial I/O core offers a way for continuous data capture
    based on a trigger source. Multiple data channels can be read at once
    from <filename>/dev/iio:deviceX</filename> character device node,
    thus reducing the CPU load.
    </para>
    <sect2 id="iiobuffersysfs">
    <title>IIO buffer sysfs interface </title>
    <para>
      An IIO buffer has an associated attributes directory under <filename>
      /sys/bus/iio/iio:deviceX/buffer/</filename>. Here are the existing
      attributes:
      <itemizedlist>
      <listitem>
      <emphasis>length</emphasis>, the total number of data samples
      (capacity) that can be stored by the buffer.
      </listitem>
      <listitem>
        <emphasis>enable</emphasis>, activate buffer capture.
      </listitem>
      </itemizedlist>
    </para>
    </sect2>
    <sect2 id="iiobuffersetup"> <title> IIO buffer setup </title>
      <para>The meta information associated with a channel reading
        placed in a buffer is called a <emphasis> scan element </emphasis>.
        The important bits configuring scan elements are exposed to
        userspace applications via the <filename>
        /sys/bus/iio/iio:deviceX/scan_elements/</filename> directory. This
        directory contains attributes of the following form:
      <itemizedlist>
      <listitem><emphasis>enable</emphasis>, used for enabling a channel.
        If and only if its attribute is non zero, then a triggered capture
        will contain data samples for this channel.
      </listitem>
      <listitem><emphasis>type</emphasis>, description of the scan element
        data storage within the buffer and hence the form in which it is
        read from user space. Format is <emphasis>
        [be|le]:[s|u]bits/storagebitsXrepeat[>>shift] </emphasis>.
        <itemizedlist>
        <listitem> <emphasis>be</emphasis> or <emphasis>le</emphasis>, specifies
          big or little endian.
        </listitem>
        <listitem>
        <emphasis>s </emphasis>or <emphasis>u</emphasis>, specifies if
          signed (2's complement) or unsigned.
        </listitem>
        <listitem><emphasis>bits</emphasis>, is the number of valid data
          bits.
        </listitem>
        <listitem><emphasis>storagebits</emphasis>, is the number of bits
          (after padding) that it occupies in the buffer.
        </listitem>
        <listitem>
        <emphasis>shift</emphasis>, if specified, is the shift that needs
          to be applied prior to masking out unused bits.
        </listitem>
        <listitem>
        <emphasis>repeat</emphasis>, specifies the number of bits/storagebits
        repetitions. When the repeat element is 0 or 1, then the repeat
        value is omitted.
        </listitem>
        </itemizedlist>
      </listitem>
      </itemizedlist>
      For example, a driver for a 3-axis accelerometer with 12-bit
      resolution where data is stored in two 8-bit registers as
      follows:
      <programlisting>
        7   6   5   4   3   2   1   0
      +---+---+---+---+---+---+---+---+
      |D3 |D2 |D1 |D0 | X | X | X | X | (LOW byte, address 0x06)
      +---+---+---+---+---+---+---+---+
        7   6   5   4   3   2   1   0
      +---+---+---+---+---+---+---+---+
      |D11|D10|D9 |D8 |D7 |D6 |D5 |D4 | (HIGH byte, address 0x07)
      +---+---+---+---+---+---+---+---+
      </programlisting>
      will have the following scan element type for each axis:
      <programlisting>
      $ cat /sys/bus/iio/devices/iio:device0/scan_elements/in_accel_y_type
      le:s12/16>>4
      </programlisting>
      A user space application will interpret data samples read from the
      buffer as two-byte little endian signed data that needs a 4-bit
      right shift before masking out the 12 valid bits of data.
    </para>
    <para>
      For implementing buffer support a driver should initialize the following
      fields in <type>iio_chan_spec</type> definition:
      <programlisting>
          struct iio_chan_spec {
              /* other members */
              int scan_index
              struct {
                  char sign;
                  u8 realbits;
                  u8 storagebits;
                  u8 shift;
                  u8 repeat;
                  enum iio_endian endianness;
              } scan_type;
          };
      </programlisting>
      The driver implementing the accelerometer described above will
      have the following channel definition:
      <programlisting>
      struct iio_chan_spec accel_channels[] = {
          {
            .type = IIO_ACCEL,
            .modified = 1,
            .channel2 = IIO_MOD_X,
            /* other stuff here */
            .scan_index = 0,
            .scan_type = {
              .sign = 's',
              .realbits = 12,
              .storagebits = 16,
              .shift = 4,
              .endianness = IIO_LE,
            },
        }
        /* similar for Y (with channel2 = IIO_MOD_Y, scan_index = 1)
         * and Z (with channel2 = IIO_MOD_Z, scan_index = 2) axis
         */
    }
    </programlisting>
    </para>
    <para>
    Here <emphasis> scan_index </emphasis> defines the order in which
    the enabled channels are placed inside the buffer. Channels with a lower
    scan_index will be placed before channels with a higher index. Each
    channel needs to have a unique scan_index.
    </para>
    <para>
    Setting scan_index to -1 can be used to indicate that the specific
    channel does not support buffered capture. In this case no entries will
    be created for the channel in the scan_elements directory.
    </para>
    </sect2>
  </sect1>
  <sect1 id="iiotrigger"> <title> Industrial I/O triggers  </title>
 !Finclude/linux/iio/trigger.h iio_trigger
 !Edrivers/iio/industrialio-trigger.c
    <para>
      In many situations it is useful for a driver to be able to
      capture data based on some external event (trigger) as opposed
      to periodically polling for data. An IIO trigger can be provided
      by a device driver that also has an IIO device based on hardware
      generated events (e.g. data ready or threshold exceeded) or
      provided by a separate driver from an independent interrupt
      source (e.g. GPIO line connected to some external system, timer
      interrupt or user space writing a specific file in sysfs). A
      trigger may initiate data capture for a number of sensors and
      also it may be completely unrelated to the sensor itself.
    </para>
    <sect2 id="iiotrigsysfs"> <title> IIO trigger sysfs interface </title>
      There are two locations in sysfs related to triggers:
      <itemizedlist>
        <listitem><filename>/sys/bus/iio/devices/triggerY</filename>,
          this file is created once an IIO trigger is registered with
          the IIO core and corresponds to trigger with index Y. Because
          triggers can be very different depending on type there are few
          standard attributes that we can describe here:
          <itemizedlist>
            <listitem>
              <emphasis>name</emphasis>, trigger name that can be later
                used for association with a device.
            </listitem>
            <listitem>
            <emphasis>sampling_frequency</emphasis>, some timer based
              triggers use this attribute to specify the frequency for
              trigger calls.
            </listitem>
          </itemizedlist>
        </listitem>
        <listitem>
          <filename>/sys/bus/iio/devices/iio:deviceX/trigger/</filename>, this
          directory is created once the device supports a triggered
          buffer. We can associate a trigger with our device by writing
          the trigger's name in the <filename>current_trigger</filename> file.
        </listitem>
      </itemizedlist>
    </sect2>
    <sect2 id="iiotrigattr"> <title> IIO trigger setup</title>
    <para>
      Let's see a simple example of how to setup a trigger to be used
      by a driver.
      <programlisting>
      struct iio_trigger_ops trigger_ops = {
          .set_trigger_state = sample_trigger_state,
          .validate_device = sample_validate_device,
      }
      struct iio_trigger *trig;
      /* first, allocate memory for our trigger */
      trig = iio_trigger_alloc(dev, "trig-%s-%d", name, idx);
      /* setup trigger operations field */
      trig->ops = &amp;trigger_ops;
      /* now register the trigger with the IIO core */
      iio_trigger_register(trig);
      </programlisting>
    </para>
    </sect2>
    <sect2 id="iiotrigsetup"> <title> IIO trigger ops</title>
 !Finclude/linux/iio/trigger.h iio_trigger_ops
     <para>
        Notice that a trigger has a set of operations attached:
        <itemizedlist>
        <listitem>
          <function>set_trigger_state</function>, switch the trigger on/off
          on demand.
        </listitem>
        <listitem>
          <function>validate_device</function>, function to validate the
          device when the current trigger gets changed.
        </listitem>
        </itemizedlist>
      </para>
    </sect2>
  </sect1>
  <sect1 id="iiotriggered_buffer">
    <title> Industrial I/O triggered buffers </title>
    <para>
    Now that we know what buffers and triggers are let's see how they
    work together.
    </para>
    <sect2 id="iiotrigbufsetup"> <title> IIO triggered buffer setup</title>
 !Edrivers/iio/buffer/industrialio-triggered-buffer.c
 !Finclude/linux/iio/iio.h iio_buffer_setup_ops
    <para>
    A typical triggered buffer setup looks like this:
    <programlisting>
    const struct iio_buffer_setup_ops sensor_buffer_setup_ops = {
      .preenable    = sensor_buffer_preenable,
      .postenable   = sensor_buffer_postenable,
      .postdisable  = sensor_buffer_postdisable,
      .predisable   = sensor_buffer_predisable,
    };
    irqreturn_t sensor_iio_pollfunc(int irq, void *p)
    {
        pf->timestamp = iio_get_time_ns((struct indio_dev *)p);
        return IRQ_WAKE_THREAD;
    }
    irqreturn_t sensor_trigger_handler(int irq, void *p)
    {
        u16 buf[8];
        int i = 0;
        /* read data for each active channel */
        for_each_set_bit(bit, active_scan_mask, masklength)
            buf[i++] = sensor_get_data(bit)
        iio_push_to_buffers_with_timestamp(indio_dev, buf, timestamp);
        iio_trigger_notify_done(trigger);
        return IRQ_HANDLED;
    }
    /* setup triggered buffer, usually in probe function */
    iio_triggered_buffer_setup(indio_dev, sensor_iio_pollfunc,
                               sensor_trigger_handler,
                               sensor_buffer_setup_ops);
    </programlisting>
    </para>
    The important things to notice here are:
    <itemizedlist>
    <listitem><function> iio_buffer_setup_ops</function>, the buffer setup
    functions to be called at predefined points in the buffer configuration
    sequence (e.g. before enable, after disable). If not specified, the
    IIO core uses the default <type>iio_triggered_buffer_setup_ops</type>.
    </listitem>
    <listitem><function>sensor_iio_pollfunc</function>, the function that
    will be used as top half of poll function. It should do as little
    processing as possible, because it runs in interrupt context. The most
    common operation is recording of the current timestamp and for this reason
    one can use the IIO core defined <function>iio_pollfunc_store_time
    </function> function.
    </listitem>
    <listitem><function>sensor_trigger_handler</function>, the function that
    will be used as bottom half of the poll function. This runs in the
    context of a kernel thread and all the processing takes place here.
    It usually reads data from the device and stores it in the internal
    buffer together with the timestamp recorded in the top half.
    </listitem>
    </itemizedlist>
    </sect2>
  </sect1>
  </chapter>
  <chapter id='iioresources'>
    <title> Resources </title>
      The IIO core may change over time, so the best documentation to read is the
      source code. There are several locations where you should look:
      <itemizedlist>
        <listitem>
          <filename>drivers/iio/</filename>, contains the IIO core plus
          directories for each sensor type (e.g. accel, magnetometer,
          etc.)
        </listitem>
        <listitem>
          <filename>include/linux/iio/</filename>, contains the header
          files, nice to read for the internal kernel interfaces.
        </listitem>
        <listitem>
        <filename>include/uapi/linux/iio/</filename>, contains files to be
          used by user space applications.
        </listitem>
        <listitem>
         <filename>tools/iio/</filename>, contains tools for rapidly
          testing buffers, events and device creation.
        </listitem>
        <listitem>
          <filename>drivers/staging/iio/</filename>, contains code for some
          drivers or experimental features that are not yet mature enough
          to be moved out.
        </listitem>
      </itemizedlist>
    <para>
    Besides the code, there are some good online documentation sources:
    <itemizedlist>
    <listitem>
      <ulink url="http://marc.info/?l=linux-iio"> Industrial I/O mailing
      list </ulink>
    </listitem>
    <listitem>
      <ulink url="http://wiki.analog.com/software/linux/docs/iio/iio">
      Analog Device IIO wiki page </ulink>
    </listitem>
    <listitem>
      <ulink url="https://fosdem.org/2015/schedule/event/iiosdr/">
      Using the Linux IIO framework for SDR, Lars-Peter Clausen's
      presentation at FOSDEM </ulink>
    </listitem>
    </itemizedlist>
    </para>
  </chapter>
 </book>
 <!--
 vim: softtabstop=2:shiftwidth=2:expandtab:textwidth=72
 -->
--- a/Documentation/DocBook/regulator.tmpl
+++ b/Documentation/DocBook/regulator.tmpl
@ -1,304 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
 	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
 <book id="regulator-api">
 <bookinfo>
  <title>Voltage and current regulator API</title>
  <authorgroup>
   <author>
    <firstname>Liam</firstname>
    <surname>Girdwood</surname>
    <affiliation>
     <address>
      <email>lrg@slimlogic.co.uk</email>
     </address>
    </affiliation>
   </author>
   <author>
    <firstname>Mark</firstname>
    <surname>Brown</surname>
    <affiliation>
     <orgname>Wolfson Microelectronics</orgname>
     <address>
      <email>broonie@opensource.wolfsonmicro.com</email>
     </address>
    </affiliation>
   </author>
  </authorgroup>
  <copyright>
   <year>2007-2008</year>
   <holder>Wolfson Microelectronics</holder>
  </copyright>
  <copyright>
   <year>2008</year>
   <holder>Liam Girdwood</holder>
  </copyright>
  <legalnotice>
   <para>
     This documentation is free software; you can redistribute
     it and/or modify it under the terms of the GNU General Public
     License version 2 as published by the Free Software Foundation.
   </para>
   <para>
     This program is distributed in the hope that it will be
     useful, but WITHOUT ANY WARRANTY; without even the implied
     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     See the GNU General Public License for more details.
   </para>
   <para>
     You should have received a copy of the GNU General Public
     License along with this program; if not, write to the Free
     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
     MA 02111-1307 USA
   </para>
   <para>
     For more details see the file COPYING in the source
     distribution of Linux.
   </para>
  </legalnotice>
 </bookinfo>
 <toc></toc>
  <chapter id="intro">
    <title>Introduction</title>
    <para>
 	This framework is designed to provide a standard kernel
 	interface to control voltage and current regulators.
    </para>
    <para>
 	The intention is to allow systems to dynamically control
 	regulator power output in order to save power and prolong
 	battery life.  This applies to both voltage regulators (where
 	voltage output is controllable) and current sinks (where current
 	limit is controllable).
    </para>
    <para>
 	Note that additional (and currently more complete) documentation
 	is available in the Linux kernel source under
 	<filename>Documentation/power/regulator</filename>.
    </para>
    <sect1 id="glossary">
       <title>Glossary</title>
       <para>
 	The regulator API uses a number of terms which may not be
 	familiar:
       </para>
       <glossary>
         <glossentry>
 	   <glossterm>Regulator</glossterm>
 	   <glossdef>
 	     <para>
 	Electronic device that supplies power to other devices.  Most
 	regulators can enable and disable their output and some can also
 	control their output voltage or current.
 	     </para>
 	   </glossdef>
         </glossentry>
 	 <glossentry>
 	   <glossterm>Consumer</glossterm>
 	   <glossdef>
 	     <para>
 	Electronic device which consumes power provided by a regulator.
 	These may either be static, requiring only a fixed supply, or
 	dynamic, requiring active management of the regulator at
 	runtime.
 	     </para>
 	   </glossdef>
 	 </glossentry>
 	 <glossentry>
 	   <glossterm>Power Domain</glossterm>
 	   <glossdef>
 	     <para>
 	The electronic circuit supplied by a given regulator, including
 	the regulator and all consumer devices.  The configuration of
 	the regulator is shared between all the components in the
 	circuit.
 	     </para>
 	   </glossdef>
 	 </glossentry>
 	 <glossentry>
 	   <glossterm>Power Management Integrated Circuit</glossterm>
 	   <acronym>PMIC</acronym>
 	   <glossdef>
 	     <para>
 	An IC which contains numerous regulators and often also other
 	subsystems.  In an embedded system the primary PMIC is often
 	equivalent to a combination of the PSU and southbridge in a
 	desktop system.
 	     </para>
 	   </glossdef>
 	 </glossentry>
 	</glossary>
     </sect1>
  </chapter>
  <chapter id="consumer">
     <title>Consumer driver interface</title>
     <para>
       This offers a similar API to the kernel clock framework.
       Consumer drivers use <link
       linkend='API-regulator-get'>get</link> and <link
       linkend='API-regulator-put'>put</link> operations to acquire and
       release regulators.  Functions are
       provided to <link linkend='API-regulator-enable'>enable</link>
       and <link linkend='API-regulator-disable'>disable</link> the
       regulator and to get and set the runtime parameters of the
       regulator.
     </para>
     <para>
       When requesting regulators consumers use symbolic names for their
       supplies, such as "Vcc", which are mapped into actual regulator
       devices by the machine interface.
     </para>
     <para>
 	A stub version of this API is provided when the regulator
 	framework is not in use in order to minimise the need to use
 	ifdefs.
     </para>
     <sect1 id="consumer-enable">
       <title>Enabling and disabling</title>
       <para>
         The regulator API provides reference counted enabling and
 	 disabling of regulators. Consumer devices use the <function><link
 	 linkend='API-regulator-enable'>regulator_enable</link></function>
 	 and <function><link
 	 linkend='API-regulator-disable'>regulator_disable</link>
 	 </function> functions to enable and disable regulators.  Calls
 	 to the two functions must be balanced.
       </para>
       <para>
         Note that since multiple consumers may be using a regulator and
 	 machine constraints may not allow the regulator to be disabled
 	 there is no guarantee that calling
 	 <function>regulator_disable</function> will actually cause the
 	 supply provided by the regulator to be disabled. Consumer
 	 drivers should assume that the regulator may be enabled at all
 	 times.
       </para>
     </sect1>
     <sect1 id="consumer-config">
       <title>Configuration</title>
       <para>
         Some consumer devices may need to be able to dynamically
 	 configure their supplies.  For example, MMC drivers may need to
 	 select the correct operating voltage for their cards.  This may
 	 be done while the regulator is enabled or disabled.
       </para>
       <para>
 	 The <function><link
 	 linkend='API-regulator-set-voltage'>regulator_set_voltage</link>
 	 </function> and <function><link
 	 linkend='API-regulator-set-current-limit'
 	 >regulator_set_current_limit</link>
 	 </function> functions provide the primary interface for this.
 	 Both take ranges of voltages and currents, supporting drivers
 	 that do not require a specific value (eg, CPU frequency scaling
 	 normally permits the CPU to use a wider range of supply
 	 voltages at lower frequencies but does not require that the
 	 supply voltage be lowered).  Where an exact value is required
 	 both minimum and maximum values should be identical.
       </para>
     </sect1>
     <sect1 id="consumer-callback">
       <title>Callbacks</title>
       <para>
 	  Callbacks may also be <link
 	  linkend='API-regulator-register-notifier'>registered</link>
 	  for events such as regulation failures.
       </para>
     </sect1>
   </chapter>
   <chapter id="driver">
     <title>Regulator driver interface</title>
     <para>
       Drivers for regulator chips <link
       linkend='API-regulator-register'>register</link> the regulators
       with the regulator core, providing operations structures to the
       core.  A <link
       linkend='API-regulator-notifier-call-chain'>notifier</link> interface
       allows error conditions to be reported to the core.
     </para>
     <para>
       Registration should be triggered by explicit setup done by the
       platform, supplying a <link
       linkend='API-struct-regulator-init-data'>struct
       regulator_init_data</link> for the regulator containing
       <link linkend='machine-constraint'>constraint</link> and
       <link linkend='machine-supply'>supply</link> information.
     </para>
   </chapter>
   <chapter id="machine">
     <title>Machine interface</title>
     <para>
       This interface provides a way to define how regulators are
       connected to consumers on a given system and what the valid
       operating parameters are for the system.
     </para>
     <sect1 id="machine-supply">
       <title>Supplies</title>
       <para>
         Regulator supplies are specified using <link
 	 linkend='API-struct-regulator-consumer-supply'>struct
 	 regulator_consumer_supply</link>.  This is done at
 	 <link linkend='driver'>driver registration
 	 time</link> as part of the machine constraints.
       </para>
     </sect1>
     <sect1 id="machine-constraint">
       <title>Constraints</title>
       <para>
 	 As well as defining the connections the machine interface
 	 also provides constraints defining the operations that
 	 clients are allowed to perform and the parameters that may be
 	 set.  This is required since generally regulator devices will
 	 offer more flexibility than it is safe to use on a given
 	 system, for example supporting higher supply voltages than the
 	 consumers are rated for.
       </para>
       <para>
 	 This is done at <link linkend='driver'>driver
 	 registration time</link> by providing a <link
 	 linkend='API-struct-regulation-constraints'>struct
 	 regulation_constraints</link>.
       </para>
       <para>
         The constraints may also specify an initial configuration for the
         regulator in the constraints, which is particularly useful for
         use with static consumers.
       </para>
     </sect1>
  </chapter>
  <chapter id="api">
    <title>API reference</title>
    <para>
      Due to limitations of the kernel documentation framework and the
      existing layout of the source code the entire regulator API is
      documented here.
    </para>
 !Iinclude/linux/regulator/consumer.h
 !Iinclude/linux/regulator/machine.h
 !Iinclude/linux/regulator/driver.h
 !Edrivers/regulator/core.c
  </chapter>
 </book>
--- a/Documentation/Makefile.sphinx
+++ b/Documentation/Makefile.sphinx
@ -43,7 +43,7 @@ ALLSPHINXOPTS   =  $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
 I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 # commands; the 'cmd' from scripts/Kbuild.include is not *loopable*
-loop_cmd = $(echo-cmd) $(cmd_$(1))
+loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
 # $2 sphinx builder e.g. "html"
 # $3 name of the build subfolder / e.g. "media", used as:
@ -54,7 +54,8 @@ loop_cmd = $(echo-cmd) $(cmd_$(1))
 #    e.g. "media" for the linux-tv book-set at ./Documentation/media
 quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
-      cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2;\
+      cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2 && \
 	PYTHONDONTWRITEBYTECODE=1 \
 	BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
 	$(SPHINXBUILD) \
 	-b $2 \
@ -63,13 +64,16 @@ quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
 	-D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
 	$(ALLSPHINXOPTS) \
 	$(abspath $(srctree)/$(src)/$5) \
-	$(abspath $(BUILDDIR)/$3/$4);
+	$(abspath $(BUILDDIR)/$3/$4)
 htmldocs:
-	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
+	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
 linkcheckdocs:
 	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
 latexdocs:
-	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
+	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
 ifeq ($(HAVE_PDFLATEX),0)
@ -80,27 +84,34 @@ pdfdocs:
 else # HAVE_PDFLATEX
 pdfdocs: latexdocs
-	$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex;)
+	$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
 endif # HAVE_PDFLATEX
 epubdocs:
-	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
+	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
 xmldocs:
-	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
+	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
 endif # HAVE_SPHINX
 # The following targets are independent of HAVE_SPHINX, and the rules should
 # work or silently pass without Sphinx.
 # no-ops for the Sphinx toolchain
 sgmldocs:
 	@:
 psdocs:
 	@:
 mandocs:
 	@:
 installmandocs:
 	@:
 cleandocs:
 	$(Q)rm -rf $(BUILDDIR)
-	$(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) -C Documentation/media clean
+	$(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media clean
 endif # HAVE_SPHINX
 dochelp:
 	@echo  ' Linux kernel internal documentation in different formats (Sphinx):'
@ -109,6 +120,7 @@ dochelp:
 	@echo  '  pdfdocs         - PDF'
 	@echo  '  epubdocs        - EPUB'
 	@echo  '  xmldocs         - XML'
 	@echo  '  linkcheckdocs   - check for broken external links (will connect to external hosts)'
 	@echo  '  cleandocs       - clean all generated files'
 	@echo
 	@echo  '  make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@ -17,7 +17,7 @@ What is Linux?
  loading, shared copy-on-write executables, proper memory management,
  and multistack networking including IPv4 and IPv6.
-  It is distributed under the GNU General Public License - see the
+  It is distributed under the GNU General Public License v2 - see the
  accompanying COPYING file for more details.
 On what hardware does it run?
@ -236,7 +236,7 @@ Configuring the kernel
    - Having unnecessary drivers will make the kernel bigger, and can
      under some circumstances lead to problems: probing for a
-      nonexistent controller card may confuse your other controllers
+      nonexistent controller card may confuse your other controllers.
    - A kernel with math-emulation compiled in will still use the
      coprocessor if one is present: the math emulation will just
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@ -93,9 +93,9 @@ Command Language Reference
 At the lexical level, a command comprises a sequence of words separated
 by spaces or tabs.  So these are all equivalent::
-  nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' >
+  nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
 				<debugfs>/dynamic_debug/control
-  nullarbor:~ # echo -c '  file   svcsock.c     line  1603 +p  ' >
+  nullarbor:~ # echo -n '  file   svcsock.c     line  1603 +p  ' >
 				<debugfs>/dynamic_debug/control
  nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
 				<debugfs>/dynamic_debug/control
--- a/Documentation/block/pr.txt
+++ b/Documentation/block/pr.txt
@ -90,7 +90,7 @@ and thus removes any access restriction implied by it.
 4. IOC_PR_PREEMPT
 This ioctl command releases the existing reservation referred to by
-old_key and replaces it with a a new reservation of type for the
+old_key and replaces it with a new reservation of type for the
 reservation key new_key.
--- a/Documentation/cgroup-v1/cpusets.txt
+++ b/Documentation/cgroup-v1/cpusets.txt
@ -615,7 +615,7 @@ to allocate a page of memory for that task.
 If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
 will have its allowed CPU placement changed immediately.  Similarly,
-if a task's pid is written to another cpusets 'cpuset.tasks' file, then its
+if a task's pid is written to another cpuset's 'tasks' file, then its
 allowed CPU placement is changed immediately.  If such a task had been
 bound to some subset of its cpuset using the sched_setaffinity() call,
 the task will be allowed to run on any CPU allowed in its new cpuset,
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@ -58,7 +58,7 @@ master_doc = 'index'
 # General information about the project.
 project = 'The Linux Kernel'
-copyright = '2016, The kernel development community'
+copyright = 'The kernel development community'
 author = 'The kernel development community'
 # The version info for the project you're documenting, acts as replacement for
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@ -0,0 +1,372 @@
 =========================
 CPU hotplug in the Kernel
 =========================
 :Date: December, 2016
 :Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
          Rusty Russell <rusty@rustcorp.com.au>,
          Srivatsa Vaddagiri <vatsa@in.ibm.com>,
          Ashok Raj <ashok.raj@intel.com>,
          Joel Schopp <jschopp@austin.ibm.com>
 Introduction
 ============
 Modern advances in system architectures have introduced advanced error
 reporting and correction capabilities in processors. There are a couple of OEMs that
 support NUMA hardware which are hot pluggable as well, where physical node
 insertion and removal require support for CPU hotplug.
 Such advances require CPUs available to a kernel to be removed either for
 provisioning reasons, or for RAS purposes to keep an offending CPU off
 system execution path. Hence the need for CPU hotplug support in the
 Linux kernel.
 A more novel use of CPU-hotplug support is its use today in suspend resume
 support for SMP. Dual-core and HT support makes even a laptop run SMP kernels
 which didn't support these methods.
 Command Line Switches
 =====================
 ``maxcpus=n``
  Restrict boot time CPUs to *n*. Say if you have four CPUs, using
  ``maxcpus=2`` will only boot two. You can choose to bring the
  other CPUs later online.
 ``nr_cpus=n``
  Restrict the total amount of CPUs the kernel will support. If the number
  supplied here is lower than the number of physically available CPUs, then
  those CPUs can not be brought online later.
 ``additional_cpus=n``
  Use this to limit hotpluggable CPUs. This option sets
  ``cpu_possible_mask = cpu_present_mask + additional_cpus``
  This option is limited to the IA64 architecture.
 ``possible_cpus=n``
  This option sets ``possible_cpus`` bits in ``cpu_possible_mask``.
  This option is limited to the X86 and S390 architecture.
 ``cede_offline={"off","on"}``
  Use this option to disable/enable putting offlined processors to an extended
  ``H_CEDE`` state on supported pseries platforms. If nothing is specified,
  ``cede_offline`` is set to "on".
  This option is limited to the PowerPC architecture.
 ``cpu0_hotplug``
  Allow to shutdown CPU0.
  This option is limited to the X86 architecture.
 CPU maps
 ========
 ``cpu_possible_mask``
  Bitmap of possible CPUs that can ever be available in the
  system. This is used to allocate some boot time memory for per_cpu variables
  that aren't designed to grow/shrink as CPUs are made available or removed.
  Once set during boot time discovery phase, the map is static, i.e no bits
  are added or removed anytime. Trimming it accurately for your system needs
  upfront can save some boot time memory.
 ``cpu_online_mask``
  Bitmap of all CPUs currently online. It's set in ``__cpu_up()``
  after a CPU is available for kernel scheduling and ready to receive
  interrupts from devices. It's cleared when a CPU is brought down using
  ``__cpu_disable()``, before which all OS services including interrupts are
  migrated to another target CPU.
 ``cpu_present_mask``
  Bitmap of CPUs currently present in the system. Not all
  of them may be online. When physical hotplug is processed by the relevant
  subsystem (e.g. ACPI), the map can change: a bit is either added to or removed
  from the map depending on whether the event is a hot-add or a hot-remove.
  There are currently no locking rules. Typical usage is to init topology during boot,
  at which time hotplug is disabled.
 You really don't need to manipulate any of the system CPU maps. They should
 be read-only for most use. When setting up per-cpu resources almost always use
 ``cpu_possible_mask`` or ``for_each_possible_cpu()`` to iterate. The macro
 ``for_each_cpu()`` can be used to iterate over a custom CPU mask.
 Never use anything other than ``cpumask_t`` to represent a bitmap of CPUs.
 Using CPU hotplug
 =================
 The kernel option *CONFIG_HOTPLUG_CPU* needs to be enabled. It is currently
 available on multiple architectures including ARM, MIPS, PowerPC and X86. The
 configuration is done via the sysfs interface: ::
 $ ls -lh /sys/devices/system/cpu
 total 0
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu0
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu1
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu2
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu3
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu4
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu5
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu6
 drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu7
 drwxr-xr-x  2 root root    0 Dec 21 16:33 hotplug
 -r--r--r--  1 root root 4.0K Dec 21 16:33 offline
 -r--r--r--  1 root root 4.0K Dec 21 16:33 online
 -r--r--r--  1 root root 4.0K Dec 21 16:33 possible
 -r--r--r--  1 root root 4.0K Dec 21 16:33 present
 The files *offline*, *online*, *possible*, *present* represent the CPU masks.
 Each CPU folder contains an *online* file which controls the logical on (1) and
 off (0) state. To logically shutdown CPU4: ::
 $ echo 0 > /sys/devices/system/cpu/cpu4/online
  smpboot: CPU 4 is now offline
 Once the CPU is shutdown, it will be removed from */proc/interrupts*,
 */proc/cpuinfo* and should also not be shown by the *top* command. To
 bring CPU4 back online: ::
 $ echo 1 > /sys/devices/system/cpu/cpu4/online
 smpboot: Booting Node 0 Processor 4 APIC 0x1
 The CPU is usable again. This should work on all CPUs. CPU0 is often special
 and excluded from CPU hotplug. On X86 the kernel option
 *CONFIG_BOOTPARAM_HOTPLUG_CPU0* has to be enabled in order to be able to
 shutdown CPU0. Alternatively the kernel command option *cpu0_hotplug* can be
 used. Some known dependencies of CPU0:
 * Resume from hibernate/suspend. Hibernate/suspend will fail if CPU0 is offline.
 * PIC interrupts. CPU0 can't be removed if a PIC interrupt is detected.
 Please let Fenghua Yu <fenghua.yu@intel.com> know if you find any dependencies
 on CPU0.
 The CPU hotplug coordination
 ============================
 The offline case
 ----------------
 Once a CPU has been logically shutdown the teardown callbacks of registered
 hotplug states will be invoked, starting with ``CPUHP_ONLINE`` and terminating
 at state ``CPUHP_OFFLINE``. This includes:
 * If tasks are frozen due to a suspend operation then *cpuhp_tasks_frozen*
  will be set to true.
 * All processes are migrated away from this outgoing CPU to new CPUs.
  The new CPU is chosen from each process' current cpuset, which may be
  a subset of all online CPUs.
 * All interrupts targeted to this CPU are migrated to a new CPU
 * timers are also migrated to a new CPU
 * Once all services are migrated, kernel calls an arch specific routine
  ``__cpu_disable()`` to perform arch specific cleanup.
 Using the hotplug API
 ---------------------
 It is possible to receive notifications once a CPU is offlined or onlined. This
 might be important to certain drivers which need to perform some kind of setup
 or clean up functions based on the number of available CPUs: ::
  #include <linux/cpuhotplug.h>
  ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
                          Y_online, Y_prepare_down);
 *X* is the subsystem and *Y* the particular driver. The *Y_online* callback
 will be invoked during registration on all online CPUs. If an error
 occurs during the online callback the *Y_prepare_down* callback will be
 invoked on all CPUs on which the online callback was previously invoked.
 After registration completed, the *Y_online* callback will be invoked
 once a CPU is brought online and *Y_prepare_down* will be invoked when a
 CPU is shutdown. All resources which were previously allocated in
 *Y_online* should be released in *Y_prepare_down*.
 The return value *ret* is negative if an error occurred during the
 registration process. Otherwise a positive value is returned which
 contains the allocated hotplug state for dynamically allocated states
 (*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states.
 The callback can be removed by invoking ``cpuhp_remove_state()``. In case of a
 dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state.
 During the removal of a hotplug state the teardown callback will be invoked.
 Multiple instances
 ~~~~~~~~~~~~~~~~~~
 If a driver has multiple instances and each instance needs to perform the
 callback independently then it is likely that a ''multi-state'' should be used.
 First a multi-state state needs to be registered: ::
  ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online",
                                Y_online, Y_prepare_down);
  Y_hp_online = ret;
 The ``cpuhp_setup_state_multi()`` behaves similarly to ``cpuhp_setup_state()``
 except it prepares the callbacks for a multi state and does not invoke
 the callbacks. This is a one time setup.
 Once a new instance is allocated, you need to register this new instance: ::
  ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
 This function will add this instance to your previously allocated
 *Y_hp_online* state and invoke the previously registered callback
 (*Y_online*) on all online CPUs. The *node* element is a ``struct
 hlist_node`` member of your per-instance data structure.
 On removal of the instance: ::
  cpuhp_state_remove_instance(Y_hp_online, &d->node)
 should be invoked which will invoke the teardown callback on all online
 CPUs.
 Manual setup
 ~~~~~~~~~~~~
 Usually it is handy to invoke setup and teardown callbacks on registration or
 removal of a state because usually the operation needs to be performed once a CPU
 goes online (offline) and during initial setup (shutdown) of the driver. However
 each registration and removal function is also available with a ``_nocalls``
 suffix which does not invoke the provided callbacks if the invocation of the
 callbacks is not desired. During the manual setup (or teardown) the functions
 ``get_online_cpus()`` and ``put_online_cpus()`` should be used to inhibit CPU
 hotplug operations.
 The ordering of the events
 --------------------------
 The hotplug states are defined in ``include/linux/cpuhotplug.h``:
 * The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the
  CPU is up.
 * The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked
  just after the CPU has been brought up. The interrupts are off and
  the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE*
  the callbacks are invoked on the target CPU.
 * The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are
  reserved for the dynamic allocation.
 * The states are invoked in the reverse order on CPU shutdown starting with
  *CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are
  invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*.
 A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough.
 However if an earlier invocation during the bring up or shutdown is required
 then an explicit state should be acquired. An explicit state might also be
 required if the hotplug event requires specific ordering in respect to
 another hotplug event.
 Testing of hotplug states
 =========================
 One way to verify whether a custom state is working as expected or not is to
 shutdown a CPU and then put it online again. It is also possible to put the CPU
 into a certain state (for instance *CPUHP_AP_ONLINE*) and then go back to
 *CPUHP_ONLINE*. This would simulate an error one state after *CPUHP_AP_ONLINE*
 which would lead to rollback to the online state.
 All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states``: ::
 $ tail /sys/devices/system/cpu/hotplug/states
 138: mm/vmscan:online
 139: mm/vmstat:online
 140: lib/percpu_cnt:online
 141: acpi/cpu-drv:online
 142: base/cacheinfo:online
 143: virtio/net:online
 144: x86/mce:online
 145: printk:online
 168: sched:active
 169: online
 To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
  $ cat /sys/devices/system/cpu/cpu4/hotplug/state
  169
  $ echo 140 > /sys/devices/system/cpu/cpu4/hotplug/target
  $ cat /sys/devices/system/cpu/cpu4/hotplug/state
  140
 It is important to note that the teardown callback of state 140 has been
 invoked. And now get back online: ::
  $ echo 169 > /sys/devices/system/cpu/cpu4/hotplug/target
  $ cat /sys/devices/system/cpu/cpu4/hotplug/state
  169
 With trace events enabled, the individual steps are visible, too: ::
  #  TASK-PID   CPU#    TIMESTAMP  FUNCTION
  #     | |       |        |         |
      bash-394  [001]  22.976: cpuhp_enter: cpu: 0004 target: 140 step: 169 (cpuhp_kick_ap_work)
   cpuhp/4-31   [004]  22.977: cpuhp_enter: cpu: 0004 target: 140 step: 168 (sched_cpu_deactivate)
   cpuhp/4-31   [004]  22.990: cpuhp_exit:  cpu: 0004  state: 168 step: 168 ret: 0
   cpuhp/4-31   [004]  22.991: cpuhp_enter: cpu: 0004 target: 140 step: 144 (mce_cpu_pre_down)
   cpuhp/4-31   [004]  22.992: cpuhp_exit:  cpu: 0004  state: 144 step: 144 ret: 0
   cpuhp/4-31   [004]  22.993: cpuhp_multi_enter: cpu: 0004 target: 140 step: 143 (virtnet_cpu_down_prep)
   cpuhp/4-31   [004]  22.994: cpuhp_exit:  cpu: 0004  state: 143 step: 143 ret: 0
   cpuhp/4-31   [004]  22.995: cpuhp_enter: cpu: 0004 target: 140 step: 142 (cacheinfo_cpu_pre_down)
   cpuhp/4-31   [004]  22.996: cpuhp_exit:  cpu: 0004  state: 142 step: 142 ret: 0
      bash-394  [001]  22.997: cpuhp_exit:  cpu: 0004  state: 140 step: 169 ret: 0
      bash-394  [005]  95.540: cpuhp_enter: cpu: 0004 target: 169 step: 140 (cpuhp_kick_ap_work)
   cpuhp/4-31   [004]  95.541: cpuhp_enter: cpu: 0004 target: 169 step: 141 (acpi_soft_cpu_online)
   cpuhp/4-31   [004]  95.542: cpuhp_exit:  cpu: 0004  state: 141 step: 141 ret: 0
   cpuhp/4-31   [004]  95.543: cpuhp_enter: cpu: 0004 target: 169 step: 142 (cacheinfo_cpu_online)
   cpuhp/4-31   [004]  95.544: cpuhp_exit:  cpu: 0004  state: 142 step: 142 ret: 0
   cpuhp/4-31   [004]  95.545: cpuhp_multi_enter: cpu: 0004 target: 169 step: 143 (virtnet_cpu_online)
   cpuhp/4-31   [004]  95.546: cpuhp_exit:  cpu: 0004  state: 143 step: 143 ret: 0
   cpuhp/4-31   [004]  95.547: cpuhp_enter: cpu: 0004 target: 169 step: 144 (mce_cpu_online)
   cpuhp/4-31   [004]  95.548: cpuhp_exit:  cpu: 0004  state: 144 step: 144 ret: 0
   cpuhp/4-31   [004]  95.549: cpuhp_enter: cpu: 0004 target: 169 step: 145 (console_cpu_notify)
   cpuhp/4-31   [004]  95.550: cpuhp_exit:  cpu: 0004  state: 145 step: 145 ret: 0
   cpuhp/4-31   [004]  95.551: cpuhp_enter: cpu: 0004 target: 169 step: 168 (sched_cpu_activate)
   cpuhp/4-31   [004]  95.552: cpuhp_exit:  cpu: 0004  state: 168 step: 168 ret: 0
      bash-394  [005]  95.553: cpuhp_exit:  cpu: 0004  state: 169 step: 140 ret: 0
 As it can be seen, CPU4 went down until timestamp 22.996 and then back up until
 95.552. All invoked callbacks including their return codes are visible in the
 trace.
 Architecture's requirements
 ===========================
 The following functions and configurations are required:
 ``CONFIG_HOTPLUG_CPU``
  This entry needs to be enabled in Kconfig
 ``__cpu_up()``
  Arch interface to bring up a CPU
 ``__cpu_disable()``
  Arch interface to shutdown a CPU, no more interrupts can be handled by the
  kernel after the routine returns. This includes the shutdown of the timer.
 ``__cpu_die()``
  This is actually supposed to ensure death of the CPU. Actually look at some
  example code in other arch that implement CPU hotplug. The processor is taken
  down from the ``idle()`` loop for that specific architecture. ``__cpu_die()``
  typically waits for some per_cpu state to be set, to ensure the processor dead
  routine is called to be sure positively.
 User Space Notification
 =======================
 After a CPU is successfully onlined or offlined, udev events are sent. A udev rule like: ::
  SUBSYSTEM=="cpu", DRIVERS=="processor", DEVPATH=="/devices/system/cpu/*", RUN+="the_hotplug_receiver.sh"
 will receive all events. A script like: ::
  #!/bin/sh
  if [ "${ACTION}" = "offline" ]
  then
      echo "CPU ${DEVPATH##*/} offline"
  elif [ "${ACTION}" = "online" ]
  then
      echo "CPU ${DEVPATH##*/} online"
  fi
 can process the event further.
 Kernel Inline Documentations Reference
 ======================================
 .. kernel-doc:: include/linux/cpuhotplug.h
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@ -13,6 +13,7 @@ Core utilities
   assoc_array
   atomic_ops
   cpu_hotplug
   local_ops
   workqueue
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@ -82,7 +82,9 @@ UltraSPARC-III
 -------
 Several "PowerBook" and "iBook2" notebooks are supported.
-
+The following POWER processors are supported in powernv mode:
 POWER8
 POWER9
 1.5 SuperH
 ----------
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@ -1,452 +0,0 @@
 		CPU hotplug Support in Linux(tm) Kernel
 		Maintainers:
 		CPU Hotplug Core:
 			Rusty Russell <rusty@rustcorp.com.au>
 			Srivatsa Vaddagiri <vatsa@in.ibm.com>
 		i386:
 			Zwane Mwaikambo <zwanem@gmail.com>
 		ppc64:
 			Nathan Lynch <nathanl@austin.ibm.com>
 			Joel Schopp <jschopp@austin.ibm.com>
 		ia64/x86_64:
 			Ashok Raj <ashok.raj@intel.com>
 		s390:
 			Heiko Carstens <heiko.carstens@de.ibm.com>
 Authors: Ashok Raj <ashok.raj@intel.com>
 Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
 	     Joel Schopp <jschopp@austin.ibm.com>
 Introduction
 Modern advances in system architectures have introduced advanced error
 reporting and correction capabilities in processors. CPU architectures permit
 partitioning support, where compute resources of a single CPU could be made
 available to virtual machine environments. There are couple OEMS that
 support NUMA hardware which are hot pluggable as well, where physical
 node insertion and removal require support for CPU hotplug.
 Such advances require CPUs available to a kernel to be removed either for
 provisioning reasons, or for RAS purposes to keep an offending CPU off
 system execution path. Hence the need for CPU hotplug support in the
 Linux kernel.
 A more novel use of CPU-hotplug support is its use today in suspend
 resume support for SMP. Dual-core and HT support makes even
 a laptop run SMP kernels which didn't support these methods. SMP support
 for suspend/resume is a work in progress.
 General Stuff about CPU Hotplug
 --------------------------------
 Command Line Switches
 ---------------------
 maxcpus=n    Restrict boot time cpus to n. Say if you have 4 cpus, using
             maxcpus=2 will only boot 2. You can choose to bring the
             other cpus later online, read FAQ's for more info.
 additional_cpus=n (*)	Use this to limit hotpluggable cpus. This option sets
  			cpu_possible_mask = cpu_present_mask + additional_cpus
 cede_offline={"off","on"}  Use this option to disable/enable putting offlined
 		            processors to an extended H_CEDE state on
 			    supported pseries platforms.
 			    If nothing is specified,
 			    cede_offline is set to "on".
 (*) Option valid only for following architectures
 - ia64
 ia64 uses the number of disabled local apics in ACPI tables MADT to
 determine the number of potentially hot-pluggable cpus. The implementation
 should only rely on this to count the # of cpus, but *MUST* not rely
 on the apicid values in those tables for disabled apics. In the event
 BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
 use this parameter "additional_cpus=x" to represent those cpus in the
 cpu_possible_mask.
 possible_cpus=n		[s390,x86_64] use this to set hotpluggable cpus.
 			This option sets possible_cpus bits in
 			cpu_possible_mask. Thus keeping the numbers of bits set
 			constant even if the machine gets rebooted.
 CPU maps and such
 -----------------
 [More on cpumaps and primitive to manipulate, please check
 include/linux/cpumask.h that has more descriptive text.]
 cpu_possible_mask: Bitmap of possible CPUs that can ever be available in the
 system. This is used to allocate some boot time memory for per_cpu variables
 that aren't designed to grow/shrink as CPUs are made available or removed.
 Once set during boot time discovery phase, the map is static, i.e no bits
 are added or removed anytime.  Trimming it accurately for your system needs
 upfront can save some boot time memory. See below for how we use heuristics
 in x86_64 case to keep this under check.
 cpu_online_mask: Bitmap of all CPUs currently online. It's set in __cpu_up()
 after a CPU is available for kernel scheduling and ready to receive
 interrupts from devices. It's cleared when a CPU is brought down using
 __cpu_disable(), before which all OS services including interrupts are
 migrated to another target CPU.
 cpu_present_mask: Bitmap of CPUs currently present in the system. Not all
 of them may be online. When physical hotplug is processed by the relevant
 subsystem (e.g ACPI) can change and new bit either be added or removed
 from the map depending on the event is hot-add/hot-remove. There are currently
 no locking rules as of now. Typical usage is to init topology during boot,
 at which time hotplug is disabled.
 You really dont need to manipulate any of the system cpu maps. They should
 be read-only for most use. When setting up per-cpu resources almost always use
 cpu_possible_mask/for_each_possible_cpu() to iterate.
 Never use anything other than cpumask_t to represent bitmap of CPUs.
 	#include <linux/cpumask.h>
 	for_each_possible_cpu     - Iterate over cpu_possible_mask
 	for_each_online_cpu       - Iterate over cpu_online_mask
 	for_each_present_cpu      - Iterate over cpu_present_mask
 	for_each_cpu(x,mask)      - Iterate over some random collection of cpu mask.
 	#include <linux/cpu.h>
 	get_online_cpus() and put_online_cpus():
 The above calls are used to inhibit cpu hotplug operations. While the
 cpu_hotplug.refcount is non zero, the cpu_online_mask will not change.
 If you merely need to avoid cpus going away, you could also use
 preempt_disable() and preempt_enable() for those sections.
 Just remember the critical section cannot call any
 function that can sleep or schedule this process away. The preempt_disable()
 will work as long as stop_machine_run() is used to take a cpu down.
 CPU Hotplug - Frequently Asked Questions.
 Q: How to enable my kernel to support CPU hotplug?
 A: When doing make defconfig, Enable CPU hotplug support
   "Processor type and Features" -> Support for Hotpluggable CPUs
 Make sure that you have CONFIG_SMP turned on as well.
 You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
 as well.
 Q: What architectures support CPU hotplug?
 A: As of 2.6.14, the following architectures support CPU hotplug.
 i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
 Q: How to test if hotplug is supported on the newly built kernel?
 A: You should now notice an entry in sysfs.
 Check if sysfs is mounted, using the "mount" command. You should notice
 an entry as shown below in the output.
 	....
 	none on /sys type sysfs (rw)
 	....
 If this is not mounted, do the following.
 	#mkdir /sys
 	#mount -t sysfs sys /sys
 Now you should see entries for all present cpu, the following is an example
 in a 8-way system.
 	#pwd
 	#/sys/devices/system/cpu
 	#ls -l
 	total 0
 	drwxr-xr-x  10 root root 0 Sep 19 07:44 .
 	drwxr-xr-x  13 root root 0 Sep 19 07:45 ..
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu0
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu1
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu2
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu3
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu4
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu5
 	drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu6
 	drwxr-xr-x   3 root root 0 Sep 19 07:48 cpu7
 Under each directory you would find an "online" file which is the control
 file to logically online/offline a processor.
 Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
 A: The usage of hot-add/remove may not be very consistently used in the code.
 CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
 To support physical addition/removal, one would need some BIOS hooks and
 the platform should have something like an attention button in PCI hotplug.
 CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
 Q: How do I logically offline a CPU?
 A: Do the following.
 	#echo 0 > /sys/devices/system/cpu/cpuX/online
 Once the logical offline is successful, check
 	#cat /proc/interrupts
 You should now not see the CPU that you removed. Also online file will report
 the state as 0 when a CPU is offline and 1 when it's online.
 	#To display the current cpu state.
 	#cat /sys/devices/system/cpu/cpuX/online
 Q: Why can't I remove CPU0 on some systems?
 A: Some architectures may have some special dependency on a certain CPU.
 For e.g in IA64 platforms we have ability to send platform interrupts to the
 OS. a.k.a Corrected Platform Error Interrupts (CPEI). In current ACPI
 specifications, we didn't have a way to change the target CPU. Hence if the
 current ACPI version doesn't support such re-direction, we disable that CPU
 by making it not-removable.
 In such cases you will also notice that the online file is missing under cpu0.
 Q: Is CPU0 removable on X86?
 A: Yes. If kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is
 removable by default. Otherwise, CPU0 is also removable by kernel option
 cpu0_hotplug.
 But some features depend on CPU0. Two known dependencies are:
 1. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail if
 CPU0 is offline and you need to online CPU0 before hibernate/suspend can
 continue.
 2. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC interrupt
 is detected.
 It's said poweroff/reboot may depend on CPU0 on some machines although I haven't
 seen any poweroff/reboot failure so far after CPU0 is offline on a few tested
 machines.
 Please let me know if you know or see any other dependencies of CPU0.
 If the dependencies are under your control, you can turn on CPU0 hotplug feature
 either by CONFIG_BOOTPARAM_HOTPLUG_CPU0 or by kernel parameter cpu0_hotplug.
 --Fenghua Yu <fenghua.yu@intel.com>
 Q: How do I find out if a particular CPU is not removable?
 A: Depending on the implementation, some architectures may show this by the
 absence of the "online" file. This is done if it can be determined ahead of
 time that this CPU cannot be removed.
 In some situations, this can be a run time check, i.e if you try to remove the
 last CPU, this will not be permitted. You can find such failures by
 investigating the return value of the "echo" command.
 Q: What happens when a CPU is being logically offlined?
 A: The following happen, listed in no particular order :-)
 - A notification is sent to in-kernel registered modules by sending an event
  CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
  CPU is being offlined while tasks are frozen due to a suspend operation in
  progress
 - All processes are migrated away from this outgoing CPU to new CPUs.
  The new CPU is chosen from each process' current cpuset, which may be
  a subset of all online CPUs.
 - All interrupts targeted to this CPU are migrated to a new CPU
 - timers/bottom half/task lets are also migrated to a new CPU
 - Once all services are migrated, kernel calls an arch specific routine
  __cpu_disable() to perform arch specific cleanup.
 - Once this is successful, an event for successful cleanup is sent by an event
  CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the
  CPU is being offlined).
  "It is expected that each service cleans up when the CPU_DOWN_PREPARE
  notifier is called, when CPU_DEAD is called it's expected there is nothing
  running on behalf of this CPU that was offlined"
 Q: If I have some kernel code that needs to be aware of CPU arrival and
   departure, how to i arrange for proper notification?
 A: This is what you would need in your kernel code to receive notifications.
 	#include <linux/cpu.h>
 	static int foobar_cpu_callback(struct notifier_block *nfb,
 				       unsigned long action, void *hcpu)
 	{
 		unsigned int cpu = (unsigned long)hcpu;
 		switch (action) {
 		case CPU_ONLINE:
 		case CPU_ONLINE_FROZEN:
 			foobar_online_action(cpu);
 			break;
 		case CPU_DEAD:
 		case CPU_DEAD_FROZEN:
 			foobar_dead_action(cpu);
 			break;
 		}
 		return NOTIFY_OK;
 	}
 	static struct notifier_block foobar_cpu_notifier =
 	{
 	   .notifier_call = foobar_cpu_callback,
 	};
 You need to call register_cpu_notifier() from your init function.
 Init functions could be of two types:
 1. early init (init function called when only the boot processor is online).
 2. late init (init function called _after_ all the CPUs are online).
 For the first case, you should add the following to your init function
 	register_cpu_notifier(&foobar_cpu_notifier);
 For the second case, you should add the following to your init function
 	register_hotcpu_notifier(&foobar_cpu_notifier);
 You can fail PREPARE notifiers if something doesn't work to prepare resources.
 This will stop the activity and send a following CANCELED event back.
 CPU_DEAD should not be failed, its just a goodness indication, but bad
 things will happen if a notifier in path sent a BAD notify code.
 Q: I don't see my action being called for all CPUs already up and running?
 A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined.
   If you need to perform some action for each CPU already in the system, then
   do this:
 	for_each_online_cpu(i) {
 		foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i);
 		foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, i);
 	}
   However, if you want to register a hotplug callback, as well as perform
   some initialization for CPUs that are already online, then do this:
   Version 1: (Correct)
   ---------
   	cpu_notifier_register_begin();
 		for_each_online_cpu(i) {
 			foobar_cpu_callback(&foobar_cpu_notifier,
 					    CPU_UP_PREPARE, i);
 			foobar_cpu_callback(&foobar_cpu_notifier,
 					    CPU_ONLINE, i);
 		}
 	/* Note the use of the double underscored version of the API */
 	__register_cpu_notifier(&foobar_cpu_notifier);
 	cpu_notifier_register_done();
   Note that the following code is *NOT* the right way to achieve this,
   because it is prone to an ABBA deadlock between the cpu_add_remove_lock
   and the cpu_hotplug.lock.
   Version 2: (Wrong!)
   ---------
 	get_online_cpus();
 		for_each_online_cpu(i) {
 			foobar_cpu_callback(&foobar_cpu_notifier,
 					    CPU_UP_PREPARE, i);
 			foobar_cpu_callback(&foobar_cpu_notifier,
 					    CPU_ONLINE, i);
 		}
 	register_cpu_notifier(&foobar_cpu_notifier);
 	put_online_cpus();
    So always use the first version shown above when you want to register
    callbacks as well as initialize the already online CPUs.
 Q: If I would like to develop CPU hotplug support for a new architecture,
   what do I need at a minimum?
 A: The following are what is required for CPU hotplug infrastructure to work
   correctly.
    - Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
    - __cpu_up()        - Arch interface to bring up a CPU
    - __cpu_disable()   - Arch interface to shutdown a CPU, no more interrupts
                          can be handled by the kernel after the routine
                          returns. Including local APIC timers etc are
                          shutdown.
     - __cpu_die()      - This actually supposed to ensure death of the CPU.
                          Actually look at some example code in other arch
                          that implement CPU hotplug. The processor is taken
                          down from the idle() loop for that specific
                          architecture. __cpu_die() typically waits for some
                          per_cpu state to be set, to ensure the processor
                          dead routine is called to be sure positively.
 Q: I need to ensure that a particular CPU is not removed when there is some
   work specific to this CPU in progress.
 A: There are two ways.  If your code can be run in interrupt context, use
   smp_call_function_single(), otherwise use work_on_cpu().  Note that
   work_on_cpu() is slow, and can fail due to out of memory:
 	int my_func_on_cpu(int cpu)
 	{
 		int err;
 		get_online_cpus();
 		if (!cpu_online(cpu))
 			err = -EINVAL;
 		else
 #if NEEDS_BLOCKING
 			err = work_on_cpu(cpu, __my_func_on_cpu, NULL);
 #else
 			smp_call_function_single(cpu, __my_func_on_cpu, &err,
 						 true);
 #endif
 		put_online_cpus();
 		return err;
 	}
 Q: How do we determine how many CPUs are available for hotplug.
 A: There is no clear spec defined way from ACPI that can give us that
   information today. Based on some input from Natalie of Unisys,
   that the ACPI MADT (Multiple APIC Description Tables) marks those possible
   CPUs in a system with disabled status.
   Andi implemented some simple heuristics that count the number of disabled
   CPUs in MADT as hotpluggable CPUS.  In the case there are no disabled CPUS
   we assume 1/2 the number of CPUs currently present can be hotplugged.
   Caveat: ACPI MADT can only provide 256 entries in systems with only ACPI 2.0c
   or earlier ACPI version supported, because the apicid field in MADT is only
   8 bits. From ACPI 3.0, this limitation was removed since the apicid field
   was extended to 32 bits with x2APIC introduced.
 User Space Notification
 Hotplug support for devices is common in Linux today. Its being used today to
 support automatic configuration of network, usb and pci devices. A hotplug
 event can be used to invoke an agent script to perform the configuration task.
 You can add /etc/hotplug/cpu.agent to handle hotplug notification user space
 scripts.
 	#!/bin/bash
 	# $Id: cpu.agent
 	# Kernel hotplug params include:
 	#ACTION=%s [online or offline]
 	#DEVPATH=%s
 	#
 	cd /etc/hotplug
 	. ./hotplug.functions
 	case $ACTION in
 		online)
 			echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
 			;;
 		offline)
 			echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
 			;;
 		*)
 			debug_mesg CPU $ACTION event not supported
        exit 1
        ;;
 	esac
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@ -103,3 +103,9 @@ have already built it.
 The optional make variable CF can be used to pass arguments to sparse.  The
 build system passes -Wbitwise to sparse automatically.
 Checking RCU annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~
 RCU annotations are not checked by default.  To enable RCU annotation
 checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@ -116,9 +116,11 @@ crc32table.h*
 cscope.*
 defkeymap.c
 devlist.h*
 devicetable-offsets.h
 dnotify_test
 docproc
 dslm
 dtc
 elf2ecoff
 elfconfig.h*
 evergreen_reg_safe.h
@ -153,8 +155,8 @@ keywords.c
 ksym.c*
 ksym.h*
 kxgettext
-lex.c
+*lex.c
-lex.*.c
+*lex.*.c
 linux
 logo_*.c
 logo_*_clut224.c
@ -215,6 +217,7 @@ series
 setup
 setup.bin
 setup.elf
 sortextable
 sImage
 sm_tbl*
 split-include
--- a/Documentation/driver-api/device-io.rst
+++ b/Documentation/driver-api/device-io.rst
@ -0,0 +1,201 @@
 .. Copyright 2001 Matthew Wilcox
 ..
 ..     This documentation is free software; you can redistribute
 ..     it and/or modify it under the terms of the GNU General Public
 ..     License as published by the Free Software Foundation; either
 ..     version 2 of the License, or (at your option) any later
 ..     version.
 ===============================
 Bus-Independent Device Accesses
 ===============================
 :Author: Matthew Wilcox
 :Author: Alan Cox
 Introduction
 ============
 Linux provides an API which abstracts performing IO across all busses
 and devices, allowing device drivers to be written independently of bus
 type.
 Memory Mapped IO
 ================
 Getting Access to the Device
 ----------------------------
 The most widely supported form of IO is memory mapped IO. That is, a
 part of the CPU's address space is interpreted not as accesses to
 memory, but as accesses to a device. Some architectures define devices
 to be at a fixed address, but most have some method of discovering
 devices. The PCI bus walk is a good example of such a scheme. This
 document does not cover how to receive such an address, but assumes you
 are starting with one. Physical addresses are of type unsigned long.
 This address should not be used directly. Instead, to get an address
 suitable for passing to the accessor functions described below, you
 should call :c:func:`ioremap()`. An address suitable for accessing
 the device will be returned to you.
 After you've finished using the device (say, in your module's exit
 routine), call :c:func:`iounmap()` in order to return the address
 space to the kernel. Most architectures allocate new address space each
 time you call :c:func:`ioremap()`, and they can run out unless you
 call :c:func:`iounmap()`.
 Accessing the device
 --------------------
 The part of the interface most used by drivers is reading and writing
 memory-mapped registers on the device. Linux provides interfaces to read
 and write 8-bit, 16-bit, 32-bit and 64-bit quantities. Due to a
 historical accident, these are named byte, word, long and quad accesses.
 Both read and write accesses are supported; there is no prefetch support
 at this time.
 The functions are named readb(), readw(), readl(), readq(),
 readb_relaxed(), readw_relaxed(), readl_relaxed(), readq_relaxed(),
 writeb(), writew(), writel() and writeq().
 Some devices (such as framebuffers) would like to use larger transfers than
 8 bytes at a time. For these devices, the :c:func:`memcpy_toio()`,
 :c:func:`memcpy_fromio()` and :c:func:`memset_io()` functions are
 provided. Do not use memset or memcpy on IO addresses; they are not
 guaranteed to copy data in order.
 The read and write functions are defined to be ordered. That is, the
 compiler is not permitted to reorder the I/O sequence. When the ordering
 can be compiler optimised, you can use __readb() and friends to
 indicate the relaxed ordering. Use this with care.
 While the basic functions are defined to be synchronous with respect to
 each other and ordered with respect to each other, the busses the devices
 sit on may themselves have asynchronicity. In particular many authors
 are burned by the fact that PCI bus writes are posted asynchronously. A
 driver author must issue a read from the same device to ensure that
 writes have occurred in the specific cases the author cares about. This kind
 of property cannot be hidden from driver writers in the API. In some
 cases, the read used to flush the device may be expected to fail (if the
 card is resetting, for example). In that case, the read should be done
 from config space, which is guaranteed to soft-fail if the card doesn't
 respond.
 The following is an example of flushing a write to a device when the
 driver would like to ensure the write's effects are visible prior to
 continuing execution::
    static inline void
    qla1280_disable_intrs(struct scsi_qla_host *ha)
    {
        struct device_reg *reg;
        reg = ha->iobase;
        /* disable risc and host interrupts */
        WRT_REG_WORD(&reg->ictrl, 0);
        /*
         * The following read will ensure that the above write
         * has been received by the device before we return from this
         * function.
         */
        RD_REG_WORD(&reg->ictrl);
        ha->flags.ints_enabled = 0;
    }
 In addition to write posting, on some large multiprocessing systems
 (e.g. SGI Challenge, Origin and Altix machines) posted writes won't be
 strongly ordered coming from different CPUs. Thus it's important to
 properly protect parts of your driver that do memory-mapped writes with
 locks and use the :c:func:`mmiowb()` to make sure they arrive in the
 order intended. Issuing a regular readX() will also ensure write ordering,
 but should only be used when the driver has to be sure that the write has
 actually arrived at the device
 (not that it's simply ordered with respect to other writes), since a
 full readX() is a relatively expensive operation.
 Generally, one should use :c:func:`mmiowb()` prior to releasing a spinlock
 that protects regions using :c:func:`writeb()` or similar functions that
 aren't surrounded by readb() calls, which will ensure ordering
 and flushing. The following pseudocode illustrates what might occur if
 write ordering isn't guaranteed via :c:func:`mmiowb()` or one of the
 readX() functions::
    CPU A:  spin_lock_irqsave(&dev_lock, flags)
    CPU A:  ...
    CPU A:  writel(newval, ring_ptr);
    CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
            ...
    CPU B:  spin_lock_irqsave(&dev_lock, flags)
    CPU B:  writel(newval2, ring_ptr);
    CPU B:  ...
    CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
 In the case above, newval2 could be written to ring_ptr before newval.
 Fixing it is easy though::
    CPU A:  spin_lock_irqsave(&dev_lock, flags)
    CPU A:  ...
    CPU A:  writel(newval, ring_ptr);
    CPU A:  mmiowb(); /* ensure no other writes beat us to the device */
    CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
            ...
    CPU B:  spin_lock_irqsave(&dev_lock, flags)
    CPU B:  writel(newval2, ring_ptr);
    CPU B:  ...
    CPU B:  mmiowb();
    CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
 See tg3.c for a real world example of how to use :c:func:`mmiowb()`.
 PCI ordering rules also guarantee that PIO read responses arrive after any
 outstanding DMA writes from that bus, since for some devices the result of
 a readb() call may signal to the driver that a DMA transaction is
 complete. In many cases, however, the driver may want to indicate that the
 next readb() call has no relation to any previous DMA writes
 performed by the device. The driver can use readb_relaxed() for
 these cases, although only some platforms will honor the relaxed
 semantics. Using the relaxed read functions will provide significant
 performance benefits on platforms that support it. The qla2xxx driver
 provides examples of how to use readX_relaxed(). In many cases, a majority
 of the driver's readX() calls can safely be converted to readX_relaxed()
 calls, since only a few will indicate or depend on DMA completion.
 Port Space Accesses
 ===================
 Port Space Explained
 --------------------
 Another form of IO commonly supported is Port Space. This is a range of
 addresses separate to the normal memory address space. Access to these
 addresses is generally not as fast as accesses to the memory mapped
 addresses, and it also has a potentially smaller address space.
 Unlike memory mapped IO, no preparation is required to access port
 space.
 Accessing Port Space
 --------------------
 Accesses to this space are provided through a set of functions which
 allow 8-bit, 16-bit and 32-bit accesses; also known as byte, word and
 long. These functions are :c:func:`inb()`, :c:func:`inw()`,
 :c:func:`inl()`, :c:func:`outb()`, :c:func:`outw()` and
 :c:func:`outl()`.
 Some variants are provided for these functions. Some devices require
 that accesses to their ports are slowed down. This functionality is
 provided by appending a ``_p`` to the end of the function.
 There are also equivalents to memcpy. The :c:func:`ins()` and
 :c:func:`outs()` functions copy bytes, words or longs to the given
 port.
 Public Functions Provided
 =========================
 .. kernel-doc:: arch/x86/include/asm/io.h
   :internal:
 .. kernel-doc:: lib/pci_iomap.c
   :export:
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@ -1,3 +1,6 @@
 .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
 .. |struct generic_pm_domain| replace:: :c:type:`struct generic_pm_domain <generic_pm_domain>`
 ============
 Device links
 ============
@ -120,12 +123,11 @@ Examples
  is the same as if the MMU was the parent of the master device.
  The fact that both devices share the same power domain would normally
-  suggest usage of a :c:type:`struct dev_pm_domain` or :c:type:`struct
+  suggest usage of a |struct dev_pm_domain| or |struct generic_pm_domain|,
-  generic_pm_domain`, however these are not independent devices that
+  however these are not independent devices that happen to share a power
-  happen to share a power switch, but rather the MMU device serves the
+  switch, but rather the MMU device serves the busmaster device and is
-  busmaster device and is useless without it.  A device link creates a
+  useless without it.  A device link creates a synthetic hierarchical
-  synthetic hierarchical relationship between the devices and is thus
+  relationship between the devices and is thus more apt.
  more apt.
 * A Thunderbolt host controller comprises a number of PCIe hotplug ports
  and an NHI device to manage the PCIe switch.  On resume from system sleep,
@ -157,7 +159,7 @@ Examples
 Alternatives
 ============
-* A :c:type:`struct dev_pm_domain` can be used to override the bus,
+* A |struct dev_pm_domain| can be used to override the bus,
  class or device type callbacks.  It is intended for devices sharing
  a single on/off switch, however it does not guarantee a specific
  suspend/resume ordering, this needs to be implemented separately.
@ -166,7 +168,7 @@ Alternatives
  suspended.  Furthermore it cannot be used to enforce a specific shutdown
  ordering or a driver presence dependency.
-* A :c:type:`struct generic_pm_domain` is a lot more heavyweight than a
+* A |struct generic_pm_domain| is a lot more heavyweight than a
  device link and does not allow for shutdown ordering or driver presence
  dependencies.  It also cannot be used on ACPI systems.
--- a/Documentation/driver-api/iio/buffers.rst
+++ b/Documentation/driver-api/iio/buffers.rst
@ -0,0 +1,125 @@
 =======
 Buffers
 =======
 * struct :c:type:`iio_buffer` — general buffer structure
 * :c:func:`iio_validate_scan_mask_onehot` — Validates that exactly one channel
  is selected
 * :c:func:`iio_buffer_get` — Grab a reference to the buffer
 * :c:func:`iio_buffer_put` — Release the reference to the buffer
 The Industrial I/O core offers a way for continuous data capture based on a
 trigger source. Multiple data channels can be read at once from
 :file:`/dev/iio:device{X}` character device node, thus reducing the CPU load.
 IIO buffer sysfs interface
 ==========================
 An IIO buffer has an associated attributes directory under
 :file:`/sys/bus/iio/iio:device{X}/buffer/*`. Here are some of the existing
 attributes:
 * :file:`length`, the total number of data samples (capacity) that can be
  stored by the buffer.
 * :file:`enable`, activate buffer capture.
 IIO buffer setup
 ================
 The meta information associated with a channel reading placed in a buffer is
 called a scan element. The important bits configuring scan elements are
 exposed to userspace applications via the
 :file:`/sys/bus/iio/iio:device{X}/scan_elements/*` directory. This directory
 contains attributes of the following form:
 * :file:`enable`, used for enabling a channel. If and only if its attribute
  is non *zero*, then a triggered capture will contain data samples for this
  channel.
 * :file:`type`, description of the scan element data storage within the buffer
  and hence the form in which it is read from user space.
  Format is [be|le]:[s|u]bits/storagebitsXrepeat[>>shift] .
  * *be* or *le*, specifies big or little endian.
  * *s* or *u*, specifies if signed (2's complement) or unsigned.
  * *bits*, is the number of valid data bits.
  * *storagebits*, is the number of bits (after padding) that it occupies in the
  buffer.
  * *shift*, if specified, is the shift that needs to be applied prior to
  masking out unused bits.
  * *repeat*, specifies the number of bits/storagebits repetitions. When the
  repeat element is 0 or 1, then the repeat value is omitted.
 For example, a driver for a 3-axis accelerometer with 12 bit resolution where
 data is stored in two 8-bit registers as follows::
        7   6   5   4   3   2   1   0
      +---+---+---+---+---+---+---+---+
      |D3 |D2 |D1 |D0 | X | X | X | X | (LOW byte, address 0x06)
      +---+---+---+---+---+---+---+---+
        7   6   5   4   3   2   1   0
      +---+---+---+---+---+---+---+---+
      |D11|D10|D9 |D8 |D7 |D6 |D5 |D4 | (HIGH byte, address 0x07)
      +---+---+---+---+---+---+---+---+
 will have the following scan element type for each axis::
      $ cat /sys/bus/iio/devices/iio:device0/scan_elements/in_accel_y_type
      le:s12/16>>4
 A user space application will interpret data samples read from the buffer as
 two byte little endian signed data, that needs a 4 bits right shift before
 masking out the 12 valid bits of data.
 For implementing buffer support a driver should initialize the following
 fields in iio_chan_spec definition::
   struct iio_chan_spec {
   /* other members */
           int scan_index
           struct {
                   char sign;
                   u8 realbits;
                   u8 storagebits;
                   u8 shift;
                   u8 repeat;
                   enum iio_endian endianness;
                  } scan_type;
          };
 The driver implementing the accelerometer described above will have the
 following channel definition::
   struct iio_chan_spec accel_channels[] = {
           {
                   .type = IIO_ACCEL,
 		   .modified = 1,
 		   .channel2 = IIO_MOD_X,
 		   /* other stuff here */
 		   .scan_index = 0,
 		   .scan_type = {
 		           .sign = 's',
 			   .realbits = 12,
 			   .storagebits = 16,
 			   .shift = 4,
 			   .endianness = IIO_LE,
 		   },
           }
           /* similar for Y (with channel2 = IIO_MOD_Y, scan_index = 1)
            * and Z (with channel2 = IIO_MOD_Z, scan_index = 2) axis
            */
    }
 Here **scan_index** defines the order in which the enabled channels are placed
 inside the buffer. Channels with a lower **scan_index** will be placed before
 channels with a higher index. Each channel needs to have a unique
 **scan_index**.
 Setting **scan_index** to -1 can be used to indicate that the specific channel
 does not support buffered capture. In this case no entries will be created for
 the channel in the scan_elements directory.
 More details
 ============
 .. kernel-doc:: include/linux/iio/buffer.h
 .. kernel-doc:: drivers/iio/industrialio-buffer.c
   :export:
--- a/Documentation/driver-api/iio/core.rst
+++ b/Documentation/driver-api/iio/core.rst
@ -0,0 +1,182 @@
 =============
 Core elements
 =============
 The Industrial I/O core offers both a unified framework for writing drivers for
 many different types of embedded sensors and a standard interface to user space
 applications manipulating sensors. The implementation can be found under
 :file:`drivers/iio/industrialio-*`
 Industrial I/O Devices
 ----------------------
 * struct :c:type:`iio_dev` - industrial I/O device
 * :c:func:`iio_device_alloc()` - allocate an :c:type:`iio_dev` from a driver
 * :c:func:`iio_device_free()` - free an :c:type:`iio_dev` from a driver
 * :c:func:`iio_device_register()` - register a device with the IIO subsystem
 * :c:func:`iio_device_unregister()` - unregister a device from the IIO
  subsystem
 An IIO device usually corresponds to a single hardware sensor and it
 provides all the information needed by a driver handling a device.
 Let's first have a look at the functionality embedded in an IIO device
 then we will show how a device driver makes use of an IIO device.
 There are two ways for a user space application to interact with an IIO driver.
 1. :file:`/sys/bus/iio/iio:device{X}/`, this represents a hardware sensor
   and groups together the data channels of the same chip.
 2. :file:`/dev/iio:device{X}`, character device node interface used for
   buffered data transfer and for events information retrieval.
 A typical IIO driver will register itself as an :doc:`I2C <../i2c>` or
 :doc:`SPI <../spi>` driver and will create two routines, probe and remove.
 At probe:
 1. Call :c:func:`iio_device_alloc()`, which allocates memory for an IIO device.
 2. Initialize IIO device fields with driver specific information (e.g.
   device name, device channels).
 3. Call :c:func:`iio_device_register()`, this registers the device with the
   IIO core. After this call the device is ready to accept requests from user
   space applications.
 At remove, we free the resources allocated in probe in reverse order:
 1. :c:func:`iio_device_unregister()`, unregister the device from the IIO core.
 2. :c:func:`iio_device_free()`, free the memory allocated for the IIO device.
 IIO device sysfs interface
 ==========================
 Attributes are sysfs files used to expose chip info and also allowing
 applications to set various configuration parameters. For device with
 index X, attributes can be found under /sys/bus/iio/iio:deviceX/ directory.
 Common attributes are:
 * :file:`name`, description of the physical chip.
 * :file:`dev`, shows the major:minor pair associated with
  :file:`/dev/iio:deviceX` node.
 * :file:`sampling_frequency_available`, available discrete set of sampling
  frequency values for device.
 * Available standard attributes for IIO devices are described in the
  :file:`Documentation/ABI/testing/sysfs-bus-iio` file in the Linux kernel
  sources.
 IIO device channels
 ===================
 struct :c:type:`iio_chan_spec` - specification of a single channel
 An IIO device channel is a representation of a data channel. An IIO device can
 have one or multiple channels. For example:
 * a thermometer sensor has one channel representing the temperature measurement.
 * a light sensor with two channels indicating the measurements in the visible
  and infrared spectrum.
 * an accelerometer can have up to 3 channels representing acceleration on X, Y
  and Z axes.
 An IIO channel is described by the struct :c:type:`iio_chan_spec`.
 A thermometer driver for the temperature sensor in the example above would
 have to describe its channel as follows::
   static const struct iio_chan_spec temp_channel[] = {
        {
            .type = IIO_TEMP,
            .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
        },
   };
 Channel sysfs attributes exposed to userspace are specified in the form of
 bitmasks. Depending on their shared info, attributes can be set in one of the
 following masks:
 * **info_mask_separate**, attributes will be specific to
  this channel
 * **info_mask_shared_by_type**, attributes are shared by all channels of the
  same type
 * **info_mask_shared_by_dir**, attributes are shared by all channels of the same
  direction
 * **info_mask_shared_by_all**, attributes are shared by all channels
 When there are multiple data channels per channel type we have two ways to
 distinguish between them:
 * set **.modified** field of :c:type:`iio_chan_spec` to 1. Modifiers are
  specified using **.channel2** field of the same :c:type:`iio_chan_spec`
  structure and are used to indicate a physically unique characteristic of the
  channel such as its direction or spectral response. For example, a light
  sensor can have two channels, one for infrared light and one for both
  infrared and visible light.
 * set **.indexed** field of :c:type:`iio_chan_spec` to 1. In this case the
  channel is simply another instance with an index specified by the **.channel**
  field.
 Here is how we can make use of the channel's modifiers::
   static const struct iio_chan_spec light_channels[] = {
           {
                   .type = IIO_INTENSITY,
                   .modified = 1,
                   .channel2 = IIO_MOD_LIGHT_IR,
                   .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
                   .info_mask_shared = BIT(IIO_CHAN_INFO_SAMP_FREQ),
           },
           {
                   .type = IIO_INTENSITY,
                   .modified = 1,
                   .channel2 = IIO_MOD_LIGHT_BOTH,
                   .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
                   .info_mask_shared = BIT(IIO_CHAN_INFO_SAMP_FREQ),
           },
           {
                   .type = IIO_LIGHT,
                   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
                   .info_mask_shared = BIT(IIO_CHAN_INFO_SAMP_FREQ),
           },
      }
 This channel's definition will generate two separate sysfs files for raw data
 retrieval:
 * :file:`/sys/bus/iio/iio:device{X}/in_intensity_ir_raw`
 * :file:`/sys/bus/iio/iio:device{X}/in_intensity_both_raw`
 one file for processed data:
 * :file:`/sys/bus/iio/iio:device{X}/in_illuminance_input`
 and one shared sysfs file for sampling frequency:
 * :file:`/sys/bus/iio/iio:device{X}/sampling_frequency`.
 Here is how we can make use of the channel's indexing::
   static const struct iio_chan_spec light_channels[] = {
           {
                   .type = IIO_VOLTAGE,
 		   .indexed = 1,
 		   .channel = 0,
 		   .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
 	   },
           {
 	           .type = IIO_VOLTAGE,
                   .indexed = 1,
                   .channel = 1,
                   .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
           },
   }
 This will generate two separate attributes files for raw data retrieval:
 * :file:`/sys/bus/iio/devices/iio:device{X}/in_voltage0_raw`, representing
  voltage measurement for channel 0.
 * :file:`/sys/bus/iio/devices/iio:device{X}/in_voltage1_raw`, representing
  voltage measurement for channel 1.
 More details
 ============
 .. kernel-doc:: include/linux/iio/iio.h
 .. kernel-doc:: drivers/iio/industrialio-core.c
   :export:
--- a/Documentation/driver-api/iio/index.rst
+++ b/Documentation/driver-api/iio/index.rst
@ -0,0 +1,17 @@
 .. include:: <isonum.txt>
 Industrial I/O
 ==============
 **Copyright** |copy| 2015 Intel Corporation
 Contents:
 .. toctree::
   :maxdepth: 2
   intro
   core
   buffers
   triggers
   triggered-buffers
--- a/Documentation/driver-api/iio/intro.rst
+++ b/Documentation/driver-api/iio/intro.rst
@ -0,0 +1,33 @@
 .. include:: <isonum.txt>
 ============
 Introduction
 ============
 The main purpose of the Industrial I/O subsystem (IIO) is to provide support
 for devices that in some sense perform either
 analog-to-digital conversion (ADC) or digital-to-analog conversion (DAC)
 or both. The aim is to fill the gap between the somewhat similar hwmon and
 :doc:`input <../input>` subsystems. Hwmon is directed at low sample rate
 sensors used to monitor and control the system itself, like fan speed control
 or temperature measurement. :doc:`Input <../input>` is, as its name suggests,
 focused on human interaction input devices (keyboard, mouse, touchscreen).
 In some cases there is considerable overlap between these and IIO.
 Devices that fall into this category include:
 * analog to digital converters (ADCs)
 * accelerometers
 * capacitance to digital converters (CDCs)
 * digital to analog converters (DACs)
 * gyroscopes
 * inertial measurement units (IMUs)
 * color and light sensors
 * magnetometers
 * pressure sensors
 * proximity sensors
 * temperature sensors
 Usually these sensors are connected via :doc:`SPI <../spi>` or
 :doc:`I2C <../i2c>`. A common use case of the sensors devices is to have
 combined functionality (e.g. light plus proximity sensor).
--- a/Documentation/driver-api/iio/triggered-buffers.rst
+++ b/Documentation/driver-api/iio/triggered-buffers.rst
@ -0,0 +1,69 @@
 =================
 Triggered Buffers
 =================
 Now that we know what buffers and triggers are, let's see how they work together.
 IIO triggered buffer setup
 ==========================
 * :c:func:`iio_triggered_buffer_setup` — Setup triggered buffer and pollfunc
 * :c:func:`iio_triggered_buffer_cleanup` — Free resources allocated by
  :c:func:`iio_triggered_buffer_setup`
 * struct :c:type:`iio_buffer_setup_ops` — buffer setup related callbacks
 A typical triggered buffer setup looks like this::
    const struct iio_buffer_setup_ops sensor_buffer_setup_ops = {
      .preenable    = sensor_buffer_preenable,
      .postenable   = sensor_buffer_postenable,
      .postdisable  = sensor_buffer_postdisable,
      .predisable   = sensor_buffer_predisable,
    };
    irqreturn_t sensor_iio_pollfunc(int irq, void *p)
    {
        pf->timestamp = iio_get_time_ns((struct indio_dev *)p);
        return IRQ_WAKE_THREAD;
    }
    irqreturn_t sensor_trigger_handler(int irq, void *p)
    {
        u16 buf[8];
        int i = 0;
        /* read data for each active channel */
        for_each_set_bit(bit, active_scan_mask, masklength)
            buf[i++] = sensor_get_data(bit)
        iio_push_to_buffers_with_timestamp(indio_dev, buf, timestamp);
        iio_trigger_notify_done(trigger);
        return IRQ_HANDLED;
    }
    /* setup triggered buffer, usually in probe function */
    iio_triggered_buffer_setup(indio_dev, sensor_iio_pollfunc,
                               sensor_trigger_handler,
                               sensor_buffer_setup_ops);
 The important things to notice here are:
 * :c:type:`iio_buffer_setup_ops`, the buffer setup functions to be called at
  predefined points in the buffer configuration sequence (e.g. before enable,
  after disable). If not specified, the IIO core uses the default
  iio_triggered_buffer_setup_ops.
 * **sensor_iio_pollfunc**, the function that will be used as top half of poll
  function. It should do as little processing as possible, because it runs in
  interrupt context. The most common operation is recording of the current
  timestamp and for this reason one can use the IIO core defined
  :c:func:`iio_pollfunc_store_time` function.
 * **sensor_trigger_handler**, the function that will be used as bottom half of
  the poll function. This runs in the context of a kernel thread and all the
  processing takes place here. It usually reads data from the device and
  stores it in the internal buffer together with the timestamp recorded in the
  top half.
 More details
 ============
 .. kernel-doc:: drivers/iio/buffer/industrialio-triggered-buffer.c
--- a/Documentation/driver-api/iio/triggers.rst
+++ b/Documentation/driver-api/iio/triggers.rst
@ -0,0 +1,80 @@
 ========
 Triggers
 ========
 * struct :c:type:`iio_trigger` — industrial I/O trigger device
 * :c:func:`devm_iio_trigger_alloc` — Resource-managed iio_trigger_alloc
 * :c:func:`devm_iio_trigger_free` — Resource-managed iio_trigger_free
 * :c:func:`devm_iio_trigger_register` — Resource-managed iio_trigger_register
 * :c:func:`devm_iio_trigger_unregister` — Resource-managed
  iio_trigger_unregister
 * :c:func:`iio_trigger_validate_own_device` — Check if a trigger and IIO
  device belong to the same device
 In many situations it is useful for a driver to be able to capture data based
 on some external event (trigger) as opposed to periodically polling for data.
 An IIO trigger can be provided by a device driver that also has an IIO device
 based on hardware generated events (e.g. data ready or threshold exceeded) or
 provided by a separate driver from an independent interrupt source (e.g. GPIO
 line connected to some external system, timer interrupt or user space writing
 a specific file in sysfs). A trigger may initiate data capture for a number of
 sensors and also it may be completely unrelated to the sensor itself.
 IIO trigger sysfs interface
 ===========================
 There are two locations in sysfs related to triggers:
 * :file:`/sys/bus/iio/devices/trigger{Y}/*`, this directory is created once an
  IIO trigger is registered with the IIO core and corresponds to trigger
  with index Y.
  Because triggers can be very different depending on type there are few
  standard attributes that we can describe here:
  * :file:`name`, trigger name that can be later used for association with a
    device.
  * :file:`sampling_frequency`, some timer based triggers use this attribute to
    specify the frequency for trigger calls.
 * :file:`/sys/bus/iio/devices/iio:device{X}/trigger/*`, this directory is
  created once the device supports a triggered buffer. We can associate a
   trigger with our device by writing the trigger's name in the
  :file:`current_trigger` file.
 IIO trigger setup
 =================
 Let's see a simple example of how to set up a trigger to be used by a driver::
      struct iio_trigger_ops trigger_ops = {
          .set_trigger_state = sample_trigger_state,
          .validate_device = sample_validate_device,
      }
      struct iio_trigger *trig;
      /* first, allocate memory for our trigger */
      trig = iio_trigger_alloc(dev, "trig-%s-%d", name, idx);
      /* setup trigger operations field */
      trig->ops = &trigger_ops;
      /* now register the trigger with the IIO core */
      iio_trigger_register(trig);
 IIO trigger ops
 ===============
 * struct :c:type:`iio_trigger_ops` — operations structure for an iio_trigger.
 Notice that a trigger has a set of operations attached:
 * :file:`set_trigger_state`, switch the trigger on/off on demand.
 * :file:`validate_device`, function to validate the device when the current
  trigger gets changed.
 More details
 ============
 .. kernel-doc:: include/linux/iio/trigger.h
 .. kernel-doc:: drivers/iio/industrialio-trigger.c
   :export:
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@ -16,11 +16,15 @@ available subsections can be seen below.
   basics
   infrastructure
   pm/index
   device-io
   dma-buf
   device_link
   message-based
   sound
   frame-buffer
   regulator
   iio/index
   input
   usb
   spi
--- a/Documentation/driver-api/pm/conf.py
+++ b/Documentation/driver-api/pm/conf.py
@ -0,0 +1,10 @@
 # -*- coding: utf-8; mode: python -*-
 project = "Device Power Management"
 tags.add("subproject")
 latex_documents = [
    ('index', 'pm.tex', project,
     'The kernel development community', 'manual'),
 ]
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@ -0,0 +1,736 @@
 .. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops <dev_pm_ops>`
 .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
 .. |struct bus_type| replace:: :c:type:`struct bus_type <bus_type>`
 .. |struct device_type| replace:: :c:type:`struct device_type <device_type>`
 .. |struct class| replace:: :c:type:`struct class <class>`
 .. |struct wakeup_source| replace:: :c:type:`struct wakeup_source <wakeup_source>`
 .. |struct device| replace:: :c:type:`struct device <device>`
 ==============================
 Device Power Management Basics
 ==============================
 ::
 Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
 Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
 Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 Most of the code in Linux is device drivers, so most of the Linux power
 management (PM) code is also driver-specific.  Most drivers will do very
 little; others, especially for platforms with small batteries (like cell
 phones), will do a lot.
 This writeup gives an overview of how drivers interact with system-wide
 power management goals, emphasizing the models and interfaces that are
 shared by everything that hooks up to the driver model core.  Read it as
 background for the domain-specific work you'd do with any specific driver.
 Two Models for Device Power Management
 ======================================
 Drivers will use one or both of these models to put devices into low-power
 states:
    System Sleep model:
 	Drivers can enter low-power states as part of entering system-wide
 	low-power states like "suspend" (also known as "suspend-to-RAM"), or
 	(mostly for systems with disks) "hibernation" (also known as
 	"suspend-to-disk").
 	This is something that device, bus, and class drivers collaborate on
 	by implementing various role-specific suspend and resume methods to
 	cleanly power down hardware and software subsystems, then reactivate
 	them without loss of data.
 	Some drivers can manage hardware wakeup events, which make the system
 	leave the low-power state.  This feature may be enabled or disabled
 	using the relevant :file:`/sys/devices/.../power/wakeup` file (for
 	Ethernet drivers the ioctl interface used by ethtool may also be used
 	for this purpose); enabling it may cost some power usage, but lets the
 	whole system enter low-power states more often.
    Runtime Power Management model:
 	Devices may also be put into low-power states while the system is
 	running, independently of other power management activity in principle.
 	However, devices are not generally independent of each other (for
 	example, a parent device cannot be suspended unless all of its child
 	devices have been suspended).  Moreover, depending on the bus type the
 	device is on, it may be necessary to carry out some bus-specific
 	operations on the device for this purpose.  Devices put into low power
 	states at run time may require special handling during system-wide power
 	transitions (suspend or hibernation).
 	For these reasons not only the device driver itself, but also the
 	appropriate subsystem (bus type, device type or device class) driver and
 	the PM core are involved in runtime power management.  As in the system
 	sleep power management case, they need to collaborate by implementing
 	various role-specific suspend and resume methods, so that the hardware
 	is cleanly powered down and reactivated without data or service loss.
 There's not a lot to be said about those low-power states except that they are
 very system-specific, and often device-specific.  Also, that if enough devices
 have been put into low-power states (at runtime), the effect may be very similar
 to entering some system-wide low-power state (system sleep) ... and that
 synergies exist, so that several drivers using runtime PM might put the system
 into a state where even deeper power saving options are available.
 Most suspended devices will have quiesced all I/O: no more DMA or IRQs (except
 for wakeup events), no more data read or written, and requests from upstream
 drivers are no longer accepted.  A given bus or platform may have different
 requirements though.
 Examples of hardware wakeup events include an alarm from a real time clock,
 network wake-on-LAN packets, keyboard or mouse activity, and media insertion
 or removal (for PCMCIA, MMC/SD, USB, and so on).
 Interfaces for Entering System Sleep States
 ===========================================
 There are programming interfaces provided for subsystems (bus type, device type,
 device class) and device drivers to allow them to participate in the power
 management of devices they are concerned with.  These interfaces cover both
 system sleep and runtime power management.
 Device Power Management Operations
 ----------------------------------
 Device power management operations, at the subsystem level as well as at the
 device driver level, are implemented by defining and populating objects of type
 |struct dev_pm_ops| defined in :file:`include/linux/pm.h`.  The roles of the
 methods included in it will be explained in what follows.  For now, it should be
 sufficient to remember that the last three methods are specific to runtime power
 management while the remaining ones are used during system-wide power
 transitions.
 There also is a deprecated "old" or "legacy" interface for power management
 operations available at least for some subsystems.  This approach does not use
 |struct dev_pm_ops| objects and it is suitable only for implementing system
 sleep power management methods in a limited way.  Therefore it is not described
 in this document, so please refer directly to the source code for more
 information about it.
 Subsystem-Level Methods
 -----------------------
 The core methods to suspend and resume devices reside in
 |struct dev_pm_ops| pointed to by the :c:member:`ops` member of
 |struct dev_pm_domain|, or by the :c:member:`pm` member of |struct bus_type|,
 |struct device_type| and |struct class|.  They are mostly of interest to the
 people writing infrastructure for platforms and buses, like PCI or USB, or
 device type and device class drivers.  They also are relevant to the writers of
 device drivers whose subsystems (PM domains, device types, device classes and
 bus types) don't provide all power management methods.
 Bus drivers implement these methods as appropriate for the hardware and the
 drivers using it; PCI works differently from USB, and so on.  Not many people
 write subsystem-level drivers; most driver code is a "device driver" that builds
 on top of bus-specific framework code.
 For more information on these driver calls, see the description later;
 they are called in phases for every device, respecting the parent-child
 sequencing in the driver model tree.
 :file:`/sys/devices/.../power/wakeup` files
 -------------------------------------------
 All device objects in the driver model contain fields that control the handling
 of system wakeup events (hardware signals that can force the system out of a
 sleep state).  These fields are initialized by bus or device driver code using
 :c:func:`device_set_wakeup_capable()` and :c:func:`device_set_wakeup_enable()`,
 defined in :file:`include/linux/pm_wakeup.h`.
 The :c:member:`power.can_wakeup` flag just records whether the device (and its
 driver) can physically support wakeup events.  The
 :c:func:`device_set_wakeup_capable()` routine affects this flag.  The
 :c:member:`power.wakeup` field is a pointer to an object of type
 |struct wakeup_source| used for controlling whether or not the device should use
 its system wakeup mechanism and for notifying the PM core of system wakeup
 events signaled by the device.  This object is only present for wakeup-capable
 devices (i.e. devices whose :c:member:`can_wakeup` flags are set) and is created
 (or removed) by :c:func:`device_set_wakeup_capable()`.
 Whether or not a device is capable of issuing wakeup events is a hardware
 matter, and the kernel is responsible for keeping track of it.  By contrast,
 whether or not a wakeup-capable device should issue wakeup events is a policy
 decision, and it is managed by user space through a sysfs attribute: the
 :file:`power/wakeup` file.  User space can write the "enabled" or "disabled"
 strings to it to indicate whether or not, respectively, the device is supposed
 to signal system wakeup.  This file is only present if the
 :c:member:`power.wakeup` object exists for the given device and is created (or
 removed) along with that object, by :c:func:`device_set_wakeup_capable()`.
 Reads from the file will return the corresponding string.
 The initial value in the :file:`power/wakeup` file is "disabled" for the
 majority of devices; the major exceptions are power buttons, keyboards, and
 Ethernet adapters whose WoL (wake-on-LAN) feature has been set up with ethtool.
 It should also default to "enabled" for devices that don't generate wakeup
 requests on their own but merely forward wakeup requests from one bus to another
 (like PCI Express ports).
 The :c:func:`device_may_wakeup()` routine returns true only if the
 :c:member:`power.wakeup` object exists and the corresponding :file:`power/wakeup`
 file contains the "enabled" string.  This information is used by subsystems,
 like the PCI bus type code, to see whether or not to enable the devices' wakeup
 mechanisms.  If device wakeup mechanisms are enabled or disabled directly by
 drivers, they also should use :c:func:`device_may_wakeup()` to decide what to do
 during a system sleep transition.  Device drivers, however, are not expected to
 call :c:func:`device_set_wakeup_enable()` directly in any case.
 It ought to be noted that system wakeup is conceptually different from "remote
 wakeup" used by runtime power management, although it may be supported by the
 same physical mechanism.  Remote wakeup is a feature allowing devices in
 low-power states to trigger specific interrupts to signal conditions in which
 they should be put into the full-power state.  Those interrupts may or may not
 be used to signal system wakeup events, depending on the hardware design.  On
 some systems it is impossible to trigger them from system sleep states.  In any
 case, remote wakeup should always be enabled for runtime power management for
 all devices and drivers that support it.
 :file:`/sys/devices/.../power/control` files
 --------------------------------------------
 Each device in the driver model has a flag to control whether it is subject to
 runtime power management.  This flag, :c:member:`runtime_auto`, is initialized
 by the bus type (or generally subsystem) code using :c:func:`pm_runtime_allow()`
 or :c:func:`pm_runtime_forbid()`; the default is to allow runtime power
 management.
 The setting can be adjusted by user space by writing either "on" or "auto" to
 the device's :file:`power/control` sysfs file.  Writing "auto" calls
 :c:func:`pm_runtime_allow()`, setting the flag and allowing the device to be
 runtime power-managed by its driver.  Writing "on" calls
 :c:func:`pm_runtime_forbid()`, clearing the flag, returning the device to full
 power if it was in a low-power state, and preventing the
 device from being runtime power-managed.  User space can check the current value
 of the :c:member:`runtime_auto` flag by reading that file.
 The device's :c:member:`runtime_auto` flag has no effect on the handling of
 system-wide power transitions.  In particular, the device can (and in the
 majority of cases should and will) be put into a low-power state during a
 system-wide transition to a sleep state even though its :c:member:`runtime_auto`
 flag is clear.
 For more information about the runtime power management framework, refer to
 :file:`Documentation/power/runtime_pm.txt`.
 Calling Drivers to Enter and Leave System Sleep States
 ======================================================
 When the system goes into a sleep state, each device's driver is asked to
 suspend the device by putting it into a state compatible with the target
 system state.  That's usually some version of "off", but the details are
 system-specific.  Also, wakeup-enabled devices will usually stay partly
 functional in order to wake the system.
 When the system leaves that low-power state, the device's driver is asked to
 resume it by returning it to full power.  The suspend and resume operations
 always go together, and both are multi-phase operations.
 For simple drivers, suspend might quiesce the device using class code
 and then turn its hardware as "off" as possible during suspend_noirq.  The
 matching resume calls would then completely reinitialize the hardware
 before reactivating its class I/O queues.
 More power-aware drivers might prepare the devices for triggering system wakeup
 events.
 Call Sequence Guarantees
 ------------------------
 To ensure that bridges and similar links needing to talk to a device are
 available when the device is suspended or resumed, the device hierarchy is
 walked in a bottom-up order to suspend devices.  A top-down order is
 used to resume those devices.
 The ordering of the device hierarchy is defined by the order in which devices
 get registered:  a child can never be registered, probed or resumed before
 its parent; and can't be removed or suspended after that parent.
 The policy is that the device hierarchy should match hardware bus topology.
 [Or at least the control bus, for devices which use multiple busses.]
 In particular, this means that a device registration may fail if the parent of
 the device is suspending (i.e. has been chosen by the PM core as the next
 device to suspend) or has already suspended, as well as after all of the other
 devices have been suspended.  Device drivers must be prepared to cope with such
 situations.
 System Power Management Phases
 ------------------------------
 Suspending or resuming the system is done in several phases.  Different phases
 are used for suspend-to-idle, shallow (standby), and deep ("suspend-to-RAM")
 sleep states and the hibernation state ("suspend-to-disk").  Each phase involves
 executing callbacks for every device before the next phase begins.  Not all
 buses or classes support all these callbacks and not all drivers use all the
 callbacks.  The various phases always run after tasks have been frozen and
 before they are unfrozen.  Furthermore, the ``*_noirq`` phases run at a time
 when IRQ handlers have been disabled (except for those marked with the
 IRQF_NO_SUSPEND flag).
 All phases use PM domain, bus, type, class or driver callbacks (that is, methods
 defined in ``dev->pm_domain->ops``, ``dev->bus->pm``, ``dev->type->pm``,
 ``dev->class->pm`` or ``dev->driver->pm``).  These callbacks are regarded by the
 PM core as mutually exclusive.  Moreover, PM domain callbacks always take
 precedence over all of the other callbacks and, for example, type callbacks take
 precedence over bus, class and driver callbacks.  To be precise, the following
 rules are used to determine which callback to execute in the given phase:
    1.	If ``dev->pm_domain`` is present, the PM core will choose the callback
 	provided by ``dev->pm_domain->ops`` for execution.
    2.	Otherwise, if both ``dev->type`` and ``dev->type->pm`` are present, the
 	callback provided by ``dev->type->pm`` will be chosen for execution.
    3.	Otherwise, if both ``dev->class`` and ``dev->class->pm`` are present,
 	the callback provided by ``dev->class->pm`` will be chosen for
 	execution.
    4.	Otherwise, if both ``dev->bus`` and ``dev->bus->pm`` are present, the
 	callback provided by ``dev->bus->pm`` will be chosen for execution.
 This allows PM domains and device types to override callbacks provided by bus
 types or device classes if necessary.
 The PM domain, type, class and bus callbacks may in turn invoke device- or
 driver-specific methods stored in ``dev->driver->pm``, but they don't have to do
 that.
 If the subsystem callback chosen for execution is not present, the PM core will
 execute the corresponding method from the ``dev->driver->pm`` set instead if
 there is one.
 Entering System Suspend
 -----------------------
 When the system goes into the freeze, standby or memory sleep state,
 the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``.
    1.	The ``prepare`` phase is meant to prevent races by preventing new
 	devices from being registered; the PM core would never know that all the
 	children of a device had been suspended if new children could be
 	registered at will.  [By contrast, from the PM core's perspective,
 	devices may be unregistered at any time.]  Unlike the other
 	suspend-related phases, during the ``prepare`` phase the device
 	hierarchy is traversed top-down.
 	After the ``->prepare`` callback method returns, no new children may be
 	registered below the device.  The method may also prepare the device or
 	driver in some way for the upcoming system power transition, but it
 	should not put the device into a low-power state.
 	For devices supporting runtime power management, the return value of the
 	prepare callback can be used to indicate to the PM core that it may
 	safely leave the device in runtime suspend (if runtime-suspended
 	already), provided that all of the device's descendants are also left in
 	runtime suspend.  Namely, if the prepare callback returns a positive
 	number and that happens for all of the descendants of the device too,
 	and all of them (including the device itself) are runtime-suspended, the
 	PM core will skip the ``suspend``, ``suspend_late`` and
 	``suspend_noirq`` phases as well as all of the corresponding phases of
 	the subsequent device resume for all of these devices.  In that case,
 	the ``->complete`` callback will be invoked directly after the
 	``->prepare`` callback and is entirely responsible for putting the
 	device into a consistent state as appropriate.
 	Note that this direct-complete procedure applies even if the device is
 	disabled for runtime PM; only the runtime-PM status matters.  It follows
 	that if a device has system-sleep callbacks but does not support runtime
 	PM, then its prepare callback must never return a positive value.  This
 	is because all such devices are initially set to runtime-suspended with
 	runtime PM disabled.
    2.	The ``->suspend`` methods should quiesce the device to stop it from
 	performing I/O.  They also may save the device registers and put it into
 	the appropriate low-power state, depending on the bus type the device is
 	on, and they may enable wakeup events.
    3.	For a number of devices it is convenient to split suspend into the
 	"quiesce device" and "save device state" phases, in which cases
 	``suspend_late`` is meant to do the latter.  It is always executed after
 	runtime power management has been disabled for the device in question.
    4.	The ``suspend_noirq`` phase occurs after IRQ handlers have been disabled,
 	which means that the driver's interrupt handler will not be called while
 	the callback method is running.  The ``->suspend_noirq`` methods should
 	save the values of the device's registers that weren't saved previously
 	and finally put the device into the appropriate low-power state.
 	The majority of subsystems and device drivers need not implement this
 	callback.  However, bus types allowing devices to share interrupt
 	vectors, like PCI, generally need it; otherwise a driver might encounter
 	an error during the suspend phase by fielding a shared interrupt
 	generated by some other device after its own device had been set to low
 	power.
 At the end of these phases, drivers should have stopped all I/O transactions
 (DMA, IRQs), saved enough state that they can re-initialize or restore previous
 state (as needed by the hardware), and placed the device into a low-power state.
 On many platforms they will gate off one or more clock sources; sometimes they
 will also switch off power supplies or reduce voltages.  [Drivers supporting
 runtime PM may already have performed some or all of these steps.]
 If :c:func:`device_may_wakeup(dev)` returns ``true``, the device should be
 prepared for generating hardware wakeup signals to trigger a system wakeup event
 when the system is in the sleep state.  For example, :c:func:`enable_irq_wake()`
 might identify GPIO signals hooked up to a switch or other external hardware,
 and :c:func:`pci_enable_wake()` does something similar for the PCI PME signal.
 If any of these callbacks returns an error, the system won't enter the desired
 low-power state.  Instead, the PM core will unwind its actions by resuming all
 the devices that were suspended.
 Leaving System Suspend
 ----------------------
 When resuming from freeze, standby or memory sleep, the phases are:
 ``resume_noirq``, ``resume_early``, ``resume``, ``complete``.
    1.	The ``->resume_noirq`` callback methods should perform any actions
 	needed before the driver's interrupt handlers are invoked.  This
 	generally means undoing the actions of the ``suspend_noirq`` phase.  If
 	the bus type permits devices to share interrupt vectors, like PCI, the
 	method should bring the device and its driver into a state in which the
 	driver can recognize if the device is the source of incoming interrupts,
 	if any, and handle them correctly.
 	For example, the PCI bus type's ``->pm.resume_noirq()`` puts the device
 	into the full-power state (D0 in the PCI terminology) and restores the
 	standard configuration registers of the device.  Then it calls the
 	device driver's ``->pm.resume_noirq()`` method to perform device-specific
 	actions.
    2.	The ``->resume_early`` methods should prepare devices for the execution
 	of the resume methods.  This generally involves undoing the actions of
 	the preceding ``suspend_late`` phase.
    3.	The ``->resume`` methods should bring the device back to its operating
 	state, so that it can perform normal I/O.  This generally involves
 	undoing the actions of the ``suspend`` phase.
    4.	The ``complete`` phase should undo the actions of the ``prepare`` phase.
        For this reason, unlike the other resume-related phases, during the
        ``complete`` phase the device hierarchy is traversed bottom-up.
 	Note, however, that new children may be registered below the device as
 	soon as the ``->resume`` callbacks occur; it's not necessary to wait
 	until the ``complete`` phase with that.
 	Moreover, if the preceding ``->prepare`` callback returned a positive
 	number, the device may have been left in runtime suspend throughout the
 	whole system suspend and resume (the ``suspend``, ``suspend_late``,
 	``suspend_noirq`` phases of system suspend and the ``resume_noirq``,
 	``resume_early``, ``resume`` phases of system resume may have been
 	skipped for it).  In that case, the ``->complete`` callback is entirely
 	responsible for putting the device into a consistent state after system
 	suspend if necessary.  [For example, it may need to queue up a runtime
 	resume request for the device for this purpose.]  To check if that is
 	the case, the ``->complete`` callback can consult the device's
 	``power.direct_complete`` flag.  Namely, if that flag is set when the
 	``->complete`` callback is being run, it has been called directly after
 	the preceding ``->prepare`` and special actions may be required
 	to make the device work correctly afterward.
 At the end of these phases, drivers should be as functional as they were before
 suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
 gated on.
 However, the details here may again be platform-specific.  For example,
 some systems support multiple "run" states, and the mode in effect at
 the end of resume might not be the one which preceded suspension.
 That means availability of certain clocks or power supplies changed,
 which could easily affect how a driver works.
 Drivers need to be able to handle hardware which has been reset since all of the
 suspend methods were called, for example by complete reinitialization.
 This may be the hardest part, and the one most protected by NDA'd documents
 and chip errata.  It's simplest if the hardware state hasn't changed since
 the suspend was carried out, but that can only be guaranteed if the target
 system sleep state entered was suspend-to-idle.  For the other system sleep
 states that may not be the case (and usually isn't for ACPI-defined system sleep
 states, like S3).
 Drivers must also be prepared to notice that the device has been removed
 while the system was powered down, whenever that's physically possible.
 PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
 where common Linux platforms will see such removal.  Details of how drivers
 will notice and handle such removals are currently bus-specific, and often
 involve a separate thread.
 These callbacks may return an error value, but the PM core will ignore such
 errors since there's nothing it can do about them other than printing them in
 the system log.
 Entering Hibernation
 --------------------
 Hibernating the system is more complicated than putting it into sleep states,
 because it involves creating and saving a system image.  Therefore there are
 more phases for hibernation, with a different set of callbacks.  These phases
 always run after tasks have been frozen and enough memory has been freed.
 The general procedure for hibernation is to quiesce all devices ("freeze"),
 create an image of the system memory while everything is stable, reactivate all
 devices ("thaw"), write the image to permanent storage, and finally shut down
 the system ("power off").  The phases used to accomplish this are: ``prepare``,
 ``freeze``, ``freeze_late``, ``freeze_noirq``, ``thaw_noirq``, ``thaw_early``,
 ``thaw``, ``complete``, ``prepare``, ``poweroff``, ``poweroff_late``,
 ``poweroff_noirq``.
    1.	The ``prepare`` phase is discussed in the "Entering System Suspend"
 	section above.
    2.	The ``->freeze`` methods should quiesce the device so that it doesn't
 	generate IRQs or DMA, and they may need to save the values of device
 	registers.  However the device does not have to be put in a low-power
 	state, and to save time it's best not to do so.  Also, the device should
 	not be prepared to generate wakeup events.
    3.	The ``freeze_late`` phase is analogous to the ``suspend_late`` phase
 	described earlier, except that the device should not be put into a
 	low-power state and should not be allowed to generate wakeup events.
    4.	The ``freeze_noirq`` phase is analogous to the ``suspend_noirq`` phase
 	discussed earlier, except again that the device should not be put into
 	a low-power state and should not be allowed to generate wakeup events.
 At this point the system image is created.  All devices should be inactive and
 the contents of memory should remain undisturbed while this happens, so that the
 image forms an atomic snapshot of the system state.
    5.	The ``thaw_noirq`` phase is analogous to the ``resume_noirq`` phase
 	discussed earlier.  The main difference is that its methods can assume
 	the device is in the same state as at the end of the ``freeze_noirq``
 	phase.
    6.	The ``thaw_early`` phase is analogous to the ``resume_early`` phase
 	described above.  Its methods should undo the actions of the preceding
 	``freeze_late``, if necessary.
    7.	The ``thaw`` phase is analogous to the ``resume`` phase discussed
 	earlier.  Its methods should bring the device back to an operating
 	state, so that it can be used for saving the image if necessary.
    8.	The ``complete`` phase is discussed in the "Leaving System Suspend"
 	section above.
 At this point the system image is saved, and the devices then need to be
 prepared for the upcoming system shutdown.  This is much like suspending them
 before putting the system into the suspend-to-idle, shallow or deep sleep state,
 and the phases are similar.
    9.	The ``prepare`` phase is discussed above.
    10.	The ``poweroff`` phase is analogous to the ``suspend`` phase.
    11.	The ``poweroff_late`` phase is analogous to the ``suspend_late`` phase.
    12.	The ``poweroff_noirq`` phase is analogous to the ``suspend_noirq`` phase.
 The ``->poweroff``, ``->poweroff_late`` and ``->poweroff_noirq`` callbacks
 should do essentially the same things as the ``->suspend``, ``->suspend_late``
 and ``->suspend_noirq`` callbacks, respectively.  The only notable difference is
 that they need not store the device register values, because the registers
 should already have been stored during the ``freeze``, ``freeze_late`` or
 ``freeze_noirq`` phases.
 Leaving Hibernation
 -------------------
 Resuming from hibernation is, again, more complicated than resuming from a sleep
 state in which the contents of main memory are preserved, because it requires
 a system image to be loaded into memory and the pre-hibernation memory contents
 to be restored before control can be passed back to the image kernel.
 Although in principle the image might be loaded into memory and the
 pre-hibernation memory contents restored by the boot loader, in practice this
 can't be done because boot loaders aren't smart enough and there is no
 established protocol for passing the necessary information.  So instead, the
 boot loader loads a fresh instance of the kernel, called "the restore kernel",
 into memory and passes control to it in the usual way.  Then the restore kernel
 reads the system image, restores the pre-hibernation memory contents, and passes
 control to the image kernel.  Thus two different kernel instances are involved
 in resuming from hibernation.  In fact, the restore kernel may be completely
 different from the image kernel: a different configuration and even a different
 version.  This has important consequences for device drivers and their
 subsystems.
 To be able to load the system image into memory, the restore kernel needs to
 include at least a subset of device drivers allowing it to access the storage
 medium containing the image, although it doesn't need to include all of the
 drivers present in the image kernel.  After the image has been loaded, the
 devices managed by the restore kernel need to be prepared for passing control
 back to the image kernel.  This is very similar to the initial steps involved in
 creating a system image, and it is accomplished in the same way, using
 ``prepare``, ``freeze``, and ``freeze_noirq`` phases.  However, the devices
 affected by these phases are only those having drivers in the restore kernel;
 other devices will still be in whatever state the boot loader left them.
 Should the restoration of the pre-hibernation memory contents fail, the restore
 kernel would go through the "thawing" procedure described above, using the
 ``thaw_noirq``, ``thaw_early``, ``thaw``, and ``complete`` phases, and then
 continue running normally.  This happens only rarely.  Most often the
 pre-hibernation memory contents are restored successfully and control is passed
 to the image kernel, which then becomes responsible for bringing the system back
 to the working state.
 To achieve this, the image kernel must restore the devices' pre-hibernation
 functionality.  The operation is much like waking up from a sleep state (with
 the memory contents preserved), although it involves different phases:
 ``restore_noirq``, ``restore_early``, ``restore``, ``complete``.
    1.	The ``restore_noirq`` phase is analogous to the ``resume_noirq`` phase.
    2.	The ``restore_early`` phase is analogous to the ``resume_early`` phase.
    3.	The ``restore`` phase is analogous to the ``resume`` phase.
    4.	The ``complete`` phase is discussed above.
 The main difference from ``resume[_early|_noirq]`` is that
 ``restore[_early|_noirq]`` must assume the device has been accessed and
 reconfigured by the boot loader or the restore kernel.  Consequently, the state
 of the device may be different from the state remembered from the ``freeze``,
 ``freeze_late`` and ``freeze_noirq`` phases.  The device may even need to be
 reset and completely re-initialized.  In many cases this difference doesn't
 matter, so the ``->resume[_early|_noirq]`` and ``->restore[_early|_noirq]``
 method pointers can be set to the same routines.  Nevertheless, different
 callback pointers are used in case there is a situation where it actually does
 matter.
 Power Management Notifiers
 ==========================
 There are some operations that cannot be carried out by the power management
 callbacks discussed above, because the callbacks occur too late or too early.
 To handle these cases, subsystems and device drivers may register power
 management notifiers that are called before tasks are frozen and after they have
 been thawed.  Generally speaking, the PM notifiers are suitable for performing
 actions that either require user space to be available, or at least won't
 interfere with user space.
 For details refer to :doc:`notifiers`.
 Device Low-Power (suspend) States
 =================================
 Device low-power states aren't standard.  One device might only handle
 "on" and "off", while another might support a dozen different versions of
 "on" (how many engines are active?), plus a state that gets back to "on"
 faster than from a full "off".
 Some buses define rules about what different suspend states mean.  PCI
 gives one example: after the suspend sequence completes, a non-legacy
 PCI device may not perform DMA or issue IRQs, and any wakeup events it
 issues would be issued through the PME# bus signal.  Plus, there are
 several PCI-standard device states, some of which are optional.
 In contrast, integrated system-on-chip processors often use IRQs as the
 wakeup event sources (so drivers would call :c:func:`enable_irq_wake`) and
 might be able to treat DMA completion as a wakeup event (sometimes DMA can stay
 active too, it'd only be the CPU and some peripherals that sleep).
 Some details here may be platform-specific.  Systems may have devices that
 can be fully active in certain sleep states, such as an LCD display that's
 refreshed using DMA while most of the system is sleeping lightly ... and
 its frame buffer might even be updated by a DSP or other non-Linux CPU while
 the Linux control processor stays idle.
 Moreover, the specific actions taken may depend on the target system state.
 One target system state might allow a given device to be very operational;
 another might require a hard shut down with re-initialization on resume.
 And two different target systems might use the same device in different
 ways; the aforementioned LCD might be active in one product's "standby",
 but a different product using the same SOC might work differently.
 Device Power Management Domains
 ===============================
 Sometimes devices share reference clocks or other power resources.  In those
 cases it generally is not possible to put devices into low-power states
 individually.  Instead, a set of devices sharing a power resource can be put
 into a low-power state together at the same time by turning off the shared
 power resource.  Of course, they also need to be put into the full-power state
 together, by turning the shared power resource on.  A set of devices with this
 property is often referred to as a power domain. A power domain may also be
 nested inside another power domain. The nested domain is referred to as the
 sub-domain of the parent domain.
 Support for power domains is provided through the :c:member:`pm_domain` field of
 |struct device|.  This field is a pointer to an object of type
 |struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set
 of power management callbacks analogous to the subsystem-level and device driver
 callbacks that are executed for the given device during all power transitions,
 instead of the respective subsystem-level callbacks.  Specifically, if a
 device's :c:member:`pm_domain` pointer is not NULL, the ``->suspend()`` callback
 from the object pointed to by it will be executed instead of its subsystem's
 (e.g. bus type's) ``->suspend()`` callback and analogously for all of the
 remaining callbacks.  In other words, power management domain callbacks, if
 defined for the given device, always take precedence over the callbacks provided
 by the device's subsystem (e.g. bus type).
 The support for device power management domains is only relevant to platforms
 needing to use the same device driver power management callbacks in many
 different power domain configurations and wanting to avoid incorporating the
 support for power domains into subsystem-level callbacks, for example by
 modifying the platform bus type.  Other platforms need not implement it or take
 it into account in any way.
 Devices may be defined as IRQ-safe which indicates to the PM core that their
 runtime PM callbacks may be invoked with disabled interrupts (see
 :file:`Documentation/power/runtime_pm.txt` for more information).  If an
 IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be
 disallowed, unless the domain itself is defined as IRQ-safe. However, it
 makes sense to define a PM domain as IRQ-safe only if all the devices in it
 are IRQ-safe. Moreover, if an IRQ-safe domain has a parent domain, the runtime
 PM of the parent is only allowed if the parent itself is IRQ-safe too with the
 additional restriction that all child domains of an IRQ-safe parent must also
 be IRQ-safe.
 Runtime Power Management
 ========================
 Many devices are able to dynamically power down while the system is still
 running. This feature is useful for devices that are not being used, and
 can offer significant power savings on a running system.  These devices
 often support a range of runtime power states, which might use names such
 as "off", "sleep", "idle", "active", and so on.  Those states will in some
 cases (like PCI) be partially constrained by the bus the device uses, and will
 usually include hardware states that are also used in system sleep states.
 A system-wide power transition can be started while some devices are in low
 power states due to runtime power management.  The system sleep PM callbacks
 should recognize such situations and react to them appropriately, but the
 necessary actions are subsystem-specific.
 In some cases the decision may be made at the subsystem level while in other
 cases the device driver may be left to decide.  In some cases it may be
 desirable to leave a suspended device in that state during a system-wide power
 transition, but in other cases the device must be put back into the full-power
 state temporarily, for example so that its system wakeup capability can be
 disabled.  This all depends on the hardware and the design of the subsystem and
 device driver in question.
 During system-wide resume from a sleep state it's easiest to put devices into
 the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`.
 Refer to that document for more information regarding this particular issue as
 well as for information on the device runtime power management framework in
 general.
--- a/Documentation/driver-api/pm/index.rst
+++ b/Documentation/driver-api/pm/index.rst
@ -0,0 +1,16 @@
 =======================
 Device Power Management
 =======================
 .. toctree::
   devices
   notifiers
   types
 .. only::  subproject and html
   Indices
   =======
   * :ref:`genindex`
--- a/Documentation/driver-api/pm/notifiers.rst
+++ b/Documentation/driver-api/pm/notifiers.rst
@ -0,0 +1,70 @@
 =============================
 Suspend/Hibernation Notifiers
 =============================
 ::
 Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 There are some operations that subsystems or drivers may want to carry out
 before hibernation/suspend or after restore/resume, but they require the system
 to be fully functional, so the drivers' and subsystems' ``->suspend()`` and
 ``->resume()`` or even ``->prepare()`` and ``->complete()`` callbacks are not
 suitable for this purpose.
 For example, device drivers may want to upload firmware to their devices after
 resume/restore, but they cannot do it by calling :c:func:`request_firmware()`
 from their ``->resume()`` or ``->complete()`` callback routines (user land
 processes are frozen at these points).  The solution may be to load the firmware
 into memory before processes are frozen and upload it from there in the
 ``->resume()`` routine.  A suspend/hibernation notifier may be used for that.
 Subsystems or drivers having such needs can register suspend notifiers that
 will be called upon the following events by the PM core:
 ``PM_HIBERNATION_PREPARE``
 	The system is going to hibernate, tasks will be frozen immediately. This
 	is different from ``PM_SUSPEND_PREPARE`` below, because in this case
 	additional work is done between the notifiers and the invocation of PM
 	callbacks for the "freeze" transition.
 ``PM_POST_HIBERNATION``
 	The system memory state has been restored from a hibernation image or an
 	error occurred during hibernation.  Device restore callbacks have been
 	executed and tasks have been thawed.
 ``PM_RESTORE_PREPARE``
 	The system is going to restore a hibernation image.  If all goes well,
 	the restored image kernel will issue a ``PM_POST_HIBERNATION``
 	notification.
 ``PM_POST_RESTORE``
 	An error occurred during restore from hibernation.  Device restore
 	callbacks have been executed and tasks have been thawed.
 ``PM_SUSPEND_PREPARE``
 	The system is preparing for suspend.
 ``PM_POST_SUSPEND``
 	The system has just resumed or an error occurred during suspend.  Device
 	resume callbacks have been executed and tasks have been thawed.
 It is generally assumed that whatever the notifiers do for
 ``PM_HIBERNATION_PREPARE``, should be undone for ``PM_POST_HIBERNATION``.
 Analogously, operations carried out for ``PM_SUSPEND_PREPARE`` should be
 reversed for ``PM_POST_SUSPEND``.
 Moreover, if one of the notifiers fails for the ``PM_HIBERNATION_PREPARE`` or
 ``PM_SUSPEND_PREPARE`` event, the notifiers that have already succeeded for that
 event will be called for ``PM_POST_HIBERNATION`` or ``PM_POST_SUSPEND``,
 respectively.
 The hibernation and suspend notifiers are called with :c:data:`pm_mutex` held.
 They are defined in the usual way, but their last argument is meaningless (it is
 always NULL).
 To register and/or unregister a suspend notifier use
 :c:func:`register_pm_notifier()` and :c:func:`unregister_pm_notifier()`,
 respectively (both defined in :file:`include/linux/suspend.h`).  If you don't
 need to unregister the notifier, you can also use the :c:func:`pm_notifier()`
 macro defined in :file:`include/linux/suspend.h`.
--- a/Documentation/driver-api/pm/types.rst
+++ b/Documentation/driver-api/pm/types.rst
@ -0,0 +1,5 @@
 ==================================
 Device Power Management Data Types
 ==================================
 .. kernel-doc:: include/linux/pm.h
--- a/Documentation/driver-api/regulator.rst
+++ b/Documentation/driver-api/regulator.rst
@ -0,0 +1,170 @@
 .. Copyright 2007-2008 Wolfson Microelectronics
 ..   This documentation is free software; you can redistribute
 ..   it and/or modify it under the terms of the GNU General Public
 ..   License version 2 as published by the Free Software Foundation.
 =================================
 Voltage and current regulator API
 =================================
 :Author: Liam Girdwood
 :Author: Mark Brown
 Introduction
 ============
 This framework is designed to provide a standard kernel interface to
 control voltage and current regulators.
 The intention is to allow systems to dynamically control regulator power
 output in order to save power and prolong battery life. This applies to
 both voltage regulators (where voltage output is controllable) and
 current sinks (where current limit is controllable).
 Note that additional (and currently more complete) documentation is
 available in the Linux kernel source under
 ``Documentation/power/regulator``.
 Glossary
 --------
 The regulator API uses a number of terms which may not be familiar:
 Regulator
    Electronic device that supplies power to other devices. Most regulators
    can enable and disable their output and some can also control their
    output voltage or current.
 Consumer
    Electronic device which consumes power provided by a regulator. These
    may either be static, requiring only a fixed supply, or dynamic,
    requiring active management of the regulator at runtime.
 Power Domain
    The electronic circuit supplied by a given regulator, including the
    regulator and all consumer devices. The configuration of the regulator
    is shared between all the components in the circuit.
 Power Management Integrated Circuit (PMIC)
    An IC which contains numerous regulators and often also other
    subsystems. In an embedded system the primary PMIC is often equivalent
    to a combination of the PSU and southbridge in a desktop system.
 Consumer driver interface
 =========================
 This offers a similar API to the kernel clock framework. Consumer
 drivers use `get <#API-regulator-get>`__ and
 `put <#API-regulator-put>`__ operations to acquire and release
 regulators. Functions are provided to `enable <#API-regulator-enable>`__
 and `disable <#API-regulator-disable>`__ the regulator and to get and
 set the runtime parameters of the regulator.
 When requesting regulators consumers use symbolic names for their
 supplies, such as "Vcc", which are mapped into actual regulator devices
 by the machine interface.
 A stub version of this API is provided when the regulator framework is
 not in use in order to minimise the need to use ifdefs.
 Enabling and disabling
 ----------------------
 The regulator API provides reference counted enabling and disabling of
 regulators. Consumer devices use the :c:func:`regulator_enable()` and
 :c:func:`regulator_disable()` functions to enable and disable
 regulators. Calls to the two functions must be balanced.
 Note that since multiple consumers may be using a regulator and machine
 constraints may not allow the regulator to be disabled there is no
 guarantee that calling :c:func:`regulator_disable()` will actually
 cause the supply provided by the regulator to be disabled. Consumer
 drivers should assume that the regulator may be enabled at all times.
 Configuration
 -------------
 Some consumer devices may need to be able to dynamically configure their
 supplies. For example, MMC drivers may need to select the correct
 operating voltage for their cards. This may be done while the regulator
 is enabled or disabled.
 The :c:func:`regulator_set_voltage()` and
 :c:func:`regulator_set_current_limit()` functions provide the primary
 interface for this. Both take ranges of voltages and currents, supporting
 drivers that do not require a specific value (eg, CPU frequency scaling
 normally permits the CPU to use a wider range of supply voltages at lower
 frequencies but does not require that the supply voltage be lowered). Where
 an exact value is required both minimum and maximum values should be
 identical.
 Callbacks
 ---------
 Callbacks may also be registered for events such as regulation failures.
 Regulator driver interface
 ==========================
 Drivers for regulator chips register the regulators with the regulator
 core, providing operations structures to the core. A notifier interface
 allows error conditions to be reported to the core.
 Registration should be triggered by explicit setup done by the platform,
 supplying a struct :c:type:`regulator_init_data` for the regulator
 containing constraint and supply information.
 Machine interface
 =================
 This interface provides a way to define how regulators are connected to
 consumers on a given system and what the valid operating parameters are
 for the system.
 Supplies
 --------
 Regulator supplies are specified using struct
 :c:type:`regulator_consumer_supply`. This is done at driver registration
 time as part of the machine constraints.
 Constraints
 -----------
 As well as defining the connections the machine interface also provides
 constraints defining the operations that clients are allowed to perform
 and the parameters that may be set. This is required since generally
 regulator devices will offer more flexibility than it is safe to use on
 a given system, for example supporting higher supply voltages than the
 consumers are rated for.
 This is done at driver registration time by providing a
 struct :c:type:`regulation_constraints`.
 The constraints may also specify an initial configuration for the
 regulator, which is particularly useful for use with static consumers.
 API reference
 =============
 Due to limitations of the kernel documentation framework and the
 existing layout of the source code the entire regulator API is
 documented here.
 .. kernel-doc:: include/linux/regulator/consumer.h
   :internal:
 .. kernel-doc:: include/linux/regulator/machine.h
   :internal:
 .. kernel-doc:: include/linux/regulator/driver.h
   :internal:
 .. kernel-doc:: drivers/regulator/core.c
   :export:
--- a/Documentation/hwmon/ds1621
+++ b/Documentation/hwmon/ds1621
@ -117,10 +117,10 @@ support, which is achieved via the R0 and R1 config register bits, where:
 R0..R1
 ------
- 0  0 => 9 bits, 0.5 degrees Celcius
+ 0  0 => 9 bits, 0.5 degrees Celsius
- 1  0 => 10 bits, 0.25 degrees Celcius
+ 1  0 => 10 bits, 0.25 degrees Celsius
- 0  1 => 11 bits, 0.125 degrees Celcius
+ 0  1 => 11 bits, 0.125 degrees Celsius
- 1  1 => 12 bits, 0.0625 degrees Celcius
+ 1  1 => 12 bits, 0.0625 degrees Celsius
 Note:
 At initial device power-on, the default resolution is set to 12-bits.
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@ -47,7 +47,7 @@ These books get into the details of how specific kernel subsystems work
 from the point of view of a kernel developer.  Much of the information here
 is taken directly from the kernel source, with supplemental material added
 as needed (or at least as we managed to add it — probably *not* all that is
-needed). 
+needed).
 .. toctree::
   :maxdepth: 2
@ -68,6 +68,14 @@ Korean translations
   translations/ko_KR/index
 Chinese translations
 --------------------
 .. toctree::
   :maxdepth: 1
   translations/zh_CN/index
 Indices and tables
 ==================
--- a/Documentation/input/input.txt
+++ b/Documentation/input/input.txt
@ -279,10 +279,10 @@ struct input_event {
  'time' is the timestamp, it returns the time at which the event happened.
 Type is for example EV_REL for relative moment, EV_KEY for a keypress or
-release. More types are defined in include/linux/input.h.
+release. More types are defined in include/uapi/linux/input-event-codes.h.
  'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete
-list is in include/linux/input.h.
+list is in include/uapi/linux/input-event-codes.h.
  'value' is the value the event carries. Either a relative change for
 EV_REL, absolute new value for EV_ABS (joysticks ...), or 0 for EV_KEY for
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ b/Documentation/ioctl/botching-up-ioctls.txt
@ -24,7 +24,7 @@ Prerequisites
 -------------
 First the prerequisites. Without these you have already failed, because you
-will need to add a a 32-bit compat layer:
+will need to add a 32-bit compat layer:
 * Only use fixed sized integers. To avoid conflicts with typedefs in userspace
   the kernel has special types like __u32, __s64. Use them.
--- a/Documentation/livepatch/livepatch.txt
+++ b/Documentation/livepatch/livepatch.txt
@ -358,7 +358,7 @@ The current Livepatch implementation has several limitations:
    Each function has to handle TOC and save LR before it could call
    the ftrace handler. This operation has to be reverted on return.
    Fortunately, the generic ftrace code has the same problem and all
-    this is is handled on the ftrace level.
+    this is handled on the ftrace level.
  + Kretprobes using the ftrace framework conflict with the patched
--- a/Documentation/media/Makefile
+++ b/Documentation/media/Makefile
@ -36,7 +36,7 @@ quiet_cmd_genpdf = GENPDF  $2
      cmd_genpdf = convert $2 $3
 quiet_cmd_gendot = DOT     $2
-      cmd_gendot = dot -Tsvg $2 > $3
+      cmd_gendot = dot -Tsvg $2 > $3 || { rm -f $3; exit 1; }
 %.pdf: %.svg
 	@$(call cmd,genpdf,$<,$@)
@ -103,6 +103,7 @@ html: all
 epub: all
 xml: all
 latex: $(IMGPDF) all
 linkcheck:
 clean:
 	-rm -f $(DOTTGT) $(IMGTGT) ${TARGETS} 2>/dev/null
--- a/Documentation/networking/kcm.txt
+++ b/Documentation/networking/kcm.txt
@ -272,7 +272,7 @@ on the socket thus waking up the application thread. When the application
 sees the error (which may just be a disconnect) it should unattach the
 socket from KCM and then close it. It is assumed that once an error is
 posted on the TCP socket the data stream is unrecoverable (i.e. an error
-may have occurred in in the middle of receiving a messssge).
+may have occurred in the middle of receiving a message).
 TCP connection monitoring
 -------------------------
--- a/Documentation/power/00-INDEX
+++ b/Documentation/power/00-INDEX
@ -14,8 +14,6 @@ freezing-of-tasks.txt
 	- How processes and controlled during suspend
 interface.txt
 	- Power management user interface in /sys/power
 notifiers.txt
 	- Registering suspend notifiers in device drivers
 opp.txt
 	- Operating Performance Point library
 pci.txt
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@ -1,716 +0,0 @@
 Device Power Management
 Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
 Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
 Copyright (c) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 Most of the code in Linux is device drivers, so most of the Linux power
 management (PM) code is also driver-specific.  Most drivers will do very
 little; others, especially for platforms with small batteries (like cell
 phones), will do a lot.
 This writeup gives an overview of how drivers interact with system-wide
 power management goals, emphasizing the models and interfaces that are
 shared by everything that hooks up to the driver model core.  Read it as
 background for the domain-specific work you'd do with any specific driver.
 Two Models for Device Power Management
 ======================================
 Drivers will use one or both of these models to put devices into low-power
 states:
    System Sleep model:
 	Drivers can enter low-power states as part of entering system-wide
 	low-power states like "suspend" (also known as "suspend-to-RAM"), or
 	(mostly for systems with disks) "hibernation" (also known as
 	"suspend-to-disk").
 	This is something that device, bus, and class drivers collaborate on
 	by implementing various role-specific suspend and resume methods to
 	cleanly power down hardware and software subsystems, then reactivate
 	them without loss of data.
 	Some drivers can manage hardware wakeup events, which make the system
 	leave the low-power state.  This feature may be enabled or disabled
 	using the relevant /sys/devices/.../power/wakeup file (for Ethernet
 	drivers the ioctl interface used by ethtool may also be used for this
 	purpose); enabling it may cost some power usage, but let the whole
 	system enter low-power states more often.
    Runtime Power Management model:
 	Devices may also be put into low-power states while the system is
 	running, independently of other power management activity in principle.
 	However, devices are not generally independent of each other (for
 	example, a parent device cannot be suspended unless all of its child
 	devices have been suspended).  Moreover, depending on the bus type the
 	device is on, it may be necessary to carry out some bus-specific
 	operations on the device for this purpose.  Devices put into low power
 	states at run time may require special handling during system-wide power
 	transitions (suspend or hibernation).
 	For these reasons not only the device driver itself, but also the
 	appropriate subsystem (bus type, device type or device class) driver and
 	the PM core are involved in runtime power management.  As in the system
 	sleep power management case, they need to collaborate by implementing
 	various role-specific suspend and resume methods, so that the hardware
 	is cleanly powered down and reactivated without data or service loss.
 There's not a lot to be said about those low-power states except that they are
 very system-specific, and often device-specific.  Also, that if enough devices
 have been put into low-power states (at runtime), the effect may be very similar
 to entering some system-wide low-power state (system sleep) ... and that
 synergies exist, so that several drivers using runtime PM might put the system
 into a state where even deeper power saving options are available.
 Most suspended devices will have quiesced all I/O: no more DMA or IRQs (except
 for wakeup events), no more data read or written, and requests from upstream
 drivers are no longer accepted.  A given bus or platform may have different
 requirements though.
 Examples of hardware wakeup events include an alarm from a real time clock,
 network wake-on-LAN packets, keyboard or mouse activity, and media insertion
 or removal (for PCMCIA, MMC/SD, USB, and so on).
 Interfaces for Entering System Sleep States
 ===========================================
 There are programming interfaces provided for subsystems (bus type, device type,
 device class) and device drivers to allow them to participate in the power
 management of devices they are concerned with.  These interfaces cover both
 system sleep and runtime power management.
 Device Power Management Operations
 ----------------------------------
 Device power management operations, at the subsystem level as well as at the
 device driver level, are implemented by defining and populating objects of type
 struct dev_pm_ops:
 struct dev_pm_ops {
 	int (*prepare)(struct device *dev);
 	void (*complete)(struct device *dev);
 	int (*suspend)(struct device *dev);
 	int (*resume)(struct device *dev);
 	int (*freeze)(struct device *dev);
 	int (*thaw)(struct device *dev);
 	int (*poweroff)(struct device *dev);
 	int (*restore)(struct device *dev);
 	int (*suspend_late)(struct device *dev);
 	int (*resume_early)(struct device *dev);
 	int (*freeze_late)(struct device *dev);
 	int (*thaw_early)(struct device *dev);
 	int (*poweroff_late)(struct device *dev);
 	int (*restore_early)(struct device *dev);
 	int (*suspend_noirq)(struct device *dev);
 	int (*resume_noirq)(struct device *dev);
 	int (*freeze_noirq)(struct device *dev);
 	int (*thaw_noirq)(struct device *dev);
 	int (*poweroff_noirq)(struct device *dev);
 	int (*restore_noirq)(struct device *dev);
 	int (*runtime_suspend)(struct device *dev);
 	int (*runtime_resume)(struct device *dev);
 	int (*runtime_idle)(struct device *dev);
 };
 This structure is defined in include/linux/pm.h and the methods included in it
 are also described in that file.  Their roles will be explained in what follows.
 For now, it should be sufficient to remember that the last three methods are
 specific to runtime power management while the remaining ones are used during
 system-wide power transitions.
 There also is a deprecated "old" or "legacy" interface for power management
 operations available at least for some subsystems.  This approach does not use
 struct dev_pm_ops objects and it is suitable only for implementing system sleep
 power management methods.  Therefore it is not described in this document, so
 please refer directly to the source code for more information about it.
 Subsystem-Level Methods
 -----------------------
 The core methods to suspend and resume devices reside in struct dev_pm_ops
 pointed to by the ops member of struct dev_pm_domain, or by the pm member of
 struct bus_type, struct device_type and struct class.  They are mostly of
 interest to the people writing infrastructure for platforms and buses, like PCI
 or USB, or device type and device class drivers.  They also are relevant to the
 writers of device drivers whose subsystems (PM domains, device types, device
 classes and bus types) don't provide all power management methods.
 Bus drivers implement these methods as appropriate for the hardware and the
 drivers using it; PCI works differently from USB, and so on.  Not many people
 write subsystem-level drivers; most driver code is a "device driver" that builds
 on top of bus-specific framework code.
 For more information on these driver calls, see the description later;
 they are called in phases for every device, respecting the parent-child
 sequencing in the driver model tree.
 /sys/devices/.../power/wakeup files
 -----------------------------------
 All device objects in the driver model contain fields that control the handling
 of system wakeup events (hardware signals that can force the system out of a
 sleep state).  These fields are initialized by bus or device driver code using
 device_set_wakeup_capable() and device_set_wakeup_enable(), defined in
 include/linux/pm_wakeup.h.
 The "power.can_wakeup" flag just records whether the device (and its driver) can
 physically support wakeup events.  The device_set_wakeup_capable() routine
 affects this flag.  The "power.wakeup" field is a pointer to an object of type
 struct wakeup_source used for controlling whether or not the device should use
 its system wakeup mechanism and for notifying the PM core of system wakeup
 events signaled by the device.  This object is only present for wakeup-capable
 devices (i.e. devices whose "can_wakeup" flags are set) and is created (or
 removed) by device_set_wakeup_capable().
 Whether or not a device is capable of issuing wakeup events is a hardware
 matter, and the kernel is responsible for keeping track of it.  By contrast,
 whether or not a wakeup-capable device should issue wakeup events is a policy
 decision, and it is managed by user space through a sysfs attribute: the
 "power/wakeup" file.  User space can write the strings "enabled" or "disabled"
 to it to indicate whether or not, respectively, the device is supposed to signal
 system wakeup.  This file is only present if the "power.wakeup" object exists
 for the given device and is created (or removed) along with that object, by
 device_set_wakeup_capable().  Reads from the file will return the corresponding
 string.
 The "power/wakeup" file is supposed to contain the "disabled" string initially
 for the majority of devices; the major exceptions are power buttons, keyboards,
 and Ethernet adapters whose WoL (wake-on-LAN) feature has been set up with
 ethtool.  It should also default to "enabled" for devices that don't generate
 wakeup requests on their own but merely forward wakeup requests from one bus to
 another (like PCI Express ports).
 The device_may_wakeup() routine returns true only if the "power.wakeup" object
 exists and the corresponding "power/wakeup" file contains the string "enabled".
 This information is used by subsystems, like the PCI bus type code, to see
 whether or not to enable the devices' wakeup mechanisms.  If device wakeup
 mechanisms are enabled or disabled directly by drivers, they also should use
 device_may_wakeup() to decide what to do during a system sleep transition.
 Device drivers, however, are not supposed to call device_set_wakeup_enable()
 directly in any case.
 It ought to be noted that system wakeup is conceptually different from "remote
 wakeup" used by runtime power management, although it may be supported by the
 same physical mechanism.  Remote wakeup is a feature allowing devices in
 low-power states to trigger specific interrupts to signal conditions in which
 they should be put into the full-power state.  Those interrupts may or may not
 be used to signal system wakeup events, depending on the hardware design.  On
 some systems it is impossible to trigger them from system sleep states.  In any
 case, remote wakeup should always be enabled for runtime power management for
 all devices and drivers that support it.
 /sys/devices/.../power/control files
 ------------------------------------
 Each device in the driver model has a flag to control whether it is subject to
 runtime power management.  This flag, called runtime_auto, is initialized by the
 bus type (or generally subsystem) code using pm_runtime_allow() or
 pm_runtime_forbid(); the default is to allow runtime power management.
 The setting can be adjusted by user space by writing either "on" or "auto" to
 the device's power/control sysfs file.  Writing "auto" calls pm_runtime_allow(),
 setting the flag and allowing the device to be runtime power-managed by its
 driver.  Writing "on" calls pm_runtime_forbid(), clearing the flag, returning
 the device to full power if it was in a low-power state, and preventing the
 device from being runtime power-managed.  User space can check the current value
 of the runtime_auto flag by reading the file.
 The device's runtime_auto flag has no effect on the handling of system-wide
 power transitions.  In particular, the device can (and in the majority of cases
 should and will) be put into a low-power state during a system-wide transition
 to a sleep state even though its runtime_auto flag is clear.
 For more information about the runtime power management framework, refer to
 Documentation/power/runtime_pm.txt.
 Calling Drivers to Enter and Leave System Sleep States
 ======================================================
 When the system goes into a sleep state, each device's driver is asked to
 suspend the device by putting it into a state compatible with the target
 system state.  That's usually some version of "off", but the details are
 system-specific.  Also, wakeup-enabled devices will usually stay partly
 functional in order to wake the system.
 When the system leaves that low-power state, the device's driver is asked to
 resume it by returning it to full power.  The suspend and resume operations
 always go together, and both are multi-phase operations.
 For simple drivers, suspend might quiesce the device using class code
 and then turn its hardware as "off" as possible during suspend_noirq.  The
 matching resume calls would then completely reinitialize the hardware
 before reactivating its class I/O queues.
 More power-aware drivers might prepare the devices for triggering system wakeup
 events.
 Call Sequence Guarantees
 ------------------------
 To ensure that bridges and similar links needing to talk to a device are
 available when the device is suspended or resumed, the device tree is
 walked in a bottom-up order to suspend devices.  A top-down order is
 used to resume those devices.
 The ordering of the device tree is defined by the order in which devices
 get registered:  a child can never be registered, probed or resumed before
 its parent; and can't be removed or suspended after that parent.
 The policy is that the device tree should match hardware bus topology.
 (Or at least the control bus, for devices which use multiple busses.)
 In particular, this means that a device registration may fail if the parent of
 the device is suspending (i.e. has been chosen by the PM core as the next
 device to suspend) or has already suspended, as well as after all of the other
 devices have been suspended.  Device drivers must be prepared to cope with such
 situations.
 System Power Management Phases
 ------------------------------
 Suspending or resuming the system is done in several phases.  Different phases
 are used for freeze, standby, and memory sleep states ("suspend-to-RAM") and the
 hibernation state ("suspend-to-disk").  Each phase involves executing callbacks
 for every device before the next phase begins.  Not all busses or classes
 support all these callbacks and not all drivers use all the callbacks.  The
 various phases always run after tasks have been frozen and before they are
 unfrozen.  Furthermore, the *_noirq phases run at a time when IRQ handlers have
 been disabled (except for those marked with the IRQF_NO_SUSPEND flag).
 All phases use PM domain, bus, type, class or driver callbacks (that is, methods
 defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or
 dev->driver->pm).  These callbacks are regarded by the PM core as mutually
 exclusive.  Moreover, PM domain callbacks always take precedence over all of the
 other callbacks and, for example, type callbacks take precedence over bus, class
 and driver callbacks.  To be precise, the following rules are used to determine
 which callback to execute in the given phase:
    1.	If dev->pm_domain is present, the PM core will choose the callback
 	included in dev->pm_domain->ops for execution.
    2.	Otherwise, if both dev->type and dev->type->pm are present, the callback
 	included in dev->type->pm will be chosen for execution.
    3.	Otherwise, if both dev->class and dev->class->pm are present, the
 	callback included in dev->class->pm will be chosen for execution.
    4.	Otherwise, if both dev->bus and dev->bus->pm are present, the callback
 	included in dev->bus->pm will be chosen for execution.
 This allows PM domains and device types to override callbacks provided by bus
 types or device classes if necessary.
 The PM domain, type, class and bus callbacks may in turn invoke device- or
 driver-specific methods stored in dev->driver->pm, but they don't have to do
 that.
 If the subsystem callback chosen for execution is not present, the PM core will
 execute the corresponding method from dev->driver->pm instead if there is one.
 Entering System Suspend
 -----------------------
 When the system goes into the freeze, standby or memory sleep state,
 the phases are:
 		prepare, suspend, suspend_late, suspend_noirq.
    1.	The prepare phase is meant to prevent races by preventing new devices
 	from being registered; the PM core would never know that all the
 	children of a device had been suspended if new children could be
 	registered at will.  (By contrast, devices may be unregistered at any
 	time.)  Unlike the other suspend-related phases, during the prepare
 	phase the device tree is traversed top-down.
 	After the prepare callback method returns, no new children may be
 	registered below the device.  The method may also prepare the device or
 	driver in some way for the upcoming system power transition, but it
 	should not put the device into a low-power state.
 	For devices supporting runtime power management, the return value of the
 	prepare callback can be used to indicate to the PM core that it may
 	safely leave the device in runtime suspend (if runtime-suspended
 	already), provided that all of the device's descendants are also left in
 	runtime suspend.  Namely, if the prepare callback returns a positive
 	number and that happens for all of the descendants of the device too,
 	and all of them (including the device itself) are runtime-suspended, the
 	PM core will skip the suspend, suspend_late and suspend_noirq suspend
 	phases as well as the resume_noirq, resume_early and resume phases of
 	the following system resume for all of these devices.  In that case,
 	the complete callback will be called directly after the prepare callback
 	and is entirely responsible for bringing the device back to the
 	functional state as appropriate.
 	Note that this direct-complete procedure applies even if the device is
 	disabled for runtime PM; only the runtime-PM status matters.  It follows
 	that if a device has system-sleep callbacks but does not support runtime
 	PM, then its prepare callback must never return a positive value.  This
 	is because all devices are initially set to runtime-suspended with
 	runtime PM disabled.
    2.	The suspend methods should quiesce the device to stop it from performing
 	I/O.  They also may save the device registers and put it into the
 	appropriate low-power state, depending on the bus type the device is on,
 	and they may enable wakeup events.
    3.	For a number of devices it is convenient to split suspend into the
 	"quiesce device" and "save device state" phases, in which cases
 	suspend_late is meant to do the latter.  It is always executed after
 	runtime power management has been disabled for all devices.
    4.	The suspend_noirq phase occurs after IRQ handlers have been disabled,
 	which means that the driver's interrupt handler will not be called while
 	the callback method is running.  The methods should save the values of
 	the device's registers that weren't saved previously and finally put the
 	device into the appropriate low-power state.
 	The majority of subsystems and device drivers need not implement this
 	callback.  However, bus types allowing devices to share interrupt
 	vectors, like PCI, generally need it; otherwise a driver might encounter
 	an error during the suspend phase by fielding a shared interrupt
 	generated by some other device after its own device had been set to low
 	power.
 At the end of these phases, drivers should have stopped all I/O transactions
 (DMA, IRQs), saved enough state that they can re-initialize or restore previous
 state (as needed by the hardware), and placed the device into a low-power state.
 On many platforms they will gate off one or more clock sources; sometimes they
 will also switch off power supplies or reduce voltages.  (Drivers supporting
 runtime PM may already have performed some or all of these steps.)
 If device_may_wakeup(dev) returns true, the device should be prepared for
 generating hardware wakeup signals to trigger a system wakeup event when the
 system is in the sleep state.  For example, enable_irq_wake() might identify
 GPIO signals hooked up to a switch or other external hardware, and
 pci_enable_wake() does something similar for the PCI PME signal.
 If any of these callbacks returns an error, the system won't enter the desired
 low-power state.  Instead the PM core will unwind its actions by resuming all
 the devices that were suspended.
 Leaving System Suspend
 ----------------------
 When resuming from freeze, standby or memory sleep, the phases are:
 		resume_noirq, resume_early, resume, complete.
    1.	The resume_noirq callback methods should perform any actions needed
 	before the driver's interrupt handlers are invoked.  This generally
 	means undoing the actions of the suspend_noirq phase.  If the bus type
 	permits devices to share interrupt vectors, like PCI, the method should
 	bring the device and its driver into a state in which the driver can
 	recognize if the device is the source of incoming interrupts, if any,
 	and handle them correctly.
 	For example, the PCI bus type's ->pm.resume_noirq() puts the device into
 	the full-power state (D0 in the PCI terminology) and restores the
 	standard configuration registers of the device.  Then it calls the
 	device driver's ->pm.resume_noirq() method to perform device-specific
 	actions.
    2.	The resume_early methods should prepare devices for the execution of
 	the resume methods.  This generally involves undoing the actions of the
 	preceding suspend_late phase.
    3.	The resume methods should bring the device back to its operating
 	state, so that it can perform normal I/O.  This generally involves
 	undoing the actions of the suspend phase.
    4.	The complete phase should undo the actions of the prepare phase.  Note,
 	however, that new children may be registered below the device as soon as
 	the resume callbacks occur; it's not necessary to wait until the
 	complete phase.
 	Moreover, if the preceding prepare callback returned a positive number,
 	the device may have been left in runtime suspend throughout the whole
 	system suspend and resume (the suspend, suspend_late, suspend_noirq
 	phases of system suspend and the resume_noirq, resume_early, resume
 	phases of system resume may have been skipped for it).  In that case,
 	the complete callback is entirely responsible for bringing the device
 	back to the functional state after system suspend if necessary.  [For
 	example, it may need to queue up a runtime resume request for the device
 	for this purpose.]  To check if that is the case, the complete callback
 	can consult the device's power.direct_complete flag.  Namely, if that
 	flag is set when the complete callback is being run, it has been called
 	directly after the preceding prepare and special action may be required
 	to make the device work correctly afterward.
 At the end of these phases, drivers should be as functional as they were before
 suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
 gated on.
 However, the details here may again be platform-specific.  For example,
 some systems support multiple "run" states, and the mode in effect at
 the end of resume might not be the one which preceded suspension.
 That means availability of certain clocks or power supplies changed,
 which could easily affect how a driver works.
 Drivers need to be able to handle hardware which has been reset since the
 suspend methods were called, for example by complete reinitialization.
 This may be the hardest part, and the one most protected by NDA'd documents
 and chip errata.  It's simplest if the hardware state hasn't changed since
 the suspend was carried out, but that can't be guaranteed (in fact, it usually
 is not the case).
 Drivers must also be prepared to notice that the device has been removed
 while the system was powered down, whenever that's physically possible.
 PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
 where common Linux platforms will see such removal.  Details of how drivers
 will notice and handle such removals are currently bus-specific, and often
 involve a separate thread.
 These callbacks may return an error value, but the PM core will ignore such
 errors since there's nothing it can do about them other than printing them in
 the system log.
 Entering Hibernation
 --------------------
 Hibernating the system is more complicated than putting it into the other
 sleep states, because it involves creating and saving a system image.
 Therefore there are more phases for hibernation, with a different set of
 callbacks.  These phases always run after tasks have been frozen and memory has
 been freed.
 The general procedure for hibernation is to quiesce all devices (freeze), create
 an image of the system memory while everything is stable, reactivate all
 devices (thaw), write the image to permanent storage, and finally shut down the
 system (poweroff).  The phases used to accomplish this are:
 	prepare, freeze, freeze_late, freeze_noirq, thaw_noirq, thaw_early,
 	thaw, complete, prepare, poweroff, poweroff_late, poweroff_noirq
    1.	The prepare phase is discussed in the "Entering System Suspend" section
 	above.
    2.	The freeze methods should quiesce the device so that it doesn't generate
 	IRQs or DMA, and they may need to save the values of device registers.
 	However the device does not have to be put in a low-power state, and to
 	save time it's best not to do so.  Also, the device should not be
 	prepared to generate wakeup events.
    3.	The freeze_late phase is analogous to the suspend_late phase described
 	above, except that the device should not be put in a low-power state and
 	should not be allowed to generate wakeup events.
    4.	The freeze_noirq phase is analogous to the suspend_noirq phase discussed
 	above, except again that the device should not be put in a low-power
 	state and should not be allowed to generate wakeup events.
 At this point the system image is created.  All devices should be inactive and
 the contents of memory should remain undisturbed while this happens, so that the
 image forms an atomic snapshot of the system state.
    5.	The thaw_noirq phase is analogous to the resume_noirq phase discussed
 	above.  The main difference is that its methods can assume the device is
 	in the same state as at the end of the freeze_noirq phase.
    6.	The thaw_early phase is analogous to the resume_early phase described
 	above.  Its methods should undo the actions of the preceding
 	freeze_late, if necessary.
    7.	The thaw phase is analogous to the resume phase discussed above.  Its
 	methods should bring the device back to an operating state, so that it
 	can be used for saving the image if necessary.
    8.	The complete phase is discussed in the "Leaving System Suspend" section
 	above.
 At this point the system image is saved, and the devices then need to be
 prepared for the upcoming system shutdown.  This is much like suspending them
 before putting the system into the freeze, standby or memory sleep state,
 and the phases are similar.
    9.	The prepare phase is discussed above.
    10.	The poweroff phase is analogous to the suspend phase.
    11.	The poweroff_late phase is analogous to the suspend_late phase.
    12.	The poweroff_noirq phase is analogous to the suspend_noirq phase.
 The poweroff, poweroff_late and poweroff_noirq callbacks should do essentially
 the same things as the suspend, suspend_late and suspend_noirq callbacks,
 respectively.  The only notable difference is that they need not store the
 device register values, because the registers should already have been stored
 during the freeze, freeze_late or freeze_noirq phases.
 Leaving Hibernation
 -------------------
 Resuming from hibernation is, again, more complicated than resuming from a sleep
 state in which the contents of main memory are preserved, because it requires
 a system image to be loaded into memory and the pre-hibernation memory contents
 to be restored before control can be passed back to the image kernel.
 Although in principle, the image might be loaded into memory and the
 pre-hibernation memory contents restored by the boot loader, in practice this
 can't be done because boot loaders aren't smart enough and there is no
 established protocol for passing the necessary information.  So instead, the
 boot loader loads a fresh instance of the kernel, called the boot kernel, into
 memory and passes control to it in the usual way.  Then the boot kernel reads
 the system image, restores the pre-hibernation memory contents, and passes
 control to the image kernel.  Thus two different kernels are involved in
 resuming from hibernation.  In fact, the boot kernel may be completely different
 from the image kernel: a different configuration and even a different version.
 This has important consequences for device drivers and their subsystems.
 To be able to load the system image into memory, the boot kernel needs to
 include at least a subset of device drivers allowing it to access the storage
 medium containing the image, although it doesn't need to include all of the
 drivers present in the image kernel.  After the image has been loaded, the
 devices managed by the boot kernel need to be prepared for passing control back
 to the image kernel.  This is very similar to the initial steps involved in
 creating a system image, and it is accomplished in the same way, using prepare,
 freeze, and freeze_noirq phases.  However the devices affected by these phases
 are only those having drivers in the boot kernel; other devices will still be in
 whatever state the boot loader left them.
 Should the restoration of the pre-hibernation memory contents fail, the boot
 kernel would go through the "thawing" procedure described above, using the
 thaw_noirq, thaw, and complete phases, and then continue running normally.  This
 happens only rarely.  Most often the pre-hibernation memory contents are
 restored successfully and control is passed to the image kernel, which then
 becomes responsible for bringing the system back to the working state.
 To achieve this, the image kernel must restore the devices' pre-hibernation
 functionality.  The operation is much like waking up from the memory sleep
 state, although it involves different phases:
 	restore_noirq, restore_early, restore, complete
    1.	The restore_noirq phase is analogous to the resume_noirq phase.
    2.	The restore_early phase is analogous to the resume_early phase.
    3.	The restore phase is analogous to the resume phase.
    4.	The complete phase is discussed above.
 The main difference from resume[_early|_noirq] is that restore[_early|_noirq]
 must assume the device has been accessed and reconfigured by the boot loader or
 the boot kernel.  Consequently the state of the device may be different from the
 state remembered from the freeze, freeze_late and freeze_noirq phases.  The
 device may even need to be reset and completely re-initialized.  In many cases
 this difference doesn't matter, so the resume[_early|_noirq] and
 restore[_early|_noirq] method pointers can be set to the same routines.
 Nevertheless, different callback pointers are used in case there is a situation
 where it actually does matter.
 Device Power Management Domains
 -------------------------------
 Sometimes devices share reference clocks or other power resources.  In those
 cases it generally is not possible to put devices into low-power states
 individually.  Instead, a set of devices sharing a power resource can be put
 into a low-power state together at the same time by turning off the shared
 power resource.  Of course, they also need to be put into the full-power state
 together, by turning the shared power resource on.  A set of devices with this
 property is often referred to as a power domain. A power domain may also be
 nested inside another power domain. The nested domain is referred to as the
 sub-domain of the parent domain.
 Support for power domains is provided through the pm_domain field of struct
 device.  This field is a pointer to an object of type struct dev_pm_domain,
 defined in include/linux/pm.h, providing a set of power management callbacks
 analogous to the subsystem-level and device driver callbacks that are executed
 for the given device during all power transitions, instead of the respective
 subsystem-level callbacks.  Specifically, if a device's pm_domain pointer is
 not NULL, the ->suspend() callback from the object pointed to by it will be
 executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
 analogously for all of the remaining callbacks.  In other words, power
 management domain callbacks, if defined for the given device, always take
 precedence over the callbacks provided by the device's subsystem (e.g. bus
 type).
 The support for device power management domains is only relevant to platforms
 needing to use the same device driver power management callbacks in many
 different power domain configurations and wanting to avoid incorporating the
 support for power domains into subsystem-level callbacks, for example by
 modifying the platform bus type.  Other platforms need not implement it or take
 it into account in any way.
 Devices may be defined as IRQ-safe which indicates to the PM core that their
 runtime PM callbacks may be invoked with disabled interrupts (see
 Documentation/power/runtime_pm.txt for more information).  If an IRQ-safe
 device belongs to a PM domain, the runtime PM of the domain will be
 disallowed, unless the domain itself is defined as IRQ-safe. However, it
 makes sense to define a PM domain as IRQ-safe only if all the devices in it
 are IRQ-safe. Moreover, if an IRQ-safe domain has a parent domain, the runtime
 PM of the parent is only allowed if the parent itself is IRQ-safe too with the
 additional restriction that all child domains of an IRQ-safe parent must also
 be IRQ-safe.
 Device Low Power (suspend) States
 ---------------------------------
 Device low-power states aren't standard.  One device might only handle
 "on" and "off", while another might support a dozen different versions of
 "on" (how many engines are active?), plus a state that gets back to "on"
 faster than from a full "off".
 Some busses define rules about what different suspend states mean.  PCI
 gives one example:  after the suspend sequence completes, a non-legacy
 PCI device may not perform DMA or issue IRQs, and any wakeup events it
 issues would be issued through the PME# bus signal.  Plus, there are
 several PCI-standard device states, some of which are optional.
 In contrast, integrated system-on-chip processors often use IRQs as the
 wakeup event sources (so drivers would call enable_irq_wake) and might
 be able to treat DMA completion as a wakeup event (sometimes DMA can stay
 active too, it'd only be the CPU and some peripherals that sleep).
 Some details here may be platform-specific.  Systems may have devices that
 can be fully active in certain sleep states, such as an LCD display that's
 refreshed using DMA while most of the system is sleeping lightly ... and
 its frame buffer might even be updated by a DSP or other non-Linux CPU while
 the Linux control processor stays idle.
 Moreover, the specific actions taken may depend on the target system state.
 One target system state might allow a given device to be very operational;
 another might require a hard shut down with re-initialization on resume.
 And two different target systems might use the same device in different
 ways; the aforementioned LCD might be active in one product's "standby",
 but a different product using the same SOC might work differently.
 Power Management Notifiers
 --------------------------
 There are some operations that cannot be carried out by the power management
 callbacks discussed above, because the callbacks occur too late or too early.
 To handle these cases, subsystems and device drivers may register power
 management notifiers that are called before tasks are frozen and after they have
 been thawed.  Generally speaking, the PM notifiers are suitable for performing
 actions that either require user space to be available, or at least won't
 interfere with user space.
 For details refer to Documentation/power/notifiers.txt.
 Runtime Power Management
 ========================
 Many devices are able to dynamically power down while the system is still
 running. This feature is useful for devices that are not being used, and
 can offer significant power savings on a running system.  These devices
 often support a range of runtime power states, which might use names such
 as "off", "sleep", "idle", "active", and so on.  Those states will in some
 cases (like PCI) be partially constrained by the bus the device uses, and will
 usually include hardware states that are also used in system sleep states.
 A system-wide power transition can be started while some devices are in low
 power states due to runtime power management.  The system sleep PM callbacks
 should recognize such situations and react to them appropriately, but the
 necessary actions are subsystem-specific.
 In some cases the decision may be made at the subsystem level while in other
 cases the device driver may be left to decide.  In some cases it may be
 desirable to leave a suspended device in that state during a system-wide power
 transition, but in other cases the device must be put back into the full-power
 state temporarily, for example so that its system wakeup capability can be
 disabled.  This all depends on the hardware and the design of the subsystem and
 device driver in question.
 During system-wide resume from a sleep state it's easiest to put devices into
 the full-power state, as explained in Documentation/power/runtime_pm.txt.  Refer
 to that document for more information regarding this particular issue as well as
 for information on the device runtime power management framework in general.
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@ -197,7 +197,8 @@ tasks, since it generally exists anyway.
 A driver must have all firmwares it may need in RAM before suspend() is called.
 If keeping them is not practical, for example due to their size, they must be
-requested early enough using the suspend notifier API described in notifiers.txt.
+requested early enough using the suspend notifier API described in
 Documentation/driver-api/pm/notifiers.rst.
 VI. Are there any precautions to be taken to prevent freezing failures?
--- a/Documentation/power/notifiers.txt
+++ b/Documentation/power/notifiers.txt
@ -1,55 +0,0 @@
 Suspend notifiers
 	(C) 2007-2011 Rafael J. Wysocki <rjw@sisk.pl>, GPL
 There are some operations that subsystems or drivers may want to carry out
 before hibernation/suspend or after restore/resume, but they require the system
 to be fully functional, so the drivers' and subsystems' .suspend() and .resume()
 or even .prepare() and .complete() callbacks are not suitable for this purpose.
 For example, device drivers may want to upload firmware to their devices after
 resume/restore, but they cannot do it by calling request_firmware() from their
 .resume() or .complete() routines (user land processes are frozen at these
 points).  The solution may be to load the firmware into memory before processes
 are frozen and upload it from there in the .resume() routine.
 A suspend/hibernation notifier may be used for this purpose.
 The subsystems or drivers having such needs can register suspend notifiers that
 will be called upon the following events by the PM core:
 PM_HIBERNATION_PREPARE	The system is going to hibernate, tasks will be frozen
 			immediately. This is different from PM_SUSPEND_PREPARE
 			below because here we do additional work between notifiers
 			and drivers freezing.
 PM_POST_HIBERNATION	The system memory state has been restored from a
 			hibernation image or an error occurred during
 			hibernation.  Device drivers' restore callbacks have
 			been executed and tasks have been thawed.
 PM_RESTORE_PREPARE	The system is going to restore a hibernation image.
 			If all goes well, the restored kernel will issue a
 			PM_POST_HIBERNATION notification.
 PM_POST_RESTORE		An error occurred during restore from hibernation.
 			Device drivers' restore callbacks have been executed
 			and tasks have been thawed.
 PM_SUSPEND_PREPARE	The system is preparing for suspend.
 PM_POST_SUSPEND		The system has just resumed or an error occurred during
 			suspend.  Device drivers' resume callbacks have been
 			executed and tasks have been thawed.
 It is generally assumed that whatever the notifiers do for
 PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION.  Analogously,
 operations performed for PM_SUSPEND_PREPARE should be reversed for
 PM_POST_SUSPEND.  Additionally, all of the notifiers are called for
 PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and
 all of the notifiers are called for PM_POST_SUSPEND if one of them fails for
 PM_SUSPEND_PREPARE.
 The hibernation and suspend notifiers are called with pm_mutex held.  They are
 defined in the usual way, but their last argument is meaningless (it is always
 NULL).  To register and/or unregister a suspend notifier use the functions
 register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
 include/linux/suspend.h .  If you don't need to unregister the notifier, you can
 also use the pm_notifier() macro defined in include/linux/suspend.h .
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@ -713,7 +713,7 @@ In addition to that the prepare() callback may carry out some operations
 preparing the device to be suspended, although it should not allocate memory
 (if additional memory is required to suspend the device, it has to be
 preallocated earlier, for example in a suspend/hibernate notifier as described
-in Documentation/power/notifiers.txt).
+in Documentation/driver-api/pm/notifiers.rst).
 3.1.2. suspend()
--- a/Documentation/pps/pps.txt
+++ b/Documentation/pps/pps.txt
@ -63,7 +63,7 @@ for instance) is a PPS source too, and if not they should provide the
 possibility to open another device as PPS source.
 In LinuxPPS the PPS sources are simply char devices usually mapped
-into files /dev/pps0, /dev/pps1, etc..
+into files /dev/pps0, /dev/pps1, etc.
 PPS with USB to serial devices
@ -71,9 +71,12 @@ PPS with USB to serial devices
 It is possible to grab the PPS from a USB to serial device. However,
 you should take into account the latencies and jitter introduced by
-the USB stack. Users has reported clock instability around +-1ms when
+the USB stack. Users have reported clock instability around +-1ms when
-synchronized with PPS through USB. This isn't suited for time server
+synchronized with PPS through USB. With USB 2.0, jitter may decrease
-synchronization.
+down to the order of 125 microseconds.
 This may be suitable for time server synchronization with NTP because
 of its undersampling and algorithms.
 If your device doesn't report PPS, you can check that the feature is
 supported by its driver. Most of the time, you only need to add a call
@ -166,7 +169,8 @@ Testing the PPS support
 In order to test the PPS support even without specific hardware you can use
 the ktimer driver (see the client subsection in the PPS configuration menu)
-and the userland tools provided in the Documentation/pps/ directory.
+and the userland tools available in your distribution's pps-tools package,
 http://linuxpps.org , or https://github.com/ago/pps-tools .
 Once you have enabled the compilation of ktimer just modprobe it (if
 not statically compiled):
@ -183,8 +187,8 @@ and the run ppstest as follow:
   source 0 - assert 1186592700.388931295, sequence: 365 - clear  0.000000000, sequence: 0
   source 0 - assert 1186592701.389032765, sequence: 366 - clear  0.000000000, sequence: 0
-Please, note that to compile userland programs you need the file timepps.h
+Please, note that to compile userland programs you need the file timepps.h .
-(see Documentation/pps/).
+This is available in the pps-tools repository mentioned above.
 Generators
--- a/Documentation/thermal/nouveau_thermal
+++ b/Documentation/thermal/nouveau_thermal
@ -42,7 +42,7 @@ thresholds can be configured thanks to the following HWMON attributes:
 * Critical: temp1_crit and temp1_crit_hyst;
 * Shutdown: temp1_emergency and temp1_emergency_hyst.
-NOTE: Remember that the values are stored as milli degrees Celcius. Don't forget
+NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget
 to multiply!
 Fan management
--- a/Documentation/translations/ja_JP/HOWTO
+++ b/Documentation/translations/ja_JP/HOWTO
@ -111,7 +111,7 @@ Linux カーネルソースツリーは幅広い範囲のドキュメントを
 カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの
 変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報
 をマニュアルページのメンテナ mtk.manpages@gmail.com に送り、CC を
-linux-api@ver.kernel.org に送ることを勧めます。
+linux-api@vger.kernel.org に送ることを勧めます。
 以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で
 す-
--- a/Documentation/translations/ko_KR/howto.rst
+++ b/Documentation/translations/ko_KR/howto.rst
@ -289,8 +289,8 @@ pub/linux/kernel/v4.x/ 디렉토리에서 참조될 수 있다.개발 프로세
 Andrew Morton의 글이 있다.
        *"커널이 언제 배포될지는 아무도 모른다. 왜냐하면 배포는 알려진
-         버그의 상황에 따라 배포되는 것이지 미리정해 놓은 시간에 따라
+        버그의 상황에 따라 배포되는 것이지 미리정해 놓은 시간에 따라
-         배포되는 것은 아니기 때문이다."*
+        배포되는 것은 아니기 때문이다."*
 4.x.y - 안정 커널 트리
 ~~~~~~~~~~~~~~~~~~~~~~
--- a/Documentation/translations/zh_CN/CodingStyle
+++ b/Documentation/translations/zh_CN/CodingStyle
@ -1,813 +0,0 @@
 Chinese translated version of Documentation/process/coding-style.rst
 If you have any comment or update to the content, please post to LKML directly.
 However, if you have problem communicating in English you can also ask the
 Chinese maintainer for help.  Contact the Chinese maintainer, if this
 translation is outdated or there is problem with translation.
 Chinese maintainer: Zhang Le <r0bertz@gentoo.org>
 ---------------------------------------------------------------------
 Documentation/process/coding-style.rst的中文翻译
 如果想评论或更新本文的内容，请直接发信到LKML。如果你使用英文交流有困难的话，也可
 以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题，请联系中文版维护者。
 中文版维护者： 张乐 Zhang Le <r0bertz@gentoo.org>
 中文版翻译者： 张乐 Zhang Le <r0bertz@gentoo.org>
 中文版校译者： 王聪 Wang Cong <xiyou.wangcong@gmail.com>
               wheelz <kernel.zeng@gmail.com>
               管旭东 Xudong Guan <xudong.guan@gmail.com>
               Li Zefan <lizf@cn.fujitsu.com>
               Wang Chen <wangchen@cn.fujitsu.com>
 以下为正文
 ---------------------------------------------------------------------
 		Linux内核代码风格
 这是一个简短的文档，描述了 linux 内核的首选代码风格。代码风格是因人而异的，而且我
 不愿意把自己的观点强加给任何人，但这就像我去做任何事情都必须遵循的原则那样，我也
 希望在绝大多数事上保持这种态度。请（在写代码时）至少考虑一下这里的代码风格。
 首先，我建议你打印一份 GNU 代码规范，然后不要读。烧了它，这是一个具有重大象征性意义
 的动作。
 不管怎样，现在我们开始：
 		第一章：缩进
 制表符是 8 个字符，所以缩进也是 8 个字符。有些异端运动试图将缩进变为 4（甚至 2！）
 个字符深，这几乎相当于尝试将圆周率的值定义为 3。
 理由：缩进的全部意义就在于清楚的定义一个控制块起止于何处。尤其是当你盯着你的屏幕
 连续看了 20 小时之后，你将会发现大一点的缩进会使你更容易分辨缩进。
 现在，有些人会抱怨 8 个字符的缩进会使代码向右边移动的太远，在 80 个字符的终端屏幕上
 就很难读这样的代码。这个问题的答案是，如果你需要 3 级以上的缩进，不管用何种方式你
 的代码已经有问题了，应该修正你的程序。
 简而言之，8 个字符的缩进可以让代码更容易阅读，还有一个好处是当你的函数嵌套太深的
 时候可以给你警告。留心这个警告。
 在 switch 语句中消除多级缩进的首选的方式是让 “switch” 和从属于它的 “case” 标签
 对齐于同一列，而不要 “两次缩进” “case” 标签。比如：
 	switch (suffix) {
 	case 'G':
 	case 'g':
 		mem <<= 30;
 		break;
 	case 'M':
 	case 'm':
 		mem <<= 20;
 		break;
 	case 'K':
 	case 'k':
 		mem <<= 10;
 		/* fall through */
 	default:
 		break;
 	}
 不要把多个语句放在一行里，除非你有什么东西要隐藏：
 	if (condition) do_this;
 	  do_something_everytime;
 也不要在一行里放多个赋值语句。内核代码风格超级简单。就是避免可能导致别人误读的表
 达式。
 除了注释、文档和 Kconfig 之外，不要使用空格来缩进，前面的例子是例外，是有意为之。
 选用一个好的编辑器，不要在行尾留空格。
 		第二章：把长的行和字符串打散
 代码风格的意义就在于使用平常使用的工具来维持代码的可读性和可维护性。
 每一行的长度的限制是 80 列，我们强烈建议您遵守这个惯例。
 长于 80 列的语句要打散成有意义的片段。除非超过 80 列能显著增加可读性，并且不会隐藏
 信息。子片段要明显短于母片段，并明显靠右。这同样适用于有着很长参数列表的函数头。
 然而，绝对不要打散对用户可见的字符串，例如 printk 信息，因为这将导致无法 grep 这些
 信息。
 		第三章：大括号和空格的放置
 C语言风格中另外一个常见问题是大括号的放置。和缩进大小不同，选择或弃用某种放置策
 略并没有多少技术上的原因，不过首选的方式，就像 Kernighan 和 Ritchie 展示给我们的，
 是把起始大括号放在行尾，而把结束大括号放在行首，所以：
 	if (x is true) {
 		we do y
 	}
 这适用于所有的非函数语句块（if、switch、for、while、do）。比如：
 	switch (action) {
 	case KOBJ_ADD:
 		return "add";
 	case KOBJ_REMOVE:
 		return "remove";
 	case KOBJ_CHANGE:
 		return "change";
 	default:
 		return NULL;
 	}
 不过，有一个例外，那就是函数：函数的起始大括号放置于下一行的开头，所以：
 	int function(int x)
 	{
 		body of function
 	}
 全世界的异端可能会抱怨这个不一致性是……呃……不一致的，不过所有思维健全的人都知道
 (a) K&R 是 _正确的_，并且 (b) K&R 是正确的。此外，不管怎样函数都是特殊的（C
 函数是不能嵌套的）。
 注意结束大括号独自占据一行，除非它后面跟着同一个语句的剩余部分，也就是 do 语句中的
 “while” 或者 if 语句中的 “else”，像这样：
 	do {
 		body of do-loop
 	} while (condition);
 和
 	if (x == y) {
 		..
 	} else if (x > y) {
 		...
 	} else {
 		....
 	}
 理由：K&R。
 也请注意这种大括号的放置方式也能使空（或者差不多空的）行的数量最小化，同时不失可
 读性。因此，由于你的屏幕上的新行是不可再生资源（想想 25 行的终端屏幕），你将会有更
 多的空行来放置注释。
 当只有一个单独的语句的时候，不用加不必要的大括号。
 	if (condition)
 		action();
 和
 	if (condition)
 		do_this();
 	else
 		do_that();
 这并不适用于只有一个条件分支是单语句的情况；这时所有分支都要使用大括号：
 	if (condition) {
 		do_this();
 		do_that();
 	} else {
 		otherwise();
 	}
 		3.1：空格
 Linux 内核的空格使用方式（主要）取决于它是用于函数还是关键字。（大多数）关键字后
 要加一个空格。值得注意的例外是 sizeof、typeof、alignof 和 __attribute__，这些
 关键字某些程度上看起来更像函数（它们在 Linux 里也常常伴随小括号而使用，尽管在 C 里
 这样的小括号不是必需的，就像 “struct fileinfo info” 声明过后的 “sizeof info”）。
 所以在这些关键字之后放一个空格：
 	if, switch, case, for, do, while
 但是不要在 sizeof、typeof、alignof 或者 __attribute__ 这些关键字之后放空格。例如，
 	s = sizeof(struct file);
 不要在小括号里的表达式两侧加空格。这是一个反例：
 	s = sizeof( struct file );
 当声明指针类型或者返回指针类型的函数时，“*” 的首选使用方式是使之靠近变量名或者函
 数名，而不是靠近类型名。例子：
 	char *linux_banner;
 	unsigned long long memparse(char *ptr, char **retptr);
 	char *match_strdup(substring_t *s);
 在大多数二元和三元操作符两侧使用一个空格，例如下面所有这些操作符：
 	=  +  -  <  >  *  /  %  |  &  ^  <=  >=  ==  !=  ?  :
 但是一元操作符后不要加空格：
 	&  *  +  -  ~  !  sizeof  typeof  alignof  __attribute__  defined
 后缀自加和自减一元操作符前不加空格：
 	++  --
 前缀自加和自减一元操作符后不加空格：
 	++  --
 ‘.’ 和 “->” 结构体成员操作符前后不加空格。
 不要在行尾留空白。有些可以自动缩进的编辑器会在新行的行首加入适量的空白，然后你
 就可以直接在那一行输入代码。不过假如你最后没有在那一行输入代码，有些编辑器就不
 会移除已经加入的空白，就像你故意留下一个只有空白的行。包含行尾空白的行就这样产
 生了。
 当git发现补丁包含了行尾空白的时候会警告你，并且可以应你的要求去掉行尾空白；不过
 如果你是正在打一系列补丁，这样做会导致后面的补丁失败，因为你改变了补丁的上下文。
 		第四章：命名
 C是一个简朴的语言，你的命名也应该这样。和 Modula-2 和 Pascal 程序员不同，C 程序员
 不使用类似 ThisVariableIsATemporaryCounter 这样华丽的名字。C 程序员会称那个变量
 为 “tmp”，这样写起来会更容易，而且至少不会令其难于理解。
 不过，虽然混用大小写的名字是不提倡使用的，但是全局变量还是需要一个具描述性的名字
 。称一个全局函数为 “foo” 是一个难以饶恕的错误。
 全局变量（只有当你真正需要它们的时候再用它）需要有一个具描述性的名字，就像全局函
 数。如果你有一个可以计算活动用户数量的函数，你应该叫它 “count_active_users()”
 或者类似的名字，你不应该叫它 “cntuser()”。
 在函数名中包含函数类型（所谓的匈牙利命名法）是脑子出了问题——编译器知道那些类型而
 且能够检查那些类型，这样做只能把程序员弄糊涂了。难怪微软总是制造出有问题的程序。
 本地变量名应该简短，而且能够表达相关的含义。如果你有一些随机的整数型的循环计数器
 ，它应该被称为 “i”。叫它 “loop_counter” 并无益处，如果它没有被误解的可能的话。
 类似的，“tmp” 可以用来称呼任意类型的临时变量。
 如果你怕混淆了你的本地变量名，你就遇到另一个问题了，叫做函数增长荷尔蒙失衡综合症
 。请看第六章（函数）。
 		第五章：Typedef
 不要使用类似 “vps_t” 之类的东西。
 对结构体和指针使用 typedef 是一个错误。当你在代码里看到：
 	vps_t a;
 这代表什么意思呢？
 相反，如果是这样
 	struct virtual_container *a;
 你就知道 “a” 是什么了。
 很多人认为 typedef “能提高可读性”。实际不是这样的。它们只在下列情况下有用：
 (a) 完全不透明的对象（这种情况下要主动使用 typedef 来隐藏这个对象实际上是什么）。
     例如：“pte_t” 等不透明对象，你只能用合适的访问函数来访问它们。
     注意！不透明性和“访问函数”本身是不好的。我们使用 pte_t 等类型的原因在于真的是
     完全没有任何共用的可访问信息。
 (b) 清楚的整数类型，如此，这层抽象就可以帮助消除到底是 “int” 还是 “long” 的混淆。
     u8/u16/u32 是完全没有问题的 typedef，不过它们更符合类别 (d) 而不是这里。
     再次注意！要这样做，必须事出有因。如果某个变量是 “unsigned long”，那么没有必要
 	typedef unsigned long myflags_t;
     不过如果有一个明确的原因，比如它在某种情况下可能会是一个 “unsigned int” 而在
     其他情况下可能为 “unsigned long”，那么就不要犹豫，请务必使用 typedef。
 (c) 当你使用sparse按字面的创建一个新类型来做类型检查的时候。
 (d) 和标准C99类型相同的类型，在某些例外的情况下。
     虽然让眼睛和脑筋来适应新的标准类型比如 “uint32_t” 不需要花很多时间，可是有些
     人仍然拒绝使用它们。
     因此，Linux 特有的等同于标准类型的 “u8/u16/u32/u64” 类型和它们的有符号类型是被
     允许的——尽管在你自己的新代码中，它们不是强制要求要使用的。
     当编辑已经使用了某个类型集的已有代码时，你应该遵循那些代码中已经做出的选择。
 (e) 可以在用户空间安全使用的类型。
     在某些用户空间可见的结构体里，我们不能要求C99类型而且不能用上面提到的 “u32”
     类型。因此，我们在与用户空间共享的所有结构体中使用 __u32 和类似的类型。
 可能还有其他的情况，不过基本的规则是永远不要使用 typedef，除非你可以明确的应用上
 述某个规则中的一个。
 总的来说，如果一个指针或者一个结构体里的元素可以合理的被直接访问到，那么它们就不
 应该是一个 typedef。
 		第六章：函数
 函数应该简短而漂亮，并且只完成一件事情。函数应该可以一屏或者两屏显示完（我们都知
 道 ISO/ANSI 屏幕大小是 80x24），只做一件事情，而且把它做好。
 一个函数的最大长度是和该函数的复杂度和缩进级数成反比的。所以，如果你有一个理论上
 很简单的只有一个很长（但是简单）的 case 语句的函数，而且你需要在每个 case 里做
 很多很小的事情，这样的函数尽管很长，但也是可以的。
 不过，如果你有一个复杂的函数，而且你怀疑一个天分不是很高的高中一年级学生可能甚至
 搞不清楚这个函数的目的，你应该严格的遵守前面提到的长度限制。使用辅助函数，并为之
 取个具描述性的名字（如果你觉得它们的性能很重要的话，可以让编译器内联它们，这样的
 效果往往会比你写一个复杂函数的效果要好。）
 函数的另外一个衡量标准是本地变量的数量。此数量不应超过 5－10 个，否则你的函数就有
 问题了。重新考虑一下你的函数，把它分拆成更小的函数。人的大脑一般可以轻松的同时跟
 踪 7 个不同的事物，如果再增多的话，就会糊涂了。即便你聪颖过人，你也可能会记不清你
 2 个星期前做过的事情。
 在源文件里，使用空行隔开不同的函数。如果该函数需要被导出，它的 EXPORT* 宏应该紧贴
 在它的结束大括号之下。比如：
 	int system_is_up(void)
 	{
 		return system_state == SYSTEM_RUNNING;
 	}
 	EXPORT_SYMBOL(system_is_up);
 在函数原型中，包含函数名和它们的数据类型。虽然C语言里没有这样的要求，在 Linux 里这
 是提倡的做法，因为这样可以很简单的给读者提供更多的有价值的信息。
 		第七章：集中的函数退出途径
 虽然被某些人声称已经过时，但是 goto 语句的等价物还是经常被编译器所使用，具体形式是
 无条件跳转指令。
 当一个函数从多个位置退出，并且需要做一些类似清理的常见操作时，goto 语句就很方便了。
 如果并不需要清理操作，那么直接 return 即可。
 理由是：
 - 无条件语句容易理解和跟踪
 - 嵌套程度减小
 - 可以避免由于修改时忘记更新某个单独的退出点而导致的错误
 - 减轻了编译器的工作，无需删除冗余代码;)
 	int fun(int a)
 	{
 		int result = 0;
 		char *buffer;
 		buffer = kmalloc(SIZE, GFP_KERNEL);
 		if (!buffer)
 			return -ENOMEM;
 		if (condition1) {
 			while (loop1) {
 				...
 			}
 			result = 1;
 			goto out_buffer;
 		}
 		...
 	out_buffer:
 		kfree(buffer);
 		return result;
 	}
 一个需要注意的常见错误是“一个 err 错误”，就像这样：
 	err:
 		kfree(foo->bar);
 		kfree(foo);
 		return ret;
 这段代码的错误是，在某些退出路径上 “foo” 是 NULL。通常情况下，通过把它分离成两个
 错误标签 “err_bar:” 和 “err_foo:” 来修复这个错误。
 		第八章：注释
 注释是好的，不过有过度注释的危险。永远不要在注释里解释你的代码是如何运作的：更好
 的做法是让别人一看你的代码就可以明白，解释写的很差的代码是浪费时间。
 一般的，你想要你的注释告诉别人你的代码做了什么，而不是怎么做的。也请你不要把注释
 放在一个函数体内部：如果函数复杂到你需要独立的注释其中的一部分，你很可能需要回到
 第六章看一看。你可以做一些小注释来注明或警告某些很聪明（或者糟糕）的做法，但不要
 加太多。你应该做的，是把注释放在函数的头部，告诉人们它做了什么，也可以加上它做这
 些事情的原因。
 当注释内核API函数时，请使用 kernel-doc 格式。请看
 Documentation/doc-guide/和scripts/kernel-doc 以获得详细信息。
 Linux的注释风格是 C89 “/* ... */” 风格。不要使用 C99 风格 “// ...” 注释。
 长（多行）的首选注释风格是：
 	/*
 	 * This is the preferred style for multi-line
 	 * comments in the Linux kernel source code.
 	 * Please use it consistently.
 	 *
 	 * Description:  A column of asterisks on the left side,
 	 * with beginning and ending almost-blank lines.
 	 */
 对于在 net/ 和 drivers/net/ 的文件，首选的长（多行）注释风格有些不同。
 	/* The preferred comment style for files in net/ and drivers/net
 	 * looks like this.
 	 *
 	 * It is nearly the same as the generally preferred comment style,
 	 * but there is no initial almost-blank line.
 	 */
 注释数据也是很重要的，不管是基本类型还是衍生类型。为了方便实现这一点，每一行应只
 声明一个数据（不要使用逗号来一次声明多个数据）。这样你就有空间来为每个数据写一段
 小注释来解释它们的用途了。
 		第九章：你已经把事情弄糟了
 这没什么，我们都是这样。可能你的使用了很长时间 Unix 的朋友已经告诉你 “GNU emacs” 能
 自动帮你格式化 C 源代码，而且你也注意到了，确实是这样，不过它所使用的默认值和我们
 想要的相去甚远（实际上，甚至比随机打的还要差——无数个猴子在 GNU emacs 里打字永远不
 会创造出一个好程序）（译注：请参考 Infinite Monkey Theorem）
 所以你要么放弃 GNU emacs，要么改变它让它使用更合理的设定。要采用后一个方案，你可
 以把下面这段粘贴到你的 .emacs 文件里。
 (defun c-lineup-arglist-tabs-only (ignored)
  "Line up argument lists by tabs, not spaces"
  (let* ((anchor (c-langelem-pos c-syntactic-element))
         (column (c-langelem-2nd-pos c-syntactic-element))
         (offset (- (1+ column) anchor))
         (steps (floor offset c-basic-offset)))
    (* (max steps 1)
       c-basic-offset)))
 (add-hook 'c-mode-common-hook
          (lambda ()
            ;; Add kernel style
            (c-add-style
             "linux-tabs-only"
             '("linux" (c-offsets-alist
                        (arglist-cont-nonempty
                         c-lineup-gcc-asm-reg
                         c-lineup-arglist-tabs-only))))))
 (add-hook 'c-mode-hook
          (lambda ()
            (let ((filename (buffer-file-name)))
              ;; Enable kernel mode for the appropriate files
              (when (and filename
                         (string-match (expand-file-name "~/src/linux-trees")
                                       filename))
                (setq indent-tabs-mode t)
                (setq show-trailing-whitespace t)
                (c-set-style "linux-tabs-only")))))
 这会让 emacs 在 ~/src/linux-trees 目录下的 C 源文件获得更好的内核代码风格。
 不过就算你尝试让 emacs 正确的格式化代码失败了，也并不意味着你失去了一切：还可以用
 “indent”。
 不过，GNU indent 也有和 GNU emacs 一样有问题的设定，所以你需要给它一些命令选项。不
 过，这还不算太糟糕，因为就算是 GNU indent 的作者也认同 K&R 的权威性（GNU 的人并不是
 坏人，他们只是在这个问题上被严重的误导了），所以你只要给 indent 指定选项 “-kr -i8”
 （代表 “K&R，8 个字符缩进”），或者使用 “scripts/Lindent”，这样就可以以最时髦的方式
 缩进源代码。
 “indent” 有很多选项，特别是重新格式化注释的时候，你可能需要看一下它的手册页。不过
 记住：“indent” 不能修正坏的编程习惯。
 		第十章：Kconfig 配置文件
 对于遍布源码树的所有 Kconfig* 配置文件来说，它们缩进方式与 C 代码相比有所不同。紧挨
 在 “config” 定义下面的行缩进一个制表符，帮助信息则再多缩进 2 个空格。比如：
 config AUDIT
 	bool "Auditing support"
 	depends on NET
 	help
 	  Enable auditing infrastructure that can be used with another
 	  kernel subsystem, such as SELinux (which requires this for
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 而那些危险的功能（比如某些文件系统的写支持）应该在它们的提示字符串里显著的声明这
 一点：
 config ADFS_FS_RW
 	bool "ADFS write support (DANGEROUS)"
 	depends on ADFS_FS
 	...
 要查看配置文件的完整文档，请看 Documentation/kbuild/kconfig-language.txt。
 		第十一章：数据结构
 如果一个数据结构，在创建和销毁它的单线执行环境之外可见，那么它必须要有一个引用计
 数器。内核里没有垃圾收集（并且内核之外的垃圾收集慢且效率低下），这意味着你绝对需
 要记录你对这种数据结构的使用情况。
 引用计数意味着你能够避免上锁，并且允许多个用户并行访问这个数据结构——而不需要担心
 这个数据结构仅仅因为暂时不被使用就消失了，那些用户可能不过是沉睡了一阵或者做了一
 些其他事情而已。
 注意上锁不能取代引用计数。上锁是为了保持数据结构的一致性，而引用计数是一个内存管
 理技巧。通常二者都需要，不要把两个搞混了。
 很多数据结构实际上有2级引用计数，它们通常有不同“类”的用户。子类计数器统计子类用
 户的数量，每当子类计数器减至零时，全局计数器减一。
 这种“多级引用计数”的例子可以在内存管理（“struct mm_struct”：mm_users 和 mm_count）
 和文件系统（“struct super_block”：s_count和s_active）中找到。
 记住：如果另一个执行线索可以找到你的数据结构，但是这个数据结构没有引用计数器，这
 里几乎肯定是一个 bug。
 		第十二章：宏，枚举和RTL
 用于定义常量的宏的名字及枚举里的标签需要大写。
 #define CONSTANT 0x12345
 在定义几个相关的常量时，最好用枚举。
 宏的名字请用大写字母，不过形如函数的宏的名字可以用小写字母。
 一般的，如果能写成内联函数就不要写成像函数的宏。
 含有多个语句的宏应该被包含在一个 do-while 代码块里：
 	#define macrofun(a, b, c)			\
 		do {					\
 			if (a == 5)			\
 				do_this(b, c);		\
 		} while (0)
 使用宏的时候应避免的事情：
 1) 影响控制流程的宏：
 	#define FOO(x)					\
 		do {					\
 			if (blah(x) < 0)		\
 				return -EBUGGERED;	\
 		} while (0)
 非常不好。它看起来像一个函数，不过却能导致“调用”它的函数退出；不要打乱读者大脑里
 的语法分析器。
 2) 依赖于一个固定名字的本地变量的宏：
 	#define FOO(val) bar(index, val)
 可能看起来像是个不错的东西，不过它非常容易把读代码的人搞糊涂，而且容易导致看起来
 不相关的改动带来错误。
 3) 作为左值的带参数的宏： FOO(x) = y；如果有人把 FOO 变成一个内联函数的话，这种用
 法就会出错了。
 4) 忘记了优先级：使用表达式定义常量的宏必须将表达式置于一对小括号之内。带参数的
 宏也要注意此类问题。
 	#define CONSTANT 0x4000
 	#define CONSTEXP (CONSTANT | 3)
 5) 在宏里定义类似函数的本地变量时命名冲突：
 	#define FOO(x)				\
 	({					\
 		typeof(x) ret;			\
 		ret = calc_ret(x);		\
 		(ret);				\
 	})
 ret 是本地变量的通用名字 - __foo_ret 更不容易与一个已存在的变量冲突。
 cpp 手册对宏的讲解很详细。gcc internals 手册也详细讲解了 RTL（译注：register
 transfer language），内核里的汇编语言经常用到它。
 		第十三章：打印内核消息
 内核开发者应该是受过良好教育的。请一定注意内核信息的拼写，以给人以好的印象。不要
 用不规范的单词比如 “dont”，而要用 “do not”或者 “don't”。保证这些信息简单、明了、
 无歧义。
 内核信息不必以句号（译注：英文句号，即点）结束。
 在小括号里打印数字 (%d) 没有任何价值，应该避免这样做。
 <linux/device.h> 里有一些驱动模型诊断宏，你应该使用它们，以确保信息对应于正确的
 设备和驱动，并且被标记了正确的消息级别。这些宏有：dev_err()，dev_warn()，
 dev_info() 等等。对于那些不和某个特定设备相关连的信息，<linux/printk.h> 定义了
 pr_notice()，pr_info()，pr_warn()，pr_err() 和其他。
 写出好的调试信息可以是一个很大的挑战；一旦你写出后，这些信息在远程除错时能提供极大
 的帮助。然而打印调试信息的处理方式同打印非调试信息不同。其他 pr_XXX() 函数能无条件地
 打印，pr_debug() 却不；默认情况下它不会被编译，除非定义了 DEBUG 或设定了
 CONFIG_DYNAMIC_DEBUG。实际这同样是为了 dev_dbg()，一个相关约定是在一个已经开启了
 DEBUG 时，使用 VERBOSE_DEBUG 来添加 dev_vdbg()。
 许多子系统拥有 Kconfig 调试选项来开启 -DDEBUG 在对应的 Makefile 里面；在其他
 情况下，特殊文件使用 #define DEBUG。当一条调试信息需要被无条件打印时，例如，如果
 已经包含一个调试相关的 #ifdef 条件，printk(KERN_DEBUG ...) 就可被使用。
 		第十四章：分配内存
 内核提供了下面的一般用途的内存分配函数：
 kmalloc()，kzalloc()，kmalloc_array()，kcalloc()，vmalloc() 和 vzalloc()。
 请参考 API 文档以获取有关它们的详细信息。
 传递结构体大小的首选形式是这样的：
 	p = kmalloc(sizeof(*p), ...);
 另外一种传递方式中，sizeof 的操作数是结构体的名字，这样会降低可读性，并且可能会引
 入 bug。有可能指针变量类型被改变时，而对应的传递给内存分配函数的 sizeof 的结果不变。
 强制转换一个 void 指针返回值是多余的。C 语言本身保证了从 void 指针到其他任何指针类型
 的转换是没有问题的。
 分配一个数组的首选形式是这样的：
 	p = kmalloc_array(n, sizeof(...), ...);
 分配一个清零数组的首选形式是这样的：
 	p = kcalloc(n, sizeof(...), ...);
 两种形式检查分配大小 n * sizeof(...) 的溢出，如果溢出返回 NULL。
 		第十五章：内联弊病
 有一个常见的误解是内联函数是 gcc 提供的可以让代码运行更快的一个选项。虽然使用内联
 函数有时候是恰当的（比如作为一种替代宏的方式，请看第十二章），不过很多情况下不是
 这样。inline 关键字的过度使用会使内核变大，从而使整个系统运行速度变慢。因为大内核
 会占用更多的指令高速缓存（译注：一级缓存通常是指令缓存和数据缓存分开的）而且会导
 致 pagecache 的可用内存减少。想象一下，一次pagecache未命中就会导致一次磁盘寻址，
 将耗时 5 毫秒。5 毫秒的时间内 CPU 能执行很多很多指令。
 一个基本的原则是如果一个函数有 3 行以上，就不要把它变成内联函数。这个原则的一个例
 外是，如果你知道某个参数是一个编译时常量，而且因为这个常量你确定编译器在编译时能
 优化掉你的函数的大部分代码，那仍然可以给它加上 inline 关键字。kmalloc() 内联函数就
 是一个很好的例子。
 人们经常主张给 static 的而且只用了一次的函数加上 inline，如此不会有任何损失，因为没
 有什么好权衡的。虽然从技术上说这是正确的，但是实际上这种情况下即使不加 inline gcc
 也可以自动使其内联。而且其他用户可能会要求移除 inline，由此而来的争论会抵消 inline
 自身的潜在价值，得不偿失。
 		第十六章：函数返回值及命名
 函数可以返回很多种不同类型的值，最常见的一种是表明函数执行成功或者失败的值。这样
 的一个值可以表示为一个错误代码整数（-Exxx＝失败，0＝成功）或者一个“成功”布尔值（
 0＝失败，非0＝成功）。
 混合使用这两种表达方式是难于发现的 bug 的来源。如果 C 语言本身严格区分整型和布尔型变
 量，那么编译器就能够帮我们发现这些错误……不过 C 语言不区分。为了避免产生这种 bug，请
 遵循下面的惯例：
 	如果函数的名字是一个动作或者强制性的命令，那么这个函数应该返回错误代码整
 	数。如果是一个判断，那么函数应该返回一个“成功”布尔值。
 比如，“add work” 是一个命令，所以 add_work() 函数在成功时返回 0，在失败时返回 -EBUSY。
 类似的，因为 “PCI device present” 是一个判断，所以 pci_dev_present() 函数在成功找到
 一个匹配的设备时应该返回 1，如果找不到时应该返回 0。
 所有导出（译注：EXPORT）的函数都必须遵守这个惯例，所有的公共函数也都应该如此。私
 有（static）函数不需要如此，但是我们也推荐这样做。
 返回值是实际计算结果而不是计算是否成功的标志的函数不受此惯例的限制。一般的，他们
 通过返回一些正常值范围之外的结果来表示出错。典型的例子是返回指针的函数，他们使用
 NULL 或者 ERR_PTR 机制来报告错误。
 		第十七章：不要重新发明内核宏
 头文件 include/linux/kernel.h 包含了一些宏，你应该使用它们，而不要自己写一些它们的
 变种。比如，如果你需要计算一个数组的长度，使用这个宏
 	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 类似的，如果你要计算某结构体成员的大小，使用
 	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 还有可以做严格的类型检查的 min() 和 max() 宏，如果你需要可以使用它们。你可以自己看看
 那个头文件里还定义了什么你可以拿来用的东西，如果有定义的话，你就不应在你的代码里
 自己重新定义。
 		第十八章：编辑器模式行和其他需要罗嗦的事情
 有一些编辑器可以解释嵌入在源文件里的由一些特殊标记标明的配置信息。比如，emacs
 能够解释被标记成这样的行：
 	-*- mode: c -*-
 或者这样的：
 	/*
 	Local Variables:
 	compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
 	End:
 	*/
 Vim 能够解释这样的标记：
 	/* vim:set sw=8 noet */
 不要在源代码中包含任何这样的内容。每个人都有他自己的编辑器配置，你的源文件不应
 该覆盖别人的配置。这包括有关缩进和模式配置的标记。人们可以使用他们自己定制的模
 式，或者使用其他可以产生正确的缩进的巧妙方法。
 		第十九章：内联汇编
 在特定架构的代码中，你也许需要内联汇编来使用 CPU 接口和平台相关功能。在需要
 这么做时，不要犹豫。然而，当 C 可以完成工作时，不要无端地使用内联汇编。如果
 可能，你可以并且应该用 C 和硬件交互。
 考虑去写通用一点的内联汇编作为简明的辅助函数，而不是重复写下它们的细节。记住
 内联汇编可以使用 C 参数。
 大而特殊的汇编函数应该放在 .S 文件中，对应 C 的原型定义在 C 头文件中。汇编
 函数的 C 原型应该使用 “asmlinkage”。
 你可能需要将你的汇编语句标记为 volatile，来阻止 GCC 在没发现任何副作用后就
 移除了它。不过你不必总是这样做，因为不必要地这样做会限制优化。
 在写一个包含多条指令的单个内联汇编语句时，把每条指令用引号字符串分离，并写在
 单独一行；除了最后一个字符串之外，每个字符串都以 \n\t 结尾，以便在汇编输出中
 正确地缩进下一条指令：
 	asm ("magic %reg1, #42\n\t"
 	     "more_magic %reg2, %reg3"
 	     : /* outputs */ : /* inputs */ : /* clobbers */);
 		第二十章：条件编译
 只要可能，就不要在 .c 文件里面使用预处理条件；这样做让代码更难阅读并且逻辑难以
 跟踪。替代方案是，在头文件中使用这类条件编译来定义供这些 .c 文件使用的函数，并在
 #else 情况下提供空操作的桩版本（译注：桩程序，是指用来替换一部分功能的程序段），
 再从 .c 文件中无条件地调用这些函数。编译器会避免生成任何桩调用的代码，产生一致
 的结果，但逻辑将更加清晰。
 宁可条件编译掉整个函数，而不是函数或表达式的一部分。与其在表达式中放一个 ifdef，
 不如把部分或全部表达式提取到一个单独的辅助函数中，并对该函数应用条件编译。
 如果你有一个在特定配置中可能是未使用的函数或变量，编译器将警告它定义了但未使用，
 标记这个定义为 __maybe_unused 而不是将它包含在一个预处理条件中。（然而，如果
 一个函数或变量总是未使用的，就直接删除它。）
 在代码中，可能的情况下，使用 IS_ENABLED 宏来转化某个 Kconfig 标记为 C 的布尔
 表达式，并在正常的 C 条件中使用它：
 	if (IS_ENABLED(CONFIG_SOMETHING)) {
 		...
 	}
 编译器会无条件地做常数合并，就像使用 #ifdef 那样，包含或排除代码块，所以这不会
 带来任何运行时开销。然而，这种方法依旧允许 C 编译器查看块内的代码，并检查它的正确
 性（语法，类型，符号引用，等等）。因此，如果代码块内引用的符号在条件不满足时不存在，
 你仍然必须使用 #ifdef。
 在任何有意义的 #if 或 #ifdef 块的末尾（超过几行），在 #endif 同一行的后面写下
 注释，指出该条件表达式被使用。例如：
 	#ifdef CONFIG_SOMETHING
 	...
 	#endif /* CONFIG_SOMETHING */
 		附录 I：参考
 The C Programming Language, 第二版
 作者：Brian W. Kernighan 和 Dennis M. Ritchie.
 Prentice Hall, Inc., 1988.
 ISBN 0-13-110362-8 (软皮), 0-13-110370-9 (硬皮).
 The Practice of Programming
 作者：Brian W. Kernighan 和 Rob Pike.
 Addison-Wesley, Inc., 1999.
 ISBN 0-201-61586-X.
 GNU 手册 - 遵循 K&R 标准和此文本 - cpp, gcc, gcc internals and indent,
 都可以从 http://www.gnu.org/manual/ 找到
 WG14是C语言的国际标准化工作组，URL: http://www.open-std.org/JTC1/SC22/WG14/
 Kernel process/coding-style.rst，作者 greg@kroah.com 发表于OLS 2002：
 http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
--- a/Documentation/translations/zh_CN/coding-style.rst
+++ b/Documentation/translations/zh_CN/coding-style.rst
@ -0,0 +1,950 @@
 Chinese translated version of Documentation/process/coding-style.rst
 If you have any comment or update to the content, please post to LKML directly.
 However, if you have problem communicating in English you can also ask the
 Chinese maintainer for help.  Contact the Chinese maintainer, if this
 translation is outdated or there is problem with translation.
 Chinese maintainer: Zhang Le <r0bertz@gentoo.org>
 ---------------------------------------------------------------------
 Documentation/process/coding-style.rst 的中文翻译
 如果想评论或更新本文的内容，请直接发信到LKML。如果你使用英文交流有困难的话，
 也可以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题，请联系中文版
 维护者::
  中文版维护者： 张乐 Zhang Le <r0bertz@gentoo.org>
  中文版翻译者： 张乐 Zhang Le <r0bertz@gentoo.org>
  中文版校译者： 王聪 Wang Cong <xiyou.wangcong@gmail.com>
                 wheelz <kernel.zeng@gmail.com>
                 管旭东 Xudong Guan <xudong.guan@gmail.com>
                 Li Zefan <lizf@cn.fujitsu.com>
                 Wang Chen <wangchen@cn.fujitsu.com>
 以下为正文
 ---------------------------------------------------------------------
 Linux 内核代码风格
 =========================
 这是一个简短的文档，描述了 linux 内核的首选代码风格。代码风格是因人而异的，
 而且我不愿意把自己的观点强加给任何人，但这就像我去做任何事情都必须遵循的原则
 那样，我也希望在绝大多数事上保持这种态度。请 (在写代码时) 至少考虑一下这里
 的代码风格。
 首先，我建议你打印一份 GNU 代码规范，然后不要读。烧了它，这是一个具有重大象征
 性意义的动作。
 不管怎样，现在我们开始：
 1) 缩进
 --------------
 制表符是 8 个字符，所以缩进也是 8 个字符。有些异端运动试图将缩进变为 4 (甚至
 2！) 字符深，这几乎相当于尝试将圆周率的值定义为 3。
 理由：缩进的全部意义就在于清楚的定义一个控制块起止于何处。尤其是当你盯着你的
 屏幕连续看了 20 小时之后，你将会发现大一点的缩进会使你更容易分辨缩进。
 现在，有些人会抱怨 8 个字符的缩进会使代码向右边移动的太远，在 80 个字符的终端
 屏幕上就很难读这样的代码。这个问题的答案是，如果你需要 3 级以上的缩进，不管用
 何种方式你的代码已经有问题了，应该修正你的程序。
 简而言之，8 个字符的缩进可以让代码更容易阅读，还有一个好处是当你的函数嵌套太
 深的时候可以给你警告。留心这个警告。
 在 switch 语句中消除多级缩进的首选的方式是让 ``switch`` 和从属于它的 ``case``
 标签对齐于同一列，而不要 ``两次缩进`` ``case`` 标签。比如：
 .. code-block:: c
 	switch (suffix) {
 	case 'G':
 	case 'g':
 		mem <<= 30;
 		break;
 	case 'M':
 	case 'm':
 		mem <<= 20;
 		break;
 	case 'K':
 	case 'k':
 		mem <<= 10;
 		/* fall through */
 	default:
 		break;
 	}
 不要把多个语句放在一行里，除非你有什么东西要隐藏：
 .. code-block:: c
 	if (condition) do_this;
 	  do_something_everytime;
 也不要在一行里放多个赋值语句。内核代码风格超级简单。就是避免可能导致别人误读
 的表达式。
 除了注释、文档和 Kconfig 之外，不要使用空格来缩进，前面的例子是例外，是有意为
 之。
 选用一个好的编辑器，不要在行尾留空格。
 2) 把长的行和字符串打散
 ------------------------------
 代码风格的意义就在于使用平常使用的工具来维持代码的可读性和可维护性。
 每一行的长度的限制是 80 列，我们强烈建议您遵守这个惯例。
 长于 80 列的语句要打散成有意义的片段。除非超过 80 列能显著增加可读性，并且不
 会隐藏信息。子片段要明显短于母片段，并明显靠右。这同样适用于有着很长参数列表
 的函数头。然而，绝对不要打散对用户可见的字符串，例如 printk 信息，因为这样就
 很难对它们 grep。
 3) 大括号和空格的放置
 ------------------------------
 C 语言风格中另外一个常见问题是大括号的放置。和缩进大小不同，选择或弃用某种放
 置策略并没有多少技术上的原因，不过首选的方式，就像 Kernighan 和 Ritchie 展示
 给我们的，是把起始大括号放在行尾，而把结束大括号放在行首，所以：
 .. code-block:: c
 	if (x is true) {
 		we do y
 	}
 这适用于所有的非函数语句块 (if, switch, for, while, do)。比如：
 .. code-block:: c
 	switch (action) {
 	case KOBJ_ADD:
 		return "add";
 	case KOBJ_REMOVE:
 		return "remove";
 	case KOBJ_CHANGE:
 		return "change";
 	default:
 		return NULL;
 	}
 不过，有一个例外，那就是函数：函数的起始大括号放置于下一行的开头，所以：
 .. code-block:: c
 	int function(int x)
 	{
 		body of function
 	}
 全世界的异端可能会抱怨这个不一致性是... 呃... 不一致的，不过所有思维健全的人
 都知道 (a) K&R 是 **正确的** 并且 (b) K&R 是正确的。此外，不管怎样函数都是特
 殊的 (C 函数是不能嵌套的)。
 注意结束大括号独自占据一行，除非它后面跟着同一个语句的剩余部分，也就是 do 语
 句中的 "while" 或者 if 语句中的 "else"，像这样：
 .. code-block:: c
 	do {
 		body of do-loop
 	} while (condition);
 和
 .. code-block:: c
 	if (x == y) {
 		..
 	} else if (x > y) {
 		...
 	} else {
 		....
 	}
 理由：K&R。
 也请注意这种大括号的放置方式也能使空 (或者差不多空的) 行的数量最小化，同时不
 失可读性。因此，由于你的屏幕上的新行是不可再生资源 (想想 25 行的终端屏幕)，你
 将会有更多的空行来放置注释。
 当只有一个单独的语句的时候，不用加不必要的大括号。
 .. code-block:: c
 	if (condition)
 		action();
 和
 .. code-block:: c
 	if (condition)
 		do_this();
 	else
 		do_that();
 这并不适用于只有一个条件分支是单语句的情况；这时所有分支都要使用大括号：
 .. code-block:: c
 	if (condition) {
 		do_this();
 		do_that();
 	} else {
 		otherwise();
 	}
 3.1) 空格
 ********************
 Linux 内核的空格使用方式 (主要) 取决于它是用于函数还是关键字。(大多数) 关键字
 后要加一个空格。值得注意的例外是 sizeof, typeof, alignof 和 __attribute__，这
 些关键字某些程度上看起来更像函数 (它们在 Linux 里也常常伴随小括号而使用，尽管
 在 C 里这样的小括号不是必需的，就像 ``struct fileinfo info;`` 声明过后的
 ``sizeof info``)。
 所以在这些关键字之后放一个空格::
 	if, switch, case, for, do, while
 但是不要在 sizeof, typeof, alignof 或者 __attribute__ 这些关键字之后放空格。
 例如，
 .. code-block:: c
 	s = sizeof(struct file);
 不要在小括号里的表达式两侧加空格。这是一个 **反例** ：
 .. code-block:: c
 	s = sizeof( struct file );
 当声明指针类型或者返回指针类型的函数时， ``*`` 的首选使用方式是使之靠近变量名
 或者函数名，而不是靠近类型名。例子：
 .. code-block:: c
 	char *linux_banner;
 	unsigned long long memparse(char *ptr, char **retptr);
 	char *match_strdup(substring_t *s);
 在大多数二元和三元操作符两侧使用一个空格，例如下面所有这些操作符::
 	=  +  -  <  >  *  /  %  |  &  ^  <=  >=  ==  !=  ?  :
 但是一元操作符后不要加空格::
 	&  *  +  -  ~  !  sizeof  typeof  alignof  __attribute__  defined
 后缀自加和自减一元操作符前不加空格::
 	++  --
 前缀自加和自减一元操作符后不加空格::
 	++  --
 ``.`` 和 ``->`` 结构体成员操作符前后不加空格。
 不要在行尾留空白。有些可以自动缩进的编辑器会在新行的行首加入适量的空白，然后
 你就可以直接在那一行输入代码。不过假如你最后没有在那一行输入代码，有些编辑器
 就不会移除已经加入的空白，就像你故意留下一个只有空白的行。包含行尾空白的行就
 这样产生了。
 当 git 发现补丁包含了行尾空白的时候会警告你，并且可以应你的要求去掉行尾空白；
 不过如果你是正在打一系列补丁，这样做会导致后面的补丁失败，因为你改变了补丁的
 上下文。
 4) 命名
 ------------------------------
 C 是一个简朴的语言，你的命名也应该这样。和 Modula-2 和 Pascal 程序员不同，
 C 程序员不使用类似 ThisVariableIsATemporaryCounter 这样华丽的名字。C 程序员会
 称那个变量为 ``tmp`` ，这样写起来会更容易，而且至少不会令其难于理解。
 不过，虽然混用大小写的名字是不提倡使用的，但是全局变量还是需要一个具描述性的
 名字。称一个全局函数为 ``foo`` 是一个难以饶恕的错误。
 全局变量 (只有当你 **真正** 需要它们的时候再用它) 需要有一个具描述性的名字，就
 像全局函数。如果你有一个可以计算活动用户数量的函数，你应该叫它
 ``count_active_users()`` 或者类似的名字，你不应该叫它 ``cntuser()`` 。
 在函数名中包含函数类型 (所谓的匈牙利命名法) 是脑子出了问题——编译器知道那些类
 型而且能够检查那些类型，这样做只能把程序员弄糊涂了。难怪微软总是制造出有问题
 的程序。
 本地变量名应该简短，而且能够表达相关的含义。如果你有一些随机的整数型的循环计
 数器，它应该被称为 ``i`` 。叫它 ``loop_counter`` 并无益处，如果它没有被误解的
 可能的话。类似的， ``tmp`` 可以用来称呼任意类型的临时变量。
 如果你怕混淆了你的本地变量名，你就遇到另一个问题了，叫做函数增长荷尔蒙失衡综
 合症。请看第六章 (函数)。
 5) Typedef
 -----------
 不要使用类似 ``vps_t`` 之类的东西。
 对结构体和指针使用 typedef 是一个 **错误** 。当你在代码里看到：
 .. code-block:: c
 	vps_t a;
 这代表什么意思呢？
 相反，如果是这样
 .. code-block:: c
 	struct virtual_container *a;
 你就知道 ``a`` 是什么了。
 很多人认为 typedef ``能提高可读性`` 。实际不是这样的。它们只在下列情况下有用：
 (a) 完全不透明的对象 (这种情况下要主动使用 typedef 来 **隐藏** 这个对象实际上
     是什么)。
     例如： ``pte_t`` 等不透明对象，你只能用合适的访问函数来访问它们。
     .. note::
       不透明性和 "访问函数" 本身是不好的。我们使用 pte_t 等类型的原因在于真
       的是完全没有任何共用的可访问信息。
 (b) 清楚的整数类型，如此，这层抽象就可以 **帮助** 消除到底是 ``int`` 还是
     ``long`` 的混淆。
     u8/u16/u32 是完全没有问题的 typedef，不过它们更符合类别 (d) 而不是这里。
     .. note::
       要这样做，必须事出有因。如果某个变量是 ``unsigned long`` ，那么没有必要
 	typedef unsigned long myflags_t;
     不过如果有一个明确的原因，比如它在某种情况下可能会是一个 ``unsigned int``
     而在其他情况下可能为 ``unsigned long`` ，那么就不要犹豫，请务必使用
     typedef。
 (c) 当你使用 sparse 按字面的创建一个 **新** 类型来做类型检查的时候。
 (d) 和标准 C99 类型相同的类型，在某些例外的情况下。
     虽然让眼睛和脑筋来适应新的标准类型比如 ``uint32_t`` 不需要花很多时间，可
     是有些人仍然拒绝使用它们。
     因此，Linux 特有的等同于标准类型的 ``u8/u16/u32/u64`` 类型和它们的有符号
     类型是被允许的——尽管在你自己的新代码中，它们不是强制要求要使用的。
     当编辑已经使用了某个类型集的已有代码时，你应该遵循那些代码中已经做出的选
     择。
 (e) 可以在用户空间安全使用的类型。
     在某些用户空间可见的结构体里，我们不能要求 C99 类型而且不能用上面提到的
     ``u32`` 类型。因此，我们在与用户空间共享的所有结构体中使用 __u32 和类似
     的类型。
 可能还有其他的情况，不过基本的规则是 **永远不要** 使用 typedef，除非你可以明
 确的应用上述某个规则中的一个。
 总的来说，如果一个指针或者一个结构体里的元素可以合理的被直接访问到，那么它们
 就不应该是一个 typedef。
 6) 函数
 ------------------------------
 函数应该简短而漂亮，并且只完成一件事情。函数应该可以一屏或者两屏显示完 (我们
 都知道 ISO/ANSI 屏幕大小是 80x24)，只做一件事情，而且把它做好。
 一个函数的最大长度是和该函数的复杂度和缩进级数成反比的。所以，如果你有一个理
 论上很简单的只有一个很长 (但是简单) 的 case 语句的函数，而且你需要在每个 case
 里做很多很小的事情，这样的函数尽管很长，但也是可以的。
 不过，如果你有一个复杂的函数，而且你怀疑一个天分不是很高的高中一年级学生可能
 甚至搞不清楚这个函数的目的，你应该严格遵守前面提到的长度限制。使用辅助函数，
 并为之取个具描述性的名字 (如果你觉得它们的性能很重要的话，可以让编译器内联它
 们，这样的效果往往会比你写一个复杂函数的效果要好。)
 函数的另外一个衡量标准是本地变量的数量。此数量不应超过 5－10 个，否则你的函数
 就有问题了。重新考虑一下你的函数，把它分拆成更小的函数。人的大脑一般可以轻松
 的同时跟踪 7 个不同的事物，如果再增多的话，就会糊涂了。即便你聪颖过人，你也可
 能会记不清你 2 个星期前做过的事情。
 在源文件里，使用空行隔开不同的函数。如果该函数需要被导出，它的 **EXPORT** 宏
 应该紧贴在它的结束大括号之下。比如：
 .. code-block:: c
 	int system_is_up(void)
 	{
 		return system_state == SYSTEM_RUNNING;
 	}
 	EXPORT_SYMBOL(system_is_up);
 在函数原型中，包含函数名和它们的数据类型。虽然 C 语言里没有这样的要求，在
 Linux 里这是提倡的做法，因为这样可以很简单的给读者提供更多的有价值的信息。
 7) 集中的函数退出途径
 ------------------------------
 虽然被某些人声称已经过时，但是 goto 语句的等价物还是经常被编译器所使用，具体
 形式是无条件跳转指令。
 当一个函数从多个位置退出，并且需要做一些类似清理的常见操作时，goto 语句就很方
 便了。如果并不需要清理操作，那么直接 return 即可。
 选择一个能够说明 goto 行为或它为何存在的标签名。如果 goto 要释放 ``buffer``,
 一个不错的名字可以是 ``out_free_buffer:`` 。别去使用像 ``err1:`` 和 ``err2:``
 这样的 GW-BASIC 名称，因为一旦你添加或删除了 (函数的) 退出路径，你就必须对它们
 重新编号，这样会难以去检验正确性。
 使用 goto 的理由是：
 - 无条件语句容易理解和跟踪
 - 嵌套程度减小
 - 可以避免由于修改时忘记更新个别的退出点而导致错误
 - 让编译器省去删除冗余代码的工作 ;)
 .. code-block:: c
 	int fun(int a)
 	{
 		int result = 0;
 		char *buffer;
 		buffer = kmalloc(SIZE, GFP_KERNEL);
 		if (!buffer)
 			return -ENOMEM;
 		if (condition1) {
 			while (loop1) {
 				...
 			}
 			result = 1;
 			goto out_free_buffer;
 		}
 		...
 	out_free_buffer:
 		kfree(buffer);
 		return result;
 	}
 一个需要注意的常见错误是 ``一个 err 错误`` ，就像这样：
 .. code-block:: c
 	err:
 		kfree(foo->bar);
 		kfree(foo);
 		return ret;
 这段代码的错误是，在某些退出路径上 ``foo`` 是 NULL。通常情况下，通过把它分离
 成两个错误标签 ``err_free_bar:`` 和 ``err_free_foo:`` 来修复这个错误：
 .. code-block:: c
 	 err_free_bar:
 		kfree(foo->bar);
 	 err_free_foo:
 		kfree(foo);
 		return ret;
 理想情况下，你应该模拟错误来测试所有退出路径。
 8) 注释
 ------------------------------
 注释是好的，不过有过度注释的危险。永远不要在注释里解释你的代码是如何运作的：
 更好的做法是让别人一看你的代码就可以明白，解释写的很差的代码是浪费时间。
 一般的，你想要你的注释告诉别人你的代码做了什么，而不是怎么做的。也请你不要把
 注释放在一个函数体内部：如果函数复杂到你需要独立的注释其中的一部分，你很可能
 需要回到第六章看一看。你可以做一些小注释来注明或警告某些很聪明 (或者糟糕) 的
 做法，但不要加太多。你应该做的，是把注释放在函数的头部，告诉人们它做了什么，
 也可以加上它做这些事情的原因。
 当注释内核 API 函数时，请使用 kernel-doc 格式。请看
 Documentation/doc-guide/ 和 scripts/kernel-doc 以获得详细信息。
 长 (多行) 注释的首选风格是：
 .. code-block:: c
 	/*
 	 * This is the preferred style for multi-line
 	 * comments in the Linux kernel source code.
 	 * Please use it consistently.
 	 *
 	 * Description:  A column of asterisks on the left side,
 	 * with beginning and ending almost-blank lines.
 	 */
 对于在 net/ 和 drivers/net/ 的文件，首选的长 (多行) 注释风格有些不同。
 .. code-block:: c
 	/* The preferred comment style for files in net/ and drivers/net
 	 * looks like this.
 	 *
 	 * It is nearly the same as the generally preferred comment style,
 	 * but there is no initial almost-blank line.
 	 */
 注释数据也是很重要的，不管是基本类型还是衍生类型。为了方便实现这一点，每一行
 应只声明一个数据 (不要使用逗号来一次声明多个数据)。这样你就有空间来为每个数据
 写一段小注释来解释它们的用途了。
 9) 你已经把事情弄糟了
 ------------------------------
 这没什么，我们都是这样。可能你的使用了很长时间 Unix 的朋友已经告诉你
 ``GNU emacs`` 能自动帮你格式化 C 源代码，而且你也注意到了，确实是这样，不过它
 所使用的默认值和我们想要的相去甚远 (实际上，甚至比随机打的还要差——无数个猴子
 在 GNU emacs 里打字永远不会创造出一个好程序) (译注：Infinite Monkey Theorem)
 所以你要么放弃 GNU emacs，要么改变它让它使用更合理的设定。要采用后一个方案，
 你可以把下面这段粘贴到你的 .emacs 文件里。
 .. code-block:: none
  (defun c-lineup-arglist-tabs-only (ignored)
    "Line up argument lists by tabs, not spaces"
    (let* ((anchor (c-langelem-pos c-syntactic-element))
           (column (c-langelem-2nd-pos c-syntactic-element))
           (offset (- (1+ column) anchor))
           (steps (floor offset c-basic-offset)))
      (* (max steps 1)
         c-basic-offset)))
  (add-hook 'c-mode-common-hook
            (lambda ()
              ;; Add kernel style
              (c-add-style
               "linux-tabs-only"
               '("linux" (c-offsets-alist
                          (arglist-cont-nonempty
                           c-lineup-gcc-asm-reg
                           c-lineup-arglist-tabs-only))))))
  (add-hook 'c-mode-hook
            (lambda ()
              (let ((filename (buffer-file-name)))
                ;; Enable kernel mode for the appropriate files
                (when (and filename
                           (string-match (expand-file-name "~/src/linux-trees")
                                         filename))
                  (setq indent-tabs-mode t)
                  (setq show-trailing-whitespace t)
                  (c-set-style "linux-tabs-only")))))
 这会让 emacs 在 ``~/src/linux-trees`` 下的 C 源文件获得更好的内核代码风格。
 不过就算你尝试让 emacs 正确的格式化代码失败了，也并不意味着你失去了一切：还可
 以用 ``indent`` 。
 不过，GNU indent 也有和 GNU emacs 一样有问题的设定，所以你需要给它一些命令选
 项。不过，这还不算太糟糕，因为就算是 GNU indent 的作者也认同 K&R 的权威性
 (GNU 的人并不是坏人，他们只是在这个问题上被严重的误导了)，所以你只要给 indent
 指定选项 ``-kr -i8`` (代表 ``K&R，8 字符缩进``)，或使用 ``scripts/Lindent``
 这样就可以以最时髦的方式缩进源代码。
 ``indent`` 有很多选项，特别是重新格式化注释的时候，你可能需要看一下它的手册。
 不过记住： ``indent`` 不能修正坏的编程习惯。
 10) Kconfig 配置文件
 ------------------------------
 对于遍布源码树的所有 Kconfig* 配置文件来说，它们缩进方式有所不同。紧挨着
 ``config`` 定义的行，用一个制表符缩进，然而 help 信息的缩进则额外增加 2 个空
 格。举个例子::
  config AUDIT
 	bool "Auditing support"
 	depends on NET
 	help
 	  Enable auditing infrastructure that can be used with another
 	  kernel subsystem, such as SELinux (which requires this for
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 而那些危险的功能 (比如某些文件系统的写支持) 应该在它们的提示字符串里显著的声
 明这一点::
  config ADFS_FS_RW
 	bool "ADFS write support (DANGEROUS)"
 	depends on ADFS_FS
 	...
 要查看配置文件的完整文档，请看 Documentation/kbuild/kconfig-language.txt。
 11) 数据结构
 ------------------------------
 如果一个数据结构，在创建和销毁它的单线执行环境之外可见，那么它必须要有一个引
 用计数器。内核里没有垃圾收集 (并且内核之外的垃圾收集慢且效率低下)，这意味着你
 绝对需要记录你对这种数据结构的使用情况。
 引用计数意味着你能够避免上锁，并且允许多个用户并行访问这个数据结构——而不需要
 担心这个数据结构仅仅因为暂时不被使用就消失了，那些用户可能不过是沉睡了一阵或
 者做了一些其他事情而已。
 注意上锁 **不能** 取代引用计数。上锁是为了保持数据结构的一致性，而引用计数是一
 个内存管理技巧。通常二者都需要，不要把两个搞混了。
 很多数据结构实际上有 2 级引用计数，它们通常有不同 ``类`` 的用户。子类计数器统
 计子类用户的数量，每当子类计数器减至零时，全局计数器减一。
 这种 ``多级引用计数`` 的例子可以在内存管理 (``struct mm_struct``: mm_users 和
 mm_count)，和文件系统 (``struct super_block``: s_count 和 s_active) 中找到。
 记住：如果另一个执行线索可以找到你的数据结构，但这个数据结构没有引用计数器，
 这里几乎肯定是一个 bug。
 12) 宏，枚举和RTL
 ------------------------------
 用于定义常量的宏的名字及枚举里的标签需要大写。
 .. code-block:: c
 	#define CONSTANT 0x12345
 在定义几个相关的常量时，最好用枚举。
 宏的名字请用大写字母，不过形如函数的宏的名字可以用小写字母。
 一般的，如果能写成内联函数就不要写成像函数的宏。
 含有多个语句的宏应该被包含在一个 do-while 代码块里：
 .. code-block:: c
 	#define macrofun(a, b, c)			\
 		do {					\
 			if (a == 5)			\
 				do_this(b, c);		\
 		} while (0)
 使用宏的时候应避免的事情：
 1) 影响控制流程的宏：
 .. code-block:: c
 	#define FOO(x)					\
 		do {					\
 			if (blah(x) < 0)		\
 				return -EBUGGERED;	\
 		} while (0)
 **非常** 不好。它看起来像一个函数，不过却能导致 ``调用`` 它的函数退出；不要打
 乱读者大脑里的语法分析器。
 2) 依赖于一个固定名字的本地变量的宏：
 .. code-block:: c
 	#define FOO(val) bar(index, val)
 可能看起来像是个不错的东西，不过它非常容易把读代码的人搞糊涂，而且容易导致看起
 来不相关的改动带来错误。
 3) 作为左值的带参数的宏： FOO(x) = y；如果有人把 FOO 变成一个内联函数的话，这
   种用法就会出错了。
 4) 忘记了优先级：使用表达式定义常量的宏必须将表达式置于一对小括号之内。带参数
   的宏也要注意此类问题。
 .. code-block:: c
 	#define CONSTANT 0x4000
 	#define CONSTEXP (CONSTANT | 3)
 5) 在宏里定义类似函数的本地变量时命名冲突：
 .. code-block:: c
 	#define FOO(x)				\
 	({					\
 		typeof(x) ret;			\
 		ret = calc_ret(x);		\
 		(ret);				\
 	})
 ret 是本地变量的通用名字 - __foo_ret 更不容易与一个已存在的变量冲突。
 cpp 手册对宏的讲解很详细。gcc internals 手册也详细讲解了 RTL，内核里的汇编语
 言经常用到它。
 13) 打印内核消息
 ------------------------------
 内核开发者应该是受过良好教育的。请一定注意内核信息的拼写，以给人以好的印象。
 不要用不规范的单词比如 ``dont``，而要用 ``do not`` 或者 ``don't`` 。保证这些信
 息简单明了，无歧义。
 内核信息不必以英文句号结束。
 在小括号里打印数字 (%d) 没有任何价值，应该避免这样做。
 <linux/device.h> 里有一些驱动模型诊断宏，你应该使用它们，以确保信息对应于正确
 的设备和驱动，并且被标记了正确的消息级别。这些宏有：dev_err(), dev_warn(),
 dev_info() 等等。对于那些不和某个特定设备相关连的信息，<linux/printk.h> 定义
 了 pr_notice(), pr_info(), pr_warn(), pr_err() 和其他。
 写出好的调试信息可以是一个很大的挑战；一旦你写出后，这些信息在远程除错时能提
 供极大的帮助。然而打印调试信息的处理方式同打印非调试信息不同。其他 pr_XXX()
 函数能无条件地打印，pr_debug() 却不；默认情况下它不会被编译，除非定义了 DEBUG
 或设定了 CONFIG_DYNAMIC_DEBUG。dev_dbg() 也是如此。一个相关的约定是：在已经开
 启 DEBUG 的基础上，使用 VERBOSE_DEBUG 来额外添加 dev_vdbg() 消息。
 许多子系统拥有 Kconfig 调试选项来开启 -DDEBUG 在对应的 Makefile 里面；在其他
 情况下，特殊文件使用 #define DEBUG。当一条调试信息需要被无条件打印时，例如，
 如果已经包含一个调试相关的 #ifdef 条件，printk(KERN_DEBUG ...) 就可被使用。
 14) 分配内存
 ------------------------------
 内核提供了下面的一般用途的内存分配函数：
 kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc() 和 vzalloc()。
 请参考 API 文档以获取有关它们的详细信息。
 传递结构体大小的首选形式是这样的：
 .. code-block:: c
 	p = kmalloc(sizeof(*p), ...);
 另外一种传递方式中，sizeof 的操作数是结构体的名字，这样会降低可读性，并且可能
 会引入 bug。有可能指针变量类型被改变时，而对应的传递给内存分配函数的 sizeof
 的结果不变。
 强制转换一个 void 指针返回值是多余的。C 语言本身保证了从 void 指针到其他任何
 指针类型的转换是没有问题的。
 分配一个数组的首选形式是这样的：
 .. code-block:: c
 	p = kmalloc_array(n, sizeof(...), ...);
 分配一个清零的数组的首选形式是这样的：
 .. code-block:: c
 	p = kcalloc(n, sizeof(...), ...);
 这两种形式都会检查分配大小 n * sizeof(...) 是否溢出，如果溢出则返回 NULL。
 15) 内联弊病
 ------------------------------
 有一个常见的误解是 ``内联`` 是 gcc 提供的可以让代码运行更快的一个选项。虽然使
 用内联函数有时候是恰当的 (比如作为一种替代宏的方式，请看第十二章)，不过很多情
 况下不是这样。inline 的过度使用会使内核变大，从而使整个系统运行速度变慢。
 因为体积大内核会占用更多的指令高速缓存，而且会导致 pagecache 的可用内存减少。
 想象一下，一次 pagecache 未命中就会导致一次磁盘寻址，将耗时 5 毫秒。5 毫秒的
 时间内 CPU 能执行很多很多指令。
 一个基本的原则是如果一个函数有 3 行以上，就不要把它变成内联函数。这个原则的一
 个例外是，如果你知道某个参数是一个编译时常量，而且因为这个常量你确定编译器在
 编译时能优化掉你的函数的大部分代码，那仍然可以给它加上 inline 关键字。
 kmalloc() 内联函数就是一个很好的例子。
 人们经常主张给 static 的而且只用了一次的函数加上 inline，如此不会有任何损失，
 因为没有什么好权衡的。虽然从技术上说这是正确的，但是实际上这种情况下即使不加
 inline gcc 也可以自动使其内联。而且其他用户可能会要求移除 inline，由此而来的
 争论会抵消 inline 自身的潜在价值，得不偿失。
 16) 函数返回值及命名
 ------------------------------
 函数可以返回多种不同类型的值，最常见的一种是表明函数执行成功或者失败的值。这样
 的一个值可以表示为一个错误代码整数 (-Exxx＝失败，0＝成功) 或者一个 ``成功``
 布尔值 (0＝失败，非0＝成功)。
 混合使用这两种表达方式是难于发现的 bug 的来源。如果 C 语言本身严格区分整型和
 布尔型变量，那么编译器就能够帮我们发现这些错误... 不过 C 语言不区分。为了避免
 产生这种 bug，请遵循下面的惯例::
 	如果函数的名字是一个动作或者强制性的命令，那么这个函数应该返回错误代
 	码整数。如果是一个判断，那么函数应该返回一个 "成功" 布尔值。
 比如， ``add work`` 是一个命令，所以 add_work() 在成功时返回 0，在失败时返回
 -EBUSY。类似的，因为 ``PCI device present`` 是一个判断，所以 pci_dev_present()
 在成功找到一个匹配的设备时应该返回 1，如果找不到时应该返回 0。
 所有 EXPORTed 函数都必须遵守这个惯例，所有的公共函数也都应该如此。私有
 (static) 函数不需要如此，但是我们也推荐这样做。
 返回值是实际计算结果而不是计算是否成功的标志的函数不受此惯例的限制。一般的，
 它们通过返回一些正常值范围之外的结果来表示出错。典型的例子是返回指针的函数，
 它们使用 NULL 或者 ERR_PTR 机制来报告错误。
 17) 不要重新发明内核宏
 ------------------------------
 头文件 include/linux/kernel.h 包含了一些宏，你应该使用它们，而不要自己写一些
 它们的变种。比如，如果你需要计算一个数组的长度，使用这个宏
 .. code-block:: c
 	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 类似的，如果你要计算某结构体成员的大小，使用
 .. code-block:: c
 	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 还有可以做严格的类型检查的 min() 和 max() 宏，如果你需要可以使用它们。你可以
 自己看看那个头文件里还定义了什么你可以拿来用的东西，如果有定义的话，你就不应
 在你的代码里自己重新定义。
 18) 编辑器模式行和其他需要罗嗦的事情
 --------------------------------------------------
 有一些编辑器可以解释嵌入在源文件里的由一些特殊标记标明的配置信息。比如，emacs
 能够解释被标记成这样的行：
 .. code-block:: c
 	-*- mode: c -*-
 或者这样的：
 .. code-block:: c
 	/*
 	Local Variables:
 	compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
 	End:
 	*/
 Vim 能够解释这样的标记：
 .. code-block:: c
 	/* vim:set sw=8 noet */
 不要在源代码中包含任何这样的内容。每个人都有他自己的编辑器配置，你的源文件不
 应该覆盖别人的配置。这包括有关缩进和模式配置的标记。人们可以使用他们自己定制
 的模式，或者使用其他可以产生正确的缩进的巧妙方法。
 19) 内联汇编
 ------------------------------
 在特定架构的代码中，你可能需要内联汇编与 CPU 和平台相关功能连接。需要这么做时
 就不要犹豫。然而，当 C 可以完成工作时，不要平白无故地使用内联汇编。在可能的情
 况下，你可以并且应该用 C 和硬件沟通。
 请考虑去写捆绑通用位元 (wrap common bits) 的内联汇编的简单辅助函数，别去重复
 地写下只有细微差异内联汇编。记住内联汇编可以使用 C 参数。
 大型，有一定复杂度的汇编函数应该放在 .S 文件内，用相应的 C 原型定义在 C 头文
 件中。汇编函数的 C 原型应该使用 ``asmlinkage`` 。
 你可能需要把汇编语句标记为 volatile，用来阻止 GCC 在没发现任何副作用后就把它
 移除了。不过你不必总是这样做，因为不必要地使用 volatile 会限制优化。
 在写一个包含多条指令的单个内联汇编语句时，把每条指令用引号分割而且各占一行，
 除了最后一条指令外，在每个指令结尾加上 \n\t，让汇编输出时可以正确地缩进下一条
 指令：
 .. code-block:: c
 	asm ("magic %reg1, #42\n\t"
 	     "more_magic %reg2, %reg3"
 	     : /* outputs */ : /* inputs */ : /* clobbers */);
 20) 条件编译
 ------------------------------
 只要可能，就不要在 .c 文件里面使用预处理条件 (#if, #ifdef)；这样做让代码更难
 阅读并且更难去跟踪逻辑。替代方案是，在头文件中用预处理条件提供给那些 .c 文件
 使用，再给 #else 提供一个空桩 (no-op stub) 版本，然后在 .c 文件内无条件地调用
 那些 (定义在头文件内的) 函数。这样做，编译器会避免为桩函数 (stub) 的调用生成
 任何代码，产生的结果是相同的，但逻辑将更加清晰。
 最好倾向于编译整个函数，而不是函数的一部分或表达式的一部分。与其放一个 ifdef
 在表达式内，不如分解出部分或全部表达式，放进一个单独的辅助函数，并应用预处理
 条件到这个辅助函数内。
 如果你有一个在特定配置中，可能变成未使用的函数或变量，编译器会警告它定义了但
 未使用，把它标记为 __maybe_unused 而不是将它包含在一个预处理条件中。(然而，如
 果一个函数或变量总是未使用，就直接删除它。)
 在代码中，尽可能地使用 IS_ENABLED 宏来转化某个 Kconfig 标记为 C 的布尔
 表达式，并在一般的 C 条件中使用它：
 .. code-block:: c
 	if (IS_ENABLED(CONFIG_SOMETHING)) {
 		...
 	}
 编译器会做常量折叠，然后就像使用 #ifdef 那样去包含或排除代码块，所以这不会带
 来任何运行时开销。然而，这种方法依旧允许 C 编译器查看块内的代码，并检查它的正
 确性 (语法，类型，符号引用，等等)。因此，如果条件不满足，代码块内的引用符号就
 不存在时，你还是必须去用 #ifdef。
 在任何有意义的 #if 或 #ifdef 块的末尾 (超过几行的)，在 #endif 同一行的后面写下
 注解，注释这个条件表达式。例如：
 .. code-block:: c
 	#ifdef CONFIG_SOMETHING
 	...
 	#endif /* CONFIG_SOMETHING */
 附录 I) 参考
 -------------------
 The C Programming Language, 第二版
 作者：Brian W. Kernighan 和 Dennis M. Ritchie.
 Prentice Hall, Inc., 1988.
 ISBN 0-13-110362-8 (软皮), 0-13-110370-9 (硬皮).
 The Practice of Programming
 作者：Brian W. Kernighan 和 Rob Pike.
 Addison-Wesley, Inc., 1999.
 ISBN 0-201-61586-X.
 GNU 手册 - 遵循 K&R 标准和此文本 - cpp, gcc, gcc internals 和 indent,
 都可以从 http://www.gnu.org/manual/ 找到
 WG14 是 C 语言的国际标准化工作组，URL: http://www.open-std.org/JTC1/SC22/WG14/
 Kernel process/coding-style.rst，作者 greg@kroah.com 发表于 OLS 2002：
 http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@ -0,0 +1,12 @@
 .. raw:: latex
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 Chinese translations
 ====================
 .. toctree::
   :maxdepth: 1
   coding-style
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@ -543,7 +543,7 @@ relevant attribute files are usb2_hardware_lpm and usb3_hardware_lpm.
 		When a USB 3.0 lpm-capable device is plugged in to a
 		xHCI host which supports link PM, it will check if U1
 		and U2 exit latencies have been set in the BOS
-		descriptor; if the check is is passed and the host
+		descriptor; if the check is passed and the host
 		supports USB3 hardware LPM, USB3 hardware LPM will be
 		enabled for the device and these files will be created.
 		The files hold a string value (enable or disable)
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@ -296,7 +296,7 @@ thp_split_page is incremented every time a huge page is split into base
 	reason is that a huge page is old and is being reclaimed.
 	This action implies splitting all PMD the page mapped with.
-thp_split_page_failed is is incremented if kernel fails to split huge
+thp_split_page_failed is incremented if kernel fails to split huge
 	page. This can happen if the page was pinned by somebody.
 thp_deferred_split_page is incremented when a huge page is put onto split
--- a/2
+++ b/2
@ -1446,7 +1446,7 @@ $(help-board-dirs): help-%:
 # Documentation targets
 # ---------------------------------------------------------------------------
-DOC_TARGETS := xmldocs sgmldocs psdocs latexdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs
+DOC_TARGETS := xmldocs sgmldocs psdocs latexdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs linkcheckdocs
 PHONY += $(DOC_TARGETS)
 $(DOC_TARGETS): scripts_basic FORCE
 	$(Q)$(MAKE) $(build)=scripts build_docproc build_check-lc_ctype
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@ -64,24 +64,7 @@ typedef struct pm_message {
 } pm_message_t;
 /**
- * struct dev_pm_ops - device PM callbacks
+ * struct dev_pm_ops - device PM callbacks.
 *
 * Several device power state transitions are externally visible, affecting
 * the state of pending I/O queues and (for drivers that touch hardware)
 * interrupts, wakeups, DMA, and other hardware state.  There may also be
 * internal transitions to various low-power modes which are transparent
 * to the rest of the driver stack (such as a driver that's ON gating off
 * clocks which are not in active use).
 *
 * The externally visible transitions are handled with the help of callbacks
 * included in this structure in such a way that two levels of callbacks are
 * involved.  First, the PM core executes callbacks provided by PM domains,
 * device types, classes and bus types.  They are the subsystem-level callbacks
 * supposed to execute callbacks provided by device drivers, although they may
 * choose not to do that.  If the driver callbacks are executed, they have to
 * collaborate with the subsystem-level callbacks to achieve the goals
 * appropriate for the given system transition, given transition phase and the
 * subsystem the device belongs to.
 *
 * @prepare: The principal role of this callback is to prevent new children of
 *	the device from being registered after it has returned (the driver's
@ -240,34 +223,6 @@ typedef struct pm_message {
 *	driver's interrupt handler, which is guaranteed not to run while
 *	@restore_noirq() is being executed.  Analogous to @resume_noirq().
 *
 * All of the above callbacks, except for @complete(), return error codes.
 * However, the error codes returned by the resume operations, @resume(),
 * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do
 * not cause the PM core to abort the resume transition during which they are
 * returned.  The error codes returned in those cases are only printed by the PM
 * core to the system logs for debugging purposes.  Still, it is recommended
 * that drivers only return error codes from their resume methods in case of an
 * unrecoverable failure (i.e. when the device being handled refuses to resume
 * and becomes unusable) to allow us to modify the PM core in the future, so
 * that it can avoid attempting to handle devices that failed to resume and
 * their children.
 *
 * It is allowed to unregister devices while the above callbacks are being
 * executed.  However, a callback routine must NOT try to unregister the device
 * it was called for, although it may unregister children of that device (for
 * example, if it detects that a child was unplugged while the system was
 * asleep).
 *
 * Refer to Documentation/power/admin-guide/devices.rst for more information about the role
 * of the above callbacks in the system suspend process.
 *
 * There also are callbacks related to runtime power management of devices.
 * Again, these callbacks are executed by the PM core only for subsystems
 * (PM domains, device types, classes and bus types) and the subsystem-level
 * callbacks are supposed to invoke the driver callbacks.  Moreover, the exact
 * actions to be performed by a device driver's callbacks generally depend on
 * the platform and subsystem the device belongs to.
 *
 * @runtime_suspend: Prepare the device for a condition in which it won't be
 *	able to communicate with the CPU(s) and RAM due to power management.
 *	This need not mean that the device should be put into a low-power state.
@ -287,11 +242,51 @@ typedef struct pm_message {
 *	Check these conditions, and return 0 if it's appropriate to let the PM
 *	core queue a suspend request for the device.
 *
- * Refer to Documentation/power/runtime_pm.txt for more information about the
+ * Several device power state transitions are externally visible, affecting
- * role of the above callbacks in device runtime power management.
+ * the state of pending I/O queues and (for drivers that touch hardware)
 * interrupts, wakeups, DMA, and other hardware state.  There may also be
 * internal transitions to various low-power modes which are transparent
 * to the rest of the driver stack (such as a driver that's ON gating off
 * clocks which are not in active use).
 *
 * The externally visible transitions are handled with the help of callbacks
 * included in this structure in such a way that, typically, two levels of
 * callbacks are involved.  First, the PM core executes callbacks provided by PM
 * domains, device types, classes and bus types.  They are the subsystem-level
 * callbacks expected to execute callbacks provided by device drivers, although
 * they may choose not to do that.  If the driver callbacks are executed, they
 * have to collaborate with the subsystem-level callbacks to achieve the goals
 * appropriate for the given system transition, given transition phase and the
 * subsystem the device belongs to.
 *
 * All of the above callbacks, except for @complete(), return error codes.
 * However, the error codes returned by @resume(), @thaw(), @restore(),
 * @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do not cause the PM
 * core to abort the resume transition during which they are returned.  The
 * error codes returned in those cases are only printed to the system logs for
 * debugging purposes.  Still, it is recommended that drivers only return error
 * codes from their resume methods in case of an unrecoverable failure (i.e.
 * when the device being handled refuses to resume and becomes unusable) to
 * allow the PM core to be modified in the future, so that it can avoid
 * attempting to handle devices that failed to resume and their children.
 *
 * It is allowed to unregister devices while the above callbacks are being
 * executed.  However, a callback routine MUST NOT try to unregister the device
 * it was called for, although it may unregister children of that device (for
 * example, if it detects that a child was unplugged while the system was
 * asleep).
 *
 * There also are callbacks related to runtime power management of devices.
 * Again, as a rule these callbacks are executed by the PM core for subsystems
 * (PM domains, device types, classes and bus types) and the subsystem-level
 * callbacks are expected to invoke the driver callbacks.  Moreover, the exact
 * actions to be performed by a device driver's callbacks generally depend on
 * the platform and subsystem the device belongs to.
 *
 * Refer to Documentation/power/runtime_pm.txt for more information about the
 * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle()
 * callbacks in device runtime power management.
 */
 struct dev_pm_ops {
 	int (*prepare)(struct device *dev);
 	void (*complete)(struct device *dev);
@ -391,7 +386,7 @@ const struct dev_pm_ops name = { \
 	SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 }
-/**
+/*
 * PM_EVENT_ messages
 *
 * The following PM_EVENT_ messages are defined for the internal use of the PM
@ -487,7 +482,7 @@ const struct dev_pm_ops name = { \
 #define PMSG_IS_AUTO(msg)	(((msg).event & PM_EVENT_AUTO) != 0)
-/**
+/*
 * Device run-time power management status.
 *
 * These status labels are used internally by the PM core to indicate the
@ -517,7 +512,7 @@ enum rpm_status {
 	RPM_SUSPENDING,
 };
-/**
+/*
 * Device run-time power management request types.
 *
 * RPM_REQ_NONE		Do nothing.
@ -616,15 +611,18 @@ extern void update_pm_runtime_accounting(struct device *dev);
 extern int dev_pm_get_subsys_data(struct device *dev);
 extern void dev_pm_put_subsys_data(struct device *dev);
-/*
+/**
- * Power domains provide callbacks that are executed during system suspend,
+ * struct dev_pm_domain - power management domain representation.
 * hibernation, system resume and during runtime PM transitions along with
 * subsystem-level and driver-level callbacks.
 *
 * @ops: Power management operations associated with this domain.
 * @detach: Called when removing a device from the domain.
 * @activate: Called before executing probe routines for bus types and drivers.
 * @sync: Called after successful driver probe.
 * @dismiss: Called after unsuccessful driver probe and after driver removal.
 *
 * Power domains provide callbacks that are executed during system suspend,
 * hibernation, system resume and during runtime PM transitions instead of
 * subsystem-level and driver-level callbacks.
 */
 struct dev_pm_domain {
 	struct dev_pm_ops	ops;
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@ -199,12 +199,12 @@ EOF
 # 'funcname()' - function
 # '$ENVVAR' - environmental variable
 # '&struct_name' - name of a structure (up to two words including 'struct')
 # '&struct_name.member' - name of a structure member
 # '@parameter' - name of a parameter
 # '%CONST' - name of a constant.
 ## init lots of data
 my $errors = 0;
 my $warnings = 0;
 my $anon_struct_union = 0;
@ -214,14 +214,19 @@ my $type_constant = '\%([-_\w]+)';
 my $type_func = '(\w+)\(\)';
 my $type_param = '\@(\w+(\.\.\.)?)';
 my $type_fp_param = '\@(\w+)\(\)';  # Special RST handling for func ptr params
 my $type_struct = '\&((struct\s*)*[_\w]+)';
 my $type_struct_xml = '\\&amp;((struct\s*)*[_\w]+)';
 my $type_env = '(\$\w+)';
-my $type_enum_full = '\&(enum)\s*([_\w]+)';
+my $type_enum = '\&(enum\s*([_\w]+))';
-my $type_struct_full = '\&(struct)\s*([_\w]+)';
+my $type_struct = '\&(struct\s*([_\w]+))';
-my $type_typedef_full = '\&(typedef)\s*([_\w]+)';
+my $type_typedef = '\&(typedef\s*([_\w]+))';
-my $type_union_full = '\&(union)\s*([_\w]+)';
+my $type_union = '\&(union\s*([_\w]+))';
-my $type_member = '\&([_\w]+)((\.|->)[_\w]+)';
+my $type_member = '\&([_\w]+)(\.|->)([_\w]+)';
 my $type_fallback = '\&([_\w]+)';
 my $type_enum_xml = '\&amp;(enum\s*([_\w]+))';
 my $type_struct_xml = '\&amp;(struct\s*([_\w]+))';
 my $type_typedef_xml = '\&amp;(typedef\s*([_\w]+))';
 my $type_union_xml = '\&amp;(union\s*([_\w]+))';
 my $type_member_xml = '\&amp;([_\w]+)(\.|-\&gt;)([_\w]+)';
 my $type_fallback_xml = '\&amp([_\w]+)';
 my $type_member_func = $type_member . '\(\)';
 # Output conversion substitutions.
@ -231,9 +236,14 @@ my $type_member_func = $type_member . '\(\)';
 my @highlights_html = (
                       [$type_constant, "<i>\$1</i>"],
                       [$type_func, "<b>\$1</b>"],
                       [$type_enum_xml, "<i>\$1</i>"],
                       [$type_struct_xml, "<i>\$1</i>"],
                       [$type_typedef_xml, "<i>\$1</i>"],
                       [$type_union_xml, "<i>\$1</i>"],
                       [$type_env, "<b><i>\$1</i></b>"],
-                       [$type_param, "<tt><b>\$1</b></tt>"]
+                       [$type_param, "<tt><b>\$1</b></tt>"],
                       [$type_member_xml, "<tt><i>\$1</i>\$2\$3</tt>"],
                       [$type_fallback_xml, "<i>\$1</i>"]
                      );
 my $local_lt = "\\\\\\\\lt:";
 my $local_gt = "\\\\\\\\gt:";
@ -243,9 +253,14 @@ my $blankline_html = $local_lt . "p" . $local_gt;	# was "<p>"
 my @highlights_html5 = (
                        [$type_constant, "<span class=\"const\">\$1</span>"],
                        [$type_func, "<span class=\"func\">\$1</span>"],
                        [$type_enum_xml, "<span class=\"enum\">\$1</span>"],
                        [$type_struct_xml, "<span class=\"struct\">\$1</span>"],
                        [$type_typedef_xml, "<span class=\"typedef\">\$1</span>"],
                        [$type_union_xml, "<span class=\"union\">\$1</span>"],
                        [$type_env, "<span class=\"env\">\$1</span>"],
-                        [$type_param, "<span class=\"param\">\$1</span>]"]
+                        [$type_param, "<span class=\"param\">\$1</span>]"],
                        [$type_member_xml, "<span class=\"literal\"><span class=\"struct\">\$1</span>\$2<span class=\"member\">\$3</span></span>"],
                        [$type_fallback_xml, "<span class=\"struct\">\$1</span>"]
 		       );
 my $blankline_html5 = $local_lt . "br /" . $local_gt;
@ -253,10 +268,15 @@ my $blankline_html5 = $local_lt . "br /" . $local_gt;
 my @highlights_xml = (
                      ["([^=])\\\"([^\\\"<]+)\\\"", "\$1<quote>\$2</quote>"],
                      [$type_constant, "<constant>\$1</constant>"],
                      [$type_enum_xml, "<type>\$1</type>"],
                      [$type_struct_xml, "<structname>\$1</structname>"],
                      [$type_typedef_xml, "<type>\$1</type>"],
                      [$type_union_xml, "<structname>\$1</structname>"],
                      [$type_param, "<parameter>\$1</parameter>"],
                      [$type_func, "<function>\$1</function>"],
-                      [$type_env, "<envar>\$1</envar>"]
+                      [$type_env, "<envar>\$1</envar>"],
                      [$type_member_xml, "<literal><structname>\$1</structname>\$2<structfield>\$3</structfield></literal>"],
                      [$type_fallback_xml, "<structname>\$1</structname>"]
 		     );
 my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $local_gt . "\n";
@ -264,9 +284,14 @@ my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $loca
 my @highlights_gnome = (
                        [$type_constant, "<replaceable class=\"option\">\$1</replaceable>"],
                        [$type_func, "<function>\$1</function>"],
                        [$type_enum, "<type>\$1</type>"],
                        [$type_struct, "<structname>\$1</structname>"],
                        [$type_typedef, "<type>\$1</type>"],
                        [$type_union, "<structname>\$1</structname>"],
                        [$type_env, "<envar>\$1</envar>"],
-                        [$type_param, "<parameter>\$1</parameter>" ]
+                        [$type_param, "<parameter>\$1</parameter>" ],
                        [$type_member, "<literal><structname>\$1</structname>\$2<structfield>\$3</structfield></literal>"],
                        [$type_fallback, "<structname>\$1</structname>"]
 		       );
 my $blankline_gnome = "</para><para>\n";
@ -274,8 +299,13 @@ my $blankline_gnome = "</para><para>\n";
 my @highlights_man = (
                      [$type_constant, "\$1"],
                      [$type_func, "\\\\fB\$1\\\\fP"],
                      [$type_enum, "\\\\fI\$1\\\\fP"],
                      [$type_struct, "\\\\fI\$1\\\\fP"],
-                      [$type_param, "\\\\fI\$1\\\\fP"]
+                      [$type_typedef, "\\\\fI\$1\\\\fP"],
                      [$type_union, "\\\\fI\$1\\\\fP"],
                      [$type_param, "\\\\fI\$1\\\\fP"],
                      [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
                      [$type_fallback, "\\\\fI\$1\\\\fP"]
 		     );
 my $blankline_man = "";
@ -283,8 +313,13 @@ my $blankline_man = "";
 my @highlights_text = (
                       [$type_constant, "\$1"],
                       [$type_func, "\$1"],
                       [$type_enum, "\$1"],
                       [$type_struct, "\$1"],
-                       [$type_param, "\$1"]
+                       [$type_typedef, "\$1"],
                       [$type_union, "\$1"],
                       [$type_param, "\$1"],
                       [$type_member, "\$1\$2\$3"],
                       [$type_fallback, "\$1"]
 		      );
 my $blankline_text = "";
@ -292,16 +327,16 @@ my $blankline_text = "";
 my @highlights_rst = (
                       [$type_constant, "``\$1``"],
                       # Note: need to escape () to avoid func matching later
-                       [$type_member_func, "\\:c\\:type\\:`\$1\$2\\\\(\\\\) <\$1>`"],
+                       [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
-                       [$type_member, "\\:c\\:type\\:`\$1\$2 <\$1>`"],
+                       [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
 		       [$type_fp_param, "**\$1\\\\(\\\\)**"],
                       [$type_func, "\\:c\\:func\\:`\$1()`"],
-                       [$type_struct_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
+                       [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
-                       [$type_enum_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
+                       [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
-                       [$type_typedef_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
+                       [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
-                       [$type_union_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
+                       [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
                       # in rst this can refer to any type
-                       [$type_struct, "\\:c\\:type\\:`\$1`"],
+                       [$type_fallback, "\\:c\\:type\\:`\$1`"],
                       [$type_param, "**\$1**"]
 		      );
 my $blankline_rst = "\n";
@ -310,8 +345,13 @@ my $blankline_rst = "\n";
 my @highlights_list = (
                       [$type_constant, "\$1"],
                       [$type_func, "\$1"],
                       [$type_enum, "\$1"],
                       [$type_struct, "\$1"],
-                       [$type_param, "\$1"]
+                       [$type_typedef, "\$1"],
                       [$type_union, "\$1"],
                       [$type_param, "\$1"],
                       [$type_member, "\$1"],
                       [$type_fallback, "\$1"]
 		      );
 my $blankline_list = "";
@ -1131,8 +1171,9 @@ sub output_function_xml(%) {
 	foreach $parameter (@{$args{'parameterlist'}}) {
 	    my $parameter_name = $parameter;
 	    $parameter_name =~ s/\[.*//;
 	    $type = $args{'parametertypes'}{$parameter};
-	    print "  <varlistentry>\n   <term><parameter>$parameter</parameter></term>\n";
+	    print "  <varlistentry>\n   <term><parameter>$type $parameter</parameter></term>\n";
 	    print "   <listitem>\n    <para>\n";
 	    $lineprefix="     ";
 	    output_highlight($args{'parameterdescs'}{$parameter_name});
@ -1223,8 +1264,9 @@ sub output_struct_xml(%) {
      defined($args{'parameterdescs'}{$parameter_name}) || next;
      ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
      $type = $args{'parametertypes'}{$parameter};
      print "    <varlistentry>";
-      print "      <term>$parameter</term>\n";
+      print "      <term><literal>$type $parameter</literal></term>\n";
      print "      <listitem><para>\n";
      output_highlight($args{'parameterdescs'}{$parameter_name});
      print "      </para></listitem>\n";
@ -1883,7 +1925,7 @@ sub output_function_rst(%) {
    $lineprefix = "  ";
    foreach $parameter (@{$args{'parameterlist'}}) {
 	my $parameter_name = $parameter;
-	#$parameter_name =~ s/\[.*//;
+	$parameter_name =~ s/\[.*//;
 	$type = $args{'parametertypes'}{$parameter};
 	if ($type ne "") {
@ -2409,6 +2451,7 @@ sub push_parameter($$$) {
 	# "[blah" in a parameter string;
 	###$param =~ s/\s*//g;
 	push @parameterlist, $param;
 	$type =~ s/\s\s+/ /g;
 	$parametertypes{$param} = $type;
 }
@ -2505,7 +2548,13 @@ sub dump_function($$) {
    $prototype =~ s/__must_check +//;
    $prototype =~ s/__weak +//;
    my $define = $prototype =~ s/^#\s*define\s+//; #ak added
-    $prototype =~ s/__attribute__\s*\(\([a-z,]*\)\)//;
+    $prototype =~ s/__attribute__\s*\(\(
            (?:
                 [\w\s]++          # attribute name
                 (?:\([^)]*+\))?   # attribute arguments
                 \s*+,?            # optional comma at the end
            )+
          \)\)\s+//x;
    # Yes, this truly is vile.  We are looking for:
    # 1. Return type (may be nothing if we're looking at a macro)
@ -2533,21 +2582,21 @@ sub dump_function($$) {
        $noret = 1;
    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
 	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
 	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
 	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
 	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
 	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
 	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
 	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
 	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
 	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*\s*\w+\s*\*\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
 	$return_type = $1;
 	$declaration_name = $2;
 	my $args = $3;