mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 04:18:39 +08:00
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "A few misc subsystems and some of MM. 175 patches. Subsystems affected by this patch series: ia64, kbuild, scripts, sh, ocfs2, kfifo, vfs, kernel/watchdog, and mm (slab-generic, slub, kmemleak, debug, pagecache, msync, gup, memremap, memcg, pagemap, mremap, dma, sparsemem, vmalloc, documentation, kasan, initialization, pagealloc, and memory-failure)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (175 commits) mm/memory-failure: unnecessary amount of unmapping mm/mmzone.h: fix existing kernel-doc comments and link them to core-api mm: page_alloc: ignore init_on_free=1 for debug_pagealloc=1 net: page_pool: use alloc_pages_bulk in refill code path net: page_pool: refactor dma_map into own function page_pool_dma_map SUNRPC: refresh rq_pages using a bulk page allocator SUNRPC: set rq_page_end differently mm/page_alloc: inline __rmqueue_pcplist mm/page_alloc: optimize code layout for __alloc_pages_bulk mm/page_alloc: add an array-based interface to the bulk page allocator mm/page_alloc: add a bulk page allocator mm/page_alloc: rename alloced to allocated mm/page_alloc: duplicate include linux/vmalloc.h mm, page_alloc: avoid page_to_pfn() in move_freepages() mm/Kconfig: remove default DISCONTIGMEM_MANUAL mm: page_alloc: dump migrate-failed pages mm/mempolicy: fix mpol_misplaced kernel-doc mm/mempolicy: rewrite alloc_pages_vma documentation mm/mempolicy: rewrite alloc_pages documentation mm/mempolicy: rename alloc_pages_current to alloc_pages ...
This commit is contained in:
commit
d42f323a7d
@ -4996,6 +4996,10 @@
|
||||
|
||||
slram= [HW,MTD]
|
||||
|
||||
slab_merge [MM]
|
||||
Enable merging of slabs with similar size when the
|
||||
kernel is built without CONFIG_SLAB_MERGE_DEFAULT.
|
||||
|
||||
slab_nomerge [MM]
|
||||
Disable merging of slabs with similar size. May be
|
||||
necessary if there is some reason to distinguish
|
||||
@ -5043,6 +5047,9 @@
|
||||
lower than slub_max_order.
|
||||
For more information see Documentation/vm/slub.rst.
|
||||
|
||||
slub_merge [MM, SLUB]
|
||||
Same with slab_merge.
|
||||
|
||||
slub_nomerge [MM, SLUB]
|
||||
Same with slab_nomerge. This is supported for legacy.
|
||||
See slab_nomerge for more information.
|
||||
|
@ -402,7 +402,7 @@ compact_fail
|
||||
but failed.
|
||||
|
||||
It is possible to establish how long the stalls were using the function
|
||||
tracer to record how long was spent in __alloc_pages_nodemask and
|
||||
tracer to record how long was spent in __alloc_pages() and
|
||||
using the mm_page_alloc tracepoint to identify which allocations were
|
||||
for huge pages.
|
||||
|
||||
|
@ -213,9 +213,9 @@ Here are the routines, one by one:
|
||||
there will be no entries in the cache for the kernel address
|
||||
space for virtual addresses in the range 'start' to 'end-1'.
|
||||
|
||||
The first of these two routines is invoked after map_kernel_range()
|
||||
The first of these two routines is invoked after vmap_range()
|
||||
has installed the page table entries. The second is invoked
|
||||
before unmap_kernel_range() deletes the page table entries.
|
||||
before vunmap_range() deletes the page table entries.
|
||||
|
||||
There exists another whole class of cpu cache issues which currently
|
||||
require a whole different set of interfaces to handle properly.
|
||||
|
@ -92,3 +92,9 @@ More Memory Management Functions
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: mm/page_alloc.c
|
||||
.. kernel-doc:: mm/mempolicy.c
|
||||
.. kernel-doc:: include/linux/mm_types.h
|
||||
:internal:
|
||||
.. kernel-doc:: include/linux/mm.h
|
||||
:internal:
|
||||
.. kernel-doc:: include/linux/mmzone.h
|
||||
|
@ -11,46 +11,56 @@ designed to find out-of-bound and use-after-free bugs. KASAN has three modes:
|
||||
2. software tag-based KASAN (similar to userspace HWASan),
|
||||
3. hardware tag-based KASAN (based on hardware memory tagging).
|
||||
|
||||
Software KASAN modes (1 and 2) use compile-time instrumentation to insert
|
||||
validity checks before every memory access, and therefore require a compiler
|
||||
Generic KASAN is mainly used for debugging due to a large memory overhead.
|
||||
Software tag-based KASAN can be used for dogfood testing as it has a lower
|
||||
memory overhead that allows using it with real workloads. Hardware tag-based
|
||||
KASAN comes with low memory and performance overheads and, therefore, can be
|
||||
used in production. Either as an in-field memory bug detector or as a security
|
||||
mitigation.
|
||||
|
||||
Software KASAN modes (#1 and #2) use compile-time instrumentation to insert
|
||||
validity checks before every memory access and, therefore, require a compiler
|
||||
version that supports that.
|
||||
|
||||
Generic KASAN is supported in both GCC and Clang. With GCC it requires version
|
||||
Generic KASAN is supported in GCC and Clang. With GCC, it requires version
|
||||
8.3.0 or later. Any supported Clang version is compatible, but detection of
|
||||
out-of-bounds accesses for global variables is only supported since Clang 11.
|
||||
|
||||
Tag-based KASAN is only supported in Clang.
|
||||
Software tag-based KASAN mode is only supported in Clang.
|
||||
|
||||
Currently generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390
|
||||
The hardware KASAN mode (#3) relies on hardware to perform the checks but
|
||||
still requires a compiler version that supports memory tagging instructions.
|
||||
This mode is supported in GCC 10+ and Clang 11+.
|
||||
|
||||
Both software KASAN modes work with SLUB and SLAB memory allocators,
|
||||
while the hardware tag-based KASAN currently only supports SLUB.
|
||||
|
||||
Currently, generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390,
|
||||
and riscv architectures, and tag-based KASAN modes are supported only for arm64.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
To enable KASAN configure kernel with::
|
||||
To enable KASAN, configure the kernel with::
|
||||
|
||||
CONFIG_KASAN = y
|
||||
CONFIG_KASAN=y
|
||||
|
||||
and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN),
|
||||
CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN), and
|
||||
CONFIG_KASAN_HW_TAGS (to enable hardware tag-based KASAN).
|
||||
and choose between ``CONFIG_KASAN_GENERIC`` (to enable generic KASAN),
|
||||
``CONFIG_KASAN_SW_TAGS`` (to enable software tag-based KASAN), and
|
||||
``CONFIG_KASAN_HW_TAGS`` (to enable hardware tag-based KASAN).
|
||||
|
||||
For software modes, you also need to choose between CONFIG_KASAN_OUTLINE and
|
||||
CONFIG_KASAN_INLINE. Outline and inline are compiler instrumentation types.
|
||||
The former produces smaller binary while the latter is 1.1 - 2 times faster.
|
||||
For software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
|
||||
``CONFIG_KASAN_INLINE``. Outline and inline are compiler instrumentation types.
|
||||
The former produces a smaller binary while the latter is 1.1-2 times faster.
|
||||
|
||||
Both software KASAN modes work with both SLUB and SLAB memory allocators,
|
||||
while the hardware tag-based KASAN currently only support SLUB.
|
||||
|
||||
For better error reports that include stack traces, enable CONFIG_STACKTRACE.
|
||||
|
||||
To augment reports with last allocation and freeing stack of the physical page,
|
||||
it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on.
|
||||
To include alloc and free stack traces of affected slab objects into reports,
|
||||
enable ``CONFIG_STACKTRACE``. To include alloc and free stack traces of affected
|
||||
physical pages, enable ``CONFIG_PAGE_OWNER`` and boot with ``page_owner=on``.
|
||||
|
||||
Error reports
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
A typical out-of-bounds access generic KASAN report looks like this::
|
||||
A typical KASAN report looks like this::
|
||||
|
||||
==================================================================
|
||||
BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
|
||||
@ -123,41 +133,57 @@ A typical out-of-bounds access generic KASAN report looks like this::
|
||||
ffff8801f44ec400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
==================================================================
|
||||
|
||||
The header of the report provides a short summary of what kind of bug happened
|
||||
and what kind of access caused it. It's followed by a stack trace of the bad
|
||||
access, a stack trace of where the accessed memory was allocated (in case bad
|
||||
access happens on a slab object), and a stack trace of where the object was
|
||||
freed (in case of a use-after-free bug report). Next comes a description of
|
||||
the accessed slab object and information about the accessed memory page.
|
||||
The report header summarizes what kind of bug happened and what kind of access
|
||||
caused it. It is followed by a stack trace of the bad access, a stack trace of
|
||||
where the accessed memory was allocated (in case a slab object was accessed),
|
||||
and a stack trace of where the object was freed (in case of a use-after-free
|
||||
bug report). Next comes a description of the accessed slab object and the
|
||||
information about the accessed memory page.
|
||||
|
||||
In the last section the report shows memory state around the accessed address.
|
||||
Internally KASAN tracks memory state separately for each memory granule, which
|
||||
In the end, the report shows the memory state around the accessed address.
|
||||
Internally, KASAN tracks memory state separately for each memory granule, which
|
||||
is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the
|
||||
memory state section of the report shows the state of one of the memory
|
||||
granules that surround the accessed address.
|
||||
|
||||
For generic KASAN the size of each memory granule is 8. The state of each
|
||||
For generic KASAN, the size of each memory granule is 8. The state of each
|
||||
granule is encoded in one shadow byte. Those 8 bytes can be accessible,
|
||||
partially accessible, freed or be a part of a redzone. KASAN uses the following
|
||||
encoding for each shadow byte: 0 means that all 8 bytes of the corresponding
|
||||
partially accessible, freed, or be a part of a redzone. KASAN uses the following
|
||||
encoding for each shadow byte: 00 means that all 8 bytes of the corresponding
|
||||
memory region are accessible; number N (1 <= N <= 7) means that the first N
|
||||
bytes are accessible, and other (8 - N) bytes are not; any negative value
|
||||
indicates that the entire 8-byte word is inaccessible. KASAN uses different
|
||||
negative values to distinguish between different kinds of inaccessible memory
|
||||
like redzones or freed memory (see mm/kasan/kasan.h).
|
||||
|
||||
In the report above the arrows point to the shadow byte 03, which means that
|
||||
the accessed address is partially accessible. For tag-based KASAN modes this
|
||||
last report section shows the memory tags around the accessed address
|
||||
(see the `Implementation details`_ section).
|
||||
In the report above, the arrow points to the shadow byte ``03``, which means
|
||||
that the accessed address is partially accessible.
|
||||
|
||||
For tag-based KASAN modes, this last report section shows the memory tags around
|
||||
the accessed address (see the `Implementation details`_ section).
|
||||
|
||||
Note that KASAN bug titles (like ``slab-out-of-bounds`` or ``use-after-free``)
|
||||
are best-effort: KASAN prints the most probable bug type based on the limited
|
||||
information it has. The actual type of the bug might be different.
|
||||
|
||||
Generic KASAN also reports up to two auxiliary call stack traces. These stack
|
||||
traces point to places in code that interacted with the object but that are not
|
||||
directly present in the bad access stack trace. Currently, this includes
|
||||
call_rcu() and workqueue queuing.
|
||||
|
||||
Boot parameters
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
KASAN is affected by the generic ``panic_on_warn`` command line parameter.
|
||||
When it is enabled, KASAN panics the kernel after printing a bug report.
|
||||
|
||||
By default, KASAN prints a bug report only for the first invalid memory access.
|
||||
With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
|
||||
effectively disables ``panic_on_warn`` for KASAN reports.
|
||||
|
||||
Hardware tag-based KASAN mode (see the section about various modes below) is
|
||||
intended for use in production as a security mitigation. Therefore, it supports
|
||||
boot parameters that allow to disable KASAN competely or otherwise control
|
||||
particular KASAN features.
|
||||
boot parameters that allow disabling KASAN or controlling its features.
|
||||
|
||||
- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
|
||||
|
||||
@ -174,26 +200,8 @@ particular KASAN features.
|
||||
traces collection (default: ``on``).
|
||||
|
||||
- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
|
||||
report or also panic the kernel (default: ``report``). Note, that tag
|
||||
checking gets disabled after the first reported bug.
|
||||
|
||||
For developers
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Software KASAN modes use compiler instrumentation to insert validity checks.
|
||||
Such instrumentation might be incompatible with some part of the kernel, and
|
||||
therefore needs to be disabled. To disable instrumentation for specific files
|
||||
or directories, add a line similar to the following to the respective kernel
|
||||
Makefile:
|
||||
|
||||
- For a single file (e.g. main.o)::
|
||||
|
||||
KASAN_SANITIZE_main.o := n
|
||||
|
||||
- For all files in one directory::
|
||||
|
||||
KASAN_SANITIZE := n
|
||||
|
||||
report or also panic the kernel (default: ``report``). The panic happens even
|
||||
if ``kasan_multi_shot`` is enabled.
|
||||
|
||||
Implementation details
|
||||
----------------------
|
||||
@ -201,12 +209,11 @@ Implementation details
|
||||
Generic KASAN
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
From a high level perspective, KASAN's approach to memory error detection is
|
||||
similar to that of kmemcheck: use shadow memory to record whether each byte of
|
||||
memory is safe to access, and use compile-time instrumentation to insert checks
|
||||
of shadow memory on each memory access.
|
||||
Software KASAN modes use shadow memory to record whether each byte of memory is
|
||||
safe to access and use compile-time instrumentation to insert shadow memory
|
||||
checks before each memory access.
|
||||
|
||||
Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (e.g. 16TB
|
||||
Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (16TB
|
||||
to cover 128TB on x86_64) and uses direct mapping with a scale and offset to
|
||||
translate a memory address to its corresponding shadow address.
|
||||
|
||||
@ -215,113 +222,105 @@ address::
|
||||
|
||||
static inline void *kasan_mem_to_shadow(const void *addr)
|
||||
{
|
||||
return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
|
||||
return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
|
||||
+ KASAN_SHADOW_OFFSET;
|
||||
}
|
||||
|
||||
where ``KASAN_SHADOW_SCALE_SHIFT = 3``.
|
||||
|
||||
Compile-time instrumentation is used to insert memory access checks. Compiler
|
||||
inserts function calls (__asan_load*(addr), __asan_store*(addr)) before each
|
||||
memory access of size 1, 2, 4, 8 or 16. These functions check whether memory
|
||||
access is valid or not by checking corresponding shadow memory.
|
||||
inserts function calls (``__asan_load*(addr)``, ``__asan_store*(addr)``) before
|
||||
each memory access of size 1, 2, 4, 8, or 16. These functions check whether
|
||||
memory accesses are valid or not by checking corresponding shadow memory.
|
||||
|
||||
GCC 5.0 has possibility to perform inline instrumentation. Instead of making
|
||||
function calls GCC directly inserts the code to check the shadow memory.
|
||||
This option significantly enlarges kernel but it gives x1.1-x2 performance
|
||||
boost over outline instrumented kernel.
|
||||
With inline instrumentation, instead of making function calls, the compiler
|
||||
directly inserts the code to check shadow memory. This option significantly
|
||||
enlarges the kernel, but it gives an x1.1-x2 performance boost over the
|
||||
outline-instrumented kernel.
|
||||
|
||||
Generic KASAN also reports the last 2 call stacks to creation of work that
|
||||
potentially has access to an object. Call stacks for the following are shown:
|
||||
call_rcu() and workqueue queuing.
|
||||
|
||||
Generic KASAN is the only mode that delays the reuse of freed object via
|
||||
Generic KASAN is the only mode that delays the reuse of freed objects via
|
||||
quarantine (see mm/kasan/quarantine.c for implementation).
|
||||
|
||||
Software tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Software tag-based KASAN requires software memory tagging support in the form
|
||||
of HWASan-like compiler instrumentation (see HWASan documentation for details).
|
||||
|
||||
Software tag-based KASAN is currently only implemented for arm64 architecture.
|
||||
Software tag-based KASAN uses a software memory tagging approach to checking
|
||||
access validity. It is currently only implemented for the arm64 architecture.
|
||||
|
||||
Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
|
||||
to store a pointer tag in the top byte of kernel pointers. Like generic KASAN
|
||||
it uses shadow memory to store memory tags associated with each 16-byte memory
|
||||
cell (therefore it dedicates 1/16th of the kernel memory for shadow memory).
|
||||
to store a pointer tag in the top byte of kernel pointers. It uses shadow memory
|
||||
to store memory tags associated with each 16-byte memory cell (therefore, it
|
||||
dedicates 1/16th of the kernel memory for shadow memory).
|
||||
|
||||
On each memory allocation software tag-based KASAN generates a random tag, tags
|
||||
the allocated memory with this tag, and embeds this tag into the returned
|
||||
On each memory allocation, software tag-based KASAN generates a random tag, tags
|
||||
the allocated memory with this tag, and embeds the same tag into the returned
|
||||
pointer.
|
||||
|
||||
Software tag-based KASAN uses compile-time instrumentation to insert checks
|
||||
before each memory access. These checks make sure that tag of the memory that
|
||||
is being accessed is equal to tag of the pointer that is used to access this
|
||||
memory. In case of a tag mismatch software tag-based KASAN prints a bug report.
|
||||
before each memory access. These checks make sure that the tag of the memory
|
||||
that is being accessed is equal to the tag of the pointer that is used to access
|
||||
this memory. In case of a tag mismatch, software tag-based KASAN prints a bug
|
||||
report.
|
||||
|
||||
Software tag-based KASAN also has two instrumentation modes (outline, that
|
||||
emits callbacks to check memory accesses; and inline, that performs the shadow
|
||||
Software tag-based KASAN also has two instrumentation modes (outline, which
|
||||
emits callbacks to check memory accesses; and inline, which performs the shadow
|
||||
memory checks inline). With outline instrumentation mode, a bug report is
|
||||
simply printed from the function that performs the access check. With inline
|
||||
instrumentation a brk instruction is emitted by the compiler, and a dedicated
|
||||
brk handler is used to print bug reports.
|
||||
printed from the function that performs the access check. With inline
|
||||
instrumentation, a ``brk`` instruction is emitted by the compiler, and a
|
||||
dedicated ``brk`` handler is used to print bug reports.
|
||||
|
||||
Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Software tag-based KASAN currently only supports tagging of
|
||||
kmem_cache_alloc/kmalloc and page_alloc memory.
|
||||
Software tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
|
||||
Hardware tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Hardware tag-based KASAN is similar to the software mode in concept, but uses
|
||||
Hardware tag-based KASAN is similar to the software mode in concept but uses
|
||||
hardware memory tagging support instead of compiler instrumentation and
|
||||
shadow memory.
|
||||
|
||||
Hardware tag-based KASAN is currently only implemented for arm64 architecture
|
||||
and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5
|
||||
Instruction Set Architecture, and Top Byte Ignore (TBI).
|
||||
Instruction Set Architecture and Top Byte Ignore (TBI).
|
||||
|
||||
Special arm64 instructions are used to assign memory tags for each allocation.
|
||||
Same tags are assigned to pointers to those allocations. On every memory
|
||||
access, hardware makes sure that tag of the memory that is being accessed is
|
||||
equal to tag of the pointer that is used to access this memory. In case of a
|
||||
tag mismatch a fault is generated and a report is printed.
|
||||
access, hardware makes sure that the tag of the memory that is being accessed is
|
||||
equal to the tag of the pointer that is used to access this memory. In case of a
|
||||
tag mismatch, a fault is generated, and a report is printed.
|
||||
|
||||
Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Hardware tag-based KASAN currently only supports tagging of
|
||||
kmem_cache_alloc/kmalloc and page_alloc memory.
|
||||
Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
|
||||
If the hardware doesn't support MTE (pre ARMv8.5), hardware tag-based KASAN
|
||||
won't be enabled. In this case all boot parameters are ignored.
|
||||
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
|
||||
will not be enabled. In this case, all KASAN boot parameters are ignored.
|
||||
|
||||
Note, that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
|
||||
enabled. Even when kasan.mode=off is provided, or when the hardware doesn't
|
||||
Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
|
||||
enabled. Even when ``kasan.mode=off`` is provided or when the hardware does not
|
||||
support MTE (but supports TBI).
|
||||
|
||||
Hardware tag-based KASAN only reports the first found bug. After that MTE tag
|
||||
Hardware tag-based KASAN only reports the first found bug. After that, MTE tag
|
||||
checking gets disabled.
|
||||
|
||||
What memory accesses are sanitised by KASAN?
|
||||
--------------------------------------------
|
||||
Shadow memory
|
||||
-------------
|
||||
|
||||
The kernel maps memory in a number of different parts of the address
|
||||
space. This poses something of a problem for KASAN, which requires
|
||||
that all addresses accessed by instrumented code have a valid shadow
|
||||
region.
|
||||
The kernel maps memory in several different parts of the address space.
|
||||
The range of kernel virtual addresses is large: there is not enough real
|
||||
memory to support a real shadow region for every address that could be
|
||||
accessed by the kernel. Therefore, KASAN only maps real shadow for certain
|
||||
parts of the address space.
|
||||
|
||||
The range of kernel virtual addresses is large: there is not enough
|
||||
real memory to support a real shadow region for every address that
|
||||
could be accessed by the kernel.
|
||||
|
||||
By default
|
||||
~~~~~~~~~~
|
||||
Default behaviour
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
By default, architectures only map real memory over the shadow region
|
||||
for the linear mapping (and potentially other small areas). For all
|
||||
@ -330,10 +329,9 @@ page is mapped over the shadow area. This read-only shadow page
|
||||
declares all memory accesses as permitted.
|
||||
|
||||
This presents a problem for modules: they do not live in the linear
|
||||
mapping, but in a dedicated module space. By hooking in to the module
|
||||
allocator, KASAN can temporarily map real shadow memory to cover
|
||||
them. This allows detection of invalid accesses to module globals, for
|
||||
example.
|
||||
mapping but in a dedicated module space. By hooking into the module
|
||||
allocator, KASAN temporarily maps real shadow memory to cover them.
|
||||
This allows detection of invalid accesses to module globals, for example.
|
||||
|
||||
This also creates an incompatibility with ``VMAP_STACK``: if the stack
|
||||
lives in vmalloc space, it will be shadowed by the read-only page, and
|
||||
@ -344,9 +342,10 @@ CONFIG_KASAN_VMALLOC
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
|
||||
cost of greater memory usage. Currently this is only supported on x86.
|
||||
cost of greater memory usage. Currently, this is supported on x86,
|
||||
riscv, s390, and powerpc.
|
||||
|
||||
This works by hooking into vmalloc and vmap, and dynamically
|
||||
This works by hooking into vmalloc and vmap and dynamically
|
||||
allocating real shadow memory to back the mappings.
|
||||
|
||||
Most mappings in vmalloc space are small, requiring less than a full
|
||||
@ -365,28 +364,76 @@ memory.
|
||||
|
||||
To avoid the difficulties around swapping mappings around, KASAN expects
|
||||
that the part of the shadow region that covers the vmalloc space will
|
||||
not be covered by the early shadow page, but will be left
|
||||
unmapped. This will require changes in arch-specific code.
|
||||
not be covered by the early shadow page but will be left unmapped.
|
||||
This will require changes in arch-specific code.
|
||||
|
||||
This allows ``VMAP_STACK`` support on x86, and can simplify support of
|
||||
This allows ``VMAP_STACK`` support on x86 and can simplify support of
|
||||
architectures that do not have a fixed module region.
|
||||
|
||||
CONFIG_KASAN_KUNIT_TEST and CONFIG_KASAN_MODULE_TEST
|
||||
----------------------------------------------------
|
||||
For developers
|
||||
--------------
|
||||
|
||||
KASAN tests consist of two parts:
|
||||
Ignoring accesses
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
Software KASAN modes use compiler instrumentation to insert validity checks.
|
||||
Such instrumentation might be incompatible with some parts of the kernel, and
|
||||
therefore needs to be disabled.
|
||||
|
||||
Other parts of the kernel might access metadata for allocated objects.
|
||||
Normally, KASAN detects and reports such accesses, but in some cases (e.g.,
|
||||
in memory allocators), these accesses are valid.
|
||||
|
||||
For software KASAN modes, to disable instrumentation for a specific file or
|
||||
directory, add a ``KASAN_SANITIZE`` annotation to the respective kernel
|
||||
Makefile:
|
||||
|
||||
- For a single file (e.g., main.o)::
|
||||
|
||||
KASAN_SANITIZE_main.o := n
|
||||
|
||||
- For all files in one directory::
|
||||
|
||||
KASAN_SANITIZE := n
|
||||
|
||||
For software KASAN modes, to disable instrumentation on a per-function basis,
|
||||
use the KASAN-specific ``__no_sanitize_address`` function attribute or the
|
||||
generic ``noinstr`` one.
|
||||
|
||||
Note that disabling compiler instrumentation (either on a per-file or a
|
||||
per-function basis) makes KASAN ignore the accesses that happen directly in
|
||||
that code for software KASAN modes. It does not help when the accesses happen
|
||||
indirectly (through calls to instrumented functions) or with the hardware
|
||||
tag-based mode that does not use compiler instrumentation.
|
||||
|
||||
For software KASAN modes, to disable KASAN reports in a part of the kernel code
|
||||
for the current task, annotate this part of the code with a
|
||||
``kasan_disable_current()``/``kasan_enable_current()`` section. This also
|
||||
disables the reports for indirect accesses that happen through function calls.
|
||||
|
||||
For tag-based KASAN modes (include the hardware one), to disable access
|
||||
checking, use ``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that
|
||||
temporarily disabling access checking via ``page_kasan_tag_reset()`` requires
|
||||
saving and restoring the per-page KASAN tag via
|
||||
``page_kasan_tag``/``page_kasan_tag_set``.
|
||||
|
||||
Tests
|
||||
~~~~~
|
||||
|
||||
There are KASAN tests that allow verifying that KASAN works and can detect
|
||||
certain types of memory corruptions. The tests consist of two parts:
|
||||
|
||||
1. Tests that are integrated with the KUnit Test Framework. Enabled with
|
||||
``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified
|
||||
automatically in a few different ways, see the instructions below.
|
||||
automatically in a few different ways; see the instructions below.
|
||||
|
||||
2. Tests that are currently incompatible with KUnit. Enabled with
|
||||
``CONFIG_KASAN_MODULE_TEST`` and can only be run as a module. These tests can
|
||||
only be verified manually, by loading the kernel module and inspecting the
|
||||
only be verified manually by loading the kernel module and inspecting the
|
||||
kernel log for KASAN reports.
|
||||
|
||||
Each KUnit-compatible KASAN test prints a KASAN report if an error is detected.
|
||||
Then the test prints its number and status.
|
||||
Each KUnit-compatible KASAN test prints one of multiple KASAN reports if an
|
||||
error is detected. Then the test prints its number and status.
|
||||
|
||||
When a test passes::
|
||||
|
||||
@ -414,30 +461,24 @@ Or, if one of the tests failed::
|
||||
|
||||
not ok 1 - kasan
|
||||
|
||||
|
||||
There are a few ways to run KUnit-compatible KASAN tests.
|
||||
|
||||
1. Loadable module
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as
|
||||
a loadable module and run on any architecture that supports KASAN by loading
|
||||
the module with insmod or modprobe. The module is called ``test_kasan``.
|
||||
With ``CONFIG_KUNIT`` enabled, KASAN-KUnit tests can be built as a loadable
|
||||
module and run by loading ``test_kasan.ko`` with ``insmod`` or ``modprobe``.
|
||||
|
||||
2. Built-In
|
||||
~~~~~~~~~~~
|
||||
|
||||
With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in
|
||||
on any architecure that supports KASAN. These and any other KUnit tests enabled
|
||||
will run and print the results at boot as a late-init call.
|
||||
With ``CONFIG_KUNIT`` built-in, KASAN-KUnit tests can be built-in as well.
|
||||
In this case, the tests will run at boot as a late-init call.
|
||||
|
||||
3. Using kunit_tool
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it's also
|
||||
possible use ``kunit_tool`` to see the results of these and other KUnit tests
|
||||
in a more readable way. This will not print the KASAN reports of the tests that
|
||||
passed. Use `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_
|
||||
for more up-to-date information on ``kunit_tool``.
|
||||
With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it is also
|
||||
possible to use ``kunit_tool`` to see the results of KUnit tests in a more
|
||||
readable way. This will not print the KASAN reports of the tests that passed.
|
||||
See `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_
|
||||
for more up-to-date information on ``kunit_tool``.
|
||||
|
||||
.. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
|
||||
|
@ -47,7 +47,7 @@ size change due to this facility.
|
||||
|
||||
text data bss dec hex filename
|
||||
48800 2445 644 51889 cab1 mm/page_alloc.o
|
||||
6574 108 29 6711 1a37 mm/page_owner.o
|
||||
6662 108 29 6799 1a8f mm/page_owner.o
|
||||
1025 8 8 1041 411 mm/page_ext.o
|
||||
|
||||
Although, roughly, 8 KB code is added in total, page_alloc.o increase by
|
||||
|
@ -53,11 +53,6 @@ prevent the page from being split by anyone.
|
||||
of handling GUP on hugetlbfs will also work fine on transparent
|
||||
hugepage backed mappings.
|
||||
|
||||
In case you can't handle compound pages if they're returned by
|
||||
follow_page, the FOLL_SPLIT bit can be specified as a parameter to
|
||||
follow_page, so that it will split the hugepages before returning
|
||||
them.
|
||||
|
||||
Graceful fallback
|
||||
=================
|
||||
|
||||
|
@ -11770,6 +11770,7 @@ F: include/linux/gfp.h
|
||||
F: include/linux/memory_hotplug.h
|
||||
F: include/linux/mm.h
|
||||
F: include/linux/mmzone.h
|
||||
F: include/linux/pagewalk.h
|
||||
F: include/linux/vmalloc.h
|
||||
F: mm/
|
||||
|
||||
|
11
arch/Kconfig
11
arch/Kconfig
@ -829,6 +829,17 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
|
||||
config HAVE_ARCH_HUGE_VMAP
|
||||
bool
|
||||
|
||||
#
|
||||
# Archs that select this would be capable of PMD-sized vmaps (i.e.,
|
||||
# arch_vmap_pmd_supported() returns true), and they must make no assumptions
|
||||
# that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag
|
||||
# can be used to prohibit arch-specific allocations from using hugepages to
|
||||
# help with this (e.g., modules may require it).
|
||||
#
|
||||
config HAVE_ARCH_HUGE_VMALLOC
|
||||
depends on HAVE_ARCH_HUGE_VMAP
|
||||
bool
|
||||
|
||||
config ARCH_WANT_HUGE_PMD_SHARE
|
||||
bool
|
||||
|
||||
|
@ -282,5 +282,4 @@ mem_init(void)
|
||||
set_max_mapnr(max_low_pfn);
|
||||
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
|
||||
memblock_free_all();
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
@ -194,7 +194,6 @@ void __init mem_init(void)
|
||||
{
|
||||
memblock_free_all();
|
||||
highmem_init();
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
|
@ -33,6 +33,7 @@ config ARM
|
||||
select ARCH_SUPPORTS_ATOMIC_RMW
|
||||
select ARCH_USE_BUILTIN_BSWAP
|
||||
select ARCH_USE_CMPXCHG_LOCKREF
|
||||
select ARCH_USE_MEMTEST
|
||||
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
select ARCH_WANT_LD_ORPHAN_WARN
|
||||
|
@ -186,8 +186,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
|
||||
|
||||
#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
|
||||
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
|
||||
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
|
||||
#define pud_write(pud) pmd_write(__pmd(pud_val(pud)))
|
||||
|
||||
#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
|
||||
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
|
||||
|
@ -166,6 +166,9 @@ extern struct page *empty_zero_page;
|
||||
|
||||
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
||||
|
||||
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
|
||||
#define pud_write(pud) pmd_write(__pmd(pud_val(pud)))
|
||||
|
||||
#define pmd_none(pmd) (!pmd_val(pmd))
|
||||
|
||||
static inline pte_t *pmd_page_vaddr(pmd_t pmd)
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include <asm/shmparam.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
@ -316,8 +316,6 @@ void __init mem_init(void)
|
||||
|
||||
free_highpages();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
/*
|
||||
* Check boundaries twice: Some fundamental inconsistencies can
|
||||
* be detected at build time already.
|
||||
|
@ -67,6 +67,7 @@ config ARM64
|
||||
select ARCH_KEEP_MEMBLOCK
|
||||
select ARCH_USE_CMPXCHG_LOCKREF
|
||||
select ARCH_USE_GNU_PROPERTY
|
||||
select ARCH_USE_MEMTEST
|
||||
select ARCH_USE_QUEUED_RWLOCKS
|
||||
select ARCH_USE_QUEUED_SPINLOCKS
|
||||
select ARCH_USE_SYM_ANNOTATIONS
|
||||
|
@ -250,8 +250,8 @@ static inline const void *__tag_set(const void *addr, u8 tag)
|
||||
#define arch_init_tags(max_tag) mte_init_tags(max_tag)
|
||||
#define arch_get_random_tag() mte_get_random_tag()
|
||||
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
|
||||
#define arch_set_mem_tag_range(addr, size, tag) \
|
||||
mte_set_mem_tag_range((addr), (size), (tag))
|
||||
#define arch_set_mem_tag_range(addr, size, tag, init) \
|
||||
mte_set_mem_tag_range((addr), (size), (tag), (init))
|
||||
#endif /* CONFIG_KASAN_HW_TAGS */
|
||||
|
||||
/*
|
||||
|
@ -53,7 +53,8 @@ static inline u8 mte_get_random_tag(void)
|
||||
* Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
|
||||
* size must be non-zero and MTE_GRANULE_SIZE aligned.
|
||||
*/
|
||||
static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
static inline void mte_set_mem_tag_range(void *addr, size_t size,
|
||||
u8 tag, bool init)
|
||||
{
|
||||
u64 curr, end;
|
||||
|
||||
@ -63,18 +64,27 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
curr = (u64)__tag_set(addr, tag);
|
||||
end = curr + size;
|
||||
|
||||
do {
|
||||
/*
|
||||
* 'asm volatile' is required to prevent the compiler to move
|
||||
* the statement outside of the loop.
|
||||
*/
|
||||
asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
|
||||
:
|
||||
: "r" (curr)
|
||||
: "memory");
|
||||
|
||||
curr += MTE_GRANULE_SIZE;
|
||||
} while (curr != end);
|
||||
/*
|
||||
* 'asm volatile' is required to prevent the compiler to move
|
||||
* the statement outside of the loop.
|
||||
*/
|
||||
if (init) {
|
||||
do {
|
||||
asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
|
||||
:
|
||||
: "r" (curr)
|
||||
: "memory");
|
||||
curr += MTE_GRANULE_SIZE;
|
||||
} while (curr != end);
|
||||
} else {
|
||||
do {
|
||||
asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
|
||||
:
|
||||
: "r" (curr)
|
||||
: "memory");
|
||||
curr += MTE_GRANULE_SIZE;
|
||||
} while (curr != end);
|
||||
}
|
||||
}
|
||||
|
||||
void mte_enable_kernel_sync(void);
|
||||
@ -101,7 +111,8 @@ static inline u8 mte_get_random_tag(void)
|
||||
return 0xFF;
|
||||
}
|
||||
|
||||
static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
static inline void mte_set_mem_tag_range(void *addr, size_t size,
|
||||
u8 tag, bool init)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,28 @@
|
||||
#ifndef _ASM_ARM64_VMALLOC_H
|
||||
#define _ASM_ARM64_VMALLOC_H
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
|
||||
|
||||
#define arch_vmap_pud_supported arch_vmap_pud_supported
|
||||
static inline bool arch_vmap_pud_supported(pgprot_t prot)
|
||||
{
|
||||
/*
|
||||
* Only 4k granule supports level 1 block mappings.
|
||||
* SW table walks can't handle removal of intermediate entries.
|
||||
*/
|
||||
return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
|
||||
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
|
||||
}
|
||||
|
||||
#define arch_vmap_pmd_supported arch_vmap_pmd_supported
|
||||
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
|
||||
{
|
||||
/* See arch_vmap_pud_supported() */
|
||||
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_ARM64_VMALLOC_H */
|
||||
|
@ -491,8 +491,6 @@ void __init mem_init(void)
|
||||
/* this will put all unused low memory onto the freelists */
|
||||
memblock_free_all();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
/*
|
||||
* Check boundaries twice: Some fundamental inconsistencies can be
|
||||
* detected at build time already.
|
||||
@ -521,7 +519,7 @@ void free_initmem(void)
|
||||
* prevents the region from being reused for kernel modules, which
|
||||
* is not supported by kallsyms.
|
||||
*/
|
||||
unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
|
||||
vunmap_range((u64)__init_begin, (u64)__init_end);
|
||||
}
|
||||
|
||||
void dump_mem_limit(void)
|
||||
|
@ -1339,27 +1339,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
|
||||
return dt_virt;
|
||||
}
|
||||
|
||||
int __init arch_ioremap_p4d_supported(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __init arch_ioremap_pud_supported(void)
|
||||
{
|
||||
/*
|
||||
* Only 4k granule supports level 1 block mappings.
|
||||
* SW table walks can't handle removal of intermediate entries.
|
||||
*/
|
||||
return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
|
||||
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
|
||||
}
|
||||
|
||||
int __init arch_ioremap_pmd_supported(void)
|
||||
{
|
||||
/* See arch_ioremap_pud_supported() */
|
||||
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
|
||||
}
|
||||
|
||||
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
|
||||
{
|
||||
pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
|
||||
@ -1451,11 +1430,6 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
|
||||
return 1;
|
||||
}
|
||||
|
||||
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
|
||||
{
|
||||
return 0; /* Don't attempt a block mapping */
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
|
||||
{
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <asm/page.h>
|
||||
|
@ -107,7 +107,6 @@ void __init mem_init(void)
|
||||
free_highmem_page(page);
|
||||
}
|
||||
#endif
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
void free_initmem(void)
|
||||
|
@ -98,6 +98,4 @@ void __init mem_init(void)
|
||||
|
||||
/* this will put all low memory onto the freelists */
|
||||
memblock_free_all();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
@ -55,7 +55,6 @@ void __init mem_init(void)
|
||||
{
|
||||
/* No idea where this is actually declared. Seems to evade LXR. */
|
||||
memblock_free_all();
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
/*
|
||||
* To-Do: someone somewhere should wipe out the bootmem map
|
||||
|
@ -286,15 +286,6 @@ config FORCE_CPEI_RETARGET
|
||||
config ARCH_SELECT_MEMORY_MODEL
|
||||
def_bool y
|
||||
|
||||
config ARCH_DISCONTIGMEM_ENABLE
|
||||
def_bool y
|
||||
depends on BROKEN
|
||||
help
|
||||
Say Y to support efficient handling of discontiguous physical memory,
|
||||
for architectures which are either NUMA (Non-Uniform Memory Access)
|
||||
or have huge holes in the physical address space for other reasons.
|
||||
See <file:Documentation/vm/numa.rst> for more.
|
||||
|
||||
config ARCH_FLATMEM_ENABLE
|
||||
def_bool y
|
||||
|
||||
@ -325,22 +316,8 @@ config NODES_SHIFT
|
||||
MAX_NUMNODES will be 2^(This value).
|
||||
If in doubt, use the default.
|
||||
|
||||
# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
|
||||
# VIRTUAL_MEM_MAP has been retained for historical reasons.
|
||||
config VIRTUAL_MEM_MAP
|
||||
bool "Virtual mem map"
|
||||
depends on !SPARSEMEM && !FLATMEM
|
||||
default y
|
||||
help
|
||||
Say Y to compile the kernel with support for a virtual mem map.
|
||||
This code also only takes effect if a memory hole of greater than
|
||||
1 Gb is found during boot. You must turn this option on if you
|
||||
require the DISCONTIGMEM option for your machine. If you are
|
||||
unsure, say Y.
|
||||
|
||||
config HOLES_IN_ZONE
|
||||
bool
|
||||
default y if VIRTUAL_MEM_MAP
|
||||
|
||||
config HAVE_ARCH_NODEDATA_EXTENSION
|
||||
def_bool y
|
||||
|
@ -9,7 +9,6 @@ CONFIG_SGI_PARTITION=y
|
||||
CONFIG_SMP=y
|
||||
CONFIG_NR_CPUS=2
|
||||
CONFIG_PREEMPT=y
|
||||
# CONFIG_VIRTUAL_MEM_MAP is not set
|
||||
CONFIG_IA64_PALINFO=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BINFMT_MISC=m
|
||||
|
@ -58,15 +58,4 @@ extern int reserve_elfcorehdr(u64 *start, u64 *end);
|
||||
|
||||
extern int register_active_ranges(u64 start, u64 len, int nid);
|
||||
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
extern unsigned long VMALLOC_END;
|
||||
extern struct page *vmem_map;
|
||||
extern int create_mem_map_page_table(u64 start, u64 end, void *arg);
|
||||
extern int vmemmap_find_next_valid_pfn(int, int);
|
||||
#else
|
||||
static inline int vmemmap_find_next_valid_pfn(int node, int i)
|
||||
{
|
||||
return i + 1;
|
||||
}
|
||||
#endif
|
||||
#endif /* meminit_h */
|
||||
|
@ -14,16 +14,20 @@
|
||||
struct elf64_shdr; /* forward declration */
|
||||
|
||||
struct mod_arch_specific {
|
||||
/* Used only at module load time. */
|
||||
struct elf64_shdr *core_plt; /* core PLT section */
|
||||
struct elf64_shdr *init_plt; /* init PLT section */
|
||||
struct elf64_shdr *got; /* global offset table */
|
||||
struct elf64_shdr *opd; /* official procedure descriptors */
|
||||
struct elf64_shdr *unwind; /* unwind-table section */
|
||||
unsigned long gp; /* global-pointer for module */
|
||||
unsigned int next_got_entry; /* index of next available got entry */
|
||||
|
||||
/* Used at module run and cleanup time. */
|
||||
void *core_unw_table; /* core unwind-table cookie returned by unwinder */
|
||||
void *init_unw_table; /* init unwind-table cookie returned by unwinder */
|
||||
unsigned int next_got_entry; /* index of next available got entry */
|
||||
void *opd_addr; /* symbolize uses .opd to get to actual function */
|
||||
unsigned long opd_size;
|
||||
};
|
||||
|
||||
#define ARCH_SHF_SMALL SHF_IA_64_SHORT
|
||||
|
@ -95,31 +95,10 @@ do { \
|
||||
|
||||
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
|
||||
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
extern int ia64_pfn_valid (unsigned long pfn);
|
||||
#else
|
||||
# define ia64_pfn_valid(pfn) 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
extern struct page *vmem_map;
|
||||
#ifdef CONFIG_DISCONTIGMEM
|
||||
# define page_to_pfn(page) ((unsigned long) (page - vmem_map))
|
||||
# define pfn_to_page(pfn) (vmem_map + (pfn))
|
||||
# define __pfn_to_phys(pfn) PFN_PHYS(pfn)
|
||||
#else
|
||||
# include <asm-generic/memory_model.h>
|
||||
#endif
|
||||
#else
|
||||
# include <asm-generic/memory_model.h>
|
||||
#endif
|
||||
#include <asm-generic/memory_model.h>
|
||||
|
||||
#ifdef CONFIG_FLATMEM
|
||||
# define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
|
||||
#elif defined(CONFIG_DISCONTIGMEM)
|
||||
extern unsigned long min_low_pfn;
|
||||
extern unsigned long max_low_pfn;
|
||||
# define pfn_valid(pfn) (((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
|
||||
# define pfn_valid(pfn) ((pfn) < max_mapnr)
|
||||
#endif
|
||||
|
||||
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
|
||||
|
@ -223,10 +223,6 @@ ia64_phys_addr_valid (unsigned long addr)
|
||||
|
||||
|
||||
#define VMALLOC_START (RGN_BASE(RGN_GATE) + 0x200000000UL)
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
# define VMALLOC_END_INIT (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
|
||||
extern unsigned long VMALLOC_END;
|
||||
#else
|
||||
#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
/* SPARSEMEM_VMEMMAP uses half of vmalloc... */
|
||||
# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10)))
|
||||
@ -234,7 +230,6 @@ extern unsigned long VMALLOC_END;
|
||||
#else
|
||||
# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* fs/proc/kcore.c */
|
||||
#define kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE))
|
||||
@ -328,7 +323,7 @@ extern void __ia64_sync_icache_dcache(pte_t pteval);
|
||||
static inline void set_pte(pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
/* page is present && page is user && page is executable
|
||||
* && (page swapin or new page or page migraton
|
||||
* && (page swapin or new page or page migration
|
||||
* || copy_on_write with page copying.)
|
||||
*/
|
||||
if (pte_present_exec_user(pteval) &&
|
||||
|
@ -9,7 +9,7 @@ endif
|
||||
|
||||
extra-y := head.o vmlinux.lds
|
||||
|
||||
obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
|
||||
obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o \
|
||||
irq_lsapic.o ivt.o pal.o patch.o process.o ptrace.o sal.o \
|
||||
salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
|
||||
unwind.o mca.o mca_asm.o topology.o dma-mapping.o iosapic.o acpi.o \
|
||||
|
@ -446,7 +446,8 @@ void __init acpi_numa_fixup(void)
|
||||
if (srat_num_cpus == 0) {
|
||||
node_set_online(0);
|
||||
node_cpuid[0].phys_id = hard_smp_processor_id();
|
||||
return;
|
||||
slit_distance(0, 0) = LOCAL_DISTANCE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -489,7 +490,7 @@ void __init acpi_numa_fixup(void)
|
||||
for (j = 0; j < MAX_NUMNODES; j++)
|
||||
slit_distance(i, j) = i == j ?
|
||||
LOCAL_DISTANCE : REMOTE_DISTANCE;
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(numa_slit, -1, sizeof(numa_slit));
|
||||
@ -514,6 +515,8 @@ void __init acpi_numa_fixup(void)
|
||||
printk("\n");
|
||||
}
|
||||
#endif
|
||||
out:
|
||||
node_possible_map = node_online_map;
|
||||
}
|
||||
#endif /* CONFIG_ACPI_NUMA */
|
||||
|
||||
|
@ -415,10 +415,10 @@ efi_get_pal_addr (void)
|
||||
mask = ~((1 << IA64_GRANULE_SHIFT) - 1);
|
||||
|
||||
printk(KERN_INFO "CPU %d: mapping PAL code "
|
||||
"[0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
|
||||
smp_processor_id(), md->phys_addr,
|
||||
md->phys_addr + efi_md_size(md),
|
||||
vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
|
||||
"[0x%llx-0x%llx) into [0x%llx-0x%llx)\n",
|
||||
smp_processor_id(), md->phys_addr,
|
||||
md->phys_addr + efi_md_size(md),
|
||||
vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
|
||||
#endif
|
||||
return __va(md->phys_addr);
|
||||
}
|
||||
@ -560,6 +560,7 @@ efi_init (void)
|
||||
{
|
||||
efi_memory_desc_t *md;
|
||||
void *p;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0, p = efi_map_start; p < efi_map_end;
|
||||
++i, p += efi_desc_size)
|
||||
@ -586,7 +587,7 @@ efi_init (void)
|
||||
}
|
||||
|
||||
printk("mem%02d: %s "
|
||||
"range=[0x%016lx-0x%016lx) (%4lu%s)\n",
|
||||
"range=[0x%016llx-0x%016llx) (%4lu%s)\n",
|
||||
i, efi_md_typeattr_format(buf, sizeof(buf), md),
|
||||
md->phys_addr,
|
||||
md->phys_addr + efi_md_size(md), size, unit);
|
||||
|
@ -172,7 +172,7 @@ ENTRY(fsys_gettimeofday)
|
||||
// r25 = itc_lastcycle value
|
||||
// r26 = address clocksource cycle_last
|
||||
// r27 = (not used)
|
||||
// r28 = sequence number at the beginning of critcal section
|
||||
// r28 = sequence number at the beginning of critical section
|
||||
// r29 = address of itc_jitter
|
||||
// r30 = time processing flags / memory address
|
||||
// r31 = pointer to result
|
||||
@ -432,7 +432,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
|
||||
* - r29: psr
|
||||
*
|
||||
* We used to clear some PSR bits here but that requires slow
|
||||
* serialization. Fortuntely, that isn't really necessary.
|
||||
* serialization. Fortunately, that isn't really necessary.
|
||||
* The rationale is as follows: we used to clear bits
|
||||
* ~PSR_PRESERVED_BITS in PSR.L. Since
|
||||
* PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
|
||||
|
@ -33,7 +33,6 @@
|
||||
#include <asm/mca_asm.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <asm/export.h>
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
@ -405,11 +404,6 @@ start_ap:
|
||||
|
||||
// This is executed by the bootstrap processor (bsp) only:
|
||||
|
||||
#ifdef CONFIG_IA64_FW_EMU
|
||||
// initialize PAL & SAL emulator:
|
||||
br.call.sptk.many rp=sys_fw_init
|
||||
.ret1:
|
||||
#endif
|
||||
br.call.sptk.many rp=start_kernel
|
||||
.ret2: addl r3=@ltoff(halt_msg),gp
|
||||
;;
|
||||
|
@ -1,12 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Architecture-specific kernel symbols
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_VIRTUAL_MEM_MAP) || defined(CONFIG_DISCONTIGMEM)
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/memblock.h>
|
||||
EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */
|
||||
EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */
|
||||
#endif
|
@ -143,7 +143,7 @@ void machine_kexec(struct kimage *image)
|
||||
|
||||
void arch_crash_save_vmcoreinfo(void)
|
||||
{
|
||||
#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM)
|
||||
#if defined(CONFIG_SPARSEMEM)
|
||||
VMCOREINFO_SYMBOL(pgdat_list);
|
||||
VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES);
|
||||
#endif
|
||||
|
@ -109,9 +109,9 @@
|
||||
#include "irq.h"
|
||||
|
||||
#if defined(IA64_MCA_DEBUG_INFO)
|
||||
# define IA64_MCA_DEBUG(fmt...) printk(fmt)
|
||||
# define IA64_MCA_DEBUG(fmt...) printk(fmt)
|
||||
#else
|
||||
# define IA64_MCA_DEBUG(fmt...)
|
||||
# define IA64_MCA_DEBUG(fmt...) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define NOTIFY_INIT(event, regs, arg, spin) \
|
||||
|
@ -905,9 +905,31 @@ register_unwind_table (struct module *mod)
|
||||
int
|
||||
module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
|
||||
{
|
||||
struct mod_arch_specific *mas = &mod->arch;
|
||||
|
||||
DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
|
||||
if (mod->arch.unwind)
|
||||
if (mas->unwind)
|
||||
register_unwind_table(mod);
|
||||
|
||||
/*
|
||||
* ".opd" was already relocated to the final destination. Store
|
||||
* it's address for use in symbolizer.
|
||||
*/
|
||||
mas->opd_addr = (void *)mas->opd->sh_addr;
|
||||
mas->opd_size = mas->opd->sh_size;
|
||||
|
||||
/*
|
||||
* Module relocation was already done at this point. Section
|
||||
* headers are about to be deleted. Wipe out load-time context.
|
||||
*/
|
||||
mas->core_plt = NULL;
|
||||
mas->init_plt = NULL;
|
||||
mas->got = NULL;
|
||||
mas->opd = NULL;
|
||||
mas->unwind = NULL;
|
||||
mas->gp = 0;
|
||||
mas->next_got_entry = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -926,10 +948,9 @@ module_arch_cleanup (struct module *mod)
|
||||
|
||||
void *dereference_module_function_descriptor(struct module *mod, void *ptr)
|
||||
{
|
||||
Elf64_Shdr *opd = mod->arch.opd;
|
||||
struct mod_arch_specific *mas = &mod->arch;
|
||||
|
||||
if (ptr < (void *)opd->sh_addr ||
|
||||
ptr >= (void *)(opd->sh_addr + opd->sh_size))
|
||||
if (ptr < mas->opd_addr || ptr >= mas->opd_addr + mas->opd_size)
|
||||
return ptr;
|
||||
|
||||
return dereference_function_descriptor(ptr);
|
||||
|
@ -86,7 +86,7 @@ GLOBAL_ENTRY(ia64_pal_call_static)
|
||||
mov ar.pfs = loc1
|
||||
mov rp = loc0
|
||||
;;
|
||||
srlz.d // seralize restoration of psr.l
|
||||
srlz.d // serialize restoration of psr.l
|
||||
br.ret.sptk.many b0
|
||||
END(ia64_pal_call_static)
|
||||
EXPORT_SYMBOL(ia64_pal_call_static)
|
||||
@ -194,7 +194,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static)
|
||||
mov rp = loc0
|
||||
;;
|
||||
mov ar.rsc=loc4 // restore RSE configuration
|
||||
srlz.d // seralize restoration of psr.l
|
||||
srlz.d // serialize restoration of psr.l
|
||||
br.ret.sptk.many b0
|
||||
END(ia64_pal_call_phys_static)
|
||||
EXPORT_SYMBOL(ia64_pal_call_phys_static)
|
||||
@ -252,7 +252,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
|
||||
mov rp = loc0
|
||||
;;
|
||||
mov ar.rsc=loc4 // restore RSE configuration
|
||||
srlz.d // seralize restoration of psr.l
|
||||
srlz.d // serialize restoration of psr.l
|
||||
br.ret.sptk.many b0
|
||||
END(ia64_pal_call_phys_stacked)
|
||||
EXPORT_SYMBOL(ia64_pal_call_phys_stacked)
|
||||
|
@ -7,6 +7,5 @@ obj-y := init.o fault.o tlb.o extable.o ioremap.o
|
||||
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_NUMA) += numa.o
|
||||
obj-$(CONFIG_DISCONTIGMEM) += discontig.o
|
||||
obj-$(CONFIG_SPARSEMEM) += discontig.o
|
||||
obj-$(CONFIG_FLATMEM) += contig.o
|
||||
|
@ -153,11 +153,7 @@ find_memory (void)
efi_memmap_walk(find_max_min_low_pfn, NULL);
max_pfn = max_low_pfn;

#ifdef CONFIG_VIRTUAL_MEM_MAP
efi_memmap_walk(filter_memory, register_active_ranges);
#else
memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
#endif

find_initrd();

@ -585,25 +585,6 @@ void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
|
||||
}
|
||||
}
|
||||
|
||||
static void __init virtual_map_init(void)
|
||||
{
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
int node;
|
||||
|
||||
VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
|
||||
sizeof(struct page));
|
||||
vmem_map = (struct page *) VMALLOC_END;
|
||||
efi_memmap_walk(create_mem_map_page_table, NULL);
|
||||
printk("Virtual mem_map starts at 0x%p\n", vmem_map);
|
||||
|
||||
for_each_online_node(node) {
|
||||
unsigned long pfn_offset = mem_data[node].min_pfn;
|
||||
|
||||
NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* paging_init - setup page tables
|
||||
*
|
||||
@ -619,8 +600,6 @@ void __init paging_init(void)
|
||||
|
||||
sparse_init();
|
||||
|
||||
virtual_map_init();
|
||||
|
||||
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
|
||||
max_zone_pfns[ZONE_DMA32] = max_dma;
|
||||
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
|
||||
|
@ -84,18 +84,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
|
||||
if (faulthandler_disabled() || !mm)
|
||||
goto no_context;
|
||||
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
/*
|
||||
* If fault is in region 5 and we are in the kernel, we may already
|
||||
* have the mmap_lock (pfn_valid macro is called during mmap). There
|
||||
* is no vma for region 5 addr's anyway, so skip getting the semaphore
|
||||
* and go directly to the exception handling code.
|
||||
*/
|
||||
|
||||
if ((REGION_NUMBER(address) == 5) && !user_mode(regs))
|
||||
goto bad_area_no_up;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is to handle the kprobes on user space access instructions
|
||||
*/
|
||||
@ -213,9 +201,6 @@ retry:
|
||||
|
||||
bad_area:
|
||||
mmap_read_unlock(mm);
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
bad_area_no_up:
|
||||
#endif
|
||||
if ((isr & IA64_ISR_SP)
|
||||
|| ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
|
||||
{
|
||||
|
@ -43,13 +43,6 @@ extern void ia64_tlb_init (void);
|
||||
|
||||
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
|
||||
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
unsigned long VMALLOC_END = VMALLOC_END_INIT;
|
||||
EXPORT_SYMBOL(VMALLOC_END);
|
||||
struct page *vmem_map;
|
||||
EXPORT_SYMBOL(vmem_map);
|
||||
#endif
|
||||
|
||||
struct page *zero_page_memmap_ptr; /* map entry for zero page */
|
||||
EXPORT_SYMBOL(zero_page_memmap_ptr);
|
||||
|
||||
@ -373,212 +366,6 @@ void ia64_mmu_init(void *my_cpu_data)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
int vmemmap_find_next_valid_pfn(int node, int i)
|
||||
{
|
||||
unsigned long end_address, hole_next_pfn;
|
||||
unsigned long stop_address;
|
||||
pg_data_t *pgdat = NODE_DATA(node);
|
||||
|
||||
end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
|
||||
end_address = PAGE_ALIGN(end_address);
|
||||
stop_address = (unsigned long) &vmem_map[pgdat_end_pfn(pgdat)];
|
||||
|
||||
do {
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
pgd = pgd_offset_k(end_address);
|
||||
if (pgd_none(*pgd)) {
|
||||
end_address += PGDIR_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
p4d = p4d_offset(pgd, end_address);
|
||||
if (p4d_none(*p4d)) {
|
||||
end_address += P4D_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
pud = pud_offset(p4d, end_address);
|
||||
if (pud_none(*pud)) {
|
||||
end_address += PUD_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
pmd = pmd_offset(pud, end_address);
|
||||
if (pmd_none(*pmd)) {
|
||||
end_address += PMD_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
pte = pte_offset_kernel(pmd, end_address);
|
||||
retry_pte:
|
||||
if (pte_none(*pte)) {
|
||||
end_address += PAGE_SIZE;
|
||||
pte++;
|
||||
if ((end_address < stop_address) &&
|
||||
(end_address != ALIGN(end_address, 1UL << PMD_SHIFT)))
|
||||
goto retry_pte;
|
||||
continue;
|
||||
}
|
||||
/* Found next valid vmem_map page */
|
||||
break;
|
||||
} while (end_address < stop_address);
|
||||
|
||||
end_address = min(end_address, stop_address);
|
||||
end_address = end_address - (unsigned long) vmem_map + sizeof(struct page) - 1;
|
||||
hole_next_pfn = end_address / sizeof(struct page);
|
||||
return hole_next_pfn - pgdat->node_start_pfn;
|
||||
}
|
||||
|
||||
int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
|
||||
{
|
||||
unsigned long address, start_page, end_page;
|
||||
struct page *map_start, *map_end;
|
||||
int node;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
|
||||
map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
|
||||
|
||||
start_page = (unsigned long) map_start & PAGE_MASK;
|
||||
end_page = PAGE_ALIGN((unsigned long) map_end);
|
||||
node = paddr_to_nid(__pa(start));
|
||||
|
||||
for (address = start_page; address < end_page; address += PAGE_SIZE) {
|
||||
pgd = pgd_offset_k(address);
|
||||
if (pgd_none(*pgd)) {
|
||||
p4d = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
|
||||
if (!p4d)
|
||||
goto err_alloc;
|
||||
pgd_populate(&init_mm, pgd, p4d);
|
||||
}
|
||||
p4d = p4d_offset(pgd, address);
|
||||
|
||||
if (p4d_none(*p4d)) {
|
||||
pud = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
|
||||
if (!pud)
|
||||
goto err_alloc;
|
||||
p4d_populate(&init_mm, p4d, pud);
|
||||
}
|
||||
pud = pud_offset(p4d, address);
|
||||
|
||||
if (pud_none(*pud)) {
|
||||
pmd = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
|
||||
if (!pmd)
|
||||
goto err_alloc;
|
||||
pud_populate(&init_mm, pud, pmd);
|
||||
}
|
||||
pmd = pmd_offset(pud, address);
|
||||
|
||||
if (pmd_none(*pmd)) {
|
||||
pte = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
|
||||
if (!pte)
|
||||
goto err_alloc;
|
||||
pmd_populate_kernel(&init_mm, pmd, pte);
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, address);
|
||||
|
||||
if (pte_none(*pte)) {
|
||||
void *page = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE,
|
||||
node);
|
||||
if (!page)
|
||||
goto err_alloc;
|
||||
set_pte(pte, pfn_pte(__pa(page) >> PAGE_SHIFT,
|
||||
PAGE_KERNEL));
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
err_alloc:
|
||||
panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d\n",
|
||||
__func__, PAGE_SIZE, PAGE_SIZE, node);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
struct memmap_init_callback_data {
|
||||
struct page *start;
|
||||
struct page *end;
|
||||
int nid;
|
||||
unsigned long zone;
|
||||
};
|
||||
|
||||
static int __meminit
|
||||
virtual_memmap_init(u64 start, u64 end, void *arg)
|
||||
{
|
||||
struct memmap_init_callback_data *args;
|
||||
struct page *map_start, *map_end;
|
||||
|
||||
args = (struct memmap_init_callback_data *) arg;
|
||||
map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
|
||||
map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
|
||||
|
||||
if (map_start < args->start)
|
||||
map_start = args->start;
|
||||
if (map_end > args->end)
|
||||
map_end = args->end;
|
||||
|
||||
/*
|
||||
* We have to initialize "out of bounds" struct page elements that fit completely
|
||||
* on the same pages that were allocated for the "in bounds" elements because they
|
||||
* may be referenced later (and found to be "reserved").
|
||||
*/
|
||||
map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
|
||||
map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
|
||||
/ sizeof(struct page));
|
||||
|
||||
if (map_start < map_end)
|
||||
memmap_init_range((unsigned long)(map_end - map_start),
|
||||
args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
|
||||
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __meminit memmap_init_zone(struct zone *zone)
|
||||
{
|
||||
int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
|
||||
unsigned long start_pfn = zone->zone_start_pfn;
|
||||
unsigned long size = zone->spanned_pages;
|
||||
|
||||
if (!vmem_map) {
|
||||
memmap_init_range(size, nid, zone_id, start_pfn, start_pfn + size,
|
||||
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
|
||||
} else {
|
||||
struct page *start;
|
||||
struct memmap_init_callback_data args;
|
||||
|
||||
start = pfn_to_page(start_pfn);
|
||||
args.start = start;
|
||||
args.end = start + size;
|
||||
args.nid = nid;
|
||||
args.zone = zone_id;
|
||||
|
||||
efi_memmap_walk(virtual_memmap_init, &args);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
ia64_pfn_valid (unsigned long pfn)
|
||||
{
|
||||
char byte;
|
||||
struct page *pg = pfn_to_page(pfn);
|
||||
|
||||
return (__get_user(byte, (char __user *) pg) == 0)
|
||||
&& ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
|
||||
|| (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
|
||||
}
|
||||
EXPORT_SYMBOL(ia64_pfn_valid);
|
||||
|
||||
#endif /* CONFIG_VIRTUAL_MEM_MAP */
|
||||
|
||||
int __init register_active_ranges(u64 start, u64 len, int nid)
|
||||
{
|
||||
u64 end = start + len;
|
||||
@ -644,13 +431,16 @@ mem_init (void)
|
||||
* _before_ any drivers that may need the PCI DMA interface are
|
||||
* initialized or bootmem has been freed.
|
||||
*/
|
||||
do {
|
||||
#ifdef CONFIG_INTEL_IOMMU
|
||||
detect_intel_iommu();
|
||||
if (!iommu_detected)
|
||||
detect_intel_iommu();
|
||||
if (iommu_detected)
|
||||
break;
|
||||
#endif
|
||||
#ifdef CONFIG_SWIOTLB
|
||||
swiotlb_init(1);
|
||||
#endif
|
||||
} while (0);
|
||||
|
||||
#ifdef CONFIG_FLATMEM
|
||||
BUG_ON(!mem_map);
|
||||
@ -659,7 +449,6 @@ mem_init (void)
|
||||
set_max_mapnr(max_low_pfn);
|
||||
high_memory = __va(max_low_pfn * PAGE_SIZE);
|
||||
memblock_free_all();
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
/*
|
||||
* For fsyscall entrpoints with no light-weight handler, use the ordinary
|
||||
|
@ -153,5 +153,4 @@ void __init mem_init(void)
|
||||
/* this will put all memory onto the freelists */
|
||||
memblock_free_all();
|
||||
init_pointer_tables();
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
@ -131,7 +131,6 @@ void __init mem_init(void)
|
||||
highmem_setup();
|
||||
#endif
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
mem_init_done = 1;
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ config MIPS
select ARCH_SUPPORTS_UPROBES
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU

@ -178,7 +178,6 @@ void __init mem_init(void)
|
||||
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
|
||||
memblock_free_all();
|
||||
setup_zero_pages(); /* This comes from node 0 */
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
/* All PCI device belongs to logical Node-0 */
|
||||
|
@ -15,6 +15,7 @@
#include <linux/syscalls.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>

#include <asm/cacheflush.h>
#include <asm/processor.h>

@ -467,7 +467,6 @@ void __init mem_init(void)
|
||||
memblock_free_all();
|
||||
setup_zero_pages(); /* Setup zeroed pages. */
|
||||
mem_init_free_highmem();
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
if ((unsigned long) &_text > (unsigned long) CKSEG0)
|
||||
|
@ -420,5 +420,4 @@ void __init mem_init(void)
|
||||
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
|
||||
memblock_free_all();
|
||||
setup_zero_pages(); /* This comes from node 0 */
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
@ -191,7 +191,6 @@ void __init mem_init(void)
|
||||
|
||||
/* this will put all low memory onto the freelists */
|
||||
memblock_free_all();
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
pr_info("virtual kernel memory layout:\n"
|
||||
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/cpuinfo.h>
|
||||
|
@ -71,7 +71,6 @@ void __init mem_init(void)
|
||||
|
||||
/* this will put all memory onto the freelists */
|
||||
memblock_free_all();
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
void __init mmu_init(void)
|
||||
|
@ -211,8 +211,6 @@ void __init mem_init(void)
|
||||
/* this will put all low memory onto the freelists */
|
||||
memblock_free_all();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
printk("mem_init_done ...........................................\n");
|
||||
mem_init_done = 1;
|
||||
return;
|
||||
|
@ -573,8 +573,6 @@ void __init mem_init(void)
|
||||
#endif
|
||||
parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Do not expose the virtual kernel memory layout to userspace.
|
||||
|
@ -151,6 +151,7 @@ config PPC
|
||||
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64
|
||||
select ARCH_USE_BUILTIN_BSWAP
|
||||
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
|
||||
select ARCH_USE_MEMTEST
|
||||
select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS
|
||||
select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
|
@ -1,4 +1,24 @@
#ifndef _ASM_POWERPC_VMALLOC_H
#define _ASM_POWERPC_VMALLOC_H

#include <asm/mmu.h>
#include <asm/page.h>

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
/* HPT does not cope with large pages in the vmalloc area */
return radix_enabled();
}

#define arch_vmap_pmd_supported arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
return radix_enabled();
}

#endif

#endif /* _ASM_POWERPC_VMALLOC_H */

@ -48,7 +48,7 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size)
|
||||
if (slab_is_available()) {
|
||||
if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
|
||||
pgprot_noncached(PAGE_KERNEL)))
|
||||
unmap_kernel_range(ISA_IO_BASE, size);
|
||||
vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size);
|
||||
} else {
|
||||
early_ioremap_range(ISA_IO_BASE, pa, size,
|
||||
pgprot_noncached(PAGE_KERNEL));
|
||||
@ -311,7 +311,7 @@ static void isa_bridge_remove(void)
|
||||
isa_bridge_pcidev = NULL;
|
||||
|
||||
/* Unmap the ISA area */
|
||||
unmap_kernel_range(ISA_IO_BASE, 0x10000);
|
||||
vunmap_range(ISA_IO_BASE, ISA_IO_BASE + 0x10000);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -140,7 +140,7 @@ void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
|
||||
addr = (unsigned long)area->addr;
|
||||
if (ioremap_page_range(addr, addr + size, paddr,
|
||||
pgprot_noncached(PAGE_KERNEL))) {
|
||||
unmap_kernel_range(addr, size);
|
||||
vunmap_range(addr, addr + size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1082,22 +1082,6 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
|
||||
set_pte_at(mm, addr, ptep, pte);
|
||||
}
|
||||
|
||||
int __init arch_ioremap_pud_supported(void)
|
||||
{
|
||||
/* HPT does not cope with large pages in the vmalloc area */
|
||||
return radix_enabled();
|
||||
}
|
||||
|
||||
int __init arch_ioremap_pmd_supported(void)
|
||||
{
|
||||
return radix_enabled();
|
||||
}
|
||||
|
||||
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
|
||||
{
|
||||
pte_t *ptep = (pte_t *)pud;
|
||||
@ -1181,8 +1165,3 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int __init arch_ioremap_p4d_supported(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
|
||||
if (!ret)
|
||||
return (void __iomem *)area->addr + offset;
|
||||
|
||||
unmap_kernel_range(va, size);
|
||||
vunmap_range(va, va + size);
|
||||
free_vm_area(area);
|
||||
|
||||
return NULL;
|
||||
|
@ -282,7 +282,6 @@ void __init mem_init(void)
|
||||
(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
|
||||
#endif
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
#ifdef CONFIG_PPC32
|
||||
pr_info("Kernel virtual memory layout:\n");
|
||||
#ifdef CONFIG_KASAN
|
||||
|
@ -990,16 +990,12 @@ EXPORT_SYMBOL_GPL(is_xive_irq);
|
||||
void xive_cleanup_irq_data(struct xive_irq_data *xd)
|
||||
{
|
||||
if (xd->eoi_mmio) {
|
||||
unmap_kernel_range((unsigned long)xd->eoi_mmio,
|
||||
1u << xd->esb_shift);
|
||||
iounmap(xd->eoi_mmio);
|
||||
if (xd->eoi_mmio == xd->trig_mmio)
|
||||
xd->trig_mmio = NULL;
|
||||
xd->eoi_mmio = NULL;
|
||||
}
|
||||
if (xd->trig_mmio) {
|
||||
unmap_kernel_range((unsigned long)xd->trig_mmio,
|
||||
1u << xd->esb_shift);
|
||||
iounmap(xd->trig_mmio);
|
||||
xd->trig_mmio = NULL;
|
||||
}
|
||||
|
@ -102,7 +102,6 @@ void __init mem_init(void)
|
||||
high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
|
||||
memblock_free_all();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
print_vm_layout();
|
||||
}
|
||||
|
||||
|
@ -209,8 +209,6 @@ void __init mem_init(void)
|
||||
setup_zero_pages(); /* Setup zeroed pages. */
|
||||
|
||||
cmma_init_nodat();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
void free_initmem(void)
|
||||
|
@ -4,12 +4,11 @@
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/pagemap.h>
|
||||
#include <asm-generic/tlb.h>
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
#include <linux/swap.h>
|
||||
|
||||
#include <asm-generic/tlb.h>
|
||||
|
||||
#if defined(CONFIG_CPU_SH4)
|
||||
extern void tlb_wire_entry(struct vm_area_struct *, unsigned long, pte_t);
|
||||
extern void tlb_unwire_entry(void);
|
||||
@ -24,12 +23,7 @@ static inline void tlb_unwire_entry(void)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_MMU */
|
||||
|
||||
#include <asm-generic/tlb.h>
|
||||
|
||||
#endif /* CONFIG_CPU_SH4 */
|
||||
#endif /* CONFIG_MMU */
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __ASM_SH_TLB_H */
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/cache_insns.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/mman.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/threads.h>
|
||||
#include <asm/addrspace.h>
|
||||
#include <asm/page.h>
|
||||
|
@ -359,7 +359,6 @@ void __init mem_init(void)
|
||||
|
||||
vsyscall_init();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
pr_info("virtual kernel memory layout:\n"
|
||||
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
||||
" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
|
||||
|
@ -321,6 +321,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
||||
pgprot_val(newprot));
|
||||
}
|
||||
|
||||
/* only used by the huge vmap code, should never be called */
|
||||
#define pud_page(pud) NULL
|
||||
|
||||
struct seq_file;
|
||||
void mmu_info(struct seq_file *m);
|
||||
|
||||
|
@ -292,8 +292,6 @@ void __init mem_init(void)
|
||||
|
||||
map_high_region(start_pfn, end_pfn);
|
||||
}
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
void sparc_flush_page_to_ram(struct page *page)
|
||||
|
@ -2520,7 +2520,6 @@ void __init mem_init(void)
|
||||
}
|
||||
mark_page_reserved(mem_map_zero);
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
if (tlb_type == cheetah || tlb_type == cheetah_plus)
|
||||
cheetah_ecache_flush_init();
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
@ -54,7 +54,6 @@ void __init mem_init(void)
|
||||
memblock_free_all();
|
||||
max_low_pfn = totalram_pages();
|
||||
max_pfn = max_low_pfn;
|
||||
mem_init_print_info(NULL);
|
||||
kmalloc_ok = 1;
|
||||
}
|
||||
|
||||
|
@ -100,6 +100,7 @@ config X86
|
||||
select ARCH_SUPPORTS_LTO_CLANG if X86_64
|
||||
select ARCH_SUPPORTS_LTO_CLANG_THIN if X86_64
|
||||
select ARCH_USE_BUILTIN_BSWAP
|
||||
select ARCH_USE_MEMTEST
|
||||
select ARCH_USE_QUEUED_RWLOCKS
|
||||
select ARCH_USE_QUEUED_SPINLOCKS
|
||||
select ARCH_USE_SYM_ANNOTATIONS
|
||||
|
@ -1,6 +1,26 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H

#include <asm/cpufeature.h>
#include <asm/page.h>
#include <asm/pgtable_areas.h>

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#ifdef CONFIG_X86_64
#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
return boot_cpu_has(X86_FEATURE_GBPAGES);
}
#endif

#define arch_vmap_pmd_supported arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
return boot_cpu_has(X86_FEATURE_PSE);
}

#endif

#endif /* _ASM_X86_VMALLOC_H */

@ -1458,7 +1458,7 @@ static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pseudo_lock_dev_mremap(struct vm_area_struct *area, unsigned long flags)
|
||||
static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
|
||||
{
|
||||
/* Not supported */
|
||||
return -EINVAL;
|
||||
|
@ -755,8 +755,6 @@ void __init mem_init(void)
|
||||
after_bootmem = 1;
|
||||
x86_init.hyper.init_after_bootmem();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
/*
|
||||
* Check boundaries twice: Some fundamental inconsistencies can
|
||||
* be detected at build time already.
|
||||
|
@ -826,6 +826,106 @@ void __init paging_init(void)
|
||||
zone_sizes_init();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
#define PAGE_UNUSED 0xFD
|
||||
|
||||
/*
|
||||
* The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
|
||||
* from unused_pmd_start to next PMD_SIZE boundary.
|
||||
*/
|
||||
static unsigned long unused_pmd_start __meminitdata;
|
||||
|
||||
static void __meminit vmemmap_flush_unused_pmd(void)
|
||||
{
|
||||
if (!unused_pmd_start)
|
||||
return;
|
||||
/*
|
||||
* Clears (unused_pmd_start, PMD_END]
|
||||
*/
|
||||
memset((void *)unused_pmd_start, PAGE_UNUSED,
|
||||
ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
|
||||
unused_pmd_start = 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/* Returns true if the PMD is completely unused and thus it can be freed */
|
||||
static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
|
||||
{
|
||||
unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
|
||||
|
||||
/*
|
||||
* Flush the unused range cache to ensure that memchr_inv() will work
|
||||
* for the whole range.
|
||||
*/
|
||||
vmemmap_flush_unused_pmd();
|
||||
memset((void *)addr, PAGE_UNUSED, end - addr);
|
||||
|
||||
return !memchr_inv((void *)start, PAGE_UNUSED, PMD_SIZE);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __meminit __vmemmap_use_sub_pmd(unsigned long start)
|
||||
{
|
||||
/*
|
||||
* As we expect to add in the same granularity as we remove, it's
|
||||
* sufficient to mark only some piece used to block the memmap page from
|
||||
* getting removed when removing some other adjacent memmap (just in
|
||||
* case the first memmap never gets initialized e.g., because the memory
|
||||
* block never gets onlined).
|
||||
*/
|
||||
memset((void *)start, 0, sizeof(struct page));
|
||||
}
|
||||
|
||||
static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
|
||||
{
|
||||
/*
|
||||
* We only optimize if the new used range directly follows the
|
||||
* previously unused range (esp., when populating consecutive sections).
|
||||
*/
|
||||
if (unused_pmd_start == start) {
|
||||
if (likely(IS_ALIGNED(end, PMD_SIZE)))
|
||||
unused_pmd_start = 0;
|
||||
else
|
||||
unused_pmd_start = end;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the range does not contiguously follows previous one, make sure
|
||||
* to mark the unused range of the previous one so it can be removed.
|
||||
*/
|
||||
vmemmap_flush_unused_pmd();
|
||||
__vmemmap_use_sub_pmd(start);
|
||||
}
|
||||
|
||||
|
||||
static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
|
||||
{
|
||||
vmemmap_flush_unused_pmd();
|
||||
|
||||
/*
|
||||
* Could be our memmap page is filled with PAGE_UNUSED already from a
|
||||
* previous remove. Make sure to reset it.
|
||||
*/
|
||||
__vmemmap_use_sub_pmd(start);
|
||||
|
||||
/*
|
||||
* Mark with PAGE_UNUSED the unused parts of the new memmap range
|
||||
*/
|
||||
if (!IS_ALIGNED(start, PMD_SIZE))
|
||||
memset((void *)start, PAGE_UNUSED,
|
||||
start - ALIGN_DOWN(start, PMD_SIZE));
|
||||
|
||||
/*
|
||||
* We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
|
||||
* consecutive sections. Remember for the last added PMD where the
|
||||
* unused range begins.
|
||||
*/
|
||||
if (!IS_ALIGNED(end, PMD_SIZE))
|
||||
unused_pmd_start = end;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Memory hotplug specific functions
|
||||
*/
|
||||
@ -871,8 +971,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
|
||||
return add_pages(nid, start_pfn, nr_pages, params);
|
||||
}
|
||||
|
||||
#define PAGE_INUSE 0xFD
|
||||
|
||||
static void __meminit free_pagetable(struct page *page, int order)
|
||||
{
|
||||
unsigned long magic;
|
||||
@ -962,7 +1060,6 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
|
||||
{
|
||||
unsigned long next, pages = 0;
|
||||
pte_t *pte;
|
||||
void *page_addr;
|
||||
phys_addr_t phys_addr;
|
||||
|
||||
pte = pte_start + pte_index(addr);
|
||||
@ -983,42 +1080,15 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
|
||||
if (phys_addr < (phys_addr_t)0x40000000)
|
||||
return;
|
||||
|
||||
if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
|
||||
/*
|
||||
* Do not free direct mapping pages since they were
|
||||
* freed when offlining, or simply not in use.
|
||||
*/
|
||||
if (!direct)
|
||||
free_pagetable(pte_page(*pte), 0);
|
||||
if (!direct)
|
||||
free_pagetable(pte_page(*pte), 0);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pte_clear(&init_mm, addr, pte);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pte_clear(&init_mm, addr, pte);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
|
||||
/* For non-direct mapping, pages means nothing. */
|
||||
pages++;
|
||||
} else {
|
||||
/*
|
||||
* If we are here, we are freeing vmemmap pages since
|
||||
* direct mapped memory ranges to be freed are aligned.
|
||||
*
|
||||
* If we are not removing the whole page, it means
|
||||
* other page structs in this page are being used and
|
||||
* we cannot remove them. So fill the unused page_structs
|
||||
* with 0xFD, and remove the page when it is wholly
|
||||
* filled with 0xFD.
|
||||
*/
|
||||
memset((void *)addr, PAGE_INUSE, next - addr);
|
||||
|
||||
page_addr = page_address(pte_page(*pte));
|
||||
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
|
||||
free_pagetable(pte_page(*pte), 0);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pte_clear(&init_mm, addr, pte);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
}
|
||||
/* For non-direct mapping, pages means nothing. */
|
||||
pages++;
|
||||
}
|
||||
|
||||
/* Call free_pte_table() in remove_pmd_table(). */
|
||||
@ -1034,7 +1104,6 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
|
||||
unsigned long next, pages = 0;
|
||||
pte_t *pte_base;
|
||||
pmd_t *pmd;
|
||||
void *page_addr;
|
||||
|
||||
pmd = pmd_start + pmd_index(addr);
|
||||
for (; addr < end; addr = next, pmd++) {
|
||||
@ -1054,22 +1123,16 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
|
||||
pmd_clear(pmd);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
pages++;
|
||||
} else {
|
||||
/* If here, we are freeing vmemmap pages. */
|
||||
memset((void *)addr, PAGE_INUSE, next - addr);
|
||||
|
||||
page_addr = page_address(pmd_page(*pmd));
|
||||
if (!memchr_inv(page_addr, PAGE_INUSE,
|
||||
PMD_SIZE)) {
|
||||
}
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
else if (vmemmap_pmd_is_unused(addr, next)) {
|
||||
free_hugepage_table(pmd_page(*pmd),
|
||||
altmap);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pmd_clear(pmd);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1090,7 +1153,6 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
|
||||
unsigned long next, pages = 0;
|
||||
pmd_t *pmd_base;
|
||||
pud_t *pud;
|
||||
void *page_addr;
|
||||
|
||||
pud = pud_start + pud_index(addr);
|
||||
for (; addr < end; addr = next, pud++) {
|
||||
@ -1099,33 +1161,13 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
|
||||
if (!pud_present(*pud))
|
||||
continue;
|
||||
|
||||
if (pud_large(*pud)) {
|
||||
if (IS_ALIGNED(addr, PUD_SIZE) &&
|
||||
IS_ALIGNED(next, PUD_SIZE)) {
|
||||
if (!direct)
|
||||
free_pagetable(pud_page(*pud),
|
||||
get_order(PUD_SIZE));
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pud_clear(pud);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
pages++;
|
||||
} else {
|
||||
/* If here, we are freeing vmemmap pages. */
|
||||
memset((void *)addr, PAGE_INUSE, next - addr);
|
||||
|
||||
page_addr = page_address(pud_page(*pud));
|
||||
if (!memchr_inv(page_addr, PAGE_INUSE,
|
||||
PUD_SIZE)) {
|
||||
free_pagetable(pud_page(*pud),
|
||||
get_order(PUD_SIZE));
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pud_clear(pud);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
}
|
||||
|
||||
if (pud_large(*pud) &&
|
||||
IS_ALIGNED(addr, PUD_SIZE) &&
|
||||
IS_ALIGNED(next, PUD_SIZE)) {
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pud_clear(pud);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
pages++;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1197,6 +1239,9 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct,
|
||||
void __ref vmemmap_free(unsigned long start, unsigned long end,
|
||||
struct vmem_altmap *altmap)
|
||||
{
|
||||
VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
|
||||
VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
|
||||
|
||||
remove_pagetable(start, end, false, altmap);
|
||||
}
|
||||
|
||||
@ -1306,8 +1351,6 @@ void __init mem_init(void)
|
||||
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
|
||||
|
||||
preallocate_vmalloc_pages();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
@ -1538,11 +1581,17 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
|
||||
|
||||
addr_end = addr + PMD_SIZE;
|
||||
p_end = p + PMD_SIZE;
|
||||
|
||||
if (!IS_ALIGNED(addr, PMD_SIZE) ||
|
||||
!IS_ALIGNED(next, PMD_SIZE))
|
||||
vmemmap_use_new_sub_pmd(addr, next);
|
||||
|
||||
continue;
|
||||
} else if (altmap)
|
||||
return -ENOMEM; /* no fallback */
|
||||
} else if (pmd_large(*pmd)) {
|
||||
vmemmap_verify((pte_t *)pmd, node, addr, next);
|
||||
vmemmap_use_sub_pmd(addr, next);
|
||||
continue;
|
||||
}
|
||||
if (vmemmap_populate_basepages(addr, next, node, NULL))
|
||||
@ -1556,6 +1605,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
|
||||
{
|
||||
int err;
|
||||
|
||||
VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
|
||||
VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
|
||||
|
||||
if (end - start < PAGES_PER_SECTION * sizeof(struct page))
|
||||
err = vmemmap_populate_basepages(start, end, node, NULL);
|
||||
else if (boot_cpu_has(X86_FEATURE_PSE))
|
||||
|
@ -481,25 +481,6 @@ void iounmap(volatile void __iomem *addr)
|
||||
}
|
||||
EXPORT_SYMBOL(iounmap);
|
||||
|
||||
int __init arch_ioremap_p4d_supported(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __init arch_ioremap_pud_supported(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return boot_cpu_has(X86_FEATURE_GBPAGES);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int __init arch_ioremap_pmd_supported(void)
|
||||
{
|
||||
return boot_cpu_has(X86_FEATURE_PSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
|
||||
* access
|
||||
|
@ -780,14 +780,6 @@ int pmd_clear_huge(pmd_t *pmd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Until we support 512GB pages, skip them in the vmap area.
|
||||
*/
|
||||
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/**
|
||||
* pud_free_pmd_page - Clear pud entry and free pmd page.
|
||||
@ -861,11 +853,6 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
|
||||
|
||||
#else /* !CONFIG_X86_64 */
|
||||
|
||||
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
|
||||
{
|
||||
return pud_none(*pud);
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable free page handling on x86-PAE. This assures that ioremap()
|
||||
* does not update sync'd pmd entries. See vmalloc_sync_one().
|
||||
|
@ -7,6 +7,7 @@ config XTENSA
|
||||
select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
|
||||
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU
|
||||
select ARCH_HAS_DMA_SET_UNCACHED if MMU
|
||||
select ARCH_USE_MEMTEST
|
||||
select ARCH_USE_QUEUED_RWLOCKS
|
||||
select ARCH_USE_QUEUED_SPINLOCKS
|
||||
select ARCH_WANT_FRAME_POINTERS
|
||||
|
@ -119,7 +119,6 @@ void __init mem_init(void)
|
||||
|
||||
memblock_free_all();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
pr_info("virtual kernel memory layout:\n"
|
||||
#ifdef CONFIG_KASAN
|
||||
" kasan : 0x%08lx - 0x%08lx (%5lu MB)\n"
|
||||
|
@ -764,6 +764,10 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;

/* Root-level stats are sourced from system-wide IO stats */
if (!cgroup_parent(css->cgroup))
return;

rcu_read_lock();

hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@ -786,8 +790,8 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
blkg_iostat_add(&bisc->last, &delta);
u64_stats_update_end(&blkg->iostat.sync);

/* propagate global delta to parent */
if (parent) {
/* propagate global delta to parent (unless that's root) */
if (parent && parent->parent) {
u64_stats_update_begin(&parent->iostat.sync);
blkg_iostat_set(&delta, &blkg->iostat.cur);
blkg_iostat_sub(&delta, &blkg->iostat.last);
@ -801,10 +805,11 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
}

/*
* The rstat algorithms intentionally don't handle the root cgroup to avoid
* incurring overhead when no cgroups are defined. For that reason,
* cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
* iostat in the root cgroup's blkcg_gq.
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
* cgroups are defined. For that reason, cgroup_rstat_flush in
* blkcg_print_stat does not actually fill out the iostat in the root
* cgroup's blkcg_gq.
*
* However, we would like to re-use the printing code between the root and
* non-root cgroups to the extent possible. For that reason, we simulate

@ -20,6 +20,7 @@ config DRM_I915
|
||||
select INPUT if ACPI
|
||||
select ACPI_VIDEO if ACPI
|
||||
select ACPI_BUTTON if ACPI
|
||||
select IO_MAPPING
|
||||
select SYNC_FILE
|
||||
select IOSF_MBI
|
||||
select CRC32
|
||||
|
@ -367,11 +367,10 @@ retry:
|
||||
goto err_unpin;
|
||||
|
||||
/* Finally, remap it using the new GTT offset */
|
||||
ret = remap_io_mapping(area,
|
||||
area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
|
||||
(ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
|
||||
min_t(u64, vma->size, area->vm_end - area->vm_start),
|
||||
&ggtt->iomap);
|
||||
ret = io_mapping_map_user(&ggtt->iomap, area, area->vm_start +
|
||||
(vma->ggtt_view.partial.offset << PAGE_SHIFT),
|
||||
(ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
|
||||
min_t(u64, vma->size, area->vm_end - area->vm_start));
|
||||
if (ret)
|
||||
goto err_fence;
|
||||
|
||||
|
@ -1905,9 +1905,6 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file);
|
||||
|
||||
/* i915_mm.c */
|
||||
int remap_io_mapping(struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long pfn, unsigned long size,
|
||||
struct io_mapping *iomap);
|
||||
int remap_io_sg(struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long size,
|
||||
struct scatterlist *sgl, resource_size_t iobase);
|
||||
|
@ -28,90 +28,10 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
|
||||
struct remap_pfn {
|
||||
struct mm_struct *mm;
|
||||
unsigned long pfn;
|
||||
pgprot_t prot;
|
||||
|
||||
struct sgt_iter sgt;
|
||||
resource_size_t iobase;
|
||||
};
|
||||
|
||||
static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
struct remap_pfn *r = data;
|
||||
|
||||
/* Special PTE are not associated with any struct page */
|
||||
set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot)));
|
||||
r->pfn++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
|
||||
|
||||
#define use_dma(io) ((io) != -1)
|
||||
|
||||
static inline unsigned long sgt_pfn(const struct remap_pfn *r)
|
||||
{
|
||||
if (use_dma(r->iobase))
|
||||
return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT;
|
||||
else
|
||||
return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static int remap_sg(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
struct remap_pfn *r = data;
|
||||
|
||||
if (GEM_WARN_ON(!r->sgt.sgp))
|
||||
return -EINVAL;
|
||||
|
||||
/* Special PTE are not associated with any struct page */
|
||||
set_pte_at(r->mm, addr, pte,
|
||||
pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot)));
|
||||
r->pfn++; /* track insertions in case we need to unwind later */
|
||||
|
||||
r->sgt.curr += PAGE_SIZE;
|
||||
if (r->sgt.curr >= r->sgt.max)
|
||||
r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* remap_io_mapping - remap an IO mapping to userspace
|
||||
* @vma: user vma to map to
|
||||
* @addr: target user address to start at
|
||||
* @pfn: physical address of kernel memory
|
||||
* @size: size of map area
|
||||
* @iomap: the source io_mapping
|
||||
*
|
||||
* Note: this is only safe if the mm semaphore is held when called.
|
||||
*/
|
||||
int remap_io_mapping(struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long pfn, unsigned long size,
|
||||
struct io_mapping *iomap)
|
||||
{
|
||||
struct remap_pfn r;
|
||||
int err;
|
||||
|
||||
#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
|
||||
GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
|
||||
|
||||
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
|
||||
r.mm = vma->vm_mm;
|
||||
r.pfn = pfn;
|
||||
r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
|
||||
(pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
|
||||
|
||||
err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
|
||||
if (unlikely(err)) {
|
||||
zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* remap_io_sg - remap an IO mapping to userspace
|
||||
* @vma: user vma to map to
|
||||
@ -126,12 +46,7 @@ int remap_io_sg(struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long size,
|
||||
struct scatterlist *sgl, resource_size_t iobase)
|
||||
{
|
||||
struct remap_pfn r = {
|
||||
.mm = vma->vm_mm,
|
||||
.prot = vma->vm_page_prot,
|
||||
.sgt = __sgt_iter(sgl, use_dma(iobase)),
|
||||
.iobase = iobase,
|
||||
};
|
||||
unsigned long pfn, len, remapped = 0;
|
||||
int err;
|
||||
|
||||
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
|
||||
@ -140,11 +55,25 @@ int remap_io_sg(struct vm_area_struct *vma,
|
||||
if (!use_dma(iobase))
|
||||
flush_cache_range(vma, addr, size);
|
||||
|
||||
err = apply_to_page_range(r.mm, addr, size, remap_sg, &r);
|
||||
if (unlikely(err)) {
|
||||
zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT);
|
||||
return err;
|
||||
}
|
||||
do {
|
||||
if (use_dma(iobase)) {
|
||||
if (!sg_dma_len(sgl))
|
||||
break;
|
||||
pfn = (sg_dma_address(sgl) + iobase) >> PAGE_SHIFT;
|
||||
len = sg_dma_len(sgl);
|
||||
} else {
|
||||
pfn = page_to_pfn(sg_page(sgl));
|
||||
len = sgl->length;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err = remap_pfn_range(vma, addr + remapped, pfn, len,
|
||||
vma->vm_page_prot);
|
||||
if (err)
|
||||
break;
|
||||
remapped += len;
|
||||
} while ((sgl = __sg_next(sgl)));
|
||||
|
||||
if (err)
|
||||
zap_vma_ptes(vma, addr, remapped);
|
||||
return err;
|
||||
}
|
||||
|
@ -47,17 +47,17 @@
|
||||
|
||||
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
|
||||
{
|
||||
struct sg_page_iter sg_iter;
|
||||
struct page *page;
|
||||
bool make_dirty = umem->writable && dirty;
|
||||
struct scatterlist *sg;
|
||||
unsigned int i;
|
||||
|
||||
if (umem->nmap > 0)
|
||||
ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
|
||||
DMA_BIDIRECTIONAL);
|
||||
|
||||
for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
|
||||
page = sg_page_iter_page(&sg_iter);
|
||||
unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
|
||||
}
|
||||
for_each_sg(umem->sg_head.sgl, sg, umem->sg_nents, i)
|
||||
unpin_user_page_range_dirty_lock(sg_page(sg),
|
||||
DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
|
||||
|
||||
sg_free_table(&umem->sg_head);
|
||||
}
|
||||
|
@ -4102,7 +4102,7 @@ void pci_unmap_iospace(struct resource *res)
#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;

unmap_kernel_range(vaddr, resource_size(res));
vunmap_range(vaddr, vaddr + resource_size(res));
#endif
}
EXPORT_SYMBOL(pci_unmap_iospace);
fs/aio.c
@ -323,16 +323,13 @@ static void aio_free_ring(struct kioctx *ctx)
}
}

static int aio_ring_mremap(struct vm_area_struct *vma, unsigned long flags)
static int aio_ring_mremap(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
struct kioctx_table *table;
int i, res = -EINVAL;

if (flags & MREMAP_DONTUNMAP)
return -EINVAL;

spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);

@ -310,7 +310,6 @@ EXPORT_SYMBOL(fs_param_is_path);
#ifdef CONFIG_VALIDATE_FS_PARSER
/**
* validate_constant_table - Validate a constant table
* @name: Name to use in reporting
* @tbl: The constant table to validate.
* @tbl_size: The size of the table.
* @low: The lowest permissible value.
@ -360,6 +359,7 @@ bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,

/**
* fs_validate_description - Validate a parameter description
* @name: The parameter name to search for.
* @desc: The parameter description to validate.
*/
bool fs_validate_description(const char *name,

@ -487,12 +487,28 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
if (pos >= dio->i_size)
|
||||
goto out_free_dio;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_needs_writeback(mapping, pos, end)) {
|
||||
ret = -EAGAIN;
|
||||
goto out_free_dio;
|
||||
}
|
||||
iomap_flags |= IOMAP_NOWAIT;
|
||||
}
|
||||
|
||||
if (iter_is_iovec(iter))
|
||||
dio->flags |= IOMAP_DIO_DIRTY;
|
||||
} else {
|
||||
iomap_flags |= IOMAP_WRITE;
|
||||
dio->flags |= IOMAP_DIO_WRITE;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_has_page(mapping, pos, end)) {
|
||||
ret = -EAGAIN;
|
||||
goto out_free_dio;
|
||||
}
|
||||
iomap_flags |= IOMAP_NOWAIT;
|
||||
}
|
||||
|
||||
/* for data sync or sync, we need sync completion processing */
|
||||
if (iocb->ki_flags & IOCB_DSYNC)
|
||||
dio->flags |= IOMAP_DIO_NEED_SYNC;
|
||||
@ -507,14 +523,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
dio->flags |= IOMAP_DIO_WRITE_FUA;
|
||||
}
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_has_page(mapping, pos, end)) {
|
||||
ret = -EAGAIN;
|
||||
goto out_free_dio;
|
||||
}
|
||||
iomap_flags |= IOMAP_NOWAIT;
|
||||
}
|
||||
|
||||
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
|
||||
ret = -EAGAIN;
|
||||
if (pos >= dio->i_size || pos + count > dio->i_size)
|
||||
|
@ -229,7 +229,7 @@ static int blockcheck_u64_get(void *data, u64 *val)
*val = *(u64 *)data;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");

static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
{
