Skip to content

Commit

Permalink
Merge branch 'akpm' (patches from Andrew)
Browse files Browse the repository at this point in the history
Merge misc updates from Andrew Morton:

 - a few misc things and hotfixes

 - ocfs2

 - almost all of MM

* emailed patches from Andrew Morton <[email protected]>: (139 commits)
  kernel/memremap.c: remove the unused device_private_entry_fault() export
  mm: delete find_get_entries_tag
  mm/huge_memory.c: make __thp_get_unmapped_area static
  mm/mprotect.c: fix compilation warning because of unused 'mm' variable
  mm/page-writeback: introduce tracepoint for wait_on_page_writeback()
  mm/vmscan: simplify trace_reclaim_flags and trace_shrink_flags
  mm/Kconfig: update "Memory Model" help text
  mm/vmscan.c: don't disable irq again when count pgrefill for memcg
  mm: memblock: make keeping memblock memory opt-in rather than opt-out
  hugetlbfs: always use address space in inode for resv_map pointer
  mm/z3fold.c: support page migration
  mm/z3fold.c: add structure for buddy handles
  mm/z3fold.c: improve compression by extending search
  mm/z3fold.c: introduce helper functions
  mm/page_alloc.c: remove unnecessary parameter in rmqueue_pcplist
  mm/hmm: add ARCH_HAS_HMM_MIRROR ARCH_HAS_HMM_DEVICE Kconfig
  mm/vmscan.c: simplify shrink_inactive_list()
  fs/sync.c: sync_file_range(2) may use WB_SYNC_ALL writeback
  xen/privcmd-buf.c: convert to use vm_map_pages_zero()
  xen/gntdev.c: convert to use vm_map_pages()
  ...
  • Loading branch information
torvalds committed May 14, 2019
2 parents 7e9890a + 640be2d commit 318222a
Show file tree
Hide file tree
Showing 189 changed files with 3,646 additions and 2,332 deletions.
12 changes: 12 additions & 0 deletions Documentation/sysctl/vm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm:
- stat_refresh
- numa_stat
- swappiness
- unprivileged_userfaultfd
- user_reserve_kbytes
- vfs_cache_pressure
- watermark_boost_factor
Expand Down Expand Up @@ -818,6 +819,17 @@ The default value is 60.

==============================================================

unprivileged_userfaultfd

This flag controls whether unprivileged users can use the userfaultfd
system calls. Set this to 1 to allow unprivileged users to use the
userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
privileged users (with SYS_CAP_PTRACE capability).

The default value is 1.

==============================================================

- user_reserve_kbytes

When overcommit_memory is set to 2, "never overcommit" mode, reserve
Expand Down
7 changes: 4 additions & 3 deletions Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ sub sigint_handler {
my $regex_kswapd_sleep_default = 'nid=([0-9]*)';
my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)';
my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)';
my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate_anon=([0-9]*) nr_activate_file=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)';
my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)';

Expand Down Expand Up @@ -212,7 +212,8 @@ sub generate_traceevent_regex {
"vmscan/mm_vmscan_lru_shrink_inactive",
$regex_lru_shrink_inactive_default,
"nid", "nr_scanned", "nr_reclaimed", "nr_dirty", "nr_writeback",
"nr_congested", "nr_immediate", "nr_activate", "nr_ref_keep",
"nr_congested", "nr_immediate", "nr_activate_anon",
"nr_activate_file", "nr_ref_keep",
"nr_unmap_fail", "priority", "flags");
$regex_lru_shrink_active = generate_traceevent_regex(
"vmscan/mm_vmscan_lru_shrink_active",
Expand Down Expand Up @@ -407,7 +408,7 @@ sub process_events {
}

my $nr_reclaimed = $3;
my $flags = $12;
my $flags = $13;
my $file = 0;
if ($flags =~ /RECLAIM_WB_FILE/) {
$file = 1;
Expand Down
94 changes: 75 additions & 19 deletions Documentation/vm/hmm.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,20 +189,10 @@ the driver callback returns.
When the device driver wants to populate a range of virtual addresses, it can
use either::

int hmm_vma_get_pfns(struct vm_area_struct *vma,
struct hmm_range *range,
unsigned long start,
unsigned long end,
hmm_pfn_t *pfns);
int hmm_vma_fault(struct vm_area_struct *vma,
struct hmm_range *range,
unsigned long start,
unsigned long end,
hmm_pfn_t *pfns,
bool write,
bool block);

The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
long hmm_range_snapshot(struct hmm_range *range);
long hmm_range_fault(struct hmm_range *range, bool block);

The first one (hmm_range_snapshot()) will only fetch present CPU page table
entries and will not trigger a page fault on missing or non-present entries.
The second one does trigger a page fault on missing or read-only entry if the
write parameter is true. Page faults use the generic mm page fault code path
Expand All @@ -220,25 +210,56 @@ respect in order to keep things properly synchronized. The usage pattern is::
{
struct hmm_range range;
...

range.start = ...;
range.end = ...;
range.pfns = ...;
range.flags = ...;
range.values = ...;
range.pfn_shift = ...;
hmm_range_register(&range);

/*
* Just wait for range to be valid, safe to ignore return value as we
* will use the return value of hmm_range_snapshot() below under the
* mmap_sem to ascertain the validity of the range.
*/
hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);

again:
ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
if (ret)
down_read(&mm->mmap_sem);
ret = hmm_range_snapshot(&range);
if (ret) {
up_read(&mm->mmap_sem);
if (ret == -EAGAIN) {
/*
* No need to check hmm_range_wait_until_valid() return value
* on retry we will get proper error with hmm_range_snapshot()
*/
hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
goto again;
}
hmm_mirror_unregister(&range);
return ret;
}
take_lock(driver->update);
if (!hmm_vma_range_done(vma, &range)) {
if (!range.valid) {
release_lock(driver->update);
up_read(&mm->mmap_sem);
goto again;
}

// Use pfns array content to update device page table

hmm_mirror_unregister(&range);
release_lock(driver->update);
up_read(&mm->mmap_sem);
return 0;
}

The driver->update lock is the same lock that the driver takes inside its
update() callback. That lock must be held before hmm_vma_range_done() to avoid
any race with a concurrent CPU page table update.
update() callback. That lock must be held before checking the range.valid
field to avoid any race with a concurrent CPU page table update.

HMM implements all this on top of the mmu_notifier API because we wanted a
simpler API and also to be able to perform optimizations latter on like doing
Expand All @@ -255,6 +276,41 @@ report commands as executed is serialized (there is no point in doing this
concurrently).


Leverage default_flags and pfn_flags_mask
=========================================

The hmm_range struct has 2 fields default_flags and pfn_flags_mask that allows
to set fault or snapshot policy for a whole range instead of having to set them
for each entries in the range.

For instance if the device flags for device entries are:
VALID (1 << 63)
WRITE (1 << 62)

Now let say that device driver wants to fault with at least read a range then
it does set:
range->default_flags = (1 << 63)
range->pfn_flags_mask = 0;

and calls hmm_range_fault() as described above. This will fill fault all page
in the range with at least read permission.

Now let say driver wants to do the same except for one page in the range for
which its want to have write. Now driver set:
range->default_flags = (1 << 63);
range->pfn_flags_mask = (1 << 62);
range->pfns[index_of_write] = (1 << 62);

With this HMM will fault in all page with at least read (ie valid) and for the
address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
write permission ie if the CPU pte does not have write permission set then HMM
will call handle_mm_fault().

Note that HMM will populate the pfns array with write permission for any entry
that have write permission within the CPU pte no matter what are the values set
in default_flags or pfn_flags_mask.


Represent and manage device memory from core kernel point of view
=================================================================

Expand Down
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -11746,6 +11746,7 @@ F: include/linux/oprofile.h
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
M: Mark Fasheh <[email protected]>
M: Joel Becker <[email protected]>
M: Joseph Qi <[email protected]>
L: [email protected] (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
S: Supported
Expand Down
7 changes: 7 additions & 0 deletions arch/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,13 @@ config ARCH_HAS_FORTIFY_SOURCE
An architecture should select this when it can successfully
build and run with CONFIG_FORTIFY_SOURCE.

#
# Select if the arch provides a historic keepinit alias for the retain_initrd
# command line option
#
config ARCH_HAS_KEEPINITRD
bool

# Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
config ARCH_HAS_SET_MEMORY
bool
Expand Down
14 changes: 0 additions & 14 deletions arch/alpha/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,17 +285,3 @@ mem_init(void)
memblock_free_all();
mem_init_print_info(NULL);
}

void
free_initmem(void)
{
free_initmem_default(-1);
}

#ifdef CONFIG_BLK_DEV_INITRD
void
free_initrd_mem(unsigned long start, unsigned long end)
{
free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
#endif
15 changes: 0 additions & 15 deletions arch/arc/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,18 +206,3 @@ void __init mem_init(void)
memblock_free_all();
mem_init_print_info(NULL);
}

/*
* free_initmem: Free all the __init memory.
*/
void __ref free_initmem(void)
{
free_initmem_default(-1);
}

#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
#endif
3 changes: 2 additions & 1 deletion arch/arm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ config ARM
default y
select ARCH_32BIT_OFF_T
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
Expand All @@ -21,6 +21,7 @@ config ARM
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
Expand Down
22 changes: 6 additions & 16 deletions arch/arm/mm/dma-mapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -1577,31 +1577,21 @@ static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
unsigned long uaddr = vma->vm_start;
unsigned long usize = vma->vm_end - vma->vm_start;
struct page **pages = __iommu_get_pages(cpu_addr, attrs);
unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
int err;

if (!pages)
return -ENXIO;

if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off)
if (vma->vm_pgoff >= nr_pages)
return -ENXIO;

pages += off;

do {
int ret = vm_insert_page(vma, uaddr, *pages++);
if (ret) {
pr_err("Remapping memory failed: %d\n", ret);
return ret;
}
uaddr += PAGE_SIZE;
usize -= PAGE_SIZE;
} while (usize > 0);
err = vm_map_pages(vma, pages, nr_pages);
if (err)
pr_err("Remapping memory failed: %d\n", err);

return 0;
return err;
}
static int arm_iommu_mmap_attrs(struct device *dev,
struct vm_area_struct *vma, void *cpu_addr,
Expand Down
25 changes: 6 additions & 19 deletions arch/arm/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,27 +695,14 @@ void free_initmem(void)
}

#ifdef CONFIG_BLK_DEV_INITRD

static int keep_initrd;

void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd) {
if (start == initrd_start)
start = round_down(start, PAGE_SIZE);
if (end == initrd_end)
end = round_up(end, PAGE_SIZE);
if (start == initrd_start)
start = round_down(start, PAGE_SIZE);
if (end == initrd_end)
end = round_up(end, PAGE_SIZE);

poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
free_reserved_area((void *)start, (void *)end, -1, "initrd");
}

static int __init keepinitrd_setup(char *__unused)
{
keep_initrd = 1;
return 1;
}

__setup("keepinitrd", keepinitrd_setup);
#endif
4 changes: 3 additions & 1 deletion arch/arm64/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ config ARM64
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SETUP_DMA_OPS
Expand Down Expand Up @@ -59,6 +60,7 @@ config ARM64
select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT
select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT
select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT
select ARCH_KEEP_MEMBLOCK
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
Expand Down
4 changes: 0 additions & 4 deletions arch/arm64/include/asm/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,4 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,

#include <asm-generic/hugetlb.h>

#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
static inline bool gigantic_page_supported(void) { return true; }
#endif

#endif /* __ASM_HUGETLB_H */
17 changes: 2 additions & 15 deletions arch/arm64/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,24 +578,11 @@ void free_initmem(void)
}

#ifdef CONFIG_BLK_DEV_INITRD

static int keep_initrd __initdata;

void __init free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd) {
free_reserved_area((void *)start, (void *)end, 0, "initrd");
memblock_free(__virt_to_phys(start), end - start);
}
}

static int __init keepinitrd_setup(char *__unused)
{
keep_initrd = 1;
return 1;
free_reserved_area((void *)start, (void *)end, 0, "initrd");
memblock_free(__virt_to_phys(start), end - start);
}

__setup("keepinitrd", keepinitrd_setup);
#endif

/*
Expand Down
Loading

0 comments on commit 318222a

Please sign in to comment.