linux/fs/proc/task_nommu.c
Ben Wolsieffer 00d2cb8066 proc: nommu: /proc/<pid>/maps: release mmap read lock
commit 578d7699e5 upstream.

The no-MMU implementation of /proc/<pid>/map doesn't normally release
the mmap read lock, because it uses !IS_ERR_OR_NULL(_vml) to determine
whether to release the lock.  Since _vml is NULL when the end of the
mappings is reached, the lock is not released.

Reading /proc/1/maps twice doesn't cause a hang because it only
takes the read lock, which can be taken multiple times and therefore
doesn't show any problem if the lock isn't released. Instead, you need
to perform some operation that attempts to take the write lock after
reading /proc/<pid>/maps. To actually reproduce the bug, compile the
following code as 'proc_maps_bug':

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(int argc, char *argv[]) {
        void *buf;
        sleep(1);
        buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        puts("mmap returned");
        return 0;
}

Then, run:

  ./proc_maps_bug &; cat /proc/$!/maps; fg

Without this patch, mmap() will hang and the command will never
complete.

This code was incorrectly adapted from the MMU implementation, which at
the time released the lock in m_next() before returning the last entry.

The MMU implementation has diverged further from the no-MMU version since
then, so this patch brings their locking and error handling into sync,
fixing the bug and hopefully avoiding similar issues in the future.

Link: https://lkml.kernel.org/r/20230914163019.4050530-2-ben.wolsieffer@hefring.com
Fixes: 47fecca15c ("fs/proc/task_nommu.c: don't use priv->task->mm")
Signed-off-by: Ben Wolsieffer <ben.wolsieffer@hefring.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Giulio Benetti <giulio.benetti@benettiengineering.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-10-06 13:18:22 +02:00

306 lines
6.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fs_struct.h>
#include <linux/mount.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sched/mm.h>
#include "internal.h"
/*
* Logic: we've got two memory sums for each process, "shared", and
* "non-shared". Shared memory may get counted more than once, for
* each process that owns it. Non-shared memory is counted
* accurately.
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
bytes += kobjsize(vma);
region = vma->vm_region;
if (region) {
size = kobjsize(region);
size += region->vm_end - region->vm_start;
} else {
size = vma->vm_end - vma->vm_start;
}
if (atomic_read(&mm->mm_count) > 1 ||
vma->vm_flags & VM_MAYSHARE) {
sbytes += size;
} else {
bytes += size;
if (region)
slack = region->vm_end - vma->vm_end;
}
}
if (atomic_read(&mm->mm_count) > 1)
sbytes += kobjsize(mm);
else
bytes += kobjsize(mm);
if (current->fs && current->fs->users > 1)
sbytes += kobjsize(current->fs);
else
bytes += kobjsize(current->fs);
if (current->files && atomic_read(&current->files->count) > 1)
sbytes += kobjsize(current->files);
else
bytes += kobjsize(current->files);
if (current->sighand && refcount_read(&current->sighand->count) > 1)
sbytes += kobjsize(current->sighand);
else
bytes += kobjsize(current->sighand);
bytes += kobjsize(current); /* includes kernel stack */
seq_printf(m,
"Mem:\t%8lu bytes\n"
"Slack:\t%8lu bytes\n"
"Shared:\t%8lu bytes\n",
bytes, slack, sbytes);
mmap_read_unlock(mm);
}
unsigned long task_vsize(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct rb_node *p;
unsigned long vsize = 0;
mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
vsize += vma->vm_end - vma->vm_start;
}
mmap_read_unlock(mm);
return vsize;
}
unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long size = kobjsize(mm);
mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
size += kobjsize(vma);
region = vma->vm_region;
if (region) {
size += kobjsize(region);
size += region->vm_end - region->vm_start;
}
}
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK))
>> PAGE_SHIFT;
mmap_read_unlock(mm);
size >>= PAGE_SHIFT;
size += *text + *data;
*resident = size;
return size;
}
static int is_stack(struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
/*
* We make no effort to guess what a given thread considers to be
* its "stack". It's not even well-defined for programs written
* languages like Go.
*/
return vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack;
}
/*
* display a single VMA to a sequenced file
*/
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
int flags;
unsigned long long pgoff = 0;
flags = vma->vm_flags;
file = vma->vm_file;
if (file) {
struct inode *inode = file_inode(vma->vm_file);
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
}
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m,
"%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
pgoff,
MAJOR(dev), MINOR(dev), ino);
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
} else if (mm && is_stack(vma)) {
seq_pad(m, ' ');
seq_puts(m, "[stack]");
}
seq_putc(m, '\n');
return 0;
}
/*
* display mapping lines for a particular process's /proc/pid/maps
*/
static int show_map(struct seq_file *m, void *_p)
{
struct rb_node *p = _p;
return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
struct mm_struct *mm;
struct rb_node *p;
loff_t n = *pos;
/* pin the task and mm whilst we play with them */
priv->task = get_proc_task(priv->inode);
if (!priv->task)
return ERR_PTR(-ESRCH);
mm = priv->mm;
if (!mm || !mmget_not_zero(mm)) {
put_task_struct(priv->task);
priv->task = NULL;
return NULL;
}
if (mmap_read_lock_killable(mm)) {
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
return ERR_PTR(-EINTR);
}
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
return p;
return NULL;
}
static void m_stop(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
struct mm_struct *mm = priv->mm;
if (!priv->task)
return;
mmap_read_unlock(mm);
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
}
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
struct rb_node *p = _p;
(*pos)++;
return p ? rb_next(p) : NULL;
}
static const struct seq_operations proc_pid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_map
};
static int maps_open(struct inode *inode, struct file *file,
const struct seq_operations *ops)
{
struct proc_maps_private *priv;
priv = __seq_open_private(file, ops, sizeof(*priv));
if (!priv)
return -ENOMEM;
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
if (IS_ERR(priv->mm)) {
int err = PTR_ERR(priv->mm);
seq_release_private(inode, file);
return err;
}
return 0;
}
static int map_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
struct proc_maps_private *priv = seq->private;
if (priv->mm)
mmdrop(priv->mm);
return seq_release_private(inode, file);
}
static int pid_maps_open(struct inode *inode, struct file *file)
{
return maps_open(inode, file, &proc_pid_maps_ops);
}
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = map_release,
};