linux/include/trace/events/writeback.h
Rafael Aquini 54abe19e00 writeback: fix dereferencing NULL mapping->host on writeback_page_template
When commit 19343b5bdd ("mm/page-writeback: introduce tracepoint for
wait_on_page_writeback()") repurposed the writeback_dirty_page trace event
as a template to create its new wait_on_page_writeback trace event, it
ended up opening a window to NULL pointer dereference crashes due to the
(infrequent) occurrence of a race where an access to a page in the
swap-cache happens concurrently with the moment this page is being written
to disk and the tracepoint is enabled:

    BUG: kernel NULL pointer dereference, address: 0000000000000040
    #PF: supervisor read access in kernel mode
    #PF: error_code(0x0000) - not-present page
    PGD 800000010ec0a067 P4D 800000010ec0a067 PUD 102353067 PMD 0
    Oops: 0000 [#1] PREEMPT SMP PTI
    CPU: 1 PID: 1320 Comm: shmem-worker Kdump: loaded Not tainted 6.4.0-rc5+ #13
    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230301gitf80f052277c8-1.fc37 03/01/2023
    RIP: 0010:trace_event_raw_event_writeback_folio_template+0x76/0xf0
    Code: 4d 85 e4 74 5c 49 8b 3c 24 e8 06 98 ee ff 48 89 c7 e8 9e 8b ee ff ba 20 00 00 00 48 89 ef 48 89 c6 e8 fe d4 1a 00 49 8b 04 24 <48> 8b 40 40 48 89 43 28 49 8b 45 20 48 89 e7 48 89 43 30 e8 a2 4d
    RSP: 0000:ffffaad580b6fb60 EFLAGS: 00010246
    RAX: 0000000000000000 RBX: ffff90e38035c01c RCX: 0000000000000000
    RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff90e38035c044
    RBP: ffff90e38035c024 R08: 0000000000000002 R09: 0000000000000006
    R10: ffff90e38035c02e R11: 0000000000000020 R12: ffff90e380bac000
    R13: ffffe3a7456d9200 R14: 0000000000001b81 R15: ffffe3a7456d9200
    FS:  00007f2e4e8a15c0(0000) GS:ffff90e3fbc80000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 0000000000000040 CR3: 00000001150c6003 CR4: 0000000000170ee0
    Call Trace:
     <TASK>
     ? __die+0x20/0x70
     ? page_fault_oops+0x76/0x170
     ? kernelmode_fixup_or_oops+0x84/0x110
     ? exc_page_fault+0x65/0x150
     ? asm_exc_page_fault+0x22/0x30
     ? trace_event_raw_event_writeback_folio_template+0x76/0xf0
     folio_wait_writeback+0x6b/0x80
     shmem_swapin_folio+0x24a/0x500
     ? filemap_get_entry+0xe3/0x140
     shmem_get_folio_gfp+0x36e/0x7c0
     ? find_busiest_group+0x43/0x1a0
     shmem_fault+0x76/0x2a0
     ? __update_load_avg_cfs_rq+0x281/0x2f0
     __do_fault+0x33/0x130
     do_read_fault+0x118/0x160
     do_pte_missing+0x1ed/0x2a0
     __handle_mm_fault+0x566/0x630
     handle_mm_fault+0x91/0x210
     do_user_addr_fault+0x22c/0x740
     exc_page_fault+0x65/0x150
     asm_exc_page_fault+0x22/0x30

This problem arises from the fact that the repurposed writeback_dirty_page
trace event code was written assuming that every pointer to mapping
(struct address_space) would come from a file-mapped page-cache object,
thus mapping->host would always be populated, and that was a valid case
before commit 19343b5bdd.  The swap-cache address space
(swapper_spaces), however, doesn't populate its ->host (struct inode)
pointer, thus leading to the crashes in the corner-case aforementioned.

commit 19343b5bdd ended up breaking the assignment of __entry->name and
__entry->ino for the wait_on_page_writeback tracepoint -- both dependent
on mapping->host carrying a pointer to a valid inode.  The assignment of
__entry->name was fixed by commit 68f23b8906 ("memcg: fix a crash in
wb_workfn when a device disappears"), and this commit fixes the remaining
case, for __entry->ino.

Link: https://lkml.kernel.org/r/20230606233613.1290819-1-aquini@redhat.com
Fixes: 19343b5bdd ("mm/page-writeback: introduce tracepoint for wait_on_page_writeback()")
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Reviewed-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Aristeu Rozanski <aris@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-06-19 13:19:31 -07:00

862 lines
22 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM writeback
#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_WRITEBACK_H
#include <linux/tracepoint.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#define show_inode_state(state) \
__print_flags(state, "|", \
{I_DIRTY_SYNC, "I_DIRTY_SYNC"}, \
{I_DIRTY_DATASYNC, "I_DIRTY_DATASYNC"}, \
{I_DIRTY_PAGES, "I_DIRTY_PAGES"}, \
{I_NEW, "I_NEW"}, \
{I_WILL_FREE, "I_WILL_FREE"}, \
{I_FREEING, "I_FREEING"}, \
{I_CLEAR, "I_CLEAR"}, \
{I_SYNC, "I_SYNC"}, \
{I_DIRTY_TIME, "I_DIRTY_TIME"}, \
{I_REFERENCED, "I_REFERENCED"} \
)
/* enums need to be exported to user space */
#undef EM
#undef EMe
#define EM(a,b) TRACE_DEFINE_ENUM(a);
#define EMe(a,b) TRACE_DEFINE_ENUM(a);
#define WB_WORK_REASON \
EM( WB_REASON_BACKGROUND, "background") \
EM( WB_REASON_VMSCAN, "vmscan") \
EM( WB_REASON_SYNC, "sync") \
EM( WB_REASON_PERIODIC, "periodic") \
EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \
EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \
EM( WB_REASON_FORKER_THREAD, "forker_thread") \
EMe(WB_REASON_FOREIGN_FLUSH, "foreign_flush")
WB_WORK_REASON
/*
* Now redefine the EM() and EMe() macros to map the enums to the strings
* that will be printed in the output.
*/
#undef EM
#undef EMe
#define EM(a,b) { a, b },
#define EMe(a,b) { a, b }
struct wb_writeback_work;
DECLARE_EVENT_CLASS(writeback_folio_template,
TP_PROTO(struct folio *folio, struct address_space *mapping),
TP_ARGS(folio, mapping),
TP_STRUCT__entry (
__array(char, name, 32)
__field(ino_t, ino)
__field(pgoff_t, index)
),
TP_fast_assign(
strscpy_pad(__entry->name,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
__entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0;
__entry->index = folio->index;
),
TP_printk("bdi %s: ino=%lu index=%lu",
__entry->name,
(unsigned long)__entry->ino,
__entry->index
)
);
DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio,
TP_PROTO(struct folio *folio, struct address_space *mapping),
TP_ARGS(folio, mapping)
);
DEFINE_EVENT(writeback_folio_template, folio_wait_writeback,
TP_PROTO(struct folio *folio, struct address_space *mapping),
TP_ARGS(folio, mapping)
);
DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
TP_PROTO(struct inode *inode, int flags),
TP_ARGS(inode, flags),
TP_STRUCT__entry (
__array(char, name, 32)
__field(ino_t, ino)
__field(unsigned long, state)
__field(unsigned long, flags)
),
TP_fast_assign(
struct backing_dev_info *bdi = inode_to_bdi(inode);
/* may be called for files on pseudo FSes w/ unregistered bdi */
strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags;
),
TP_printk("bdi %s: ino=%lu state=%s flags=%s",
__entry->name,
(unsigned long)__entry->ino,
show_inode_state(__entry->state),
show_inode_state(__entry->flags)
)
);
DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty,
TP_PROTO(struct inode *inode, int flags),
TP_ARGS(inode, flags)
);
DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
TP_PROTO(struct inode *inode, int flags),
TP_ARGS(inode, flags)
);
DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
TP_PROTO(struct inode *inode, int flags),
TP_ARGS(inode, flags)
);
#ifdef CREATE_TRACE_POINTS
#ifdef CONFIG_CGROUP_WRITEBACK
static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb)
{
return cgroup_ino(wb->memcg_css->cgroup);
}
static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc)
{
if (wbc->wb)
return __trace_wb_assign_cgroup(wbc->wb);
else
return 1;
}
#else /* CONFIG_CGROUP_WRITEBACK */
static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb)
{
return 1;
}
static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc)
{
return 1;
}
#endif /* CONFIG_CGROUP_WRITEBACK */
#endif /* CREATE_TRACE_POINTS */
#ifdef CONFIG_CGROUP_WRITEBACK
TRACE_EVENT(inode_foreign_history,
TP_PROTO(struct inode *inode, struct writeback_control *wbc,
unsigned int history),
TP_ARGS(inode, wbc, history),
TP_STRUCT__entry(
__array(char, name, 32)
__field(ino_t, ino)
__field(ino_t, cgroup_ino)
__field(unsigned int, history)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
__entry->history = history;
),
TP_printk("bdi %s: ino=%lu cgroup_ino=%lu history=0x%x",
__entry->name,
(unsigned long)__entry->ino,
(unsigned long)__entry->cgroup_ino,
__entry->history
)
);
TRACE_EVENT(inode_switch_wbs,
TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb,
struct bdi_writeback *new_wb),
TP_ARGS(inode, old_wb, new_wb),
TP_STRUCT__entry(
__array(char, name, 32)
__field(ino_t, ino)
__field(ino_t, old_cgroup_ino)
__field(ino_t, new_cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32);
__entry->ino = inode->i_ino;
__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
__entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
),
TP_printk("bdi %s: ino=%lu old_cgroup_ino=%lu new_cgroup_ino=%lu",
__entry->name,
(unsigned long)__entry->ino,
(unsigned long)__entry->old_cgroup_ino,
(unsigned long)__entry->new_cgroup_ino
)
);
TRACE_EVENT(track_foreign_dirty,
TP_PROTO(struct folio *folio, struct bdi_writeback *wb),
TP_ARGS(folio, wb),
TP_STRUCT__entry(
__array(char, name, 32)
__field(u64, bdi_id)
__field(ino_t, ino)
__field(unsigned int, memcg_id)
__field(ino_t, cgroup_ino)
__field(ino_t, page_cgroup_ino)
),
TP_fast_assign(
struct address_space *mapping = folio_mapping(folio);
struct inode *inode = mapping ? mapping->host : NULL;
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->bdi_id = wb->bdi->id;
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
__entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
),
TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
__entry->name,
__entry->bdi_id,
(unsigned long)__entry->ino,
__entry->memcg_id,
(unsigned long)__entry->cgroup_ino,
(unsigned long)__entry->page_cgroup_ino
)
);
TRACE_EVENT(flush_foreign,
TP_PROTO(struct bdi_writeback *wb, unsigned int frn_bdi_id,
unsigned int frn_memcg_id),
TP_ARGS(wb, frn_bdi_id, frn_memcg_id),
TP_STRUCT__entry(
__array(char, name, 32)
__field(ino_t, cgroup_ino)
__field(unsigned int, frn_bdi_id)
__field(unsigned int, frn_memcg_id)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
__entry->frn_bdi_id = frn_bdi_id;
__entry->frn_memcg_id = frn_memcg_id;
),
TP_printk("bdi %s: cgroup_ino=%lu frn_bdi_id=%u frn_memcg_id=%u",
__entry->name,
(unsigned long)__entry->cgroup_ino,
__entry->frn_bdi_id,
__entry->frn_memcg_id
)
);
#endif
DECLARE_EVENT_CLASS(writeback_write_inode_template,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
TP_ARGS(inode, wbc),
TP_STRUCT__entry (
__array(char, name, 32)
__field(ino_t, ino)
__field(int, sync_mode)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
),
TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%lu",
__entry->name,
(unsigned long)__entry->ino,
__entry->sync_mode,
(unsigned long)__entry->cgroup_ino
)
);
DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
TP_ARGS(inode, wbc)
);
DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
TP_ARGS(inode, wbc)
);
DECLARE_EVENT_CLASS(writeback_work_class,
TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work),
TP_ARGS(wb, work),
TP_STRUCT__entry(
__array(char, name, 32)
__field(long, nr_pages)
__field(dev_t, sb_dev)
__field(int, sync_mode)
__field(int, for_kupdate)
__field(int, range_cyclic)
__field(int, for_background)
__field(int, reason)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
__entry->for_kupdate = work->for_kupdate;
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
__entry->reason = work->reason;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
"kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%lu",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
__entry->sync_mode,
__entry->for_kupdate,
__entry->range_cyclic,
__entry->for_background,
__print_symbolic(__entry->reason, WB_WORK_REASON),
(unsigned long)__entry->cgroup_ino
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
DEFINE_EVENT(writeback_work_class, name, \
TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \
TP_ARGS(wb, work))
DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
DEFINE_WRITEBACK_WORK_EVENT(writeback_written);
DEFINE_WRITEBACK_WORK_EVENT(writeback_wait);
TRACE_EVENT(writeback_pages_written,
TP_PROTO(long pages_written),
TP_ARGS(pages_written),
TP_STRUCT__entry(
__field(long, pages)
),
TP_fast_assign(
__entry->pages = pages_written;
),
TP_printk("%ld", __entry->pages)
);
DECLARE_EVENT_CLASS(writeback_class,
TP_PROTO(struct bdi_writeback *wb),
TP_ARGS(wb),
TP_STRUCT__entry(
__array(char, name, 32)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: cgroup_ino=%lu",
__entry->name,
(unsigned long)__entry->cgroup_ino
)
);
#define DEFINE_WRITEBACK_EVENT(name) \
DEFINE_EVENT(writeback_class, name, \
TP_PROTO(struct bdi_writeback *wb), \
TP_ARGS(wb))
DEFINE_WRITEBACK_EVENT(writeback_wake_background);
TRACE_EVENT(writeback_bdi_register,
TP_PROTO(struct backing_dev_info *bdi),
TP_ARGS(bdi),
TP_STRUCT__entry(
__array(char, name, 32)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
),
TP_printk("bdi %s",
__entry->name
)
);
DECLARE_EVENT_CLASS(wbc_class,
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
TP_ARGS(wbc, bdi),
TP_STRUCT__entry(
__array(char, name, 32)
__field(long, nr_to_write)
__field(long, pages_skipped)
__field(int, sync_mode)
__field(int, for_kupdate)
__field(int, for_background)
__field(int, for_reclaim)
__field(int, range_cyclic)
__field(long, range_start)
__field(long, range_end)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
__entry->for_kupdate = wbc->for_kupdate;
__entry->for_background = wbc->for_background;
__entry->for_reclaim = wbc->for_reclaim;
__entry->range_cyclic = wbc->range_cyclic;
__entry->range_start = (long)wbc->range_start;
__entry->range_end = (long)wbc->range_end;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
),
TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
"bgrd=%d reclm=%d cyclic=%d "
"start=0x%lx end=0x%lx cgroup_ino=%lu",
__entry->name,
__entry->nr_to_write,
__entry->pages_skipped,
__entry->sync_mode,
__entry->for_kupdate,
__entry->for_background,
__entry->for_reclaim,
__entry->range_cyclic,
__entry->range_start,
__entry->range_end,
(unsigned long)__entry->cgroup_ino
)
)
#define DEFINE_WBC_EVENT(name) \
DEFINE_EVENT(wbc_class, name, \
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
TP_ARGS(wbc, bdi))
DEFINE_WBC_EVENT(wbc_writepage);
TRACE_EVENT(writeback_queue_io,
TP_PROTO(struct bdi_writeback *wb,
struct wb_writeback_work *work,
unsigned long dirtied_before,
int moved),
TP_ARGS(wb, work, dirtied_before, moved),
TP_STRUCT__entry(
__array(char, name, 32)
__field(unsigned long, older)
__field(long, age)
__field(int, moved)
__field(int, reason)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->older = dirtied_before;
__entry->age = (jiffies - dirtied_before) * 1000 / HZ;
__entry->moved = moved;
__entry->reason = work->reason;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu",
__entry->name,
__entry->older, /* dirtied_before in jiffies */
__entry->age, /* dirtied_before in relative milliseconds */
__entry->moved,
__print_symbolic(__entry->reason, WB_WORK_REASON),
(unsigned long)__entry->cgroup_ino
)
);
TRACE_EVENT(global_dirty_state,
TP_PROTO(unsigned long background_thresh,
unsigned long dirty_thresh
),
TP_ARGS(background_thresh,
dirty_thresh
),
TP_STRUCT__entry(
__field(unsigned long, nr_dirty)
__field(unsigned long, nr_writeback)
__field(unsigned long, background_thresh)
__field(unsigned long, dirty_thresh)
__field(unsigned long, dirty_limit)
__field(unsigned long, nr_dirtied)
__field(unsigned long, nr_written)
),
TP_fast_assign(
__entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY);
__entry->nr_writeback = global_node_page_state(NR_WRITEBACK);
__entry->nr_dirtied = global_node_page_state(NR_DIRTIED);
__entry->nr_written = global_node_page_state(NR_WRITTEN);
__entry->background_thresh = background_thresh;
__entry->dirty_thresh = dirty_thresh;
__entry->dirty_limit = global_wb_domain.dirty_limit;
),
TP_printk("dirty=%lu writeback=%lu "
"bg_thresh=%lu thresh=%lu limit=%lu "
"dirtied=%lu written=%lu",
__entry->nr_dirty,
__entry->nr_writeback,
__entry->background_thresh,
__entry->dirty_thresh,
__entry->dirty_limit,
__entry->nr_dirtied,
__entry->nr_written
)
);
#define KBps(x) ((x) << (PAGE_SHIFT - 10))
TRACE_EVENT(bdi_dirty_ratelimit,
TP_PROTO(struct bdi_writeback *wb,
unsigned long dirty_rate,
unsigned long task_ratelimit),
TP_ARGS(wb, dirty_rate, task_ratelimit),
TP_STRUCT__entry(
__array(char, bdi, 32)
__field(unsigned long, write_bw)
__field(unsigned long, avg_write_bw)
__field(unsigned long, dirty_rate)
__field(unsigned long, dirty_ratelimit)
__field(unsigned long, task_ratelimit)
__field(unsigned long, balanced_dirty_ratelimit)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->write_bw = KBps(wb->write_bandwidth);
__entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
__entry->dirty_ratelimit = KBps(wb->dirty_ratelimit);
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->balanced_dirty_ratelimit =
KBps(wb->balanced_dirty_ratelimit);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: "
"write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
"balanced_dirty_ratelimit=%lu cgroup_ino=%lu",
__entry->bdi,
__entry->write_bw, /* write bandwidth */
__entry->avg_write_bw, /* avg write bandwidth */
__entry->dirty_rate, /* bdi dirty rate */
__entry->dirty_ratelimit, /* base ratelimit */
__entry->task_ratelimit, /* ratelimit with position control */
__entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
(unsigned long)__entry->cgroup_ino
)
);
TRACE_EVENT(balance_dirty_pages,
TP_PROTO(struct bdi_writeback *wb,
unsigned long thresh,
unsigned long bg_thresh,
unsigned long dirty,
unsigned long bdi_thresh,
unsigned long bdi_dirty,
unsigned long dirty_ratelimit,
unsigned long task_ratelimit,
unsigned long dirtied,
unsigned long period,
long pause,
unsigned long start_time),
TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
dirty_ratelimit, task_ratelimit,
dirtied, period, pause, start_time),
TP_STRUCT__entry(
__array( char, bdi, 32)
__field(unsigned long, limit)
__field(unsigned long, setpoint)
__field(unsigned long, dirty)
__field(unsigned long, bdi_setpoint)
__field(unsigned long, bdi_dirty)
__field(unsigned long, dirty_ratelimit)
__field(unsigned long, task_ratelimit)
__field(unsigned int, dirtied)
__field(unsigned int, dirtied_pause)
__field(unsigned long, paused)
__field( long, pause)
__field(unsigned long, period)
__field( long, think)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
freerun) / 2;
__entry->dirty = dirty;
__entry->bdi_setpoint = __entry->setpoint *
bdi_thresh / (thresh + 1);
__entry->bdi_dirty = bdi_dirty;
__entry->dirty_ratelimit = KBps(dirty_ratelimit);
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->dirtied = dirtied;
__entry->dirtied_pause = current->nr_dirtied_pause;
__entry->think = current->dirty_paused_when == 0 ? 0 :
(long)(jiffies - current->dirty_paused_when) * 1000/HZ;
__entry->period = period * 1000 / HZ;
__entry->pause = pause * 1000 / HZ;
__entry->paused = (jiffies - start_time) * 1000 / HZ;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: "
"limit=%lu setpoint=%lu dirty=%lu "
"bdi_setpoint=%lu bdi_dirty=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
"dirtied=%u dirtied_pause=%u "
"paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%lu",
__entry->bdi,
__entry->limit,
__entry->setpoint,
__entry->dirty,
__entry->bdi_setpoint,
__entry->bdi_dirty,
__entry->dirty_ratelimit,
__entry->task_ratelimit,
__entry->dirtied,
__entry->dirtied_pause,
__entry->paused, /* ms */
__entry->pause, /* ms */
__entry->period, /* ms */
__entry->think, /* ms */
(unsigned long)__entry->cgroup_ino
)
);
TRACE_EVENT(writeback_sb_inodes_requeue,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__array(char, name, 32)
__field(ino_t, ino)
__field(unsigned long, state)
__field(unsigned long, dirtied_when)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
__entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
),
TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%lu",
__entry->name,
(unsigned long)__entry->ino,
show_inode_state(__entry->state),
__entry->dirtied_when,
(jiffies - __entry->dirtied_when) / HZ,
(unsigned long)__entry->cgroup_ino
)
);
DECLARE_EVENT_CLASS(writeback_single_inode_template,
TP_PROTO(struct inode *inode,
struct writeback_control *wbc,
unsigned long nr_to_write
),
TP_ARGS(inode, wbc, nr_to_write),
TP_STRUCT__entry(
__array(char, name, 32)
__field(ino_t, ino)
__field(unsigned long, state)
__field(unsigned long, dirtied_when)
__field(unsigned long, writeback_index)
__field(long, nr_to_write)
__field(unsigned long, wrote)
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->nr_to_write = nr_to_write;
__entry->wrote = nr_to_write - wbc->nr_to_write;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
),
TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
"index=%lu to_write=%ld wrote=%lu cgroup_ino=%lu",
__entry->name,
(unsigned long)__entry->ino,
show_inode_state(__entry->state),
__entry->dirtied_when,
(jiffies - __entry->dirtied_when) / HZ,
__entry->writeback_index,
__entry->nr_to_write,
__entry->wrote,
(unsigned long)__entry->cgroup_ino
)
);
DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start,
TP_PROTO(struct inode *inode,
struct writeback_control *wbc,
unsigned long nr_to_write),
TP_ARGS(inode, wbc, nr_to_write)
);
DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
TP_PROTO(struct inode *inode,
struct writeback_control *wbc,
unsigned long nr_to_write),
TP_ARGS(inode, wbc, nr_to_write)
);
DECLARE_EVENT_CLASS(writeback_inode_template,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field(unsigned long, state )
__field( __u16, mode )
__field(unsigned long, dirtied_when )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->mode = inode->i_mode;
__entry->dirtied_when = inode->dirtied_when;
),
TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long)__entry->ino, __entry->dirtied_when,
show_inode_state(__entry->state), __entry->mode)
);
DEFINE_EVENT(writeback_inode_template, writeback_lazytime,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
DEFINE_EVENT(writeback_inode_template, writeback_lazytime_iput,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
DEFINE_EVENT(writeback_inode_template, writeback_dirty_inode_enqueue,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
/*
* Inode writeback list tracking.
*/
DEFINE_EVENT(writeback_inode_template, sb_mark_inode_writeback,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
DEFINE_EVENT(writeback_inode_template, sb_clear_inode_writeback,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
#endif /* _TRACE_WRITEBACK_H */
/* This part must be outside protection */
#include <trace/define_trace.h>