From eda25860bf6e71932311f1b5acdf8fc05cd84ff3 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:27 +0206 Subject: [PATCH 01/54] printk: Add notation to console_srcu locking kernel/printk/printk.c:284:5: sparse: sparse: context imbalance in 'console_srcu_read_lock' - wrong count at exit include/linux/srcu.h:301:9: sparse: sparse: context imbalance in 'console_srcu_read_unlock' - unexpected unlock Fixes: 6c4afa79147e ("printk: Prepare for SRCU console list protection") Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20240820063001.36405-2-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c22b07049c38..93c67eb7ca9e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -282,6 +282,7 @@ EXPORT_SYMBOL(console_list_unlock); * Return: A cookie to pass to console_srcu_read_unlock(). */ int console_srcu_read_lock(void) + __acquires(&console_srcu) { return srcu_read_lock_nmisafe(&console_srcu); } @@ -295,6 +296,7 @@ EXPORT_SYMBOL(console_srcu_read_lock); * Counterpart to console_srcu_read_lock() */ void console_srcu_read_unlock(int cookie) + __releases(&console_srcu) { srcu_read_unlock_nmisafe(&console_srcu, cookie); } From f37b105faef637cdaff334c0369b451410566ca0 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:28 +0206 Subject: [PATCH 02/54] printk: nbcon: Consolidate alloc() and init() Rather than splitting the nbcon allocation and initialization into two pieces, perform all initialization in nbcon_alloc(). Later, the initial sequence is calculated and can be explicitly set using nbcon_seq_force(). This removes the need for the strong rules of nbcon_init() that even included a BUG_ON(). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-3-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 -- kernel/printk/nbcon.c | 37 +++++++++++-------------------------- kernel/printk/printk.c | 2 +- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 19dcc5832651..398ecb40d279 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -75,7 +75,6 @@ u16 printk_parse_prefix(const char *text, int *level, u64 nbcon_seq_read(struct console *con); void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); -void nbcon_init(struct console *con); void nbcon_free(struct console *con); #else @@ -96,7 +95,6 @@ static inline bool printk_percpu_data_ready(void) { return false; } static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } -static inline void nbcon_init(struct console *con) { } static inline void nbcon_free(struct console *con) { } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index c8093bcc01fe..670692dc9b10 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -929,17 +929,22 @@ update_con: } /** - * nbcon_alloc - Allocate buffers needed by the nbcon console - * @con: Console to allocate buffers for + * nbcon_alloc - Allocate and init the nbcon console specific data + * @con: Console to initialize * - * Return: True on success. False otherwise and the console cannot - * be used. + * Return: True if the console was fully allocated and initialized. + * Otherwise @con must not be registered. * - * This is not part of nbcon_init() because buffer allocation must - * be performed earlier in the console registration process. + * When allocation and init was successful, the console must be properly + * freed using nbcon_free() once it is no longer needed. */ bool nbcon_alloc(struct console *con) { + struct nbcon_state state = { }; + + nbcon_state_set(con, &state); + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), 0); + if (con->flags & CON_BOOT) { /* * Boot console printing is synchronized with legacy console @@ -958,26 +963,6 @@ bool nbcon_alloc(struct console *con) return true; } -/** - * nbcon_init - Initialize the nbcon console specific data - * @con: Console to initialize - * - * nbcon_alloc() *must* be called and succeed before this function - * is called. - * - * This function expects that the legacy @con->seq has been set. - */ -void nbcon_init(struct console *con) -{ - struct nbcon_state state = { }; - - /* nbcon_alloc() must have been called and successful! */ - BUG_ON(!con->pbufs); - - nbcon_seq_force(con, con->seq); - nbcon_state_set(con, &state); -} - /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 93c67eb7ca9e..a47017c932be 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3618,7 +3618,7 @@ void register_console(struct console *newcon) console_init_seq(newcon, bootcon_registered); if (newcon->flags & CON_NBCON) - nbcon_init(newcon); + nbcon_seq_force(newcon, newcon->seq); /* * Put this console in the list - keep the From d3ff380d47b6312d537c00829008528d1caad639 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 20 Aug 2024 08:35:29 +0206 Subject: [PATCH 03/54] printk: Properly deal with nbcon consoles on seq init If a non-boot console is registering and boot consoles exist, the consoles are flushed before being unregistered. This allows the non-boot console to continue where the boot console left off. If for whatever reason flushing fails, the lowest seq found from any of the enabled boot consoles is used. Until now con->seq was checked. However, if it is an nbcon boot console, the function nbcon_seq_read() must be used to read seq because con->seq is not updated for nbcon consoles. Check if it is an nbcon boot console and if so call nbcon_seq_read() to read seq. Also, avoid usage of con->seq as temporary storage of the starting record. Instead, rename console_init_seq() to get_init_console_seq() and just return the value. For nbcon consoles set the sequence via nbcon_seq_force(), for legacy consoles set con->seq. The cleaned design should make sure that the value stays and is set before the console is added to the console list. It also unifies the sequence number initialization for legacy and nbcon consoles. Reviewed-by: John Ogness Link: https://lore.kernel.org/r/20240820063001.36405-4-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 3 --- kernel/printk/printk.c | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 670692dc9b10..776746d20fc0 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -172,9 +172,6 @@ void nbcon_seq_force(struct console *con, u64 seq) u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); - - /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ - con->seq = 0; } /** diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a47017c932be..20c39505f5aa 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3448,19 +3448,21 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -static void console_init_seq(struct console *newcon, bool bootcon_registered) +/* Return the starting sequence number for a newly registered console. */ +static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered) { struct console *con; bool handover; + u64 init_seq; if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); - newcon->seq = syslog_seq; + init_seq = syslog_seq; mutex_unlock(&syslog_lock); } else { /* Begin with next message added to ringbuffer. */ - newcon->seq = prb_next_seq(prb); + init_seq = prb_next_seq(prb); /* * If any enabled boot consoles are due to be unregistered @@ -3481,7 +3483,7 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) * Flush all consoles and set the console to start at * the next unprinted sequence number. */ - if (!console_flush_all(true, &newcon->seq, &handover)) { + if (!console_flush_all(true, &init_seq, &handover)) { /* * Flushing failed. Just choose the lowest * sequence of the enabled boot consoles. @@ -3494,19 +3496,30 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) if (handover) console_lock(); - newcon->seq = prb_next_seq(prb); + init_seq = prb_next_seq(prb); for_each_console(con) { - if ((con->flags & CON_BOOT) && - (con->flags & CON_ENABLED) && - con->seq < newcon->seq) { - newcon->seq = con->seq; + u64 seq; + + if (!(con->flags & CON_BOOT) || + !(con->flags & CON_ENABLED)) { + continue; } + + if (con->flags & CON_NBCON) + seq = nbcon_seq_read(con); + else + seq = con->seq; + + if (seq < init_seq) + init_seq = seq; } } console_unlock(); } } + + return init_seq; } #define console_first() \ @@ -3538,6 +3551,7 @@ void register_console(struct console *newcon) struct console *con; bool bootcon_registered = false; bool realcon_registered = false; + u64 init_seq; int err; console_list_lock(); @@ -3615,10 +3629,13 @@ void register_console(struct console *newcon) } newcon->dropped = 0; - console_init_seq(newcon, bootcon_registered); + init_seq = get_init_console_seq(newcon, bootcon_registered); - if (newcon->flags & CON_NBCON) - nbcon_seq_force(newcon, newcon->seq); + if (newcon->flags & CON_NBCON) { + nbcon_seq_force(newcon, init_seq); + } else { + newcon->seq = init_seq; + } /* * Put this console in the list - keep the From 0e1d5731d3c1e2214249ef36dcd13ad51ad304cf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 08:35:30 +0206 Subject: [PATCH 04/54] printk: Check printk_deferred_enter()/_exit() usage Add validation that printk_deferred_enter()/_exit() are called in non-migration contexts. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-5-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 9 +++++---- kernel/printk/internal.h | 3 +++ kernel/printk/printk_safe.c | 12 ++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index b937cefcb31c..eee8e97da681 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -161,15 +161,16 @@ int _printk(const char *fmt, ...); */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); -extern void __printk_safe_enter(void); -extern void __printk_safe_exit(void); +extern void __printk_deferred_enter(void); +extern void __printk_deferred_exit(void); + /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ -#define printk_deferred_enter __printk_safe_enter -#define printk_deferred_exit __printk_safe_exit +#define printk_deferred_enter() __printk_deferred_enter() +#define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 398ecb40d279..dc8bc0890fd2 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -53,6 +53,9 @@ int vprintk_store(int facility, int level, __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + bool printk_percpu_data_ready(void); #define printk_safe_enter_irqsave(flags) \ diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 6d10927a07d8..4421ccac3113 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -26,6 +26,18 @@ void __printk_safe_exit(void) this_cpu_dec(printk_context); } +void __printk_deferred_enter(void) +{ + cant_migrate(); + __printk_safe_enter(); +} + +void __printk_deferred_exit(void) +{ + cant_migrate(); + __printk_safe_exit(); +} + asmlinkage int vprintk(const char *fmt, va_list args) { #ifdef CONFIG_KGDB_KDB From 8c9dab2c55ad74680728c72949971b20d70b56ca Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:31 +0206 Subject: [PATCH 05/54] printk: nbcon: Clarify rules of the owner/waiter matching The functions nbcon_owner_matches() and nbcon_waiter_matches() use a minimal set of data to determine if a context matches. The existing kerneldoc and comments were not clear enough and caused the printk folks to re-prove that the functions are indeed reliable in all cases. Update and expand the explanations so that it is clear that the implementations are sufficient for all cases. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-6-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 56 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 776746d20fc0..931b8f086902 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -228,6 +228,13 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, struct nbcon_state new; do { + /* + * Panic does not imply that the console is owned. However, it + * is critical that non-panic CPUs during panic are unable to + * acquire ownership in order to satisfy the assumptions of + * nbcon_waiter_matches(). In particular, the assumption that + * lower priorities are ignored during panic. + */ if (other_cpu_in_panic()) return -EPERM; @@ -259,18 +266,29 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) /* * The request context is well defined by the @req_prio because: * - * - Only a context with a higher priority can take over the request. + * - Only a context with a priority higher than the owner can become + * a waiter. + * - Only a context with a priority higher than the waiter can + * directly take over the request. * - There are only three priorities. * - Only one CPU is allowed to request PANIC priority. * - Lower priorities are ignored during panic() until reboot. * * As a result, the following scenario is *not* possible: * - * 1. Another context with a higher priority directly takes ownership. - * 2. The higher priority context releases the ownership. - * 3. A lower priority context takes the ownership. - * 4. Another context with the same priority as this context + * 1. This context is currently a waiter. + * 2. Another context with a higher priority than this context + * directly takes ownership. + * 3. The higher priority context releases the ownership. + * 4. Another lower priority context takes the ownership. + * 5. Another context with the same priority as this context * creates a request and starts waiting. + * + * Event #1 implies this context is EMERGENCY. + * Event #2 implies the new context is PANIC. + * Event #3 occurs when panic() has flushed the console. + * Events #4 and #5 are not possible due to the other_cpu_in_panic() + * check in nbcon_context_try_acquire_direct(). */ return (cur->req_prio == expected_prio); @@ -578,11 +596,29 @@ static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, int expected_prio) { /* - * Since consoles can only be acquired by higher priorities, - * owning contexts are uniquely identified by @prio. However, - * since contexts can unexpectedly lose ownership, it is - * possible that later another owner appears with the same - * priority. For this reason @cpu is also needed. + * A similar function, nbcon_waiter_matches(), only deals with + * EMERGENCY and PANIC priorities. However, this function must also + * deal with the NORMAL priority, which requires additional checks + * and constraints. + * + * For the case where preemption and interrupts are disabled, it is + * enough to also verify that the owning CPU has not changed. + * + * For the case where preemption or interrupts are enabled, an + * external synchronization method *must* be used. In particular, + * the driver-specific locking mechanism used in device_lock() + * (including disabling migration) should be used. It prevents + * scenarios such as: + * + * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and + * is scheduled out. + * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY + * and releases it. + * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X] + * and is scheduled out. + * 4. [Task A] gets running on [CPU X] and sees that the console is + * still owned by a task on [CPU X] with NBON_PRIO_NORMAL. Thus + * [Task A] thinks it is the owner when it is not. */ if (cur->prio != expected_prio) From b7049d88c1763d5f133b0e93e39da8e89a9f944b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:32 +0206 Subject: [PATCH 06/54] printk: nbcon: Remove return value for write_atomic() The return value of write_atomic() does not provide any useful information. On the contrary, it makes things more complicated for the caller to appropriately deal with the information. Change write_atomic() to not have a return value. If the message did not get printed due to loss of ownership, the caller will notice this on its own. If ownership was not lost, it will be assumed that the driver successfully printed the message and the sequence number for that console will be incremented. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 +- kernel/printk/nbcon.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 31a8f5b85f5d..577b157fe4fb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -345,7 +345,7 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - bool (*write_atomic)(struct console *con, + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 931b8f086902..f279f839741a 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -885,7 +885,6 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) unsigned long con_dropped; struct nbcon_state cur; unsigned long dropped; - bool done; /* * The printk buffers are filled within an unsafe section. This @@ -925,16 +924,16 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) wctxt->unsafe_takeover = cur.unsafe_takeover; if (con->write_atomic) { - done = con->write_atomic(con, wctxt); + con->write_atomic(con, wctxt); } else { - nbcon_context_release(ctxt); + /* + * This function should never be called for legacy consoles. + * Handle it as if ownership was lost and try to continue. + */ WARN_ON_ONCE(1); - done = false; - } - - /* If not done, the emit was aborted. */ - if (!done) + nbcon_context_release(ctxt); return false; + } /* * Since any dropped message was successfully output, reset the From 7d56936c1e5208b5eeea2a9a2f70e85248f6da49 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:33 +0206 Subject: [PATCH 07/54] printk: nbcon: Add detailed doc for write_atomic() The write_atomic() callback has special requirements and is allowed to use special helper functions. Provide detailed documentation of the callback so that a developer has a chance of implementing it correctly. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-8-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 577b157fe4fb..35c64ee3827b 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -303,7 +303,7 @@ struct nbcon_write_context { /** * struct console - The console descriptor structure * @name: The name of the console driver - * @write: Write callback to output messages (Optional) + * @write: Legacy write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) @@ -320,7 +320,6 @@ struct nbcon_write_context { * @data: Driver private data * @node: hlist node for the console list * - * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -345,8 +344,34 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - void (*write_atomic)(struct console *con, - struct nbcon_write_context *wctxt); + + /** + * @write_atomic: + * + * NBCON callback to write out text in any context. (Optional) + * + * This callback is called with the console already acquired. However, + * a higher priority context is allowed to take it over by default. + * + * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe() + * around any code where the takeover is not safe, for example, when + * manipulating the serial port registers. + * + * nbcon_enter_unsafe() will fail if the context has lost the console + * ownership in the meantime. In this case, the callback is no longer + * allowed to go forward. It must back out immediately and carefully. + * The buffer content is also no longer trusted since it no longer + * belongs to the context. + * + * The callback should allow the takeover whenever it is safe. It + * increases the chance to see messages when the system is in trouble. + * + * The callback can be called from any context (including NMI). + * Therefore it must avoid usage of any locking and instead rely + * on the console ownership for synchronization. + */ + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; From 7a16a771890746b4060333d665ebe674945a3cfa Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:34 +0206 Subject: [PATCH 08/54] printk: nbcon: Add callbacks to synchronize with driver Console drivers typically must deal with access to the hardware via user input/output (such as an interactive login shell) and output of kernel messages via printk() calls. To provide the necessary synchronization, usually some driver-specific locking mechanism is used (for example, the port spinlock for uart serial consoles). Until now, usage of this driver-specific locking has been hidden from the printk-subsystem and implemented within the various console callbacks. However, nbcon consoles would need to use it even in the generic code. Add device_lock() and device_unlock() callback which will need to get implemented by nbcon consoles. The callbacks will use whatever synchronization mechanism the driver is using for itself. The minimum requirement is to prevent CPU migration. It would allow a context friendly acquiring of nbcon console ownership in non-emergency and non-panic context. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-9-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/include/linux/console.h b/include/linux/console.h index 35c64ee3827b..46b3c210b931 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -372,6 +372,49 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @device_lock: + * + * NBCON callback to begin synchronization with driver code. + * + * Console drivers typically must deal with access to the hardware + * via user input/output (such as an interactive login shell) and + * output of kernel messages via printk() calls. This callback is + * called by the printk-subsystem whenever it needs to synchronize + * with hardware access by the driver. It should be implemented to + * use whatever synchronization mechanism the driver is using for + * itself (for example, the port lock for uart serial consoles). + * + * The callback is always called from task context. It may use any + * synchronization method required by the driver. + * + * IMPORTANT: The callback MUST disable migration. The console driver + * may be using a synchronization mechanism that already takes + * care of this (such as spinlocks). Otherwise this function must + * explicitly call migrate_disable(). + * + * The flags argument is provided as a convenience to the driver. It + * will be passed again to device_unlock(). It can be ignored if the + * driver does not need it. + */ + void (*device_lock)(struct console *con, unsigned long *flags); + + /** + * @device_unlock: + * + * NBCON callback to finish synchronization with driver code. + * + * It is the counterpart to device_lock(). + * + * This callback is always called from task context. It must + * appropriately re-enable migration (depending on how device_lock() + * disabled migration). + * + * The flags argument is the value of the same variable that was + * passed to device_lock(). + */ + void (*device_unlock)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; From e55c3bcf380c7593d768f31994eff63935081d06 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:35 +0206 Subject: [PATCH 09/54] printk: nbcon: Use driver synchronization while (un)registering Console drivers typically have to deal with access to the hardware via user input/output (such as an interactive login shell) and output of kernel messages via printk() calls. They use some classic driver-specific locking mechanism in most situations. But console->write_atomic() callbacks, used by nbcon consoles, are synchronized only by acquiring the console context. The synchronization via the console context ownership is possible only when the console driver is registered. It is when a particular device driver is connected with a particular console driver. The two synchronization mechanisms must be synchronized between each other. It is tricky because the console context ownership is quite special. It might be taken over by a higher priority context. Also CPU migration must be disabled. The most tricky part is to (dis)connect these two mechanisms during the console (un)registration. Use the driver-specific locking callbacks: device_lock(), device_unlock(). They allow taking the device-specific lock while the device is being (un)registered by the related console driver. For example, these callbacks lock/unlock the port lock for serial port drivers. Note that the driver-specific locking is only needed during (un)register if it is an nbcon console with the write_atomic() callback implemented. If write_atomic() is not implemented, the driver should never attempt to access the hardware without first acquiring its driver-specific lock. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-10-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 20c39505f5aa..4cd2c50dd06d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3548,9 +3548,11 @@ static int unregister_console_locked(struct console *console); */ void register_console(struct console *newcon) { - struct console *con; + bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic; bool bootcon_registered = false; bool realcon_registered = false; + struct console *con; + unsigned long flags; u64 init_seq; int err; @@ -3637,6 +3639,19 @@ void register_console(struct console *newcon) newcon->seq = init_seq; } + /* + * If another context is actively using the hardware of this new + * console, it will not be aware of the nbcon synchronization. This + * is a risk that two contexts could access the hardware + * simultaneously if this new console is used for atomic printing + * and the other context is still using the hardware. + * + * Use the driver synchronization to ensure that the hardware is not + * in use while this new console transitions to being registered. + */ + if (use_device_lock) + newcon->device_lock(newcon, &flags); + /* * Put this console in the list - keep the * preferred driver at the head of the list. @@ -3661,6 +3676,10 @@ void register_console(struct console *newcon) * register_console() completes. */ + /* This new console is now registered. */ + if (use_device_lock) + newcon->device_unlock(newcon, flags); + console_sysfs_notify(); /* @@ -3689,6 +3708,8 @@ EXPORT_SYMBOL(register_console); /* Must be called under console_list_lock(). */ static int unregister_console_locked(struct console *console) { + bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; + unsigned long flags; int res; lockdep_assert_console_list_lock_held(); @@ -3707,8 +3728,18 @@ static int unregister_console_locked(struct console *console) if (!console_is_registered_locked(console)) return -ENODEV; + /* + * Use the driver synchronization to ensure that the hardware is not + * in use while this console transitions to being unregistered. + */ + if (use_device_lock) + console->device_lock(console, &flags); + hlist_del_init_rcu(&console->node); + if (use_device_lock) + console->device_unlock(console, flags); + /* * * If this isn't the last console and it has CON_CONSDEV set, we From 77e73c0687f5bc884fa1b0cb97aca912bcc01957 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:36 +0206 Subject: [PATCH 10/54] serial: core: Provide low-level functions to lock port It will be necessary at times for the uart nbcon console drivers to acquire the port lock directly (without the additional nbcon functionality of the port lock wrappers). These are special cases such as the implementation of the device_lock()/device_unlock() callbacks or for internal port lock wrapper synchronization. Provide low-level variants __uart_port_lock_irqsave() and __uart_port_unlock_irqrestore() for this purpose. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240820063001.36405-11-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aea25eef9a1a..8872cd21e70a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -590,6 +590,24 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure From eabd4600dafacf9895184259373f2445ff748654 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:37 +0206 Subject: [PATCH 11/54] serial: core: Introduce wrapper to set @uart_port->cons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce uart_port_set_cons() as a wrapper to set @cons of a uart_port. The wrapper sets @cons under the port lock in order to prevent @cons from disappearing while another context is holding the port lock. This is necessary for a follow-up commit relating to the port lock wrappers, which rely on @cons not changing between lock and unlock. Signed-off-by: John Ogness Tested-by: Théo Lebrun # EyeQ5, AMBA-PL011 Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240820063001.36405-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- drivers/tty/serial/8250/8250_core.c | 6 +++--- drivers/tty/serial/amba-pl011.c | 2 +- drivers/tty/serial/serial_core.c | 16 ++++++++-------- include/linux/serial_core.h | 17 +++++++++++++++++ 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 29e4b83e0376..5f9f06911795 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -423,11 +423,11 @@ static int univ8250_console_setup(struct console *co, char *options) port = &serial8250_ports[co->index].port; /* link port to console */ - port->cons = co; + uart_port_set_cons(port, co); retval = serial8250_console_setup(port, options, false); if (retval != 0) - port->cons = NULL; + uart_port_set_cons(port, NULL); return retval; } @@ -485,7 +485,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx, continue; co->index = i; - port->cons = co; + uart_port_set_cons(port, co); return serial8250_console_setup(port, options, true); } diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8b1644f5411e..7d0134ecd82f 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2480,7 +2480,7 @@ static int pl011_console_match(struct console *co, char *name, int idx, continue; co->index = i; - port->cons = co; + uart_port_set_cons(port, co); return pl011_console_setup(co, options); } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 5bea3af46abc..1e3e28e364df 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -3176,8 +3176,15 @@ static int serial_core_add_one_port(struct uart_driver *drv, struct uart_port *u state->uart_port = uport; uport->state = state; + /* + * If this port is in use as a console then the spinlock is already + * initialised. + */ + if (!uart_console_registered(uport)) + uart_port_spin_lock_init(uport); + state->pm_state = UART_PM_STATE_UNDEFINED; - uport->cons = drv->cons; + uart_port_set_cons(uport, drv->cons); uport->minor = drv->tty_driver->minor_start + uport->line; uport->name = kasprintf(GFP_KERNEL, "%s%d", drv->dev_name, drv->tty_driver->name_base + uport->line); @@ -3186,13 +3193,6 @@ static int serial_core_add_one_port(struct uart_driver *drv, struct uart_port *u goto out; } - /* - * If this port is in use as a console then the spinlock is already - * initialised. - */ - if (!uart_console_registered(uport)) - uart_port_spin_lock_init(uport); - if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8872cd21e70a..2cf03ff2056a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -608,6 +608,23 @@ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned spin_unlock_irqrestore(&up->lock, flags); } +/** + * uart_port_set_cons - Safely set the @cons field for a uart + * @up: The uart port to set + * @con: The new console to set to + * + * This function must be used to set @up->cons. It uses the port lock to + * synchronize with the port lock wrappers in order to ensure that the console + * cannot change or disappear while another context is holding the port lock. + */ +static inline void uart_port_set_cons(struct uart_port *up, struct console *con) +{ + unsigned long flags; + + __uart_port_lock_irqsave(up, &flags); + up->cons = con; + __uart_port_unlock_irqrestore(up, flags); +} /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure From dc219d8d858d95549543aa7a41b6e64a0da9e406 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:38 +0206 Subject: [PATCH 12/54] console: Improve console_srcu_read_flags() comments It was not clear when exactly console_srcu_read_flags() must be used vs. directly reading @console->flags. Refactor and clarify that console_srcu_read_flags() is only needed if the console is registered or the caller is in a context where the registration status of the console may change (due to another context). The function requires the caller holds @console_srcu, which will ensure that the caller sees an appropriate @flags value for the registered console and that exit/cleanup routines will not run if the console is in the process of unregistration. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-13-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 46b3c210b931..aafe3121b74e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -446,28 +446,34 @@ extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** - * console_srcu_read_flags - Locklessly read the console flags + * console_srcu_read_flags - Locklessly read flags of a possibly registered + * console * @con: struct console pointer of console to read flags from * - * This function provides the necessary READ_ONCE() and data_race() - * notation for locklessly reading the console flags. The READ_ONCE() - * in this function matches the WRITE_ONCE() when @flags are modified - * for registered consoles with console_srcu_write_flags(). + * Locklessly reading @con->flags provides a consistent read value because + * there is at most one CPU modifying @con->flags and that CPU is using only + * read-modify-write operations to do so. * - * Only use this function to read console flags when locklessly - * iterating the console list via srcu. + * Requires console_srcu_read_lock to be held, which implies that @con might + * be a registered console. The purpose of holding console_srcu_read_lock is + * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED) + * and that no exit/cleanup routines will run if the console is currently + * undergoing unregistration. + * + * If the caller is holding the console_list_lock or it is _certain_ that + * @con is not and will not become registered, the caller may read + * @con->flags directly instead. * * Context: Any context. + * Return: The current value of the @con->flags field. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* - * Locklessly reading console->flags provides a consistent - * read value because there is at most one CPU modifying - * console->flags and that CPU is using only read-modify-write - * operations to do so. + * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). */ return data_race(READ_ONCE(con->flags)); } From adf6f37d142e14896115929f3892bbc0a86e25bf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:39 +0206 Subject: [PATCH 13/54] nbcon: Add API to acquire context for non-printing operations Provide functions nbcon_device_try_acquire() and nbcon_device_release() which will try to acquire the nbcon console ownership with NBCON_PRIO_NORMAL and mark it unsafe for handover/takeover. These functions are to be used together with the device-specific locking when performing non-printing activities on the console device. They will allow synchronization against the atomic_write() callback which will be serialized, for higher priority contexts, only by acquiring the console context ownership. Pitfalls: The API requires to be called in a context with migration disabled because it uses per-CPU variables internally. The context is set unsafe for a takeover all the time. It guarantees full serialization against any atomic_write() caller except for the final flush in panic() which might try an unsafe takeover. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ include/linux/printk.h | 14 ++++++++++ kernel/printk/nbcon.c | 58 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/include/linux/console.h b/include/linux/console.h index aafe3121b74e..3706f944de46 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -322,6 +322,7 @@ struct nbcon_write_context { * * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print + * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -417,6 +418,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; + struct nbcon_context __private nbcon_device_ctxt; struct printk_buffers *pbufs; }; diff --git a/include/linux/printk.h b/include/linux/printk.h index eee8e97da681..9687089f5ace 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -9,6 +9,8 @@ #include #include +struct console; + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -198,6 +200,8 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +extern bool nbcon_device_try_acquire(struct console *con); +extern void nbcon_device_release(struct console *con); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -280,6 +284,16 @@ static inline void printk_trigger_flush(void) static inline void console_try_replay_all(void) { } + +static inline bool nbcon_device_try_acquire(struct console *con) +{ + return false; +} + +static inline void nbcon_device_release(struct console *con) +{ +} + #endif bool this_cpu_in_panic(void); diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index f279f839741a..61f0ae6a4809 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include "internal.h" /* * Printk console printing implementation for consoles which does not depend @@ -546,6 +548,7 @@ static struct printk_buffers panic_nbcon_pbufs; * nbcon_context_try_acquire - Try to acquire nbcon console * @ctxt: The context of the caller * + * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. * * If the caller allowed an unsafe hostile takeover, on success the @@ -553,7 +556,6 @@ static struct printk_buffers panic_nbcon_pbufs; * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. */ -__maybe_unused static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); @@ -1011,3 +1013,57 @@ void nbcon_free(struct console *con) con->pbufs = NULL; } + +/** + * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe + * section + * @con: The nbcon console to acquire + * + * Context: Under the locking mechanism implemented in + * @con->device_lock() including disabling migration. + * Return: True if the console was acquired. False otherwise. + * + * Console drivers will usually use their own internal synchronization + * mechasism to synchronize between console printing and non-printing + * activities (such as setting baud rates). However, nbcon console drivers + * supporting atomic consoles may also want to mark unsafe sections when + * performing non-printing activities in order to synchronize against their + * atomic_write() callback. + * + * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL + * and marks it unsafe for handover/takeover. + */ +bool nbcon_device_try_acquire(struct console *con) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + + cant_migrate(); + + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->prio = NBCON_PRIO_NORMAL; + + if (!nbcon_context_try_acquire(ctxt)) + return false; + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); + +/** + * nbcon_device_release - Exit unsafe section and release the nbcon console + * @con: The nbcon console acquired in nbcon_device_try_acquire() + */ +void nbcon_device_release(struct console *con) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + + if (!nbcon_context_exit_unsafe(ctxt)) + return; + + nbcon_context_release(ctxt); +} +EXPORT_SYMBOL_GPL(nbcon_device_release); From 4126f149c48b2ae757d11ea24675f7071ab22ebf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:40 +0206 Subject: [PATCH 14/54] serial: core: Acquire nbcon context in port->lock wrapper Currently the port->lock wrappers uart_port_lock(), uart_port_unlock() (and their variants) only lock/unlock the spin_lock. If the port is an nbcon console that has implemented the write_atomic() callback, the wrappers must also acquire/release the console context and mark the region as unsafe. This allows general port->lock synchronization to be synchronized against the nbcon write_atomic() callback. Note that __uart_port_using_nbcon() relies on the port->lock being held while a console is added and removed from the console list (i.e. all uart nbcon drivers *must* take the port->lock in their device_lock() callbacks). Signed-off-by: John Ogness Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-15-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 82 ++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 2cf03ff2056a..4ab65874a850 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -625,6 +627,60 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) up->cons = con; __uart_port_unlock_irqrestore(up, flags); } + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_using_nbcon(struct uart_port *up) +{ + lockdep_assert_held_once(&up->lock); + + if (likely(!uart_console(up))) + return false; + + /* + * @up->cons is only modified under the port lock. Therefore it is + * certain that it cannot disappear here. + * + * @up->cons->node is added/removed from the console list under the + * port lock. Therefore it is certain that the registration status + * cannot change here, thus @up->cons->flags can be read directly. + */ + if (hlist_unhashed_lockless(&up->cons->node) || + !(up->cons->flags & CON_NBCON) || + !up->cons->write_atomic) { + return false; + } + + return true; +} + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return true; + + return nbcon_device_try_acquire(up->cons); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + while (!nbcon_device_try_acquire(up->cons)) + cpu_relax(); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_release(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + nbcon_device_release(up->cons); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure @@ -632,6 +688,7 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -641,6 +698,7 @@ static inline void uart_port_lock(struct uart_port *up) static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -651,6 +709,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); + __uart_port_nbcon_acquire(up); } /** @@ -661,7 +720,15 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f */ static inline bool uart_port_trylock(struct uart_port *up) { - return spin_trylock(&up->lock); + if (!spin_trylock(&up->lock)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock(&up->lock); + return false; + } + + return true; } /** @@ -673,7 +740,15 @@ static inline bool uart_port_trylock(struct uart_port *up) */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { - return spin_trylock_irqsave(&up->lock, *flags); + if (!spin_trylock_irqsave(&up->lock, *flags)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock_irqrestore(&up->lock, *flags); + return false; + } + + return true; } /** @@ -682,6 +757,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long */ static inline void uart_port_unlock(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock(&up->lock); } @@ -691,6 +767,7 @@ static inline void uart_port_unlock(struct uart_port *up) */ static inline void uart_port_unlock_irq(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } @@ -701,6 +778,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { + __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } From 1c17ebb7907a809a92f978995c27a53ead2526ee Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:41 +0206 Subject: [PATCH 15/54] printk: nbcon: Do not rely on proxy headers The headers kernel.h, serial_core.h, and console.h allow for the definitions of many types and functions from other headers. Rather than relying on these as proxy headers, explicitly include all headers providing needed definitions. Also sort the list alphabetically to be able to easily detect duplicates. Suggested-by: Andy Shevchenko Signed-off-by: John Ogness Reviewed-by: Andy Shevchenko Acked-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-16-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 8 ++++++-- kernel/printk/nbcon.c | 13 ++++++++++++- kernel/printk/printk_ringbuffer.h | 2 ++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index dc8bc0890fd2..ccb916688178 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -2,11 +2,12 @@ /* * internal.h - printk internal definitions */ -#include #include -#include "printk_ringbuffer.h" +#include +#include #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) +struct ctl_table; void __init printk_sysctl_init(void); int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); @@ -43,6 +44,9 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +struct printk_ringbuffer; +struct dev_printk_info; + extern struct printk_ringbuffer *prb; __printf(4, 0) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 61f0ae6a4809..e8ddcb6f7053 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -2,13 +2,24 @@ // Copyright (C) 2022 Linutronix GmbH, John Ogness // Copyright (C) 2022 Intel, Thomas Gleixner -#include +#include +#include #include #include +#include #include +#include +#include +#include +#include +#include #include +#include +#include #include +#include #include "internal.h" +#include "printk_ringbuffer.h" /* * Printk console printing implementation for consoles which does not depend * on the legacy style console_lock mechanism. diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 52626d0f1fa3..bd2a892deac1 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -5,6 +5,8 @@ #include #include +#include +#include /* * Meta information about each stored message. From 864c25c83d834bdf2cb5a24c768d37236b418410 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:42 +0206 Subject: [PATCH 16/54] printk: Make console_is_usable() available to nbcon.c Move console_is_usable() as-is into internal.h so that it can be used by nbcon printing functions as well. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-17-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 32 ++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 30 ------------------------------ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index ccb916688178..5d9deb56b582 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -84,6 +84,36 @@ void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); void nbcon_free(struct console *con); +/* + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires the console_srcu_read_lock. + */ +static inline bool console_is_usable(struct console *con) +{ + short flags = console_srcu_read_flags(con); + + if (!(flags & CON_ENABLED)) + return false; + + if ((flags & CON_SUSPENDED)) + return false; + + if (!con->write) + return false; + + /* + * Console drivers may assume that per-cpu resources have been + * allocated. So unless they're explicitly marked as being able to + * cope (CON_ANYTIME) don't call them until this CPU is officially up. + */ + if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) + return false; + + return true; +} + #else #define PRINTK_PREFIX_MAX 0 @@ -104,6 +134,8 @@ static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_free(struct console *con) { } +static inline bool console_is_usable(struct console *con) { return false; } + #endif /* CONFIG_PRINTK */ extern struct printk_buffers printk_shared_pbufs; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4cd2c50dd06d..b9c8fff9a493 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2767,36 +2767,6 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_srcu_read_lock. - */ -static inline bool console_is_usable(struct console *con) -{ - short flags = console_srcu_read_flags(con); - - if (!(flags & CON_ENABLED)) - return false; - - if ((flags & CON_SUSPENDED)) - return false; - - if (!con->write) - return false; - - /* - * Console drivers may assume that per-cpu resources have been - * allocated. So unless they're explicitly marked as being able to - * cope (CON_ANYTIME) don't call them until this CPU is officially up. - */ - if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) - return false; - - return true; -} - static void __console_unlock(void) { console_locked = 0; From 20846d1ce2adacd2b1f8672e24d6acb26b2e757b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:43 +0206 Subject: [PATCH 17/54] printk: Let console_is_usable() handle nbcon The nbcon consoles use a different printing callback. For nbcon consoles, check for the write_atomic() callback instead of write(). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-18-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 5d9deb56b582..448a5fcd5228 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -86,6 +86,8 @@ void nbcon_free(struct console *con); /* * Check if the given console is currently capable and allowed to print + * records. Note that this function does not consider the current context, + * which can also play a role in deciding if @con can be used to print * records. * * Requires the console_srcu_read_lock. @@ -100,8 +102,13 @@ static inline bool console_is_usable(struct console *con) if ((flags & CON_SUSPENDED)) return false; - if (!con->write) - return false; + if (flags & CON_NBCON) { + if (!con->write_atomic) + return false; + } else { + if (!con->write) + return false; + } /* * Console drivers may assume that per-cpu resources have been From fc400d5f63570afdadd718ae962cf5aa0feeace6 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:44 +0206 Subject: [PATCH 18/54] printk: Add @flags argument for console_is_usable() The caller of console_is_usable() usually needs @console->flags for its own checks. Rather than having console_is_usable() read its own copy, make the caller pass in the @flags. This also ensures that the caller saw the same @flags value. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-19-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 8 ++------ kernel/printk/printk.c | 5 +++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 448a5fcd5228..fe8d84d78f1c 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -89,13 +89,9 @@ void nbcon_free(struct console *con); * records. Note that this function does not consider the current context, * which can also play a role in deciding if @con can be used to print * records. - * - * Requires the console_srcu_read_lock. */ -static inline bool console_is_usable(struct console *con) +static inline bool console_is_usable(struct console *con, short flags) { - short flags = console_srcu_read_flags(con); - if (!(flags & CON_ENABLED)) return false; @@ -141,7 +137,7 @@ static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_free(struct console *con) { } -static inline bool console_is_usable(struct console *con) { return false; } +static inline bool console_is_usable(struct console *con, short flags) { return false; } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b9c8fff9a493..ffb56c2150b0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3012,9 +3012,10 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove cookie = console_srcu_read_lock(); for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); bool progress; - if (!console_is_usable(con)) + if (!console_is_usable(con, flags)) continue; any_usable = true; @@ -3925,7 +3926,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * that they make forward progress, so only increment * @diff for usable consoles. */ - if (!console_is_usable(c)) + if (!console_is_usable(c, flags)) continue; if (flags & CON_NBCON) { From 06683a6649895ccf279c35ca2fb77fd7afb7a6c5 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:45 +0206 Subject: [PATCH 19/54] printk: nbcon: Add helper to assign priority based on CPU state Add a helper function to use the current state of the CPU to determine which priority to assign to the printing context. The EMERGENCY priority handling is added in a follow-up commit. It will use a per-CPU variable. Note: nbcon_device_try_acquire(), which is used by console drivers to acquire the nbcon console for non-printing activities, is hard-coded to always use NORMAL priority. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-20-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 ++ kernel/printk/nbcon.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index fe8d84d78f1c..72f229382cfa 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -83,6 +83,7 @@ u64 nbcon_seq_read(struct console *con); void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); void nbcon_free(struct console *con); +enum nbcon_prio nbcon_get_default_prio(void); /* * Check if the given console is currently capable and allowed to print @@ -136,6 +137,7 @@ static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_free(struct console *con) { } +static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } static inline bool console_is_usable(struct console *con, short flags) { return false; } diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index e8ddcb6f7053..c6a9aa9f62f6 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -973,6 +973,25 @@ update_con: return nbcon_context_exit_unsafe(ctxt); } +/** + * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon + * printing on the current CPU + * + * Context: Any context. + * Return: The nbcon_prio to use for acquiring an nbcon console in this + * context for printing. + * + * The function is safe for reading per-CPU data in any context because + * preemption is disabled if the current CPU is in the panic state. + */ +enum nbcon_prio nbcon_get_default_prio(void) +{ + if (this_cpu_in_panic()) + return NBCON_PRIO_PANIC; + + return NBCON_PRIO_NORMAL; +} + /** * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize From d3a9f82ec5c095d6eb1eb94ecaa494470b4cef70 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Aug 2024 08:35:46 +0206 Subject: [PATCH 20/54] printk: nbcon: Provide function to flush using write_atomic() Provide nbcon_atomic_flush_pending() to perform flushing of all registered nbcon consoles using their write_atomic() callback. Unlike console_flush_all(), nbcon_atomic_flush_pending() will only flush up through the newest record at the time of the call. This prevents a CPU from printing unbounded when other CPUs are adding records. If new records are added while flushing, it is expected that the dedicated printer threads will print those records. If the printer thread is not available (which is always the case at this point in the rework), nbcon_atomic_flush_pending() _will_ flush all records in the ringbuffer. Unlike console_flush_all(), nbcon_atomic_flush_pending() will fully flush one console before flushing the next. This helps to guarantee that a block of pending records (such as a stack trace in an emergency situation) can be printed atomically at once before releasing console ownership. nbcon_atomic_flush_pending() is safe in any context because it uses write_atomic() and acquires with unsafe_takeover disabled. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-21-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 + kernel/printk/nbcon.c | 151 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 152 insertions(+), 1 deletion(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 72f229382cfa..0dc9b92b5dd0 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -84,6 +84,7 @@ void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); void nbcon_free(struct console *con); enum nbcon_prio nbcon_get_default_prio(void); +void nbcon_atomic_flush_pending(void); /* * Check if the given console is currently capable and allowed to print @@ -138,6 +139,7 @@ static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_free(struct console *con) { } static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } +static inline void nbcon_atomic_flush_pending(void) { } static inline bool console_is_usable(struct console *con, short flags) { return false; } diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index c6a9aa9f62f6..3982d68979d6 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -886,7 +886,6 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); * When true is returned, @wctxt->ctxt.backlog indicates whether there are * still records pending in the ringbuffer, */ -__maybe_unused static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); @@ -992,6 +991,156 @@ enum nbcon_prio nbcon_get_default_prio(void) return NBCON_PRIO_NORMAL; } +/* + * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its + * write_atomic() callback + * @con: The nbcon console to flush + * @stop_seq: Flush up until this record + * + * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on + * failure. + * + * Errors: + * + * -EPERM: Unable to acquire console ownership. + * + * -EAGAIN: Another context took over ownership while printing. + * + * -ENOENT: A record before @stop_seq is not available. + * + * If flushing up to @stop_seq was not successful, it only makes sense for the + * caller to try again when -EAGAIN was returned. When -EPERM is returned, + * this context is not allowed to acquire the console. When -ENOENT is + * returned, it cannot be expected that the unfinalized record will become + * available. + */ +static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) +{ + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + int err = 0; + + ctxt->console = con; + ctxt->spinwait_max_us = 2000; + ctxt->prio = nbcon_get_default_prio(); + + if (!nbcon_context_try_acquire(ctxt)) + return -EPERM; + + while (nbcon_seq_read(con) < stop_seq) { + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + */ + if (!nbcon_emit_next_record(&wctxt)) + return -EAGAIN; + + if (!ctxt->backlog) { + /* Are there reserved but not yet finalized records? */ + if (nbcon_seq_read(con) < stop_seq) + err = -ENOENT; + break; + } + } + + nbcon_context_release(ctxt); + return err; +} + +/** + * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its + * write_atomic() callback + * @con: The nbcon console to flush + * @stop_seq: Flush up until this record + * + * This will stop flushing before @stop_seq if another context has ownership. + * That context is then responsible for the flushing. Likewise, if new records + * are added while this context was flushing and there is no other context + * to handle the printing, this context must also flush those records. + */ +static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) +{ + unsigned long flags; + int err; + +again: + /* + * Atomic flushing does not use console driver synchronization (i.e. + * it does not hold the port lock for uart consoles). Therefore IRQs + * must be disabled to avoid being interrupted and then calling into + * a driver that will deadlock trying to acquire console ownership. + */ + local_irq_save(flags); + + err = __nbcon_atomic_flush_pending_con(con, stop_seq); + + local_irq_restore(flags); + + /* + * If there was a new owner (-EPERM, -EAGAIN), that context is + * responsible for completing. + * + * Do not wait for records not yet finalized (-ENOENT) to avoid a + * possible deadlock. They will either get flushed by the writer or + * eventually skipped on panic CPU. + */ + if (err) + return; + + /* + * If flushing was successful but more records are available, this + * context must flush those remaining records because there is no + * other context that will do it. + */ + if (prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + stop_seq = prb_next_reserve_seq(prb); + goto again; + } +} + +/** + * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their + * write_atomic() callback + * @stop_seq: Flush up until this record + */ +static void __nbcon_atomic_flush_pending(u64 stop_seq) +{ + struct console *con; + int cookie; + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + + if (!(flags & CON_NBCON)) + continue; + + if (!console_is_usable(con, flags)) + continue; + + if (nbcon_seq_read(con) >= stop_seq) + continue; + + nbcon_atomic_flush_pending_con(con, stop_seq); + } + console_srcu_read_unlock(cookie); +} + +/** + * nbcon_atomic_flush_pending - Flush all nbcon consoles using their + * write_atomic() callback + * + * Flush the backlog up through the currently newest record. Any new + * records added while flushing will not be flushed if there is another + * context available to handle the flushing. This is to avoid one CPU + * printing unbounded because other CPUs continue to add records. + */ +void nbcon_atomic_flush_pending(void) +{ + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); +} + /** * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize From 97ea9bccfcbe4c97f127e736823cc8d984d781bf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:47 +0206 Subject: [PATCH 21/54] printk: Track registered boot consoles Unfortunately it is not known if a boot console and a regular (legacy or nbcon) console use the same hardware. For this reason they must not be allowed to print simultaneously. For legacy consoles this is not an issue because they are already synchronized with the boot consoles using the console lock. However nbcon consoles can be triggered separately. Add a global flag @have_boot_console to identify if any boot consoles are registered. This will be used in follow-up commits to ensure that boot consoles and nbcon consoles cannot print simultaneously. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-22-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ffb56c2150b0..b8634a153d1d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -463,6 +463,14 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * Specifies if a boot console is registered. If boot consoles are present, + * nbcon consoles cannot print simultaneously and must be synchronized by + * the console lock. This is because boot consoles and nbcon consoles may + * have mapped the same hardware. + */ +static bool have_boot_console; + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ @@ -3610,6 +3618,9 @@ void register_console(struct console *newcon) newcon->seq = init_seq; } + if (newcon->flags & CON_BOOT) + have_boot_console = true; + /* * If another context is actively using the hardware of this new * console, it will not be aware of the nbcon synchronization. This @@ -3680,7 +3691,9 @@ EXPORT_SYMBOL(register_console); static int unregister_console_locked(struct console *console) { bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; + bool found_boot_con = false; unsigned long flags; + struct console *c; int res; lockdep_assert_console_list_lock_held(); @@ -3738,6 +3751,17 @@ static int unregister_console_locked(struct console *console) if (console->exit) res = console->exit(console); + /* + * With this console gone, the global flags tracking registered + * console types may have changed. Update them. + */ + for_each_console(c) { + if (c->flags & CON_BOOT) + found_boot_con = true; + } + if (!found_boot_con) + have_boot_console = found_boot_con; + return res; } From c158834b223fbfab3a14855ac203b8d9cddbbefd Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:48 +0206 Subject: [PATCH 22/54] printk: nbcon: Use nbcon consoles in console_flush_all() Allow nbcon consoles to print messages in the legacy printk() caller context (printing via unlock) by integrating them into console_flush_all(). The write_atomic() callback is used for printing. Provide nbcon_legacy_emit_next_record(), which acts as the nbcon variant of console_emit_next_record(). Call this variant within console_flush_all() for nbcon consoles. Since nbcon consoles use their own @nbcon_seq variable to track the next record to print, this also must be appropriately handled in console_flush_all(). Note that the legacy printing logic uses @handover to detect handovers for printing all consoles. For nbcon consoles, handovers/takeovers occur on a per-console basis and thus do not cause the console_flush_all() loop to abort. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-23-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 6 +++ kernel/printk/nbcon.c | 87 ++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 17 +++++--- 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 0dc9b92b5dd0..44468f3828f3 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -78,6 +78,8 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +void console_lock_spinning_enable(void); +int console_lock_spinning_disable_and_check(int cookie); u64 nbcon_seq_read(struct console *con); void nbcon_seq_force(struct console *con, u64 seq); @@ -85,6 +87,8 @@ bool nbcon_alloc(struct console *con); void nbcon_free(struct console *con); enum nbcon_prio nbcon_get_default_prio(void); void nbcon_atomic_flush_pending(void); +bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie); /* * Check if the given console is currently capable and allowed to print @@ -140,6 +144,8 @@ static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_free(struct console *con) { } static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } static inline void nbcon_atomic_flush_pending(void) { } +static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie) { return false; } static inline bool console_is_usable(struct console *con, short flags) { return false; } diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 3982d68979d6..d09c084c9af4 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -992,6 +992,93 @@ enum nbcon_prio nbcon_get_default_prio(void) } /* + * nbcon_atomic_emit_one - Print one record for an nbcon console using the + * write_atomic() callback + * @wctxt: An initialized write context struct to use for this context + * + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This is an internal helper to handle the locking of the console before + * calling nbcon_emit_next_record(). + */ +static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + if (!nbcon_context_try_acquire(ctxt)) + return false; + + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + * + * The higher priority printing context takes over responsibility + * to print the pending records. + */ + if (!nbcon_emit_next_record(wctxt)) + return false; + + nbcon_context_release(ctxt); + + return ctxt->backlog; +} + +/** + * nbcon_legacy_emit_next_record - Print one record for an nbcon console + * in legacy contexts + * @con: The console to print on + * @handover: Will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding + * both the console_lock and the SRCU read lock. Otherwise it + * is set to false. + * @cookie: The cookie from the SRCU read lock. + * + * Context: Any context except NMI. + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This function is meant to be called by console_flush_all() to print records + * on nbcon consoles from legacy context (printing via console unlocking). + * Essentially it is the nbcon version of console_emit_next_record(). + */ +bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie) +{ + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + unsigned long flags; + bool progress; + + /* Use the same procedure as console_emit_next_record(). */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + stop_critical_timings(); + + ctxt->console = con; + ctxt->prio = nbcon_get_default_prio(); + + progress = nbcon_atomic_emit_one(&wctxt); + + start_critical_timings(); + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); + + return progress; +} + +/** * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b8634a153d1d..f08bf5e82fc7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1860,7 +1860,7 @@ static bool console_waiter; * there may be a waiter spinning (like a spinlock). Also it must be * ready to hand over the lock at the end of the section. */ -static void console_lock_spinning_enable(void) +void console_lock_spinning_enable(void) { /* * Do not use spinning in panic(). The panic CPU wants to keep the lock. @@ -1899,7 +1899,7 @@ lockdep: * * Return: 1 if the lock rights were passed, 0 otherwise. */ -static int console_lock_spinning_disable_and_check(int cookie) +int console_lock_spinning_disable_and_check(int cookie) { int waiter; @@ -3021,13 +3021,20 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove cookie = console_srcu_read_lock(); for_each_console_srcu(con) { short flags = console_srcu_read_flags(con); + u64 printk_seq; bool progress; if (!console_is_usable(con, flags)) continue; any_usable = true; - progress = console_emit_next_record(con, handover, cookie); + if (flags & CON_NBCON) { + progress = nbcon_legacy_emit_next_record(con, handover, cookie); + printk_seq = nbcon_seq_read(con); + } else { + progress = console_emit_next_record(con, handover, cookie); + printk_seq = con->seq; + } /* * If a handover has occurred, the SRCU read lock @@ -3037,8 +3044,8 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove return false; /* Track the next of the highest seq flushed. */ - if (con->seq > *next_seq) - *next_seq = con->seq; + if (printk_seq > *next_seq) + *next_seq = printk_seq; if (!progress) continue; From 70411bf8d22ab3d2f794f199d81d70b62d3a85fa Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:49 +0206 Subject: [PATCH 23/54] printk: Add is_printk_legacy_deferred() If printk has been explicitly deferred or is called from NMI context, legacy console printing must be deferred to an irq_work context. Introduce a helper function is_printk_legacy_deferred() for a CPU to query if it must defer legacy console printing. In follow-up commits this helper will be needed at other call sites as well. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-24-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 ++ kernel/printk/printk_safe.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 44468f3828f3..84706c1c934b 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -75,6 +75,7 @@ bool printk_percpu_data_ready(void); } while (0) void defer_console_output(void); +bool is_printk_legacy_deferred(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); @@ -138,6 +139,7 @@ static inline bool console_is_usable(struct console *con, short flags) #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline bool is_printk_legacy_deferred(void) { return false; } static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 4421ccac3113..86439fd20aab 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -38,6 +38,15 @@ void __printk_deferred_exit(void) __printk_safe_exit(); } +bool is_printk_legacy_deferred(void) +{ + /* + * The per-CPU variable @printk_context can be read safely in any + * context. CPU migration is always disabled when set. + */ + return (this_cpu_read(printk_context) || in_nmi()); +} + asmlinkage int vprintk(const char *fmt, va_list args) { #ifdef CONFIG_KGDB_KDB @@ -50,7 +59,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) * Use the main logbuf even in NMI. But avoid calling console * drivers that might have their own locks. */ - if (this_cpu_read(printk_context) || in_nmi()) + if (is_printk_legacy_deferred()) return vprintk_deferred(fmt, args); /* No obstacles. */ From 8ba77712a7501ca941603d6d5ed650cd0d42cafb Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:50 +0206 Subject: [PATCH 24/54] printk: nbcon: Flush new records on device_release() There may be new records that were added while a driver was holding the nbcon context for non-printing purposes. These new records must be flushed by the nbcon_device_release() context because no other context will do it. If boot consoles are registered, the legacy loop is used (either direct or per irq_work) to handle the flushing. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-25-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 ++ kernel/printk/nbcon.c | 20 ++++++++++++++++++++ kernel/printk/printk.c | 2 +- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 84706c1c934b..7679e18f24b3 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -153,6 +153,8 @@ static inline bool console_is_usable(struct console *con, short flags) { return #endif /* CONFIG_PRINTK */ +extern bool have_boot_console; + extern struct printk_buffers printk_shared_pbufs; /** diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index d09c084c9af4..269aeed18064 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1326,10 +1326,30 @@ EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); void nbcon_device_release(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + int cookie; if (!nbcon_context_exit_unsafe(ctxt)) return; nbcon_context_release(ctxt); + + /* + * This context must flush any new records added while the console + * was locked. The console_srcu_read_lock must be taken to ensure + * the console is usable throughout flushing. + */ + cookie = console_srcu_read_lock(); + if (console_is_usable(con, console_srcu_read_flags(con)) && + prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + if (!have_boot_console) { + __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb)); + } else if (!is_printk_legacy_deferred()) { + if (console_trylock()) + console_unlock(); + } else { + printk_trigger_flush(); + } + } + console_srcu_read_unlock(cookie); } EXPORT_SYMBOL_GPL(nbcon_device_release); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f08bf5e82fc7..7c9f8f6e1738 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -469,7 +469,7 @@ static DEFINE_MUTEX(syslog_lock); * the console lock. This is because boot consoles and nbcon consoles may * have mapped the same hardware. */ -static bool have_boot_console; +bool have_boot_console; #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); From d2e85ca7a736d2d889bac9aef6ede0a67f6870b2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:51 +0206 Subject: [PATCH 25/54] printk: Flush nbcon consoles first on panic In console_flush_on_panic(), flush the nbcon consoles before flushing legacy consoles. The legacy write() callbacks are not fully safe when oops_in_progress is set. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-26-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7c9f8f6e1738..c6e633329e4d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3269,6 +3269,9 @@ void console_flush_on_panic(enum con_flush_mode mode) if (mode == CONSOLE_REPLAY_ALL) __console_rewind_all(); + if (!have_boot_console) + nbcon_atomic_flush_pending(); + console_flush_all(false, &next_seq, &handover); } From 5dde3b7354133846079fa51e55e74ef90a836759 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:52 +0206 Subject: [PATCH 26/54] printk: nbcon: Add unsafe flushing on panic Add nbcon_atomic_flush_unsafe() to flush all nbcon consoles using the write_atomic() callback and allowing unsafe hostile takeovers. Call this at the end of panic() as a final attempt to flush any pending messages. Note that legacy consoles use unsafe methods for flushing from the beginning of panic (see bust_spinlocks()). Therefore, systems using both legacy and nbcon consoles may still fail to see panic messages due to unsafe legacy console usage. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-27-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ kernel/panic.c | 1 + kernel/printk/nbcon.c | 32 +++++++++++++++++++++++++------- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 9687089f5ace..2e083f01f8a3 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -202,6 +202,7 @@ void printk_trigger_flush(void); void console_try_replay_all(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); +void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -294,6 +295,10 @@ static inline void nbcon_device_release(struct console *con) { } +static inline void nbcon_atomic_flush_unsafe(void) +{ +} + #endif bool this_cpu_in_panic(void); diff --git a/kernel/panic.c b/kernel/panic.c index 2a0449144f82..df37c913b010 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -463,6 +463,7 @@ void panic(const char *fmt, ...) * Explicitly flush the kernel log buffer one last time. */ console_flush_on_panic(CONSOLE_FLUSH_PENDING); + nbcon_atomic_flush_unsafe(); local_irq_enable(); for (i = 0; ; i += PANIC_TIMER_STEP) { diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 269aeed18064..afdb16c1c733 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1083,6 +1083,7 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record + * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on * failure. @@ -1101,7 +1102,8 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, * returned, it cannot be expected that the unfinalized record will become * available. */ -static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) +static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, + bool allow_unsafe_takeover) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); @@ -1110,6 +1112,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) ctxt->console = con; ctxt->spinwait_max_us = 2000; ctxt->prio = nbcon_get_default_prio(); + ctxt->allow_unsafe_takeover = allow_unsafe_takeover; if (!nbcon_context_try_acquire(ctxt)) return -EPERM; @@ -1140,13 +1143,15 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record + * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * This will stop flushing before @stop_seq if another context has ownership. * That context is then responsible for the flushing. Likewise, if new records * are added while this context was flushing and there is no other context * to handle the printing, this context must also flush those records. */ -static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) +static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, + bool allow_unsafe_takeover) { unsigned long flags; int err; @@ -1160,7 +1165,7 @@ again: */ local_irq_save(flags); - err = __nbcon_atomic_flush_pending_con(con, stop_seq); + err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); local_irq_restore(flags); @@ -1190,8 +1195,9 @@ again: * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * @stop_seq: Flush up until this record + * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers */ -static void __nbcon_atomic_flush_pending(u64 stop_seq) +static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover) { struct console *con; int cookie; @@ -1209,7 +1215,7 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq) if (nbcon_seq_read(con) >= stop_seq) continue; - nbcon_atomic_flush_pending_con(con, stop_seq); + nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); } console_srcu_read_unlock(cookie); } @@ -1225,7 +1231,19 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq) */ void nbcon_atomic_flush_pending(void) { - __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false); +} + +/** + * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their + * write_atomic() callback and allowing unsafe hostile takeovers + * + * Flush the backlog up through the currently newest record. Unsafe hostile + * takeovers will be performed, if necessary. + */ +void nbcon_atomic_flush_unsafe(void) +{ + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true); } /** @@ -1342,7 +1360,7 @@ void nbcon_device_release(struct console *con) if (console_is_usable(con, console_srcu_read_flags(con)) && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { if (!have_boot_console) { - __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb)); + __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); } else if (!is_printk_legacy_deferred()) { if (console_trylock()) console_unlock(); From 60013065fdc677df7b71f9a0bac501020e3bbd4f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:53 +0206 Subject: [PATCH 27/54] printk: Avoid console_lock dance if no legacy or boot consoles Currently the console lock is used to attempt legacy-type printing even if there are no legacy or boot consoles registered. If no such consoles are registered, the console lock does not need to be taken. Add tracking of legacy console registration and use it with boot console tracking to avoid unnecessary code paths, i.e. do not use the console lock if there are no boot consoles and no legacy consoles. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-28-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c6e633329e4d..b3ddcf39a53c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -463,6 +463,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * Specifies if a legacy console is registered. If legacy consoles are + * present, it is necessary to perform the console lock/unlock dance + * whenever console flushing should occur. + */ +static bool have_legacy_console; + /* * Specifies if a boot console is registered. If boot consoles are present, * nbcon consoles cannot print simultaneously and must be synchronized by @@ -471,6 +478,14 @@ static DEFINE_MUTEX(syslog_lock); */ bool have_boot_console; +/* + * Specifies if the console lock/unlock dance is needed for console + * printing. If @have_boot_console is true, the nbcon consoles will + * be printed serially along with the legacy consoles because nbcon + * consoles cannot print simultaneously with boot consoles. + */ +#define printing_via_unlock (have_legacy_console || have_boot_console) + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ @@ -2339,7 +2354,7 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && printing_via_unlock) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during @@ -2359,7 +2374,7 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - if (in_sched) + if (in_sched && printing_via_unlock) defer_console_output(); else wake_up_klogd(); @@ -2718,7 +2733,7 @@ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { - if (!cpuhp_tasks_frozen) { + if (!cpuhp_tasks_frozen && printing_via_unlock) { /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); @@ -3625,6 +3640,7 @@ void register_console(struct console *newcon) if (newcon->flags & CON_NBCON) { nbcon_seq_force(newcon, init_seq); } else { + have_legacy_console = true; newcon->seq = init_seq; } @@ -3701,6 +3717,7 @@ EXPORT_SYMBOL(register_console); static int unregister_console_locked(struct console *console) { bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; + bool found_legacy_con = false; bool found_boot_con = false; unsigned long flags; struct console *c; @@ -3768,9 +3785,13 @@ static int unregister_console_locked(struct console *console) for_each_console(c) { if (c->flags & CON_BOOT) found_boot_con = true; + if (!(c->flags & CON_NBCON)) + found_legacy_con = true; } if (!found_boot_con) have_boot_console = found_boot_con; + if (!found_legacy_con) + have_legacy_console = found_legacy_con; return res; } @@ -3931,8 +3952,10 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre seq = prb_next_reserve_seq(prb); /* Flush the consoles so that records up to @seq are printed. */ - console_lock(); - console_unlock(); + if (printing_via_unlock) { + console_lock(); + console_unlock(); + } for (;;) { unsigned long begin_jiffies; @@ -3945,6 +3968,12 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * console->seq. Releasing console_lock flushes more * records in case @seq is still not printed on all * usable consoles. + * + * Holding the console_lock is not necessary if there + * are no legacy or boot consoles. However, such a + * console could register at any time. Always hold the + * console_lock as a precaution rather than + * synchronizing against register_console(). */ console_lock(); From bebd87ae27e052e390c203893c7ee46f54b0bf9e Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:54 +0206 Subject: [PATCH 28/54] printk: Track nbcon consoles Add a global flag @have_nbcon_console to identify if any nbcon consoles are registered. This will be used in follow-up commits to preserve legacy behavior when no nbcon consoles are registered. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-29-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b3ddcf39a53c..e30107d216a5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -470,6 +470,11 @@ static DEFINE_MUTEX(syslog_lock); */ static bool have_legacy_console; +/* + * Specifies if an nbcon console is registered. + */ +static bool have_nbcon_console; + /* * Specifies if a boot console is registered. If boot consoles are present, * nbcon consoles cannot print simultaneously and must be synchronized by @@ -3638,6 +3643,7 @@ void register_console(struct console *newcon) init_seq = get_init_console_seq(newcon, bootcon_registered); if (newcon->flags & CON_NBCON) { + have_nbcon_console = true; nbcon_seq_force(newcon, init_seq); } else { have_legacy_console = true; @@ -3718,6 +3724,7 @@ static int unregister_console_locked(struct console *console) { bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; bool found_legacy_con = false; + bool found_nbcon_con = false; bool found_boot_con = false; unsigned long flags; struct console *c; @@ -3785,13 +3792,18 @@ static int unregister_console_locked(struct console *console) for_each_console(c) { if (c->flags & CON_BOOT) found_boot_con = true; - if (!(c->flags & CON_NBCON)) + + if (c->flags & CON_NBCON) + found_nbcon_con = true; + else found_legacy_con = true; } if (!found_boot_con) have_boot_console = found_boot_con; if (!found_legacy_con) have_legacy_console = found_legacy_con; + if (!found_nbcon_con) + have_nbcon_console = found_nbcon_con; return res; } From e35a8884270bae11196eedf3b0a5bf22619f11f2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:55 +0206 Subject: [PATCH 29/54] printk: Coordinate direct printing in panic If legacy and nbcon consoles are registered and the nbcon consoles are allowed to flush (i.e. no boot consoles registered), the legacy consoles will no longer perform direct printing on the panic CPU until after the backtrace has been stored. This will give the safe nbcon consoles a chance to print the panic messages before allowing the unsafe legacy consoles to print. If no nbcon consoles are registered or they are not allowed to flush because boot consoles are registered, there is no change in behavior (i.e. legacy consoles will always attempt to print from the printk() caller context). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-30-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 ++++ kernel/panic.c | 2 ++ kernel/printk/internal.h | 1 + kernel/printk/printk.c | 55 +++++++++++++++++++++++++++++++++++----- 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 2e083f01f8a3..eca9bb2ee637 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -200,6 +200,7 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); @@ -286,6 +287,10 @@ static inline void console_try_replay_all(void) { } +static inline void printk_legacy_allow_panic_sync(void) +{ +} + static inline bool nbcon_device_try_acquire(struct console *con) { return false; diff --git a/kernel/panic.c b/kernel/panic.c index df37c913b010..93096d5abcc7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -374,6 +374,8 @@ void panic(const char *fmt, ...) panic_other_cpus_shutdown(_crash_kexec_post_notifiers); + printk_legacy_allow_panic_sync(); + /* * Run any panic handlers, including those that might need to * add information to the kmsg dump output. diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7679e18f24b3..6b61350a3026 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -154,6 +154,7 @@ static inline bool console_is_usable(struct console *con, short flags) { return #endif /* CONFIG_PRINTK */ extern bool have_boot_console; +extern bool legacy_allow_panic_sync; extern struct printk_buffers printk_shared_pbufs; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e30107d216a5..fc3f8970eea2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -471,7 +471,9 @@ static DEFINE_MUTEX(syslog_lock); static bool have_legacy_console; /* - * Specifies if an nbcon console is registered. + * Specifies if an nbcon console is registered. If nbcon consoles are present, + * synchronous printing of legacy consoles will not occur during panic until + * the backtrace has been stored to the ringbuffer. */ static bool have_nbcon_console; @@ -483,6 +485,9 @@ static bool have_nbcon_console; */ bool have_boot_console; +/* See printk_legacy_allow_panic_sync() for details. */ +bool legacy_allow_panic_sync; + /* * Specifies if the console lock/unlock dance is needed for console * printing. If @have_boot_console is true, the nbcon consoles will @@ -2330,12 +2335,28 @@ out: return ret; } +/* + * This acts as a one-way switch to allow legacy consoles to print from + * the printk() caller context on a panic CPU. It also attempts to flush + * the legacy consoles in this context. + */ +void printk_legacy_allow_panic_sync(void) +{ + legacy_allow_panic_sync = true; + + if (printing_via_unlock && !is_printk_legacy_deferred()) { + if (console_trylock()) + console_unlock(); + } +} + asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { + bool do_trylock_unlock = printing_via_unlock; + bool defer_legacy = false; int printed_len; - bool in_sched = false; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) @@ -2349,17 +2370,35 @@ asmlinkage int vprintk_emit(int facility, int level, if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; + /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; - in_sched = true; + defer_legacy = do_trylock_unlock; + do_trylock_unlock = false; } printk_delay(level); printed_len = vprintk_store(facility, level, dev_info, fmt, args); - /* If called from the scheduler, we can not call up(). */ - if (!in_sched && printing_via_unlock) { + if (have_nbcon_console && !have_boot_console) { + nbcon_atomic_flush_pending(); + + /* + * In panic, the legacy consoles are not allowed to print from + * the printk calling context unless explicitly allowed. This + * gives the safe nbcon consoles a chance to print out all the + * panic messages first. This restriction only applies if + * there are nbcon consoles registered and they are allowed to + * flush. + */ + if (this_cpu_in_panic() && !legacy_allow_panic_sync) { + do_trylock_unlock = false; + defer_legacy = false; + } + } + + if (do_trylock_unlock) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during @@ -2379,7 +2418,7 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - if (in_sched && printing_via_unlock) + if (defer_legacy) defer_console_output(); else wake_up_klogd(); @@ -3292,7 +3331,9 @@ void console_flush_on_panic(enum con_flush_mode mode) if (!have_boot_console) nbcon_atomic_flush_pending(); - console_flush_all(false, &next_seq, &handover); + /* Flush legacy consoles once allowed, even when dangerous. */ + if (legacy_allow_panic_sync) + console_flush_all(false, &next_seq, &handover); } /* From 6690d6b52726bcb2b743466a1833e0b9f049b9d7 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:56 +0206 Subject: [PATCH 30/54] printk: Add helper for flush type logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many call sites where console flushing occur. Depending on the system state and types of consoles, the flush methods to use are different. A flush call site generally must consider: @have_boot_console @have_nbcon_console @have_legacy_console @legacy_allow_panic_sync is_printk_preferred() and take into account the current CPU state: NBCON_PRIO_NORMAL NBCON_PRIO_EMERGENCY NBCON_PRIO_PANIC in order to decide if it should: flush nbcon directly via atomic_write() callback flush legacy directly via console_unlock flush legacy via offload to irq_work All of these call sites use their own logic to make this decision, which is complicated and error prone. Especially later when two more flush methods will be introduced: flush nbcon via offload to kthread flush legacy via offload to kthread Introduce a new internal struct console_flush_type that specifies which console flushing methods should be used in the context of the caller. Introduce a helper function to fill out console_flush_type to be used for flushing call sites. Replace the logic of all flushing call sites to use the new helper. This change standardizes behavior, leading to both fixes and optimizations across various call sites. For instance, in console_cpu_notify(), the new logic ensures that nbcon consoles are flushed when they aren’t managed by the legacy loop. Similarly, in console_flush_on_panic(), the system no longer needs to flush nbcon consoles if none are present. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-31-john.ogness@linutronix.de [pmladek@suse.com: Updated the commit message.] Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 73 ++++++++++++++++++++++++++++++++++++++++ kernel/printk/nbcon.c | 12 +++++-- kernel/printk/printk.c | 68 +++++++++++++++++-------------------- 3 files changed, 112 insertions(+), 41 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 6b61350a3026..ba2e0f1940bd 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -154,8 +154,81 @@ static inline bool console_is_usable(struct console *con, short flags) { return #endif /* CONFIG_PRINTK */ extern bool have_boot_console; +extern bool have_nbcon_console; +extern bool have_legacy_console; extern bool legacy_allow_panic_sync; +/** + * struct console_flush_type - Define available console flush methods + * @nbcon_atomic: Flush directly using nbcon_atomic() callback + * @legacy_direct: Call the legacy loop in this context + * @legacy_offload: Offload the legacy loop into IRQ + * + * Note that the legacy loop also flushes the nbcon consoles. + */ +struct console_flush_type { + bool nbcon_atomic; + bool legacy_direct; + bool legacy_offload; +}; + +/* + * Identify which console flushing methods should be used in the context of + * the caller. + */ +static inline void printk_get_console_flush_type(struct console_flush_type *ft) +{ + memset(ft, 0, sizeof(*ft)); + + switch (nbcon_get_default_prio()) { + case NBCON_PRIO_NORMAL: + if (have_nbcon_console && !have_boot_console) + ft->nbcon_atomic = true; + + /* Legacy consoles are flushed directly when possible. */ + if (have_legacy_console || have_boot_console) { + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + else + ft->legacy_offload = true; + } + break; + + case NBCON_PRIO_PANIC: + /* + * In panic, the nbcon consoles will directly print. But + * only allowed if there are no boot consoles. + */ + if (have_nbcon_console && !have_boot_console) + ft->nbcon_atomic = true; + + if (have_legacy_console || have_boot_console) { + /* + * This is the same decision as NBCON_PRIO_NORMAL + * except that offloading never occurs in panic. + * + * Note that console_flush_on_panic() will flush + * legacy consoles anyway, even if unsafe. + */ + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + + /* + * In panic, if nbcon atomic printing occurs, + * the legacy consoles must remain silent until + * explicitly allowed. + */ + if (ft->nbcon_atomic && !legacy_allow_panic_sync) + ft->legacy_direct = false; + } + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + extern struct printk_buffers printk_shared_pbufs; /** diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index afdb16c1c733..18488d6c17c0 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1344,6 +1344,7 @@ EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); void nbcon_device_release(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + struct console_flush_type ft; int cookie; if (!nbcon_context_exit_unsafe(ctxt)) @@ -1359,12 +1360,17 @@ void nbcon_device_release(struct console *con) cookie = console_srcu_read_lock(); if (console_is_usable(con, console_srcu_read_flags(con)) && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { - if (!have_boot_console) { + /* + * If nbcon_atomic flushing is not available, fallback to + * using the legacy loop. + */ + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) { __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); - } else if (!is_printk_legacy_deferred()) { + } else if (ft.legacy_direct) { if (console_trylock()) console_unlock(); - } else { + } else if (ft.legacy_offload) { printk_trigger_flush(); } } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fc3f8970eea2..6accd1704e73 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -468,14 +468,14 @@ static DEFINE_MUTEX(syslog_lock); * present, it is necessary to perform the console lock/unlock dance * whenever console flushing should occur. */ -static bool have_legacy_console; +bool have_legacy_console; /* * Specifies if an nbcon console is registered. If nbcon consoles are present, * synchronous printing of legacy consoles will not occur during panic until * the backtrace has been stored to the ringbuffer. */ -static bool have_nbcon_console; +bool have_nbcon_console; /* * Specifies if a boot console is registered. If boot consoles are present, @@ -488,14 +488,6 @@ bool have_boot_console; /* See printk_legacy_allow_panic_sync() for details. */ bool legacy_allow_panic_sync; -/* - * Specifies if the console lock/unlock dance is needed for console - * printing. If @have_boot_console is true, the nbcon consoles will - * be printed serially along with the legacy consoles because nbcon - * consoles cannot print simultaneously with boot consoles. - */ -#define printing_via_unlock (have_legacy_console || have_boot_console) - #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ @@ -2342,9 +2334,12 @@ out: */ void printk_legacy_allow_panic_sync(void) { + struct console_flush_type ft; + legacy_allow_panic_sync = true; - if (printing_via_unlock && !is_printk_legacy_deferred()) { + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) { if (console_trylock()) console_unlock(); } @@ -2354,8 +2349,7 @@ asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { - bool do_trylock_unlock = printing_via_unlock; - bool defer_legacy = false; + struct console_flush_type ft; int printed_len; /* Suppress unimportant messages after panic happens */ @@ -2370,35 +2364,23 @@ asmlinkage int vprintk_emit(int facility, int level, if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; + printk_get_console_flush_type(&ft); + /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; - defer_legacy = do_trylock_unlock; - do_trylock_unlock = false; + ft.legacy_offload |= ft.legacy_direct; + ft.legacy_direct = false; } printk_delay(level); printed_len = vprintk_store(facility, level, dev_info, fmt, args); - if (have_nbcon_console && !have_boot_console) { + if (ft.nbcon_atomic) nbcon_atomic_flush_pending(); - /* - * In panic, the legacy consoles are not allowed to print from - * the printk calling context unless explicitly allowed. This - * gives the safe nbcon consoles a chance to print out all the - * panic messages first. This restriction only applies if - * there are nbcon consoles registered and they are allowed to - * flush. - */ - if (this_cpu_in_panic() && !legacy_allow_panic_sync) { - do_trylock_unlock = false; - defer_legacy = false; - } - } - - if (do_trylock_unlock) { + if (ft.legacy_direct) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during @@ -2418,7 +2400,7 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - if (defer_legacy) + if (ft.legacy_offload) defer_console_output(); else wake_up_klogd(); @@ -2777,10 +2759,16 @@ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { - if (!cpuhp_tasks_frozen && printing_via_unlock) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + struct console_flush_type ft; + + if (!cpuhp_tasks_frozen) { + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } } return 0; } @@ -3305,6 +3293,7 @@ static void __console_rewind_all(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + struct console_flush_type ft; bool handover; u64 next_seq; @@ -3328,7 +3317,8 @@ void console_flush_on_panic(enum con_flush_mode mode) if (mode == CONSOLE_REPLAY_ALL) __console_rewind_all(); - if (!have_boot_console) + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) nbcon_atomic_flush_pending(); /* Flush legacy consoles once allowed, even when dangerous. */ @@ -3992,6 +3982,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre { unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); unsigned long remaining_jiffies = timeout_jiffies; + struct console_flush_type ft; struct console *c; u64 last_diff = 0; u64 printk_seq; @@ -4005,7 +3996,8 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre seq = prb_next_reserve_seq(prb); /* Flush the consoles so that records up to @seq are printed. */ - if (printing_via_unlock) { + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) { console_lock(); console_unlock(); } From ecb5e1aa82c86642ec1eaafefd4e317dfba3a238 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Aug 2024 08:35:57 +0206 Subject: [PATCH 31/54] printk: nbcon: Implement emergency sections In emergency situations (something has gone wrong but the system continues to operate), usually important information (such as a backtrace) is generated via printk(). This information should be pushed out to the consoles ASAP. Add per-CPU emergency nesting tracking because an emergency can arise while in an emergency situation. Add functions to mark the beginning and end of emergency sections where the urgent messages are generated. Perform direct console flushing at the emergency priority if the current CPU is in an emergency state and it is safe to do so. Note that the emergency state is not system-wide. While one CPU is in an emergency state, another CPU may attempt to print console messages at normal priority. Also note that printk() already attempts to flush consoles in the caller context for normal priority. However, follow-up changes will introduce printing kthreads, in which case the normal priority printk() calls will offload to the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-32-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 4 +++ kernel/printk/internal.h | 1 + kernel/printk/nbcon.c | 75 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/include/linux/console.h b/include/linux/console.h index 3706f944de46..9a13f91b0c43 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -553,10 +553,14 @@ static inline bool console_is_registered(const struct console *con) hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK +extern void nbcon_cpu_emergency_enter(void); +extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); #else +static inline void nbcon_cpu_emergency_enter(void) { } +static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index ba2e0f1940bd..8e36d8695f81 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -182,6 +182,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft) switch (nbcon_get_default_prio()) { case NBCON_PRIO_NORMAL: + case NBCON_PRIO_EMERGENCY: if (have_nbcon_console && !have_boot_console) ft->nbcon_atomic = true; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 18488d6c17c0..92ac5c590927 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -972,6 +972,36 @@ update_con: return nbcon_context_exit_unsafe(ctxt); } +/* Track the nbcon emergency nesting per CPU. */ +static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); +static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; + +/** + * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer + * + * Context: For reading, any context. For writing, any context which could + * not be migrated to another CPU. + * Return: Either a pointer to the per CPU emergency nesting counter of + * the current CPU or to the init data during early boot. + * + * The function is safe for reading per-CPU variables in any context because + * preemption is disabled if the current CPU is in the emergency state. See + * also nbcon_cpu_emergency_enter(). + */ +static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) +{ + /* + * The value of __printk_percpu_data_ready gets set in normal + * context and before SMP initialization. As a result it could + * never change while inside an nbcon emergency section. + */ + if (!printk_percpu_data_ready()) + return &early_nbcon_pcpu_emergency_nesting; + + /* Open code this_cpu_ptr() without checking migration. */ + return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id()); +} + /** * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon * printing on the current CPU @@ -981,13 +1011,20 @@ update_con: * context for printing. * * The function is safe for reading per-CPU data in any context because - * preemption is disabled if the current CPU is in the panic state. + * preemption is disabled if the current CPU is in the emergency or panic + * state. */ enum nbcon_prio nbcon_get_default_prio(void) { + unsigned int *cpu_emergency_nesting; + if (this_cpu_in_panic()) return NBCON_PRIO_PANIC; + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + if (*cpu_emergency_nesting) + return NBCON_PRIO_EMERGENCY; + return NBCON_PRIO_NORMAL; } @@ -1246,6 +1283,42 @@ void nbcon_atomic_flush_unsafe(void) __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true); } +/** + * nbcon_cpu_emergency_enter - Enter an emergency section where printk() + * messages for that CPU are flushed directly + * + * Context: Any context. Disables preemption. + * + * When within an emergency section, printk() calls will attempt to flush any + * pending messages in the ringbuffer. + */ +void nbcon_cpu_emergency_enter(void) +{ + unsigned int *cpu_emergency_nesting; + + preempt_disable(); + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + (*cpu_emergency_nesting)++; +} + +/** + * nbcon_cpu_emergency_exit - Exit an emergency section + * + * Context: Within an emergency section. Enables preemption. + */ +void nbcon_cpu_emergency_exit(void) +{ + unsigned int *cpu_emergency_nesting; + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + + if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) + (*cpu_emergency_nesting)--; + + preempt_enable(); +} + /** * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize From 4833794db61c8cc4de4563a2754c7aedaffbb684 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Aug 2024 08:35:58 +0206 Subject: [PATCH 32/54] panic: Mark emergency section in warn Mark the full contents of __warn() as an emergency section. In this section, every printk() call will attempt to directly flush to the consoles using the EMERGENCY priority. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-33-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/panic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 93096d5abcc7..1a10b6e2a855 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -718,6 +718,8 @@ struct warn_args { void __warn(const char *file, int line, void *caller, unsigned taint, struct pt_regs *regs, struct warn_args *args) { + nbcon_cpu_emergency_enter(); + disable_trace_on_warning(); if (file) @@ -753,6 +755,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); + + nbcon_cpu_emergency_exit(); } #ifdef CONFIG_BUG From 4bdfa0d8e920c391e6cc0aa1feef8ed91d81f724 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:59 +0206 Subject: [PATCH 33/54] panic: Mark emergency section in oops Mark an emergency section beginning with oops_enter() until the end of oops_exit(). In this section, every printk() call will attempt to directly flush to the consoles using the EMERGENCY priority. The very end of oops_exit() performs a kmsg_dump(). This is not included in the emergency section because it is another flushing mechanism that should occur after the consoles have flushed the oops messages. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-34-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/panic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 1a10b6e2a855..753d12f4dc8f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -685,6 +685,7 @@ bool oops_may_print(void) */ void oops_enter(void) { + nbcon_cpu_emergency_enter(); tracing_off(); /* can't trust the integrity of the kernel anymore: */ debug_locks_off(); @@ -707,6 +708,7 @@ void oops_exit(void) { do_oops_enter_exit(); print_oops_end_marker(); + nbcon_cpu_emergency_exit(); kmsg_dump(KMSG_DUMP_OOPS); } From 8c03273a509c88b12c53a9ddf07702f83983e4de Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:36:00 +0206 Subject: [PATCH 34/54] rcu: Mark emergency sections in rcu stalls Mark emergency sections wherever multiple lines of rcu stall information are generated. In an emergency section, every printk() call will attempt to directly flush to the consoles using the EMERGENCY priority. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20240820063001.36405-35-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/rcu/tree_exp.h | 7 +++++++ kernel/rcu/tree_stall.h | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 4acd29d16fdb..f6b35a0585a8 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -7,6 +7,7 @@ * Authors: Paul E. McKenney */ +#include #include static void rcu_exp_handler(void *unused); @@ -590,6 +591,9 @@ static void synchronize_rcu_expedited_wait(void) return; if (rcu_stall_is_suppressed()) continue; + + nbcon_cpu_emergency_enter(); + j = jiffies; rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start)); trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall")); @@ -643,6 +647,9 @@ static void synchronize_rcu_expedited_wait(void) rcu_exp_print_detail_task_stall_rnp(rnp); } jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3; + + nbcon_cpu_emergency_exit(); + panic_on_rcu_stall(); } } diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 4b0e9d7c4c68..b3a6943127bc 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,7 @@ * Author: Paul E. McKenney */ +#include #include #include @@ -605,6 +606,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) if (rcu_stall_is_suppressed()) return; + nbcon_cpu_emergency_enter(); + /* * OK, time to rat on our buddy... * See Documentation/RCU/stallwarn.rst for info on how to debug @@ -657,6 +660,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); + nbcon_cpu_emergency_exit(); + panic_on_rcu_stall(); rcu_force_quiescent_state(); /* Kick them all. */ @@ -677,6 +682,8 @@ static void print_cpu_stall(unsigned long gps) if (rcu_stall_is_suppressed()) return; + nbcon_cpu_emergency_enter(); + /* * OK, time to rat on ourselves... * See Documentation/RCU/stallwarn.rst for info on how to debug @@ -706,6 +713,8 @@ static void print_cpu_stall(unsigned long gps) jiffies + 3 * rcu_jiffies_till_stall_check() + 3); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + nbcon_cpu_emergency_exit(); + panic_on_rcu_stall(); /* From 59cd94ef80094857f0d0085daa2e32badc4cddf4 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:36:01 +0206 Subject: [PATCH 35/54] lockdep: Mark emergency sections in lockdep splats Mark emergency sections wherever multiple lines of lock debugging output are generated. In an emergency section, every printk() call will attempt to directly flush to the consoles using the EMERGENCY priority. Note that debug_show_all_locks() and lockdep_print_held_locks() rely on their callers to enter the emergency section. This is because these functions can also be called in non-emergency situations (such as sysrq). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-36-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/locking/lockdep.c | 83 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 0349f957e672..7963deac33c3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -573,8 +574,10 @@ static struct lock_trace *save_trace(void) if (!debug_locks_off_graph_unlock()) return NULL; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return NULL; } @@ -887,11 +890,13 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { instrumentation_begin(); debug_locks_off(); + nbcon_cpu_emergency_enter(); printk(KERN_ERR "BUG: looking up invalid subclass: %u\n", subclass); printk(KERN_ERR "turning off the locking correctness validator.\n"); dump_stack(); + nbcon_cpu_emergency_exit(); instrumentation_end(); return NULL; } @@ -968,11 +973,13 @@ static bool assign_lock_key(struct lockdep_map *lock) else { /* Debug-check: all keys must be persistent! */ debug_locks_off(); + nbcon_cpu_emergency_enter(); pr_err("INFO: trying to register non-static key.\n"); pr_err("The code is fine but needs lockdep annotation, or maybe\n"); pr_err("you didn't initialize this object before use?\n"); pr_err("turning off the locking correctness validator.\n"); dump_stack(); + nbcon_cpu_emergency_exit(); return false; } @@ -1316,8 +1323,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) return NULL; } + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return NULL; } nr_lock_classes++; @@ -1349,11 +1358,13 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { graph_unlock(); + nbcon_cpu_emergency_enter(); printk("\nnew class %px: %s", class->key, class->name); if (class->name_version > 1) printk(KERN_CONT "#%d", class->name_version); printk(KERN_CONT "\n"); dump_stack(); + nbcon_cpu_emergency_exit(); if (!graph_lock()) { return NULL; @@ -1392,8 +1403,10 @@ static struct lock_list *alloc_list_entry(void) if (!debug_locks_off_graph_unlock()) return NULL; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return NULL; } nr_list_entries++; @@ -2039,6 +2052,8 @@ static noinline void print_circular_bug(struct lock_list *this, depth = get_lock_depth(target); + nbcon_cpu_emergency_enter(); + print_circular_bug_header(target, depth, check_src, check_tgt); parent = get_lock_parent(target); @@ -2057,6 +2072,8 @@ static noinline void print_circular_bug(struct lock_list *this, printk("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static noinline void print_bfs_bug(int ret) @@ -2569,6 +2586,8 @@ print_bad_irq_dependency(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=====================================================\n"); pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", @@ -2618,11 +2637,13 @@ print_bad_irq_dependency(struct task_struct *curr, pr_warn(" and %s-irq-unsafe lock:\n", irqclass); next_root->trace = save_trace(); if (!next_root->trace) - return; + goto out; print_shortest_lock_dependencies(forwards_entry, next_root); pr_warn("\nstack backtrace:\n"); dump_stack(); +out: + nbcon_cpu_emergency_exit(); } static const char *state_names[] = { @@ -2987,6 +3008,8 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("============================================\n"); pr_warn("WARNING: possible recursive locking detected\n"); @@ -3009,6 +3032,8 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } /* @@ -3606,6 +3631,8 @@ static void print_collision(struct task_struct *curr, struct held_lock *hlock_next, struct lock_chain *chain) { + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("============================\n"); pr_warn("WARNING: chain_key collision\n"); @@ -3622,6 +3649,8 @@ static void print_collision(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } #endif @@ -3712,8 +3741,10 @@ static inline int add_chain_cache(struct task_struct *curr, if (!debug_locks_off_graph_unlock()) return 0; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return 0; } chain->chain_key = chain_key; @@ -3730,8 +3761,10 @@ static inline int add_chain_cache(struct task_struct *curr, if (!debug_locks_off_graph_unlock()) return 0; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return 0; } @@ -3970,6 +4003,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, if (!debug_locks_off() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("================================\n"); pr_warn("WARNING: inconsistent lock state\n"); @@ -3998,6 +4033,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } /* @@ -4032,6 +4069,8 @@ print_irq_inversion_bug(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("========================================================\n"); pr_warn("WARNING: possible irq lock inversion dependency detected\n"); @@ -4072,11 +4111,13 @@ print_irq_inversion_bug(struct task_struct *curr, pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); root->trace = save_trace(); if (!root->trace) - return; + goto out; print_shortest_lock_dependencies(other, root); pr_warn("\nstack backtrace:\n"); dump_stack(); +out: + nbcon_cpu_emergency_exit(); } /* @@ -4153,6 +4194,8 @@ void print_irqtrace_events(struct task_struct *curr) { const struct irqtrace_events *trace = &curr->irqtrace; + nbcon_cpu_emergency_enter(); + printk("irq event stamp: %u\n", trace->irq_events); printk("hardirqs last enabled at (%u): [<%px>] %pS\n", trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip, @@ -4166,6 +4209,8 @@ void print_irqtrace_events(struct task_struct *curr) printk("softirqs last disabled at (%u): [<%px>] %pS\n", trace->softirq_disable_event, (void *)trace->softirq_disable_ip, (void *)trace->softirq_disable_ip); + + nbcon_cpu_emergency_exit(); } static int HARDIRQ_verbose(struct lock_class *class) @@ -4686,10 +4731,12 @@ unlock: * We must printk outside of the graph_lock: */ if (ret == 2) { + nbcon_cpu_emergency_enter(); printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); print_lock(this); print_irqtrace_events(curr); dump_stack(); + nbcon_cpu_emergency_exit(); } return ret; @@ -4730,6 +4777,8 @@ print_lock_invalid_wait_context(struct task_struct *curr, if (debug_locks_silent) return 0; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=============================\n"); pr_warn("[ BUG: Invalid wait context ]\n"); @@ -4749,6 +4798,8 @@ print_lock_invalid_wait_context(struct task_struct *curr, pr_warn("stack backtrace:\n"); dump_stack(); + nbcon_cpu_emergency_exit(); + return 0; } @@ -4956,6 +5007,8 @@ print_lock_nested_lock_not_held(struct task_struct *curr, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("==================================\n"); pr_warn("WARNING: Nested lock was not taken\n"); @@ -4976,6 +5029,8 @@ print_lock_nested_lock_not_held(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static int __lock_is_held(const struct lockdep_map *lock, int read); @@ -5024,11 +5079,13 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, debug_class_ops_inc(class); if (very_verbose(class)) { + nbcon_cpu_emergency_enter(); printk("\nacquire class [%px] %s", class->key, class->name); if (class->name_version > 1) printk(KERN_CONT "#%d", class->name_version); printk(KERN_CONT "\n"); dump_stack(); + nbcon_cpu_emergency_exit(); } /* @@ -5155,6 +5212,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, #endif if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { debug_locks_off(); + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!"); printk(KERN_DEBUG "depth: %i max: %lu!\n", curr->lockdep_depth, MAX_LOCK_DEPTH); @@ -5162,6 +5220,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, lockdep_print_held_locks(current); debug_show_all_locks(); dump_stack(); + nbcon_cpu_emergency_exit(); return 0; } @@ -5181,6 +5240,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=====================================\n"); pr_warn("WARNING: bad unlock balance detected!\n"); @@ -5197,6 +5258,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static noinstr int match_held_lock(const struct held_lock *hlock, @@ -5901,6 +5964,8 @@ static void print_lock_contention_bug(struct task_struct *curr, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=================================\n"); pr_warn("WARNING: bad contention detected!\n"); @@ -5917,6 +5982,8 @@ static void print_lock_contention_bug(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static void @@ -6536,6 +6603,8 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=========================\n"); pr_warn("WARNING: held lock freed!\n"); @@ -6548,6 +6617,8 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static inline int not_in_range(const void* mem_from, unsigned long mem_len, @@ -6594,6 +6665,8 @@ static void print_held_locks_bug(void) if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("====================================\n"); pr_warn("WARNING: %s/%d still has locks held!\n", @@ -6603,6 +6676,8 @@ static void print_held_locks_bug(void) lockdep_print_held_locks(current); pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } void debug_check_no_locks_held(void) @@ -6660,6 +6735,7 @@ asmlinkage __visible void lockdep_sys_exit(void) if (unlikely(curr->lockdep_depth)) { if (!debug_locks_off()) return; + nbcon_cpu_emergency_enter(); pr_warn("\n"); pr_warn("================================================\n"); pr_warn("WARNING: lock held when returning to user space!\n"); @@ -6668,6 +6744,7 @@ asmlinkage __visible void lockdep_sys_exit(void) pr_warn("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); + nbcon_cpu_emergency_exit(); } /* @@ -6684,6 +6761,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) bool rcu = warn_rcu_enter(); /* Note: the following can be executed concurrently, so be careful. */ + nbcon_cpu_emergency_enter(); pr_warn("\n"); pr_warn("=============================\n"); pr_warn("WARNING: suspicious RCU usage\n"); @@ -6722,6 +6800,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) lockdep_print_held_locks(curr); pr_warn("\nstack backtrace:\n"); dump_stack(); + nbcon_cpu_emergency_exit(); warn_rcu_exit(rcu); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); From 85a147a986e4feafb9286fabaf03a302a611cd85 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 3 Sep 2024 11:53:58 +0800 Subject: [PATCH 36/54] printk: Use the BITS_PER_LONG macro sizeof(unsigned long) * 8 is the number of bits in an unsigned long variable, replace it with BITS_PER_LONG macro to make it simpler. Signed-off-by: Jinjie Ruan Reviewed-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240903035358.308482-1-ruanjinjie@huawei.com Signed-off-by: Petr Mladek --- kernel/printk/printk_ringbuffer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index bd2a892deac1..8de6c495cf2b 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -4,6 +4,7 @@ #define _KERNEL_PRINTK_RINGBUFFER_H #include +#include #include #include #include @@ -122,7 +123,7 @@ enum desc_state { #define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) #define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -#define DESC_SV_BITS (sizeof(unsigned long) * 8) +#define DESC_SV_BITS BITS_PER_LONG #define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) #define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) #define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) From d33d5e683b0d3b4f5fc6a49ce17583f8ca663944 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 27 Aug 2024 16:25:31 +0206 Subject: [PATCH 37/54] printk: nbcon: Use raw_cpu_ptr() instead of open coding There is no need to open code a non-migration-checking this_cpu_ptr(). That is exactly what raw_cpu_ptr() is. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/87plpum4jw.fsf@jogness.linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 92ac5c590927..cf62f675c673 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -998,8 +998,7 @@ static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) if (!printk_percpu_data_ready()) return &early_nbcon_pcpu_emergency_nesting; - /* Open code this_cpu_ptr() without checking migration. */ - return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id()); + return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting); } /** From bd07d864522e7c3e4ee364e91aee8754992f5855 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:20 +0206 Subject: [PATCH 38/54] printk: nbcon: Add function for printers to reacquire ownership Since ownership can be lost at any time due to handover or takeover, a printing context _must_ be prepared to back out immediately and carefully. However, there are scenarios where the printing context must reacquire ownership in order to finalize or revert hardware changes. One such example is when interrupts are disabled during printing. No other context will automagically re-enable the interrupts. For this case, the disabling context _must_ reacquire nbcon ownership so that it can re-enable the interrupts. Provide nbcon_reacquire_nobuf() for exactly this purpose. It allows a printing context to reacquire ownership using the same priority as its previous ownership. Note that after a successful reacquire the printing context will have no output buffer because that has been lost. This function cannot be used to resume printing. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-2-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 6 ++++ kernel/printk/nbcon.c | 74 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 9a13f91b0c43..88050d30a9cc 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -366,6 +366,10 @@ struct console { * * The callback should allow the takeover whenever it is safe. It * increases the chance to see messages when the system is in trouble. + * If the driver must reacquire ownership in order to finalize or + * revert hardware changes, nbcon_reacquire_nobuf() can be used. + * However, on reacquire the buffer content is no longer available. A + * reacquire cannot be used to resume printing. * * The callback can be called from any context (including NMI). * Therefore it must avoid usage of any locking and instead rely @@ -558,12 +562,14 @@ extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt); #else static inline void nbcon_cpu_emergency_enter(void) { } static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { } #endif extern int console_set_on_cmdline; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index cf62f675c673..8a1bf6f94bce 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -830,6 +830,19 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, + char *buf, unsigned int len) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + struct nbcon_state cur; + + wctxt->outbuf = buf; + wctxt->len = len; + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; +} + /** * nbcon_enter_unsafe - Enter an unsafe region in the driver * @wctxt: The write context that was handed to the write function @@ -845,8 +858,12 @@ out: bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + bool is_owner; - return nbcon_context_enter_unsafe(ctxt); + is_owner = nbcon_context_enter_unsafe(ctxt); + if (!is_owner) + nbcon_write_context_set_buf(wctxt, NULL, 0); + return is_owner; } EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); @@ -865,11 +882,43 @@ EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + bool ret; - return nbcon_context_exit_unsafe(ctxt); + ret = nbcon_context_exit_unsafe(ctxt); + if (!ret) + nbcon_write_context_set_buf(wctxt, NULL, 0); + return ret; } EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); +/** + * nbcon_reacquire_nobuf - Reacquire a console after losing ownership + * while printing + * @wctxt: The write context that was handed to the write callback + * + * Since ownership can be lost at any time due to handover or takeover, a + * printing context _must_ be prepared to back out immediately and + * carefully. However, there are scenarios where the printing context must + * reacquire ownership in order to finalize or revert hardware changes. + * + * This function allows a printing context to reacquire ownership using the + * same priority as its previous ownership. + * + * Note that after a successful reacquire the printing context will have no + * output buffer because that has been lost. This function cannot be used to + * resume printing. + */ +void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + while (!nbcon_context_try_acquire(ctxt)) + cpu_relax(); + + nbcon_write_context_set_buf(wctxt, NULL, 0); +} +EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); + /** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function @@ -895,7 +944,6 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) .pbufs = ctxt->pbufs, }; unsigned long con_dropped; - struct nbcon_state cur; unsigned long dropped; /* @@ -930,10 +978,7 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) goto update_con; /* Initialize the write context for driver callbacks. */ - wctxt->outbuf = &pmsg.pbufs->outbuf[0]; - wctxt->len = pmsg.outbuf_len; - nbcon_state_read(con, &cur); - wctxt->unsafe_takeover = cur.unsafe_takeover; + nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); if (con->write_atomic) { con->write_atomic(con, wctxt); @@ -947,6 +992,21 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) return false; } + if (!wctxt->outbuf) { + /* + * Ownership was lost and reacquired by the driver. Handle it + * as if ownership was lost. + */ + nbcon_context_release(ctxt); + return false; + } + + /* + * Ownership may have been lost but _not_ reacquired by the driver. + * This case is detected and handled when entering unsafe to update + * dropped/seq values. + */ + /* * Since any dropped message was successfully output, reset the * dropped count for the console. From e37577ebbf1e875f8cc6159f25a1b559018d087f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:21 +0206 Subject: [PATCH 39/54] printk: Fail pr_flush() if before SYSTEM_SCHEDULING A follow-up change adds pr_flush() to console unregistration. However, with boot consoles unregistration can happen very early if there are also regular consoles registering as well. In this case the pr_flush() is not important because all consoles are flushed when checking the initial console sequence number. Allow pr_flush() to fail if @system_state has not yet reached SYSTEM_SCHEDULING. This avoids might_sleep() and msleep() explosions that would otherwise occur: [ 0.436739][ T0] printk: legacy console [ttyS0] enabled [ 0.439820][ T0] printk: legacy bootconsole [earlyser0] disabled [ 0.446822][ T0] BUG: scheduling while atomic: swapper/0/0/0x00000002 [ 0.450491][ T0] 1 lock held by swapper/0/0: [ 0.457897][ T0] #0: ffffffff82ae5f88 (console_mutex){+.+.}-{4:4}, at: console_list_lock+0x20/0x70 [ 0.463141][ T0] Modules linked in: [ 0.465307][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.10.0-rc1+ #372 [ 0.469394][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 0.474402][ T0] Call Trace: [ 0.476246][ T0] [ 0.481473][ T0] dump_stack_lvl+0x93/0xb0 [ 0.483949][ T0] dump_stack+0x10/0x20 [ 0.486256][ T0] __schedule_bug+0x68/0x90 [ 0.488753][ T0] __schedule+0xb9b/0xd80 [ 0.491179][ T0] ? lock_release+0xb5/0x270 [ 0.493732][ T0] schedule+0x43/0x170 [ 0.495998][ T0] schedule_timeout+0xc5/0x1e0 [ 0.498634][ T0] ? __pfx_process_timeout+0x10/0x10 [ 0.501522][ T0] ? msleep+0x13/0x50 [ 0.503728][ T0] msleep+0x3c/0x50 [ 0.505847][ T0] __pr_flush.constprop.0.isra.0+0x56/0x500 [ 0.509050][ T0] ? _printk+0x58/0x80 [ 0.511332][ T0] ? lock_is_held_type+0x9c/0x110 [ 0.514106][ T0] unregister_console_locked+0xe1/0x450 [ 0.517144][ T0] register_console+0x509/0x620 [ 0.519827][ T0] ? __pfx_univ8250_console_init+0x10/0x10 [ 0.523042][ T0] univ8250_console_init+0x24/0x40 [ 0.525845][ T0] console_init+0x43/0x210 [ 0.528280][ T0] start_kernel+0x493/0x980 [ 0.530773][ T0] x86_64_start_reservations+0x18/0x30 [ 0.533755][ T0] x86_64_start_kernel+0xae/0xc0 [ 0.536473][ T0] common_startup_64+0x12c/0x138 [ 0.539210][ T0] And then the kernel goes into an infinite loop complaining about: 1. releasing a pinned lock 2. unpinning an unpinned lock 3. bad: scheduling from the idle thread! 4. goto 1 Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-3-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 6accd1704e73..acf668001096 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3991,6 +3991,10 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre u64 diff; u64 seq; + /* Sorry, pr_flush() will not work this early. */ + if (system_state < SYSTEM_SCHEDULING) + return false; + might_sleep(); seq = prb_next_reserve_seq(prb); From 0e53e2d9f72080b7ea1a4003558041fee78cdef9 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:22 +0206 Subject: [PATCH 40/54] printk: Flush console on unregister_console() Ensure consoles have flushed pending records before unregistering. The console should print up to at least its related "console disabled" record. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-4-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index acf668001096..c79e962b822a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3771,11 +3771,16 @@ static int unregister_console_locked(struct console *console) if (res > 0) return 0; + if (!console_is_registered_locked(console)) + res = -ENODEV; + else if (console_is_usable(console, console->flags)) + __pr_flush(console, 1000, true); + /* Disable it unconditionally */ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); - if (!console_is_registered_locked(console)) - return -ENODEV; + if (res < 0) + return res; /* * Use the driver synchronization to ensure that the hardware is not From 6cb58cfebb293286f5ce8f6fba3e29122308e9e0 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:23 +0206 Subject: [PATCH 41/54] printk: nbcon: Add context to usable() and emit() The nbcon consoles will have two callbacks to be used for different contexts. In order to determine if an nbcon console is usable, console_is_usable() must know if it is a context that will need to use the optional write_atomic() callback. Also, nbcon_emit_next_record() must know which callback it needs to call. Add an extra parameter @use_atomic to console_is_usable() and nbcon_emit_next_record() to specify this. Since so far only the write_atomic() callback exists, @use_atomic is set to true for all call sites. For legacy consoles, @use_atomic is not used. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-5-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 8 +++++--- kernel/printk/nbcon.c | 34 +++++++++++++++++++--------------- kernel/printk/printk.c | 6 +++--- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 8e36d8695f81..ad631824e7e5 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -97,7 +97,7 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, * which can also play a role in deciding if @con can be used to print * records. */ -static inline bool console_is_usable(struct console *con, short flags) +static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) { if (!(flags & CON_ENABLED)) return false; @@ -106,7 +106,8 @@ static inline bool console_is_usable(struct console *con, short flags) return false; if (flags & CON_NBCON) { - if (!con->write_atomic) + /* The write_atomic() callback is optional. */ + if (use_atomic && !con->write_atomic) return false; } else { if (!con->write) @@ -149,7 +150,8 @@ static inline void nbcon_atomic_flush_pending(void) { } static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie) { return false; } -static inline bool console_is_usable(struct console *con, short flags) { return false; } +static inline bool console_is_usable(struct console *con, short flags, + bool use_atomic) { return false; } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 8a1bf6f94bce..88db24f9a8de 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -922,6 +922,7 @@ EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); /** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function + * @use_atomic: True if the write_atomic() callback is to be used * * Return: True if this context still owns the console. False if * ownership was handed over or taken. @@ -935,7 +936,7 @@ EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); * When true is returned, @wctxt->ctxt.backlog indicates whether there are * still records pending in the ringbuffer, */ -static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) +static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; @@ -946,6 +947,18 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) unsigned long con_dropped; unsigned long dropped; + /* + * This function should never be called for consoles that have not + * implemented the necessary callback for writing: i.e. legacy + * consoles and, when atomic, nbcon consoles with no write_atomic(). + * Handle it as if ownership was lost and try to continue. + */ + if (WARN_ON_ONCE((use_atomic && !con->write_atomic) || + !(console_srcu_read_flags(con) & CON_NBCON))) { + nbcon_context_release(ctxt); + return false; + } + /* * The printk buffers are filled within an unsafe section. This * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from @@ -980,17 +993,8 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) /* Initialize the write context for driver callbacks. */ nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); - if (con->write_atomic) { + if (use_atomic) con->write_atomic(con, wctxt); - } else { - /* - * This function should never be called for legacy consoles. - * Handle it as if ownership was lost and try to continue. - */ - WARN_ON_ONCE(1); - nbcon_context_release(ctxt); - return false; - } if (!wctxt->outbuf) { /* @@ -1118,7 +1122,7 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) * The higher priority printing context takes over responsibility * to print the pending records. */ - if (!nbcon_emit_next_record(wctxt)) + if (!nbcon_emit_next_record(wctxt, true)) return false; nbcon_context_release(ctxt); @@ -1219,7 +1223,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, * handed over or taken over. In both cases the context is no * longer valid. */ - if (!nbcon_emit_next_record(&wctxt)) + if (!nbcon_emit_next_record(&wctxt, true)) return -EAGAIN; if (!ctxt->backlog) { @@ -1305,7 +1309,7 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeove if (!(flags & CON_NBCON)) continue; - if (!console_is_usable(con, flags)) + if (!console_is_usable(con, flags, true)) continue; if (nbcon_seq_read(con) >= stop_seq) @@ -1490,7 +1494,7 @@ void nbcon_device_release(struct console *con) * the console is usable throughout flushing. */ cookie = console_srcu_read_lock(); - if (console_is_usable(con, console_srcu_read_flags(con)) && + if (console_is_usable(con, console_srcu_read_flags(con), true) && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { /* * If nbcon_atomic flushing is not available, fallback to diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c79e962b822a..846306ac2663 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3071,7 +3071,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove u64 printk_seq; bool progress; - if (!console_is_usable(con, flags)) + if (!console_is_usable(con, flags, true)) continue; any_usable = true; @@ -3773,7 +3773,7 @@ static int unregister_console_locked(struct console *console) if (!console_is_registered_locked(console)) res = -ENODEV; - else if (console_is_usable(console, console->flags)) + else if (console_is_usable(console, console->flags, true)) __pr_flush(console, 1000, true); /* Disable it unconditionally */ @@ -4043,7 +4043,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * that they make forward progress, so only increment * @diff for usable consoles. */ - if (!console_is_usable(c, flags)) + if (!console_is_usable(c, flags, true)) continue; if (flags & CON_NBCON) { From fb9fabf3d86216961d1103ecbc33e98d5f6c48f5 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:24 +0206 Subject: [PATCH 42/54] printk: nbcon: Init @nbcon_seq to highest possible When initializing an nbcon console, have nbcon_alloc() set @nbcon_seq to the highest possible sequence number. For all practical purposes, this will guarantee that the console will have nothing to print until later when @nbcon_seq is set to the proper initial printing value. This will be particularly important once kthread printing is introduced because nbcon_alloc() can create/start the kthread before the desired initial sequence number is known. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-6-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 8 +++++++- kernel/printk/printk_ringbuffer.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 88db24f9a8de..bc684ff5028a 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1397,7 +1397,13 @@ bool nbcon_alloc(struct console *con) struct nbcon_state state = { }; nbcon_state_set(con, &state); - atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), 0); + + /* + * Initialize @nbcon_seq to the highest possible sequence number so + * that practically speaking it will have nothing to print until a + * desired initial sequence number has been set via nbcon_seq_force(). + */ + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb)); if (con->flags & CON_BOOT) { /* diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 8de6c495cf2b..4ef81349d9fb 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -404,10 +404,12 @@ u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); #define __u64seq_to_ulseq(u64seq) (u64seq) #define __ulseq_to_u64seq(rb, ulseq) (ulseq) +#define ULSEQ_MAX(rb) (-1) #else /* CONFIG_64BIT */ #define __u64seq_to_ulseq(u64seq) ((u32)u64seq) +#define ULSEQ_MAX(rb) __u64seq_to_ulseq(prb_first_seq(rb) + 0x80000000UL) static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) { From 76f258bf3f2aae570209319944703a92ac64e29e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Sep 2024 14:11:25 +0206 Subject: [PATCH 43/54] printk: nbcon: Introduce printer kthreads Provide the main implementation for running a printer kthread per nbcon console that is takeover/handover aware. This includes: - new mandatory write_thread() callback - kthread creation - kthread main printing loop - kthread wakeup mechanism - kthread shutdown kthread creation is a bit tricky because consoles may register before kthreads can be created. In such cases, registration will succeed, even though no kthread exists. Once kthreads can be created, an early_initcall will set @printk_kthreads_ready. If there are no registered boot consoles, the early_initcall creates the kthreads for all registered nbcon consoles. If kthread creation fails, the related console is unregistered. If there are registered boot consoles when @printk_kthreads_ready is set, no kthreads are created until the final boot console unregisters. Once kthread creation finally occurs, @printk_kthreads_running is set so that the system knows kthreads are available for all registered nbcon consoles. If @printk_kthreads_running is already set when the console is registering, the kthread is created during registration. If kthread creation fails, the registration will fail. Until @printk_kthreads_running is set, console printing occurs directly via the console_lock. kthread shutdown on system shutdown/reboot is necessary to ensure the printer kthreads finish their printing so that the system can cleanly transition back to direct printing via the console_lock in order to reliably push out the final shutdown/reboot messages. @printk_kthreads_running is cleared before shutting down the individual kthreads. The kthread uses a new mandatory write_thread() callback that is called with both device_lock() and the console context acquired. The console ownership handling is necessary for synchronization against write_atomic() which is synchronized only via the console context ownership. The device_lock() serializes acquiring the console context with NBCON_PRIO_NORMAL. It is needed in case the device_lock() does not disable preemption. It prevents the following race: CPU0 CPU1 [ task A ] nbcon_context_try_acquire() # success with NORMAL prio # .unsafe == false; // safe for takeover [ schedule: task A -> B ] WARN_ON() nbcon_atomic_flush_pending() nbcon_context_try_acquire() # success with EMERGENCY prio # flushing nbcon_context_release() # HERE: con->nbcon_state is free # to take by anyone !!! nbcon_context_try_acquire() # success with NORMAL prio [ task B ] [ schedule: task B -> A ] nbcon_enter_unsafe() nbcon_context_can_proceed() BUG: nbcon_context_can_proceed() returns "true" because the console is owned by a context on CPU0 with NBCON_PRIO_NORMAL. But it should return "false". The console is owned by a context from task B and we do the check in a context from task A. Note that with these changes, the printer kthreads do not yet take over full responsibility for nbcon printing during normal operation. These changes only focus on the lifecycle of the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 40 +++++++ kernel/printk/internal.h | 27 +++++ kernel/printk/nbcon.c | 245 +++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 108 +++++++++++++++++ 4 files changed, 420 insertions(+) diff --git a/include/linux/console.h b/include/linux/console.h index 88050d30a9cc..788ce9c829f6 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include @@ -324,6 +326,9 @@ struct nbcon_write_context { * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking + * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; @@ -377,6 +382,37 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @write_thread: + * + * NBCON callback to write out text in task context. + * + * This callback must be called only in task context with both + * device_lock() and the nbcon console acquired with + * NBCON_PRIO_NORMAL. + * + * The same rules for console ownership verification and unsafe + * sections handling applies as with write_atomic(). + * + * The console ownership handling is necessary for synchronization + * against write_atomic() which is synchronized only via the context. + * + * The device_lock() provides the primary serialization for operations + * on the device. It might be as relaxed (mutex)[*] or as tight + * (disabled preemption and interrupts) as needed. It allows + * the kthread to operate in the least restrictive mode[**]. + * + * [*] Standalone nbcon_context_try_acquire() is not safe with + * the preemption enabled, see nbcon_owner_matches(). But it + * can be safe when always called in the preemptive context + * under the device_lock(). + * + * [**] The device_lock() makes sure that nbcon_context_try_acquire() + * would never need to spin which is important especially with + * PREEMPT_RT. + */ + void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt); + /** * @device_lock: * @@ -423,7 +459,11 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + struct printk_buffers *pbufs; + struct task_struct *kthread; + struct rcuwait rcuwait; + struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index ad631824e7e5..14f7fc71e20d 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -48,6 +48,7 @@ struct printk_ringbuffer; struct dev_printk_info; extern struct printk_ringbuffer *prb; +extern bool printk_kthreads_running; __printf(4, 0) int vprintk_store(int facility, int level, @@ -90,6 +91,9 @@ enum nbcon_prio nbcon_get_default_prio(void); void nbcon_atomic_flush_pending(void); bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie); +bool nbcon_kthread_create(struct console *con); +void nbcon_kthread_stop(struct console *con); +void nbcon_kthreads_wake(void); /* * Check if the given console is currently capable and allowed to print @@ -125,12 +129,34 @@ static inline bool console_is_usable(struct console *con, short flags, bool use_ return true; } +/** + * nbcon_kthread_wake - Wake up a console printing thread + * @con: Console to operate on + */ +static inline void nbcon_kthread_wake(struct console *con) +{ + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. + * + * The full memory barrier in rcuwait_wake_up() pairs with the full + * memory barrier within set_current_state() of + * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait() + * adds the waiter but before it has checked the wait condition. + * + * This pairs with nbcon_kthread_func:A. + */ + rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */ +} + #else #define PRINTK_PREFIX_MAX 0 #define PRINTK_MESSAGE_MAX 0 #define PRINTKRB_RECORD_MAX 0 +#define printk_kthreads_running (false) + /* * In !PRINTK builds we still export console_sem * semaphore and some of console functions (console_unlock()/etc.), so @@ -149,6 +175,7 @@ static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_N static inline void nbcon_atomic_flush_pending(void) { } static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie) { return false; } +static inline void nbcon_kthread_wake(struct console *con) { } static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) { return false; } diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index bc684ff5028a..388322c74349 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -952,6 +953,9 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_a * implemented the necessary callback for writing: i.e. legacy * consoles and, when atomic, nbcon consoles with no write_atomic(). * Handle it as if ownership was lost and try to continue. + * + * Note that for nbcon consoles the write_thread() callback is + * mandatory and was already checked in nbcon_alloc(). */ if (WARN_ON_ONCE((use_atomic && !con->write_atomic) || !(console_srcu_read_flags(con) & CON_NBCON))) { @@ -995,6 +999,8 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_a if (use_atomic) con->write_atomic(con, wctxt); + else + con->write_thread(con, wctxt); if (!wctxt->outbuf) { /* @@ -1036,6 +1042,228 @@ update_con: return nbcon_context_exit_unsafe(ctxt); } +/** + * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup + * @con: Console to operate on + * @ctxt: The nbcon context from nbcon_context_try_acquire() + * + * Return: True if the thread should shutdown or if the console is + * allowed to print and a record is available. False otherwise. + * + * After the thread wakes up, it must first check if it should shutdown before + * attempting any printing. + */ +static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt) +{ + bool ret = false; + short flags; + int cookie; + + if (kthread_should_stop()) + return true; + + cookie = console_srcu_read_lock(); + + flags = console_srcu_read_flags(con); + if (console_is_usable(con, flags, false)) { + /* Bring the sequence in @ctxt up to date */ + ctxt->seq = nbcon_seq_read(con); + + ret = prb_read_valid(prb, ctxt->seq, NULL); + } + + console_srcu_read_unlock(cookie); + return ret; +} + +/** + * nbcon_kthread_func - The printer thread function + * @__console: Console to operate on + * + * Return: 0 + */ +static int nbcon_kthread_func(void *__console) +{ + struct console *con = __console; + struct nbcon_write_context wctxt = { + .ctxt.console = con, + .ctxt.prio = NBCON_PRIO_NORMAL, + }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + short con_flags; + bool backlog; + int cookie; + +wait_for_event: + /* + * Guarantee this task is visible on the rcuwait before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * ___rcuwait_wait_event() pairs with the full memory + * barrier within rcuwait_has_sleeper(). + * + * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A. + */ + rcuwait_wait_event(&con->rcuwait, + nbcon_kthread_should_wakeup(con, ctxt), + TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */ + + do { + if (kthread_should_stop()) + return 0; + + backlog = false; + + /* + * Keep the srcu read lock around the entire operation so that + * synchronize_srcu() can guarantee that the kthread stopped + * or suspended printing. + */ + cookie = console_srcu_read_lock(); + + con_flags = console_srcu_read_flags(con); + + if (console_is_usable(con, con_flags, false)) { + unsigned long lock_flags; + + con->device_lock(con, &lock_flags); + + /* + * Ensure this stays on the CPU to make handover and + * takeover possible. + */ + cant_migrate(); + + if (nbcon_context_try_acquire(ctxt)) { + /* + * If the emit fails, this context is no + * longer the owner. + */ + if (nbcon_emit_next_record(&wctxt, false)) { + nbcon_context_release(ctxt); + backlog = ctxt->backlog; + } + } + + con->device_unlock(con, lock_flags); + } + + console_srcu_read_unlock(cookie); + + cond_resched(); + + } while (backlog); + + goto wait_for_event; +} + +/** + * nbcon_irq_work - irq work to wake console printer thread + * @irq_work: The irq work to operate on + */ +static void nbcon_irq_work(struct irq_work *irq_work) +{ + struct console *con = container_of(irq_work, struct console, irq_work); + + nbcon_kthread_wake(con); +} + +static inline bool rcuwait_has_sleeper(struct rcuwait *w) +{ + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. + * + * This full memory barrier pairs with the full memory barrier within + * set_current_state() of ___rcuwait_wait_event(), which is called + * after prepare_to_rcuwait() adds the waiter but before it has + * checked the wait condition. + * + * This pairs with nbcon_kthread_func:A. + */ + smp_mb(); /* LMM(rcuwait_has_sleeper:A) */ + return rcuwait_active(w); +} + +/** + * nbcon_kthreads_wake - Wake up printing threads using irq_work + */ +void nbcon_kthreads_wake(void) +{ + struct console *con; + int cookie; + + if (!printk_kthreads_running) + return; + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + if (!(console_srcu_read_flags(con) & CON_NBCON)) + continue; + + /* + * Only schedule irq_work if the printing thread is + * actively waiting. If not waiting, the thread will + * notice by itself that it has work to do. + */ + if (rcuwait_has_sleeper(&con->rcuwait)) + irq_work_queue(&con->irq_work); + } + console_srcu_read_unlock(cookie); +} + +/* + * nbcon_kthread_stop - Stop a console printer thread + * @con: Console to operate on + */ +void nbcon_kthread_stop(struct console *con) +{ + lockdep_assert_console_list_lock_held(); + + if (!con->kthread) + return; + + kthread_stop(con->kthread); + con->kthread = NULL; +} + +/** + * nbcon_kthread_create - Create a console printer thread + * @con: Console to operate on + * + * Return: True if the kthread was started or already exists. + * Otherwise false and @con must not be registered. + * + * This function is called when it will be expected that nbcon consoles are + * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL + * will be no longer flushed by the legacy loop. This is why failure must + * be fatal for console registration. + * + * If @con was already registered and this function fails, @con must be + * unregistered before the global state variable @printk_kthreads_running + * can be set. + */ +bool nbcon_kthread_create(struct console *con) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + if (con->kthread) + return true; + + kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); + if (WARN_ON(IS_ERR(kt))) { + con_printk(KERN_ERR, con, "failed to start printing thread\n"); + return false; + } + + con->kthread = kt; + + return true; +} + /* Track the nbcon emergency nesting per CPU. */ static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; @@ -1396,6 +1624,12 @@ bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; + /* The write_thread() callback is mandatory. */ + if (WARN_ON(!con->write_thread)) + return false; + + rcuwait_init(&con->rcuwait); + init_irq_work(&con->irq_work, nbcon_irq_work); nbcon_state_set(con, &state); /* @@ -1418,6 +1652,14 @@ bool nbcon_alloc(struct console *con) con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); return false; } + + if (printk_kthreads_running) { + if (!nbcon_kthread_create(con)) { + kfree(con->pbufs); + con->pbufs = NULL; + return false; + } + } } return true; @@ -1431,6 +1673,9 @@ void nbcon_free(struct console *con) { struct nbcon_state state = { }; + if (printk_kthreads_running) + nbcon_kthread_stop(con); + nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 846306ac2663..f27c76c3b5cf 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -496,6 +497,9 @@ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; +/* True when _all_ printer threads are available for printing. */ +bool printk_kthreads_running; + struct latched_seq { seqcount_latch_t latch; u64 val[2]; @@ -3027,6 +3031,8 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co return false; } +static inline void printk_kthreads_check_locked(void) { } + #endif /* CONFIG_PRINTK */ /* @@ -3387,6 +3393,102 @@ void console_start(struct console *console) } EXPORT_SYMBOL(console_start); +#ifdef CONFIG_PRINTK +static int unregister_console_locked(struct console *console); + +/* True when system boot is far enough to create printer threads. */ +static bool printk_kthreads_ready __ro_after_init; + +/** + * printk_kthreads_shutdown - shutdown all threaded printers + * + * On system shutdown all threaded printers are stopped. This allows printk + * to transition back to atomic printing, thus providing a robust mechanism + * for the final shutdown/reboot messages to be output. + */ +static void printk_kthreads_shutdown(void) +{ + struct console *con; + + console_list_lock(); + if (printk_kthreads_running) { + printk_kthreads_running = false; + + for_each_console(con) { + if (con->flags & CON_NBCON) + nbcon_kthread_stop(con); + } + + /* + * The threads may have been stopped while printing a + * backlog. Flush any records left over. + */ + nbcon_atomic_flush_pending(); + } + console_list_unlock(); +} + +static struct syscore_ops printk_syscore_ops = { + .shutdown = printk_kthreads_shutdown, +}; + +/* + * If appropriate, start nbcon kthreads and set @printk_kthreads_running. + * If any kthreads fail to start, those consoles are unregistered. + * + * Must be called under console_list_lock(). + */ +static void printk_kthreads_check_locked(void) +{ + struct hlist_node *tmp; + struct console *con; + + lockdep_assert_console_list_lock_held(); + + if (!printk_kthreads_ready) + return; + + /* + * Printer threads cannot be started as long as any boot console is + * registered because there is no way to synchronize the hardware + * registers between boot console code and regular console code. + * It can only be known that there will be no new boot consoles when + * an nbcon console is registered. + */ + if (have_boot_console || !have_nbcon_console) { + /* Clear flag in case all nbcon consoles unregistered. */ + printk_kthreads_running = false; + return; + } + + if (printk_kthreads_running) + return; + + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (!(con->flags & CON_NBCON)) + continue; + + if (!nbcon_kthread_create(con)) + unregister_console_locked(con); + } + + printk_kthreads_running = true; +} + +static int __init printk_set_kthreads_ready(void) +{ + register_syscore_ops(&printk_syscore_ops); + + console_list_lock(); + printk_kthreads_ready = true; + printk_kthreads_check_locked(); + console_list_unlock(); + + return 0; +} +early_initcall(printk_set_kthreads_ready); +#endif /* CONFIG_PRINTK */ + static int __read_mostly keep_bootcon; static int __init keep_bootcon_setup(char *str) @@ -3745,6 +3847,9 @@ void register_console(struct console *newcon) unregister_console_locked(con); } } + + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); unlock: console_list_unlock(); } @@ -3841,6 +3946,9 @@ static int unregister_console_locked(struct console *console) if (!found_nbcon_con) have_nbcon_console = found_nbcon_con; + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); + return res; } From 9b79a3d0d64abae92457cb830baa6c6a717778b0 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:26 +0206 Subject: [PATCH 44/54] printk: nbcon: Relocate nbcon_atomic_emit_one() Move nbcon_atomic_emit_one() so that it can be used by nbcon_kthread_func() in a follow-up commit. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-8-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 78 +++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 388322c74349..57a0e9b542fe 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1042,6 +1042,45 @@ update_con: return nbcon_context_exit_unsafe(ctxt); } +/* + * nbcon_atomic_emit_one - Print one record for an nbcon console using the + * write_atomic() callback + * @wctxt: An initialized write context struct to use for this context + * + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This is an internal helper to handle the locking of the console before + * calling nbcon_emit_next_record(). + */ +static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + if (!nbcon_context_try_acquire(ctxt)) + return false; + + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + * + * The higher priority printing context takes over responsibility + * to print the pending records. + */ + if (!nbcon_emit_next_record(wctxt, true)) + return false; + + nbcon_context_release(ctxt); + + return ctxt->backlog; +} + /** * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup * @con: Console to operate on @@ -1319,45 +1358,6 @@ enum nbcon_prio nbcon_get_default_prio(void) return NBCON_PRIO_NORMAL; } -/* - * nbcon_atomic_emit_one - Print one record for an nbcon console using the - * write_atomic() callback - * @wctxt: An initialized write context struct to use for this context - * - * Return: True, when a record has been printed and there are still - * pending records. The caller might want to continue flushing. - * - * False, when there is no pending record, or when the console - * context cannot be acquired, or the ownership has been lost. - * The caller should give up. Either the job is done, cannot be - * done, or will be handled by the owning context. - * - * This is an internal helper to handle the locking of the console before - * calling nbcon_emit_next_record(). - */ -static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) -{ - struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); - - if (!nbcon_context_try_acquire(ctxt)) - return false; - - /* - * nbcon_emit_next_record() returns false when the console was - * handed over or taken over. In both cases the context is no - * longer valid. - * - * The higher priority printing context takes over responsibility - * to print the pending records. - */ - if (!nbcon_emit_next_record(wctxt, true)) - return false; - - nbcon_context_release(ctxt); - - return ctxt->backlog; -} - /** * nbcon_legacy_emit_next_record - Print one record for an nbcon console * in legacy contexts From 5c586baa60e46d9fccd04f04e16f8a50b2fbc1af Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:27 +0206 Subject: [PATCH 45/54] printk: nbcon: Use thread callback if in task context for legacy When printing via console_lock, the write_atomic() callback is used for nbcon consoles. However, if it is known that the current context is a task context, the write_thread() callback can be used instead. Using write_thread() instead of write_atomic() helps to reduce large disabled preemption regions when the device_lock does not disable preemption. This is mainly a preparatory change to allow avoiding write_atomic() completely during normal operation if boot consoles are registered. As a side-effect, it also allows consolidating the printing code for legacy printing and the kthread printer. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-9-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 4 +- kernel/printk/nbcon.c | 95 +++++++++++++++++++++++----------------- kernel/printk/printk.c | 5 ++- 3 files changed, 59 insertions(+), 45 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 14f7fc71e20d..a96d4114a1db 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -90,7 +90,7 @@ void nbcon_free(struct console *con); enum nbcon_prio nbcon_get_default_prio(void); void nbcon_atomic_flush_pending(void); bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, - int cookie); + int cookie, bool use_atomic); bool nbcon_kthread_create(struct console *con); void nbcon_kthread_stop(struct console *con); void nbcon_kthreads_wake(void); @@ -174,7 +174,7 @@ static inline void nbcon_free(struct console *con) { } static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } static inline void nbcon_atomic_flush_pending(void) { } static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, - int cookie) { return false; } + int cookie, bool use_atomic) { return false; } static inline void nbcon_kthread_wake(struct console *con) { } static inline bool console_is_usable(struct console *con, short flags, diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 57a0e9b542fe..784e5de88abf 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1043,9 +1043,10 @@ update_con: } /* - * nbcon_atomic_emit_one - Print one record for an nbcon console using the - * write_atomic() callback + * nbcon_emit_one - Print one record for an nbcon console using the + * specified callback * @wctxt: An initialized write context struct to use for this context + * @use_atomic: True if the write_atomic() callback is to be used * * Return: True, when a record has been printed and there are still * pending records. The caller might want to continue flushing. @@ -1058,12 +1059,25 @@ update_con: * This is an internal helper to handle the locking of the console before * calling nbcon_emit_next_record(). */ -static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) +static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + unsigned long flags; + bool ret = false; + + if (!use_atomic) { + con->device_lock(con, &flags); + + /* + * Ensure this stays on the CPU to make handover and + * takeover possible. + */ + cant_migrate(); + } if (!nbcon_context_try_acquire(ctxt)) - return false; + goto out; /* * nbcon_emit_next_record() returns false when the console was @@ -1073,12 +1087,16 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) * The higher priority printing context takes over responsibility * to print the pending records. */ - if (!nbcon_emit_next_record(wctxt, true)) - return false; + if (!nbcon_emit_next_record(wctxt, use_atomic)) + goto out; nbcon_context_release(ctxt); - return ctxt->backlog; + ret = ctxt->backlog; +out: + if (!use_atomic) + con->device_unlock(con, flags); + return ret; } /** @@ -1163,30 +1181,8 @@ wait_for_event: con_flags = console_srcu_read_flags(con); - if (console_is_usable(con, con_flags, false)) { - unsigned long lock_flags; - - con->device_lock(con, &lock_flags); - - /* - * Ensure this stays on the CPU to make handover and - * takeover possible. - */ - cant_migrate(); - - if (nbcon_context_try_acquire(ctxt)) { - /* - * If the emit fails, this context is no - * longer the owner. - */ - if (nbcon_emit_next_record(&wctxt, false)) { - nbcon_context_release(ctxt); - backlog = ctxt->backlog; - } - } - - con->device_unlock(con, lock_flags); - } + if (console_is_usable(con, con_flags, false)) + backlog = nbcon_emit_one(&wctxt, false); console_srcu_read_unlock(cookie); @@ -1367,6 +1363,13 @@ enum nbcon_prio nbcon_get_default_prio(void) * both the console_lock and the SRCU read lock. Otherwise it * is set to false. * @cookie: The cookie from the SRCU read lock. + * @use_atomic: Set true when called in an atomic or unknown context. + * It affects which nbcon callback will be used: write_atomic() + * or write_thread(). + * + * When false, the write_thread() callback is used and would be + * called in a preemtible context unless disabled by the + * device_lock. The legacy handover is not allowed in this mode. * * Context: Any context except NMI. * Return: True, when a record has been printed and there are still @@ -1382,26 +1385,36 @@ enum nbcon_prio nbcon_get_default_prio(void) * Essentially it is the nbcon version of console_emit_next_record(). */ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, - int cookie) + int cookie, bool use_atomic) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); unsigned long flags; bool progress; - /* Use the same procedure as console_emit_next_record(). */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); - stop_critical_timings(); - ctxt->console = con; ctxt->prio = nbcon_get_default_prio(); - progress = nbcon_atomic_emit_one(&wctxt); + if (use_atomic) { + /* + * In an atomic or unknown context, use the same procedure as + * in console_emit_next_record(). It allows to handover. + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + stop_critical_timings(); + } - start_critical_timings(); - *handover = console_lock_spinning_disable_and_check(cookie); - printk_safe_exit_irqrestore(flags); + progress = nbcon_emit_one(&wctxt, use_atomic); + + if (use_atomic) { + start_critical_timings(); + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); + } else { + /* Non-atomic does not perform legacy spinning handovers. */ + *handover = false; + } return progress; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f27c76c3b5cf..55d75db00042 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3077,12 +3077,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove u64 printk_seq; bool progress; - if (!console_is_usable(con, flags, true)) + if (!console_is_usable(con, flags, !do_cond_resched)) continue; any_usable = true; if (flags & CON_NBCON) { - progress = nbcon_legacy_emit_next_record(con, handover, cookie); + progress = nbcon_legacy_emit_next_record(con, handover, cookie, + !do_cond_resched); printk_seq = nbcon_seq_read(con); } else { progress = console_emit_next_record(con, handover, cookie); From 13189fa73afae2b961d29132fd36d9cc0be77ecb Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:28 +0206 Subject: [PATCH 46/54] printk: nbcon: Rely on kthreads for normal operation Once the kthread is running and available (i.e. @printk_kthreads_running is set), the kthread becomes responsible for flushing any pending messages which are added in NBCON_PRIO_NORMAL context. Namely the legacy console_flush_all() and device_release() no longer flush the console. And nbcon_atomic_flush_pending() used by nbcon_cpu_emergency_exit() no longer flushes messages added after the emergency messages. The console context is safe when used by the kthread only when one of the following conditions are true: 1. Other caller acquires the console context with NBCON_PRIO_NORMAL with preemption disabled. It will release the context before rescheduling. 2. Other caller acquires the console context with NBCON_PRIO_NORMAL under the device_lock. 3. The kthread is the only context which acquires the console with NBCON_PRIO_NORMAL. This is satisfied for all atomic printing call sites: nbcon_legacy_emit_next_record() (#1) nbcon_atomic_flush_pending_con() (#1) nbcon_device_release() (#2) It is even double guaranteed when @printk_kthreads_running is set because then _only_ the kthread will print for NBCON_PRIO_NORMAL. (#3) Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-10-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 26 ++++++++++++++++++++++ kernel/printk/nbcon.c | 17 +++++++++----- kernel/printk/printk.c | 48 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index a96d4114a1db..8166e24f8780 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -113,6 +113,13 @@ static inline bool console_is_usable(struct console *con, short flags, bool use_ /* The write_atomic() callback is optional. */ if (use_atomic && !con->write_atomic) return false; + + /* + * For the !use_atomic case, @printk_kthreads_running is not + * checked because the write_thread() callback is also used + * via the legacy loop when the printer threads are not + * available. + */ } else { if (!con->write) return false; @@ -176,6 +183,7 @@ static inline void nbcon_atomic_flush_pending(void) { } static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie, bool use_atomic) { return false; } static inline void nbcon_kthread_wake(struct console *con) { } +static inline void nbcon_kthreads_wake(void) { } static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) { return false; } @@ -190,6 +198,7 @@ extern bool legacy_allow_panic_sync; /** * struct console_flush_type - Define available console flush methods * @nbcon_atomic: Flush directly using nbcon_atomic() callback + * @nbcon_offload: Offload flush to printer thread * @legacy_direct: Call the legacy loop in this context * @legacy_offload: Offload the legacy loop into IRQ * @@ -197,6 +206,7 @@ extern bool legacy_allow_panic_sync; */ struct console_flush_type { bool nbcon_atomic; + bool nbcon_offload; bool legacy_direct; bool legacy_offload; }; @@ -211,6 +221,22 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft) switch (nbcon_get_default_prio()) { case NBCON_PRIO_NORMAL: + if (have_nbcon_console && !have_boot_console) { + if (printk_kthreads_running) + ft->nbcon_offload = true; + else + ft->nbcon_atomic = true; + } + + /* Legacy consoles are flushed directly when possible. */ + if (have_legacy_console || have_boot_console) { + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + else + ft->legacy_offload = true; + } + break; + case NBCON_PRIO_EMERGENCY: if (have_nbcon_console && !have_boot_console) ft->nbcon_atomic = true; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 784e5de88abf..5146ce9853a3 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1494,6 +1494,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, bool allow_unsafe_takeover) { + struct console_flush_type ft; unsigned long flags; int err; @@ -1523,10 +1524,12 @@ again: /* * If flushing was successful but more records are available, this - * context must flush those remaining records because there is no - * other context that will do it. + * context must flush those remaining records if the printer thread + * is not available do it. */ - if (prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + printk_get_console_flush_type(&ft); + if (!ft.nbcon_offload && + prb_read_valid(prb, nbcon_seq_read(con), NULL)) { stop_seq = prb_next_reserve_seq(prb); goto again; } @@ -1754,17 +1757,19 @@ void nbcon_device_release(struct console *con) /* * This context must flush any new records added while the console - * was locked. The console_srcu_read_lock must be taken to ensure - * the console is usable throughout flushing. + * was locked if the printer thread is not available to do it. The + * console_srcu_read_lock must be taken to ensure the console is + * usable throughout flushing. */ cookie = console_srcu_read_lock(); + printk_get_console_flush_type(&ft); if (console_is_usable(con, console_srcu_read_flags(con), true) && + !ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { /* * If nbcon_atomic flushing is not available, fallback to * using the legacy loop. */ - printk_get_console_flush_type(&ft); if (ft.nbcon_atomic) { __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); } else if (ft.legacy_direct) { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 55d75db00042..149c3e04c2b5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2384,6 +2384,9 @@ asmlinkage int vprintk_emit(int facility, int level, if (ft.nbcon_atomic) nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_direct) { /* * The caller may be holding system-critical or @@ -2732,6 +2735,7 @@ void suspend_console(void) void resume_console(void) { + struct console_flush_type ft; struct console *con; if (!console_suspend_enabled) @@ -2749,6 +2753,10 @@ void resume_console(void) */ synchronize_srcu(&console_srcu); + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + pr_flush(1000, true); } @@ -3060,6 +3068,7 @@ static inline void printk_kthreads_check_locked(void) { } */ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) { + struct console_flush_type ft; bool any_usable = false; struct console *con; bool any_progress; @@ -3071,12 +3080,22 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove do { any_progress = false; + printk_get_console_flush_type(&ft); + cookie = console_srcu_read_lock(); for_each_console_srcu(con) { short flags = console_srcu_read_flags(con); u64 printk_seq; bool progress; + /* + * console_flush_all() is only responsible for nbcon + * consoles when the nbcon consoles cannot print via + * their atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + if (!console_is_usable(con, flags, !do_cond_resched)) continue; any_usable = true; @@ -3387,9 +3406,25 @@ EXPORT_SYMBOL(console_stop); void console_start(struct console *console) { + struct console_flush_type ft; + bool is_nbcon; + console_list_lock(); console_srcu_write_flags(console, console->flags | CON_ENABLED); + is_nbcon = console->flags & CON_NBCON; console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. The related + * printing context must be able to see it is enabled so that + * it is guaranteed to wake up and resume printing. + */ + synchronize_srcu(&console_srcu); + + printk_get_console_flush_type(&ft); + if (is_nbcon && ft.nbcon_offload) + nbcon_kthread_wake(console); + __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); @@ -4115,6 +4150,8 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre /* Flush the consoles so that records up to @seq are printed. */ printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); if (ft.legacy_direct) { console_lock(); console_unlock(); @@ -4152,8 +4189,10 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * that they make forward progress, so only increment * @diff for usable consoles. */ - if (!console_is_usable(c, flags, true)) + if (!console_is_usable(c, flags, true) && + !console_is_usable(c, flags, false)) { continue; + } if (flags & CON_NBCON) { printk_seq = nbcon_seq_read(c); @@ -4629,8 +4668,15 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); */ void console_try_replay_all(void) { + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); if (console_trylock()) { __console_rewind_all(); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); /* Consoles are flushed as part of console_unlock(). */ console_unlock(); } From 75d430372abba5210fbc56a813d2515c001a4f45 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:29 +0206 Subject: [PATCH 47/54] printk: Provide helper for message prepending In order to support prepending different texts to printk messages, split out the prepending code into a helper function. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-11-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 149c3e04c2b5..e9458569bcfd 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2843,30 +2843,31 @@ static void __console_unlock(void) #ifdef CONFIG_PRINTK /* - * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This - * is achieved by shifting the existing message over and inserting the dropped - * message. + * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting + * the existing message over and inserting the scratchbuf message. * - * @pmsg is the printk message to prepend. + * @pmsg is the original printk message. + * @fmt is the printf format of the message which will prepend the existing one. * - * @dropped is the dropped count to report in the dropped message. - * - * If the message text in @pmsg->pbufs->outbuf does not have enough space for - * the dropped message, the message text will be sufficiently truncated. + * If there is not enough space in @pmsg->pbufs->outbuf, the existing + * message text will be sufficiently truncated. * * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ -void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +__printf(2, 3) +static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...) { struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); const size_t outbuf_sz = sizeof(pbufs->outbuf); char *scratchbuf = &pbufs->scratchbuf[0]; char *outbuf = &pbufs->outbuf[0]; + va_list args; size_t len; - len = scnprintf(scratchbuf, scratchbuf_sz, - "** %lu printk messages dropped **\n", dropped); + va_start(args, fmt); + len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args); + va_end(args); /* * Make sure outbuf is sufficiently large before prepending. @@ -2888,6 +2889,19 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) pmsg->outbuf_len += len; } +/* + * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + * + * @dropped is the dropped count to report in the dropped message. + */ +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +{ + console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped); +} + /* * Read and format the specified record (or a later record if the specified * record is not available). From 5102981d5e2a5a7a12424b5d26c7524ac2899898 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:30 +0206 Subject: [PATCH 48/54] printk: nbcon: Show replay message on takeover An emergency or panic context can takeover console ownership while the current owner was printing a printk message. The atomic printer will re-print the message that the previous owner was printing. However, this can look confusing to the user and may even seem as though a message was lost. [3430014.1 [3430014.181123] usb 1-2: Product: USB Audio Add a new field @nbcon_prev_seq to struct console to track the sequence number to print that was assigned to the previous console owner. If this matches the sequence number to print that the current owner is assigned, then a takeover must have occurred. In this case, print an additional message to inform the user that the previous message is being printed again. [3430014.1 ** replaying previous printk message ** [3430014.181123] usb 1-2: Product: USB Audio Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ kernel/printk/internal.h | 1 + kernel/printk/nbcon.c | 26 ++++++++++++++++++++++++++ kernel/printk/printk.c | 11 +++++++++++ 4 files changed, 40 insertions(+) diff --git a/include/linux/console.h b/include/linux/console.h index 788ce9c829f6..eba367bf605d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -325,6 +325,7 @@ struct nbcon_write_context { * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations + * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print * @pbufs: Pointer to nbcon private buffer * @kthread: Printer kthread for this console * @rcuwait: RCU-safe wait object for @kthread waking @@ -459,6 +460,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + atomic_long_t __private nbcon_prev_seq; struct printk_buffers *pbufs; struct task_struct *kthread; diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 8166e24f8780..c365d25b13c7 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -319,4 +319,5 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, #ifdef CONFIG_PRINTK void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); +void console_prepend_replay(struct printk_message *pmsg); #endif diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 5146ce9853a3..98440889d222 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -946,7 +946,9 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_a .pbufs = ctxt->pbufs, }; unsigned long con_dropped; + struct nbcon_state cur; unsigned long dropped; + unsigned long ulseq; /* * This function should never be called for consoles that have not @@ -987,6 +989,29 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_a if (dropped && !is_extended) console_prepend_dropped(&pmsg, dropped); + /* + * If the previous owner was assigned the same record, this context + * has taken over ownership and is replaying the record. Prepend a + * message to let the user know the record is replayed. + */ + ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq)); + if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) { + console_prepend_replay(&pmsg); + } else { + /* + * Ensure this context is still the owner before trying to + * update @nbcon_prev_seq. Otherwise the value in @ulseq may + * not be from the previous owner and instead be some later + * value from the context that took over ownership. + */ + nbcon_state_read(con, &cur); + if (!nbcon_context_can_proceed(ctxt, &cur)) + return false; + + atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq, + __u64seq_to_ulseq(pmsg.seq)); + } + if (!nbcon_context_exit_unsafe(ctxt)) return false; @@ -1646,6 +1671,7 @@ bool nbcon_alloc(struct console *con) rcuwait_init(&con->rcuwait); init_irq_work(&con->irq_work, nbcon_irq_work); + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL); nbcon_state_set(con, &state); /* diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e9458569bcfd..c27dc54f4151 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2902,6 +2902,17 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped); } +/* + * Prepend the message in @pmsg->pbufs->outbuf with a "replay message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + */ +void console_prepend_replay(struct printk_message *pmsg) +{ + console_prepend_message(pmsg, "** replaying previous printk message **\n"); +} + /* * Read and format the specified record (or a later record if the specified * record is not available). From fe6fa88d865e9c1a41b41f92ae0531470c86f0da Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:31 +0206 Subject: [PATCH 49/54] proc: consoles: Add notation to c_start/c_stop fs/proc/consoles.c:78:13: warning: context imbalance in 'c_start' - wrong count at exit fs/proc/consoles.c:104:13: warning: context imbalance in 'c_stop' - unexpected unlock Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-13-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- fs/proc/consoles.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index e0758fe7936d..7036fdfa0bec 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -68,6 +68,7 @@ static int show_console_dev(struct seq_file *m, void *v) } static void *c_start(struct seq_file *m, loff_t *pos) + __acquires(&console_mutex) { struct console *con; loff_t off = 0; @@ -94,6 +95,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) } static void c_stop(struct seq_file *m, void *v) + __releases(&console_mutex) { console_list_unlock(); } From c83a20662dd099dbac9ef6df44134ef446980c56 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:32 +0206 Subject: [PATCH 50/54] proc: Add nbcon support for /proc/consoles Update /proc/consoles output to show 'W' if an nbcon console is registered. Since the write_thread() callback is mandatory, it enough just to check if it is an nbcon console. Also update /proc/consoles output to show 'N' if it is an nbcon console. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- fs/proc/consoles.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 7036fdfa0bec..b7cab1ad990d 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -21,6 +21,7 @@ static int show_console_dev(struct seq_file *m, void *v) { CON_ENABLED, 'E' }, { CON_CONSDEV, 'C' }, { CON_BOOT, 'B' }, + { CON_NBCON, 'N' }, { CON_PRINTBUFFER, 'p' }, { CON_BRL, 'b' }, { CON_ANYTIME, 'a' }, @@ -58,8 +59,8 @@ static int show_console_dev(struct seq_file *m, void *v) seq_printf(m, "%s%d", con->name, con->index); seq_pad(m, ' '); seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', - con->write ? 'W' : '-', con->unblank ? 'U' : '-', - flags); + ((con->flags & CON_NBCON) || con->write) ? 'W' : '-', + con->unblank ? 'U' : '-', flags); if (dev) seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); From def84b4467771d0ea0b55e6a5d0d70eb54bdf46a Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:33 +0206 Subject: [PATCH 51/54] tty: sysfs: Add nbcon support for 'active' Allow the 'active' attribute to list nbcon consoles. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-15-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- drivers/tty/tty_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 407b0d87b7c1..9140825e810f 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3567,7 +3567,7 @@ static ssize_t show_cons_active(struct device *dev, for_each_console(c) { if (!c->device) continue; - if (!c->write) + if (!(c->flags & CON_NBCON) && !c->write) continue; if ((c->flags & CON_ENABLED) == 0) continue; From 5f53ca3ff83b4f2f3ff360bb22bba7a9a3af71a3 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:34 +0206 Subject: [PATCH 52/54] printk: Implement legacy printer kthread for PREEMPT_RT The write() callback of legacy consoles usually makes use of spinlocks. This is not permitted with PREEMPT_RT in atomic contexts. For PREEMPT_RT, create a new kthread to handle printing of all the legacy consoles (and nbcon consoles if boot consoles are registered). This allows legacy consoles to work on PREEMPT_RT without requiring modification. (However they will not have the reliability properties guaranteed by nbcon atomic consoles.) Use the existing printk_kthreads_check_locked() to start/stop the legacy kthread as needed. Introduce the macro force_legacy_kthread() to query if the forced threading of legacy consoles is in effect. Although currently only enabled for PREEMPT_RT, this acts as a simple mechanism for the future to allow other preemption models to easily take advantage of the non-interference property provided by the legacy kthread. When force_legacy_kthread() is true, the legacy kthread fulfills the role of the console_flush_type @legacy_offload by waking the legacy kthread instead of printing via the console_lock in the irq_work. If the legacy kthread is not yet available, no legacy printing takes place (unless in panic). If for some reason the legacy kthread fails to create, any legacy consoles are unregistered. With force_legacy_kthread(), the legacy kthread is a critical component for legacy consoles. These changes only affect CONFIG_PREEMPT_RT. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-16-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 16 +++- kernel/printk/printk.c | 157 ++++++++++++++++++++++++++++++++---- kernel/printk/printk_safe.c | 4 +- 3 files changed, 159 insertions(+), 18 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index c365d25b13c7..3fcb48502adb 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -21,6 +21,19 @@ int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) +/* + * Identify if legacy printing is forced in a dedicated kthread. If + * true, all printing via console lock occurs within a dedicated + * legacy printer thread. The only exception is on panic, after the + * nbcon consoles have had their chance to print the panic messages + * first. + */ +#ifdef CONFIG_PREEMPT_RT +# define force_legacy_kthread() (true) +#else +# define force_legacy_kthread() (false) +#endif + #ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK_CALLER @@ -173,6 +186,7 @@ static inline void nbcon_kthread_wake(struct console *con) #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline void defer_console_output(void) { } static inline bool is_printk_legacy_deferred(void) { return false; } static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } @@ -200,7 +214,7 @@ extern bool legacy_allow_panic_sync; * @nbcon_atomic: Flush directly using nbcon_atomic() callback * @nbcon_offload: Offload flush to printer thread * @legacy_direct: Call the legacy loop in this context - * @legacy_offload: Offload the legacy loop into IRQ + * @legacy_offload: Offload the legacy loop into IRQ or legacy thread * * Note that the legacy loop also flushes the nbcon consoles. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c27dc54f4151..66cfe7b8f95c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -491,6 +491,7 @@ bool legacy_allow_panic_sync; #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); +static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; @@ -2756,6 +2757,8 @@ void resume_console(void) printk_get_console_flush_type(&ft); if (ft.nbcon_offload) nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); pr_flush(1000, true); } @@ -3166,19 +3169,7 @@ abandon: return false; } -/** - * console_unlock - unblock the console subsystem from printing - * - * Releases the console_lock which the caller holds to block printing of - * the console subsystem. - * - * While the console_lock was held, console output may have been buffered - * by printk(). If this is the case, console_unlock(); emits - * the output prior to releasing the lock. - * - * console_unlock(); may be called from any context. - */ -void console_unlock(void) +static void __console_flush_and_unlock(void) { bool do_cond_resched; bool handover; @@ -3222,6 +3213,29 @@ void console_unlock(void) */ } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); } + +/** + * console_unlock - unblock the legacy console subsystem from printing + * + * Releases the console_lock which the caller holds to block printing of + * the legacy console subsystem. + * + * While the console_lock was held, console output may have been buffered + * by printk(). If this is the case, console_unlock() emits the output on + * legacy consoles prior to releasing the lock. + * + * console_unlock(); may be called from any context. + */ +void console_unlock(void) +{ + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) + __console_flush_and_unlock(); + else + __console_unlock(); +} EXPORT_SYMBOL(console_unlock); /** @@ -3449,6 +3463,8 @@ void console_start(struct console *console) printk_get_console_flush_type(&ft); if (is_nbcon && ft.nbcon_offload) nbcon_kthread_wake(console); + else if (ft.legacy_offload) + defer_console_output(); __pr_flush(console, 1000, true); } @@ -3460,6 +3476,88 @@ static int unregister_console_locked(struct console *console); /* True when system boot is far enough to create printer threads. */ static bool printk_kthreads_ready __ro_after_init; +static struct task_struct *printk_legacy_kthread; + +static bool legacy_kthread_should_wakeup(void) +{ + struct console_flush_type ft; + struct console *con; + bool ret = false; + int cookie; + + if (kthread_should_stop()) + return true; + + printk_get_console_flush_type(&ft); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + + /* + * The legacy printer thread is only responsible for nbcon + * consoles when the nbcon consoles cannot print via their + * atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, false)) + continue; + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(con); + } else { + /* + * It is safe to read @seq because only this + * thread context updates @seq. + */ + printk_seq = con->seq; + } + + if (prb_read_valid(prb, printk_seq, NULL)) { + ret = true; + break; + } + } + console_srcu_read_unlock(cookie); + + return ret; +} + +static int legacy_kthread_func(void *unused) +{ + for (;;) { + wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); + + if (kthread_should_stop()) + break; + + console_lock(); + __console_flush_and_unlock(); + } + + return 0; +} + +static bool legacy_kthread_create(void) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy"); + if (WARN_ON(IS_ERR(kt))) { + pr_err("failed to start legacy printing thread\n"); + return false; + } + + printk_legacy_kthread = kt; + + return true; +} + /** * printk_kthreads_shutdown - shutdown all threaded printers * @@ -3509,6 +3607,27 @@ static void printk_kthreads_check_locked(void) if (!printk_kthreads_ready) return; + if (have_legacy_console || have_boot_console) { + if (!printk_legacy_kthread && + force_legacy_kthread() && + !legacy_kthread_create()) { + /* + * All legacy consoles must be unregistered. If there + * are any nbcon consoles, they will set up their own + * kthread. + */ + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (con->flags & CON_NBCON) + continue; + + unregister_console_locked(con); + } + } + } else if (printk_legacy_kthread) { + kthread_stop(printk_legacy_kthread); + printk_legacy_kthread = NULL; + } + /* * Printer threads cannot be started as long as any boot console is * registered because there is no way to synchronize the hardware @@ -4285,9 +4404,13 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) int pending = this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + if (force_legacy_kthread()) { + if (printk_legacy_kthread) + wake_up_interruptible(&legacy_wait); + } else { + if (console_trylock()) + console_unlock(); + } } if (pending & PRINTK_PENDING_WAKEUP) @@ -4702,6 +4825,8 @@ void console_try_replay_all(void) nbcon_atomic_flush_pending(); if (ft.nbcon_offload) nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); /* Consoles are flushed as part of console_unlock(). */ console_unlock(); } diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 86439fd20aab..2b35a9d3919d 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -44,7 +44,9 @@ bool is_printk_legacy_deferred(void) * The per-CPU variable @printk_context can be read safely in any * context. CPU migration is always disabled when set. */ - return (this_cpu_read(printk_context) || in_nmi()); + return (force_legacy_kthread() || + this_cpu_read(printk_context) || + in_nmi()); } asmlinkage int vprintk(const char *fmt, va_list args) From 1529bbb6e2619495f146dd519a196b67837fdfa6 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:35 +0206 Subject: [PATCH 53/54] printk: nbcon: Assign nice -20 for printing threads It is important that console printing threads are scheduled shortly after a printk call and with generous runtime budgets. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-17-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 6 ++++++ kernel/printk/printk.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 98440889d222..fd12efcc4aed 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1321,6 +1321,12 @@ bool nbcon_kthread_create(struct console *con) con->kthread = kt; + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(con->kthread, -20); + return true; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 66cfe7b8f95c..afd926611f0f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3555,6 +3555,12 @@ static bool legacy_kthread_create(void) printk_legacy_kthread = kt; + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(printk_legacy_kthread, -20); + return true; } From daeed1595b4ddf314bad8ee40b2662e03fd012dc Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:36 +0206 Subject: [PATCH 54/54] printk: Avoid false positive lockdep report for legacy printing Legacy console printing from printk() caller context may invoke the console driver from atomic context. This leads to a lockdep splat because the console driver will acquire a sleeping lock and the caller may already hold a spinning lock. This is noticed by lockdep on !PREEMPT_RT configurations because it will lead to a problem on PREEMPT_RT. However, on PREEMPT_RT the printing path from atomic context is always avoided and the console driver is always invoked from a dedicated thread. Thus the lockdep splat on !PREEMPT_RT is a false positive. For !PREEMPT_RT override the lock-context before invoking the console driver to avoid the false positive. Do not override the lock-context for PREEMPT_RT in order to allow lockdep to catch any real locking context issues related to the write callback usage. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-18-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 85 +++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index afd926611f0f..578723208f14 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2981,6 +2981,34 @@ out: return true; } +/* + * Legacy console printing from printk() caller context does not respect + * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a + * false positive. For PREEMPT_RT the false positive condition does not + * occur. + * + * This map is used to temporarily establish LD_WAIT_SLEEP context for the + * console write() callback when legacy printing to avoid false positive + * lockdep complaints, thus allowing lockdep to continue to function for + * real issues. + */ +#ifdef CONFIG_PREEMPT_RT +static inline void printk_legacy_allow_spinlock_enter(void) { } +static inline void printk_legacy_allow_spinlock_exit(void) { } +#else +static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP); + +static inline void printk_legacy_allow_spinlock_enter(void) +{ + lock_map_acquire_try(&printk_legacy_map); +} + +static inline void printk_legacy_allow_spinlock_exit(void) +{ + lock_map_release(&printk_legacy_map); +} +#endif /* CONFIG_PREEMPT_RT */ + /* * Used as the printk buffers for non-panic, serialized console printing. * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. @@ -3030,31 +3058,46 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co con->dropped = 0; } - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); - - /* Do not trace print latency. */ - stop_critical_timings(); - /* Write everything out to the hardware. */ - con->write(con, outbuf, pmsg.outbuf_len); - start_critical_timings(); + if (force_legacy_kthread() && !panic_in_progress()) { + /* + * With forced threading this function is in a task context + * (either legacy kthread or get_init_console_seq()). There + * is no need for concern about printk reentrance, handovers, + * or lockdep complaints. + */ - con->seq = pmsg.seq + 1; + con->write(con, outbuf, pmsg.outbuf_len); + con->seq = pmsg.seq + 1; + } else { + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); - *handover = console_lock_spinning_disable_and_check(cookie); - printk_safe_exit_irqrestore(flags); + /* Do not trace print latency. */ + stop_critical_timings(); + + printk_legacy_allow_spinlock_enter(); + con->write(con, outbuf, pmsg.outbuf_len); + printk_legacy_allow_spinlock_exit(); + + start_critical_timings(); + + con->seq = pmsg.seq + 1; + + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); + } skip: return true; }