Merge pull request #26213 from poettering/journal-rework-seqnum

journal sequence number rework
This commit is contained in:
Luca Boccassi 2023-02-15 19:58:58 +00:00 committed by GitHub
commit 6f97aae029
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 364 additions and 69 deletions

View File

@ -20,7 +20,7 @@ When exporting journal data for other uses or transferring it via the network/lo
* Two journal entries that follow each other are separated by a double newline.
* Journal fields consisting only of valid non-control UTF-8 codepoints are serialized as they are (i.e. the field name, followed by '=', followed by field data), followed by a newline as separator to the next field. Note that fields containing newlines cannot be formatted like this. Non-control UTF-8 codepoints are the codepoints with value at or above 32 (' '), or equal to 9 (TAB).
* Other journal fields are serialized in a special binary safe way: field name, followed by newline, followed by a binary 64bit little endian size value, followed by the binary field data, followed by a newline as separator to the next field.
* Entry metadata that is not actually a field is serialized like it was a field, but beginning with two underscores. More specifically, `__CURSOR=`, `__REALTIME_TIMESTAMP=`, `__MONOTONIC_TIMESTAMP=` are introduced this way. Note that these meta-fields are only generated when actual journal files are serialized. They are omitted for entries that do not originate from a journal file (for example because they are transferred for the first time to be stored in one). Or in other words: if you are generating this format you shouldn't care about these special double-underscore fields. But you might find them usable when you deserialize the format generated by us. Additional fields prefixed with two underscores might be added later on, your parser should skip over the fields it does not know.
* Entry metadata that is not actually a field is serialized like it was a field, but beginning with two underscores. More specifically, `__CURSOR=`, `__REALTIME_TIMESTAMP=`, `__MONOTONIC_TIMESTAMP=`, `__SEQNUM=`, `__SEQNUM_ID` are introduced this way. Note that these meta-fields are only generated when actual journal files are serialized. They are omitted for entries that do not originate from a journal file (for example because they are transferred for the first time to be stored in one). Or in other words: if you are generating this format you shouldn't care about these special double-underscore fields. But you might find them usable when you deserialize the format generated by us. Additional fields prefixed with two underscores might be added later on, your parser should skip over the fields it does not know.
* The order in which fields appear in an entry is undefined and might be different for each entry that is serialized.
And that's already it.
@ -133,7 +133,7 @@ In most cases the Journal JSON serialization is the obvious mapping of the entry
* A field that contains non-printable or non-UTF8 is serialized as a number array instead. This is necessary to handle binary data in a safe way without losing data, since JSON cannot embed binary data natively. Each byte of the binary field will be mapped to its numeric value in the range 0…255.
* The JSON serializer can optionally skip huge (as in larger than a specific threshold) data fields from the JSON object. If that is enabled and a data field is too large, the field name is still included in the JSON object but assigned _null_.
* Within the same entry, Journal fields may have multiple values assigned. This is not allowed in JSON. The serializer will hence create a single JSON field only for these cases, and assign it an array of values (which the can be strings, _null_ or number arrays, see above).
* If the JSON data originates from a journal file it may include the special addressing fields `__CURSOR`, `__REALTIME_TIMESTAMP`, `__MONOTONIC_TIMESTAMP`, which contain the cursor string of this entry as string, and the realtime/monotonic timestamps of this entry as formatted numeric string of usec since the respective epoch.
* If the JSON data originates from a journal file it may include the special addressing fields `__CURSOR`, `__REALTIME_TIMESTAMP`, `__MONOTONIC_TIMESTAMP`, `__SEQNUM`, `__SEQNUM_ID`, which contain the cursor string of this entry as string, the realtime/monotonic timestamps of this entry as formatted numeric string of usec since the respective epoch, and the sequence number and associated sequence number ID, both formatted as strings.
Here's an example, illustrating all cases mentioned above. Consider this entry:

View File

@ -720,6 +720,7 @@ manpages = [
'sd_journal_wait'],
''],
['sd_journal_get_realtime_usec', '3', ['sd_journal_get_monotonic_usec'], ''],
['sd_journal_get_seqnum', '3', [], ''],
['sd_journal_get_usage', '3', [], ''],
['sd_journal_has_runtime_files', '3', ['sd_journal_has_persistent_files'], ''],
['sd_journal_next',

View File

@ -103,6 +103,7 @@
<citerefentry><refentrytitle>sd_journal_open</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_next</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_get_data</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_get_seqnum</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_id128_get_boot</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>clock_gettime</refentrytitle><manvolnum>2</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_get_cutoff_realtime_usec</refentrytitle><manvolnum>3</manvolnum></citerefentry>

View File

@ -0,0 +1,105 @@
<?xml version='1.0'?> <!--*-nxml-*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
<refentry id="sd_journal_get_seqnum"
xmlns:xi="http://www.w3.org/2001/XInclude">
<refentryinfo>
<title>sd_journal_get_seqnum</title>
<productname>systemd</productname>
</refentryinfo>
<refmeta>
<refentrytitle>sd_journal_get_seqnum</refentrytitle>
<manvolnum>3</manvolnum>
</refmeta>
<refnamediv>
<refname>sd_journal_get_seqnum</refname>
<refpurpose>Read sequence number from the current journal entry</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis>
<funcsynopsisinfo>#include &lt;systemd/sd-journal.h&gt;</funcsynopsisinfo>
<funcprototype>
<funcdef>int <function>sd_journal_get_seqnum</function></funcdef>
<paramdef>sd_journal *<parameter>j</parameter></paramdef>
<paramdef>uint64_t *<parameter>ret_seqnum</parameter></paramdef>
<paramdef>sd_id128_t *<parameter>ret_seqnum_id</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para><function>sd_journal_get_seqnum()</function> returns the sequence number of the current journal
entry. It takes three arguments: the journal context object, a pointer to a 64-bit unsigned integer to
store the sequence number in, and a buffer to return the 128bit sequence number ID in.</para>
<para>When writing journal entries to disk each <command>systemd-journald</command> instance will number
them sequentially, starting from 1 for the first entry written after subsystem intialization. Each such
series of sequence numbers is associated with a 128bit sequence number ID which is initialized randomly,
once at <command>systemd-journal</command> initialization. Thus, while multiple instances of
<command>systemd-journald</command> will assign the same sequence numbers to their written journal
entries, they will have a distinct sequence number IDs. The sequence number is assigned at the moment of
writing the entry to disk. If log entries are rewritten (for example because the volatile logs from
<filename>/run/log/</filename> are flushed to <filename>/var/log/</filename> via
<filename>systemd-journald-flush.service</filename>) they will get new sequence numbers assigned.</para>
<para>Sequence numbers may be used to order entries (entries associated with the same sequence number ID
and lower sequence numbers should be ordered chronologically before those with higher sequence numbers),
and to detect lost entries. Note that journal service instances typically write to multiple journal files
in parallel (for example because <varname>SplitMode=</varname> is used), in which case each journal file
will only contain a subset of the sequence numbers. To recover the full stream of journal entries the
files must be combined ("interleaved"), a process that primarily relies on the sequence numbers. When
journal files are rotated (due to size or time limits), the series of sequence numbers is continued in
the replacement files. All journal files generated from the same journal instance will carry the same
sequence number ID.</para>
<para>As the sequence numbers are assigned at the moment of writing the journal entries to disk they do
not exist if storage is disabled via <varname>SplitMode=</varname>.</para>
<para>The <varname>ret_seqnum</varname> and <varname>ret_seqnum_id</varname> parameters may be specified
as <constant>NULL</constant> in which case the relevant data is not returned (but the call will otherwise
succeed).</para>
<para>Note that these functions will not work before
<citerefentry><refentrytitle>sd_journal_next</refentrytitle><manvolnum>3</manvolnum></citerefentry>
(or related call) has been called at least
once, in order to position the read pointer at a valid entry.</para>
</refsect1>
<refsect1>
<title>Return Value</title>
<para><function>sd_journal_get_seqnum()</function> returns 0 on success or a negative errno-style error
code..</para>
</refsect1>
<refsect1>
<title>Notes</title>
<xi:include href="threads-aware.xml" xpointer="strict"/>
<xi:include href="libsystemd-pkgconfig.xml" xpointer="pkgconfig-text"/>
</refsect1>
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd-journal</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_open</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_next</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_get_data</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_journal_get_monotonic_usec</refentrytitle><manvolnum>3</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>

View File

@ -596,6 +596,16 @@
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>__SEQNUM=</varname></term>
<term><varname>__SEQNUM_ID=</varname></term>
<listitem><para>The sequence number (and associated sequence number ID) of this journal entry in the
journal file it originates from. See
<citerefentry><refentrytitle>sd_journal_get_seqnum</refentrytitle><manvolnum>3</manvolnum></citerefentry>
for details.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -99,9 +99,16 @@ int writer_write(Writer *w,
return r;
}
r = journal_file_append_entry(w->journal->file, ts, boot_id,
iovw->iovec, iovw->count,
&w->seqnum, NULL, NULL);
r = journal_file_append_entry(
w->journal->file,
ts,
boot_id,
iovw->iovec,
iovw->count,
&w->seqnum,
/* seqnum_id= */ NULL,
/* ret_object= */ NULL,
/* ret_offset= */ NULL);
if (r >= 0) {
if (w->server)
w->server->event_count += 1;
@ -120,9 +127,15 @@ int writer_write(Writer *w,
return r;
log_debug("Retrying write.");
r = journal_file_append_entry(w->journal->file, ts, boot_id,
iovw->iovec, iovw->count,
&w->seqnum, NULL, NULL);
r = journal_file_append_entry(
w->journal->file,
ts,
boot_id,
iovw->iovec, iovw->count,
&w->seqnum,
/* seqnum_id= */ NULL,
/* ret_object= */ NULL,
/* ret_offset= */ NULL);
if (r < 0)
return r;

View File

@ -423,9 +423,6 @@ finish:
}
int server_open_kernel_seqnum(Server *s) {
_cleanup_close_ int fd = -EBADF;
const char *fn;
uint64_t *p;
int r;
assert(s);
@ -436,26 +433,9 @@ int server_open_kernel_seqnum(Server *s) {
if (!s->dev_kmsg_readable)
return 0;
fn = strjoina(s->runtime_directory, "/kernel-seqnum");
fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
if (fd < 0) {
log_error_errno(errno, "Failed to open %s, ignoring: %m", fn);
return 0;
}
r = posix_fallocate_loop(fd, 0, sizeof(uint64_t));
if (r < 0) {
log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m");
return 0;
}
p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (p == MAP_FAILED) {
log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
return 0;
}
s->kernel_seqnum = p;
r = server_map_seqnum_file(s, "kernel-seqnum", sizeof(uint64_t), (void**) &s->kernel_seqnum);
if (r < 0)
return log_error_errno(r, "Failed to map kernel seqnum file: %m");
return 0;
}

View File

@ -889,10 +889,12 @@ static bool shall_try_append_again(JournalFile *f, int r) {
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Montonic clock jumped backwards relative to last journal entry, rotating.", f->path);
return true;
case -EILSEQ: /* seqnum ID last used in the file doesn't match the one we'd passed when writing an entry to it */
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file uses a different sequence number ID, rotating.", f->path);
return true;
case -EAFNOSUPPORT:
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT,
"%s: underlying file system does not support memory mapping or another required file system feature.",
f->path);
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: underlying file system does not support memory mapping or another required file system feature.", f->path);
return false;
default:
@ -900,7 +902,13 @@ static bool shall_try_append_again(JournalFile *f, int r) {
}
}
static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
static void server_write_to_journal(
Server *s,
uid_t uid,
const struct iovec *iovec,
size_t n,
int priority) {
bool vacuumed = false, rotate = false;
struct dual_timestamp ts;
ManagedJournalFile *f;
@ -950,7 +958,15 @@ static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, s
s->last_realtime_clock = ts.realtime;
r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
r = journal_file_append_entry(
f->file,
&ts,
/* boot_id= */ NULL,
iovec, n,
&s->seqnum->seqnum,
&s->seqnum->id,
/* ret_object= */ NULL,
/* ret_offset= */ NULL);
if (r >= 0) {
server_schedule_sync(s, priority);
return;
@ -978,7 +994,15 @@ static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, s
return;
log_debug_errno(r, "Retrying write.");
r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
r = journal_file_append_entry(
f->file,
&ts,
/* boot_id= */ NULL,
iovec, n,
&s->seqnum->seqnum,
&s->seqnum->id,
/* ret_object= */ NULL,
/* ret_offset= */ NULL);
if (r < 0)
log_ratelimit_error_errno(r, FAILED_TO_WRITE_ENTRY_RATELIMIT,
"Failed to write entry to %s (%zu items, %zu bytes) despite vacuuming, ignoring: %m",
@ -1290,7 +1314,13 @@ int server_flush_to_var(Server *s, bool require_flag_file) {
goto finish;
}
r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset);
r = journal_file_copy_entry(
f,
s->system_journal->file,
o,
f->current_offset,
&s->seqnum->seqnum,
&s->seqnum->id);
if (r >= 0)
continue;
@ -1312,7 +1342,13 @@ int server_flush_to_var(Server *s, bool require_flag_file) {
}
log_debug("Retrying write.");
r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset);
r = journal_file_copy_entry(
f,
s->system_journal->file,
o,
f->current_offset,
&s->seqnum->seqnum,
&s->seqnum->id);
if (r < 0) {
log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Can't write entry: %m");
goto finish;
@ -2250,6 +2286,51 @@ static int server_open_varlink(Server *s, const char *socket, int fd) {
return 0;
}
int server_map_seqnum_file(
Server *s,
const char *fname,
size_t size,
void **ret) {
_cleanup_free_ char *fn = NULL;
_cleanup_close_ int fd = -EBADF;
uint64_t *p;
int r;
assert(s);
assert(fname);
assert(size > 0);
assert(ret);
fn = path_join(s->runtime_directory, fname);
if (!fn)
return -ENOMEM;
fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
if (fd < 0)
return -errno;
r = posix_fallocate_loop(fd, 0, size);
if (r < 0)
return r;
p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (p == MAP_FAILED)
return -errno;
*ret = p;
return 0;
}
void server_unmap_seqnum_file(void *p, size_t size) {
assert(size > 0);
if (!p)
return;
assert_se(munmap(p, size) >= 0);
}
static bool server_is_idle(Server *s) {
assert(s);
@ -2562,6 +2643,10 @@ int server_init(Server *s, const char *namespace) {
if (r < 0)
return r;
r = server_map_seqnum_file(s, "seqnum", sizeof(SeqnumData), (void**) &s->seqnum);
if (r < 0)
return log_error_errno(r, "Failed to map main seqnum file: %m");
r = server_open_kernel_seqnum(s);
if (r < 0)
return r;
@ -2678,8 +2763,8 @@ void server_done(Server *s) {
if (s->ratelimit)
journal_ratelimit_free(s->ratelimit);
if (s->kernel_seqnum)
munmap(s->kernel_seqnum, sizeof(uint64_t));
server_unmap_seqnum_file(s->seqnum, sizeof(*s->seqnum));
server_unmap_seqnum_file(s->kernel_seqnum, sizeof(*s->kernel_seqnum));
free(s->buffer);
free(s->tty_path);

View File

@ -60,6 +60,13 @@ typedef struct JournalStorage {
JournalStorageSpace space;
} JournalStorage;
/* This structure will be kept in $RUNTIME_DIRECTORY/seqnum and is mapped by journald, and is used to
* maintain the sequence number counter with its seqnum ID */
typedef struct SeqnumData {
sd_id128_t id;
uint64_t seqnum;
} SeqnumData;
struct Server {
char *namespace;
@ -93,7 +100,7 @@ struct Server {
ManagedJournalFile *system_journal;
OrderedHashmap *user_journals;
uint64_t seqnum;
SeqnumData *seqnum;
char *buffer;
@ -227,3 +234,6 @@ void server_space_usage_message(Server *s, JournalStorage *storage);
int server_start_or_stop_idle_timer(Server *s);
int server_refresh_idle_timer(Server *s);
int server_map_seqnum_file(Server *s, const char *fname, size_t size, void **ret);
void server_unmap_seqnum_file(void *p, size_t size);

View File

@ -53,7 +53,7 @@ static void test_journal_flush(int argc, char *argv[]) {
log_error_errno(r, "journal_file_move_to_object failed: %m");
assert_se(r >= 0);
r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset);
r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset, NULL, NULL);
if (r < 0)
log_warning_errno(r, "journal_file_copy_entry failed: %m");
assert_se(r >= 0 ||

View File

@ -66,7 +66,7 @@ static void append_number(ManagedJournalFile *f, int n, uint64_t *seqnum) {
assert_se(asprintf(&p, "NUMBER=%d", n) >= 0);
iovec[0] = IOVEC_MAKE_STRING(p);
assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL));
assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL, NULL));
free(p);
}

View File

@ -103,12 +103,12 @@ static void run_test(void) {
iovec[1] = IOVEC_MAKE(q, strlen(q));
if (i % 10 == 0)
assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
else {
if (i % 3 == 0)
assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
}
free(p);

View File

@ -94,7 +94,7 @@ static int run_test(int argc, char *argv[]) {
iovec = IOVEC_MAKE_STRING(test);
assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
free(test);
}

View File

@ -45,13 +45,13 @@ static void test_non_empty_one(void) {
assert_se(sd_id128_randomize(&fake_boot_id) == 0);
iovec = IOVEC_MAKE_STRING(test);
assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
iovec = IOVEC_MAKE_STRING(test2);
assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
iovec = IOVEC_MAKE_STRING(test);
assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
#if HAVE_GCRYPT
journal_file_append_tag(f->file);
@ -199,7 +199,7 @@ static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
dual_timestamp_get(&ts);
iovec = IOVEC_MAKE(data, data_size);
assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
#if HAVE_GCRYPT
journal_file_append_tag(f->file);

View File

@ -810,4 +810,5 @@ global:
sd_pidfd_get_unit;
sd_pidfd_get_user_slice;
sd_pidfd_get_user_unit;
sd_journal_get_seqnum;
} LIBSYSTEMD_252;

View File

@ -2094,6 +2094,7 @@ static int journal_file_append_entry_internal(
const EntryItem items[],
size_t n_items,
uint64_t *seqnum,
sd_id128_t *seqnum_id,
Object **ret_object,
uint64_t *ret_offset) {
@ -2139,6 +2140,21 @@ static int journal_file_append_entry_internal(
}
}
if (seqnum_id) {
/* Settle the passed in sequence number ID */
if (sd_id128_is_null(*seqnum_id))
*seqnum_id = f->header->seqnum_id; /* Caller has none assigned, then copy the one from the file */
else if (!sd_id128_equal(*seqnum_id, f->header->seqnum_id)) {
/* Different seqnum IDs? We can't allow entries from multiple IDs end up in the same journal.*/
if (le64toh(f->header->n_entries) == 0)
f->header->seqnum_id = *seqnum_id; /* Caller has one, and file so far has no entries, then copy the one from the caller */
else
return log_debug_errno(SYNTHETIC_ERRNO(EILSEQ),
"Sequence number IDs don't match, refusing entry.");
}
}
osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
@ -2290,6 +2306,7 @@ int journal_file_append_entry(
const struct iovec iovec[],
size_t n_iovec,
uint64_t *seqnum,
sd_id128_t *seqnum_id,
Object **ret_object,
uint64_t *ret_offset) {
@ -2376,7 +2393,7 @@ int journal_file_append_entry(
typesafe_qsort(items, n_iovec, entry_item_cmp);
n_iovec = remove_duplicate_entry_items(items, n_iovec);
r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret_object, ret_offset);
r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, seqnum_id, ret_object, ret_offset);
/* If the memory mapping triggered a SIGBUS then we return an
* IO error and ignore the error code passed down to us, since
@ -4093,7 +4110,14 @@ int journal_file_dispose(int dir_fd, const char *fname) {
return 0;
}
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p) {
int journal_file_copy_entry(
JournalFile *from,
JournalFile *to,
Object *o,
uint64_t p,
uint64_t *seqnum,
sd_id128_t *seqnum_id) {
_cleanup_free_ EntryItem *items_alloc = NULL;
EntryItem *items;
uint64_t q, n, xor_hash = 0;
@ -4168,7 +4192,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
return r;
}
r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n, NULL, NULL, NULL);
r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n, seqnum, seqnum_id, NULL, NULL);
if (mmap_cache_fd_got_sigbus(to->cache_fd))
return -EIO;

View File

@ -259,7 +259,8 @@ int journal_file_append_entry(
const sd_id128_t *boot_id,
const struct iovec iovec[],
size_t n_iovec,
uint64_t *seqno,
uint64_t *seqnum,
sd_id128_t *seqnum_id,
Object **ret_object,
uint64_t *ret_offset);
@ -286,7 +287,7 @@ int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, Object *d, uin
int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, Object *d, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset);
int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, Object *d, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset);
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p);
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, sd_id128_t *seqnum_id);
void journal_file_dump(JournalFile *f);
void journal_file_print_header(JournalFile *f);

View File

@ -2232,6 +2232,37 @@ _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id12
return 0;
}
_public_ int sd_journal_get_seqnum(
sd_journal *j,
uint64_t *ret_seqnum,
sd_id128_t *ret_seqnum_id) {
JournalFile *f;
Object *o;
int r;
assert_return(j, -EINVAL);
assert_return(!journal_pid_changed(j), -ECHILD);
f = j->current_file;
if (!f)
return -EADDRNOTAVAIL;
if (f->current_offset <= 0)
return -EADDRNOTAVAIL;
r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
if (r < 0)
return r;
if (ret_seqnum_id)
*ret_seqnum_id = f->header->seqnum_id;
if (ret_seqnum)
*ret_seqnum = le64toh(o->entry.seqnum);
return 0;
}
static bool field_is_valid(const char *field) {
assert(field);

View File

@ -14,6 +14,7 @@
#include "journal-util.h"
#include "parse-util.h"
#include "string-util.h"
#include "strv.h"
#include "unaligned.h"
enum {
@ -217,9 +218,8 @@ static int process_special_field(JournalImporter *imp, char *line) {
assert(line);
value = startswith(line, "__CURSOR=");
if (value)
/* ignore __CURSOR */
if (STARTSWITH_SET(line, "__CURSOR=", "__SEQNUM=", "__SEQNUM_ID="))
/* ignore __CURSOR=, __SEQNUM=, __SEQNUM_ID= which we cannot replicate */
return 1;
value = startswith(line, "__REALTIME_TIMESTAMP=");

View File

@ -791,11 +791,12 @@ static int output_export(
const dual_timestamp *previous_display_ts,
const sd_id128_t *previous_boot_id) {
sd_id128_t journal_boot_id, seqnum_id;
_cleanup_free_ char *cursor = NULL;
const void *data;
size_t length;
usec_t monotonic, realtime;
sd_id128_t journal_boot_id;
const void *data;
uint64_t seqnum;
size_t length;
int r;
assert(j);
@ -818,14 +819,22 @@ static int output_export(
if (r < 0)
return log_error_errno(r, "Failed to get monotonic timestamp: %m");
r = sd_journal_get_seqnum(j, &seqnum, &seqnum_id);
if (r < 0)
return log_error_errno(r, "Failed to get seqnum: %m");
fprintf(f,
"__CURSOR=%s\n"
"__REALTIME_TIMESTAMP="USEC_FMT"\n"
"__MONOTONIC_TIMESTAMP="USEC_FMT"\n"
"__REALTIME_TIMESTAMP=" USEC_FMT "\n"
"__MONOTONIC_TIMESTAMP=" USEC_FMT "\n"
"__SEQNUM=%" PRIu64 "\n"
"__SEQNUM_ID=%s\n"
"_BOOT_ID=%s\n",
cursor,
realtime,
monotonic,
seqnum,
SD_ID128_TO_STRING(seqnum_id),
SD_ID128_TO_STRING(journal_boot_id));
JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
@ -1040,15 +1049,16 @@ static int output_json(
const dual_timestamp *previous_display_ts,
const sd_id128_t *previous_boot_id) {
char usecbuf[DECIMAL_STR_MAX(usec_t)];
char usecbuf[CONST_MAX(DECIMAL_STR_MAX(usec_t), DECIMAL_STR_MAX(uint64_t))];
_cleanup_(json_variant_unrefp) JsonVariant *object = NULL;
sd_id128_t journal_boot_id, seqnum_id;
_cleanup_free_ char *cursor = NULL;
usec_t realtime, monotonic;
JsonVariant **array = NULL;
struct json_data *d;
Hashmap *h = NULL;
uint64_t seqnum;
size_t n = 0;
usec_t realtime, monotonic;
sd_id128_t journal_boot_id;
int r;
assert(j);
@ -1071,6 +1081,10 @@ static int output_json(
if (r < 0)
return log_error_errno(r, "Failed to get monotonic timestamp: %m");
r = sd_journal_get_seqnum(j, &seqnum, &seqnum_id);
if (r < 0)
return log_error_errno(r, "Failed to get seqnum: %m");
h = hashmap_new(&string_hash_ops);
if (!h)
return log_oom();
@ -1093,6 +1107,15 @@ static int output_json(
if (r < 0)
goto finish;
xsprintf(usecbuf, USEC_FMT, seqnum);
r = update_json_data(h, flags, "__SEQNUM", usecbuf, SIZE_MAX);
if (r < 0)
goto finish;
r = update_json_data(h, flags, "__SEQNUM_ID", SD_ID128_TO_STRING(seqnum_id), SIZE_MAX);
if (r < 0)
goto finish;
for (;;) {
const void *data;
size_t size;

View File

@ -99,6 +99,7 @@ int sd_journal_next_skip(sd_journal *j, uint64_t skip);
int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret);
int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id);
int sd_journal_get_seqnum(sd_journal *j, uint64_t *ret_seqnum, sd_id128_t *ret_seqnum_id);
int sd_journal_set_data_threshold(sd_journal *j, size_t sz);
int sd_journal_get_data_threshold(sd_journal *j, size_t *sz);

View File

@ -70,7 +70,7 @@ ID=$(journalctl --new-id128 | sed -n 2p)
printf $'foo' | systemd-cat -t "$ID" --level-prefix false
journalctl --sync
journalctl -b -o export --output-fields=MESSAGE,FOO --output-fields=PRIORITY,MESSAGE -t "$ID" >/output
[[ $(grep -c . /output) -eq 6 ]]
[[ $(grep -c . /output) -eq 8 ]]
grep -q '^__CURSOR=' /output
grep -q '^MESSAGE=foo$' /output
grep -q '^PRIORITY=6$' /output
@ -266,4 +266,13 @@ if is_xattr_supported; then
rm -rf /etc/systemd/system/delegated-cgroup-filtering.service.d
fi
# Check that the seqnum field at least superficially works
systemd-cat echo "ya"
journalctl --sync
SEQNUM1=$(journalctl -o export -n 1 | grep -a __SEQNUM= | cut -d= -f2)
systemd-cat echo "yo"
journalctl --sync
SEQNUM2=$(journalctl -o export -n 1 | grep -a __SEQNUM= | cut -d= -f2)
test "$SEQNUM2" -gt "$SEQNUM1"
touch /testok