mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-24 02:34:17 +08:00
eb1945c3d9
This commit introduces the StructuredLogger module, which provides a robust and flexible logging utility supporting multiple data formats (CSV, JSON, and YAML). By incorporating this module into our CI system, we enhance our log management capabilities, making it easier to: 1. Monitor and analyze logs: The StructuredLogger is a dict-like data abstraction which autosaves into a structured data file whenever it is updated. With this file, one can easily find specifics of the job execution without having to grep the trace logs or explore the job artifacts. The autosave feature makes it useful even when the CI job fails unexpectedly, since the partial dict is always written back to the disk. 2. Maintain data integrity: The module includes context managers for file locking and editing log data, ensuring data integrity and preventing race conditions. 3. Support multiple formats: With built-in support for CSV, JSON, and YAML formats, this module caters to a wide range of use cases and user preferences. 4. Increase maintainability: The modular design of the StructuredLogger and its corresponding strategies simplifies maintenance and allows for seamless integration of additional formats in the future. Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22500>
295 lines
8.7 KiB
Python
295 lines
8.7 KiB
Python
"""
|
|
A structured logging utility supporting multiple data formats such as CSV, JSON,
|
|
and YAML.
|
|
|
|
The main purpose of this script is to keep relevant information available in a
condensed and easily deserialized form.
|
|
|
|
This script defines a protocol for different file handling strategies and provides
|
|
implementations for CSV, JSON, and YAML formats. The main class, StructuredLogger,
|
|
allows for easy interaction with log data, enabling users to load, save, increment,
|
|
set, and append fields in the log. The script also includes context managers for
|
|
file locking and editing log data to ensure data integrity and avoid race conditions.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
from collections.abc import MutableMapping, MutableSequence
|
|
from contextlib import contextmanager
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any, Protocol
|
|
|
|
import fire
|
|
from filelock import FileLock
|
|
|
|
# polars is an optional dependency: remember the import failure instead of
# raising, so the module stays importable when CSV support is not needed.
try:
    import polars as pl
except ImportError as e:
    CSV_LIB_EXCEPTION: ImportError = e
else:
    CSV_LIB_EXCEPTION = None
|
|
|
|
# ruamel.yaml is an optional dependency: remember the import failure instead of
# raising, so the module stays importable when YAML support is not needed.
try:
    from ruamel.yaml import YAML
except ImportError as e:
    YAML_LIB_EXCEPTION: ImportError = e
else:
    YAML_LIB_EXCEPTION = None
|
|
|
|
|
|
class ContainerProxy:
    """
    A proxy class that wraps a mutable container object (such as a dictionary or
    a list) and calls a provided save_callback function whenever the container
    or its contents are changed.

    Item assignment, item deletion and every method call forwarded to the
    wrapped container trigger ``save_callback``. Mutable containers obtained
    through indexing are wrapped in a new proxy sharing the same callback.
    """

    def __init__(self, container, save_callback):
        self.container = container
        self.save_callback = save_callback

    def __getitem__(self, key):
        value = self.container[key]
        if isinstance(value, (MutableMapping, MutableSequence)):
            # Keep tracking changes made to nested containers.
            return ContainerProxy(value, self.save_callback)
        return value

    def __setitem__(self, key, value):
        self.container[key] = value
        self.save_callback()

    def __delitem__(self, key):
        del self.container[key]
        self.save_callback()

    def __getattr__(self, name):
        # __getattr__ only runs when the normal lookup fails. If one of the
        # proxy's own attributes is missing (an instance created without
        # __init__, as copy and pickle do), reading self.container below would
        # re-enter this method until RecursionError. Report it as missing.
        if name in ("container", "save_callback"):
            raise AttributeError(name)

        attr = getattr(self.container, name)

        if callable(attr):
            # The proxy cannot tell mutating methods from read-only ones, so
            # the callback runs after every forwarded call.
            def wrapper(*args, **kwargs):
                result = attr(*args, **kwargs)
                self.save_callback()
                return result

            return wrapper
        return attr

    def __iter__(self):
        return iter(self.container)

    def __len__(self):
        return len(self.container)

    def __repr__(self):
        return repr(self.container)
|
|
|
|
|
|
class AutoSaveDict(dict):
    """
    A subclass of the built-in dict class with additional functionality to
    automatically save changes to the dictionary. It maintains a timestamp of
    the last modification and automatically wraps nested mutable containers
    using ContainerProxy.

    Args:
        save_callback: callable without arguments, invoked after each change.
        register_timestamp: when true, the ``_timestamp`` entry is refreshed
            with the current ISO-formatted time before every save.
    """

    timestamp_key = "_timestamp"

    def __init__(self, *args, save_callback, register_timestamp=True, **kwargs):
        self.save_callback = save_callback
        self.__register_timestamp = register_timestamp
        # Stamp before loading the initial content: a timestamp present in the
        # given data overrides this one.
        self.__heartbeat()
        super().__init__(*args, **kwargs)
        self.__wrap_dictionaries()

    def __heartbeat(self):
        """Refresh the timestamp entry, if timestamps are enabled."""
        if self.__register_timestamp:
            self[AutoSaveDict.timestamp_key] = datetime.now().isoformat()

    def __save(self):
        """Refresh the timestamp and notify the owner that data changed."""
        self.__heartbeat()
        self.save_callback()

    def __wrap_dictionaries(self):
        """Replace plain nested mappings by AutoSaveDict instances."""
        # Iterate over a snapshot: assigning triggers a save, which may insert
        # the timestamp key and would otherwise fail with "dictionary changed
        # size during iteration".
        for key, value in list(self.items()):
            if isinstance(value, MutableMapping) and not isinstance(
                value, AutoSaveDict
            ):
                self[key] = AutoSaveDict(
                    value, save_callback=self.save_callback, register_timestamp=False
                )

    def __setitem__(self, key, value):
        if isinstance(value, MutableMapping) and not isinstance(value, AutoSaveDict):
            value = AutoSaveDict(
                value, save_callback=self.save_callback, register_timestamp=False
            )
        super().__setitem__(key, value)

        # Updating the timestamp is part of saving; saving again here would
        # recurse through __save -> __heartbeat -> __setitem__.
        if self.__register_timestamp and key == AutoSaveDict.timestamp_key:
            return
        self.__save()

    def __getitem__(self, key):
        value = super().__getitem__(key)
        if isinstance(value, (MutableMapping, MutableSequence)):
            # Changes made through the returned proxy are saved via this dict.
            return ContainerProxy(value, self.__save)
        return value

    def __delitem__(self, key):
        super().__delitem__(key)
        self.__save()

    def pop(self, *args, **kwargs):
        result = super().pop(*args, **kwargs)
        self.__save()
        return result

    def popitem(self):
        """Remove and return the last inserted (key, value) pair, then save."""
        result = super().popitem()
        self.__save()
        return result

    def clear(self):
        """Remove all entries, then save (which restores the timestamp)."""
        super().clear()
        self.__save()

    def update(self, *args, **kwargs):
        # dict.update() bypasses __setitem__, so wrap and save explicitly.
        super().update(*args, **kwargs)
        self.__wrap_dictionaries()
        self.__save()
|
|
|
|
|
|
class StructuredLoggerStrategy(Protocol):
    """Structural interface of the file-format backends used by the logger."""

    def load_data(self, file_path: Path) -> dict:
        """Read ``file_path`` and return its content as a dictionary."""
        ...

    def save_data(self, file_path: Path, data: dict) -> None:
        """Write ``data`` to ``file_path``."""
        ...
|
|
|
|
|
|
class CSVStrategy:
    """Strategy that stores the log as a CSV file, using polars."""

    def __init__(self) -> None:
        # polars is optional; complain only when CSV support is requested.
        if CSV_LIB_EXCEPTION:
            raise RuntimeError(
                "Can't parse CSV files. Missing library"
            ) from CSV_LIB_EXCEPTION

    def load_data(self, file_path: Path) -> dict:
        """
        Read the CSV file and merge its rows into a single dictionary.

        A column appearing in more than one row is collected into a list. The
        timestamp column is never accumulated: it keeps the last row's value.
        """
        frame = pl.read_csv(file_path, try_parse_dates=True)
        rows: list[dict[str, Any]] = frame.to_dicts()

        merged = {}
        for row in rows:
            for column, cell in row.items():
                if column not in merged or column == AutoSaveDict.timestamp_key:
                    merged[column] = cell
                    continue

                current = merged[column]
                if isinstance(current, list):
                    current.append(cell)
                else:
                    merged[column] = [current, cell]
        return merged

    def save_data(self, file_path: Path, data: dict) -> None:
        """Write ``data`` to ``file_path`` as a polars DataFrame in CSV form."""
        frame = pl.DataFrame(data)
        frame.write_csv(file_path)
|
|
|
|
|
|
class JSONStrategy:
    """Strategy that stores the log as a JSON document indented by 2 spaces."""

    def load_data(self, file_path: Path) -> dict:
        """Parse the JSON document stored at ``file_path``."""
        # Decode as UTF-8 explicitly instead of relying on the locale encoding.
        return json.loads(file_path.read_text(encoding="utf-8"))

    def save_data(self, file_path: Path, data: dict) -> None:
        """Serialize ``data`` to ``file_path``, replacing its previous content."""
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
|
|
|
|
|
|
class YAMLStrategy:
    """Strategy that stores the log as a YAML document, using ruamel.yaml."""

    def __init__(self):
        # ruamel.yaml is optional; complain only when YAML support is requested.
        if YAML_LIB_EXCEPTION:
            raise RuntimeError(
                "Can't parse YAML files. Missing library"
            ) from YAML_LIB_EXCEPTION

        handler = YAML()
        handler.indent(sequence=4, offset=2)
        handler.default_flow_style = False
        self.yaml = handler
        # Teach the dumper to emit AutoSaveDict objects as plain YAML mappings.
        self.yaml.representer.add_representer(AutoSaveDict, self.represent_dict)

    @classmethod
    def represent_dict(cls, dumper, data):
        """Represent ``data`` as a standard YAML map node."""
        map_tag = "tag:yaml.org,2002:map"
        return dumper.represent_mapping(map_tag, data)

    def load_data(self, file_path: Path) -> dict:
        """Parse the YAML document stored at ``file_path``."""
        text = file_path.read_text()
        return self.yaml.load(text)

    def save_data(self, file_path: Path, data: dict) -> None:
        """Dump ``data`` to ``file_path``, replacing its previous content."""
        with open(file_path, "w") as stream:
            self.yaml.dump(data, stream)
|
|
|
|
|
|
class StructuredLogger:
    """
    Dictionary-like log persisted to a structured file.

    The content is exposed through ``data``; every change made to it is written
    to ``file_name`` using the selected strategy (CSV, JSON or YAML).

    Args:
        file_name: path of the log file.
        strategy: object implementing ``load_data``/``save_data``. When omitted
            it is chosen from the file extension.
        truncate: when true and the file already exists, its content is
            replaced by the fresh (timestamp-only) data.

    Raises:
        ValueError: if no strategy is given and the extension is unknown.
        RuntimeError: if the library needed by the chosen strategy is missing.
    """

    def __init__(
        self, file_name: str, strategy: StructuredLoggerStrategy = None, truncate=False
    ):
        self.file_name: str = file_name
        self.file_path = Path(self.file_name)
        self._data: AutoSaveDict = AutoSaveDict(save_callback=self.save_data)

        if strategy is None:
            self.strategy: StructuredLoggerStrategy = self.guess_strategy_from_file(
                self.file_path
            )
        else:
            self.strategy = strategy

        if not self.file_path.exists():
            # parents=True: the log may live in a directory tree that does not
            # exist yet.
            self.file_path.parent.mkdir(parents=True, exist_ok=True)
            self.save_data()
            return

        # An existing file is neither loaded nor modified here unless
        # truncation was requested; use edit_context() to work on its content.
        if truncate:
            with self.get_lock():
                os.truncate(self.file_path, 0)
                self.save_data()

    def load_data(self):
        """Replace the in-memory data with the content of the log file."""
        # NOTE(review): the strategies return their own mapping objects, so
        # after this call `data` is no longer an AutoSaveDict and changes are
        # persisted only by an explicit save_data() (e.g. when edit_context()
        # exits) -- confirm this is intended.
        self._data = self.strategy.load_data(self.file_path)

    def save_data(self):
        """Write the in-memory data to the log file."""
        self.strategy.save_data(self.file_path, self._data)

    @property
    def data(self) -> AutoSaveDict:
        """The in-memory log content."""
        return self._data

    @contextmanager
    def get_lock(self):
        """Hold the ``<file>.lock`` file lock, waiting at most 10 seconds."""
        with FileLock(f"{self.file_path}.lock", timeout=10):
            yield

    @contextmanager
    def edit_context(self):
        """
        Context manager that ensures proper loading and saving of log data when
        performing multiple modifications.

        The file lock is held for the whole block. The data is loaded on entry
        and written back on exit, even if the block raises.
        """
        with self.get_lock():
            try:
                self.load_data()
                yield
            finally:
                self.save_data()

    @staticmethod
    def guess_strategy_from_file(file_path: Path) -> StructuredLoggerStrategy:
        """Pick the strategy matching the (case-insensitive) file extension."""
        file_extension = file_path.suffix.lower().lstrip(".")
        return StructuredLogger.get_strategy(file_extension)

    @staticmethod
    def get_strategy(strategy_name: str) -> StructuredLoggerStrategy:
        """
        Instantiate the strategy registered under ``strategy_name``.

        Raises:
            ValueError: if ``strategy_name`` is not csv, json, yaml or yml.
        """
        strategies = {
            "csv": CSVStrategy,
            "json": JSONStrategy,
            "yaml": YAMLStrategy,
            "yml": YAMLStrategy,
        }

        # Only the lookup is guarded, so a KeyError raised by a strategy
        # constructor is not misreported as an unknown strategy.
        try:
            strategy_class = strategies[strategy_name]
        except KeyError as e:
            raise ValueError(f"Unknown strategy for: {strategy_name}") from e
        return strategy_class()
|
|
|
|
|
|
if __name__ == "__main__":
    # Expose StructuredLogger as a command-line interface through python-fire.
    fire.Fire(StructuredLogger)
|