ci/lava: Improve exception handling for job failures

Include detailed error messages when raising exceptions on LAVA job
failures to enhance debugging and error tracking.

Also, handle additional error types by extracting error messages from
metadata and retrying accordingly.

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32163>
This commit is contained in:
Guilherme Gallo 2024-11-15 02:08:30 -03:00 committed by Marge Bot
parent b2c2f0d187
commit 93ce733425

View File

@ -103,20 +103,22 @@ def raise_exception_from_metadata(metadata: dict, job_id: int) -> None:
if "result" not in metadata or metadata["result"] != "fail":
return
if "error_type" in metadata:
error_type = metadata["error_type"]
if error_type == "Infrastructure":
raise MesaCIRetriableException(
f"LAVA job {job_id} failed with Infrastructure Error. Retry."
)
error_type: str = metadata["error_type"]
error_msg: str = metadata.get("error_msg", "")
full_err_msg: str = error_type if not error_msg else f"{error_type}: {error_msg}"
if error_type == "Job":
# This happens when LAVA assumes that the job cannot terminate, or
# with malformed job definitions. As we are always validating the
# jobs, only the former is likely to happen, e.g. when some LAVA
# action timed out more times than expected in the job definition.
raise MesaCIRetriableException(
f"LAVA job {job_id} failed with JobError "
f"LAVA job {job_id} failed with {full_err_msg}. Retry."
"(possible LAVA timeout misconfiguration/bug). Retry."
)
if error_type:
raise MesaCIRetriableException(
f"LAVA job {job_id} failed with error type: {full_err_msg}. Retry."
)
if "case" in metadata and metadata["case"] == "validate":
raise MesaCIRetriableException(
f"LAVA job {job_id} failed validation (possible download error). Retry."