From 93ce73342537289f8ee9ec08674d4f56ee8770c9 Mon Sep 17 00:00:00 2001 From: Guilherme Gallo Date: Fri, 15 Nov 2024 02:08:30 -0300 Subject: [PATCH] ci/lava: Improve exception handling for job failures Include detailed error messages when raising exceptions on LAVA job failures to enhance debugging and error tracking. Also, handle additional error types by extracting error messages from metadata and retrying accordingly. Signed-off-by: Guilherme Gallo Part-of: --- .gitlab-ci/lava/lava_job_submitter.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index d436a9f5a8f..fe9988f29fc 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -103,20 +103,22 @@ def raise_exception_from_metadata(metadata: dict, job_id: int) -> None: if "result" not in metadata or metadata["result"] != "fail": return if "error_type" in metadata: - error_type = metadata["error_type"] - if error_type == "Infrastructure": - raise MesaCIRetriableException( - f"LAVA job {job_id} failed with Infrastructure Error. Retry." - ) + error_type: str = metadata["error_type"] + error_msg: str = metadata.get("error_msg", "") + full_err_msg: str = error_type if not error_msg else f"{error_type}: {error_msg}" if error_type == "Job": # This happens when LAVA assumes that the job cannot terminate or # with mal-formed job definitions. As we are always validating the # jobs, only the former is probable to happen. E.g.: When some LAVA # action timed out more times than expected in job definition. raise MesaCIRetriableException( - f"LAVA job {job_id} failed with JobError " + f"LAVA job {job_id} failed with {full_err_msg}. Retry." "(possible LAVA timeout misconfiguration/bug). Retry." ) + if error_type: + raise MesaCIRetriableException( + f"LAVA job {job_id} failed with error type: {full_err_msg}. Retry." + ) if "case" in metadata and metadata["case"] == "validate": raise MesaCIRetriableException( f"LAVA job {job_id} failed validation (possible download error). Retry."