From f4301626cd284f7f52efd995fe3f794c53b7547f Mon Sep 17 00:00:00 2001
From: Guilherme Gallo
Date: Fri, 15 Nov 2024 13:57:54 -0300
Subject: [PATCH] ci/lava: Uprev freezegun

The previous version exhibited bugs when the fixtures ran during
parallel testing. With the new version, the side effects on the mocks
need a small adjustment and the start time has to be pinned so that
the tests stay reproducible.
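
Illustration (not part of the patch): the old fixtures assigned the
result of frozen_time.tick(1) to side_effect, so the frozen clock was
advanced only once, at fixture setup; the new callable side_effect
ticks the clock on every scheduler.job_state() call. A minimal sketch
of the pattern, assuming only freezegun and the standard library; the
proxy object and job id below are illustrative stand-ins, not the real
LAVA XML-RPC API:

    from datetime import datetime
    from unittest.mock import MagicMock

    from freezegun import freeze_time

    with freeze_time("2024-01-01") as frozen_time:
        proxy = MagicMock()

        # Callable side_effect: advance the frozen clock by 1s per call.
        def mock_job_state(job_id) -> dict[str, str]:
            frozen_time.tick(1)
            return {"job_state": "Running"}

        proxy.scheduler.job_state.side_effect = mock_job_state

        before = datetime.now()
        proxy.scheduler.job_state("1234")
        proxy.scheduler.job_state("1234")
        # The frozen clock advanced once per call, not once at setup.
        assert (datetime.now() - before).total_seconds() == 2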

Signed-off-by: Guilherme Gallo
Part-of:
---
 .gitlab-ci/tests/conftest.py                |  9 ++++---
 .gitlab-ci/tests/lava/helpers.py            |  8 +++---
 .gitlab-ci/tests/test_lava_job_submitter.py | 18 ++++++++-----
 .gitlab-ci/tests/utils/test_lava_log.py     | 30 ++++++++++-----------
 bin/ci/test/requirements.txt                |  2 +-
 5 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/.gitlab-ci/tests/conftest.py b/.gitlab-ci/tests/conftest.py
index ba021cc4719..2edc55a2282 100644
--- a/.gitlab-ci/tests/conftest.py
+++ b/.gitlab-ci/tests/conftest.py
@@ -25,7 +25,7 @@ def mock_sleep():
 
 @pytest.fixture
 def frozen_time(mock_sleep):
-    with freeze_time() as frozen_time:
+    with freeze_time("2024-01-01") as frozen_time:
         yield frozen_time
 
 
@@ -34,6 +34,10 @@ RESULT_GET_TESTJOB_RESULTS = [{"metadata": {"result": "test"}}]
 
 @pytest.fixture
 def mock_proxy(frozen_time):
+    def mock_job_state(jid) -> dict[str, str]:
+        frozen_time.tick(1)
+        return {"job_state": "Running"}
+
     def create_proxy_mock(
         job_results=RESULT_GET_TESTJOB_RESULTS,
         testsuite_results=[generate_testsuite_result()],
@@ -53,8 +57,7 @@ def mock_proxy(frozen_time):
         proxy_logs_mock.return_value = jobs_logs_response()
 
         proxy_job_state = proxy_mock.scheduler.job_state
-        proxy_job_state.return_value = {"job_state": "Running"}
-        proxy_job_state.side_effect = frozen_time.tick(1)
+        proxy_job_state.side_effect = mock_job_state
 
         proxy_show_mock = proxy_mock.scheduler.jobs.show
         proxy_show_mock.return_value = defaultdict(
diff --git a/.gitlab-ci/tests/lava/helpers.py b/.gitlab-ci/tests/lava/helpers.py
index 82d8c81ef23..60ed6ee2985 100644
--- a/.gitlab-ci/tests/lava/helpers.py
+++ b/.gitlab-ci/tests/lava/helpers.py
@@ -1,5 +1,5 @@
 from contextlib import nullcontext as does_not_raise
-from datetime import datetime
+from datetime import UTC, datetime
 from io import StringIO
 from itertools import cycle
 from typing import Any, Callable, Generator, Iterable, Optional, Tuple, Union
@@ -46,7 +46,7 @@ def generate_testsuite_result(
 def jobs_logs_response(
     finished=False, msg=None, lvl="target", result=None, exit_code=None
 ) -> Tuple[bool, str]:
-    timed_msg = {"dt": str(datetime.now()), "msg": "New message", "lvl": lvl}
+    timed_msg = {"dt": str(datetime.now(tz=UTC)), "msg": "New message", "lvl": lvl}
     if result:
         timed_msg["lvl"] = "target"
         timed_msg["msg"] = f"hwci: mesa: {result}, exit_code: {exit_code}"
@@ -102,7 +102,7 @@ def generate_n_logs(
     else:
         tick_gen = cycle((tick_fn,))
 
-    with freeze_time(datetime.now()) as time_travel:
+    with freeze_time(datetime.now(tz=UTC)) as time_travel:
         tick_sec: int = next(tick_gen)
         while True:
             # Simulate a scenario where the target job is waiting for being started
@@ -128,7 +128,7 @@ def to_iterable(tick_fn):
 def mock_logs(messages=None, result=None, exit_code=None):
     if messages is None:
         messages = {}
-    with freeze_time(datetime.now()) as time_travel:
+    with freeze_time(datetime.now(tz=UTC)) as time_travel:
         # Simulate a complete run given by message_fn
         for msg, tick_list in section_aware_message_generator(messages, result, exit_code):
             for tick_sec in tick_list:
diff --git a/.gitlab-ci/tests/test_lava_job_submitter.py b/.gitlab-ci/tests/test_lava_job_submitter.py
index cd08da6b488..f4846b3b073 100644
--- a/.gitlab-ci/tests/test_lava_job_submitter.py
+++ b/.gitlab-ci/tests/test_lava_job_submitter.py
@@ -8,7 +8,7 @@
 import os
 import xmlrpc.client
 from contextlib import nullcontext as does_not_raise
-from datetime import datetime
+from datetime import UTC, datetime
 from itertools import cycle, islice, repeat
 from pathlib import Path
 from typing import Generator
@@ -43,11 +43,14 @@ NUMBER_OF_MAX_ATTEMPTS = NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1
 @pytest.fixture
 def mock_proxy_waiting_time(mock_proxy):
     def update_mock_proxy(frozen_time, **kwargs):
+        def mock_job_state(jid) -> dict[str, str]:
+            frozen_time.tick(wait_time)
+            return {"job_state": "Running"}
+
         wait_time = kwargs.pop("wait_time", 1)
         proxy_mock = mock_proxy(**kwargs)
         proxy_job_state = proxy_mock.scheduler.job_state
-        proxy_job_state.return_value = {"job_state": "Running"}
-        proxy_job_state.side_effect = frozen_time.tick(wait_time)
+        proxy_job_state.side_effect = mock_job_state
 
         return proxy_mock
 
@@ -261,7 +264,7 @@ def test_simulate_a_long_wait_to_start_a_job(
     side_effect,
     mock_proxy_waiting_time,
 ):
-    start_time = datetime.now()
+    start_time = datetime.now(tz=UTC)
     job: LAVAJob = retriable_follow_job(
         mock_proxy_waiting_time(
             frozen_time, side_effect=side_effect, wait_time=wait_time
@@ -271,7 +274,7 @@ def test_simulate_a_long_wait_to_start_a_job(
         None
     )
 
-    end_time = datetime.now()
+    end_time = datetime.now(tz=UTC)
     delta_time = end_time - start_time
 
     assert job.status == "pass"
@@ -292,6 +295,7 @@ LONG_LAVA_QUEUE_SCENARIOS = {
 )
 def test_wait_for_job_get_started_no_time_to_run(monkeypatch, job_timeout, expectation):
     monkeypatch.setattr("lava.lava_job_submitter.CI_JOB_TIMEOUT_SEC", job_timeout)
+    monkeypatch.setattr("lava.lava_job_submitter.CI_JOB_STARTED_AT", datetime.now(tz=UTC))
     job = MagicMock()
     # Make it escape the loop
     job.is_started.side_effect = (False, False, True)
@@ -439,14 +443,14 @@ def test_full_yaml_log(mock_proxy, frozen_time, lava_job_submitter):
     proxy.scheduler.jobs.submit = reset_logs
     try:
         time_travel_to_test_time()
-        start_time = datetime.now()
+        start_time = datetime.now(tz=UTC)
         retriable_follow_job(proxy, "", "", None)
     finally:
         try:
             # If the job fails, maybe there will be no structured log
             print(lava_job_submitter.structured_log_file.read_text())
         finally:
-            end_time = datetime.now()
+            end_time = datetime.now(tz=UTC)
             print("---- Reproduction log stats ----")
             print(f"Start time: {start_time}")
             print(f"End time: {end_time}")
diff --git a/.gitlab-ci/tests/utils/test_lava_log.py b/.gitlab-ci/tests/utils/test_lava_log.py
index e42a9bef692..9d9f6d9713d 100644
--- a/.gitlab-ci/tests/utils/test_lava_log.py
+++ b/.gitlab-ci/tests/utils/test_lava_log.py
@@ -5,7 +5,7 @@
 #
 # SPDX-License-Identifier: MIT
 
-from datetime import datetime, timedelta
+from datetime import UTC, datetime, timedelta
 
 import pytest
 from lava.exceptions import MesaCIKnownIssueException, MesaCITimeoutError
@@ -76,35 +76,35 @@ def test_gitlab_section(method, collapsed, expectation):
 def test_gl_sections():
     lines = [
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "debug",
             "msg": "Received signal: 0_setup-ssh-server 10145749_1.3.2.3.1",
         },
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "debug",
             "msg": "Received signal: 0_mesa 5971831_1.3.2.3.1",
         },
         # Redundant log message which triggers the same Gitlab Section, it
         # should be ignored, unless the id is different
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "target",
             "msg": "[ 7.778836] ",
         },
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "debug",
             "msg": "Received signal: mesa-ci_iris-kbl-traces",
        },
         # Another redundant log message
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "target",
             "msg": "[ 16.997829] ",
         },
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "target",
             "msg": "",
         },
@@ -140,12 +140,12 @@ def test_gl_sections():
 def test_log_follower_flush():
     lines = [
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "debug",
             "msg": "Received signal: mesa-ci_iris-kbl-traces",
         },
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "target",
             "msg": "",
         },
@@ -292,7 +292,7 @@ WATCHDOG_SCENARIOS = {
 def test_log_follower_watchdog(frozen_time, timedelta_kwargs, exception):
     lines = [
         {
-            "dt": datetime.now(),
+            "dt": datetime.now(tz=UTC),
             "lvl": "debug",
             "msg": "Received signal: mesa-ci_iris-kbl-traces",
         },
@@ -401,7 +401,7 @@ def test_detect_a6xx_gpu_recovery_failure(frozen_time):
     log_follower = LogFollower()
     lava_log_hints = LAVALogHints(log_follower=log_follower)
     failure_message = {
-        "dt": datetime.now().isoformat(),
+        "dt": datetime.now(tz=UTC).isoformat(),
         "msg": A6XX_GPU_RECOVERY_FAILURE_MESSAGE[0],
         "lvl": "feedback",
     }
@@ -410,13 +410,13 @@ def test_detect_a6xx_gpu_recovery_failure(frozen_time):
             lava_log_hints.detect_a6xx_gpu_recovery_failure(failure_message)
             # Simulate the passage of time within the watch period
             frozen_time.tick(1)
-            failure_message["dt"] = datetime.now().isoformat()
+            failure_message["dt"] = datetime.now(tz=UTC).isoformat()
 
 def test_detect_a6xx_gpu_recovery_success(frozen_time):
     log_follower = LogFollower()
     lava_log_hints = LAVALogHints(log_follower=log_follower)
     failure_message = {
-        "dt": datetime.now().isoformat(),
+        "dt": datetime.now(tz=UTC).isoformat(),
         "msg": A6XX_GPU_RECOVERY_FAILURE_MESSAGE[0],
         "lvl": "feedback",
     }
@@ -424,12 +424,12 @@ def test_detect_a6xx_gpu_recovery_success(frozen_time):
     for _ in range(A6XX_GPU_RECOVERY_FAILURE_MAX_COUNT - 1):
         lava_log_hints.detect_a6xx_gpu_recovery_failure(failure_message)
         frozen_time.tick(1)
-        failure_message["dt"] = datetime.now().isoformat()
+        failure_message["dt"] = datetime.now(tz=UTC).isoformat()
 
     # Simulate the passage of time outside of the watch period
     frozen_time.tick(60 * A6XX_GPU_RECOVERY_WATCH_PERIOD_MIN + 1)
     failure_message = {
-        "dt": datetime.now().isoformat(),
+        "dt": datetime.now(tz=UTC).isoformat(),
         "msg": A6XX_GPU_RECOVERY_FAILURE_MESSAGE[1],
         "lvl": "feedback",
     }
diff --git a/bin/ci/test/requirements.txt b/bin/ci/test/requirements.txt
index 1576c0c7d15..36542d5c4b3 100644
--- a/bin/ci/test/requirements.txt
+++ b/bin/ci/test/requirements.txt
@@ -1,7 +1,7 @@
 -r ../requirements.txt
 filelock==3.12.4
 fire==0.5.0
-freezegun==1.1.0
+freezegun==1.5.1
 hypothesis==6.67.1
 mock==5.1.0
 polars==0.19.3