accel/habanalabs: return tlb inv error code upon failure

Now that CQ-completion based jobs no longer trigger a reset upon
failure, a failure of such a job (e.g., an MMU cache invalidation)
should be handled by the caller itself, based on the error code
returned to it.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
commit af5e675f13 (parent 60d7bbb5b4)
Author:    Koby Elbaz <kelbaz@habana.ai>, 2023-03-07 10:13:44 +02:00
Committed: Oded Gabbay <ogabbay@kernel.org>
2 changed files with 44 additions and 19 deletions
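
For context, a minimal sketch of the caller-side pattern this change
enforces. The wrapper function below is hypothetical and not part of
the patch; only hl_mmu_map_contiguous(), hl_mmu_unmap_contiguous(),
hl_mmu_invalidate_cache() and the hdev->mmu_lock usage are taken from
the driver:

/*
 * Hypothetical helper illustrating the new contract: a failed MMU
 * cache invalidation no longer triggers a device reset, so the caller
 * must check the returned error code and unwind its own mapping.
 */
static int example_map_and_invalidate(struct hl_ctx *ctx, u64 va,
					u64 dma_addr, u32 size)
{
	struct hl_device *hdev = ctx->hdev;
	int rc;

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, va, dma_addr, size);
	if (rc)
		goto unlock;

	/* invalidation may now fail; propagate rc instead of resetting */
	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		hl_mmu_unmap_contiguous(ctx, va, size);

unlock:
	mutex_unlock(&hdev->mmu_lock);
	return rc;
}

This is the shape each call site in the diff below follows: map, then
invalidate, and on invalidation failure unmap before releasing the lock.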

drivers/accel/habanalabs/gaudi/gaudi.c

@@ -3725,7 +3725,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
 		if (rc) {
 			dev_err(hdev->dev,
 				"failed to set hop0 addr for asid %d\n", i);
-			goto err;
+			return rc;
 		}
 	}
@@ -3736,7 +3736,9 @@ static int gaudi_mmu_init(struct hl_device *hdev)
 
 	/* mem cache invalidation */
 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
-	hl_mmu_invalidate_cache(hdev, true, 0);
+	rc = hl_mmu_invalidate_cache(hdev, true, 0);
+	if (rc)
+		return rc;
 
 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
@@ -3752,9 +3754,6 @@ static int gaudi_mmu_init(struct hl_device *hdev)
 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
 
 	return 0;
-
-err:
-	return rc;
 }
 
 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
@@ -8420,19 +8419,26 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 	}
 
 	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
 			hdev->internal_cb_pool_dma_addr,
 			HOST_SPACE_INTERNAL_CB_SZ);
-	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
-	mutex_unlock(&hdev->mmu_lock);
-
 	if (rc)
 		goto unreserve_internal_cb_pool;
 
+	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
+	if (rc)
+		goto unmap_internal_cb_pool;
+
+	mutex_unlock(&hdev->mmu_lock);
+
 	return 0;
 
+unmap_internal_cb_pool:
+	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
+			HOST_SPACE_INTERNAL_CB_SZ);
 unreserve_internal_cb_pool:
+	mutex_unlock(&hdev->mmu_lock);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
 destroy_internal_cb_pool:

drivers/accel/habanalabs/gaudi2/gaudi2.c

@@ -10239,16 +10239,23 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
 	/* Create mapping on asic side */
 	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
-	hl_mmu_invalidate_cache_range(hdev, false,
-				MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
-				ctx->asid, reserved_va_base, SZ_2M);
-	mutex_unlock(&hdev->mmu_lock);
-
 	if (rc) {
 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
 		goto unreserve_va;
 	}
 
+	rc = hl_mmu_invalidate_cache_range(hdev, false,
+				MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
+				ctx->asid, reserved_va_base, SZ_2M);
+	if (rc) {
+		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
+		goto unreserve_va;
+	}
+
+	mutex_unlock(&hdev->mmu_lock);
+
 	/* Enable MMU on KDMA */
 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
@@ -10277,11 +10284,16 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
 
 	mutex_lock(&hdev->mmu_lock);
-	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
-	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
+
+	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
+	if (rc)
+		goto unreserve_va;
+
+	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
 				ctx->asid, reserved_va_base, SZ_2M);
-	mutex_unlock(&hdev->mmu_lock);
 
 unreserve_va:
+	mutex_unlock(&hdev->mmu_lock);
 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
 free_data_buffer:
 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
@@ -10334,17 +10346,24 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c
 	}
 
 	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
 					HOST_SPACE_INTERNAL_CB_SZ);
-	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
-	mutex_unlock(&hdev->mmu_lock);
-
 	if (rc)
 		goto unreserve_internal_cb_pool;
 
+	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
+	if (rc)
+		goto unmap_internal_cb_pool;
+
+	mutex_unlock(&hdev->mmu_lock);
+
 	return 0;
 
+unmap_internal_cb_pool:
+	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
 unreserve_internal_cb_pool:
+	mutex_unlock(&hdev->mmu_lock);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
 destroy_internal_cb_pool:
 	gen_pool_destroy(hdev->internal_cb_pool);