habanalabs: refactor HOP functions in MMU V1

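Take advantage of the per-hop shifts and masks now being defined as arrays
(hop_shifts[] and hop_masks[]): replace the get_hopN_pte_addr() helper and its
five get_hop0..4_pte_addr() wrappers with a single get_hop_pte_addr() that is
indexed by hop number, and convert the unrolled per-hop sequences in the map
and unmap paths into loops over hop-indexed arrays. While at it, drop the
leading underscore from _hl_mmu_v1_map() and _hl_mmu_v1_unmap().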

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Ohad Sharabi 2022-04-03 11:22:34 +03:00 committed by Greg Kroah-Hartman
parent b31848430f
commit 2ba75d3119

View File

@ -10,6 +10,8 @@
#include <linux/slab.h> #include <linux/slab.h>
#define MMU_V1_MAX_HOPS (MMU_HOP4 + 1)
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
@ -170,51 +172,15 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
return num_of_ptes_left; return num_of_ptes_left;
} }
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
u64 virt_addr, u64 mask, u64 shift) u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{ {
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * u64 mask, shift;
((virt_addr & mask) >> shift);
}
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, mask = mmu_prop->hop_masks[hop_idx];
struct hl_mmu_properties *mmu_prop, shift = mmu_prop->hop_shifts[hop_idx];
u64 hop_addr, u64 vaddr) return hop_addr_arr[hop_idx] +
{ ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP0],
mmu_prop->hop_shifts[MMU_HOP0]);
}
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP1],
mmu_prop->hop_shifts[MMU_HOP1]);
}
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP2],
mmu_prop->hop_shifts[MMU_HOP2]);
}
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP3],
mmu_prop->hop_shifts[MMU_HOP3]);
}
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP4],
mmu_prop->hop_shifts[MMU_HOP4]);
} }
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
@ -516,74 +482,50 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
} }
} }
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
u64 virt_addr, bool is_dram_addr) u64 virt_addr, bool is_dram_addr)
{ {
u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop; struct hl_mmu_properties *mmu_prop;
u64 hop0_addr = 0, hop0_pte_addr = 0,
hop1_addr = 0, hop1_pte_addr = 0,
hop2_addr = 0, hop2_pte_addr = 0,
hop3_addr = 0, hop3_pte_addr = 0,
hop4_addr = 0, hop4_pte_addr = 0,
curr_pte;
bool is_huge, clear_hop3 = true; bool is_huge, clear_hop3 = true;
int hop_idx;
/* shifts and masks are the same in PMMU and HPMMU, use one of them */ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
hop0_addr = get_hop0_addr(ctx); for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); if (hop_idx == MMU_HOP0) {
hop_addr[hop_idx] = get_hop0_addr(ctx);
} else {
hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr[hop_idx] == ULLONG_MAX)
goto not_mapped;
}
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; hop_pte_addr[hop_idx] =
get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
}
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
is_huge = curr_pte & mmu_prop->last_mask; is_huge = curr_pte & mmu_prop->last_mask;
if (is_dram_addr && !is_huge) { if (is_dram_addr && !is_huge) {
dev_err(hdev->dev, dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
"DRAM unmapping should use huge pages only\n");
return -EFAULT; return -EFAULT;
} }
if (!is_huge) { if (!is_huge) {
hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); hop_idx = MMU_HOP4;
hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop4_addr == ULLONG_MAX) if (hop_addr[hop_idx] == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, hop_pte_addr[hop_idx] =
virt_addr); get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
clear_hop3 = false; clear_hop3 = false;
} }
@ -605,39 +547,33 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
goto not_mapped; goto not_mapped;
} }
write_final_pte(ctx, hop3_pte_addr, default_pte); hop_idx = MMU_HOP3;
put_pte(ctx, hop3_addr); write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
put_pte(ctx, hop_addr[hop_idx]);
} else { } else {
if (!(curr_pte & PAGE_PRESENT_MASK)) if (!(curr_pte & PAGE_PRESENT_MASK))
goto not_mapped; goto not_mapped;
if (hop4_addr) if (hop_addr[MMU_HOP4])
clear_pte(ctx, hop4_pte_addr); clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
else else
clear_pte(ctx, hop3_pte_addr); clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
if (hop4_addr && !put_pte(ctx, hop4_addr)) if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
clear_hop3 = true; clear_hop3 = true;
if (!clear_hop3) if (!clear_hop3)
goto mapped; goto mapped;
clear_pte(ctx, hop3_pte_addr); for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
clear_pte(ctx, hop_pte_addr[hop_idx]);
if (put_pte(ctx, hop3_addr)) if (hop_idx == MMU_HOP0)
goto mapped; break;
clear_pte(ctx, hop2_pte_addr); if (put_pte(ctx, hop_addr[hop_idx]))
goto mapped;
if (put_pte(ctx, hop2_addr)) }
goto mapped;
clear_pte(ctx, hop1_pte_addr);
if (put_pte(ctx, hop1_addr))
goto mapped;
clear_pte(ctx, hop0_pte_addr);
} }
mapped: mapped:
@ -650,21 +586,15 @@ not_mapped:
return -EINVAL; return -EINVAL;
} }
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size, bool is_dram_addr) u32 page_size, bool is_dram_addr)
{ {
u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop; struct hl_mmu_properties *mmu_prop;
u64 hop0_addr = 0, hop0_pte_addr = 0, bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
hop1_addr = 0, hop1_pte_addr = 0, int num_hops, hop_idx, prev_hop, rc = -ENOMEM;
hop2_addr = 0, hop2_pte_addr = 0,
hop3_addr = 0, hop3_pte_addr = 0,
hop4_addr = 0, hop4_pte_addr = 0,
curr_pte = 0;
bool hop1_new = false, hop2_new = false, hop3_new = false,
hop4_new = false, is_huge;
int rc = -ENOMEM;
/* /*
* This mapping function can map a page or a huge page. For huge page * This mapping function can map a page or a huge page. For huge page
@ -684,39 +614,21 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
is_huge = false; is_huge = false;
} }
hop0_addr = get_hop0_addr(ctx); num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
if (hop1_addr == ULLONG_MAX) if (hop_idx == MMU_HOP0) {
goto err; hop_addr[hop_idx] = get_hop0_addr(ctx);
} else {
hop_addr[hop_idx] =
get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
if (hop_addr[hop_idx] == ULLONG_MAX)
goto err;
}
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); hop_pte_addr[hop_idx] =
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
if (hop2_addr == ULLONG_MAX)
goto err;
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
if (hop3_addr == ULLONG_MAX)
goto err;
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
if (!is_huge) {
hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
if (hop4_addr == ULLONG_MAX)
goto err;
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
} }
if (hdev->dram_default_page_mapping && is_dram_addr) { if (hdev->dram_default_page_mapping && is_dram_addr) {
@ -732,30 +644,22 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
goto err; goto err;
} }
if (hop1_new || hop2_new || hop3_new || hop4_new) { for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
dev_err(hdev->dev, if (hop_new[hop_idx]) {
"DRAM mapping should not allocate more hops\n"); dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
rc = -EFAULT; rc = -EFAULT;
goto err; goto err;
}
} }
} else if (curr_pte & PAGE_PRESENT_MASK) { } else if (curr_pte & PAGE_PRESENT_MASK) {
dev_err(hdev->dev, dev_err(hdev->dev,
"mapping already exists for virt_addr 0x%llx\n", "mapping already exists for virt_addr 0x%llx\n",
virt_addr); virt_addr);
dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", *(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); hop_pte_addr[hop_idx]);
dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
if (!is_huge)
dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
*(u64 *) (uintptr_t) hop4_pte_addr,
hop4_pte_addr);
rc = -EINVAL; rc = -EINVAL;
goto err; goto err;
@ -764,53 +668,28 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
| PAGE_PRESENT_MASK; | PAGE_PRESENT_MASK;
if (is_huge) write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
write_final_pte(ctx, hop3_pte_addr, curr_pte);
else
write_final_pte(ctx, hop4_pte_addr, curr_pte);
if (hop1_new) { for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
curr_pte = prev_hop = hop_idx - 1;
(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
write_pte(ctx, hop0_pte_addr, curr_pte);
}
if (hop2_new) {
curr_pte =
(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
write_pte(ctx, hop1_pte_addr, curr_pte);
get_pte(ctx, hop1_addr);
}
if (hop3_new) {
curr_pte =
(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
write_pte(ctx, hop2_pte_addr, curr_pte);
get_pte(ctx, hop2_addr);
}
if (!is_huge) { if (hop_new[hop_idx]) {
if (hop4_new) { curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
PAGE_PRESENT_MASK; if (hop_idx != MMU_HOP1)
write_pte(ctx, hop3_pte_addr, curr_pte); get_pte(ctx, hop_addr[prev_hop]);
get_pte(ctx, hop3_addr);
} }
get_pte(ctx, hop4_addr);
} else {
get_pte(ctx, hop3_addr);
} }
get_pte(ctx, hop_addr[num_hops - 1]);
return 0; return 0;
err: err:
if (hop4_new) for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
free_hop(ctx, hop4_addr); if (hop_new[hop_idx])
if (hop3_new) free_hop(ctx, hop_addr[hop_idx]);
free_hop(ctx, hop3_addr); }
if (hop2_new)
free_hop(ctx, hop2_addr);
if (hop1_new)
free_hop(ctx, hop1_addr);
return rc; return rc;
} }
@ -928,8 +807,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
mmu->fini = hl_mmu_v1_fini; mmu->fini = hl_mmu_v1_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init; mmu->ctx_init = hl_mmu_v1_ctx_init;
mmu->ctx_fini = hl_mmu_v1_ctx_fini; mmu->ctx_fini = hl_mmu_v1_ctx_fini;
mmu->map = _hl_mmu_v1_map; mmu->map = hl_mmu_v1_map;
mmu->unmap = _hl_mmu_v1_unmap; mmu->unmap = hl_mmu_v1_unmap;
mmu->flush = flush; mmu->flush = flush;
mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_out = hl_mmu_v1_swap_out;
mmu->swap_in = hl_mmu_v1_swap_in; mmu->swap_in = hl_mmu_v1_swap_in;