Merge tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "Highlights:
   - AMD adds some more upcoming HW platforms
   - Intel made Meteorlake stable and started adding Lunarlake
   - nouveau has a bunch of display rework in preparation for the NVIDIA
     GSP firmware support
   - msm adds a7xx support
   - habanalabs has finished migration to accel subsystem

  Detail summary:

  kernel:
   - add initial vmemdup-user-array

  core:
   - fix platform remove() to return void
   - drm_file owner updated to reflect owner
   - move size calcs to drm buddy allocator
   - let GPUVM build as a module
   - allow variable number of run-queues in scheduler

  edid:
   - handle bad h/v sync_end in EDIDs

  panfrost:
   - add Boris as maintainer

  fbdev:
   - use fb_ops helpers more
   - only allow logo use from fbcon
   - rename fb_pgproto to pgprot_framebuffer
   - add HPD state to drm_connector_oob_hotplug_event
   - convert to fbdev i/o mem helpers

  i915:
   - Enable Meteorlake by default
   - Early Xe2 LPD/Lunarlake display enablement
   - Rework subplatforms into IP version checks
   - GuC based TLB invalidation for Meteorlake
   - Display rework for future Xe driver integration
   - LNL FBC features
   - LNL display feature capability reads
   - update recommended fw versions for DG2+
   - drop fastboot module parameter
   - add device ID for Arrowlake-S
   - drop preproduction workarounds
   - don't disable preemption for resets
   - cleanup inlines in headers
   - PXP firmware loading fix
   - Fix sg list lengths
   - DSC PPS state readout/verification
   - Add more RPL P/U PCI IDs
   - Add new DG2-G12 stepping
   - DP enhanced framing support to state checker
   - Improve shared link bandwidth management
   - stop using GEM macros in display code
   - refactor related code into display code
   - locally enable W=1 warnings
   - remove PSR watchdog timers on LNL

  amdgpu:
   - RAS/FRU EEPROM updates
   - IP discovery updates
   - GC 11.5 support
   - DCN 3.5 support
   - VPE 6.1 support
   - NBIO 7.11 support
   - DML2 support
   - lots of IP updates
   - use flexible arrays for bo list handling
   - W=1 fixes
   - Enable seamless boot in more cases
   - Enable context type property for HDMI
   - Rework GPUVM TLB flushing
   - VCN IB start/size alignment fixes

  amdkfd:
   - GC 10/11 fixes
   - GC 11.5 support
   - use partial migration in GPU faults

  radeon:
   - W=1 Fixes
   - fix some possible buffer overflow/NULL derefs

  nouveau:
   - update uapi for NO_PREFETCH
   - scheduler/fence fixes
   - rework suspend/resume for GSP-RM
   - rework display in preparation for GSP-RM

  habanalabs:
   - uapi: expose tsc clock
   - uapi: block access to eventfd through control device
   - uapi: force dma-buf export to PAGE_SIZE alignments
   - complete move to accel subsystem
   - move firmware interface include files
   - perform hard reset on PCIe AXI drain event
   - optimise user interrupt handling

  msm:
   - DP: use existing helpers for DPCD
   - DPU: interrupts reworked
   - gpu: a7xx (a730/a740) support
   - decouple msm_drv from kms for headless devices

  mediatek:
   - MT8188 dsi/dp/edp support
   - DDP GAMMA - 12 bit LUT support
   - connector dynamic selection capability

  rockchip:
   - rv1126 mipi-dsi/vop support
   - add planar formats

  ast:
   - rename constants

  panels:
   - Mitsubishi AA084XE01
   - JDI LPM102A188A
   - LTK050H3148W-CTA6

  ivpu:
   - power management fixes

  qaic:
   - add detach slice bo api

  komeda:
   - add NV12 writeback

  tegra:
   - support NVSYNC/NHSYNC
   - host1x suspend fixes

  ili9882t:
   - separate into own driver"

* tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm: (1803 commits)
  drm/amdgpu: Remove unused variables from amdgpu_show_fdinfo
  drm/amdgpu: Remove duplicate fdinfo fields
  drm/amd/amdgpu: avoid to disable gfxhub interrupt when driver is unloaded
  drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems
  drm/amdgpu: Retrieve CE count from ce_count_lo_chip in EccInfo table
  drm/amdgpu: Identify data parity error corrected in replay mode
  drm/amdgpu: Fix typo in IP discovery parsing
  drm/amd/display: fix S/G display enablement
  drm/amdxcp: fix amdxcp unloads incompletely
  drm/amd/amdgpu: fix the GPU power print error in pm info
  drm/amdgpu: Use pcie domain of xcc acpi objects
  drm/amd: check num of link levels when update pcie param
  drm/amdgpu: Add a read to GFX v9.4.3 ring test
  drm/amd/pm: call smu_cmn_get_smc_version in is_mode1_reset_supported.
  drm/amdgpu: get RAS poison status from DF v4_6_2
  drm/amdgpu: Use discovery table's subrevision
  drm/amd/display: 3.2.256
  drm/amd/display: add interface to query SubVP status
  drm/amd/display: Read before writing Backlight Mode Set Register
  drm/amd/display: Disable SYMCLK32_SE RCO on DCN314
  ...
commit 7d461b291e
Author: Linus Torvalds
Date:   2023-11-01 06:28:35 -10:00

1476 changed files with 295700 additions and 20118 deletions


@ -1,4 +1,4 @@
What: /sys/kernel/debug/habanalabs/hl<n>/addr
What: /sys/kernel/debug/accel/<n>/addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -8,34 +8,34 @@ Description: Sets the device address to be used for read or write through
only when the IOMMU is disabled.
The acceptable value is a string that starts with "0x"
What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
What: /sys/kernel/debug/accel/<n>/clk_gate
Date: May 2020
KernelVersion: 5.8
Contact: ogabbay@kernel.org
Description: This setting is now deprecated as clock gating is handled solely by the f/w
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
What: /sys/kernel/debug/accel/<n>/command_buffers
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently allocated
command buffers
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission
What: /sys/kernel/debug/accel/<n>/command_submission
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently active
command submissions
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
What: /sys/kernel/debug/accel/<n>/command_submission_jobs
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with detailed information about each JOB (CB) of
each active command submission
What: /sys/kernel/debug/habanalabs/hl<n>/data32
What: /sys/kernel/debug/accel/<n>/data32
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -50,7 +50,7 @@ Description: Allows the root user to read or write directly through the
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
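As a concrete illustration of the addr/data32 pairing described above, the
userspace sketch below selects a device address and then reads one 32-bit
word back. It is only a sketch, not taken from the kernel tree: it assumes
root privileges, debugfs mounted at /sys/kernel/debug, accel device index 0
and an illustrative address of 0x1000, with error handling trimmed:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[32];
            ssize_t n;
            int fd;

            /* Select the device address to access (illustrative value). */
            fd = open("/sys/kernel/debug/accel/0/addr", O_WRONLY);
            if (fd < 0)
                    return 1;
            if (write(fd, "0x1000", strlen("0x1000")) < 0) {
                    close(fd);
                    return 1;
            }
            close(fd);

            /* Read one 32-bit word from the selected address. */
            fd = open("/sys/kernel/debug/accel/0/data32", O_RDONLY);
            if (fd < 0)
                    return 1;
            n = read(fd, buf, sizeof(buf) - 1);
            if (n > 0) {
                    buf[n] = '\0';
                    printf("data32: %s\n", buf);
            }
            close(fd);
            return 0;
    }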
What: /sys/kernel/debug/habanalabs/hl<n>/data64
What: /sys/kernel/debug/accel/<n>/data64
Date: Jan 2020
KernelVersion: 5.6
Contact: ogabbay@kernel.org
@ -65,7 +65,7 @@ Description: Allows the root user to read or write 64 bit data directly
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/data_dma
What: /sys/kernel/debug/accel/<n>/data_dma
Date: Apr 2021
KernelVersion: 5.13
Contact: ogabbay@kernel.org
@ -79,11 +79,11 @@ Description: Allows the root user to read from the device's internal
a very long time.
This interface doesn't support concurrency in the same device.
In GAUDI and GOYA, this action can cause undefined behavior
in case the it is done while the device is executing user
in case it is done while the device is executing user
workloads.
Only supported on GAUDI at this stage.
What: /sys/kernel/debug/habanalabs/hl<n>/device
What: /sys/kernel/debug/accel/<n>/device
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -91,14 +91,14 @@ Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values
What: /sys/kernel/debug/habanalabs/hl<n>/device_release_watchdog_timeout
What: /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
Date: Oct 2022
KernelVersion: 6.2
Contact: ttayar@habana.ai
Description: The watchdog timeout value in seconds for a device release upon
certain error cases, after which the device is reset.
What: /sys/kernel/debug/habanalabs/hl<n>/dma_size
What: /sys/kernel/debug/accel/<n>/dma_size
Date: Apr 2021
KernelVersion: 5.13
Contact: ogabbay@kernel.org
@ -108,7 +108,7 @@ Description: Specify the size of the DMA transaction when using DMA to read
When the write is finished, the user can read the "data_dma"
blob
What: /sys/kernel/debug/habanalabs/hl<n>/dump_razwi_events
What: /sys/kernel/debug/accel/<n>/dump_razwi_events
Date: Aug 2022
KernelVersion: 5.20
Contact: fkassabri@habana.ai
@ -117,7 +117,7 @@ Description: Dumps all razwi events to dmesg if exist.
the routine will clear the status register.
Usage: cat dump_razwi_events
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
What: /sys/kernel/debug/accel/<n>/dump_security_violations
Date: Jan 2021
KernelVersion: 5.12
Contact: ogabbay@kernel.org
@ -125,14 +125,14 @@ Description: Dumps all security violations to dmesg. This will also ack
all security violations, meaning those violations will not be
dumped next time user calls this API
What: /sys/kernel/debug/habanalabs/hl<n>/engines
What: /sys/kernel/debug/accel/<n>/engines
Date: Jul 2019
KernelVersion: 5.3
Contact: ogabbay@kernel.org
Description: Displays the status registers values of the device engines and
their derived idle status
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
What: /sys/kernel/debug/accel/<n>/i2c_addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -140,7 +140,7 @@ Description: Sets I2C device address for I2C transaction that is generated
by the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
What: /sys/kernel/debug/accel/<n>/i2c_bus
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -148,7 +148,7 @@ Description: Sets I2C bus address for I2C transaction that is generated by
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
What: /sys/kernel/debug/accel/<n>/i2c_data
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -157,7 +157,7 @@ Description: Triggers an I2C transaction that is generated by the device's
reading from the file generates a read transaction, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_len
What: /sys/kernel/debug/accel/<n>/i2c_len
Date: Dec 2021
KernelVersion: 5.17
Contact: obitton@habana.ai
@ -165,7 +165,7 @@ Description: Sets I2C length in bytes for I2C transaction that is generated b
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
What: /sys/kernel/debug/accel/<n>/i2c_reg
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -173,35 +173,35 @@ Description: Sets I2C register id for I2C transaction that is generated by
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/led0
What: /sys/kernel/debug/accel/<n>/led0
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the first S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/led1
What: /sys/kernel/debug/accel/<n>/led1
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the second S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/led2
What: /sys/kernel/debug/accel/<n>/led2
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the third S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
What: /sys/kernel/debug/accel/<n>/memory_scrub
Date: May 2022
KernelVersion: 5.19
Contact: dhirschfeld@habana.ai
Description: Allows the root user to scrub the dram memory. The scrubbing
value can be set using the debugfs file memory_scrub_val.
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub_val
What: /sys/kernel/debug/accel/<n>/memory_scrub_val
Date: May 2022
KernelVersion: 5.19
Contact: dhirschfeld@habana.ai
@ -209,7 +209,7 @@ Description: The value to which the dram will be set to when the user
scrubs the dram using 'memory_scrub' debugfs file and
the scrubbing value when using module param 'memory_scrub'
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
What: /sys/kernel/debug/accel/<n>/mmu
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -217,19 +217,19 @@ Description: Displays the hop values and physical address for a given ASID
and virtual address. The user should write the ASID and VA into
the file and then read the file to get the result.
e.g. to display info about VA 0x1000 for ASID 1 you need to do:
echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu
echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
What: /sys/kernel/debug/habanalabs/hl<n>/mmu_error
What: /sys/kernel/debug/accel/<n>/mmu_error
Date: Mar 2021
KernelVersion: 5.12
Contact: fkassabri@habana.ai
Description: Check and display page fault or access violation mmu errors for
all MMUs specified in mmu_cap_mask.
e.g. to display error info for MMU hw cap bit 9, you need to do:
echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
cat /sys/kernel/debug/habanalabs/hl0/mmu_error
echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
cat /sys/kernel/debug/accel/0/mmu_error
What: /sys/kernel/debug/habanalabs/hl<n>/monitor_dump
What: /sys/kernel/debug/accel/<n>/monitor_dump
Date: Mar 2022
KernelVersion: 5.19
Contact: osharabi@habana.ai
@ -243,7 +243,7 @@ Description: Allows the root user to dump monitors status from the device's
This interface doesn't support concurrency in the same device.
Only supported on GAUDI.
What: /sys/kernel/debug/habanalabs/hl<n>/monitor_dump_trig
What: /sys/kernel/debug/accel/<n>/monitor_dump_trig
Date: Mar 2022
KernelVersion: 5.19
Contact: osharabi@habana.ai
@ -253,14 +253,14 @@ Description: Triggers dump of monitor data. The value to trigger the operatio
When the write is finished, the user can read the "monitor_dump"
blob
What: /sys/kernel/debug/habanalabs/hl<n>/set_power_state
What: /sys/kernel/debug/accel/<n>/set_power_state
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
for D3Hot
What: /sys/kernel/debug/habanalabs/hl<n>/skip_reset_on_timeout
What: /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
Date: Jun 2021
KernelVersion: 5.13
Contact: ynudelman@habana.ai
@ -268,7 +268,7 @@ Description: Sets the skip reset on timeout option for the device. Value of
"0" means device will be reset in case some CS has timed out,
otherwise it will not be reset.
What: /sys/kernel/debug/habanalabs/hl<n>/state_dump
What: /sys/kernel/debug/accel/<n>/state_dump
Date: Oct 2021
KernelVersion: 5.15
Contact: ynudelman@habana.ai
@ -279,7 +279,7 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
What: /sys/kernel/debug/accel/<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
Contact: ogabbay@kernel.org
@ -287,21 +287,21 @@ Description: Sets the stop-on_error option for the device engines. Value of
"0" is for disable, otherwise enable.
Relevant only for GOYA and GAUDI.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
What: /sys/kernel/debug/accel/<n>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Sets the command submission timeout value in seconds.
What: /sys/kernel/debug/habanalabs/hl<n>/userptr
What: /sys/kernel/debug/accel/<n>/userptr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently user
Description: Displays a list with information about the current user
pointers (user virtual addresses) that are pinned and mapped
to DMA addresses
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
What: /sys/kernel/debug/accel/<n>/userptr_lookup
Date: Oct 2021
KernelVersion: 5.15
Contact: ogabbay@kernel.org
@ -309,7 +309,7 @@ Description: Allows to search for specific user pointers (user virtual
addresses) that are pinned and mapped to DMA addresses, and see
their resolution to the specific dma address.
What: /sys/kernel/debug/habanalabs/hl<n>/vm
What: /sys/kernel/debug/accel/<n>/vm
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org


@ -1,4 +1,4 @@
What: /sys/class/habanalabs/hl<n>/armcp_kernel_ver
What: /sys/class/accel/accel<n>/device/armcp_kernel_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -6,7 +6,7 @@ Description: Version of the Linux kernel running on the device's CPU.
Will be DEPRECATED in Linux kernel version 5.10, and be
replaced with cpucp_kernel_ver
What: /sys/class/habanalabs/hl<n>/armcp_ver
What: /sys/class/accel/accel<n>/device/armcp_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -14,7 +14,7 @@ Description: Version of the application running on the device's CPU
Will be DEPRECATED in Linux kernel version 5.10, and be
replaced with cpucp_ver
What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
What: /sys/class/accel/accel<n>/device/clk_max_freq_mhz
Date: Jun 2019
KernelVersion: 5.7
Contact: ogabbay@kernel.org
@ -24,58 +24,58 @@ Description: Allows the user to set the maximum clock frequency, in MHz.
frequency value of the device clock. This property is valid
only for the Gaudi ASIC family
What: /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
What: /sys/class/accel/accel<n>/device/clk_cur_freq_mhz
Date: Jun 2019
KernelVersion: 5.7
Contact: ogabbay@kernel.org
Description: Displays the current frequency, in MHz, of the device clock.
This property is valid only for the Gaudi ASIC family
What: /sys/class/habanalabs/hl<n>/cpld_ver
What: /sys/class/accel/accel<n>/device/cpld_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's CPLD F/W
What: /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
What: /sys/class/accel/accel<n>/device/cpucp_kernel_ver
Date: Oct 2020
KernelVersion: 5.10
Contact: ogabbay@kernel.org
Description: Version of the Linux kernel running on the device's CPU
What: /sys/class/habanalabs/hl<n>/cpucp_ver
What: /sys/class/accel/accel<n>/device/cpucp_ver
Date: Oct 2020
KernelVersion: 5.10
Contact: ogabbay@kernel.org
Description: Version of the application running on the device's CPU
What: /sys/class/habanalabs/hl<n>/device_type
What: /sys/class/accel/accel<n>/device/device_type
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the code name of the device according to its type.
The supported values are: "GOYA"
What: /sys/class/habanalabs/hl<n>/eeprom
What: /sys/class/accel/accel<n>/device/eeprom
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: A binary file attribute that contains the contents of the
on-board EEPROM
What: /sys/class/habanalabs/hl<n>/fuse_ver
What: /sys/class/accel/accel<n>/device/fuse_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the device's version from the eFuse
What: /sys/class/habanalabs/hl<n>/fw_os_ver
What: /sys/class/accel/accel<n>/device/fw_os_ver
Date: Dec 2021
KernelVersion: 5.18
Contact: ogabbay@kernel.org
Description: Version of the firmware OS running on the device's CPU
What: /sys/class/habanalabs/hl<n>/hard_reset
What: /sys/class/accel/accel<n>/device/hard_reset
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -83,14 +83,14 @@ Description: Interface to trigger a hard-reset operation for the device.
Hard-reset will reset ALL internal components of the device
except for the PCI interface and the internal PLLs
What: /sys/class/habanalabs/hl<n>/hard_reset_cnt
What: /sys/class/accel/accel<n>/device/hard_reset_cnt
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays how many times the device has undergone a hard-reset
operation since the driver was loaded
What: /sys/class/habanalabs/hl<n>/high_pll
What: /sys/class/accel/accel<n>/device/high_pll
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -98,7 +98,7 @@ Description: Allows the user to set the maximum clock frequency for MME, TPC
and IC when the power management profile is set to "automatic".
This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/ic_clk
What: /sys/class/accel/accel<n>/device/ic_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -110,27 +110,27 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
frequency value of the IC. This property is valid only for the
Goya ASIC family
What: /sys/class/habanalabs/hl<n>/ic_clk_curr
What: /sys/class/accel/accel<n>/device/ic_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the Interconnect
fabric. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/infineon_ver
What: /sys/class/accel/accel<n>/device/infineon_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
What: /sys/class/habanalabs/hl<n>/max_power
What: /sys/class/accel/accel<n>/device/max_power
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Allows the user to set the maximum power consumption of the
device in milliwatts.
What: /sys/class/habanalabs/hl<n>/mme_clk
What: /sys/class/accel/accel<n>/device/mme_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -142,21 +142,21 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
frequency value of the MME. This property is valid only for the
Goya ASIC family
What: /sys/class/habanalabs/hl<n>/mme_clk_curr
What: /sys/class/accel/accel<n>/device/mme_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the MME compute
engine. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/pci_addr
What: /sys/class/accel/accel<n>/device/pci_addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the PCI address of the device. This is needed so the
user would be able to open a device based on its PCI address
What: /sys/class/habanalabs/hl<n>/pm_mng_profile
What: /sys/class/accel/accel<n>/device/pm_mng_profile
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -170,19 +170,19 @@ Description: Power management profile. Values are "auto", "manual". In "auto"
ic_clk, mme_clk and tpc_clk. This property is valid only for
the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/preboot_btl_ver
What: /sys/class/accel/accel<n>/device/preboot_btl_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the device's preboot F/W code
What: /sys/class/habanalabs/hl<n>/security_enabled
What: /sys/class/accel/accel<n>/device/security_enabled
Date: Oct 2022
KernelVersion: 6.1
Contact: obitton@habana.ai
Description: Displays the device's security status
What: /sys/class/habanalabs/hl<n>/soft_reset
What: /sys/class/accel/accel<n>/device/soft_reset
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -190,14 +190,14 @@ Description: Interface to trigger a soft-reset operation for the device.
Soft-reset will reset only the compute and DMA engines of the
device
What: /sys/class/habanalabs/hl<n>/soft_reset_cnt
What: /sys/class/accel/accel<n>/device/soft_reset_cnt
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays how many times the device has undergone a soft-reset
operation since the driver was loaded
What: /sys/class/habanalabs/hl<n>/status
What: /sys/class/accel/accel<n>/device/status
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -215,13 +215,13 @@ Description: Status of the card:
a compute-reset which is executed after a device release
(relevant for Gaudi2 only).
What: /sys/class/habanalabs/hl<n>/thermal_ver
What: /sys/class/accel/accel<n>/device/thermal_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's thermal daemon
What: /sys/class/habanalabs/hl<n>/tpc_clk
What: /sys/class/accel/accel<n>/device/tpc_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -233,20 +233,20 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
frequency value of the TPC. This property is valid only for
Goya ASIC family
What: /sys/class/habanalabs/hl<n>/tpc_clk_curr
What: /sys/class/accel/accel<n>/device/tpc_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the TPC compute
engines. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/uboot_ver
What: /sys/class/accel/accel<n>/device/uboot_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the u-boot running on the device's CPU
What: /sys/class/habanalabs/hl<n>/vrm_ver
What: /sys/class/accel/accel<n>/device/vrm_ver
Date: Jan 2022
KernelVersion: 5.17
Contact: ogabbay@kernel.org


@ -123,6 +123,16 @@ DRM_IOCTL_QAIC_PART_DEV
AIC100 device and can be used for limiting a process to some subset of
resources.
DRM_IOCTL_QAIC_DETACH_SLICE_BO
This IOCTL allows userspace to remove the slicing information from a BO that
was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
is the inverse of DRM_IOCTL_QAIC_ATTACH_SLICE_BO. The BO must be idle for
DRM_IOCTL_QAIC_DETACH_SLICE_BO to be called. After a successful detach slice
operation the BO may have new slicing information attached with a new call
to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. After detach slice, the BO cannot be
executed until after a new attach slice operation. Combining attach slice
and detach slice calls allows userspace to use a BO with multiple workloads.
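The call ordering this implies is sketched below in C. The wrapper helpers
are hypothetical stand-ins for issuing the corresponding DRM_IOCTL_QAIC_*
ioctls on an opened accel node (their real argument structures are omitted
here); only the lifecycle ordering is the point of the example:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical wrappers: each one would fill the real uapi struct and
     * issue the named ioctl on the opened device file descriptor. */
    static int qaic_attach_slice_bo(int fd, uint32_t bo)
    {
            printf("attach slicing to BO %u\n", bo);   /* DRM_IOCTL_QAIC_ATTACH_SLICE_BO */
            return 0;
    }

    static int qaic_execute_bo(int fd, uint32_t bo)
    {
            printf("execute BO %u\n", bo);             /* DRM_IOCTL_QAIC_EXECUTE_BO */
            return 0;
    }

    static int qaic_detach_slice_bo(int fd, uint32_t bo)
    {
            printf("detach slicing from BO %u\n", bo); /* BO must be idle here */
            return 0;
    }

    int main(void)
    {
            int fd = -1;            /* stands in for an opened accel device node */
            uint32_t bo = 1;        /* stands in for a BO created earlier */

            /* Workload A: attach slicing, execute, then detach once the BO is idle. */
            qaic_attach_slice_bo(fd, bo);
            qaic_execute_bo(fd, bo);
            qaic_detach_slice_bo(fd, bo);

            /* Workload B: the same BO may take new slicing information, but it
             * cannot be executed again before this new attach succeeds. */
            qaic_attach_slice_bo(fd, bo);
            qaic_execute_bo(fd, bo);
            return 0;
    }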
Userspace Client Isolation
==========================


@ -17,6 +17,7 @@ properties:
- analogix,anx7808
- analogix,anx7812
- analogix,anx7814
- analogix,anx7816
- analogix,anx7818
reg:


@ -0,0 +1,115 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/bridge/fsl,imx93-mipi-dsi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Freescale i.MX93 specific extensions to Synopsys Designware MIPI DSI
maintainers:
- Liu Ying <victor.liu@nxp.com>
description: |
There is a Synopsys Designware MIPI DSI Host Controller and a Synopsys
Designware MIPI DPHY embedded in Freescale i.MX93 SoC. Some configurations
and extensions to them are controlled by i.MX93 media blk-ctrl.
allOf:
- $ref: snps,dw-mipi-dsi.yaml#
properties:
compatible:
const: fsl,imx93-mipi-dsi
clocks:
items:
- description: apb clock
- description: pixel clock
- description: PHY configuration clock
- description: PHY reference clock
clock-names:
items:
- const: pclk
- const: pix
- const: phy_cfg
- const: phy_ref
interrupts:
maxItems: 1
fsl,media-blk-ctrl:
$ref: /schemas/types.yaml#/definitions/phandle
description:
i.MX93 media blk-ctrl, as a syscon, controls pixel component bit map
configurations from LCDIF display controller to the MIPI DSI host
controller and MIPI DPHY PLL related configurations through PLL SoC
interface.
power-domains:
maxItems: 1
required:
- compatible
- interrupts
- fsl,media-blk-ctrl
- power-domains
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/imx93-clock.h>
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/fsl,imx93-power.h>
dsi@4ae10000 {
compatible = "fsl,imx93-mipi-dsi";
reg = <0x4ae10000 0x10000>;
interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_MIPI_DSI_GATE>,
<&clk IMX93_CLK_MEDIA_DISP_PIX>,
<&clk IMX93_CLK_MIPI_PHY_CFG>,
<&clk IMX93_CLK_24M>;
clock-names = "pclk", "pix", "phy_cfg", "phy_ref";
fsl,media-blk-ctrl = <&media_blk_ctrl>;
power-domains = <&media_blk_ctrl IMX93_MEDIABLK_PD_MIPI_DSI>;
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "raydium,rm67191";
reg = <0>;
reset-gpios = <&adp5585gpio 6 GPIO_ACTIVE_LOW>;
dsi-lanes = <4>;
video-mode = <2>;
port {
panel_in: endpoint {
remote-endpoint = <&dsi_out>;
};
};
};
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi_to_lcdif: endpoint {
remote-endpoint = <&lcdif_to_dsi>;
};
};
port@1 {
reg = <1>;
dsi_out: endpoint {
remote-endpoint = <&panel_in>;
};
};
};
};


@ -0,0 +1,84 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/lvds-data-mapping.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: LVDS Data Mapping
maintainers:
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
- Thierry Reding <thierry.reding@gmail.com>
description: |
LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
incompatible data link layers have been used over time to transmit image data
to LVDS devices. This binding supports devices compatible with the following
specifications.
[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
Semiconductor
[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
Electronics Standards Association (VESA)
Devices compatible with those specifications have been marketed under the
FPD-Link and FlatLink brands.
properties:
data-mapping:
enum:
- jeida-18
- jeida-24
- vesa-24
description: |
The color signals mapping order.
LVDS data mappings are defined as follows.
- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
[VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
specifications. Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
Control signals are mapped as follows.
CTL0: HSync
CTL1: VSync
CTL2: Data Enable
CTL3: 0
additionalProperties: true
...


@ -6,83 +6,24 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: LVDS Display Common Properties
allOf:
- $ref: lvds-data-mapping.yaml#
maintainers:
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
- Thierry Reding <thierry.reding@gmail.com>
description: |+
LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
incompatible data link layers have been used over time to transmit image data
to LVDS devices. This bindings supports devices compatible with the following
specifications.
[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
Semiconductor
[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
Electronics Standards Association (VESA)
Device compatible with those specifications have been marketed under the
FPD-Link and FlatLink brands.
description:
This binding extends the data mapping defined in lvds-data-mapping.yaml.
It supports reversing the bit order on the formats defined there in order
to accommodate even more specialized data formats, since a variety of
data formats and layouts is used to drive LVDS displays.
properties:
data-mapping:
enum:
- jeida-18
- jeida-24
- vesa-24
description: |
The color signals mapping order.
LVDS data mappings are defined as follows.
- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
[VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
specifications. Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
Control signals are mapped as follows.
CTL0: HSync
CTL1: VSync
CTL2: Data Enable
CTL3: 0
data-mirror:
type: boolean
description:
If set, reverse the bit order described in the data mappings below on all
If set, reverse the bit order described in the data mappings on all
data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6.
additionalProperties: true


@ -21,6 +21,8 @@ description: |
properties:
compatible:
enum:
- mediatek,mt8188-dp-tx
- mediatek,mt8188-edp-tx
- mediatek,mt8195-dp-tx
- mediatek,mt8195-edp-tx


@ -30,6 +30,7 @@ properties:
- mediatek,mt8173-dsi
- mediatek,mt8183-dsi
- mediatek,mt8186-dsi
- mediatek,mt8188-dsi
- items:
- enum:
- mediatek,mt6795-dsi


@ -114,6 +114,7 @@ properties:
port@1:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description: Output endpoint of the controller
properties:
endpoint:


@ -21,7 +21,7 @@ properties:
compatible:
oneOf:
- items:
- pattern: '^qcom,adreno-gmu-6[0-9][0-9]\.[0-9]$'
- pattern: '^qcom,adreno-gmu-[67][0-9][0-9]\.[0-9]$'
- const: qcom,adreno-gmu
- const: qcom,adreno-gmu-wrapper
@ -64,6 +64,10 @@ properties:
iommus:
maxItems: 1
qcom,qmp:
$ref: /schemas/types.yaml#/definitions/phandle
description: Reference to the AOSS side-channel message RAM
operating-points-v2: true
opp-table:
@ -213,6 +217,47 @@ allOf:
- const: axi
- const: memnoc
- if:
properties:
compatible:
contains:
enum:
- qcom,adreno-gmu-730.1
- qcom,adreno-gmu-740.1
then:
properties:
reg:
items:
- description: Core GMU registers
- description: Resource controller registers
- description: GMU PDC registers
reg-names:
items:
- const: gmu
- const: rscc
- const: gmu_pdc
clocks:
items:
- description: GPU AHB clock
- description: GMU clock
- description: GPU CX clock
- description: GPU AXI clock
- description: GPU MEMNOC clock
- description: GMU HUB clock
- description: GPUSS DEMET clock
clock-names:
items:
- const: ahb
- const: gmu
- const: cxo
- const: axi
- const: memnoc
- const: hub
- const: demet
required:
- qcom,qmp
- if:
properties:
compatible:


@ -23,7 +23,7 @@ properties:
The driver is parsing the compat string for Adreno to
figure out the gpu-id and patch level.
items:
- pattern: '^qcom,adreno-[3-6][0-9][0-9]\.[0-9]$'
- pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]$'
- const: qcom,adreno
- description: |
The driver is parsing the compat string for Imageon to
@ -203,7 +203,7 @@ allOf:
properties:
compatible:
contains:
pattern: '^qcom,adreno-6[0-9][0-9]\.[0-9]$'
pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]$'
then: # Starting with A6xx, the clocks are usually defined in the GMU node
properties:


@ -38,12 +38,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,msm8998-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -52,6 +56,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm-8998


@ -44,18 +44,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,qcm2290-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-ctrl-6g-qcm2290
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-14nm-2290


@ -44,18 +44,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7180-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7180-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -64,6 +70,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm


@ -44,18 +44,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7280-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7280-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -64,12 +70,16 @@ patternProperties:
"^edp@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7280-edp
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
enum:


@ -34,12 +34,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc8280xp-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
enum:


@ -42,18 +42,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sdm845-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sdm845-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -62,6 +68,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm


@ -32,12 +32,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6115-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
oneOf:
@ -50,6 +54,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-14nm-2290


@ -43,12 +43,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6125-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +61,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6125-dsi-phy-14nm


@ -43,12 +43,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6350-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +61,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm


@ -43,12 +43,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6375-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +61,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6375-dsi-phy-7nm


@ -47,12 +47,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8150-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -61,6 +65,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-7nm


@ -46,12 +46,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8250-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -60,6 +64,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-7nm


@ -48,18 +48,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8350-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8350-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -68,6 +74,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8350-dsi-phy-5nm


@ -38,12 +38,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8450-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -52,6 +56,8 @@ patternProperties:
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -60,6 +66,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8450-dsi-phy-5nm


@ -38,12 +38,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8550-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -52,6 +56,8 @@ patternProperties:
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -60,6 +66,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8550-dsi-phy-4nm


@ -0,0 +1,94 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/jdi,lpm102a188a.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: JDI LPM102A188A 2560x1800 10.2" DSI Panel
maintainers:
- Diogo Ivo <diogo.ivo@tecnico.ulisboa.pt>
description: |
This panel requires a dual-channel DSI host to operate. It supports two modes:
- left-right: each channel drives the left or right half of the screen
- even-odd: each channel drives the even or odd lines of the screen
Each of the DSI channels controls a separate DSI peripheral. The peripheral
driven by the first link (DSI-LINK1) is considered the primary peripheral
and controls the device. The 'link2' property contains a phandle to the
peripheral driven by the second link (DSI-LINK2).
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
const: jdi,lpm102a188a
reg: true
enable-gpios: true
reset-gpios: true
power-supply: true
backlight: true
ddi-supply:
description: The regulator that provides IOVCC (1.8V).
link2:
$ref: /schemas/types.yaml#/definitions/phandle
description: |
phandle to the DSI peripheral on the secondary link. Note that the
presence of this property marks the containing node as DSI-LINK1.
required:
- compatible
- reg
if:
required:
- link2
then:
required:
- power-supply
- ddi-supply
- enable-gpios
- reset-gpios
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/gpio/tegra-gpio.h>
dsia: dsi@54300000 {
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x54300000 0x0 0x00040000>;
link2: panel@0 {
compatible = "jdi,lpm102a188a";
reg = <0>;
};
};
dsib: dsi@54400000{
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x54400000 0x0 0x00040000>;
nvidia,ganged-mode = <&dsia>;
link1: panel@0 {
compatible = "jdi,lpm102a188a";
reg = <0>;
power-supply = <&pplcd_vdd>;
ddi-supply = <&pp1800_lcdio>;
enable-gpios = <&gpio TEGRA_GPIO(V, 1) GPIO_ACTIVE_HIGH>;
reset-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
link2 = <&link2>;
backlight = <&backlight>;
};
};
...


@ -17,6 +17,7 @@ properties:
enum:
- leadtek,ltk050h3146w
- leadtek,ltk050h3146w-a2
- leadtek,ltk050h3148w
reg: true
backlight: true
reset-gpios: true


@ -7,9 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: NewVision NV3051D based LCD panel
description: |
The NewVision NV3051D is a driver chip used to drive DSI panels. For now,
this driver only supports the 640x480 panels found in the Anbernic RG353
based devices.
The NewVision NV3051D is a driver chip used to drive DSI panels.
maintainers:
- Chris Morgan <macromorgan@hotmail.com>
@ -21,6 +19,7 @@ properties:
compatible:
items:
- enum:
- anbernic,rg351v-panel
- anbernic,rg353p-panel
- anbernic,rg353v-panel
- const: newvision,nv3051d


@ -21,9 +21,9 @@ description: |
allOf:
- $ref: panel-common.yaml#
- $ref: ../lvds-data-mapping.yaml#
properties:
compatible:
enum:
# compatible must be listed in alphabetical order, ordered by compatible.
@ -230,6 +230,8 @@ properties:
- logictechno,lttd800480070-l6wh-rt
# Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel
- mitsubishi,aa070mc01-ca1
# Mitsubishi AA084XE01 8.4" XGA TFT LCD panel
- mitsubishi,aa084xe01
# Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module
- multi-inno,mi0700s4t-6
# Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module
@ -347,6 +349,17 @@ properties:
power-supply: true
no-hpd: true
hpd-gpios: true
data-mapping: true
if:
not:
properties:
compatible:
contains:
const: innolux,g101ice-l01
then:
properties:
data-mapping: false
additionalProperties: false
@ -366,3 +379,16 @@ examples:
};
};
};
- |
panel_lvds: panel-lvds {
compatible = "innolux,g101ice-l01";
power-supply = <&vcc_lcd_reg>;
data-mapping = "jeida-24";
port {
panel_in_lvds: endpoint {
remote-endpoint = <&ltdc_out_lvds>;
};
};
};


@ -0,0 +1,73 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/raydium,rm692e5.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Raydium RM692E5 based DSI display panels
maintainers:
- Konrad Dybcio <konradybcio@kernel.org>
description:
The Raydium RM692E5 is a generic DSI Panel IC used to control
AMOLED panels.
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
items:
- const: fairphone,fp5-rm692e5-boe
- const: raydium,rm692e5
dvdd-supply:
description: Digital voltage rail
vci-supply:
description: Analog voltage rail
vddio-supply:
description: I/O voltage rail
reg: true
port: true
required:
- compatible
- reg
- reset-gpios
- dvdd-supply
- vci-supply
- vddio-supply
- port
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "fairphone,fp5-rm692e5-boe", "raydium,rm692e5";
reg = <0>;
reset-gpios = <&tlmm 44 GPIO_ACTIVE_LOW>;
dvdd-supply = <&vreg_oled_vci>;
vci-supply = <&vreg_l12c>;
vddio-supply = <&vreg_oled_dvdd>;
port {
panel_in_0: endpoint {
remote-endpoint = <&dsi0_out>;
};
};
};
};
...


@ -22,6 +22,8 @@ properties:
enum:
# Anberic RG353V-V2 5.0" 640x480 TFT LCD panel
- anbernic,rg353v-panel-v2
# Powkiddy RGB30 3.0" 720x720 TFT LCD panel
- powkiddy,rgb30-panel
# Rocktech JH057N00900 5.5" 720x1440 TFT LCD panel
- rocktech,jh057n00900
# Xingbangda XBD599 5.99" 720x1440 TFT LCD panel


@ -18,6 +18,7 @@ properties:
- rockchip,rk3288-mipi-dsi
- rockchip,rk3399-mipi-dsi
- rockchip,rk3568-mipi-dsi
- rockchip,rv1126-mipi-dsi
- const: snps,dw-mipi-dsi
interrupts:
@ -77,6 +78,7 @@ allOf:
enum:
- rockchip,px30-mipi-dsi
- rockchip,rk3568-mipi-dsi
- rockchip,rv1126-mipi-dsi
then:
properties:


@ -31,6 +31,7 @@ properties:
- rockchip,rk3368-vop
- rockchip,rk3399-vop-big
- rockchip,rk3399-vop-lit
- rockchip,rv1126-vop
reg:
minItems: 1


@ -0,0 +1,42 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/solomon,ssd-common.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Common properties for Solomon OLED Display Controllers
maintainers:
- Javier Martinez Canillas <javierm@redhat.com>
properties:
reg:
maxItems: 1
reset-gpios:
maxItems: 1
# Only required for SPI
dc-gpios:
description:
GPIO connected to the controller's D/C# (Data/Command) pin,
that is needed for 4-wire SPI to tell the controller if the
data sent is for a command register or the display data RAM
maxItems: 1
solomon,height:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Height in pixel of the screen driven by the controller.
The default value is controller-dependent.
solomon,width:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Width in pixel of the screen driven by the controller.
The default value is controller-dependent.
allOf:
- $ref: /schemas/spi/spi-peripheral-props.yaml#
additionalProperties: true


@ -27,38 +27,12 @@ properties:
- solomon,ssd1307
- solomon,ssd1309
reg:
maxItems: 1
pwms:
maxItems: 1
reset-gpios:
maxItems: 1
# Only required for SPI
dc-gpios:
description:
GPIO connected to the controller's D/C# (Data/Command) pin,
that is needed for 4-wire SPI to tell the controller if the
data sent is for a command register or the display data RAM
maxItems: 1
vbat-supply:
description: The supply for VBAT
solomon,height:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Height in pixel of the screen driven by the controller.
The default value is controller-dependent.
solomon,width:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Width in pixel of the screen driven by the controller.
The default value is controller-dependent.
solomon,page-offset:
$ref: /schemas/types.yaml#/definitions/uint32
default: 1
@ -148,7 +122,7 @@ required:
- reg
allOf:
- $ref: /schemas/spi/spi-peripheral-props.yaml#
- $ref: solomon,ssd-common.yaml#
- if:
properties:


@ -0,0 +1,89 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/solomon,ssd132x.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Solomon SSD132x OLED Display Controllers
maintainers:
- Javier Martinez Canillas <javierm@redhat.com>
properties:
compatible:
enum:
- solomon,ssd1322
- solomon,ssd1325
- solomon,ssd1327
required:
- compatible
- reg
allOf:
- $ref: solomon,ssd-common.yaml#
- if:
properties:
compatible:
contains:
const: solomon,ssd1322
then:
properties:
width:
default: 480
height:
default: 128
- if:
properties:
compatible:
contains:
const: solomon,ssd1325
then:
properties:
width:
default: 128
height:
default: 80
- if:
properties:
compatible:
contains:
const: solomon,ssd1327
then:
properties:
width:
default: 128
height:
default: 128
unevaluatedProperties: false
examples:
- |
i2c {
#address-cells = <1>;
#size-cells = <0>;
oled@3c {
compatible = "solomon,ssd1327";
reg = <0x3c>;
reset-gpios = <&gpio2 7>;
};
};
- |
spi {
#address-cells = <1>;
#size-cells = <0>;
oled@0 {
compatible = "solomon,ssd1327";
reg = <0x0>;
reset-gpios = <&gpio2 7>;
dc-gpios = <&gpio2 8>;
spi-max-frequency = <10000000>;
};
};


@ -1085,6 +1085,8 @@ patternProperties:
description: Powertip Tech. Corp.
"^powervr,.*":
description: PowerVR (deprecated, use img)
"^powkiddy,.*":
description: Powkiddy
"^primux,.*":
description: Primux Trading, S.L.
"^probox2,.*":


@ -5,14 +5,30 @@ The dma-buf subsystem provides the framework for sharing buffers for
hardware (DMA) access across multiple device drivers and subsystems, and
for synchronizing asynchronous hardware access.
This is used, for example, by drm "prime" multi-GPU support, but is of
course not limited to GPU use cases.
As an example, it is used extensively by the DRM subsystem to exchange
buffers between processes, contexts, library APIs within the same
process, and also to exchange buffers with other subsystems such as
V4L2.
This document describes the way in which kernel subsystems can use and
interact with the three main primitives offered by dma-buf:
- dma-buf, representing a sg_table and exposed to userspace as a file
descriptor to allow passing between processes, subsystems, devices,
etc;
- dma-fence, providing a mechanism to signal when an asynchronous
hardware operation has completed; and
- dma-resv, which manages a set of dma-fences for a particular dma-buf
allowing implicit (kernel-ordered) synchronization of work to
preserve the illusion of coherent access
Userspace API principles and use
--------------------------------
For more details on how to design your subsystem's API for dma-buf use, please
see Documentation/userspace-api/dma-buf-alloc-exchange.rst.
The three main components of this are: (1) dma-buf, representing a
sg_table and exposed to userspace as a file descriptor to allow passing
between devices, (2) fence, which provides a mechanism to signal when
one device has finished access, and (3) reservation, which manages the
shared or exclusive fence(s) associated with the buffer.
Shared DMA Buffers
------------------


@ -26,12 +26,30 @@ serial_number
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: serial_number
fru_id
-------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: fru_id
manufacturer
-------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: manufacturer
unique_id
---------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: unique_id
board_info
----------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
:doc: board_info
Accelerated Processing Units (APU) Info
---------------------------------------


@ -64,6 +64,36 @@ gpu_metrics
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: gpu_metrics
fan_curve
---------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_curve
acoustic_limit_rpm_threshold
----------------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: acoustic_limit_rpm_threshold
acoustic_target_rpm_threshold
-----------------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: acoustic_target_rpm_threshold
fan_target_temperature
----------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_target_temperature
fan_minimum_pwm
---------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_minimum_pwm
GFXOFF
======


@ -67,6 +67,19 @@ Lists the tests that for a given driver on a specific hardware revision are
known to behave unreliably. These tests won't cause a job to fail regardless of
the result. They will still be run.
Each new flake entry must be associated with a link to the email reporting the
bug to the author of the affected driver, the board name or Device Tree name of
the board, the first kernel version affected, and an approximation of the
failure rate.
They should be provided in the following format::
# Bug Report: $LORE_OR_PATCHWORK_URL
# Board Name: broken-board.dtb
# Version: 6.6-rc1
# Failure Rate: 100
flaky-test
drivers/gpu/drm/ci/${DRIVER_NAME}-${HW_REVISION}-skips.txt
-----------------------------------------------------------
@ -86,10 +99,13 @@ https://gitlab.freedesktop.org/janedoe/linux/-/settings/ci_cd), change the
CI/CD configuration file from .gitlab-ci.yml to
drivers/gpu/drm/ci/gitlab-ci.yml.
3. Next time you push to this repository, you will see a CI pipeline being
3. Request to be added to the drm/ci-ok group so that your user has the
necessary privileges to run the CI on https://gitlab.freedesktop.org/drm/ci-ok
4. Next time you push to this repository, you will see a CI pipeline being
created (eg. https://gitlab.freedesktop.org/janedoe/linux/-/pipelines)
4. The various jobs will be run and when the pipeline is finished, all jobs
5. The various jobs will be run and when the pipeline is finished, all jobs
should be green unless a regression has been found.


@ -18,6 +18,7 @@ GPU Driver Documentation
xen-front
afbc
komeda-kms
panfrost
.. only:: subproject and html


@ -360,6 +360,8 @@ Format Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_fourcc.c
:export:
.. _kms_dumb_buffer_objects:
Dumb Buffer Objects
===================


@ -466,40 +466,40 @@ DRM MM Range Allocator Function References
.. kernel-doc:: drivers/gpu/drm/drm_mm.c
:export:
DRM GPU VA Manager
==================
DRM GPUVM
=========
Overview
--------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Overview
Split and Merge
---------------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Split and Merge
Locking
-------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Locking
Examples
--------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Examples
DRM GPU VA Manager Function References
--------------------------------------
DRM GPUVM Function References
-----------------------------
.. kernel-doc:: include/drm/drm_gpuva_mgr.h
.. kernel-doc:: include/drm/drm_gpuvm.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:export:
DRM Buddy Allocator


@ -285,6 +285,83 @@ for GPU1 and GPU2 from different vendors, and a third handler for
mmapped regular files. Threads cause additional pain with signal
handling as well.
Device reset
============
The GPU stack is really complex and is prone to errors, from hardware bugs
and faulty applications to everything in between the many layers. Some errors
require resetting the device in order to make it usable again. This
section describes the expectations for DRM and usermode drivers when a
device resets and how to propagate the reset status.
Device resets cannot be disabled without tainting the kernel, which can lead to
hanging the entire kernel through shrinkers/mmu_notifiers. Userspace's role in
device resets is to propagate the message to the application and apply any
special policy for blocking guilty applications, if any. A corollary is that
debugging a hung GPU context requires hardware support to preempt such
a GPU context while it's stopped.
Kernel Mode Driver
------------------
The KMD is responsible for checking whether the device needs a reset and for
performing it as needed. Usually a hang is detected when a job gets stuck
executing. The KMD should keep track of resets, because userspace can query the
reset status for a specific context at any time. This is needed to propagate to
the rest of the stack that a reset has happened. Currently, this is implemented
by each driver separately, with no common DRM interface. Ideally it should be
integrated into the DRM scheduler to provide common ground for all drivers. After
a reset, the KMD should reject new command submissions for affected contexts.
User Mode Driver
----------------
After command submission, the UMD should check whether the submission was
accepted or rejected. After a reset, the KMD should reject submissions, and the
UMD can issue an ioctl to the KMD to check the reset status, polling more often
if it needs to. After detecting a reset, the UMD reports it to the application
using the appropriate API error code, as explained in the section below about
robustness.
Robustness
----------
The only way to try to keep a graphical API context working after a reset is if
it complies with the robustness aspects of the graphical API that it is using.
Graphical APIs provide ways for applications to deal with device resets. However,
there is no guarantee that an app will use such features correctly, and
userspace that doesn't support robust interfaces (like a non-robust
OpenGL context, or an API without any robustness support such as libva) leaves
the robustness handling entirely to the userspace driver. There is no strong
community consensus on what the userspace driver should do in that case,
since all reasonable approaches have some clear downsides.
OpenGL
~~~~~~
Apps using OpenGL should use the available robust interfaces, like the
``GL_ARB_robustness`` extension (or ``GL_EXT_robustness`` for OpenGL ES). This
interface reports whether a reset has happened; if so, all the context state is
considered lost and the app proceeds by creating new contexts. There's no
consensus on what to do if robustness is not in use.
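As a hedged illustration only (not part of this document, and assuming a context
created with reset notification enabled plus a loader such as epoxy providing the
GL 4.5 / ``GL_ARB_robustness`` entry points), a robust client might poll the
reset status like this:

.. code-block:: c

   #include <epoxy/gl.h>

   /* Sketch: returns non-zero once the context has been reset and all of
    * its state must be considered lost (the app then recreates it). */
   static int context_was_reset(void)
   {
           switch (glGetGraphicsResetStatus()) {
           case GL_NO_ERROR:
                   return 0;                   /* context is still healthy */
           case GL_GUILTY_CONTEXT_RESET:       /* this context caused the reset */
           case GL_INNOCENT_CONTEXT_RESET:
           case GL_UNKNOWN_CONTEXT_RESET:
           default:
                   return 1;
           }
   }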
Vulkan
~~~~~~
Apps using Vulkan should check for ``VK_ERROR_DEVICE_LOST`` on submissions.
This error code means, among other things, that a device reset has happened and
the app needs to recreate its contexts to keep going.
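A minimal sketch (not from this document) of the corresponding check against the
standard Vulkan API:

.. code-block:: c

   #include <vulkan/vulkan.h>

   /* Sketch: submit work and detect a lost device. On VK_ERROR_DEVICE_LOST
    * the app tears down and recreates its VkDevice and dependent objects. */
   static int submit_and_check(VkQueue queue, const VkSubmitInfo *info,
                               VkFence fence)
   {
           VkResult res = vkQueueSubmit(queue, 1, info, fence);

           if (res == VK_ERROR_DEVICE_LOST)
                   return -1;  /* device reset: recreate device-level objects */

           return res == VK_SUCCESS ? 0 : -1;
   }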
Reporting causes of resets
--------------------------
Apart from propagating the reset through the stack so apps can recover, it's
really useful for driver developers to learn more about what caused the reset in
the first place. DRM devices should make use of devcoredump to store relevant
information about the reset, so this information can be added to user bug
reports.
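A hedged kernel-side sketch of feeding such information to devcoredump; the
driver function and the dump contents are illustrative, only ``dev_coredumpv()``
is the real interface:

.. code-block:: c

   #include <linux/device.h>
   #include <linux/devcoredump.h>
   #include <linux/gfp.h>
   #include <linux/string.h>
   #include <linux/vmalloc.h>

   /* Illustrative only: hand a short, driver-chosen description of the reset
    * to devcoredump so it can be attached to user bug reports. */
   static void my_gpu_report_reset(struct device *dev, const char *reason)
   {
           size_t len = strlen(reason) + 1;
           char *buf = vmalloc(len);  /* dev_coredumpv() frees it with vfree() */

           if (!buf)
                   return;

           memcpy(buf, reason, len);
           dev_coredumpv(dev, buf, len, GFP_KERNEL);
   }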
.. _drm_driver_ioctl:
IOCTL Support on Device Nodes
@ -450,12 +527,12 @@ VBlank event handling
The DRM core exposes two vertical blank related ioctls:
DRM_IOCTL_WAIT_VBLANK
:c:macro:`DRM_IOCTL_WAIT_VBLANK`
This takes a struct drm_wait_vblank structure as its argument, and
it is used to block or request a signal when a specified vblank
event occurs.
DRM_IOCTL_MODESET_CTL
:c:macro:`DRM_IOCTL_MODESET_CTL`
This was only used for user-mode-setting drivers around modesetting
changes to allow the kernel to update the vblank interrupt after
mode setting, since on many devices the vertical blank counter is
@ -478,11 +555,18 @@ The index is used in cases where a densely packed identifier for a CRTC is
needed, for instance a bitmask of CRTC's. The member possible_crtcs of struct
drm_mode_get_plane is an example.
DRM_IOCTL_MODE_GETRESOURCES populates a structure with an array of CRTC ID's,
and the CRTC index is its position in this array.
:c:macro:`DRM_IOCTL_MODE_GETRESOURCES` populates a structure with an array of
CRTC ID's, and the CRTC index is its position in this array.
.. kernel-doc:: include/uapi/drm/drm.h
:internal:
.. kernel-doc:: include/uapi/drm/drm_mode.h
:internal:
dma-buf interoperability
========================
Please see Documentation/userspace-api/dma-buf-alloc-exchange.rst for
information on how dma-buf is integrated and exposed within DRM.


@ -169,3 +169,4 @@ Driver specific implementations
-------------------------------
:ref:`i915-usage-stats`
:ref:`panfrost-usage-stats`


@ -0,0 +1,309 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
====================
Asynchronous VM_BIND
====================
Nomenclature:
=============
* ``VRAM``: On-device memory. Sometimes referred to as device local memory.
* ``gpu_vm``: A virtual GPU address space. Typically per process, but
can be shared by multiple processes.
* ``VM_BIND``: An operation or a list of operations to modify a gpu_vm using
an IOCTL. The operations include mapping and unmapping system- or
VRAM memory.
* ``syncobj``: A container that abstracts synchronization objects. The
synchronization objects can be either generic, like dma-fences or
driver specific. A syncobj typically indicates the type of the
underlying synchronization object.
* ``in-syncobj``: Argument to a VM_BIND IOCTL; the VM_BIND operation waits
for these before starting.
* ``out-syncobj``: Argument to a VM_BIND IOCTL; the VM_BIND operation
signals these when the bind operation is complete.
* ``dma-fence``: A cross-driver synchronization object. A basic
understanding of dma-fences is required to digest this
document. Please refer to the ``DMA Fences`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.
* ``memory fence``: A synchronization object, different from a dma-fence.
A memory fence uses the value of a specified memory location to determine
signaled status. A memory fence can be awaited and signaled by both
the GPU and CPU. Memory fences are sometimes referred to as
user-fences, userspace-fences or gpu futexes and do not necessarily obey
the dma-fence rule of signaling within a "reasonable amount of time".
The kernel should thus avoid waiting for memory fences with locks held.
* ``long-running workload``: A workload that may take more than the
current stipulated dma-fence maximum signal delay to complete and
which therefore needs to set the gpu_vm or the GPU execution context in
a certain mode that disallows completion dma-fences.
* ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.
* ``bind context``: A context identifier used for the VM_BIND
operation. VM_BIND operations that use the same bind context can be
assumed, where it matters, to complete in order of submission. No such
assumptions can be made for VM_BIND operations using separate bind contexts.
* ``UMD``: User-mode driver.
* ``KMD``: Kernel-mode driver.
Synchronous / Asynchronous VM_BIND operation
============================================
Synchronous VM_BIND
___________________
With Synchronous VM_BIND, the VM_BIND operations all complete before the
IOCTL returns. A synchronous VM_BIND takes neither in-fences nor
out-fences. Synchronous VM_BIND may block and wait for GPU operations;
for example swap-in or clearing, or even previous binds.
Asynchronous VM_BIND
____________________
Asynchronous VM_BIND accepts both in-syncobjs and out-syncobjs. While the
IOCTL may return immediately, the VM_BIND operations wait for the in-syncobjs
before modifying the GPU page-tables, and signal the out-syncobjs when
the modification is done in the sense that the next exec function that
awaits for the out-syncobjs will see the change. Errors are reported
synchronously.
In low-memory situations the implementation may block, performing the
VM_BIND synchronously, because there might not be enough memory
immediately available for preparing the asynchronous operation.
If the VM_BIND IOCTL takes a list or an array of operations as an argument,
the in-syncobjs needs to signal before the first operation starts to
execute, and the out-syncobjs signal after the last operation
completes. Operations in the operation list can be assumed, where it
matters, to complete in order.
Since asynchronous VM_BIND operations may use dma-fences embedded in
out-syncobjs and internally in KMD to signal bind completion, any
memory fences given as VM_BIND in-fences need to be awaited
synchronously before the VM_BIND ioctl returns, since dma-fences,
required to signal in a reasonable amount of time, can never be made
to depend on memory fences that don't have such a restriction.
The purpose of an Asynchronous VM_BIND operation is for user-mode
drivers to be able to pipeline interleaved gpu_vm modifications and
exec functions. For long-running workloads, such pipelining of a bind
operation is not allowed and any in-fences need to be awaited
synchronously. The reason for this is twofold. First, any memory
fences gated by a long-running workload and used as in-syncobjs for the
VM_BIND operation will need to be awaited synchronously anyway (see
above). Second, any dma-fences used as in-syncobjs for VM_BIND
operations for long-running workloads will not allow for pipelining
anyway since long-running workloads don't allow for dma-fences as
out-syncobjs, so while theoretically possible the use of them is
questionable and should be rejected until there is a valuable use-case.
Note that this is not a limitation imposed by dma-fence rules, but
rather a limitation imposed to keep KMD implementation simple. It does
not affect using dma-fences as dependencies for the long-running
workload itself, which is allowed by dma-fence rules, but rather for
the VM_BIND operation only.
An asynchronous VM_BIND operation may take substantial time to
complete and signal the out_fence, in particular if the operation is
deeply pipelined behind other VM_BIND operations and workloads
submitted using exec functions. In that case, UMD might want to avoid having a
subsequent VM_BIND operation queued behind the first one if
there are no explicit dependencies. In order to circumvent such a queue-up, a
VM_BIND implementation may allow for VM_BIND contexts to be
created. For each context, VM_BIND operations will be guaranteed to
complete in the order they were submitted, but that is not the case
for VM_BIND operations executing on separate VM_BIND contexts. Instead
KMD will attempt to execute such VM_BIND operations in parallel but
leaving no guarantee that they will actually be executed in
parallel. There may be internal implicit dependencies that only KMD knows
about, for example page-table structure changes. A way to attempt
to avoid such internal dependencies is to have different VM_BIND
contexts use separate regions of a VM.
Also, for VM_BINDs on long-running gpu_vms the user-mode driver should typically
select memory fences as out-fences, since that gives the kernel-mode driver
greater flexibility to inject other operations into the bind /
unbind operations, like inserting breakpoints into batch
buffers. The workload execution can then easily be pipelined behind
the bind completion, using the memory out-fence as the signal condition
for a GPU semaphore embedded by UMD in the workload.
There is no difference in the operations supported or in
multi-operation support between asynchronous VM_BIND and synchronous VM_BIND.
Multi-operation VM_BIND IOCTL error handling and interrupts
===========================================================
The VM_BIND operations of the IOCTL may error for various reasons, for
example due to lack of resources to complete and due to interrupted
waits.
In these situations UMD should preferably restart the IOCTL after
taking suitable action.
If UMD has over-committed a memory resource, an -ENOSPC error will be
returned, and UMD may then unbind resources that are not used at the
moment and rerun the IOCTL. On -EINTR, UMD should simply rerun the
IOCTL and on -ENOMEM user-space may either attempt to free known
system memory resources or fail. In case of UMD deciding to fail a
bind operation, due to an error return, no additional action is needed
to clean up the failed operation, and the VM is left in the same state
as it was before the failing IOCTL.
Unbind operations are guaranteed not to return any errors due to
resource constraints, but may return errors due to, for example,
invalid arguments or the gpu_vm being banned.
In the case an unexpected error happens during the asynchronous bind
process, the gpu_vm will be banned, and attempts to use it after banning
will return -ENOENT.
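A hedged userspace sketch of the retry policy described above;
``vm_bind_submit()``, ``unbind_idle_resources()`` and ``free_cached_memory()``
are hypothetical UMD helpers, only the errno handling follows this section:

.. code-block:: c

   #include <errno.h>

   int vm_bind_submit(void *args);      /* hypothetical ioctl wrapper       */
   void unbind_idle_resources(void);    /* hypothetical UMD housekeeping    */
   int free_cached_memory(void);        /* hypothetical UMD memory trimming */

   static int vm_bind_with_retry(void *args)
   {
           for (;;) {
                   int ret = vm_bind_submit(args);

                   if (ret == 0)
                           return 0;
                   if (ret == -EINTR)
                           continue;                 /* just rerun the IOCTL */
                   if (ret == -ENOSPC) {
                           unbind_idle_resources();  /* trim over-commit, rerun */
                           continue;
                   }
                   if (ret == -ENOMEM && free_cached_memory())
                           continue;                 /* retry after freeing memory */
                   return ret;  /* give up; the VM is left as before the IOCTL */
           }
   }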
Example: The Xe VM_BIND uAPI
============================
Starting with the VM_BIND operation struct, the IOCTL call can take
zero, one or many such operations. A zero number means only the
synchronization part of the IOCTL is carried out: an asynchronous
VM_BIND updates the syncobjects, whereas a sync VM_BIND waits for the
implicit dependencies to be fulfilled.
.. code-block:: c
struct drm_xe_vm_bind_op {
/**
* @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
*/
__u32 obj;
/** @pad: MBZ */
__u32 pad;
union {
/**
* @obj_offset: Offset into the object for MAP.
*/
__u64 obj_offset;
/** @userptr: user virtual address for MAP_USERPTR */
__u64 userptr;
};
/**
* @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
*/
__u64 range;
/** @addr: Address to operate on, MBZ for UNMAP_ALL */
__u64 addr;
/**
* @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
* only applies to creating new VMAs
*/
__u64 tile_mask;
/* Map (parts of) an object into the GPU virtual address range. */
#define XE_VM_BIND_OP_MAP 0x0
/* Unmap a GPU virtual address range */
#define XE_VM_BIND_OP_UNMAP 0x1
/*
* Map a CPU virtual address range into a GPU virtual
* address range.
*/
#define XE_VM_BIND_OP_MAP_USERPTR 0x2
/* Unmap a gem object from the VM. */
#define XE_VM_BIND_OP_UNMAP_ALL 0x3
/*
* Make the backing memory of an address range resident if
* possible. Note that this doesn't pin backing memory.
*/
#define XE_VM_BIND_OP_PREFETCH 0x4
/* Make the GPU map readonly. */
#define XE_VM_BIND_FLAG_READONLY (0x1 << 16)
/*
* Valid on a faulting VM only, do the MAP operation immediately rather
* than deferring the MAP to the page fault handler.
*/
#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 17)
/*
* When the NULL flag is set, the page tables are setup with a special
* bit which indicates writes are dropped and all reads return zero. In
* the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
* operations, the BO handle MBZ, and the BO offset MBZ. This flag is
* intended to implement VK sparse bindings.
*/
#define XE_VM_BIND_FLAG_NULL (0x1 << 18)
/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
__u32 op;
/** @mem_region: Memory region to prefetch VMA to, instance not a mask */
__u32 region;
/** @reserved: Reserved */
__u64 reserved[2];
};
The VM_BIND IOCTL argument itself looks as follows. Note that for
synchronous VM_BIND, the num_syncs and syncs fields must be zero. Here
the ``exec_queue_id`` field is the VM_BIND context discussed previously,
which is used to facilitate out-of-order VM_BINDs.
.. code-block:: c
struct drm_xe_vm_bind {
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
/** @vm_id: The ID of the VM to bind to */
__u32 vm_id;
/**
* @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
* and exec queue must have same vm_id. If zero, the default VM bind engine
* is used.
*/
__u32 exec_queue_id;
/** @num_binds: number of binds in this IOCTL */
__u32 num_binds;
/* If set, perform an async VM_BIND, if clear a sync VM_BIND */
#define XE_VM_BIND_IOCTL_FLAG_ASYNC (0x1 << 0)
/** @flag: Flags controlling all operations in this ioctl. */
__u32 flags;
union {
/** @bind: used if num_binds == 1 */
struct drm_xe_vm_bind_op bind;
/**
* @vector_of_binds: userptr to array of struct
* drm_xe_vm_bind_op if num_binds > 1
*/
__u64 vector_of_binds;
};
/** @num_syncs: amount of syncs to wait for or to signal on completion. */
__u32 num_syncs;
/** @pad2: MBZ */
__u32 pad2;
/** @syncs: pointer to struct drm_xe_sync array */
__u64 syncs;
/** @reserved: Reserved */
__u64 reserved[2];
};
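A short usage sketch under the definitions above: filling the argument for a
single synchronous MAP of one BO. The handles and addresses are assumed to come
from the caller; no particular ioctl wrapper or uAPI header name is implied by
this document.

.. code-block:: c

   /* Sketch: one synchronous MAP bind using only fields defined above. */
   static struct drm_xe_vm_bind make_sync_map_bind(__u32 vm_id, __u32 bo_handle,
                                                   __u64 gpu_va, __u64 size)
   {
           return (struct drm_xe_vm_bind){
                   .vm_id         = vm_id,
                   .exec_queue_id = 0,        /* default VM bind engine */
                   .num_binds     = 1,
                   .flags         = 0,        /* sync bind: ASYNC flag clear... */
                   .bind = {
                           .obj        = bo_handle,
                           .obj_offset = 0,
                           .range      = size,
                           .addr       = gpu_va,
                           .tile_mask  = 0,   /* 0 == all tiles */
                           .op         = XE_VM_BIND_OP_MAP,
                   },
                   .num_syncs     = 0,        /* ...so num_syncs/syncs are zero */
                   .syncs         = 0,
           };
   }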


@ -267,19 +267,22 @@ i915 driver.
Intel GPU Basics
----------------
An Intel GPU has multiple engines. There are several engine types.
An Intel GPU has multiple engines. There are several engine types:
- RCS engine is for rendering 3D and performing compute, this is named
`I915_EXEC_RENDER` in user space.
- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user
space.
- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD`
in user space
- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user
space.
- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine;
instead it is to be used by user space to specify a default rendering
engine (for 3D) that may or may not be the same as RCS.
- Render Command Streamer (RCS). An engine for rendering 3D and
performing compute.
- Blitting Command Streamer (BCS). An engine for performing blitting and/or
copying operations.
- Video Command Streamer. An engine used for video encoding and decoding. Also
sometimes called 'BSD' in hardware documentation.
- Video Enhancement Command Streamer (VECS). An engine for video enhancement.
Also sometimes called 'VEBOX' in hardware documentation.
- Compute Command Streamer (CCS). An engine that has access to the media and
GPGPU pipelines, but not the 3D pipeline.
- Graphics Security Controller (GSCCS). A dedicated engine for internal
communication with GSC controller on security related tasks like
High-bandwidth Digital Content Protection (HDCP), Protected Xe Path (PXP),
and HuC firmware authentication.
The Intel GPU family is a family of integrated GPU's using Unified
Memory Access. For having the GPU "do work", user space will feed the


@ -0,0 +1,9 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
===========================================================
Misc DRM driver uAPI- and feature implementation guidelines
===========================================================
.. toctree::
drm-vm-bind-async


@ -18,6 +18,7 @@ GPU Driver Developer's Guide
vga-switcheroo
vgaarbiter
automated_testing
implementation_guidelines
todo
rfc/index


@ -0,0 +1,40 @@
.. SPDX-License-Identifier: GPL-2.0+
=========================
drm/Panfrost Mali Driver
=========================
.. _panfrost-usage-stats:
Panfrost DRM client usage stats implementation
==============================================
The drm/Panfrost driver implements the DRM client usage stats specification as
documented in :ref:`drm-client-usage-stats`.
Example of the output showing the implemented key value pairs and entirety of
the currently possible format options:
::
pos: 0
flags: 02400002
mnt_id: 27
ino: 531
drm-driver: panfrost
drm-client-id: 14
drm-engine-fragment: 1846584880 ns
drm-cycles-fragment: 1424359409
drm-maxfreq-fragment: 799999987 Hz
drm-curfreq-fragment: 799999987 Hz
drm-engine-vertex-tiler: 71932239 ns
drm-cycles-vertex-tiler: 52617357
drm-maxfreq-vertex-tiler: 799999987 Hz
drm-curfreq-vertex-tiler: 799999987 Hz
drm-total-memory: 290 MiB
drm-shared-memory: 0 MiB
drm-active-memory: 226 MiB
drm-resident-memory: 36496 KiB
drm-purgeable-memory: 128 KiB
Possible `drm-engine-` key names are: `fragment`, and `vertex-tiler`.
`drm-curfreq-` values convey the current operating frequency for that engine.


@ -67,14 +67,8 @@ platforms.
When the time comes for Xe, the protection will be lifted on Xe and kept in i915.
Xe driver will be protected with both STAGING Kconfig and force_probe. Changes in
the uAPI are expected while the driver is behind these protections. STAGING will
be removed when the driver uAPI gets to a mature state where we can guarantee the
no regression rule. Then force_probe will be lifted only for future platforms
that will be productized with Xe driver, but not with i915.
Xe Pre-Merge Goals
====================
Xe Pre-Merge Goals - Work-in-Progress
=======================================
Drm_scheduler
-------------
@ -94,41 +88,6 @@ depend on any other patch touching drm_scheduler itself that was not yet merged
through drm-misc. This, by itself, already includes the reach of an agreement for
uniform 1 to 1 relationship implementation / usage across drivers.
GPU VA
------
Two main goals of Xe are meeting together here:
1) Have an uAPI that aligns with modern UMD needs.
2) Early upstream engagement.
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
track of GPU virtual address mappings. This is still not merged upstream, but
this aligns very well with our goals and with our VM_BIND. The engagement with
upstream and the port of Xe towards GPUVA is already ongoing.
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
related patch should be independent and present on dri-devel or acked by
maintainers to go along with the first Xe pull request towards drm-next.
DRM_VM_BIND
-----------
Nouveau, and Xe are all implementing VM_BIND and new Exec uAPIs in order to
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
development of a common new drm_infrastructure. However, the Xe team needs to
engage with the community to explore the options of a common API.
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
below, or this entire block deleted if the consensus is for independent drivers
vm_bind ioctls.
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to enforce the overall locking scheme for all major
structs and list (so vm and vma). So, a consensus is needed, and possibly some
common helpers. If helpers are needed, they should be also documented in this
document.
ASYNC VM_BIND
-------------
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
@ -138,8 +97,8 @@ memory fences. Ideally with helper support so people don't get it wrong in all
possible ways.
As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
various flavors, error handling and a sample API should be documented here or in
a separate document pointed to by this document.
various flavors, error handling and sample API suggestions are documented in
:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
Userptr integration and vm_bind
-------------------------------
@ -212,6 +171,14 @@ This item ties into the GPUVA, VM_BIND, and even long-running compute support.
As a key measurable result, we need to have a community consensus documented in
this document and the Xe driver prepared for the changes, if necessary.
Xe uAPI high level overview
=============================
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
Xe Pre-Merge Goals - Completed
================================
Dev_coredump
------------
@ -229,7 +196,37 @@ infrastructure with overall possible improvements, like multiple file support
for better organization of the dumps, snapshot support, dmesg extra print,
and whatever may make sense and help the overall infrastructure.
Xe uAPI high level overview
=============================
DRM_VM_BIND
-----------
Nouveau, and Xe are all implementing VM_BIND and new Exec uAPIs in order to
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
development of a common new drm_infrastructure. However, the Xe team needs to
engage with the community to explore the options of a common API.
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
below, or this entire block deleted if the consensus is for independent drivers
vm_bind ioctls.
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to enforce the overall locking scheme for all major
structs and list (so vm and vma). So, a consensus is needed, and possibly some
common helpers. If helpers are needed, they should be also documented in this
document.
GPU VA
------
Two main goals of Xe are meeting together here:
1) Have an uAPI that aligns with modern UMD needs.
2) Early upstream engagement.
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
track of GPU virtual address mappings. This is still not merged upstream, but
this aligns very well with our goals and with our VM_BIND. The engagement with
upstream and the port of Xe towards GPUVA is already ongoing.
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
related patch should be independent and present on dri-devel or acked by
maintainers to go along with the first Xe pull request towards drm-next.


@ -0,0 +1,389 @@
.. SPDX-License-Identifier: GPL-2.0
.. Copyright 2021-2023 Collabora Ltd.
========================
Exchanging pixel buffers
========================
As originally designed, the Linux graphics subsystem had extremely limited
support for sharing pixel-buffer allocations between processes, devices, and
subsystems. Modern systems require extensive integration between all three
classes; this document details how applications and kernel subsystems should
approach this sharing for two-dimensional image data.
It is written with reference to the DRM subsystem for GPU and display devices,
V4L2 for media devices, and also to Vulkan, EGL and Wayland for userspace
support; however, any other subsystems should also follow this design and advice.
Glossary of terms
=================
.. glossary::
image:
Conceptually a two-dimensional array of pixels. The pixels may be stored
in one or more memory buffers. Has width and height in pixels, pixel
format and modifier (implicit or explicit).
row:
A span along a single y-axis value, e.g. from co-ordinates (0,100) to
(200,100).
scanline:
Synonym for row.
column:
A span along a single x-axis value, e.g. from co-ordinates (100,0) to
(100,100).
memory buffer:
A piece of memory for storing (parts of) pixel data. Has stride and size
in bytes and at least one handle in some API. May contain one or more
planes.
plane:
A two-dimensional array of some or all of an image's color and alpha
channel values.
pixel:
A picture element. Has a single color value, which is defined by one or
more color channel values, e.g. R, G and B, or Y, Cb and Cr. May also
have an alpha value as an additional channel.
pixel data:
Bytes or bits that represent some or all of the color/alpha channel values
of a pixel or an image. The data for one pixel may be spread over several
planes or memory buffers depending on format and modifier.
color value:
A tuple of numbers, representing a color. Each element in the tuple is a
color channel value.
color channel:
One of the dimensions in a color model. For example, RGB model has
channels R, G, and B. Alpha channel is sometimes counted as a color
channel as well.
pixel format:
A description of how pixel data represents the pixel's color and alpha
values.
modifier:
A description of how pixel data is laid out in memory buffers.
alpha:
A value that denotes the color coverage in a pixel. Sometimes used for
translucency instead.
stride:
A value that denotes the relationship between pixel-location co-ordinates
and byte-offset values. Typically used as the byte offset between two
pixels at the start of vertically-consecutive tiling blocks. For linear
layouts, the byte offset between two vertically-adjacent pixels. For
non-linear formats the stride must be computed in a consistent way, which
usually is done as-if the layout was linear.
pitch:
Synonym for stride.
Formats and modifiers
=====================
Each buffer must have an underlying format. This format describes the color
values provided for each pixel. Although each subsystem has its own format
descriptions (e.g. V4L2 and fbdev), the ``DRM_FORMAT_*`` tokens should be reused
wherever possible, as they are the standard descriptions used for interchange.
These tokens are described in the ``drm_fourcc.h`` file, which is a part of
DRM's uAPI.
Each ``DRM_FORMAT_*`` token describes the translation between a pixel
co-ordinate in an image, and the color values for that pixel contained within
its memory buffers. The number and type of color channels are described:
whether they are RGB or YUV, integer or floating-point, the size of each channel
and their locations within the pixel memory, and the relationship between color
planes.
For example, ``DRM_FORMAT_ARGB8888`` describes a format in which each pixel has
a single 32-bit value in memory. Alpha, red, green, and blue color channels are
available at 8-bit precision per channel, ordered respectively from most to
least significant bits in little-endian storage. ``DRM_FORMAT_*`` is not
affected by either CPU or device endianness; the byte pattern in memory is
always as described in the format definition, which is usually little-endian.
As a more complex example, ``DRM_FORMAT_NV12`` describes a format in which luma
and chroma YUV samples are stored in separate planes, where the chroma plane is
stored at half the resolution in both dimensions (i.e. one U/V chroma
sample is stored for each 2x2 pixel grouping).
Format modifiers describe a translation mechanism between these per-pixel memory
samples, and the actual memory storage for the buffer. The most straightforward
modifier is ``DRM_FORMAT_MOD_LINEAR``, describing a scheme in which each plane
is laid out row-sequentially, from the top-left to the bottom-right corner.
This is considered the baseline interchange format, and most convenient for CPU
access.
Modern hardware employs much more sophisticated access mechanisms, typically
making use of tiled access and possibly also compression. For example, the
``DRM_FORMAT_MOD_VIVANTE_TILED`` modifier describes memory storage where pixels
are stored in 4x4 blocks arranged in row-major ordering, i.e. the first tile in
a plane stores pixels (0,0) to (3,3) inclusive, and the second tile in a plane
stores pixels (4,0) to (7,3) inclusive.
Some modifiers may modify the number of planes required for an image; for
example, the ``I915_FORMAT_MOD_Y_TILED_CCS`` modifier adds a second plane to RGB
formats in which it stores data about the status of every tile, notably
including whether the tile is fully populated with pixel data, or can be
expanded from a single solid color.
These extended layouts are highly vendor-specific, and even specific to
particular generations or configurations of devices per-vendor. For this reason,
support of modifiers must be explicitly enumerated and negotiated by all users
in order to ensure a compatible and optimal pipeline, as discussed below.
Dimensions and size
===================
Each pixel buffer must be accompanied by logical pixel dimensions. This refers
to the number of unique samples which can be extracted from, or stored to, the
underlying memory storage. For example, even though a 1920x1080
``DRM_FORMAT_NV12`` buffer has a luma plane containing 1920x1080 samples for the Y
component, and 960x540 samples for the U and V components, the overall buffer is
still described as having dimensions of 1920x1080.
The in-memory storage of a buffer is not guaranteed to begin immediately at the
base address of the underlying memory, nor is it guaranteed that the memory
storage is tightly clipped to either dimension.
Each plane must therefore be described with an ``offset`` in bytes, which will be
added to the base address of the memory storage before performing any per-pixel
calculations. This may be used to combine multiple planes into a single memory
buffer; for example, ``DRM_FORMAT_NV12`` may be stored in a single memory buffer
where the luma plane's storage begins immediately at the start of the buffer
with an offset of 0, and the chroma plane's storage follows within the same buffer
beginning from the byte offset for that plane.
Each plane must also have a ``stride`` in bytes, expressing the offset in memory
between two contiguous rows. For example, a ``DRM_FORMAT_MOD_LINEAR`` buffer
with dimensions of 1000x1000 may have been allocated as if it were 1024x1000, in
order to allow for aligned access patterns. In this case, the buffer will still
be described with a width of 1000, however the stride will be ``1024 * bpp``,
indicating that there are 24 pixels at the positive extreme of the x axis whose
values are not significant.
Buffers may also be padded further in the y dimension, simply by allocating a
larger area than would ordinarily be required. For example, many media decoders
are not able to natively output buffers of height 1080, but instead require an
effective height of 1088 pixels. In this case, the buffer continues to be
described as having a height of 1080, with the memory allocation for each buffer
being increased to account for the extra padding.
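As a small illustration of how offset and stride combine (a sketch for
``DRM_FORMAT_MOD_LINEAR`` only; ``cpp`` is the bytes per sample of the plane,
which follows from the pixel format):

.. code-block:: c

   #include <stdint.h>

   /* Byte address of sample (x, y) within one linear plane. */
   static uint8_t *plane_sample(uint8_t *base, uint32_t offset, uint32_t stride,
                                uint32_t cpp, uint32_t x, uint32_t y)
   {
           return base + offset + (uint64_t)y * stride + (uint64_t)x * cpp;
   }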
Enumeration
===========
Every user of pixel buffers must be able to enumerate a set of supported formats
and modifiers, described together. Within KMS, this is achieved with the
``IN_FORMATS`` property on each DRM plane, listing the supported DRM formats, and
the modifiers supported for each format. In userspace, this is supported through
the `EGL_EXT_image_dma_buf_import_modifiers`_ extension entrypoints for EGL, the
`VK_EXT_image_drm_format_modifier`_ extension for Vulkan, and the
`zwp_linux_dmabuf_v1`_ extension for Wayland.
Each of these interfaces allows users to query a set of supported
format+modifier combinations.
Negotiation
===========
It is the responsibility of userspace to negotiate an acceptable format+modifier
combination for its usage. This is performed through a simple intersection of
lists. For example, if a user wants to use Vulkan to render an image to be
displayed on a KMS plane, it must:
- query KMS for the ``IN_FORMATS`` property for the given plane
- query Vulkan for the supported formats for its physical device, making sure
to pass the ``VkImageUsageFlagBits`` and ``VkImageCreateFlagBits``
corresponding to the intended rendering use
- intersect these formats to determine the most appropriate one
- for this format, intersect the lists of supported modifiers for both KMS and
Vulkan, to obtain a final list of acceptable modifiers for that format
This intersection must be performed for all usages. For example, if the user
also wishes to encode the image to a video stream, it must query the media API
it intends to use for encoding for the set of modifiers it supports, and
additionally intersect against this list.
If the intersection of all lists is an empty list, it is not possible to share
buffers in this way, and an alternate strategy must be considered (e.g. using
CPU access routines to copy data between the different uses, with the
corresponding performance cost).
The resulting modifier list is unsorted; the order is not significant.
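A minimal sketch of such an intersection for one format, assuming the caller has
already gathered the per-API modifier lists:

.. code-block:: c

   #include <stddef.h>
   #include <stdint.h>

   /* Intersect two unsorted modifier lists; out must have room for
    * min(na, nb) entries. Order of the result is not significant. */
   static size_t intersect_modifiers(const uint64_t *a, size_t na,
                                     const uint64_t *b, size_t nb,
                                     uint64_t *out)
   {
           size_t n = 0;

           for (size_t i = 0; i < na; i++)
                   for (size_t j = 0; j < nb; j++)
                           if (a[i] == b[j]) {
                                   out[n++] = a[i];
                                   break;
                           }

           return n;
   }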
Allocation
==========
Once userspace has determined an appropriate format, and corresponding list of
acceptable modifiers, it must allocate the buffer. As there is no universal
buffer-allocation interface available at either kernel or userspace level, the
client makes an arbitrary choice of allocation interface such as Vulkan, GBM, or
a media API.
Each allocation request must take, at a minimum: the pixel format, a list of
acceptable modifiers, and the buffer's width and height. Each API may extend
this set of properties in different ways, such as allowing allocation in more
than two dimensions, intended usage patterns, etc.
The component which allocates the buffer will make an arbitrary choice of what
it considers the 'best' modifier within the acceptable list for the requested
allocation, any padding required, and further properties of the underlying
memory buffers such as whether they are stored in system or device-specific
memory, whether or not they are physically contiguous, and their cache mode.
These properties of the memory buffer are not visible to userspace, however the
``dma-heaps`` API is an effort to address this.
After allocation, the client must query the allocator to determine the actual
modifier selected for the buffer, as well as the per-plane offset and stride.
Allocators are not permitted to vary the format in use, to select a modifier not
provided within the acceptable list, nor to vary the pixel dimensions other than
the padding expressed through offset, stride, and size.
Communicating additional constraints, such as alignment of stride or offset,
placement within a particular memory area, etc, is out of scope of dma-buf,
and is not solved by format and modifier tokens.
Import
======
To use a buffer within a different context, device, or subsystem, the user
passes these parameters (format, modifier, width, height, and per-plane offset
and stride) to an importing API.
Each memory buffer is referred to by a buffer handle, which may be unique or
duplicated within an image. For example, a ``DRM_FORMAT_NV12`` buffer may have
the luma and chroma buffers combined into a single memory buffer by use of the
per-plane offset parameters, or they may be completely separate allocations in
memory. For this reason, each import and allocation API must provide a separate
handle for each plane.
Each kernel subsystem has its own types and interfaces for buffer management.
DRM uses GEM buffer objects (BOs), V4L2 has its own references, etc. These types
are not portable between contexts, processes, devices, or subsystems.
To address this, ``dma-buf`` handles are used as the universal interchange for
buffers. Subsystem-specific operations are used to export native buffer handles
to a ``dma-buf`` file descriptor, and to import those file descriptors into a
native buffer handle. dma-buf file descriptors can be transferred between
contexts, processes, devices, and subsystems.
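On the DRM side this is typically done through the PRIME ioctls; a sketch using
the libdrm helpers (assuming the caller already owns the DRM fd and GEM handle):

.. code-block:: c

   #include <stdint.h>
   #include <xf86drm.h>

   /* Export a GEM handle as a dma-buf fd that can cross process and
    * subsystem boundaries, and import a received fd into a GEM handle. */
   static int export_gem_to_dmabuf(int drm_fd, uint32_t gem_handle, int *dmabuf_fd)
   {
           return drmPrimeHandleToFD(drm_fd, gem_handle, DRM_CLOEXEC, dmabuf_fd);
   }

   static int import_dmabuf_to_gem(int drm_fd, int dmabuf_fd, uint32_t *gem_handle)
   {
           return drmPrimeFDToHandle(drm_fd, dmabuf_fd, gem_handle);
   }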
For example, a Wayland media player may use V4L2 to decode a video frame into a
``DRM_FORMAT_NV12`` buffer. This will result in two memory planes (luma and
chroma) being dequeued by the user from V4L2. These planes are then exported to
one dma-buf file descriptor per plane, these descriptors are then sent along
with the metadata (format, modifier, width, height, per-plane offset and stride)
to the Wayland server. The Wayland server will then import these file
descriptors as an EGLImage for use through EGL/OpenGL (ES), a VkImage for use
through Vulkan, or a KMS framebuffer object; each of these import operations
will take the same metadata and convert the dma-buf file descriptors into their
native buffer handles.
Having a non-empty intersection of supported modifiers does not guarantee that
import will succeed into all consumers; they may have constraints beyond those
implied by modifiers which must be satisfied.
Implicit modifiers
==================
The concept of modifiers post-dates all of the subsystems mentioned above. As
such, it has been retrofitted into all of these APIs, and in order to ensure
backwards compatibility, support is needed for drivers and userspace which do
not (yet) support modifiers.
As an example, GBM is used to allocate buffers to be shared between EGL for
rendering and KMS for display. It has two entrypoints for allocating buffers:
``gbm_bo_create`` which only takes the format, width, height, and a usage token,
and ``gbm_bo_create_with_modifiers`` which extends this with a list of modifiers.
In the latter case, the allocation is as discussed above, being provided with a
list of acceptable modifiers that the implementation can choose from (or fail if
it is not possible to allocate within those constraints). In the former case
where modifiers are not provided, the GBM implementation must make its own
choice as to what is likely to be the 'best' layout. Such a choice is entirely
implementation-specific: some will internally use tiled layouts which are not
CPU-accessible if the implementation decides that is a good idea through
whatever heuristic. It is the implementation's responsibility to ensure that
this choice is appropriate.
To support this case where the layout is not known because there is no awareness
of modifiers, a special ``DRM_FORMAT_MOD_INVALID`` token has been defined. This
pseudo-modifier declares that the layout is not known, and that the driver
should use its own logic to determine what the underlying layout may be.
.. note::
``DRM_FORMAT_MOD_INVALID`` is a non-zero value. The modifier value zero is
``DRM_FORMAT_MOD_LINEAR``, which is an explicit guarantee that the image
has the linear layout. Care and attention should be taken to ensure that
zero as a default value is not mixed up with either no modifier or the linear
modifier. Also note that in some APIs the invalid modifier value is specified
with an out-of-band flag, like in ``DRM_IOCTL_MODE_ADDFB2``.
There are four cases where this token may be used:
- during enumeration, an interface may return ``DRM_FORMAT_MOD_INVALID``, either
as the sole member of a modifier list to declare that explicit modifiers are
not supported, or as part of a larger list to declare that implicit modifiers
may be used
- during allocation, a user may supply ``DRM_FORMAT_MOD_INVALID``, either as the
sole member of a modifier list (equivalent to not supplying a modifier list
at all) to declare that explicit modifiers are not supported and must not be
used, or as part of a larger list to declare that an allocation using implicit
modifiers is acceptable
- in a post-allocation query, an implementation may return
``DRM_FORMAT_MOD_INVALID`` as the modifier of the allocated buffer to declare
that the underlying layout is implementation-defined and that an explicit
modifier description is not available; per the above rules, this may only be
returned when the user has included ``DRM_FORMAT_MOD_INVALID`` as part of the
list of acceptable modifiers, or not provided a list
- when importing a buffer, the user may supply ``DRM_FORMAT_MOD_INVALID`` as the
buffer modifier (or not supply a modifier) to indicate that the modifier is
unknown for whatever reason; this is only acceptable when the buffer has
not been allocated with an explicit modifier
It follows from this that for any single buffer, the complete chain of operations
formed by the producer and all the consumers must be either fully implicit or fully
explicit. For example, if a user wishes to allocate a buffer for use between
GPU, display, and media, but the media API does not support modifiers, then the
user **must not** allocate the buffer with explicit modifiers and attempt to
import the buffer into the media API with no modifier, but either perform the
allocation using implicit modifiers, or allocate the buffer for media use
separately and copy between the two buffers.
As one exception to the above, allocations may be 'upgraded' from implicit
to explicit modifiers. For example, if the buffer is allocated with
``gbm_bo_create`` (taking no modifiers), the user may then query the modifier with
``gbm_bo_get_modifier`` and then use this modifier as an explicit modifier token
if a valid modifier is returned.
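A sketch of that upgrade path with the GBM API (the format and usage flags are
arbitrary examples, and a real user would keep the buffer object alive while the
modifier is in use):

.. code-block:: c

   #include <gbm.h>
   #include <drm_fourcc.h>

   /* Allocate without modifiers, then ask GBM which layout it picked; if it
    * is not DRM_FORMAT_MOD_INVALID it may be passed on as an explicit modifier. */
   static uint64_t alloc_and_query_modifier(struct gbm_device *gbm,
                                            uint32_t width, uint32_t height)
   {
           struct gbm_bo *bo = gbm_bo_create(gbm, width, height,
                                             GBM_FORMAT_XRGB8888,
                                             GBM_BO_USE_RENDERING |
                                             GBM_BO_USE_SCANOUT);
           uint64_t modifier = DRM_FORMAT_MOD_INVALID;

           if (bo) {
                   modifier = gbm_bo_get_modifier(bo);
                   gbm_bo_destroy(bo);
           }

           return modifier;
   }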
When allocating buffers for exchange between different users and modifiers are
not available, implementations are strongly encouraged to use
``DRM_FORMAT_MOD_LINEAR`` for their allocation, as this is the universal baseline
for exchange. However, it is not guaranteed that this will result in the correct
interpretation of buffer content, as implicit modifier operation may still be
subject to driver-specific heuristics.
Any new users - userspace programs and protocols, kernel subsystems, etc -
wishing to exchange buffers must offer interoperability through dma-buf file
descriptors for memory planes, DRM format tokens to describe the format, DRM
format modifiers to describe the layout in memory, at least width and height for
dimensions, and at least offset and stride for each memory plane.
.. _zwp_linux_dmabuf_v1: https://gitlab.freedesktop.org/wayland/wayland-protocols/-/blob/main/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml
.. _VK_EXT_image_drm_format_modifier: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_image_drm_format_modifier.html
.. _EGL_EXT_image_dma_buf_import_modifiers: https://registry.khronos.org/EGL/extensions/EXT/EGL_EXT_image_dma_buf_import_modifiers.txt


@ -22,6 +22,7 @@ place where this information is gathered.
unshare
spec_ctrl
accelerators/ocxl
dma-buf-alloc-exchange
ebpf/index
ELF
ioctl/index


@ -1636,13 +1636,13 @@ F: drivers/gpu/drm/arm/display/include/
F: drivers/gpu/drm/arm/display/komeda/
ARM MALI PANFROST DRM DRIVER
M: Boris Brezillon <boris.brezillon@collabora.com>
M: Rob Herring <robh@kernel.org>
M: Tomeu Vizoso <tomeu.vizoso@collabora.com>
R: Steven Price <steven.price@arm.com>
R: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
L: dri-devel@lists.freedesktop.org
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/gpu/panfrost.rst
F: drivers/gpu/drm/panfrost/
F: include/uapi/drm/panfrost_drm.h
@ -6175,6 +6175,7 @@ L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/driver-api/dma-buf.rst
F: Documentation/userspace-api/dma-buf-alloc-exchange.rst
F: drivers/dma-buf/
F: include/linux/*fence.h
F: include/linux/dma-buf.h
@ -6667,6 +6668,7 @@ S: Maintained
B: https://gitlab.freedesktop.org/drm/msm/-/issues
T: git https://gitlab.freedesktop.org/drm/msm.git
F: Documentation/devicetree/bindings/display/msm/
F: drivers/gpu/drm/ci/xfails/msm*
F: drivers/gpu/drm/msm/
F: include/uapi/drm/msm_drm.h
@ -6818,7 +6820,8 @@ DRM DRIVER FOR SOLOMON SSD130X OLED DISPLAYS
M: Javier Martinez Canillas <javierm@redhat.com>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml
F: Documentation/devicetree/bindings/display/solomon,ssd-common.yaml
F: Documentation/devicetree/bindings/display/solomon,ssd13*.yaml
F: drivers/gpu/drm/solomon/ssd130x*
DRM DRIVER FOR ST-ERICSSON MCDE
@ -6913,12 +6916,26 @@ M: Thomas Zimmermann <tzimmermann@suse.de>
S: Maintained
W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/
F: Documentation/devicetree/bindings/gpu/
F: Documentation/gpu/
F: drivers/gpu/drm/*
F: drivers/gpu/drm/
F: drivers/gpu/vga/
F: include/drm/drm*
F: include/drm/drm
F: include/linux/vga*
F: include/uapi/drm/drm*
F: include/uapi/drm/
X: drivers/gpu/drm/amd/
X: drivers/gpu/drm/armada/
X: drivers/gpu/drm/etnaviv/
X: drivers/gpu/drm/exynos/
X: drivers/gpu/drm/i915/
X: drivers/gpu/drm/kmb/
X: drivers/gpu/drm/mediatek/
X: drivers/gpu/drm/msm/
X: drivers/gpu/drm/nouveau/
X: drivers/gpu/drm/radeon/
X: drivers/gpu/drm/renesas/
X: drivers/gpu/drm/tegra/
DRM DRIVERS FOR ALLWINNER A10
M: Maxime Ripard <mripard@kernel.org>
@ -6939,6 +6956,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
F: Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
F: Documentation/gpu/meson.rst
F: drivers/gpu/drm/ci/xfails/meson*
F: drivers/gpu/drm/meson/
DRM DRIVERS FOR ATMEL HLCDC
@ -6962,7 +6980,9 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/bridge/
F: drivers/gpu/drm/bridge/
F: drivers/gpu/drm/drm_bridge.c
F: drivers/gpu/drm/drm_bridge_connector.c
F: include/drm/drm_bridge.h
F: include/drm/drm_bridge_connector.h
DRM DRIVERS FOR EXYNOS
M: Inki Dae <inki.dae@samsung.com>
@ -6986,10 +7006,12 @@ F: Documentation/devicetree/bindings/display/fsl,dcu.txt
F: Documentation/devicetree/bindings/display/fsl,tcon.txt
F: drivers/gpu/drm/fsl-dcu/
DRM DRIVERS FOR FREESCALE IMX
DRM DRIVERS FOR FREESCALE IMX 5/6
M: Philipp Zabel <p.zabel@pengutronix.de>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
T: git git://git.pengutronix.de/git/pza/linux
F: Documentation/devicetree/bindings/display/imx/
F: drivers/gpu/drm/imx/ipuv3/
F: drivers/gpu/ipu-v3/
@ -7008,7 +7030,7 @@ DRM DRIVERS FOR GMA500 (Poulsbo, Moorestown and derivative chipsets)
M: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://github.com/patjak/drm-gma500
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/gma500/
DRM DRIVERS FOR HISILICON
@ -7047,6 +7069,7 @@ L: dri-devel@lists.freedesktop.org
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/display/mediatek/
F: drivers/gpu/drm/ci/xfails/mediatek*
F: drivers/gpu/drm/mediatek/
F: drivers/phy/mediatek/phy-mtk-dp.c
F: drivers/phy/mediatek/phy-mtk-hdmi*
@ -7087,6 +7110,7 @@ L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/rockchip/
F: drivers/gpu/drm/ci/xfails/rockchip*
F: drivers/gpu/drm/rockchip/
DRM DRIVERS FOR STI
@ -7183,7 +7207,7 @@ F: Documentation/devicetree/bindings/display/xlnx/
F: drivers/gpu/drm/xlnx/
DRM GPU SCHEDULER
M: Luben Tuikov <luben.tuikov@amd.com>
M: Luben Tuikov <ltuikov89@gmail.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@ -7192,6 +7216,7 @@ F: include/drm/gpu_scheduler.h
DRM PANEL DRIVERS
M: Neil Armstrong <neil.armstrong@linaro.org>
R: Jessica Zhang <quic_jesszhan@quicinc.com>
R: Sam Ravnborg <sam@ravnborg.org>
L: dri-devel@lists.freedesktop.org
S: Maintained
@ -9129,6 +9154,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F: Documentation/ABI/testing/debugfs-driver-habanalabs
F: Documentation/ABI/testing/sysfs-driver-habanalabs
F: drivers/accel/habanalabs/
F: include/linux/habanalabs/
F: include/trace/events/habanalabs.h
F: include/uapi/drm/habanalabs_accel.h
@ -10535,6 +10561,7 @@ C: irc://irc.oftc.net/intel-gfx
T: git git://anongit.freedesktop.org/drm-intel
F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
F: Documentation/gpu/i915.rst
F: drivers/gpu/drm/ci/xfails/i915*
F: drivers/gpu/drm/i915/
F: include/drm/i915*
F: include/uapi/drm/i915_drm.h
@ -13577,7 +13604,7 @@ F: drivers/usb/mtu3/
MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
M: Peter Senna Tschudin <peter.senna@gmail.com>
M: Martin Donnelly <martin.donnelly@ge.com>
M: Ian Ray <ian.ray@ge.com>
M: Martyn Welch <martyn.welch@collabora.co.uk>
S: Maintained
F: Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
@ -15414,6 +15441,7 @@ M: Laurentiu Palcu <laurentiu.palcu@oss.nxp.com>
R: Lucas Stach <l.stach@pengutronix.de>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml
F: drivers/gpu/drm/imx/dcss/
@ -17927,6 +17955,7 @@ C: irc://irc.oftc.net/radeon
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: Documentation/gpu/amdgpu/
F: drivers/gpu/drm/amd/
F: drivers/gpu/drm/ci/xfails/amd*
F: drivers/gpu/drm/radeon/
F: include/uapi/drm/amdgpu_drm.h
F: include/uapi/drm/radeon_drm.h
@ -22907,6 +22936,7 @@ L: dri-devel@lists.freedesktop.org
L: virtualization@lists.linux-foundation.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/ci/xfails/virtio*
F: drivers/gpu/drm/virtio/
F: include/uapi/linux/virtio_gpu.h

View File

@ -835,6 +835,7 @@ CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m
CONFIG_DRM_PANEL_LVDS=m
CONFIG_DRM_PANEL_SIMPLE=m
CONFIG_DRM_PANEL_EDP=m
CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_MANTIX_MLAF057WE51=m
CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
CONFIG_DRM_PANEL_SITRONIX_ST7703=m

View File

@ -8,17 +8,16 @@
#include <asm/page.h>
struct file;
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (efi_range_is_wc(vm_start, vm_end - vm_start))
return pgprot_writecombine(prot);
else
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
return pgprot_noncached(prot);
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
{

View File

@ -5,26 +5,27 @@
#include <asm/page.h>
#include <asm/setup.h>
struct file;
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
#ifdef CONFIG_MMU
#ifdef CONFIG_SUN3
pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
pgprot_val(prot) |= SUN3_PAGE_NOCACHE;
#else
if (CPU_IS_020_OR_030)
pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
pgprot_val(prot) |= _PAGE_NOCACHE030;
if (CPU_IS_040_OR_060) {
pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
pgprot_val(prot) &= _CACHEMASK040;
/* Use no-cache mode, serialized */
pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
pgprot_val(prot) |= _PAGE_NOCACHE_S;
}
#endif /* CONFIG_SUN3 */
#endif /* CONFIG_MMU */
return prot;
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
#include <asm-generic/fb.h>

View File

@ -3,14 +3,13 @@
#include <asm/page.h>
struct file;
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
return pgprot_noncached(prot);
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
/*
* MIPS doesn't define __raw_ I/O macros, so the helpers

View File

@ -2,18 +2,20 @@
#ifndef _ASM_FB_H_
#define _ASM_FB_H_
#include <linux/fs.h>
#include <asm/page.h>
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
vma->vm_page_prot = phys_mem_access_prot(file, off >> PAGE_SHIFT,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
/*
* PowerPC's implementation of phys_mem_access_prot() does
* not use the file argument. Set it to NULL in preparation
* for later updates to the interface.
*/
return phys_mem_access_prot(NULL, PHYS_PFN(offset), vm_end - vm_start, prot);
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
#include <asm-generic/fb.h>

View File

@ -4,15 +4,18 @@
#include <linux/io.h>
#include <asm/page.h>
struct fb_info;
struct file;
struct vm_area_struct;
#ifdef CONFIG_SPARC32
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
{ }
#define fb_pgprotect fb_pgprotect
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
return prot;
}
#define pgprot_framebuffer pgprot_framebuffer
#endif
int fb_is_primary_device(struct fb_info *info);

View File

@ -2,12 +2,14 @@
#ifndef _ASM_X86_FB_H
#define _ASM_X86_FB_H
struct fb_info;
struct file;
struct vm_area_struct;
#include <asm/page.h>
void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off);
#define fb_pgprotect fb_pgprotect
struct fb_info;
pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset);
#define pgprot_framebuffer pgprot_framebuffer
int fb_is_primary_device(struct fb_info *info);
#define fb_is_primary_device fb_is_primary_device

View File

@ -13,16 +13,17 @@
#include <linux/vgaarb.h>
#include <asm/fb.h>
void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off)
pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
unsigned long prot;
prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK;
pgprot_val(prot) &= ~_PAGE_CACHE_MASK;
if (boot_cpu_data.x86 > 3)
pgprot_val(vma->vm_page_prot) =
prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
pgprot_val(prot) |= cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
return prot;
}
EXPORT_SYMBOL(fb_pgprotect);
EXPORT_SYMBOL(pgprot_framebuffer);
int fb_is_primary_device(struct fb_info *info)
{
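
The hunks above convert each architecture's fb_pgprotect(), which edited vma->vm_page_prot in place, into pgprot_framebuffer(), which takes a pgprot_t plus the mapping bounds and returns the adjusted protection. Below is a minimal sketch of how a generic fbdev mmap helper might consume the new interface; the helper name and surrounding logic are illustrative assumptions, not code from this series.

#include <linux/fb.h>
#include <linux/mm.h>
#include <asm/fb.h>

/* Hypothetical caller: map the framebuffer aperture with the arch-adjusted protection. */
static int fb_mmap_sketch(struct fb_info *info, struct vm_area_struct *vma)
{
	unsigned long start = info->fix.smem_start;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;

	/* The arch helper now returns the protection instead of writing the VMA. */
	vma->vm_page_prot = pgprot_framebuffer(vma->vm_page_prot,
					       vma->vm_start, vma->vm_end,
					       start + offset);

	return vm_iomap_memory(vma, start, info->fix.smem_len);
}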

View File

@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root;
static struct class *accel_class;
static struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
}
static const struct class accel_class = {
.name = "accel",
.devnode = accel_devnode,
};
static int accel_sysfs_init(void)
{
accel_class = class_create("accel");
if (IS_ERR(accel_class))
return PTR_ERR(accel_class);
accel_class->devnode = accel_devnode;
return 0;
return class_register(&accel_class);
}
static void accel_sysfs_destroy(void)
{
if (IS_ERR_OR_NULL(accel_class))
return;
class_destroy(accel_class);
accel_class = NULL;
class_unregister(&accel_class);
}
static int accel_name_info(struct seq_file *m, void *data)
@ -79,29 +74,30 @@ static const struct drm_info_list accel_debugfs_list[] = {
#define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
/**
* accel_debugfs_init() - Initialize debugfs for accel minor
* @minor: Pointer to the drm_minor instance.
* @minor_id: The minor's id
* accel_debugfs_init() - Initialize debugfs for device
* @dev: Pointer to the device instance.
*
* This function initializes the drm minor's debugfs members and creates
* a root directory for the minor in debugfs. It also creates common files
* for accelerators and calls the driver's debugfs init callback.
* This function creates a root directory for the device in debugfs.
*/
void accel_debugfs_init(struct drm_minor *minor, int minor_id)
void accel_debugfs_init(struct drm_device *dev)
{
struct drm_device *dev = minor->dev;
char name[64];
drm_debugfs_dev_init(dev, accel_debugfs_root);
}
INIT_LIST_HEAD(&minor->debugfs_list);
mutex_init(&minor->debugfs_lock);
sprintf(name, "%d", minor_id);
minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root);
/**
* accel_debugfs_register() - Register debugfs for device
* @dev: Pointer to the device instance.
*
* Creates common files for accelerators.
*/
void accel_debugfs_register(struct drm_device *dev)
{
struct drm_minor *minor = dev->accel;
minor->debugfs_root = dev->debugfs_root;
drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
if (dev->driver->debugfs_init)
dev->driver->debugfs_init(minor);
dev->debugfs_root, minor);
}
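
With the split above, debugfs root-directory creation and the common accel files happen at different stages of bring-up. A rough sketch of the assumed ordering follows; the wrapper function is purely illustrative and not part of this series.

static int accel_bringup_sketch(struct drm_device *dev)
{
	accel_debugfs_init(dev);	/* during device init: create the per-device debugfs root */

	/* ... minor allocation and driver initialization happen here ... */

	accel_debugfs_register(dev);	/* at registration time: add the common accel files */
	return 0;
}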
/**
@ -116,7 +112,7 @@ void accel_debugfs_init(struct drm_minor *minor, int minor_id)
void accel_set_device_instance_params(struct device *kdev, int index)
{
kdev->devt = MKDEV(ACCEL_MAJOR, index);
kdev->class = accel_class;
kdev->class = &accel_class;
kdev->type = &accel_sysfs_device_minor;
}

View File

@ -361,10 +361,11 @@ out:
return rc;
}
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
union hl_cb_args *args = data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
union hl_cb_args *args = data;
u64 handle = 0, device_va = 0;
enum hl_device_status status;
u32 usage_cnt = 0;
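
hl_cb_ioctl() (like the other habanalabs ioctls converted below) now uses the standard DRM ioctl prototype, so it can be dispatched through a regular drm_ioctl_desc table. A hedged sketch of such a table follows; the exact ioctl macro names and flags are assumptions for illustration.

#include <drm/drm_ioctl.h>

static const struct drm_ioctl_desc hl_ioctls_sketch[] = {
	DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
	DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
	DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
};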

View File

@ -31,6 +31,24 @@ enum hl_cs_wait_status {
CS_WAIT_STATUS_GONE
};
/*
* Data used while handling wait/timestamp nodes.
* The purpose of this struct is to store the needed data for both operations
* in one variable instead of passing a large number of arguments to functions.
*/
struct wait_interrupt_data {
struct hl_user_interrupt *interrupt;
struct hl_mmap_mem_buf *buf;
struct hl_mem_mgr *mmg;
struct hl_cb *cq_cb;
u64 ts_handle;
u64 ts_offset;
u64 cq_handle;
u64 cq_offset;
u64 target_value;
u64 intr_timeout_us;
};
static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
enum hl_cs_wait_status *status, s64 *timestamp);
@ -1079,19 +1097,22 @@ static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
struct hl_user_pending_interrupt *pend, *temp;
unsigned long flags;
spin_lock(&interrupt->wait_list_lock);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
if (pend->ts_reg_info.buf) {
list_del(&pend->wait_list_node);
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
hl_cb_put(pend->ts_reg_info.cq_cb);
} else {
pend->fence.error = -EIO;
complete_all(&pend->fence.completion);
}
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
pend->fence.error = -EIO;
complete_all(&pend->fence.completion);
}
spin_unlock(&interrupt->wait_list_lock);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
list_del(&pend->list_node);
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
hl_cb_put(pend->ts_reg_info.cq_cb);
}
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
}
void hl_release_pending_user_interrupts(struct hl_device *hdev)
@ -1730,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
/* Need to wait for restore completion before execution phase */
if (num_chunks) {
enum hl_cs_wait_status status;
wait_again:
ret = _hl_cs_wait_ioctl(hdev, ctx,
jiffies_to_usecs(hdev->timeout_jiffies),
*cs_seq, &status, NULL);
if (ret) {
if (ret == -ERESTARTSYS) {
usleep_range(100, 200);
goto wait_again;
}
dev_err(hdev->dev,
"Restore CS for context %d failed to complete %d\n",
ctx->asid, ret);
@ -2539,8 +2555,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
return 0;
}
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
union hl_cs_args *args = data;
enum hl_cs_type cs_type = 0;
u64 cs_seq = ULONG_MAX;
@ -3197,166 +3214,241 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
struct hl_cb *cq_cb,
u64 ts_offset, u64 cq_offset, u64 target_value,
spinlock_t *wait_list_lock,
struct hl_user_pending_interrupt **pend)
static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
{
struct hl_ts_buff *ts_buff = buf->private;
struct hl_user_pending_interrupt *requested_offset_record =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
ts_offset;
struct hl_user_pending_interrupt *cb_last =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
record->ts_reg_info.cq_cb = cq_cb;
record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
record->cq_target_value = target_value;
}
static int validate_and_get_ts_record(struct device *dev,
struct hl_ts_buff *ts_buff, u64 ts_offset,
struct hl_user_pending_interrupt **req_event_record)
{
struct hl_user_pending_interrupt *ts_cb_last;
*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
ts_offset;
ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
unsigned long iter_counter = 0;
u64 current_cq_counter;
ktime_t timestamp;
/* Validate ts_offset not exceeding last max */
if (requested_offset_record >= cb_last) {
dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
(u64)(uintptr_t)cb_last);
if (*req_event_record >= ts_cb_last) {
dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
ts_offset, (u64)(uintptr_t)ts_cb_last);
return -EINVAL;
}
timestamp = ktime_get();
start_over:
spin_lock(wait_list_lock);
/* Unregister only if we didn't reach the target value
* since in this case there will be no handling in irq context
* and then it's safe to delete the node out of the interrupt list
* then re-use it on other interrupt
*/
if (requested_offset_record->ts_reg_info.in_use) {
current_cq_counter = *requested_offset_record->cq_kernel_addr;
if (current_cq_counter < requested_offset_record->cq_target_value) {
list_del(&requested_offset_record->wait_list_node);
spin_unlock(wait_list_lock);
hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
dev_dbg(buf->mmg->dev,
"ts node removed from interrupt list now can re-use\n");
} else {
dev_dbg(buf->mmg->dev,
"ts node in middle of irq handling\n");
/* irq thread handling in the middle give it time to finish */
spin_unlock(wait_list_lock);
usleep_range(100, 1000);
if (++iter_counter == MAX_TS_ITER_NUM) {
dev_err(buf->mmg->dev,
"Timestamp offset processing reached timeout of %lld ms\n",
ktime_ms_delta(ktime_get(), timestamp));
return -EAGAIN;
}
goto start_over;
}
} else {
/* Fill up the new registration node info */
requested_offset_record->ts_reg_info.buf = buf;
requested_offset_record->ts_reg_info.cq_cb = cq_cb;
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + ts_offset;
requested_offset_record->cq_kernel_addr =
(u64 *) cq_cb->kernel_address + cq_offset;
requested_offset_record->cq_target_value = target_value;
spin_unlock(wait_list_lock);
}
*pend = requested_offset_record;
dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
requested_offset_record);
return 0;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
u64 target_value, struct hl_user_interrupt *interrupt,
bool register_ts_record, u64 ts_handle, u64 ts_offset,
static void unregister_timestamp_node(struct hl_device *hdev,
struct hl_user_pending_interrupt *record, bool need_lock)
{
struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
bool ts_rec_found = false;
unsigned long flags;
if (need_lock)
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
if (record->ts_reg_info.in_use) {
record->ts_reg_info.in_use = false;
list_del(&record->list_node);
ts_rec_found = true;
}
if (need_lock)
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
/* Put refcounts that were taken when we registered the event */
if (ts_rec_found) {
hl_mmap_mem_buf_put(record->ts_reg_info.buf);
hl_cb_put(record->ts_reg_info.cq_cb);
}
}
static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
struct wait_interrupt_data *data, unsigned long *flags,
struct hl_user_pending_interrupt **pend)
{
struct hl_user_pending_interrupt *req_offset_record;
struct hl_ts_buff *ts_buff = data->buf->private;
bool need_lock = false;
int rc;
rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
&req_offset_record);
if (rc)
return rc;
/* In case the node already registered, need to unregister first then re-use */
if (req_offset_record->ts_reg_info.in_use) {
dev_dbg(data->buf->mmg->dev,
"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
req_offset_record,
req_offset_record->ts_reg_info.interrupt->interrupt_id,
req_offset_record->ts_reg_info.timestamp_kernel_addr,
data->interrupt->interrupt_id);
/*
* Since the interrupt here can be different from the one the node is currently
* registered on, and we don't want to hold two list locks while unregistering,
* drop the new interrupt's ts list lock here and re-acquire it once the unregister is done.
*/
if (data->interrupt->interrupt_id !=
req_offset_record->ts_reg_info.interrupt->interrupt_id) {
need_lock = true;
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
}
unregister_timestamp_node(hdev, req_offset_record, need_lock);
if (need_lock)
spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
}
/* Fill up the new registration node info and add it to the list */
req_offset_record->ts_reg_info.in_use = true;
req_offset_record->ts_reg_info.buf = data->buf;
req_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + data->ts_offset;
req_offset_record->ts_reg_info.interrupt = data->interrupt;
set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
data->target_value);
*pend = req_offset_record;
return rc;
}
static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct wait_interrupt_data *data,
u32 *status, u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
unsigned long timeout;
long completion_rc;
unsigned long flags;
int rc = 0;
timeout = hl_usecs64_to_jiffies(timeout_us);
hl_ctx_get(ctx);
cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
if (!cq_cb) {
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
if (!data->cq_cb) {
rc = -EINVAL;
goto put_ctx;
}
/* Validate the cq offset */
if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
rc = -EINVAL;
goto put_cq_cb;
}
if (register_ts_record) {
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
interrupt->interrupt_id, ts_offset, cq_counters_offset);
buf = hl_mmap_mem_buf_get(mmg, ts_handle);
if (!buf) {
rc = -EINVAL;
goto put_cq_cb;
}
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
data->interrupt->interrupt_id, data->ts_handle,
data->ts_offset, data->cq_offset);
/* get ts buffer record */
rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
cq_counters_offset, target_value,
&interrupt->wait_list_lock, &pend);
if (rc)
goto put_ts_buff;
} else {
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
rc = -ENOMEM;
goto put_cq_cb;
}
hl_fence_init(&pend->fence, ULONG_MAX);
pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
pend->cq_target_value = target_value;
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
if (!data->buf) {
rc = -EINVAL;
goto put_cq_cb;
}
spin_lock(&interrupt->wait_list_lock);
spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
/* get ts buffer record */
rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
if (rc) {
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
goto put_ts_buff;
}
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
* before we add the timestamp node to the ts list.
*/
if (*pend->cq_kernel_addr >= target_value) {
if (register_ts_record)
pend->ts_reg_info.in_use = 0;
spin_unlock(&interrupt->wait_list_lock);
if (*pend->cq_kernel_addr >= data->target_value) {
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
pend, data->ts_offset, data->interrupt->interrupt_id);
pend->ts_reg_info.in_use = 0;
*status = HL_WAIT_CS_STATUS_COMPLETED;
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
goto put_ts_buff;
}
list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
hl_ctx_put(ctx);
return rc;
put_ts_buff:
hl_mmap_mem_buf_put(data->buf);
put_cq_cb:
hl_cb_put(data->cq_cb);
put_ctx:
hl_ctx_put(ctx);
return rc;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct wait_interrupt_data *data,
u32 *status, u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
unsigned long timeout, flags;
long completion_rc;
int rc = 0;
timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
hl_ctx_get(ctx);
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
if (!data->cq_cb) {
rc = -EINVAL;
goto put_ctx;
}
/* Validate the cq offset */
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
rc = -EINVAL;
goto put_cq_cb;
}
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
rc = -ENOMEM;
goto put_cq_cb;
}
hl_fence_init(&pend->fence, ULONG_MAX);
pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
pend->cq_target_value = data->target_value;
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received
* before we add the wait node to the wait list.
*/
if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
if (*pend->cq_kernel_addr >= data->target_value)
*status = HL_WAIT_CS_STATUS_COMPLETED;
else
*status = HL_WAIT_CS_STATUS_BUSY;
if (register_ts_record) {
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
goto put_ts_buff;
} else {
pend->fence.timestamp = ktime_get();
goto set_timestamp;
}
} else if (!timeout_us) {
spin_unlock(&interrupt->wait_list_lock);
*status = HL_WAIT_CS_STATUS_BUSY;
pend->fence.timestamp = ktime_get();
goto set_timestamp;
}
@ -3366,55 +3458,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* Note that we cannot have sorted list by target value,
* in order to shorten the list pass loop, since
* same list could have nodes for different cq counter handle.
* Note:
* Mark ts buff offset as in use here in the spinlock protection area
* to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
* before adding the node to the list. this scenario might happen when
* multiple threads are racing on same offset and one thread could
* set the ts buff in ts_buff_get_kernel_ts_record then the other thread
* takes over and get to ts_buff_get_kernel_ts_record and then we will try
* to re-use the same ts buff offset, and will try to delete a non existing
* node from the list.
*/
if (register_ts_record)
pend->ts_reg_info.in_use = 1;
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock(&interrupt->wait_list_lock);
if (register_ts_record) {
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
goto ts_registration_exit;
}
list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
/* Wait for interrupt handler to signal completion */
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
timeout);
if (completion_rc > 0) {
*status = HL_WAIT_CS_STATUS_COMPLETED;
if (pend->fence.error == -EIO) {
dev_err_ratelimited(hdev->dev,
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
pend->fence.error);
rc = -EIO;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
*status = HL_WAIT_CS_STATUS_COMPLETED;
}
} else {
if (completion_rc == -ERESTARTSYS) {
dev_err_ratelimited(hdev->dev,
"user process got signal while waiting for interrupt ID %d\n",
interrupt->interrupt_id);
data->interrupt->interrupt_id);
rc = -EINTR;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
if (pend->fence.error == -EIO) {
dev_err_ratelimited(hdev->dev,
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
pend->fence.error);
rc = -EIO;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
/* The wait has timed-out. We don't know anything beyond that
* because the workload wasn't submitted through the driver.
* Therefore, from driver's perspective, the workload is still
* executing.
*/
rc = 0;
*status = HL_WAIT_CS_STATUS_BUSY;
}
/* The wait has timed-out. We don't know anything beyond that
* because the workload was not submitted through the driver.
* Therefore, from driver's perspective, the workload is still
* executing.
*/
rc = 0;
*status = HL_WAIT_CS_STATUS_BUSY;
}
}
@ -3424,23 +3499,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* for ts record, the node will be deleted in the irq handler after
* we reach the target value.
*/
spin_lock(&interrupt->wait_list_lock);
list_del(&pend->wait_list_node);
spin_unlock(&interrupt->wait_list_lock);
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
list_del(&pend->list_node);
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
set_timestamp:
*timestamp = ktime_to_ns(pend->fence.timestamp);
kfree(pend);
hl_cb_put(cq_cb);
ts_registration_exit:
hl_cb_put(data->cq_cb);
hl_ctx_put(ctx);
return rc;
put_ts_buff:
hl_mmap_mem_buf_put(buf);
put_cq_cb:
hl_cb_put(cq_cb);
hl_cb_put(data->cq_cb);
put_ctx:
hl_ctx_put(ctx);
@ -3454,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
unsigned long timeout;
unsigned long timeout, flags;
u64 completion_value;
long completion_rc;
int rc = 0;
@ -3474,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
*/
spin_lock(&interrupt->wait_list_lock);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock(&interrupt->wait_list_lock);
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_add_tail(&pend->list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
@ -3507,14 +3579,14 @@ wait_again:
* If comparison fails, keep waiting until timeout expires
*/
if (completion_rc > 0) {
spin_lock(&interrupt->wait_list_lock);
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
/* reinit_completion must be called before we check for user
* completion value, otherwise, if interrupt is received after
* the comparison and before the next wait_for_completion,
* we will reach timeout and fail
*/
reinit_completion(&pend->fence.completion);
spin_unlock(&interrupt->wait_list_lock);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
@ -3551,9 +3623,9 @@ wait_again:
}
remove_pending_user_interrupt:
spin_lock(&interrupt->wait_list_lock);
list_del(&pend->wait_list_node);
spin_unlock(&interrupt->wait_list_lock);
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_del(&pend->list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*timestamp = ktime_to_ns(pend->fence.timestamp);
@ -3611,19 +3683,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return -EINVAL;
}
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
args->in.cq_counters_offset,
args->in.target, interrupt,
!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
args->in.timestamp_handle, args->in.timestamp_offset,
&status, &timestamp);
else
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
struct wait_interrupt_data wait_intr_data = {0};
wait_intr_data.interrupt = interrupt;
wait_intr_data.mmg = &hpriv->mem_mgr;
wait_intr_data.cq_handle = args->in.cq_counters_handle;
wait_intr_data.cq_offset = args->in.cq_counters_offset;
wait_intr_data.ts_handle = args->in.timestamp_handle;
wait_intr_data.ts_offset = args->in.timestamp_offset;
wait_intr_data.target_value = args->in.target;
wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
/*
* Allow only one registration at a time. This is needed in order to prevent
* issues while handling the flow of re-use of the same offset.
* Since the registration flow is protected only by the interrupt lock,
* the re-use flow might request to move a ts node to another interrupt list,
* and in such a case we're not protected.
*/
mutex_lock(&hpriv->ctx->ts_reg_lock);
rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
&status, &timestamp);
mutex_unlock(&hpriv->ctx->ts_reg_lock);
} else
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
&status, &timestamp);
} else {
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt, &status,
&timestamp);
}
if (rc)
return rc;
@ -3638,8 +3733,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
union hl_wait_cs_args *args = data;
u32 flags = args->in.flags;

View File

@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
kfree(ctx->cs_pending);
if (ctx->asid != HL_KERNEL_ASID_ID) {
dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
mutex_destroy(&ctx->ts_reg_lock);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);
@ -198,6 +199,7 @@ out_err:
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
char task_comm[TASK_COMM_LEN];
int rc = 0, i;
ctx->hdev = hdev;
@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
hl_encaps_sig_mgr_init(&ctx->sig_mgr);
dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
get_task_comm(task_comm, current), ctx->asid);
}
return 0;

View File

@ -18,8 +18,6 @@
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
#define I2C_MAX_TRANSACTION_LEN 8
static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, u8 i2c_len, u64 *val)
{
@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
dev_entry->root = hdev->drm.accel->debugfs_root;
add_files_to_device(hdev, dev_entry, dev_entry->root);
if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry, dev_entry->root);
}
void hl_debugfs_remove_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
debugfs_remove_recursive(entry->root);
}
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
up_write(&dev_entry->state_dump_sem);
}
void __init hl_debugfs_init(void)
{
hl_debug_root = debugfs_create_dir("habanalabs", NULL);
}
void hl_debugfs_fini(void)
{
debugfs_remove_recursive(hl_debug_root);
}

View File

@ -14,11 +14,14 @@
#include <linux/hwmon.h>
#include <linux/vmalloc.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <trace/events/habanalabs.h>
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5
#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30
enum dma_alloc_type {
DMA_ALLOC_COHERENT,
@ -185,7 +188,36 @@ void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir, const char *caller)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
int rc, i;
rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir);
if (rc)
return rc;
if (!trace_habanalabs_dma_map_page_enabled())
return 0;
for_each_sgtable_dma_sg(sgt, sg, i)
trace_habanalabs_dma_map_page(hdev->dev,
page_to_phys(sg_page(sg)),
sg->dma_address - prop->device_dma_offset_for_host_access,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->dma_length,
#else
sg->length,
#endif
dir, caller);
return 0;
}
int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
@ -203,7 +235,30 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da
return 0;
}
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir, const char *caller)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
int i;
hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir);
if (trace_habanalabs_dma_unmap_page_enabled()) {
for_each_sgtable_dma_sg(sgt, sg, i)
trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)),
sg->dma_address - prop->device_dma_offset_for_host_access,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->dma_length,
#else
sg->length,
#endif
dir, caller);
}
}
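
The _caller() variants above take the caller's name only to enrich the DMA map/unmap tracepoints. Callers would presumably reach them through thin wrappers that pass __func__ automatically; the macro names below are an assumption for illustration, not necessarily the driver's exact definitions.

#define hl_dma_map_sgtable(hdev, sgt, dir) \
	hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__)
#define hl_dma_unmap_sgtable(hdev, sgt, dir) \
	hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__)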
void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
@ -315,7 +370,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
if (hdev->reset_info.in_reset) {
if (hdev->device_fini_pending) {
status = HL_DEVICE_STATUS_MALFUNCTION;
} else if (hdev->reset_info.in_reset) {
if (hdev->reset_info.in_compute_reset)
status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
else
@ -343,9 +400,9 @@ bool hl_device_operational(struct hl_device *hdev,
*status = current_status;
switch (current_status) {
case HL_DEVICE_STATUS_MALFUNCTION:
case HL_DEVICE_STATUS_IN_RESET:
case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
case HL_DEVICE_STATUS_MALFUNCTION:
case HL_DEVICE_STATUS_NEEDS_RESET:
return false;
case HL_DEVICE_STATUS_OPERATIONAL:
@ -406,8 +463,6 @@ static void hpriv_release(struct kref *ref)
hdev->asic_funcs->send_device_activity(hdev, false);
put_pid(hpriv->taskpid);
hl_debugfs_remove_file(hpriv);
mutex_destroy(&hpriv->ctx_lock);
@ -424,7 +479,7 @@ static void hpriv_release(struct kref *ref)
/* Check the device idle status and reset if not idle.
* Skip it if already in reset, or if device is going to be reset in any case.
*/
if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
if (!device_is_idle) {
@ -446,14 +501,18 @@ static void hpriv_release(struct kref *ref)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);
put_pid(hpriv->taskpid);
if (reset_device) {
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
} else {
/* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
int rc = hdev->asic_funcs->scrub_device_mem(hdev);
if (rc)
if (rc) {
dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
}
/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
@ -516,24 +575,20 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message
}
/*
* hl_device_release - release function for habanalabs device
*
* @inode: pointer to inode structure
* @filp: pointer to file structure
* hl_device_release() - release function for habanalabs device.
* @ddev: pointer to DRM device structure.
* @file: pointer to DRM file private data structure.
*
* Called when process closes an habanalabs device
*/
static int hl_device_release(struct inode *inode, struct file *filp)
void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = filp->private_data;
struct hl_device *hdev = hpriv->hdev;
filp->private_data = NULL;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = to_hl_device(ddev);
if (!hdev) {
pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
put_pid(hpriv->taskpid);
return 0;
}
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
@ -551,8 +606,6 @@ static int hl_device_release(struct inode *inode, struct file *filp)
}
hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
return 0;
}
static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
@ -571,11 +624,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
/* release the eventfd */
if (hpriv->notifier_event.eventfd)
eventfd_ctx_put(hpriv->notifier_event.eventfd);
mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);
kfree(hpriv);
@ -583,18 +631,8 @@ out:
return 0;
}
/*
* hl_mmap - mmap function for habanalabs device
*
* @*filp: pointer to file structure
* @*vma: pointer to vm_area_struct of the process
*
* Called when process does an mmap on habanalabs device. Call the relevant mmap
* function at the end of the common code.
*/
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
struct hl_fpriv *hpriv = filp->private_data;
struct hl_device *hdev = hpriv->hdev;
unsigned long vm_pgoff;
@ -617,14 +655,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
return -EINVAL;
}
static const struct file_operations hl_ops = {
.owner = THIS_MODULE,
.open = hl_device_open,
.release = hl_device_release,
.mmap = hl_mmap,
.unlocked_ioctl = hl_ioctl,
.compat_ioctl = hl_ioctl
};
/*
* hl_mmap - mmap function for habanalabs device
*
* @*filp: pointer to file structure
* @*vma: pointer to vm_area_struct of the process
*
* Called when process does an mmap on habanalabs device. Call the relevant mmap
* function at the end of the common code.
*/
int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct drm_file *file_priv = filp->private_data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
return __hl_mmap(hpriv, vma);
}
static const struct file_operations hl_ctrl_ops = {
.owner = THIS_MODULE,
@ -645,14 +691,14 @@ static void device_release_func(struct device *dev)
* @hdev: pointer to habanalabs device structure
* @class: pointer to the class object of the device
* @minor: minor number of the specific device
* @fpos: file operations to install for this device
* @fops: file operations to install for this device
* @name: name of the device as it will appear in the filesystem
* @cdev: pointer to the char device object that will be initialized
* @dev: pointer to the device object that will be initialized
*
* Initialize a cdev and a Linux device for habanalabs's device.
*/
static int device_init_cdev(struct hl_device *hdev, struct class *class,
static int device_init_cdev(struct hl_device *hdev, const struct class *class,
int minor, const struct file_operations *fops,
char *name, struct cdev *cdev,
struct device **dev)
@ -676,23 +722,26 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class,
static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
{
const struct class *accel_class = hdev->drm.accel->kdev->class;
char name[32];
int rc;
rc = cdev_device_add(&hdev->cdev, hdev->dev);
if (rc) {
dev_err(hdev->dev,
"failed to add a char device to the system\n");
hdev->cdev_idx = hdev->drm.accel->index;
/* Initialize cdev and device structures for the control device */
snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx);
rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name,
&hdev->cdev_ctrl, &hdev->dev_ctrl);
if (rc)
return rc;
}
rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
if (rc) {
dev_err(hdev->dev,
"failed to add a control char device to the system\n");
goto delete_cdev_device;
dev_err(hdev->dev_ctrl,
"failed to add an accel control char device to the system\n");
goto free_ctrl_device;
}
/* hl_sysfs_init() must be done after adding the device to the system */
rc = hl_sysfs_init(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize sysfs\n");
@ -707,23 +756,19 @@ static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
delete_ctrl_cdev_device:
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
cdev_device_del(&hdev->cdev, hdev->dev);
free_ctrl_device:
put_device(hdev->dev_ctrl);
return rc;
}
static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
{
if (!hdev->cdev_sysfs_debugfs_created)
goto put_devices;
return;
hl_debugfs_remove_device(hdev);
hl_sysfs_fini(hdev);
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
cdev_device_del(&hdev->cdev, hdev->dev);
put_devices:
put_device(hdev->dev);
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
put_device(hdev->dev_ctrl);
}
@ -996,6 +1041,20 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
}
static void hl_device_eq_heartbeat(struct hl_device *hdev)
{
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct asic_fixed_properties *prop = &hdev->asic_prop;
if (!prop->cpucp_info.eq_health_check_supported)
return;
if (hdev->eq_heartbeat_received)
hdev->eq_heartbeat_received = false;
else
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
}
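
hl_device_eq_heartbeat() only consumes the flag: it clears eq_heartbeat_received when set and escalates to a hard reset otherwise. The producer side is the ASIC event-queue handler, which is expected to set the flag whenever the firmware's heartbeat event arrives; sketched below with an illustrative function name.

/* Hypothetical ASIC EQ handler fragment: mark the FW heartbeat as seen. */
static void hl_eq_heartbeat_event_sketch(struct hl_device *hdev)
{
	hdev->eq_heartbeat_received = true;
}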
static void hl_device_heartbeat(struct work_struct *work)
{
struct hl_device *hdev = container_of(work, struct hl_device,
@ -1003,9 +1062,16 @@ static void hl_device_heartbeat(struct work_struct *work)
struct hl_info_fw_err_info info = {0};
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
if (!hl_device_operational(hdev, NULL))
/* Start heartbeat checks only after driver has enabled events from FW */
if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
goto reschedule;
/*
* For the EQ health check, we need to check whether the driver received the
* heartbeat EQ event, in order to validate that the EQ is working.
*/
hl_device_eq_heartbeat(hdev);
if (!hdev->asic_funcs->send_heartbeat(hdev))
goto reschedule;
@ -1062,7 +1128,15 @@ static int device_late_init(struct hl_device *hdev)
hdev->high_pll = hdev->asic_prop.high_pll;
if (hdev->heartbeat) {
/*
* Before scheduling the heartbeat, the driver checks whether the heartbeat EQ event
* has been received. For the first schedule the indication is set to true; for the
* next ones it will be true only if the EQ event was actually sent by the FW.
*/
hdev->eq_heartbeat_received = true;
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
@ -1302,18 +1376,18 @@ disable_device:
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
{
struct task_struct *task = NULL;
struct list_head *fd_list;
struct hl_fpriv *hpriv;
struct mutex *fd_lock;
struct list_head *hpriv_list;
struct hl_fpriv *hpriv;
struct mutex *hpriv_lock;
u32 pending_cnt;
fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
/* Giving time for user to close FD, and for processes that are inside
* hl_device_open to finish
*/
if (!list_empty(fd_list))
if (!list_empty(hpriv_list))
ssleep(1);
if (timeout) {
@ -1329,12 +1403,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
}
}
mutex_lock(fd_lock);
mutex_lock(hpriv_lock);
/* This section must be protected because we are dereferencing
* pointers that are freed if the process exits
*/
list_for_each_entry(hpriv, fd_list, dev_node) {
list_for_each_entry(hpriv, hpriv_list, dev_node) {
task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
if (task) {
dev_info(hdev->dev, "Killing user process pid=%d\n",
@ -1344,17 +1418,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
put_task_struct(task);
} else {
/*
* If we got here, it means that process was killed from outside the driver
* right after it started looping on fd_list and before get_pid_task, thus
* we don't need to kill it.
*/
dev_dbg(hdev->dev,
"Can't get task struct for user process, assuming process was killed from outside the driver\n");
"Can't get task struct for user process %d, process was killed from outside the driver\n",
pid_nr(hpriv->taskpid));
}
}
mutex_unlock(fd_lock);
mutex_unlock(hpriv_lock);
/*
* We killed the open users, but that doesn't mean they are closed.
@ -1366,7 +1436,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
*/
wait_for_processes:
while ((!list_empty(fd_list)) && (pending_cnt)) {
while ((!list_empty(hpriv_list)) && (pending_cnt)) {
dev_dbg(hdev->dev,
"Waiting for all unmap operations to finish before hard reset\n");
@ -1376,7 +1446,7 @@ wait_for_processes:
}
/* All processes exited successfully */
if (list_empty(fd_list))
if (list_empty(hpriv_list))
return 0;
/* Give up waiting for processes to exit */
@ -1390,17 +1460,17 @@ wait_for_processes:
static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
struct list_head *fd_list;
struct list_head *hpriv_list;
struct hl_fpriv *hpriv;
struct mutex *fd_lock;
struct mutex *hpriv_lock;
fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
mutex_lock(fd_lock);
list_for_each_entry(hpriv, fd_list, dev_node)
mutex_lock(hpriv_lock);
list_for_each_entry(hpriv, hpriv_list, dev_node)
hpriv->hdev = NULL;
mutex_unlock(fd_lock);
mutex_unlock(hpriv_lock);
}
static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
@ -1916,7 +1986,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
}
ctx = hl_get_compute_ctx(hdev);
if (!ctx || !ctx->hpriv->notifier_event.eventfd)
if (!ctx)
goto device_reset;
/*
* There is no point in postponing the reset if the user is not registered for events.
* However, if no eventfd_ctx exists but the device release watchdog is already scheduled,
* it just implies that the user has unregistered as part of handling a previous event.
* In this case an immediate reset is not required.
*/
if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active)
goto device_reset;
/* Schedule the device release watchdog work unless reset is already in progress or if the
@ -1928,8 +2007,10 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
goto device_reset;
}
if (hdev->reset_info.watchdog_active)
if (hdev->reset_info.watchdog_active) {
hdev->device_release_watchdog_work.flags |= flags;
goto out;
}
hdev->device_release_watchdog_work.flags = flags;
dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
@ -1990,59 +2071,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
hl_notifier_event_send(&hpriv->notifier_event, event_mask);
mutex_unlock(&hdev->fpriv_list_lock);
/* control device */
mutex_lock(&hdev->fpriv_ctrl_list_lock);
list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
hl_notifier_event_send(&hpriv->notifier_event, event_mask);
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
static int create_cdev(struct hl_device *hdev)
{
char *name;
int rc;
hdev->cdev_idx = hdev->id / 2;
name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto out_err;
}
/* Initialize cdev and device structures */
rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
&hdev->cdev, &hdev->dev);
kfree(name);
if (rc)
goto out_err;
name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto free_dev;
}
/* Initialize cdev and device structures for control device */
rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
kfree(name);
if (rc)
goto free_dev;
return 0;
free_dev:
put_device(hdev->dev);
out_err:
return rc;
}
/*
@ -2057,16 +2085,14 @@ out_err:
int hl_device_init(struct hl_device *hdev)
{
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
struct hl_ts_free_jobs *free_jobs_data;
bool expose_interfaces_on_err = false;
rc = create_cdev(hdev);
if (rc)
goto out_disabled;
void *p;
/* Initialize ASIC function pointers and perform early init */
rc = device_early_init(hdev);
if (rc)
goto free_dev;
goto out_disabled;
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
hdev->asic_prop.user_interrupt_count;
@ -2078,15 +2104,43 @@ int hl_device_init(struct hl_device *hdev)
rc = -ENOMEM;
goto early_fini;
}
/* Timestamp records supported only if CQ supported in device */
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
for (i = 0 ; i < user_interrupt_cnt ; i++) {
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
sizeof(struct timestamp_reg_free_node));
if (!p) {
rc = -ENOMEM;
goto free_usr_intr_mem;
}
free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
free_jobs_data->free_nodes_pool = p;
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
free_jobs_data->next_avail_free_node_idx = 0;
}
}
}
free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
sizeof(struct timestamp_reg_free_node));
if (!p) {
rc = -ENOMEM;
goto free_usr_intr_mem;
}
free_jobs_data->free_nodes_pool = p;
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
free_jobs_data->next_avail_free_node_idx = 0;
/*
* Start calling ASIC initialization. First S/W then H/W and finally
* late init
*/
rc = hdev->asic_funcs->sw_init(hdev);
if (rc)
goto free_usr_intr_mem;
goto free_common_usr_intr_mem;
/* initialize completion structure for multi CS wait */
@ -2253,6 +2307,14 @@ int hl_device_init(struct hl_device *hdev)
* From here there is no need to expose them in case of an error.
*/
expose_interfaces_on_err = false;
rc = drm_dev_register(&hdev->drm, 0);
if (rc) {
dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc);
rc = 0;
goto out_disabled;
}
rc = cdev_sysfs_debugfs_add(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
@ -2284,8 +2346,6 @@ int hl_device_init(struct hl_device *hdev)
"Successfully added device %s to habanalabs driver\n",
dev_name(&(hdev)->pdev->dev));
hdev->init_done = true;
/* After initialization is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events and if
* those events are fatal, we won't know about it and the device will
@ -2293,6 +2353,8 @@ int hl_device_init(struct hl_device *hdev)
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
hdev->init_done = true;
return 0;
cb_pool_fini:
@ -2317,19 +2379,27 @@ hw_queues_destroy:
hl_hw_queues_destroy(hdev);
sw_fini:
hdev->asic_funcs->sw_fini(hdev);
free_common_usr_intr_mem:
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
free_usr_intr_mem:
kfree(hdev->user_interrupt);
if (user_interrupt_cnt) {
for (i = 0 ; i < user_interrupt_cnt ; i++) {
if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
break;
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
}
kfree(hdev->user_interrupt);
}
early_fini:
device_early_fini(hdev);
free_dev:
put_device(hdev->dev_ctrl);
put_device(hdev->dev);
out_disabled:
hdev->disabled = true;
if (expose_interfaces_on_err)
if (expose_interfaces_on_err) {
drm_dev_register(&hdev->drm, 0);
cdev_sysfs_debugfs_add(hdev);
dev_err(&hdev->pdev->dev,
"Failed to initialize hl%d. Device %s is NOT usable !\n",
}
pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n",
hdev->cdev_idx, dev_name(&hdev->pdev->dev));
return rc;
@ -2344,12 +2414,13 @@ out_disabled:
*/
void hl_device_fini(struct hl_device *hdev)
{
u32 user_interrupt_cnt;
bool device_in_reset;
ktime_t timeout;
u64 reset_sec;
int i, rc;
dev_info(hdev->dev, "Removing device\n");
dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev));
hdev->device_fini_pending = 1;
flush_delayed_work(&hdev->device_reset_work.reset_work);
@ -2425,14 +2496,14 @@ void hl_device_fini(struct hl_device *hdev)
hdev->process_kill_trial_cnt = 0;
rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
if (rc) {
dev_crit(hdev->dev, "Failed to kill all open processes\n");
dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc);
device_disable_open_processes(hdev, false);
}
hdev->process_kill_trial_cnt = 0;
rc = device_kill_open_processes(hdev, 0, true);
if (rc) {
dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc);
device_disable_open_processes(hdev, true);
}
@ -2464,7 +2535,20 @@ void hl_device_fini(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
kfree(hdev->user_interrupt);
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
hdev->asic_prop.user_interrupt_count;
if (user_interrupt_cnt) {
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
for (i = 0 ; i < user_interrupt_cnt ; i++)
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
}
kfree(hdev->user_interrupt);
}
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
hl_hw_queues_destroy(hdev);
@ -2475,6 +2559,7 @@ void hl_device_fini(struct hl_device *hdev)
/* Hide devices and sysfs/debugfs files from user */
cdev_sysfs_debugfs_remove(hdev);
drm_dev_unregister(&hdev->drm);
hl_debugfs_device_fini(hdev);
@ -2690,6 +2775,20 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
*info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
}
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count)
{
struct engine_err_info *info = &hdev->captured_err_info.engine_err;
/* Capture only the first engine error */
if (atomic_cmpxchg(&info->event_detected, 0, 1))
return;
info->event.timestamp = ktime_to_ns(ktime_get());
info->event.engine_id = engine_id;
info->event.error_count = error_count;
info->event_info_available = true;
}
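hl_capture_engine_err() above latches only the first engine error after boot; the atomic_cmpxchg() drops any later reports. A minimal sketch of a caller follows, with the engine id and error count as made-up illustrative values rather than anything taken from this series:

/* Illustrative only: the values would normally come from the ASIC event queue. */
static void example_report_engine_error(struct hl_device *hdev)
{
	u16 engine_id = 3;	/* hypothetical engine index */
	u16 error_count = 1;	/* hypothetical error counter from the event */

	/* Only the first engine error after boot is recorded. */
	hl_capture_engine_err(hdev, engine_id, error_count);
}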
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
{
vfree(captured_err_info->page_fault_info.user_mappings);


@ -6,7 +6,7 @@
*/
#include "habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include <linux/firmware.h>
#include <linux/crc32.h>
@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
/* Ignore this bit, don't prevent driver loading */
dev_dbg(hdev->dev, "device unusable status is set\n");
@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
dev_err(hdev->dev,
"Device boot progress - Stuck in preboot after security initialization\n");
break;
case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
dev_err(hdev->dev,
"Device boot progress - Stuck in preparation for shutdown\n");
break;
default:
dev_err(hdev->dev,
"Device boot progress - Invalid or unexpected status code %d\n", status);
@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
int hl_fw_wait_preboot_ready(struct hl_device *hdev)
{
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
u32 status;
int rc;
u32 status = 0, timeout;
int rc, tries = 1;
bool preboot_still_runs;
/* Need to check two possible scenarios:
*
@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
* All other status values - for older firmwares where the uboot was
* loaded from the FLASH
*/
timeout = pre_fw_load->wait_for_preboot_timeout;
retry:
rc = hl_poll_timeout(
hdev,
pre_fw_load->cpu_boot_status_reg,
@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
hdev->fw_poll_interval_usec,
pre_fw_load->wait_for_preboot_timeout);
timeout);
/*
* If F/W reports "security-ready", preboot might take longer.
* If the field 'wait_for_preboot_extended_timeout' is non-zero, we wait
* again with that timeout.
*/
preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
status == CPU_BOOT_STATUS_IN_PREBOOT ||
status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
status == CPU_BOOT_STATUS_DRAM_RDY);
if (rc && tries && preboot_still_runs) {
tries--;
if (pre_fw_load->wait_for_preboot_extended_timeout) {
timeout = pre_fw_load->wait_for_preboot_extended_timeout;
goto retry;
}
}
if (rc) {
detect_cpu_boot_status(hdev, status);
@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
if (rc)
goto protocol_err;
@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning);
}
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
}
return 0;
}


@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2022 HabanaLabs, Ltd.
* Copyright 2016-2023 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@ -8,7 +8,7 @@
#ifndef HABANALABSP_H_
#define HABANALABSP_H_
#include "../include/common/cpucp_if.h"
#include <linux/habanalabs/cpucp_if.h>
#include "../include/common/qman_if.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include <uapi/drm/habanalabs_accel.h>
@ -29,6 +29,9 @@
#include <linux/coresight.h>
#include <linux/dma-buf.h>
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include "security.h"
#define HL_NAME "habanalabs"
@ -82,8 +85,6 @@ struct hl_fpriv;
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */
#define HL_INVALID_QUEUE UINT_MAX
#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF
@ -103,6 +104,8 @@ struct hl_fpriv;
/* MMU */
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
#define TIMESTAMP_FREE_NODES_NUM 512
/**
* enum hl_mmu_page_table_location - mmu page table location
* @MMU_DR_PGT: page-table is located on device DRAM.
@ -154,6 +157,11 @@ enum hl_mmu_page_table_location {
#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
#define hl_dma_map_sgtable(hdev, sgt, dir) \
hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__)
#define hl_dma_unmap_sgtable(hdev, sgt, dir) \
hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__)
/*
* Reset Flags
*
@ -545,8 +553,7 @@ struct hl_hints_range {
* allocated with huge pages.
* @hints_dram_reserved_va_range: dram hint addresses reserved range.
* @hints_host_reserved_va_range: host hint addresses reserved range.
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
* range.
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
@ -585,7 +592,7 @@ struct hl_hints_range {
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
* @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
* @dram_page_size: page size for MMU DRAM allocation.
* @dram_page_size: The DRAM physical page size.
* @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM.
* @max_asid: maximum number of open contexts (ASIDs).
@ -641,6 +648,7 @@ struct hl_hints_range {
* @glbl_err_cause_num: global err cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@ -686,9 +694,10 @@ struct hl_hints_range {
* @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
* @dma_mask: the dma mask to be set for this device.
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
* @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@ -772,6 +781,7 @@ struct asic_fixed_properties {
u32 num_of_special_blocks;
u32 glbl_err_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@ -808,6 +818,7 @@ struct asic_fixed_properties {
u8 dma_mask;
u8 supports_advanced_cpucp_rc;
u8 supports_engine_modes;
u8 support_dynamic_resereved_fw_size;
};
/**
@ -1097,20 +1108,42 @@ enum hl_user_interrupt_type {
HL_USR_INTERRUPT_UNEXPECTED
};
/**
* struct hl_ts_free_jobs - holds user interrupt ts free nodes related data
* @free_nodes_pool: pool of nodes to be used for free timestamp jobs
* @free_nodes_length: number of nodes in free_nodes_pool
* @next_avail_free_node_idx: index of the next free node in the pool
*
* The free nodes pool must be protected by the user interrupt lock
* to avoid races between different interrupts that use the same
* ts buffer with different offsets.
*/
struct hl_ts_free_jobs {
struct timestamp_reg_free_node *free_nodes_pool;
u32 free_nodes_length;
u32 next_avail_free_node_idx;
};
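A condensed sketch of how a node is expected to be claimed from this pool, mirroring handle_registration_node() further down in this series; the function name and locals are illustrative, and the caller is assumed to hold the interrupt's ts list lock:

/* Sketch: claim a pool slot, or fall back to a dynamic allocation when the
 * slot at the cursor is still being freed by the workqueue.
 */
static struct timestamp_reg_free_node *
example_claim_free_node(struct hl_ts_free_jobs *pool)
{
	u32 idx = pool->next_avail_free_node_idx;
	struct timestamp_reg_free_node *node = &pool->free_nodes_pool[idx];

	if (atomic_cmpxchg(&node->in_use, 0, 1)) {
		/* Slot busy: allocate a one-off node for this event. */
		node = kmalloc(sizeof(*node), GFP_ATOMIC);
		if (node)
			node->dynamic_alloc = 1;
		return node;
	}

	/* Advance the cursor, wrapping at the pool length. */
	pool->next_avail_free_node_idx = (idx + 1) % pool->free_nodes_length;
	return node;
}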
/**
* struct hl_user_interrupt - holds user interrupt information
* @hdev: pointer to the device structure
* @ts_free_jobs_data: timestamp free jobs related data
* @type: user interrupt type
* @wait_list_head: head to the list of user threads pending on this interrupt
* @ts_list_head: head to the list of timestamp records
* @wait_list_lock: protects wait_list_head
* @ts_list_lock: protects ts_list_head
* @timestamp: last timestamp taken upon interrupt
* @interrupt_id: msix interrupt id
*/
struct hl_user_interrupt {
struct hl_device *hdev;
struct hl_ts_free_jobs ts_free_jobs_data;
enum hl_user_interrupt_type type;
struct list_head wait_list_head;
struct list_head ts_list_head;
spinlock_t wait_list_lock;
spinlock_t ts_list_lock;
ktime_t timestamp;
u32 interrupt_id;
};
@ -1120,11 +1153,15 @@ struct hl_user_interrupt {
* @free_objects_node: node in the list free_obj_jobs
* @cq_cb: pointer to cq command buffer to be freed
* @buf: pointer to timestamp buffer to be freed
* @in_use: indicates whether the node is still in use by the workqueue thread.
* @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler
*/
struct timestamp_reg_free_node {
struct list_head free_objects_node;
struct hl_cb *cq_cb;
struct hl_mmap_mem_buf *buf;
atomic_t in_use;
u8 dynamic_alloc;
};
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@ -1133,17 +1170,21 @@ struct timestamp_reg_free_node {
* @free_obj: workqueue object to free timestamp registration node objects
* @hdev: pointer to the device structure
* @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
* @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the
* interrupt handler.
*/
struct timestamp_reg_work_obj {
struct work_struct free_obj;
struct hl_device *hdev;
struct list_head *free_obj_head;
struct list_head *dynamic_alloc_free_obj_head;
};
/* struct timestamp_reg_info - holds the timestamp registration related data.
* @buf: pointer to the timestamp buffer which include both user/kernel buffers.
* relevant only when doing timestamps records registration.
* @cq_cb: pointer to CQ counter CB.
* @interrupt: interrupt whose wait list this node is hanging on.
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
* relevant only when doing timestamps records
* registration.
@ -1153,17 +1194,18 @@ struct timestamp_reg_work_obj {
* allocating records dynamically.
*/
struct timestamp_reg_info {
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
u64 *timestamp_kernel_addr;
u8 in_use;
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
struct hl_user_interrupt *interrupt;
u64 *timestamp_kernel_addr;
bool in_use;
};
/**
* struct hl_user_pending_interrupt - holds a context to a user thread
* pending on an interrupt
* @ts_reg_info: holds the timestamps registration nodes info
* @wait_list_node: node in the list of user threads pending on an interrupt
* @list_node: node in the list of user threads pending on an interrupt or timestamp
* @fence: hl fence object for interrupt completion
* @cq_target_value: CQ target value
* @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
@ -1171,7 +1213,7 @@ struct timestamp_reg_info {
*/
struct hl_user_pending_interrupt {
struct timestamp_reg_info ts_reg_info;
struct list_head wait_list_node;
struct list_head list_node;
struct hl_fence fence;
u64 cq_target_value;
u64 *cq_kernel_addr;
@ -1370,6 +1412,8 @@ struct dynamic_fw_load_mgr {
* @boot_err0_reg: boot_err0 register address
* @boot_err1_reg: boot_err1 register address
* @wait_for_preboot_timeout: timeout to poll for preboot ready
* @wait_for_preboot_extended_timeout: timeout to poll for preboot ready in cases where
* preboot is known to need more time.
*/
struct pre_fw_load_props {
u32 cpu_boot_status_reg;
@ -1378,6 +1422,7 @@ struct pre_fw_load_props {
u32 boot_err0_reg;
u32 boot_err1_reg;
u32 wait_for_preboot_timeout;
u32 wait_for_preboot_extended_timeout;
};
/**
@ -1477,11 +1522,9 @@ struct engines_data {
* @asic_dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
* @asic_dma_unmap_single: unmap a single DMA buffer
* @asic_dma_map_single: map a single buffer to a DMA
* @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
* @dma_unmap_sgtable: DMA unmap scatter-gather table.
* @dma_map_sgtable: DMA map scatter-gather table.
* @cs_parser: parse Command Submission.
* @asic_dma_map_sgtable: DMA map scatter-gather table.
* @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
* @update_eq_ci: update event queue CI.
* @context_switch: called upon ASID context switch.
@ -1602,18 +1645,11 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr);
void (*asic_dma_unmap_single)(struct hl_device *hdev,
dma_addr_t dma_addr, int len,
void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev,
void *addr, int len,
enum dma_data_direction dir);
void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
struct sg_table *sgt,
int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
void (*add_end_of_cb_packets)(struct hl_device *hdev,
void *kernel_address, u32 len,
u32 original_len,
@ -1771,16 +1807,19 @@ struct hl_cs_counters_atomic {
* @phys_pg_pack: pointer to physical page pack if the dma-buf was exported
* where virtual memory is supported.
* @memhash_hnode: pointer to the memhash node. this object holds the export count.
* @device_address: physical address of the device's memory. Relevant only
* if phys_pg_pack is NULL (dma-buf was exported from address).
* The total size can be taken from the dmabuf object.
* @offset: the offset into the buffer from which the memory is exported.
* Relevant only if virtual memory is supported and phys_pg_pack is being used.
* @device_phys_addr: physical address of the device's memory. Relevant only
* if phys_pg_pack is NULL (dma-buf was exported from address).
* The total size can be taken from the dmabuf object.
*/
struct hl_dmabuf_priv {
struct dma_buf *dmabuf;
struct hl_ctx *ctx;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_vm_hash_node *memhash_hnode;
uint64_t device_address;
u64 offset;
u64 device_phys_addr;
};
#define HL_CS_OUTCOME_HISTORY_LEN 256
@ -1835,6 +1874,7 @@ struct hl_cs_outcome_store {
* @va_range: holds available virtual addresses for host and dram mappings.
* @mem_hash_lock: protects the mem_hash.
* @hw_block_list_lock: protects the HW block memory list.
* @ts_reg_lock: timestamp registration ioctls lock.
* @debugfs_list: node in debugfs list of contexts.
* @hw_block_mem_list: list of HW block virtual mapped addresses.
* @cs_counters: context command submission counters.
@ -1871,6 +1911,7 @@ struct hl_ctx {
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
struct mutex mem_hash_lock;
struct mutex hw_block_list_lock;
struct mutex ts_reg_lock;
struct list_head debugfs_list;
struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
@ -1917,17 +1958,17 @@ struct hl_ctx_mgr {
* @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
*/
struct hl_userptr {
enum vm_type vm_type; /* must be first */
struct list_head job_node;
struct page **pages;
unsigned int npages;
struct sg_table *sgt;
enum dma_data_direction dir;
struct list_head debugfs_list;
pid_t pid;
u64 addr;
u64 size;
u8 dma_mapped;
enum vm_type vm_type; /* must be first */
struct list_head job_node;
struct page **pages;
unsigned int npages;
struct sg_table *sgt;
enum dma_data_direction dir;
struct list_head debugfs_list;
pid_t pid;
u64 addr;
u64 size;
u8 dma_mapped;
};
/**
@ -2148,7 +2189,6 @@ struct hl_vm_hw_block_list_node {
* @pages: the physical page array.
* @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list.
* @exported_size: buffer exported size.
* @node: used to attach to deletion list that is used when all the allocations are cleared
* at the teardown of the context.
* @mapping_cnt: number of shared mappings.
@ -2165,7 +2205,6 @@ struct hl_vm_phys_pg_pack {
u64 *pages;
u64 npages;
u64 total_size;
u64 exported_size;
struct list_head node;
atomic_t mapping_cnt;
u32 asid;
@ -2250,7 +2289,7 @@ struct hl_notifier_event {
/**
* struct hl_fpriv - process information stored in FD private data.
* @hdev: habanalabs device structure.
* @filp: pointer to the given file structure.
* @file_priv: pointer to the DRM file private data structure.
* @taskpid: current process ID.
* @ctx: current executing context. TODO: remove for multiple ctx per process
* @ctx_mgr: context manager to handle multiple context for this FD.
@ -2265,7 +2304,7 @@ struct hl_notifier_event {
*/
struct hl_fpriv {
struct hl_device *hdev;
struct file *filp;
struct drm_file *file_priv;
struct pid *taskpid;
struct hl_ctx *ctx;
struct hl_ctx_mgr ctx_mgr;
@ -2706,6 +2745,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
usr_intr.type = intr_type; \
INIT_LIST_HEAD(&usr_intr.wait_list_head); \
spin_lock_init(&usr_intr.wait_list_lock); \
INIT_LIST_HEAD(&usr_intr.ts_list_head); \
spin_lock_init(&usr_intr.ts_list_lock); \
})
struct hwmon_chip_info;
@ -3054,6 +3095,20 @@ struct fw_err_info {
bool event_info_available;
};
/**
* struct engine_err_info - engine error information.
* @event: holds information on the event.
* @event_detected: if set to 1, an engine event was discovered for the
* first time after the driver finished booting up.
* @event_info_available: indicates that an engine event info is now available.
*/
struct engine_err_info {
struct hl_info_engine_err_event event;
atomic_t event_detected;
bool event_info_available;
};
/**
* struct hl_error_info - holds information collected during an error.
* @cs_timeout: CS timeout error information.
@ -3062,6 +3117,7 @@ struct fw_err_info {
* @page_fault_info: page fault information.
* @hw_err: (fatal) hardware error information.
* @fw_err: firmware error information.
* @engine_err: engine error information.
*/
struct hl_error_info {
struct cs_timeout_info cs_timeout;
@ -3070,6 +3126,7 @@ struct hl_error_info {
struct page_fault_info page_fault_info;
struct hw_err_info hw_err;
struct fw_err_info fw_err;
struct engine_err_info engine_err;
};
/**
@ -3117,8 +3174,7 @@ struct hl_reset_info {
* (required only for PCI address match mode)
* @pcie_bar: array of available PCIe bars virtual addresses.
* @rmmio: configuration area address on SRAM.
* @hclass: pointer to the habanalabs class.
* @cdev: related char device.
* @drm: related DRM device.
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
* @dev: related kernel basic device structure.
* @dev_ctrl: related kernel device structure for the control device
@ -3245,8 +3301,7 @@ struct hl_reset_info {
* @rotator_binning: contains mask of rotators engines that is received from the f/w
* which indicates which rotator engines are binned-out(Gaudi3 and above).
* @id: device minor.
* @id_control: minor of the control device.
* @cdev_idx: char device index. Used for setting its name.
* @cdev_idx: char device index.
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
* addresses.
* @is_in_dram_scrub: true if dram scrub operation is on going.
@ -3289,6 +3344,7 @@ struct hl_reset_info {
* device.
* @supports_ctx_switch: true if a ctx switch is required upon first submission.
* @support_preboot_binning: true if we support read binning info from preboot.
* @eq_heartbeat_received: indication that an eq heartbeat event has been received from FW.
* @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
* @fw_components: Controls which f/w components to load to the device. There are multiple f/w
* stages and sometimes we want to stop at a certain stage. Used only for testing.
@ -3308,8 +3364,7 @@ struct hl_device {
u64 pcie_bar_phys[HL_PCI_NUM_BARS];
void __iomem *pcie_bar[HL_PCI_NUM_BARS];
void __iomem *rmmio;
struct class *hclass;
struct cdev cdev;
struct drm_device drm;
struct cdev cdev_ctrl;
struct device *dev;
struct device *dev_ctrl;
@ -3418,7 +3473,6 @@ struct hl_device {
u32 device_release_watchdog_timeout_sec;
u32 rotator_binning;
u16 id;
u16 id_control;
u16 cdev_idx;
u16 cpu_pci_msb_addr;
u8 is_in_dram_scrub;
@ -3451,6 +3505,7 @@ struct hl_device {
u8 reset_upon_device_release;
u8 supports_ctx_switch;
u8 support_preboot_binning;
u8 eq_heartbeat_received;
/* Parameters for bring-up to be upstreamed */
u64 nic_ports_mask;
@ -3582,6 +3637,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
return false;
}
static inline struct hl_device *to_hl_device(struct drm_device *ddev)
{
return container_of(ddev, struct hl_device, drm);
}
/**
* hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
* @address: The start address of the area we want to validate.
@ -3611,8 +3671,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t
dma_addr_t *dma_handle, const char *caller);
void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
const char *caller);
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir, const char *caller);
void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir, const char *caller);
int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
@ -3620,7 +3685,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type);
int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
u64 addr, u64 *val, enum debugfs_access_type acc_type);
int hl_device_open(struct inode *inode, struct file *filp);
int hl_mmap(struct file *filp, struct vm_area_struct *vma);
int hl_device_open(struct drm_device *drm, struct drm_file *file_priv);
void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv);
int hl_device_open_ctrl(struct inode *inode, struct file *filp);
bool hl_device_operational(struct hl_device *hdev,
enum hl_device_status *status);
@ -3652,8 +3722,9 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg);
irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);
int hl_asid_init(struct hl_device *hdev);
@ -3944,16 +4015,14 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_
u64 *event_mask);
void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
#ifdef CONFIG_DEBUG_FS
void hl_debugfs_init(void);
void hl_debugfs_fini(void);
int hl_debugfs_device_init(struct hl_device *hdev);
void hl_debugfs_device_fini(struct hl_device *hdev);
void hl_debugfs_add_device(struct hl_device *hdev);
void hl_debugfs_remove_device(struct hl_device *hdev);
void hl_debugfs_add_file(struct hl_fpriv *hpriv);
void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
void hl_debugfs_add_cb(struct hl_cb *cb);
@ -3972,14 +4041,6 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
#else
static inline void __init hl_debugfs_init(void)
{
}
static inline void hl_debugfs_fini(void)
{
}
static inline int hl_debugfs_device_init(struct hl_device *hdev)
{
return 0;
@ -3993,10 +4054,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev)
{
}
static inline void hl_debugfs_remove_device(struct hl_device *hdev)
{
}
static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
}
@ -4108,11 +4165,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset,
const u32 pb_blocks[], u32 blocks_array_size);
/* IOCTLs */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
#endif /* HABANALABSP_H_ */


@ -14,6 +14,11 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/version.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/habanalabs.h>
@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
MODULE_LICENSE("GPL v2");
static int hl_major;
static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);
@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = {
};
MODULE_DEVICE_TABLE(pci, ids);
static const struct drm_ioctl_desc hl_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
};
static const struct file_operations hl_fops = {
.owner = THIS_MODULE,
.open = accel_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.compat_ioctl = drm_compat_ioctl,
.llseek = noop_llseek,
.mmap = hl_mmap
};
static const struct drm_driver hl_driver = {
.driver_features = DRIVER_COMPUTE_ACCEL,
.name = HL_NAME,
.desc = HL_DRIVER_DESC,
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
.date = "20190505",
.fops = &hl_fops,
.open = hl_device_open,
.postclose = hl_device_release,
.ioctls = hl_drm_ioctls,
.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
};
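The driver object above is wired up by the probe/init paths shown elsewhere in this series: the drm_device is embedded in hl_device at allocation time, and registration creates the accel node. A condensed sketch of that flow, with error handling trimmed:

/* Condensed sketch of the bind flow; the real code splits this between
 * create_hdev() and hl_device_init().
 */
static int example_bind_accel_device(struct pci_dev *pdev)
{
	struct hl_device *hdev;

	hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
	if (IS_ERR(hdev))
		return PTR_ERR(hdev);

	hdev->dev = hdev->drm.dev;
	hdev->pdev = pdev;

	return drm_dev_register(&hdev->drm, 0);
}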
/*
* get_asic_type - translate device id to asic type
*
@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
}
/*
* hl_device_open - open function for habanalabs device
*
* @inode: pointer to inode structure
* @filp: pointer to file structure
* hl_device_open() - open function for habanalabs device.
* @ddev: pointer to DRM device structure.
* @file: pointer to DRM file private data structure.
*
* Called when a process opens a habanalabs device.
*/
int hl_device_open(struct inode *inode, struct file *filp)
int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
{
struct hl_device *hdev = to_hl_device(ddev);
enum hl_device_status status;
struct hl_device *hdev;
struct hl_fpriv *hpriv;
int rc;
mutex_lock(&hl_devs_idr_lock);
hdev = idr_find(&hl_devs_idr, iminor(inode));
mutex_unlock(&hl_devs_idr_lock);
if (!hdev) {
pr_err("Couldn't find device %d:%d\n",
imajor(inode), iminor(inode));
return -ENXIO;
}
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv)
return -ENOMEM;
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex);
mutex_init(&hpriv->ctx_lock);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hdev->last_successful_open_jif = jiffies;
hdev->last_successful_open_ktime = ktime_get();
file_priv->driver_priv = hpriv;
hpriv->file_priv = file_priv;
return 0;
out_err:
@ -232,7 +260,6 @@ out_err:
hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex);
mutex_destroy(&hpriv->notifier_event.lock);
@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
*/
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
nonseekable_open(inode, filp);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
hdev->major = hl_major;
hdev->hclass = hl_class;
hdev->memory_scrub = memory_scrub;
hdev->reset_on_lockup = reset_on_lockup;
hdev->boot_error_status_mask = boot_error_status_mask;
@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev)
return 0;
}
static int allocate_device_id(struct hl_device *hdev)
{
int id;
mutex_lock(&hl_devs_idr_lock);
id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if (id < 0) {
if (id == -ENOSPC)
pr_err("too many devices in the system\n");
return -EBUSY;
}
hdev->id = id;
/*
* Initialized first with the internal device ID.
* Updated later, after the DRM device registration, to hold the minor ID.
*/
hdev->cdev_idx = hdev->id;
return 0;
}
/**
* create_hdev - create habanalabs device instance
*
@ -395,27 +444,29 @@ static int fixup_device_params(struct hl_device *hdev)
*/
static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
{
int main_id, ctrl_id = 0, rc = 0;
struct hl_device *hdev;
int rc;
*dev = NULL;
hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
if (!hdev)
return -ENOMEM;
hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
if (IS_ERR(hdev))
return PTR_ERR(hdev);
hdev->dev = hdev->drm.dev;
/* Will be NULL in case of simulator device */
hdev->pdev = pdev;
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX);
/* First, we must find out which ASIC we are handling. This is needed
@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
if (hdev->asic_type == ASIC_INVALID) {
dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV;
goto free_hdev;
goto out_err;
}
copy_kernel_module_params_to_device(hdev);
@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
fixup_device_params(hdev);
mutex_lock(&hl_devs_idr_lock);
/* Always save 2 numbers, 1 for main device and 1 for control.
* They must be consecutive
*/
main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
if (main_id >= 0)
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
main_id + 2, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if ((main_id < 0) || (ctrl_id < 0)) {
if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
pr_err("too many devices in the system\n");
if (main_id >= 0) {
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, main_id);
mutex_unlock(&hl_devs_idr_lock);
}
rc = -EBUSY;
goto free_hdev;
}
hdev->id = main_id;
hdev->id_control = ctrl_id;
rc = allocate_device_id(hdev);
if (rc)
goto out_err;
*dev = hdev;
return 0;
free_hdev:
kfree(hdev);
out_err:
return rc;
}
@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev)
/* Remove device from the device list */
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, hdev->id);
idr_remove(&hl_devs_idr, hdev->id_control);
mutex_unlock(&hl_devs_idr_lock);
kfree(hdev);
}
static int hl_pmops_suspend(struct device *dev)
@ -691,28 +713,16 @@ static int __init hl_init(void)
hl_major = MAJOR(dev);
hl_class = class_create(HL_NAME);
if (IS_ERR(hl_class)) {
pr_err("failed to allocate class\n");
rc = PTR_ERR(hl_class);
goto remove_major;
}
hl_debugfs_init();
rc = pci_register_driver(&hl_pci_driver);
if (rc) {
pr_err("failed to register pci device\n");
goto remove_debugfs;
goto remove_major;
}
pr_debug("driver loaded\n");
return 0;
remove_debugfs:
hl_debugfs_fini();
class_destroy(hl_class);
remove_major:
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
return rc;
@ -725,14 +735,6 @@ static void __exit hl_exit(void)
{
pci_unregister_driver(&hl_pci_driver);
/*
* Removing debugfs must be after all devices or simulator devices
* have been removed because otherwise we get a bug in the
* debugfs module for referencing NULL objects
*/
hl_debugfs_fini();
class_destroy(hl_class);
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
idr_destroy(&hl_devs_idr);


@ -17,6 +17,8 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/msr.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
time_sync.host_time = ktime_get_raw_ns();
time_sync.tsc_time = rdtsc();
return copy_to_user(out, &time_sync,
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
return rc ? -EFAULT : 0;
}
static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
u32 user_buf_size = args->return_size;
struct engine_err_info *info;
int rc;
if (!user_buf)
return -EINVAL;
info = &hdev->captured_err_info.engine_err;
if (!info->event_info_available)
return 0;
if (user_buf_size < sizeof(struct hl_info_engine_err_event))
return -ENOMEM;
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
return rc ? -EFAULT : 0;
}
static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
{
void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_FW_ERR_EVENT:
return fw_err_info(hpriv, args);
case HL_INFO_USER_ENGINE_ERR_EVENT:
return engine_err_info(hpriv, args);
case HL_INFO_DRAM_USAGE:
return dram_usage_info(hpriv, args);
default:
@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
return rc;
}
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
}
static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
{
struct hl_info_args *args = data;
switch (args->op) {
case HL_INFO_GET_EVENTS:
case HL_INFO_UNREGISTER_EVENTFD:
case HL_INFO_REGISTER_EVENTFD:
return -EOPNOTSUPP;
default:
break;
}
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
}
static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_debug_args *args = data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
struct hl_debug_args *args = data;
enum hl_device_status status;
int rc = 0;
@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
}
#define HL_IOCTL_DEF(ioctl, _func) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
[_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
static const struct hl_ioctl_desc hl_ioctls_control[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
};
static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
{
struct hl_fpriv *hpriv = filep->private_data;
unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128] = {0};
char *kdata = NULL;
@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
retcode = -EFAULT;
out_err:
if (retcode)
dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
task_pid_nr(current), cmd, nr);
if (retcode) {
char task_comm[TASK_COMM_LEN];
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
}
if (kdata != stack_kdata)
kfree(kdata);
@ -1204,29 +1240,6 @@ out_err:
return retcode;
}
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
struct hl_device *hdev = hpriv->hdev;
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
if (!hdev) {
pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
return -ENODEV;
}
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
ioctl = &hl_ioctls[nr];
} else {
dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
}
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
return -ENODEV;
}
if (nr == _IOC_NR(HL_IOCTL_INFO)) {
ioctl = &hl_ioctls_control[nr];
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else {
dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
char task_comm[TASK_COMM_LEN];
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
}


@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
{
struct timestamp_reg_work_obj *job =
container_of(work, struct timestamp_reg_work_obj, free_obj);
struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
struct list_head *free_list_head = job->free_obj_head;
struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
kfree(free_obj);
atomic_set(&free_obj->in_use, 0);
}
kfree(free_list_head);
if (dynamic_alloc_free_list_head) {
list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
free_objects_node) {
dev_dbg(hdev->dev,
"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
free_obj->buf,
free_obj->cq_cb);
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
list_del(&free_obj->free_objects_node);
kfree(free_obj);
}
kfree(dynamic_alloc_free_list_head);
}
kfree(job);
}
@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
* list to a dedicated workqueue to do the actual put.
*/
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list, ktime_t now)
struct list_head **free_list,
struct list_head **dynamic_alloc_list,
struct hl_user_interrupt *intr)
{
struct hl_ts_free_jobs *ts_free_jobs_data;
struct timestamp_reg_free_node *free_node;
u32 free_node_index;
u64 timestamp;
ts_free_jobs_data = &intr->ts_free_jobs_data;
free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
INIT_LIST_HEAD(*free_list);
}
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
dev_dbg(hdev->dev,
"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
pend->ts_reg_info.buf,
pend,
intr->interrupt_id);
timestamp = ktime_to_ns(now);
if (!(*dynamic_alloc_list)) {
*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*dynamic_alloc_list))
return -ENOMEM;
INIT_LIST_HEAD(*dynamic_alloc_list);
}
free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
free_node->dynamic_alloc = 1;
}
timestamp = ktime_to_ns(intr->timestamp);
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
list_del(&pend->wait_list_node);
/* Mark kernel CB node as free */
pend->ts_reg_info.in_use = 0;
list_del(&pend->list_node);
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
*/
free_node->buf = pend->ts_reg_info.buf;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
list_add(&free_node->free_objects_node, *free_list);
if (free_node->dynamic_alloc) {
list_add(&free_node->free_objects_node, *dynamic_alloc_list);
} else {
ts_free_jobs_data->next_avail_free_node_idx =
(++free_node_index) % ts_free_jobs_data->free_nodes_length;
list_add(&free_node->free_objects_node, *free_list);
}
/* Mark TS record as free */
pend->ts_reg_info.in_use = false;
return 0;
}
static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
unsigned long flags;
int rc;
/* For registration nodes:
@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
* or in irq handler context at all (since release functions are long and
* might sleep), so we will need to handle that part in workqueue context.
* To avoid handling kmalloc failure which compels us rolling back actions
* and move nodes hanged on the free list back to the interrupt wait list
* and move nodes hanged on the free list back to the interrupt ts list
* we always alloc the job of the WQ at the beginning.
*/
job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return;
spin_lock(&intr->wait_list_lock);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
spin_lock_irqsave(&intr->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
if (pend->ts_reg_info.buf) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head, intr->timestamp);
if (rc)
reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head,
&dynamic_alloc_list_head, intr);
if (rc)
reg_node_handle_fail = true;
}
}
}
spin_unlock(&intr->wait_list_lock);
spin_unlock_irqrestore(&intr->ts_list_lock, flags);
if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head;
job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else {
@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
}
}
static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct hl_user_pending_interrupt *pend, *temp_pend;
unsigned long flags;
spin_lock_irqsave(&intr->wait_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
}
}
spin_unlock_irqrestore(&intr->wait_list_lock, flags);
}
static void handle_tpc_interrupt(struct hl_device *hdev)
{
u64 event_mask;
@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
}
/**
* hl_irq_handler_user_interrupt - irq handler for user interrupts
* hl_irq_user_interrupt_handler - irq handler for user interrupts.
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
/* First handle user waiters threads */
handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_wait_list(hdev, user_int);
return IRQ_WAKE_THREAD;
/* Second handle user timestamp registrations */
handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_ts_list(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt_wait_list(hdev, user_int);
break;
default:
break;
}
return IRQ_HANDLED;
}
/**
@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
/* Handle user cq interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev);
break;
@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
return IRQ_HANDLED;
}
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
{
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct hl_device *hdev = arg;
dev_err(hdev->dev, "EQ error interrupt received\n");
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
return IRQ_HANDLED;
}
/**
* hl_irq_handler_eq - irq handler for event queue
*


@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
*p_userptr = userptr;
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto dma_map_err;
@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
* physical pages
*
* This function does the following:
* - Pin the physical pages related to the given virtual block.
* - Create a physical page pack from the physical pages related to the given
* virtual block.
*/
@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
}
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
u64 page_size, u64 exported_size,
u64 page_size, u64 exported_size, u64 offset,
struct device *dev, enum dma_data_direction dir)
{
u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
struct asic_fixed_properties *prop;
int rc, i, j, nents, cur_page;
u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
left_size_in_dma_seg, device_address, bar_address, start_page;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
unsigned int nents, i;
struct sg_table *sgt;
bool next_sg_entry;
int rc;
prop = &hdev->asic_prop;
dma_max_seg_size = dma_get_max_seg_size(dev);
/* We would like to align the max segment size to PAGE_SIZE, so the
* SGL will contain aligned addresses that can be easily mapped to
* an MMU
*/
dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
/* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE) {
dev_err_ratelimited(hdev->dev,
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
if (!sgt)
return ERR_PTR(-ENOMEM);
/* remove export size restrictions in case not explicitly defined */
cur_size_to_export = exported_size ? exported_size : (npages * page_size);
/* Use the offset to move to the actual first page that is exported */
for (start_page = 0 ; start_page < npages ; ++start_page) {
if (offset < page_size)
break;
/* If the size of each page is larger than the dma max segment size,
* then we can't combine pages and the number of entries in the SGL
* will just be the
* <number of pages> * <chunks of max segment size in each page>
*/
if (page_size > dma_max_seg_size) {
/* we should limit number of pages according to the exported size */
cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
} else {
cur_npages = npages;
/* The offset value was validated so there can't be an underflow */
offset -= page_size;
}
/* Get number of non-contiguous chunks */
for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
if (pages[i - 1] + page_size != pages[i] ||
chunk_size + page_size > dma_max_seg_size) {
nents++;
chunk_size = page_size;
continue;
}
/* Calculate the required number of entries for the SG table */
curr_page = start_page;
nents = 1;
left_size_to_export = exported_size;
left_size_in_page = page_size - offset;
left_size_in_dma_seg = dma_max_seg_size;
next_sg_entry = false;
chunk_size += page_size;
while (true) {
size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
left_size_to_export -= size;
left_size_in_page -= size;
left_size_in_dma_seg -= size;
if (!left_size_to_export)
break;
if (!left_size_in_page) {
/* left_size_to_export is not zero so there must be another page */
if (pages[curr_page] + page_size != pages[curr_page + 1])
next_sg_entry = true;
++curr_page;
left_size_in_page = page_size;
}
if (!left_size_in_dma_seg) {
next_sg_entry = true;
left_size_in_dma_seg = dma_max_seg_size;
}
if (next_sg_entry) {
++nents;
next_sg_entry = false;
}
}
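/*
 * Worked example with illustrative numbers (not taken from this series):
 * four contiguous 2 MiB device pages, dma_max_seg_size of 4 MiB, offset of
 * 1 MiB and exported_size of 6 MiB. The walk consumes the remaining 1 MiB
 * of the first page, then whole pages until the 4 MiB DMA segment limit is
 * hit, so the export splits into a 4 MiB chunk followed by a 2 MiB chunk
 * and the loop ends with nents == 2.
 */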
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
if (rc)
goto error_free;
goto err_free_sgt;
cur_page = 0;
/* Prepare the SG table entries */
curr_page = start_page;
device_address = pages[curr_page] + offset;
left_size_to_export = exported_size;
left_size_in_page = page_size - offset;
left_size_in_dma_seg = dma_max_seg_size;
next_sg_entry = false;
if (page_size > dma_max_seg_size) {
u64 size_left, cur_device_address = 0;
for_each_sgtable_dma_sg(sgt, sg, i) {
bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
chunk_size = 0;
size_left = page_size;
for ( ; curr_page < npages ; ++curr_page) {
size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
chunk_size += size;
left_size_to_export -= size;
left_size_in_page -= size;
left_size_in_dma_seg -= size;
/* Need to split each page into the number of chunks of
* dma_max_seg_size
*/
for_each_sgtable_dma_sg(sgt, sg, i) {
if (size_left == page_size)
cur_device_address =
pages[cur_page] - prop->dram_base_address;
else
cur_device_address += dma_max_seg_size;
if (!left_size_to_export)
break;
/* make sure not to export over exported size */
chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
if (!left_size_in_page) {
/* left_size_to_export is not zero so there must be another page */
if (pages[curr_page] + page_size != pages[curr_page + 1]) {
device_address = pages[curr_page + 1];
next_sg_entry = true;
}
bar_address = hdev->dram_pci_bar_start + cur_device_address;
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
cur_size_to_export -= chunk_size;
if (size_left > dma_max_seg_size) {
size_left -= dma_max_seg_size;
} else {
cur_page++;
size_left = page_size;
}
}
} else {
/* Merge pages and put them into the scatterlist */
for_each_sgtable_dma_sg(sgt, sg, i) {
chunk_size = page_size;
for (j = cur_page + 1 ; j < cur_npages ; j++) {
if (pages[j - 1] + page_size != pages[j] ||
chunk_size + page_size > dma_max_seg_size)
break;
chunk_size += page_size;
left_size_in_page = page_size;
}
bar_address = hdev->dram_pci_bar_start +
(pages[cur_page] - prop->dram_base_address);
if (!left_size_in_dma_seg) {
/*
* Skip setting a new device address if already moving to a page
* which is not contiguous with the current page.
*/
if (!next_sg_entry) {
device_address += chunk_size;
next_sg_entry = true;
}
/* make sure not to export over exported size */
chunk_size = min(chunk_size, cur_size_to_export);
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
left_size_in_dma_seg = dma_max_seg_size;
}
cur_size_to_export -= chunk_size;
cur_page = j;
if (next_sg_entry) {
next_sg_entry = false;
break;
}
}
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto err_unmap;
}
/* Because we are not going to include a CPU list we want to have some
 * chance that other users will detect this by setting the orig_nents
 * to 0 and using only nents (length of DMA list) when going over the
 * sgl
 */
/* There should be nothing left to export exactly after looping over all SG elements */
if (left_size_to_export) {
dev_err(hdev->dev,
"left size to export %#llx after initializing %u SG elements\n",
left_size_to_export, sgt->nents);
rc = -ENOMEM;
goto err_unmap;
}
/*
* Because we are not going to include a CPU list, we want to have some chance that other
* users will detect this when going over SG table, by setting the orig_nents to 0 and using
* only nents (length of DMA list).
*/
sgt->orig_nents = 0;
dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
nents, dev_name(dev));
for_each_sgtable_dma_sg(sgt, sg, i)
dev_dbg(hdev->dev,
"SG entry %d: address %#llx, length %#x\n",
i, sg_dma_address(sg), sg_dma_len(sg));
return sgt;
error_unmap:
err_unmap:
for_each_sgtable_dma_sg(sgt, sg, i) {
if (!sg_dma_len(sg))
continue;
dma_unmap_resource(dev, sg_dma_address(sg),
sg_dma_len(sg), dir,
dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
}
sg_free_table(sgt);
error_free:
err_free_sgt:
kfree(sgt);
return ERR_PTR(rc);
}
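As a rough illustration of the SG-entry counting above (a sketch with made-up numbers, not driver code, and ignoring the offset and non-contiguous-page cases the loop also handles): when each device page is larger than the DMA max segment size, every exported page splits into DIV_ROUND_UP(page_size, dma_max_seg_size) chunks.

#include <linux/types.h>
#include <linux/math.h>

/* e.g. exporting 48 MB backed by 16 MB device pages with a 4 MB max DMA
 * segment size: 3 pages * 4 chunks per page = 12 SG entries.
 */
static u32 example_sg_nents(u64 exported_size, u64 page_size, u64 dma_max_seg_size)
{
	return DIV_ROUND_UP(exported_size, page_size) *
	       DIV_ROUND_UP(page_size, dma_max_seg_size);
}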
@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
{
u64 *pages, npages, page_size, exported_size, offset;
struct dma_buf *dma_buf = attachment->dmabuf;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_dmabuf_priv *hl_dmabuf;
@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
hl_dmabuf = dma_buf->priv;
hdev = hl_dmabuf->ctx->hdev;
phys_pg_pack = hl_dmabuf->phys_pg_pack;
if (!attachment->peer2peer) {
dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
return ERR_PTR(-EPERM);
}
if (phys_pg_pack)
sgt = alloc_sgt_from_device_pages(hdev,
phys_pg_pack->pages,
phys_pg_pack->npages,
phys_pg_pack->page_size,
phys_pg_pack->exported_size,
attachment->dev,
dir);
else
sgt = alloc_sgt_from_device_pages(hdev,
&hl_dmabuf->device_address,
1,
hl_dmabuf->dmabuf->size,
0,
attachment->dev,
dir);
exported_size = hl_dmabuf->dmabuf->size;
offset = hl_dmabuf->offset;
phys_pg_pack = hl_dmabuf->phys_pg_pack;
if (phys_pg_pack) {
pages = phys_pg_pack->pages;
npages = phys_pg_pack->npages;
page_size = phys_pg_pack->page_size;
} else {
pages = &hl_dmabuf->device_phys_addr;
npages = 1;
page_size = hl_dmabuf->dmabuf->size;
}
sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
attachment->dev, dir);
if (IS_ERR(sgt))
dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
hl_ctx_put(ctx);
/* Paired with get_file() in export_dmabuf() */
fput(ctx->hpriv->filp);
fput(ctx->hpriv->file_priv->filp);
kfree(hl_dmabuf);
}
@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
* released first and only then the compute device.
* Paired with fput() in hl_release_dmabuf().
*/
get_file(ctx->hpriv->filp);
get_file(ctx->hpriv->file_priv->filp);
*dmabuf_fd = fd;
@ -1876,22 +1898,29 @@ err_dma_buf_put:
return rc;
}
static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
{
if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
if (!PAGE_ALIGNED(addr)) {
dev_dbg(hdev->dev,
"exported device memory address 0x%llx should be aligned to 0x%lx\n",
device_addr, PAGE_SIZE);
"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
addr, PAGE_SIZE);
return -EINVAL;
}
if (size < PAGE_SIZE) {
if (!size || !PAGE_ALIGNED(size)) {
dev_dbg(hdev->dev,
"exported device memory size %llu should be equal to or greater than %lu\n",
"exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
size, PAGE_SIZE);
return -EINVAL;
}
if (!PAGE_ALIGNED(offset)) {
dev_dbg(hdev->dev,
"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
offset, PAGE_SIZE);
return -EINVAL;
}
return 0;
}
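For instance (illustration only, assuming a 4 KiB PAGE_SIZE), an export with addr 0x10000, size 0x2000 and offset 0x1000 passes the common checks above, while size 0x1800 or offset 0x800 would be rejected with -EINVAL because PAGE_ALIGNED() is false for them.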
@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
u64 bar_address;
int rc;
rc = validate_export_params_common(hdev, device_addr, size);
rc = validate_export_params_common(hdev, device_addr, size, 0);
if (rc)
return rc;
if (device_addr < prop->dram_user_base_address ||
(device_addr + size) > prop->dram_end_address ||
(device_addr + size) < device_addr) {
(device_addr + size) > prop->dram_end_address ||
(device_addr + size) < device_addr) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
device_addr, size);
@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
u64 bar_address;
int i, rc;
rc = validate_export_params_common(hdev, device_addr, size);
rc = validate_export_params_common(hdev, device_addr, size, offset);
if (rc)
return rc;
if ((offset + size) > phys_pg_pack->total_size) {
dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
offset, size, phys_pg_pack->total_size);
offset, size, phys_pg_pack->total_size);
return -EINVAL;
}
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
bar_address = hdev->dram_pci_bar_start +
(phys_pg_pack->pages[i] - prop->dram_base_address);
(phys_pg_pack->pages[i] - prop->dram_base_address);
if ((bar_address + phys_pg_pack->page_size) >
(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
(bar_address + phys_pg_pack->page_size) < bar_address) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
phys_pg_pack->pages[i],
phys_pg_pack->page_size);
phys_pg_pack->pages[i], phys_pg_pack->page_size);
return -EINVAL;
}
}
@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
struct asic_fixed_properties *prop;
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
u64 export_addr;
int rc;
hdev = ctx->hdev;
@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
return -EINVAL;
}
export_addr = addr + offset;
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
if (!hl_dmabuf)
return -ENOMEM;
@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
rc = PTR_ERR(phys_pg_pack);
goto dec_memhash_export_cnt;
}
rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
if (rc)
goto dec_memhash_export_cnt;
phys_pg_pack->exported_size = size;
hl_dmabuf->phys_pg_pack = phys_pg_pack;
hl_dmabuf->memhash_hnode = hnode;
hl_dmabuf->offset = offset;
} else {
rc = validate_export_params_no_mmu(hdev, export_addr, size);
rc = validate_export_params_no_mmu(hdev, addr, size);
if (rc)
goto err_free_dmabuf_wrapper;
}
hl_dmabuf->device_address = export_addr;
hl_dmabuf->device_phys_addr = addr;
}
rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
if (rc)
@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
return 0;
}
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
enum hl_device_status status;
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);


@ -63,6 +63,10 @@
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
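A brief aside on the added MODULE_FIRMWARE() lines (general kernel behaviour, not something introduced by this diff): the macro only embeds the firmware path in the module's metadata so that userspace tooling such as modinfo and initramfs generators can see which blobs the driver may request; the actual loading still happens at runtime through request_firmware().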
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
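A side note on the strncpy() -> strscpy_pad() conversions in this series (a generic sketch, not driver code): strscpy_pad() always NUL-terminates and zero-fills the remainder of the destination, which matters for fixed-size fields such as card_name that may later be copied out wholesale.

	char name[16];
	ssize_t len;

	len = strscpy_pad(name, "HL2000", sizeof(name));
	/* len == 6, name is NUL-terminated and the remaining bytes are zeroed;
	 * a source longer than the buffer is truncated and -E2BIG is returned.
	 */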
@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
u64 addr, size, val = hdev->memory_scrub_val;
ktime_t timeout;
int rc = 0;
@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
return rc;
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci,
.context_switch = gaudi_context_switch,


@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi/gaudi_packets.h"
#include "../include/gaudi/gaudi.h"
#include "../include/gaudi/gaudi_async_events.h"


@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304);
val |= 0x1000;
WREG32(base_reg + 0x304, val);
@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);


@ -66,7 +66,6 @@
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
"sbte_prtn_intr_4",
};
static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
"i0",
"i1",
"i2",
"i3",
"i4",
};
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
"WBC ERR RESP_0",
"WBC ERR RESP_1",
@ -993,6 +984,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
"TLP is blocked by RR"
};
static const int gaudi2_queue_id_to_engine_id[] = {
[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
GAUDI2_DCORE0_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
GAUDI2_DCORE1_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
GAUDI2_DCORE2_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
GAUDI2_DCORE3_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_6,
[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
};
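The lookup table above relies on the GNU C range-designator extension; a minimal standalone illustration with made-up values:

	static const int example_map[8] = {
		[0 ... 3] = 10,	/* indices 0, 1, 2, 3 all map to 10 */
		[4 ... 7] = 20,	/* indices 4, 5, 6, 7 all map to 20 */
	};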
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
@ -2001,7 +2097,8 @@ enum razwi_event_sources {
RAZWI_PDMA,
RAZWI_NIC,
RAZWI_DEC,
RAZWI_ROT
RAZWI_ROT,
RAZWI_ARC_FARM
};
struct hbm_mc_error_causes {
@ -2431,7 +2528,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
prop->mme_master_slave_mode = 1;
@ -2884,7 +2981,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
}
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
/* Overwrite binning masks with the actual binning values from F/W */
hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
@ -4077,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
return "gaudi2 unexpected error";
case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
return "gaudi2 user completion";
case GAUDI2_IRQ_NUM_EQ_ERROR:
return "gaudi2 eq error";
default:
return "invalid";
}
@ -4127,9 +4227,7 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev)
rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
gaudi2_irq_name(i), (void *) dec);
} else {
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(i),
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
(void *) &hdev->user_interrupt[dec->core_id]);
}
@ -4187,17 +4285,17 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
&hdev->tpc_interrupt);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_dec_irq;
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
&hdev->unexpected_error_interrupt);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
@ -4209,16 +4307,23 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
i++, j++, user_irq_init_cnt++) {
irq = pci_irq_vector(hdev->pdev, i);
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(i), &hdev->user_interrupt[j]);
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
&hdev->user_interrupt[j]);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_user_irq;
}
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
hdev);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_user_irq;
}
gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
return 0;
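For context on the interrupt rework above (a generic sketch of the core kernel API with hypothetical names, not this driver's code): passing a NULL hard handler to request_threaded_irq() together with IRQF_ONESHOT keeps the line masked until the threaded handler returns, whereas request_irq() runs its handler entirely in hard-IRQ context.

static irqreturn_t my_thread_fn(int irq, void *data)
{
	/* runs in a kernel thread, so it may sleep */
	return IRQ_HANDLED;
}

/* NULL hard handler + IRQF_ONESHOT: the line stays masked until my_thread_fn() finishes */
rc = request_threaded_irq(irq, NULL, my_thread_fn, IRQF_ONESHOT, "my-device", dev);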
@ -4278,6 +4383,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
}
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
}
static void gaudi2_disable_msix(struct hl_device *hdev)
@ -4314,6 +4420,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
free_irq(irq, cq);
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
free_irq(irq, hdev);
pci_free_irq_vectors(hdev->pdev);
gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
@ -4716,6 +4825,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
pre_fw_load->wait_for_preboot_extended_timeout =
GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
}
static void gaudi2_init_firmware_loader(struct hl_device *hdev)
@ -6157,17 +6268,14 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
u32 poll_timeout_us)
{
struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
int rc = 0;
int rc;
if (!driver_performs_reset) {
if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
/* set SP to indicate reset request sent to FW */
if (dyn_regs->cpu_rst_status)
WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
else
WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
/* wait for f/w response */
@ -6623,24 +6731,6 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
enum dma_data_direction dir)
{
dma_addr_t dma_addr;
dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
return 0;
return dma_addr;
}
static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
enum dma_data_direction dir)
{
dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
}
static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
{
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
@ -7703,11 +7793,13 @@ static inline bool is_info_event(u32 event)
switch (event) {
case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
/* return in case of NIC status event - these events are received periodically and not as
* an indication to an error.
*/
case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
return true;
default:
return false;
@ -7739,21 +7831,34 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
struct hl_eq_ecc_data *ecc_data)
{
u64 ecc_address = 0, ecc_syndrom = 0;
u64 ecc_address = 0, ecc_syndrome = 0;
u8 memory_wrapper_idx = 0;
bool has_block_id = false;
u16 block_id;
if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
has_block_id = true;
ecc_address = le64_to_cpu(ecc_data->ecc_address);
ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
memory_wrapper_idx = ecc_data->memory_wrapper_idx;
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
if (has_block_id) {
block_id = le16_to_cpu(ecc_data->block_id);
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
ecc_data->is_critical);
} else {
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
}
return !!ecc_data->is_critical;
}
static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
{
u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
u64 cq_ptr, arc_cq_ptr, cp_current_inst;
@ -7775,10 +7880,22 @@ static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
dev_info(hdev->dev,
"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
if (arc_cq_ptr) {
hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
} else {
hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
}
hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
}
}
static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
u64 qman_base, u32 qid_base)
u64 qman_base, u32 qid_base, u64 *event_mask)
{
u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
u64 glbl_sts_addr, arb_err_addr;
@ -7812,8 +7929,22 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
error_count++;
}
if (i == QMAN_STREAMS)
print_lower_qman_data_on_err(hdev, qman_base);
if (i == QMAN_STREAMS && error_count) {
/* check for undefined opcode */
if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
hdev->captured_err_info.undef_opcode.write_enable) {
memset(&hdev->captured_err_info.undef_opcode, 0,
sizeof(hdev->captured_err_info.undef_opcode));
hdev->captured_err_info.undef_opcode.write_enable = false;
hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
hdev->captured_err_info.undef_opcode.engine_id =
gaudi2_queue_id_to_engine_id[qid_base];
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
}
handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
}
}
arb_err_val = RREG32(arb_err_addr);
@ -7927,6 +8058,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
case RAZWI_ROT:
return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
case RAZWI_ARC_FARM:
return GAUDI2_ENGINE_ID_ARC_FARM;
default:
return GAUDI2_ENGINE_ID_SIZE;
}
@ -8036,6 +8170,11 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "ROT_%u", module_idx);
break;
case RAZWI_ARC_FARM:
lbw_rtr_id = DCORE1_RTR5;
hbw_rtr_id = DCORE1_RTR7;
sprintf(initiator_name, "ARC_FARM_%u", module_idx);
break;
default:
return;
}
@ -8149,11 +8288,11 @@ static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u
eng_id[num_of_eng] = razwi_info[i].eng_id;
base[num_of_eng] = razwi_info[i].rtr_ctrl;
if (!num_of_eng)
str_size += snprintf(eng_name + str_size,
str_size += scnprintf(eng_name + str_size,
PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
razwi_info[i].eng_name);
else
str_size += snprintf(eng_name + str_size,
str_size += scnprintf(eng_name + str_size,
PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
razwi_info[i].eng_name);
num_of_eng++;
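A short note on the snprintf() -> scnprintf() switch above (general kernel semantics, not taken from this file): snprintf() returns the length the string would have had, so accumulating its return value into str_size can walk past the end of the buffer, while scnprintf() returns the number of characters actually written.

	char buf[8];
	int n;

	n = snprintf(buf, sizeof(buf), "0123456789");	/* n == 10, buf holds "0123456" */
	n = scnprintf(buf, sizeof(buf), "0123456789");	/* n == 7, safe to use as an offset */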
@ -8475,7 +8614,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
return 0;
}
error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
qid_base, event_mask);
/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
@ -8488,7 +8628,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
return error_count;
}
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
@ -8510,6 +8650,7 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type
sts_clr_val);
}
gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
hl_check_for_glbl_errors(hdev);
return error_count;
@ -8649,21 +8790,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
return error_count;
}
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
u64 intr_cause_data)
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
int i, error_count = 0;
for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
if (intr_cause_data & BIT(i)) {
gaudi2_print_event(hdev, event_type, true,
"err cause: %s", guadi2_mme_sbte_error_cause[i]);
error_count++;
}
/*
* We have a single error cause here but the report mechanism is
* buggy. Hence there is no good reason to fetch the cause so we
* just check for glbl_errors and exit.
*/
hl_check_for_glbl_errors(hdev);
return error_count;
return GAUDI2_NA_EVENT_CAUSE;
}
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@ -9460,6 +9596,176 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
}
}
static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
{
enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
u16 index;
switch (event_type) {
case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
type = GAUDI2_BLOCK_TYPE_TPC;
break;
case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
index = event_type - GAUDI2_EVENT_TPC0_QM;
type = GAUDI2_BLOCK_TYPE_TPC;
break;
case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME0_QM:
index = 0;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME1_QM:
index = 1;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME2_QM:
index = 2;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME3_QM:
index = 3;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
case GAUDI2_EVENT_KDMA_BM_SPMU:
case GAUDI2_EVENT_KDMA0_CORE:
return GAUDI2_ENGINE_ID_KDMA;
case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
case GAUDI2_EVENT_PDMA0_CORE:
case GAUDI2_EVENT_PDMA0_BM_SPMU:
case GAUDI2_EVENT_PDMA0_QM:
return GAUDI2_ENGINE_ID_PDMA_0;
case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
case GAUDI2_EVENT_PDMA1_CORE:
case GAUDI2_EVENT_PDMA1_BM_SPMU:
case GAUDI2_EVENT_PDMA1_QM:
return GAUDI2_ENGINE_ID_PDMA_1;
case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
type = GAUDI2_BLOCK_TYPE_DEC;
break;
case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
type = GAUDI2_BLOCK_TYPE_DEC;
break;
case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
index = event_type - GAUDI2_EVENT_NIC0_QM0;
return GAUDI2_ENGINE_ID_NIC0_0 + index;
case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
type = GAUDI2_BLOCK_TYPE_TPC;
break;
case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
return GAUDI2_ENGINE_ID_ROT_0;
case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
return GAUDI2_ENGINE_ID_ROT_1;
case GAUDI2_EVENT_HDMA0_BM_SPMU:
case GAUDI2_EVENT_HDMA0_QM:
case GAUDI2_EVENT_HDMA0_CORE:
return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA1_BM_SPMU:
case GAUDI2_EVENT_HDMA1_QM:
case GAUDI2_EVENT_HDMA1_CORE:
return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
case GAUDI2_EVENT_HDMA2_BM_SPMU:
case GAUDI2_EVENT_HDMA2_QM:
case GAUDI2_EVENT_HDMA2_CORE:
return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA3_BM_SPMU:
case GAUDI2_EVENT_HDMA3_QM:
case GAUDI2_EVENT_HDMA3_CORE:
return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
case GAUDI2_EVENT_HDMA4_BM_SPMU:
case GAUDI2_EVENT_HDMA4_QM:
case GAUDI2_EVENT_HDMA4_CORE:
return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA5_BM_SPMU:
case GAUDI2_EVENT_HDMA5_QM:
case GAUDI2_EVENT_HDMA5_CORE:
return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
case GAUDI2_EVENT_HDMA6_BM_SPMU:
case GAUDI2_EVENT_HDMA6_QM:
case GAUDI2_EVENT_HDMA6_CORE:
return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA7_BM_SPMU:
case GAUDI2_EVENT_HDMA7_QM:
case GAUDI2_EVENT_HDMA7_CORE:
return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
default:
break;
}
switch (type) {
case GAUDI2_BLOCK_TYPE_TPC:
switch (index) {
case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
default:
break;
}
break;
case GAUDI2_BLOCK_TYPE_MME:
switch (index) {
case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
default:
break;
}
break;
case GAUDI2_BLOCK_TYPE_DEC:
switch (index) {
case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
default:
break;
}
break;
default:
break;
}
return U16_MAX;
}
static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
{
hdev->eq_heartbeat_received = true;
}
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
@ -9501,7 +9807,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
break;
case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
@ -9724,8 +10030,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
@ -9875,6 +10180,21 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
is_critical = true;
break;
case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
error_count = GAUDI2_NA_EVENT_CAUSE;
dev_info_ratelimited(hdev->dev, "%s event received\n",
gaudi2_irq_map_table[event_type].name);
break;
case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
hl_eq_heartbeat_event_handle(hdev);
error_count = GAUDI2_NA_EVENT_CAUSE;
break;
default:
if (gaudi2_irq_map_table[event_type].valid) {
dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
@ -9883,6 +10203,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
}
}
if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
/* Make sure to dump an error in case no error cause was printed so far.
* Note that although we have counted the errors, we use this number as
* a boolean.
@ -10523,6 +10846,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
int rc;
if (ctx->asid == HL_KERNEL_ASID_ID)
return 0;
rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
if (rc)
return rc;
@ -11014,6 +11340,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64
static void gaudi2_get_msi_info(__le32 *table)
{
table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
}
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
@ -11170,11 +11497,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.asic_dma_pool_free = gaudi2_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
.asic_dma_unmap_single = gaudi2_dma_unmap_single,
.asic_dma_map_single = gaudi2_dma_map_single,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi2_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = NULL,
.update_eq_ci = gaudi2_update_eq_ci,
.context_switch = gaudi2_context_switch,


@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi2/gaudi2.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
@ -84,6 +84,7 @@
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
@ -419,6 +420,7 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_EQ_ERROR,
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,


@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE,
[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = 0,
[GAUDI2_STM_PSOC_ARC1_CS] = 0,
[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
[GAUDI2_STM_CPU] = mmCPU_STM_BASE,
@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = 0,
[GAUDI2_ETF_PSOC_ARC1_CS] = 0,
[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = 0,
[GAUDI2_FUNNEL_PSOC_ARC1] = 0,
[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE,
[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE,
[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE,
[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = 0,
[GAUDI2_BMON_PSOC_ARC0_1] = 0,
[GAUDI2_BMON_PSOC_ARC1_0] = 0,
[GAUDI2_BMON_PSOC_ARC1_1] = 0,
[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
[GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,
@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (rc)
return -EIO;
val = RREG32(base_reg + mmETF_CTL_OFFSET);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + mmETF_FFCR_OFFSET);
val |= 0x1000;
WREG32(base_reg + mmETF_FFCR_OFFSET, val);
@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (!input)
return -EINVAL;
val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
if (val) {
val = ffs(val);
WREG32(base_reg + mmETF_PSCR_OFFSET, val);
} else {
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
}
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
WREG32(base_reg + mmETF_CTL_OFFSET, 1);
} else {
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);
@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
if (rc)
return -EIO;
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);
@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
* set enabled events mask based on input->event_types_num
*/
event_mask = 0x80000000;
event_mask |= GENMASK(input->event_types_num, 0);
if (input->event_types_num)
event_mask |= GENMASK(input->event_types_num - 1, 0);
WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
} else {

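Regarding the GENMASK() change above (an illustrative note, not from the driver): GENMASK(h, l) builds a mask with bits h..l set, so GENMASK(n, 0) sets n + 1 bits; using event_types_num - 1 enables exactly event_types_num counters, and the new zero check avoids calling GENMASK() with a negative high bit.

	/* e.g. event_types_num == 4: old mask GENMASK(4, 0) == 0x1f (5 bits),
	 * new mask GENMASK(3, 0) == 0xf (exactly 4 bits).
	 */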

@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
mmDCORE0_TPC0_CFG_TPC_COUNT,
mmDCORE0_TPC0_CFG_TPC_ID,
mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,
@ -2907,7 +2908,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
* - range 11: NIC11_CFG + *_DBG (not including TPC_DBG)
*
* If F/W security is not enabled:
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP)
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF)
*/
u64 lbw_range_min_short[] = {
mmNIC0_TX_AXUSER_BASE,
@ -2923,7 +2924,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
mmNIC10_TX_AXUSER_BASE,
mmNIC11_TX_AXUSER_BASE,
mmPSOC_I2C_M0_BASE,
mmPSOC_EFUSE_BASE
mmPSOC_GPIO0_BASE
};
u64 lbw_range_max_short[] = {
mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE,
@ -3219,6 +3220,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev)
*/
static int gaudi2_init_protection_bits(struct hl_device *hdev)
{
u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg;
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 instance_offset;
int rc = 0;
@ -3389,11 +3391,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
/* PSOC.
* Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are
* protected by privileged RR.
* For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine
* cores to raise events towards F/W.
*/
engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE);
if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 &&
engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) {
user_regs_array = &engine_core_intr_reg;
user_regs_array_size = 1;
} else {
dev_err(hdev->dev,
"Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n",
engine_core_intr_reg);
}
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf),
NULL, HL_PB_NA);
user_regs_array, user_regs_array_size);
if (!hdev->asic_prop.fw_security_enabled)
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,


@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
}
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
return 0;
@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
.asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = goya_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = goya_add_end_of_cb_packets,
.update_eq_ci = goya_update_eq_ci,
.context_switch = goya_context_switch,


@ -9,8 +9,8 @@
#define GOYAP_H_
#include <uapi/drm/habanalabs_accel.h>
#include <linux/habanalabs/hl_boot_if.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include "../include/goya/goya_packets.h"
#include "../include/goya/goya.h"
#include "../include/goya/goya_async_events.h"


@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304);
val |= 0x1000;
WREG32(base_reg + 0x304, val);
@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);


@ -44,38 +44,6 @@ struct eq_nic_sei_event {
__u8 pad[6];
};
/*
* struct gaudi_nic_status - describes the status of a NIC port.
* @port: NIC port index.
* @bad_format_cnt: e.g. CRC.
* @responder_out_of_sequence_psn_cnt: e.g NAK.
* @high_ber_reinit_cnt: link reinit due to high BER.
* @correctable_err_cnt: e.g. bit-flip.
* @uncorrectable_err_cnt: e.g. MAC errors.
* @retraining_cnt: re-training counter.
* @up: is port up.
* @pcs_link: has PCS link.
* @phy_ready: is PHY ready.
* @auto_neg: is Autoneg enabled.
* @timeout_retransmission_cnt: timeout retransmission events
* @high_ber_cnt: high ber events
*/
struct gaudi_nic_status {
__u32 port;
__u32 bad_format_cnt;
__u32 responder_out_of_sequence_psn_cnt;
__u32 high_ber_reinit;
__u32 correctable_err_cnt;
__u32 uncorrectable_err_cnt;
__u32 retraining_cnt;
__u8 up;
__u8 pcs_link;
__u8 phy_ready;
__u8 auto_neg;
__u32 timeout_retransmission_cnt;
__u32 high_ber_cnt;
};
struct gaudi_cold_rst_data {
union {
struct {


@ -959,6 +959,13 @@ enum gaudi2_async_event_id {
GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322,
GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323,
GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324,
GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325,
GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326,
GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327,
GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328,
GAUDI2_EVENT_SIZE,
};


@ -1293,7 +1293,7 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "" },
{ .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "PCIE_P2P_MSIX" },
{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PCIE_DRAIN_COMPLETE" },
{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC0_BMON_SPMU" },
@ -2673,6 +2673,20 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "FP32_NOT_SUPPORTED" },
{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
.name = "DEV_RESET_REQ" },
{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_BRK_ENTRY" },
{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_BRK_EXT" },
{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE0" },
{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE1" },
{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE2" },
{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE3" },
{ .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_EQ_HEARTBEAT" },
};
#endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */


@ -2,7 +2,6 @@
# Copyright (C) 2023 Intel Corporation
intel_vpu-y := \
ivpu_debugfs.o \
ivpu_drv.o \
ivpu_fw.o \
ivpu_fw_log.o \
@ -16,4 +15,6 @@ intel_vpu-y := \
ivpu_mmu_context.o \
ivpu_pm.o
intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o


@ -1,11 +0,0 @@
- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler()
- Implement support for BLOB IDs
- Add debugfs support to improve debugging and testing
- Add tracing events for performance debugging
- Implement HW based scheduling support
- Use syncobjs for submit/sync
- Refactor IPC protocol to improve message latency
- Implement BO cache and MADVISE IOCTL
- Add support for user allocated buffers using prime import and dma-buf heaps
- Refactor struct ivpu_bo to use struct drm_gem_shmem_object
- Add driver/device documentation

@@ -17,20 +17,26 @@
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s)
{
struct drm_debugfs_entry *entry = s->private;
return to_ivpu_device(entry->dev);
}
static int bo_list_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct drm_printer p = drm_seq_file_printer(s);
struct ivpu_device *vdev = seq_to_ivpu(s);
ivpu_bo_list(node->minor->dev, &p);
ivpu_bo_list(&vdev->drm, &p);
return 0;
}
static int fw_name_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%s\n", vdev->fw->name);
return 0;
@@ -38,8 +44,7 @@ static int fw_name_show(struct seq_file *s, void *v)
static int fw_trace_capability_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
u64 trace_hw_component_mask;
u32 trace_destination_mask;
int ret;
@@ -57,8 +62,7 @@ static int fw_trace_capability_show(struct seq_file *s, void *v)
static int fw_trace_config_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
/**
* WA: VPU_JSM_MSG_TRACE_GET_CONFIG command is not working yet,
* so we use values from vdev->fw instead of calling ivpu_jsm_trace_get_config()
@@ -78,8 +82,7 @@ static int fw_trace_config_show(struct seq_file *s, void *v)
static int last_bootmode_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? "warmboot" : "coldboot");
@@ -88,8 +91,7 @@ static int last_bootmode_show(struct seq_file *s, void *v)
static int reset_counter_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_counter));
return 0;
@@ -97,14 +99,13 @@ static int reset_counter_show(struct seq_file *s, void *v)
static int reset_pending_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
return 0;
}
static const struct drm_info_list vdev_debugfs_list[] = {
static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"bo_list", bo_list_show, 0},
{"fw_name", fw_name_show, 0},
{"fw_trace_capability", fw_trace_capability_show, 0},
@@ -270,25 +271,24 @@ static const struct file_operations ivpu_reset_engine_fops = {
.write = ivpu_reset_engine_fn,
};
void ivpu_debugfs_init(struct drm_minor *minor)
void ivpu_debugfs_init(struct ivpu_device *vdev)
{
struct ivpu_device *vdev = to_ivpu_device(minor->dev);
struct dentry *debugfs_root = vdev->drm.debugfs_root;
drm_debugfs_create_files(vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list),
minor->debugfs_root, minor);
drm_debugfs_add_files(&vdev->drm, vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list));
debugfs_create_file("force_recovery", 0200, minor->debugfs_root, vdev,
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
debugfs_create_file("fw_log", 0644, minor->debugfs_root, vdev,
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
&fw_log_fops);
debugfs_create_file("fw_trace_destination_mask", 0200, minor->debugfs_root, vdev,
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
&fw_trace_destination_mask_fops);
debugfs_create_file("fw_trace_hw_comp_mask", 0200, minor->debugfs_root, vdev,
debugfs_create_file("fw_trace_hw_comp_mask", 0200, debugfs_root, vdev,
&fw_trace_hw_comp_mask_fops);
debugfs_create_file("fw_trace_level", 0200, minor->debugfs_root, vdev,
debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
&fw_trace_level_fops);
debugfs_create_file("reset_engine", 0200, minor->debugfs_root, vdev,
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
}

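The conversion above moves ivpu away from the legacy drm_info_list / drm_debugfs_create_files() registration, where each show callback dug the device out of a struct drm_info_node, to the device-level struct drm_debugfs_info table and drm_debugfs_add_files(), with s->private now carrying a struct drm_debugfs_entry. A minimal sketch of that newer registration pattern follows; the my_stats_show() file and my_debugfs_init() function are hypothetical and not part of the ivpu diff.

#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>

/* With drm_debugfs_add_files(), s->private points at a drm_debugfs_entry,
 * so the callback recovers the drm_device directly instead of going through
 * a drm_info_node and its minor. */
static int my_stats_show(struct seq_file *s, void *unused)
{
	struct drm_debugfs_entry *entry = s->private;
	struct drm_device *drm = entry->dev;

	seq_printf(s, "driver: %s\n", drm->driver->name);
	return 0;
}

static const struct drm_debugfs_info my_debugfs_files[] = {
	{ "my_stats", my_stats_show, 0 },
};

/* Registered once per device, e.g. from probe, instead of from a
 * drm_driver.debugfs_init(minor) hook; the DRM core creates the files
 * when debugfs is set up for the device. */
static void my_debugfs_init(struct drm_device *drm)
{
	drm_debugfs_add_files(drm, my_debugfs_files, ARRAY_SIZE(my_debugfs_files));
}

This matches the probe-time ivpu_debugfs_init(vdev) call added later in this diff, which runs before drm_dev_register().
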
@@ -6,8 +6,12 @@
#ifndef __IVPU_DEBUGFS_H__
#define __IVPU_DEBUGFS_H__
struct drm_minor;
struct ivpu_device;
void ivpu_debugfs_init(struct drm_minor *minor);
#if defined(CONFIG_DEBUG_FS)
void ivpu_debugfs_init(struct ivpu_device *vdev);
#else
static inline void ivpu_debugfs_init(struct ivpu_device *vdev) { }
#endif
#endif /* __IVPU_DEBUGFS_H__ */

@@ -131,6 +131,22 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
return 0;
}
static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
{
int ret;
ret = ivpu_rpm_get_if_active(vdev);
if (ret < 0)
return ret;
*clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
if (ret)
ivpu_rpm_put(vdev);
return 0;
}
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -154,7 +170,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
args->value = ivpu_hw_reg_pll_freq_get(vdev);
ret = ivpu_get_core_clock_rate(vdev, &args->value);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -400,10 +416,6 @@ static const struct drm_driver driver = {
.postclose = ivpu_postclose,
.gem_prime_import = ivpu_gem_prime_import,
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = ivpu_debugfs_init,
#endif
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
.fops = &ivpu_fops,
@@ -523,78 +535,52 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
ret = ivpu_pci_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
ret = ivpu_irq_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
/* Init basic HW info based on buttress registers which are accessible before power up */
ret = ivpu_hw_info_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
ret = ivpu_mmu_global_context_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
if (ret)
goto err_power_down;
}
ret = ivpu_mmu_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
if (ret)
goto err_mmu_gctx_fini;
ret = ivpu_mmu_reserved_context_init(vdev);
if (ret)
goto err_mmu_gctx_fini;
}
ret = ivpu_fw_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
goto err_mmu_gctx_fini;
}
if (ret)
goto err_mmu_rctx_fini;
ret = ivpu_ipc_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
if (ret)
goto err_fw_fini;
}
ret = ivpu_pm_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
goto err_ipc_fini;
}
ivpu_pm_init(vdev);
ret = ivpu_job_done_thread_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
if (ret)
goto err_ipc_fini;
}
ret = ivpu_fw_load(vdev);
if (ret) {
ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
goto err_job_done_thread_fini;
}
ret = ivpu_boot(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot: %d\n", ret);
if (ret)
goto err_job_done_thread_fini;
}
ivpu_pm_enable(vdev);
@@ -606,6 +592,8 @@ err_ipc_fini:
ivpu_ipc_fini(vdev);
err_fw_fini:
ivpu_fw_fini(vdev);
err_mmu_rctx_fini:
ivpu_mmu_reserved_context_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
err_power_down:
@@ -629,6 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_ipc_fini(vdev);
ivpu_fw_fini(vdev);
ivpu_mmu_reserved_context_fini(vdev);
ivpu_mmu_global_context_fini(vdev);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
@@ -657,10 +646,10 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, vdev);
ret = ivpu_dev_init(vdev);
if (ret) {
dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret);
if (ret)
return ret;
}
ivpu_debugfs_init(vdev);
ret = drm_dev_register(&vdev->drm, 0);
if (ret) {

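One detail worth calling out from the ivpu_drv.c hunks above: the DRM_IVPU_PARAM_CORE_CLOCK_RATE query now goes through ivpu_get_core_clock_rate(), which only reads the PLL frequency when ivpu_rpm_get_if_active() says the device is already powered (negative means error, zero means suspended, positive means a runtime-PM reference was taken) and reports 0 otherwise. The same conditional-access idiom, sketched with the generic runtime-PM helper pm_runtime_get_if_in_use() and a hypothetical read_hw_freq() accessor rather than the ivpu wrappers:

#include <linux/device.h>
#include <linux/pm_runtime.h>

/* Placeholder for a real MMIO/PLL read; not part of the ivpu diff. */
static u64 read_hw_freq(struct device *dev)
{
	return 0;
}

/* Report a hardware clock only if the device is already runtime-active,
 * instead of forcing a resume just to answer a query. */
static int query_freq_if_active(struct device *dev, u64 *freq)
{
	int active = pm_runtime_get_if_in_use(dev); /* <0 error, 0 suspended, >0 got a ref */

	if (active < 0)
		return active;

	*freq = active ? read_hw_freq(dev) : 0;

	if (active)
		pm_runtime_put(dev); /* balance the reference taken above */

	return 0;
}
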
@@ -29,12 +29,13 @@
#define IVPU_HW_37XX 37
#define IVPU_HW_40XX 40
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
/* SSID 1 is used by the VPU to represent invalid context */
#define IVPU_USER_CONTEXT_MIN_SSID 2
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
/* SSID 1 is used by the VPU to represent reserved context */
#define IVPU_RESERVED_CONTEXT_MMU_SSID 1
#define IVPU_USER_CONTEXT_MIN_SSID 2
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
#define IVPU_NUM_ENGINES 2
#define IVPU_NUM_ENGINES 2
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
@@ -76,6 +77,11 @@
#define IVPU_WA(wa_name) (vdev->wa.wa_name)
#define IVPU_PRINT_WA(wa_name) do { \
if (IVPU_WA(wa_name)) \
ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n"); \
} while (0)
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
@@ -105,6 +111,7 @@ struct ivpu_device {
struct ivpu_pm_info *pm;
struct ivpu_mmu_context gctx;
struct ivpu_mmu_context rctx;
struct xarray context_xa;
struct xa_limit context_xa_limit;
@@ -118,6 +125,7 @@ struct ivpu_device {
int jsm;
int tdr;
int reschedule_suspend;
int autosuspend;
} timeout;
};

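The IVPU_PRINT_WA() helper added above pairs with the existing IVPU_WA() accessor so platform setup code can log exactly which workarounds ended up enabled. A hedged usage sketch follows; the my_wa_init() function and the chosen conditions are illustrative only and assume the driver's own headers (ivpu_drv.h) are included.

/* Illustrative only: how the new macro would be used once the
 * workaround table has been filled in. */
static void my_wa_init(struct ivpu_device *vdev)
{
	vdev->wa.punit_disabled = true;		/* e.g. on a pre-silicon platform */
	vdev->wa.clear_runtime_mem = false;

	IVPU_PRINT_WA(punit_disabled);		/* ivpu_dbg()s "Using WA: punit_disabled" */
	IVPU_PRINT_WA(clear_runtime_mem);	/* silent, the flag is false */
}
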
@@ -301,6 +301,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
if (ret)
goto err_fw_release;
ivpu_fw_load(vdev);
return 0;
err_fw_release:
@@ -314,25 +316,23 @@ void ivpu_fw_fini(struct ivpu_device *vdev)
ivpu_fw_release(vdev);
}
int ivpu_fw_load(struct ivpu_device *vdev)
void ivpu_fw_load(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
u64 image_end_offset = fw->image_load_offset + fw->image_size;
memset(fw->mem->kvaddr, 0, fw->image_load_offset);
memcpy(fw->mem->kvaddr + fw->image_load_offset,
memset(ivpu_bo_vaddr(fw->mem), 0, fw->image_load_offset);
memcpy(ivpu_bo_vaddr(fw->mem) + fw->image_load_offset,
fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size);
if (IVPU_WA(clear_runtime_mem)) {
u8 *start = fw->mem->kvaddr + image_end_offset;
u64 size = fw->mem->base.size - image_end_offset;
u8 *start = ivpu_bo_vaddr(fw->mem) + image_end_offset;
u64 size = ivpu_bo_size(fw->mem) - image_end_offset;
memset(start, 0, size);
}
wmb(); /* Flush WC buffers after writing fw->mem */
return 0;
}
static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -451,10 +451,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
vdev->hw->ranges.global.start;
boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
boot_params->ipc_header_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
@@ -486,9 +486,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->trace_destination_mask = vdev->fw->trace_destination_mask;
boot_params->trace_hw_component_mask = vdev->fw->trace_hw_component_mask;
boot_params->crit_tracing_buff_addr = vdev->fw->mem_log_crit->vpu_addr;
boot_params->crit_tracing_buff_size = vdev->fw->mem_log_crit->base.size;
boot_params->crit_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_crit);
boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr;
boot_params->verbose_tracing_buff_size = vdev->fw->mem_log_verb->base.size;
boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb);
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);

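The ivpu_fw.c changes above, and the fw_log and GEM hunks that follow, stop poking fw->mem->kvaddr and ->base.size directly and go through ivpu_bo_vaddr() / ivpu_bo_size() accessors instead, which keeps call sites stable if the BO layout changes later. Judging only from the converted call sites in this diff, the helpers are presumably thin inlines along these lines; this is a sketch, not the actual ivpu_gem.h definitions.

/* Sketch of the accessor shape implied by the converted call sites. */
static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
{
	return bo->kvaddr;	/* kernel mapping created at allocation time */
}

static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
{
	return bo->base.size;	/* size of the embedded drm_gem_object */
}
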
@@ -31,7 +31,7 @@ struct ivpu_fw_info {
int ivpu_fw_init(struct ivpu_device *vdev);
void ivpu_fw_fini(struct ivpu_device *vdev);
int ivpu_fw_load(struct ivpu_device *vdev);
void ivpu_fw_load(struct ivpu_device *vdev);
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)

@@ -31,10 +31,10 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
{
struct vpu_tracing_buffer_header *log;
if ((*offset + sizeof(*log)) > bo->base.size)
if ((*offset + sizeof(*log)) > ivpu_bo_size(bo))
return -EINVAL;
log = bo->kvaddr + *offset;
log = ivpu_bo_vaddr(bo) + *offset;
if (log->vpu_canary_start != VPU_TRACING_BUFFER_CANARY)
return -EINVAL;
@@ -43,7 +43,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
ivpu_dbg(vdev, FW_BOOT, "Invalid header size 0x%x\n", log->header_size);
return -EINVAL;
}
if ((char *)log + log->size > (char *)bo->kvaddr + bo->base.size) {
if ((char *)log + log->size > (char *)ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)) {
ivpu_dbg(vdev, FW_BOOT, "Invalid log size 0x%x\n", log->size);
return -EINVAL;
}

@@ -69,7 +69,7 @@ static const struct ivpu_bo_ops prime_ops = {
static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
{
int npages = bo->base.size >> PAGE_SHIFT;
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct page **pages;
pages = drm_gem_get_pages(&bo->base);
@@ -88,7 +88,7 @@ static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
static void shmem_free_pages_locked(struct ivpu_bo *bo)
{
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
drm_gem_put_pages(&bo->base, bo->pages, true, false);
bo->pages = NULL;
@@ -96,7 +96,7 @@ static void shmem_free_pages_locked(struct ivpu_bo *bo)
static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
{
int npages = bo->base.size >> PAGE_SHIFT;
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
int ret;
@@ -142,7 +142,7 @@ static const struct ivpu_bo_ops shmem_ops = {
static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct page **pages;
int ret;
@@ -171,10 +171,10 @@ err_free_pages:
static void internal_free_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
for (i = 0; i < npages; i++)
put_page(bo->pages[i]);
@@ -291,7 +291,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
}
mutex_lock(&ctx->lock);
ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
bo->vpu_addr = bo->mm_node.start;
@@ -438,7 +438,7 @@ static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
if (obj->import_attach) {
/* Drop the reference drm_gem_mmap_obj() acquired.*/
@@ -553,7 +553,7 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
drm_gem_object_put(&bo->base);
ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
return ret;
}
@@ -590,22 +590,22 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
goto err_put;
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
if (bo->flags & DRM_IVPU_BO_WC)
set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
else if (bo->flags & DRM_IVPU_BO_UNCACHED)
set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
if (!bo->kvaddr) {
ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
goto err_put;
}
ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
bo->vpu_addr, bo->base.size, flags);
bo->vpu_addr, ivpu_bo_size(bo), flags);
return bo;
@@ -718,7 +718,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
}
