Files
CoolPi-Armbian-Rockchip-RK3…/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
2025-02-04 12:41:12 -06:00

404 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
/*
* Implementation of the dummy job execution workaround for the GPU hang issue.
*/
#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <mali_kbase_dummy_job_wa.h>
#include <linux/firmware.h>
#include <linux/delay.h>
#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa"
/*
 * Header at the very start of the workaround firmware file
 * (DUMMY_JOB_WA_BINARY_NAME). Checked against the expected signature
 * (0x4157) and version (2) before anything else is parsed.
 */
struct wa_header {
	u16 signature;   /* magic identifying the file as a WA blob */
	u16 version;     /* blob format version; only 2 is supported */
	u32 info_offset; /* byte offset of struct wa_v2_info in the file */
} __packed;
/*
 * Version-2 info record, found at wa_header::info_offset. Describes how
 * the dummy job must be submitted and where the payload blobs live.
 */
struct wa_v2_info {
	u64 jc;          /* GPU VA of the dummy job chain to execute */
	u32 js;          /* job slot to submit the dummy job on */
	u32 blob_offset; /* byte offset of the first struct wa_blob; 0 = none */
	u64 flags;       /* KBASE_DUMMY_JOB_WA_FLAG_* execution options */
} __packed;
/*
 * One payload blob in a singly-linked chain (terminated by a zero
 * blob_offset). Each payload is mapped at the fixed GPU VA 'base' and
 * copied in from 'payload_offset' within the firmware file.
 */
struct wa_blob {
	u64 base;           /* fixed GPU VA for the mapping (must be page aligned) */
	u32 size;           /* payload size in bytes */
	u32 map_flags;      /* base_mem_alloc_flags for the GPU mapping */
	u32 payload_offset; /* byte offset of the payload data in the file */
	u32 blob_offset;    /* byte offset of the next blob, or 0 to end the chain */
} __packed;
/*
 * within_range - check that [off, off + sz) lies fully inside [base, end).
 *
 * Used to validate offsets/sizes read from the external WA firmware file
 * before they are dereferenced, so it must be robust against hostile
 * values. The previous expression cast (end - base - off) straight to
 * size_t: when off exceeded the buffer size the signed result went
 * negative, wrapped to a huge unsigned value, and the check wrongly
 * passed - allowing out-of-bounds reads of the firmware buffer. Validate
 * the offset explicitly before any subtraction can go negative.
 *
 * Return: true only if the whole span fits within the buffer.
 */
static bool within_range(const u8 *base, const u8 *end, off_t off, size_t sz)
{
	const size_t buf_size = (size_t)(end - base);

	/* Reject negative or past-the-end offsets up front. */
	if (off < 0 || (size_t)off > buf_size)
		return false;

	/* Remaining space from off to end must hold sz bytes. */
	return (buf_size - (size_t)off) >= sz;
}
/*
 * wait_any - poll a 32-bit register until any of @bits becomes set.
 *
 * Polls up to 100 times with a 10us delay between reads (~1ms total).
 * Logs an error on timeout.
 *
 * Return: the subset of @bits observed set in the last read (0 on timeout).
 */
static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits)
{
	const int max_polls = 100;
	u32 reg_val = 0;
	int poll;

	for (poll = 0; poll < max_polls; poll++) {
		reg_val = kbase_reg_read32(kbdev, offset);
		if (reg_val & bits)
			break;
		udelay(10);
	}

	if (poll == max_polls)
		dev_err(kbdev->dev,
			"Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
			(unsigned long)offset, (unsigned long)bits, (unsigned long)reg_val);

	return reg_val & bits;
}
/*
 * run_job - submit one dummy WA job and wait for it to finish.
 *
 * Programs the slot's next-job registers (job chain, core affinity,
 * config with the given address space), kicks the job, then waits on
 * JOB_IRQ_RAWSTAT for either the "done" or "failed" bit of the slot and
 * acknowledges whatever was raised.
 *
 * Return: 0 if the job completed cleanly, -EFAULT on failure/timeout.
 */
static inline int run_job(struct kbase_device *kbdev, int as, u32 slot, u64 cores, u64 jc)
{
	const u32 done_mask = 1ul << slot;        /* slot completion IRQ bit */
	const u32 fail_mask = 1ul << (16 + slot); /* slot failure IRQ bit */
	u32 irq_bits;

	/* Program the next-job registers for this slot. */
	kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(slot, HEAD_NEXT), jc);
	kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(slot, AFFINITY_NEXT), cores);
	kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(slot, CONFIG_NEXT),
			  JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | (unsigned int)as);

	/* Kick the job off. */
	kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(slot, COMMAND_NEXT), JS_COMMAND_START);

	/* Wait for completion or failure, then acknowledge the IRQ. */
	irq_bits = wait_any(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT), fail_mask | done_mask);
	kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), irq_bits);

	if (irq_bits == done_mask)
		return 0;

	dev_err(kbdev->dev, "Failed to run WA job on slot %u cores 0x%llx: done 0x%lx\n",
		slot, (unsigned long long)cores, (unsigned long)irq_bits);
	dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n",
		kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(slot, STATUS)));
	return -EFAULT;
}
/*
 * kbase_dummy_job_wa_execute - run the dummy WA job(s) on the cores in @cores.
 *
 * To be called after power up & MMU init, but before everything else:
 * all GPU/JOB IRQs are masked and the slot registers are driven directly,
 * so nothing else may be using the GPU while this runs.
 *
 * Return: 0 if every submitted job completed, -EFAULT otherwise.
 */
int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
{
	int as;
	u32 slot;
	u64 jc;
	int failed = 0; /* WA jobs that did not complete cleanly */
	int runs = 0;   /* total WA jobs submitted */
	u32 old_gpu_mask;
	u32 old_job_mask;
	u64 val;
	const u32 timeout_us = 10000;

	if (!kbdev)
		return -EFAULT;

	if (!kbdev->dummy_job_wa.kctx)
		return -EFAULT;

	/* Parameters recorded by kbase_dummy_job_wa_load() from the WA blob */
	as = kbdev->dummy_job_wa.kctx->as_nr;
	slot = kbdev->dummy_job_wa.slot;
	jc = kbdev->dummy_job_wa.jc;

	/* mask off all but MMU IRQs, remembering the old masks for restore */
	old_gpu_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK));
	old_job_mask = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK));
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), 0);
	kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0);

	/* power up requested cores */
	kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRON), cores);

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) {
		/* wait for power-ups: all requested cores must report ready */
		kbase_reg_poll64_timeout(kbdev, GPU_CONTROL_ENUM(SHADER_READY), val,
					 (val & cores) == cores, 10, timeout_us, false);
	}

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
		size_t i;

		/* do for each requested core, one job per core in turn */
		for (i = 0; i < sizeof(cores) * 8; i++) {
			u64 affinity;

			affinity = 1ull << i;

			if (!(cores & affinity))
				continue;

			if (run_job(kbdev, as, slot, affinity, jc))
				failed++;
			runs++;
		}
	} else {
		/* one job covering all requested cores at once */
		if (run_job(kbdev, as, slot, cores, jc))
			failed++;
		runs++;
	}

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) {
		/* power off shader cores (to reduce any dynamic leakage) */
		kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(SHADER_PWROFF), cores);

		/* wait for power off complete: cores neither ready nor in transition */
		kbase_reg_poll64_timeout(kbdev, GPU_CONTROL_ENUM(SHADER_READY), val, !(val & cores),
					 10, timeout_us, false);
		kbase_reg_poll64_timeout(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS), val,
					 !(val & cores), 10, timeout_us, false);
		/* clear any GPU IRQs raised while they were masked */
		kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), U32_MAX);
	}

	/* restore IRQ masks */
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), old_gpu_mask);
	kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), old_job_mask);

	if (failed)
		dev_err(kbdev->dev, "WA complete with %d failures out of %d runs\n", failed, runs);

	return failed ? -EFAULT : 0;
}
/*
 * dummy_job_wa_info_show - sysfs read handler for 'dummy_job_wa_info'.
 *
 * Reports the job slot and flags recorded from the loaded WA blob.
 *
 * Return: bytes written into @buf, or -ENODEV when no WA is loaded.
 */
static ssize_t dummy_job_wa_info_show(struct device *const dev, struct device_attribute *const attr,
				      char *const buf)
{
	struct kbase_device *const kbdev = dev_get_drvdata(dev);

	CSTD_UNUSED(attr);

	if (!kbdev || !kbdev->dummy_job_wa.kctx)
		return -ENODEV;

	return scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", kbdev->dummy_job_wa.slot,
			 kbdev->dummy_job_wa.flags);
}

static DEVICE_ATTR_RO(dummy_job_wa_info);
/*
 * wa_blob_load_needed - decide whether this device needs the WA blob.
 *
 * Return: true only on GPUs affected by TTRX-3485, and never on
 * "arm,juno" platforms.
 */
static bool wa_blob_load_needed(struct kbase_device *kbdev)
{
	/* Juno platforms are explicitly excluded. */
	if (of_machine_is_compatible("arm,juno"))
		return false;

	return kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_3485);
}
/*
 * kbase_dummy_job_wa_load - load and parse the dummy-job WA firmware blob.
 *
 * Fetches DUMMY_JOB_WA_BINARY_NAME via request_firmware(), validates the
 * header (signature 0x4157, version 2), records slot/jc/flags from the v2
 * info record into kbdev->dummy_job_wa, and maps + copies each payload
 * blob into a dedicated privileged context at the fixed GPU VAs the blob
 * specifies. Must be called with kbdev->fw_load_lock held.
 *
 * Return: 0 on success (including when the WA is not needed), -ENODEV if
 * the firmware file is missing, -EFAULT if the blob is malformed.
 */
int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
{
	const struct firmware *firmware;
	static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME;
	const u32 signature = 0x4157; /* expected wa_header::signature */
	const u32 version = 2;        /* only v2 blobs are understood */
	const u8 *fw_end;
	const u8 *fw;
	const struct wa_header *header;
	const struct wa_v2_info *v2_info;
	u32 blob_offset;
	int err;
	struct kbase_context *kctx;

	/* Calls to this function are inherently asynchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	lockdep_assert_held(&kbdev->fw_load_lock);

	/* Nothing to do on GPUs that do not need the workaround */
	if (!wa_blob_load_needed(kbdev))
		return 0;

	/* load the wa */
	err = request_firmware(&firmware, wa_name, kbdev->dev);
	if (err) {
		dev_err(kbdev->dev,
			"WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, "
			"Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed");
		return -ENODEV;
	}

	/* Dedicated context that will own the WA mappings and jobs */
	kctx = kbase_create_context(kbdev, true, BASE_CONTEXT_CREATE_FLAG_NONE, 0, NULL);

	if (!kctx) {
		dev_err(kbdev->dev, "Failed to create WA context\n");
		goto no_ctx;
	}

	fw = firmware->data;
	fw_end = fw + firmware->size;

	dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", firmware->size);

	/* Bounds-check the fixed-size header before dereferencing it */
	if (!within_range(fw, fw_end, 0, sizeof(*header))) {
		dev_err(kbdev->dev, "WA too small\n");
		goto bad_fw;
	}

	header = (const struct wa_header *)(fw + 0);

	if (header->signature != signature) {
		dev_err(kbdev->dev, "WA signature failure: 0x%lx\n",
			(unsigned long)header->signature);
		goto bad_fw;
	}

	if (header->version != version) {
		dev_err(kbdev->dev, "WA version 0x%lx not supported\n",
			(unsigned long)header->version);
		goto bad_fw;
	}

	if (!within_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) {
		dev_err(kbdev->dev, "WA info offset out of bounds\n");
		goto bad_fw;
	}

	v2_info = (const struct wa_v2_info *)(fw + header->info_offset);

	if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) {
		dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n",
			(unsigned long long)v2_info->flags);
		goto bad_fw;
	}

	/* Record how the WA must be executed later */
	kbdev->dummy_job_wa.slot = v2_info->js;
	kbdev->dummy_job_wa.jc = v2_info->jc;
	kbdev->dummy_job_wa.flags = v2_info->flags;

	blob_offset = v2_info->blob_offset;

	/* Walk the chain of payload blobs; a zero offset terminates it */
	while (blob_offset) {
		const struct wa_blob *blob;
		size_t nr_pages;
		base_mem_alloc_flags flags;
		u64 gpu_va;
		struct kbase_va_region *va_region;

		if (!within_range(fw, fw_end, blob_offset, sizeof(*blob))) {
			dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n",
				(unsigned long)blob_offset);
			goto bad_fw;
		}

		blob = (const struct wa_blob *)(fw + blob_offset);
		if (!within_range(fw, fw_end, blob->payload_offset, blob->size)) {
			dev_err(kbdev->dev, "Payload out-of-bounds\n");
			goto bad_fw;
		}

		gpu_va = blob->base;
		/* The blob demands a fixed GPU VA, so it must be page aligned */
		if (PAGE_ALIGN(gpu_va) != gpu_va) {
			dev_err(kbdev->dev, "blob not page aligned\n");
			goto bad_fw;
		}
		nr_pages = PFN_UP(blob->size);
		flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED;

		va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &gpu_va,
					    mmu_sync_info);

		if (!va_region) {
			/* NOTE(review): allocation/copy failures are logged but do
			 * not abort the load - presumably a later fault is deemed
			 * acceptable; confirm before converting this to goto bad_fw.
			 */
			dev_err(kbdev->dev, "Failed to allocate for blob\n");
		} else {
			struct kbase_vmap_struct vmap = { 0 };
			const u8 *payload;
			void *dst;

			/* copy the payload, */
			payload = fw + blob->payload_offset;
			/* ... via a temporary kernel mapping of the region */
			dst = kbase_vmap(kctx, va_region->start_pfn << PAGE_SHIFT,
					 nr_pages << PAGE_SHIFT, &vmap);
			if (dst) {
				memcpy(dst, payload, blob->size);
				kbase_vunmap(kctx, &vmap);
			} else {
				dev_err(kbdev->dev, "Failed to copy payload\n");
			}
		}
		blob_offset = blob->blob_offset; /* follow chain */
	}

	release_firmware(firmware);

	/* Keep the WA context permanently scheduled in */
	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	kbdev->dummy_job_wa.kctx = kctx;

	err = sysfs_create_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr);
	if (err)
		/* Non-fatal: the WA still functions without the sysfs file */
		dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n");

	return 0;

bad_fw:
	kbase_destroy_context(kctx);
no_ctx:
	release_firmware(firmware);
	return -EFAULT;
}
/*
 * kbase_dummy_job_wa_cleanup - tear down the dummy-job WA state.
 *
 * Removes the sysfs attribute and destroys the WA context, publishing the
 * NULL kctx pointer (with a full barrier) before the teardown so other
 * observers never see a dying context.
 */
void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev)
{
	struct kbase_context *wa_kctx;

	/* Nothing to undo if the WA blob was never loaded */
	if (!kbdev->dummy_job_wa_loaded)
		return;

	/* Can be safely called even if the file wasn't created on probe */
	sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr);

	wa_kctx = READ_ONCE(kbdev->dummy_job_wa.kctx);
	WRITE_ONCE(kbdev->dummy_job_wa.kctx, NULL);
	/* make this write visible before we tear down the ctx */
	smp_mb();

	if (!wa_kctx)
		return;

	kbasep_js_release_privileged_ctx(kbdev, wa_kctx);
	kbase_destroy_context(wa_kctx);
}