// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) Rockchip Electronics Co.Ltd
 * Author: Felix Zeng <felix.zeng@rock-chips.com>
 */

#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/sync_file.h>
#include <linux/io.h>

#include "rknpu_ioctl.h"
#include "rknpu_drv.h"
#include "rknpu_reset.h"
#include "rknpu_gem.h"
#include "rknpu_fence.h"
#include "rknpu_job.h"
#include "rknpu_mem.h"

#define _REG_READ(base, offset) readl(base + (offset))
#define _REG_WRITE(base, value, offset) writel(value, base + (offset))

/* These helpers expect a local "void __iomem *rknpu_core_base" in scope. */
#define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
#define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)

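/*
 * Pick the subcore whose wait queue is used to wait for job completion.
 * Multi-core masks always wait on core 0; a single-core mask waits on
 * that core; unknown masks fall back to core 0.
 */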
static int rknpu_wait_core_index(int core_mask)
{
        int index = 0;

        switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
        case RKNPU_CORE0_MASK:
        case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
        case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
                index = 0;
                break;
        case RKNPU_CORE1_MASK:
                index = 1;
                break;
        case RKNPU_CORE2_MASK:
                index = 2;
                break;
        default:
                break;
        }

        return index;
}

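/* Map a core index back to its RKNPU_COREn_MASK bit. */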
static int rknpu_core_mask(int core_index)
{
        int core_mask = RKNPU_CORE_AUTO_MASK;

        switch (core_index) {
        case 0:
                core_mask = RKNPU_CORE0_MASK;
                break;
        case 1:
                core_mask = RKNPU_CORE1_MASK;
                break;
        case 2:
                core_mask = RKNPU_CORE2_MASK;
                break;
        default:
                break;
        }

        return core_mask;
}

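/*
 * Number of tasks this job runs on the given subcore. On multi-core
 * parts the task list is pre-split per subcore: subcore_task[0..2]
 * describe single- and dual-core runs, while subcore_task[2..4]
 * (indexed by core_index + 2) describe triple-core runs.
 */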
static int rknpu_get_task_number(struct rknpu_job *job, int core_index)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        int task_num = job->args->task_number;

        if (core_index >= RKNPU_MAX_CORES || core_index < 0) {
                LOG_ERROR("invalid core_index: %d\n", core_index);
                return 0;
        }

        if (rknpu_dev->config->num_irqs > 1) {
                if (job->use_core_num == 1 || job->use_core_num == 2)
                        task_num =
                                job->args->subcore_task[core_index].task_number;
                else if (job->use_core_num == 3)
                        task_num = job->args->subcore_task[core_index + 2]
                                           .task_number;
        }

        return task_num;
}

static void rknpu_job_free(struct rknpu_job *job)
{
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
        struct rknpu_gem_object *task_obj = NULL;

        /* Drop the reference taken on the task object in rknpu_job_alloc(). */
        task_obj =
                (struct rknpu_gem_object *)(uintptr_t)job->args->task_obj_addr;
        if (task_obj)
                rknpu_gem_object_put(&task_obj->base);
#endif

        if (job->fence)
                dma_fence_put(job->fence);

        if (job->args_owner)
                kfree(job->args);

        kfree(job);
}

static int rknpu_job_cleanup(struct rknpu_job *job)
{
        rknpu_job_free(job);

        return 0;
}

static void rknpu_job_cleanup_work(struct work_struct *work)
{
        struct rknpu_job *job =
                container_of(work, struct rknpu_job, cleanup_work);

        rknpu_job_cleanup(job);
}

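/*
 * Allocate and initialize a job for the given submit args. Blocking
 * submits borrow the caller's args (args_owner == false); nonblocking
 * submits get a private copy, since the job outlives the ioctl.
 */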
static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev,
                                                struct rknpu_submit *args)
{
        struct rknpu_job *job = NULL;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
        struct rknpu_gem_object *task_obj = NULL;
#endif

        if (rknpu_dev->config->num_irqs == 1)
                args->core_mask = RKNPU_CORE0_MASK;

        job = kzalloc(sizeof(*job), GFP_KERNEL);
        if (!job)
                return NULL;

        job->timestamp = ktime_get();
        job->rknpu_dev = rknpu_dev;
        job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) +
                            ((args->core_mask & RKNPU_CORE1_MASK) >> 1) +
                            ((args->core_mask & RKNPU_CORE2_MASK) >> 2);
        atomic_set(&job->run_count, job->use_core_num);
        atomic_set(&job->interrupt_count, job->use_core_num);
        /*
         * Initialize cleanup_work unconditionally: rknpu_job_timeout_clean()
         * may schedule it for any in-flight job, not only nonblocking ones.
         */
        INIT_WORK(&job->cleanup_work, rknpu_job_cleanup_work);
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
        task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
        if (task_obj)
                rknpu_gem_object_get(&task_obj->base);
#endif

        if (!(args->flags & RKNPU_JOB_NONBLOCK)) {
                job->args = args;
                job->args_owner = false;
                return job;
        }

        job->args = kzalloc(sizeof(*args), GFP_KERNEL);
        if (!job->args) {
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
                /* Don't leak the task object reference taken above. */
                if (task_obj)
                        rknpu_gem_object_put(&task_obj->base);
#endif
                kfree(job);
                return NULL;
        }
        *job->args = *args;
        job->args_owner = true;

        return job;
}

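/*
 * Wait for a blocking job to complete. The wait is retried (up to three
 * times) as long as the hardware commit is younger than the requested
 * timeout, so a job that sat on the todo list is not timed out early.
 */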
static inline int rknpu_job_wait(struct rknpu_job *job)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        struct rknpu_submit *args = job->args;
        struct rknpu_task *last_task = NULL;
        struct rknpu_subcore_data *subcore_data = NULL;
        struct rknpu_job *entry, *q;
        void __iomem *rknpu_core_base = NULL;
        int core_index = rknpu_wait_core_index(job->args->core_mask);
        unsigned long flags;
        int wait_count = 0;
        bool continue_wait = false;
        int ret = -EINVAL;
        int i = 0;

        subcore_data = &rknpu_dev->subcore_datas[core_index];

        do {
                ret = wait_event_timeout(subcore_data->job_done_wq,
                                         job->flags & RKNPU_JOB_DONE ||
                                                 rknpu_dev->soft_reseting,
                                         msecs_to_jiffies(args->timeout));

                if (++wait_count >= 3)
                        break;

                if (ret == 0) {
                        int64_t commit_time = 0;

                        spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
                        commit_time = ktime_us_delta(ktime_get(),
                                                     job->commit_pc_time);
                        continue_wait =
                                job->commit_pc_time == 0 ?
                                        true :
                                        (commit_time < args->timeout * 1000);
                        spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
                        LOG_ERROR(
                                "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n",
                                job, wait_count, continue_wait,
                                (job->commit_pc_time == 0 ? 0 : commit_time),
                                ktime_us_delta(ktime_get(), job->timestamp),
                                args->timeout * 1000);
                }
        } while (ret == 0 && continue_wait);

        last_task = job->last_task;
        if (!last_task) {
                /* The job never reached the hardware: unlink it from every todo list. */
                spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
                for (i = 0; i < job->use_core_num; i++) {
                        subcore_data = &rknpu_dev->subcore_datas[i];
                        list_for_each_entry_safe(
                                entry, q, &subcore_data->todo_list, head[i]) {
                                if (entry == job) {
                                        list_del(&job->head[i]);
                                        break;
                                }
                        }
                }
                spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

                LOG_ERROR("job commit failed\n");
                return ret < 0 ? ret : -EINVAL;
        }

        last_task->int_status = job->int_status[core_index];

        if (ret <= 0) {
                args->task_counter = 0;
                rknpu_core_base = rknpu_dev->base[core_index];
                if (args->flags & RKNPU_JOB_PC) {
                        uint32_t task_status = REG_READ(
                                rknpu_dev->config->pc_task_status_offset);

                        args->task_counter =
                                (task_status &
                                 rknpu_dev->config->pc_task_number_mask);
                }

                LOG_ERROR(
                        "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
                        args->task_counter, args->flags, ret,
                        ktime_us_delta(ktime_get(), job->timestamp));

                return ret < 0 ? ret : -ETIMEDOUT;
        }

        if (!(job->flags & RKNPU_JOB_DONE))
                return -EINVAL;

        args->task_counter = args->task_number;

        return 0;
}

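/*
 * Program one subcore in PC (program counter) mode: point the PC at the
 * first task's register command buffer, set the register config amount,
 * interrupt masks and task control, then pulse PC_OP_EN to start. Large
 * jobs are committed in chunks of max_submit_number tasks, driven by
 * submit_count (see rknpu_job_done()).
 */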
static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job,
                                              int core_index)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        struct rknpu_submit *args = job->args;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
        struct rknpu_gem_object *task_obj =
                (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
#endif
#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
        struct rknpu_mem_object *task_obj =
                (struct rknpu_mem_object *)(uintptr_t)args->task_obj_addr;
#endif
        struct rknpu_task *task_base = NULL;
        struct rknpu_task *first_task = NULL;
        struct rknpu_task *last_task = NULL;
        void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
        int task_start = args->task_start;
        int task_end;
        int task_number = args->task_number;
        int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
        int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
        int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
        int i = 0;
        int submit_index = atomic_read(&job->submit_count[core_index]);
        int max_submit_number = rknpu_dev->config->max_submit_number;
        unsigned long flags;

        if (!task_obj) {
                job->ret = -EINVAL;
                return job->ret;
        }

        if (rknpu_dev->config->num_irqs > 1) {
                for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
                        if (i == core_index) {
                                REG_WRITE((0xe + 0x10000000 * i), 0x1004);
                                REG_WRITE((0xe + 0x10000000 * i), 0x3004);
                        }
                }

                switch (job->use_core_num) {
                case 1:
                case 2:
                        task_start = args->subcore_task[core_index].task_start;
                        task_number =
                                args->subcore_task[core_index].task_number;
                        break;
                case 3:
                        task_start =
                                args->subcore_task[core_index + 2].task_start;
                        task_number =
                                args->subcore_task[core_index + 2].task_number;
                        break;
                default:
                        LOG_ERROR("Unknown use core num %d\n",
                                  job->use_core_num);
                        break;
                }
        }

        /* Clamp this submission to at most max_submit_number tasks. */
        task_start = task_start + submit_index * max_submit_number;
        task_number = task_number - submit_index * max_submit_number;
        task_number = task_number > max_submit_number ? max_submit_number :
                                                        task_number;
        task_end = task_start + task_number - 1;

        task_base = task_obj->kv_addr;

        first_task = &task_base[task_start];
        last_task = &task_base[task_end];

        if (rknpu_dev->config->pc_dma_ctrl) {
                spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
                REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
                spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
        } else {
                REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
        }

        REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
                   pc_data_amount_scale - 1) /
                          pc_data_amount_scale -
                          1,
                  RKNPU_OFFSET_PC_DATA_AMOUNT);

        REG_WRITE(last_task->int_mask, RKNPU_OFFSET_INT_MASK);

        REG_WRITE(first_task->int_mask, RKNPU_OFFSET_INT_CLEAR);

        REG_WRITE(((0x6 | task_pp_en) << pc_task_number_bits) | task_number,
                  RKNPU_OFFSET_PC_TASK_CONTROL);

        REG_WRITE(args->task_base_addr, RKNPU_OFFSET_PC_DMA_BASE_ADDR);

        job->first_task = first_task;
        job->last_task = last_task;
        job->int_mask[core_index] = last_task->int_mask;

        /* Pulse PC_OP_EN to kick off the program counter. */
        REG_WRITE(0x1, RKNPU_OFFSET_PC_OP_EN);
        REG_WRITE(0x0, RKNPU_OFFSET_PC_OP_EN);

        return 0;
}

static inline int rknpu_job_subcore_commit(struct rknpu_job *job,
                                           int core_index)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        struct rknpu_submit *args = job->args;
        void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
        unsigned long flags;

        /* Switch the core to slave mode before (re)programming it. */
        if (rknpu_dev->config->pc_dma_ctrl) {
                spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
                REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
                spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
        } else {
                REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
        }

        /* Only PC-mode submits are supported. */
        if (!(args->flags & RKNPU_JOB_PC)) {
                job->ret = -EINVAL;
                return job->ret;
        }

        return rknpu_job_subcore_commit_pc(job, core_index);
}

static void rknpu_job_commit(struct rknpu_job *job)
{
        switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
        case RKNPU_CORE0_MASK:
                rknpu_job_subcore_commit(job, 0);
                break;
        case RKNPU_CORE1_MASK:
                rknpu_job_subcore_commit(job, 1);
                break;
        case RKNPU_CORE2_MASK:
                rknpu_job_subcore_commit(job, 2);
                break;
        case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
                rknpu_job_subcore_commit(job, 0);
                rknpu_job_subcore_commit(job, 1);
                break;
        case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
                rknpu_job_subcore_commit(job, 0);
                rknpu_job_subcore_commit(job, 1);
                rknpu_job_subcore_commit(job, 2);
                break;
        default:
                LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask);
                break;
        }
}

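/*
 * Pop the next queued job for this subcore and, once every subcore of a
 * multi-core job has picked it up (run_count reaches zero), commit it to
 * the hardware.
 */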
static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
{
        struct rknpu_job *job = NULL;
        struct rknpu_subcore_data *subcore_data = NULL;
        unsigned long flags;

        if (rknpu_dev->soft_reseting)
                return;

        subcore_data = &rknpu_dev->subcore_datas[core_index];

        spin_lock_irqsave(&rknpu_dev->irq_lock, flags);

        if (subcore_data->job || list_empty(&subcore_data->todo_list)) {
                spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
                return;
        }

        job = list_first_entry(&subcore_data->todo_list, struct rknpu_job,
                               head[core_index]);

        list_del_init(&job->head[core_index]);
        subcore_data->job = job;
        job->hw_recoder_time = ktime_get();
        job->commit_pc_time = job->hw_recoder_time;
        spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

        if (atomic_dec_and_test(&job->run_count))
                rknpu_job_commit(job);
}

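/*
 * Per-subcore completion. If more task chunks remain, recommit the next
 * chunk instead of finishing. The last subcore to complete
 * (interrupt_count reaches zero) marks the job done, signals its fence
 * and wakes the waiter.
 */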
static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        struct rknpu_subcore_data *subcore_data = NULL;
        unsigned long flags;
        int max_submit_number = rknpu_dev->config->max_submit_number;

        if (atomic_inc_return(&job->submit_count[core_index]) <
            (rknpu_get_task_number(job, core_index) + max_submit_number - 1) /
                    max_submit_number) {
                rknpu_job_subcore_commit(job, core_index);
                return;
        }

        subcore_data = &rknpu_dev->subcore_datas[core_index];

        spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
        subcore_data->job = NULL;
        subcore_data->task_num -= rknpu_get_task_number(job, core_index);
        subcore_data->timer.busy_time +=
                ktime_us_delta(ktime_get(), job->hw_recoder_time);
        spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

        if (atomic_dec_and_test(&job->interrupt_count)) {
                int use_core_num = job->use_core_num;

                job->flags |= RKNPU_JOB_DONE;
                job->ret = ret;

                if (job->fence)
                        dma_fence_signal(job->fence);

                if (job->flags & RKNPU_JOB_ASYNC)
                        schedule_work(&job->cleanup_work);

                /* Multi-core waiters always sleep on core 0's wait queue. */
                if (use_core_num > 1)
                        wake_up(&rknpu_dev->subcore_datas[0].job_done_wq);
                else
                        wake_up(&subcore_data->job_done_wq);
        }

        rknpu_job_next(rknpu_dev, core_index);
}

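/*
 * Queue a job. For RKNPU_CORE_AUTO_MASK, pick one core: sort the three
 * subcores by queued task count and prefer the least-loaded idle one.
 */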
static void rknpu_job_schedule(struct rknpu_job *job)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        struct rknpu_subcore_data *subcore_data = NULL;
        int i = 0, core_index = 0;
        unsigned long flags;
        int task_num_list[3] = { 0, 1, 2 };
        int tmp = 0;

        if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) ==
            RKNPU_CORE_AUTO_MASK) {
                /* Sort the three subcores by queued task count. */
                if (rknpu_dev->subcore_datas[0].task_num >
                    rknpu_dev->subcore_datas[1].task_num) {
                        tmp = task_num_list[1];
                        task_num_list[1] = task_num_list[0];
                        task_num_list[0] = tmp;
                }
                if (rknpu_dev->subcore_datas[task_num_list[0]].task_num >
                    rknpu_dev->subcore_datas[2].task_num) {
                        tmp = task_num_list[2];
                        task_num_list[2] = task_num_list[1];
                        task_num_list[1] = task_num_list[0];
                        task_num_list[0] = tmp;
                } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num >
                           rknpu_dev->subcore_datas[2].task_num) {
                        tmp = task_num_list[2];
                        task_num_list[2] = task_num_list[1];
                        task_num_list[1] = tmp;
                }
                /* Prefer the least-loaded idle core. */
                if (!rknpu_dev->subcore_datas[task_num_list[0]].job)
                        core_index = task_num_list[0];
                else if (!rknpu_dev->subcore_datas[task_num_list[1]].job)
                        core_index = task_num_list[1];
                else if (!rknpu_dev->subcore_datas[task_num_list[2]].job)
                        core_index = task_num_list[2];
                else
                        core_index = task_num_list[0];

                job->args->core_mask = rknpu_core_mask(core_index);
                job->use_core_num = 1;
                atomic_set(&job->run_count, job->use_core_num);
                atomic_set(&job->interrupt_count, job->use_core_num);
        }

        spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
        for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
                if (job->args->core_mask & rknpu_core_mask(i)) {
                        subcore_data = &rknpu_dev->subcore_datas[i];
                        list_add_tail(&job->head[i], &subcore_data->todo_list);
                        subcore_data->task_num +=
                                rknpu_get_task_number(job, i);
                }
        }
        spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

        for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
                if (job->args->core_mask & rknpu_core_mask(i))
                        rknpu_job_next(rknpu_dev, i);
        }
}

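/*
 * Abort a failed or timed-out job: give a late interrupt a short grace
 * period, detach the job from its subcores, soft-reset the NPU on
 * timeout, then free the job.
 */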
static void rknpu_job_abort(struct rknpu_job *job)
{
        struct rknpu_device *rknpu_dev = job->rknpu_dev;
        struct rknpu_subcore_data *subcore_data = NULL;
        unsigned long flags;
        int i = 0;

        msleep(100);

        spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
        for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
                if (job->args->core_mask & rknpu_core_mask(i)) {
                        subcore_data = &rknpu_dev->subcore_datas[i];
                        if (job == subcore_data->job && !job->irq_entry[i]) {
                                subcore_data->job = NULL;
                                subcore_data->task_num -=
                                        rknpu_get_task_number(job, i);
                        }
                }
        }
        spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

        if (job->ret == -ETIMEDOUT) {
                LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
                for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
                        if (job->args->core_mask & rknpu_core_mask(i)) {
                                void __iomem *rknpu_core_base =
                                        rknpu_dev->base[i];
                                LOG_ERROR(
                                        "\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
                                        i, REG_READ(RKNPU_OFFSET_INT_STATUS),
                                        REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
                                        job->int_mask[i],
                                        (REG_READ(
                                                 rknpu_dev->config
                                                         ->pc_task_status_offset) &
                                         rknpu_dev->config->pc_task_number_mask),
                                        ktime_us_delta(ktime_get(),
                                                       job->timestamp));
                        }
                }
                rknpu_soft_reset(rknpu_dev);
        } else {
                LOG_ERROR(
                        "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
                        job->flags, job->ret,
                        ktime_us_delta(ktime_get(), job->timestamp));
        }

        rknpu_job_cleanup(job);
}

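/*
 * Each pair of interrupt status bits belongs to one execution unit, and
 * either bit of a pair may fire. Widen every set bit to its whole pair
 * so the status can be compared against the expected interrupt mask.
 */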
static inline uint32_t rknpu_fuzz_status(uint32_t status)
{
        uint32_t fuzz_status = 0;

        if ((status & 0x3) != 0)
                fuzz_status |= 0x3;

        if ((status & 0xc) != 0)
                fuzz_status |= 0xc;

        if ((status & 0x30) != 0)
                fuzz_status |= 0x30;

        if ((status & 0xc0) != 0)
                fuzz_status |= 0xc0;

        if ((status & 0x300) != 0)
                fuzz_status |= 0x300;

        if ((status & 0xc00) != 0)
                fuzz_status |= 0xc00;

        return fuzz_status;
}

static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index)
{
        struct rknpu_device *rknpu_dev = data;
        void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
        struct rknpu_subcore_data *subcore_data = NULL;
        struct rknpu_job *job = NULL;
        uint32_t status = 0;
        unsigned long flags;

        subcore_data = &rknpu_dev->subcore_datas[core_index];

        spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
        job = subcore_data->job;
        if (!job) {
                /* Spurious interrupt: clear it and try to start the next job. */
                spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
                REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
                rknpu_job_next(rknpu_dev, core_index);
                return IRQ_HANDLED;
        }
        job->irq_entry[core_index] = true;
        spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

        status = REG_READ(RKNPU_OFFSET_INT_STATUS);

        job->int_status[core_index] = status;

        if (rknpu_fuzz_status(status) != job->int_mask[core_index]) {
                LOG_ERROR(
                        "invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n",
                        status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
                        job->int_mask[core_index],
                        (REG_READ(rknpu_dev->config->pc_task_status_offset) &
                         rknpu_dev->config->pc_task_number_mask));
                REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
                return IRQ_HANDLED;
        }

        REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);

        rknpu_job_done(job, 0, core_index);

        return IRQ_HANDLED;
}

irqreturn_t rknpu_core0_irq_handler(int irq, void *data)
{
        return rknpu_irq_handler(irq, data, 0);
}

irqreturn_t rknpu_core1_irq_handler(int irq, void *data)
{
        return rknpu_irq_handler(irq, data, 1);
}

irqreturn_t rknpu_core2_irq_handler(int irq, void *data)
{
        return rknpu_irq_handler(irq, data, 2);
}

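/*
 * Called before a nonblocking submit: if a job on one of the requested
 * cores has outlived its timeout, soft-reset the NPU and drain the
 * subcore's todo list, deferring the actual frees to cleanup_work.
 */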
static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev,
                                    int core_mask)
{
        struct rknpu_job *job = NULL;
        unsigned long flags;
        struct rknpu_subcore_data *subcore_data = NULL;
        int i = 0;

        for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
                if (core_mask & rknpu_core_mask(i)) {
                        subcore_data = &rknpu_dev->subcore_datas[i];
                        job = subcore_data->job;
                        /* args->timeout is in ms, the elapsed delta in us. */
                        if (job &&
                            ktime_us_delta(ktime_get(), job->timestamp) >=
                                    job->args->timeout * 1000) {
                                rknpu_soft_reset(rknpu_dev);

                                spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
                                subcore_data->job = NULL;
                                spin_unlock_irqrestore(&rknpu_dev->irq_lock,
                                                       flags);

                                do {
                                        schedule_work(&job->cleanup_work);

                                        spin_lock_irqsave(&rknpu_dev->irq_lock,
                                                          flags);

                                        if (!list_empty(
                                                    &subcore_data->todo_list)) {
                                                job = list_first_entry(
                                                        &subcore_data->todo_list,
                                                        struct rknpu_job,
                                                        head[i]);
                                                list_del_init(&job->head[i]);
                                        } else {
                                                job = NULL;
                                        }

                                        spin_unlock_irqrestore(
                                                &rknpu_dev->irq_lock, flags);
                                } while (job);
                        }
                }
        }
}

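/*
 * Common submit path for both the DRM and DMA-heap ioctls. Handles the
 * optional in/out fences, then either schedules the job asynchronously
 * (RKNPU_JOB_NONBLOCK) or schedules it and waits for completion.
 */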
static int rknpu_submit(struct rknpu_device *rknpu_dev,
                        struct rknpu_submit *args)
{
        struct rknpu_job *job = NULL;
        int ret = -EINVAL;

        if (args->task_number == 0) {
                LOG_ERROR("invalid rknpu task number!\n");
                return -EINVAL;
        }

        job = rknpu_job_alloc(rknpu_dev, args);
        if (!job) {
                LOG_ERROR("failed to allocate rknpu job!\n");
                return -ENOMEM;
        }

        if (args->flags & RKNPU_JOB_FENCE_IN) {
#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
                struct dma_fence *in_fence;

                in_fence = sync_file_get_fence(args->fence_fd);

                if (!in_fence) {
                        LOG_ERROR("invalid fence in fd, fd: %d\n",
                                  args->fence_fd);
                        /* Don't leak the freshly allocated job. */
                        rknpu_job_free(job);
                        return -EINVAL;
                }
                args->fence_fd = -1;

                /*
                 * Wait if the fence is from a foreign context, or if the fence
                 * array contains any fence from a foreign context.
                 */
                ret = 0;
                if (!dma_fence_match_context(in_fence,
                                             rknpu_dev->fence_ctx->context))
                        ret = dma_fence_wait_timeout(in_fence, true,
                                                     args->timeout);
                dma_fence_put(in_fence);
                if (ret < 0) {
                        if (ret != -ERESTARTSYS)
                                LOG_ERROR("Error (%d) waiting for fence!\n",
                                          ret);

                        rknpu_job_free(job);
                        return ret;
                }
#else
                LOG_ERROR(
                        "failed to use rknpu fence, please enable rknpu fence config!\n");
                rknpu_job_free(job);
                return -EINVAL;
#endif
        }

        if (args->flags & RKNPU_JOB_FENCE_OUT) {
#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
                ret = rknpu_fence_alloc(job);
                if (ret) {
                        rknpu_job_free(job);
                        return ret;
                }
                job->args->fence_fd = rknpu_fence_get_fd(job);
                args->fence_fd = job->args->fence_fd;
#else
                LOG_ERROR(
                        "failed to use rknpu fence, please enable rknpu fence config!\n");
                rknpu_job_free(job);
                return -EINVAL;
#endif
        }

        if (args->flags & RKNPU_JOB_NONBLOCK) {
                job->flags |= RKNPU_JOB_ASYNC;
                rknpu_job_timeout_clean(rknpu_dev, job->args->core_mask);
                rknpu_job_schedule(job);
                ret = job->ret;
                if (ret) {
                        rknpu_job_abort(job);
                        return ret;
                }
        } else {
                rknpu_job_schedule(job);
                if (args->flags & RKNPU_JOB_PC)
                        job->ret = rknpu_job_wait(job);

                args->task_counter = job->args->task_counter;
                ret = job->ret;
                if (!ret)
                        rknpu_job_cleanup(job);
                else
                        rknpu_job_abort(job);
        }

        return ret;
}

#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
int rknpu_submit_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file_priv)
{
        struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev);
        struct rknpu_submit *args = data;

        return rknpu_submit(rknpu_dev, args);
}
#endif

#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
{
        struct rknpu_submit args;
        int ret = -EINVAL;

        if (unlikely(copy_from_user(&args, (struct rknpu_submit __user *)data,
                                    sizeof(struct rknpu_submit)))) {
                LOG_ERROR("%s: copy_from_user failed\n", __func__);
                ret = -EFAULT;
                return ret;
        }

        ret = rknpu_submit(rknpu_dev, &args);

        /* Copy results (task_counter, fence_fd) back even on failure. */
        if (unlikely(copy_to_user((struct rknpu_submit __user *)data, &args,
                                  sizeof(struct rknpu_submit)))) {
                LOG_ERROR("%s: copy_to_user failed\n", __func__);
                ret = -EFAULT;
                return ret;
        }

        return ret;
}
#endif

int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version)
{
        void __iomem *rknpu_core_base = rknpu_dev->base[0];

        if (version == NULL)
                return -EINVAL;

        *version = REG_READ(RKNPU_OFFSET_VERSION) +
                   (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff);

        return 0;
}

int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority,
                          uint32_t *expect, uint32_t *tw)
{
        void __iomem *base = rknpu_dev->bw_priority_base;

        if (!rknpu_dev->config->bw_enable) {
                LOG_WARN("Get bw_priority is not supported on this device!\n");
                return 0;
        }

        if (!base)
                return -EINVAL;

        spin_lock(&rknpu_dev->lock);

        if (priority != NULL)
                *priority = _REG_READ(base, 0x0);

        if (expect != NULL)
                *expect = _REG_READ(base, 0x8);

        if (tw != NULL)
                *tw = _REG_READ(base, 0xc);

        spin_unlock(&rknpu_dev->lock);

        return 0;
}

int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority,
                          uint32_t expect, uint32_t tw)
{
        void __iomem *base = rknpu_dev->bw_priority_base;

        if (!rknpu_dev->config->bw_enable) {
                LOG_WARN("Set bw_priority is not supported on this device!\n");
                return 0;
        }

        if (!base)
                return -EINVAL;

        spin_lock(&rknpu_dev->lock);

        if (priority != 0)
                _REG_WRITE(base, priority, 0x0);

        if (expect != 0)
                _REG_WRITE(base, expect, 0x8);

        if (tw != 0)
                _REG_WRITE(base, tw, 0xc);

        spin_unlock(&rknpu_dev->lock);

        return 0;
}

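/*
 * Clear the read/write bandwidth counters. On pc_dma_ctrl parts the PC
 * data address register doubles as the master/slave mode switch (see
 * rknpu_job_subcore_commit()), so it is saved, set to slave mode around
 * the clear, and restored under irq_lock.
 */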
int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
{
        void __iomem *rknpu_core_base = rknpu_dev->base[0];
        unsigned long flags;

        if (!rknpu_dev->config->bw_enable) {
                LOG_WARN("Clear rw_amount is not supported on this device!\n");
                return 0;
        }

        if (rknpu_dev->config->pc_dma_ctrl) {
                uint32_t pc_data_addr = 0;

                spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
                pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);

                REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
                REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
                REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
                REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
                spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
        } else {
                spin_lock(&rknpu_dev->lock);
                REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
                REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
                spin_unlock(&rknpu_dev->lock);
        }

        return 0;
}

int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr,
                        uint32_t *dt_rd, uint32_t *wd_rd)
{
        void __iomem *rknpu_core_base = rknpu_dev->base[0];
        int amount_scale = rknpu_dev->config->pc_data_amount_scale;

        if (!rknpu_dev->config->bw_enable) {
                LOG_WARN("Get rw_amount is not supported on this device!\n");
                return 0;
        }

        spin_lock(&rknpu_dev->lock);

        if (dt_wr != NULL)
                *dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale;

        if (dt_rd != NULL)
                *dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale;

        if (wd_rd != NULL)
                *wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale;

        spin_unlock(&rknpu_dev->lock);

        return 0;
}

int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount)
{
        uint32_t dt_wr = 0;
        uint32_t dt_rd = 0;
        uint32_t wd_rd = 0;
        int ret = -EINVAL;

        if (!rknpu_dev->config->bw_enable) {
                LOG_WARN(
                        "Get total_rw_amount is not supported on this device!\n");
                return 0;
        }

        ret = rknpu_get_rw_amount(rknpu_dev, &dt_wr, &dt_rd, &wd_rd);

        if (amount != NULL)
                *amount = dt_wr + dt_rd + wd_rd;

        return ret;
}