/*
 * drivers/video/tegra/host/nvhost_cdma.c
 *
 * Tegra Graphics Host Command DMA
 *
 * Copyright (c) 2010-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "nvhost_cdma.h"
#include "nvhost_channel.h"
#include "nvhost_job.h"
#include "dev.h"
#include "debug.h"
#include "chip_support.h"

#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/ktime.h>
#include <linux/workqueue.h>
#include <linux/semaphore.h>
#include <trace/events/nvhost.h>

/*
 * TODO:
 *   stats
 *     - for figuring out what to optimize further
 *   resizable push buffer
 *     - some channels hardly need any, some channels (3d) could use more
 */

/*
 * push_buffer
 *
 * The push buffer is a circular array of words to be fetched by command DMA.
 * Note that it works slightly differently from the sync queue; fence == cur
 * means that the push buffer is full, not empty.
 */

/**
 * Allocate pushbuffer memory
 */
int nvhost_push_buffer_alloc(struct push_buffer *pb)
{
	struct nvhost_cdma *cdma = pb_to_cdma(pb);

	pb->mapped = NULL;
	pb->dma_addr = 0;

	pb->mapped = dma_alloc_coherent(&cdma_to_dev(cdma)->dev->dev,
					PUSH_BUFFER_SIZE + 4,
					&pb->dma_addr,
					GFP_KERNEL);
	if (!pb->mapped) {
		nvhost_err(NULL, "failed to allocate pushbuffer");
		pb->mapped = NULL;
		return -ENOMEM;
	}

	/* for now, map pushbuffer to all address spaces */
	nvhost_vm_map_static(cdma_to_dev(cdma)->dev, pb->mapped,
			     pb->dma_addr, PUSH_BUFFER_SIZE + 4);

	return 0;
}

/**
 * Clean up push buffer resources
 */
void nvhost_push_buffer_destroy(struct push_buffer *pb)
{
	struct nvhost_cdma *cdma = pb_to_cdma(pb);

	if (pb->mapped)
		dma_free_coherent(&cdma_to_dev(cdma)->dev->dev,
				  PUSH_BUFFER_SIZE + 4,
				  pb->mapped, pb->dma_addr);

	pb->mapped = NULL;
	pb->dma_addr = 0;
}

/**
 * Push two words to the push buffer
 * Caller must ensure push buffer is not full
 */
static void nvhost_push_buffer_push_to(struct push_buffer *pb, u32 op1, u32 op2)
{
	u32 cur = pb->cur;
	u32 *p = (u32 *)((uintptr_t)pb->mapped + cur);

	WARN_ON(cur == pb->fence);

	*(p++) = op1;
	*(p++) = op2;
	pb->cur = (cur + 8) & (PUSH_BUFFER_SIZE - 1);
}

/**
 * Pop a number of two word slots from the push buffer
 * Caller must ensure push buffer is not empty
 */
static void nvhost_push_buffer_pop_from(struct push_buffer *pb,
					unsigned int slots)
{
	/* Advance the next write position */
	pb->fence = (pb->fence + slots * 8) & (PUSH_BUFFER_SIZE - 1);
}

/**
 * Return the number of two word slots free in the push buffer
 */
static u32 nvhost_push_buffer_space(struct push_buffer *pb)
{
	return ((pb->fence - pb->cur) & (PUSH_BUFFER_SIZE - 1)) / 8;
}

u32 nvhost_push_buffer_putptr(struct push_buffer *pb)
{
	return pb->cur;
}

dma_addr_t nvhost_push_buffer_start(struct push_buffer *pb)
{
	return pb->dma_addr;
}

dma_addr_t nvhost_push_buffer_end(struct push_buffer *pb)
{
	return pb->dma_addr + PUSH_BUFFER_SIZE + 4;
}
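/*
 * Reading aid (illustrative, not part of the driver): each slot holds two
 * 32-bit words, i.e. 8 bytes, which is why nvhost_push_buffer_push_to()
 * advances "cur" by 8 and nvhost_push_buffer_space() counts free space in
 * 8-byte units. Because "fence == cur" means full rather than empty, one
 * slot is effectively kept in reserve; assuming the chip-specific init
 * leaves "fence" one slot behind "cur" (modulo the buffer size), a fresh
 * push buffer reports PUSH_BUFFER_SIZE / 8 - 1 free slots, and pushing
 * until space() returns 0 brings "fence" and "cur" together again.
 */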
/**
 * Add an entry to the sync queue.
 */
static void add_to_sync_queue(struct nvhost_cdma *cdma,
			      struct nvhost_job *job,
			      u32 nr_slots,
			      u32 first_get)
{
	job->first_get = first_get;
	job->num_slots = nr_slots;
	nvhost_job_get(job);

	mutex_lock(&cdma->sync_queue_lock);
	list_add_tail(&job->list, &cdma->sync_queue);
	mutex_unlock(&cdma->sync_queue_lock);
}

/**
 * Return the status of the cdma's sync queue or push buffer for the given
 * event
 *  - sq empty: returns 1 for empty, 0 for not empty (as in "1 empty queue" :-)
 *  - pb space: returns the number of free slots in the channel's push buffer
 * Must be called with the cdma lock held.
 */
static unsigned int cdma_status_locked(struct nvhost_cdma *cdma,
				       enum cdma_event event)
{
	switch (event) {
	case CDMA_EVENT_SYNC_QUEUE_EMPTY:
		return list_empty(&cdma->sync_queue) ? 1 : 0;
	case CDMA_EVENT_PUSH_BUFFER_SPACE: {
		struct push_buffer *pb = &cdma->push_buffer;

		return nvhost_push_buffer_space(pb);
	}
	default:
		return 0;
	}
}

/**
 * Sleep (if necessary) until the requested event happens
 *  - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
 *    - Returns 1
 *  - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
 *    - Returns the amount of space (> 0)
 * Must be called with the cdma lock held.
 */
unsigned int nvhost_cdma_wait_locked(struct nvhost_cdma *cdma,
				     enum cdma_event event)
{
	struct mutex *lock;

	if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY) {
		lock = &cdma->sync_queue_lock;
	} else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
		lock = &cdma->push_buffer_lock;
	} else {
		nvhost_err(&cdma->pdev->dev, "invalid event %d", event);
		return -EINVAL;
	}

	mutex_lock(lock);

	/*
	 * Note that this function is called with a read lock held on
	 * cdma->lock, so we need to drop both cdma->lock and the event lock
	 * (either sync_queue_lock or push_buffer_lock) before waiting on the
	 * event or calling schedule().
	 *
	 * The event lock must be dropped because the event is signalled with
	 * the event lock acquired.
	 * cdma->lock must be dropped because, if the event is never signalled
	 * and the timeout routine is invoked, keeping it would deadlock: the
	 * timeout routine also requests a write lock on cdma->lock.
	 */
	for (;;) {
		unsigned int space;

		space = cdma_status_locked(cdma, event);
		if (space) {
			mutex_unlock(lock);
			return space;
		}

		trace_nvhost_wait_cdma(cdma_to_channel(cdma)->dev->name,
				       event);

		/* If somebody has managed to already start waiting, yield */
		if (cdma->event != CDMA_EVENT_NONE) {
			mutex_unlock(lock);
			up_read(&cdma->lock);
			schedule();
			down_read(&cdma->lock);
			mutex_lock(lock);
			continue;
		}
		cdma->event = event;

		mutex_unlock(lock);
		up_read(&cdma->lock);

		/* start waiting */
		down(&cdma->sem);

		down_read(&cdma->lock);
		mutex_lock(lock);
	}

	return 0;
}
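/*
 * Reading aid (hypothetical sketch, not called anywhere in this driver):
 * nvhost_cdma_wait_locked() expects its caller to hold cdma->lock for read,
 * as nvhost_cdma_push_gather() does below, and may drop and retake that lock
 * while sleeping on cdma->sem. The helper name below is made up purely for
 * illustration.
 */
static inline unsigned int example_wait_for_pb_space(struct nvhost_cdma *cdma)
{
	unsigned int slots;

	down_read(&cdma->lock);
	/* returns the free slot count (> 0); sleeps if the buffer is full */
	slots = nvhost_cdma_wait_locked(cdma, CDMA_EVENT_PUSH_BUFFER_SPACE);
	up_read(&cdma->lock);

	return slots;
}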
/**
 * Start the timer for a buffer submission that hasn't completed yet.
 * Must be called with the cdma lock held.
 */
static void cdma_start_timer_locked(struct nvhost_cdma *cdma,
				    struct nvhost_job *job)
{
	/* In the virtual case, timeouts are handled by the server */
	if (nvhost_dev_is_virtual(cdma_to_dev(cdma)->dev))
		return;

	mutex_lock(&cdma->timeout_lock);
	if (cdma->timeout.clientid) {
		/* timer already started */
		mutex_unlock(&cdma->timeout_lock);
		return;
	}

	cdma->timeout.clientid = job->clientid;
	cdma->timeout.sp = job->sp;
	cdma->timeout.num_syncpts = job->num_syncpts;
	cdma->timeout.start_ktime = ktime_get();
	cdma->timeout.timeout_debug_dump = job->timeout_debug_dump;
	cdma->timeout.timeout = job->timeout;
	cdma->timeout.allow_dependency = true;

	if (job->timeout)
		schedule_delayed_work(&cdma->timeout.wq,
				      msecs_to_jiffies(cdma->timeout.timeout));
	mutex_unlock(&cdma->timeout_lock);
}

/**
 * Stop the timer when a buffer submission completes.
 * Must be called with the cdma lock held.
 */
static void stop_cdma_timer_locked(struct nvhost_cdma *cdma)
{
	cancel_delayed_work_sync(&cdma->timeout.wq);

	mutex_lock(&cdma->timeout_lock);
	if (cdma->timeout.clientid)
		cdma->timeout.clientid = 0;
	mutex_unlock(&cdma->timeout_lock);
}

/**
 * For all sync queue entries that have already finished according to the
 * current sync point registers:
 *  - unpin & unref their mems
 *  - pop their push buffer slots
 *  - remove them from the sync queue
 * This is normally called from the host code's worker thread, but can be
 * called manually if necessary.
 * Must be called with the cdma lock held.
 */
static void update_cdma_locked(struct nvhost_cdma *cdma)
{
	struct nvhost_master *dev = cdma_to_dev(cdma);
	struct nvhost_syncpt *sp = &dev->syncpt;
	struct nvhost_job *job;

	/* If CDMA is stopped, the queue is cleared and we can return */
	if (!cdma->running)
		return;

	/*
	 * Walk the sync queue, reading the sync point registers as necessary,
	 * to consume as many sync queue entries as possible without blocking
	 */
	while (1) {
		bool completed = true;
		int i;

		mutex_lock(&cdma->sync_queue_lock);
		if (list_empty(&cdma->sync_queue)) {
			if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY) {
				cdma->event = CDMA_EVENT_NONE;
				up(&cdma->sem);
			}
			mutex_unlock(&cdma->sync_queue_lock);
			break;
		}

		job = list_first_entry(&cdma->sync_queue,
				       struct nvhost_job, list);

		/* Check whether the job's syncpts have completed; bail if not */
		for (i = 0; completed && i < job->num_syncpts; ++i)
			completed &= nvhost_syncpt_is_expired(sp,
					job->sp[i].id, job->sp[i].fence);

		if (!completed) {
			/* Start the timer on the next pending syncpt */
			mutex_unlock(&cdma->sync_queue_lock);
			cdma_start_timer_locked(cdma, job);
			break;
		}

		list_del(&job->list);
		mutex_unlock(&cdma->sync_queue_lock);

		/* Cancel the timeout when a buffer completes */
		stop_cdma_timer_locked(cdma);

		/* Drop syncpoint references from this job */
		for (i = 0; i < job->num_syncpts; ++i)
			nvhost_syncpt_put_ref(sp, job->sp[i].id);

		/* Unpin the memory */
		nvhost_job_unpin(job);

		/* Pop push buffer slots */
		mutex_lock(&cdma->push_buffer_lock);
		if (job->num_slots) {
			struct push_buffer *pb = &cdma->push_buffer;

			nvhost_push_buffer_pop_from(pb, job->num_slots);
			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
				cdma->event = CDMA_EVENT_NONE;
				up(&cdma->sem);
			}
		}
		mutex_unlock(&cdma->push_buffer_lock);

		nvhost_job_put(job);
	}
}

void nvhost_cdma_finalize_job_incrs(struct nvhost_syncpt *syncpt,
				    struct nvhost_job_syncpt *sp)
{
	u32 id = sp->id;
	u32 fence = sp->fence;

	atomic_set(&syncpt->min_val[id], fence);
	syncpt_op().reset(syncpt, id);
	nvhost_syncpt_update_min(syncpt, id);
}
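/*
 * Reading aid (illustrative, not part of the driver): the retirement walk in
 * update_cdma_locked() is driven purely by syncpoint values, so a caller only
 * needs the cdma read lock; nvhost_cdma_update() at the end of this file is
 * exactly that wrapper:
 *
 *	down_read(&cdma->lock);
 *	update_cdma_locked(cdma);
 *	up_read(&cdma->lock);
 *
 * nvhost_cdma_finalize_job_incrs() exists for the timeout recovery below:
 * forcing the shadow min_val to the job's fence and propagating it through
 * syncpt_op().reset() and nvhost_syncpt_update_min() makes a skipped job
 * appear complete, so the same retirement walk can later free its resources.
 */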
void nvhost_cdma_update_sync_queue(struct nvhost_cdma *cdma,
				   struct nvhost_syncpt *syncpt,
				   struct platform_device *dev)
{
	u32 get_restart;
	struct nvhost_job *job = NULL;
	int nb_pts = nvhost_syncpt_nb_hw_pts(syncpt);
	DECLARE_BITMAP(syncpt_used, nb_pts);
	bool is_empty;

	bitmap_zero(syncpt_used, nb_pts);

	/* ensure that no one on the CPU updates syncpoint values */
	mutex_lock(&syncpt->cpu_increment_mutex);

	/*
	 * Move the sync_queue read pointer to the first entry that hasn't
	 * completed based on the current HW syncpt value. It's likely there
	 * won't be any (i.e. we're still at the head), but this covers the
	 * case where a syncpt incr happens just prior to or during the
	 * teardown.
	 */
	dev_dbg(&dev->dev,
		"%s: skip completed buffers still in sync_queue\n",
		__func__);

	mutex_lock(&cdma->sync_queue_lock);
	list_for_each_entry(job, &cdma->sync_queue, list) {
		int i;

		for (i = 0; i < job->num_syncpts; ++i) {
			u32 id = job->sp[i].id;

			if (!test_bit(id, syncpt_used))
				nvhost_syncpt_update_min(syncpt, id);
			set_bit(id, syncpt_used);

			if (!nvhost_syncpt_is_expired(syncpt, id,
						      job->sp[i].fence))
				goto out;
		}

		if (nvhost_debug_force_timeout_dump ||
		    cdma->timeout.timeout_debug_dump)
			nvhost_job_dump(&dev->dev, job);
	}
out:
	mutex_unlock(&cdma->sync_queue_lock);

	/*
	 * Walk the sync_queue, first incrementing with the CPU syncpts that
	 * are partially executed (the first buffer) or fully skipped while
	 * still in the current context (slots are also NOP-ed).
	 *
	 * At the point contexts are interleaved, syncpt increments must be
	 * done inline with the pushbuffer from a GATHER buffer to maintain
	 * the order (slots are modified to be a GATHER of syncpt incrs).
	 *
	 * Note: save in get_restart the location where the timed out buffer
	 * started in the PB, so we can start the refetch from there (with the
	 * modified NOP-ed PB slots). This lets things appear to have completed
	 * properly for this buffer, and its resources are freed.
	 */
	dev_dbg(&dev->dev,
		"%s: perform CPU incr on pending same ctx buffers\n",
		__func__);

	get_restart = cdma->last_put;
	mutex_lock(&cdma->sync_queue_lock);
	is_empty = list_empty(&cdma->sync_queue);
	mutex_unlock(&cdma->sync_queue_lock);
	if (!is_empty)
		get_restart = job->first_get;

	/* do CPU increments as long as this context continues */
	mutex_lock(&cdma->sync_queue_lock);
	list_for_each_entry_from(job, &cdma->sync_queue, list) {
		int i;

		/* a different context gets us out of this loop */
		if (job->clientid != cdma->timeout.clientid)
			break;

		if (nvhost_debug_force_timeout_dump ||
		    cdma->timeout.timeout_debug_dump)
			nvhost_job_dump(&dev->dev, job);

		/* won't need a timeout when replayed */
		job->timeout = 0;

		/* notify userspace about the submit timeout */
		nvhost_job_set_notifier(job, NVHOST_CHANNEL_SUBMIT_TIMEOUT);

		for (i = 0; i < job->num_syncpts; ++i)
			nvhost_cdma_finalize_job_incrs(syncpt, job->sp + i);

		/* cleanup push buffer */
		cdma_op().timeout_pb_cleanup(cdma, job->first_get,
					     job->num_slots);
	}
	mutex_unlock(&cdma->sync_queue_lock);

	mutex_unlock(&syncpt->cpu_increment_mutex);

	mutex_lock(&cdma->sync_queue_lock);
	list_for_each_entry_from(job, &cdma->sync_queue, list) {
		if (job->clientid == cdma->timeout.clientid)
			job->timeout = min(job->timeout, 500);
	}
	mutex_unlock(&cdma->sync_queue_lock);

	dev_dbg(&dev->dev,
		"%s: finished sync_queue modification\n", __func__);

	/* roll back DMAGET and start up the channel again */
	cdma_op().timeout_teardown_end(cdma, get_restart);
}
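/*
 * Reading aid (illustrative, not part of the driver): after the recovery
 * above, every queued job from the timed-out client has either had its
 * syncpoints CPU-incremented and its push buffer slots cleaned up via
 * cdma_op().timeout_pb_cleanup(), or, if it sits behind another client's
 * work, had its timeout capped to 500 ms. Restarting the fetch at
 * get_restart then replays the cleaned-up (NOP-ed) slots together with the
 * untouched submissions queued behind them.
 */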
/**
 * Create a cdma
 */
int nvhost_cdma_init(struct platform_device *pdev, struct nvhost_cdma *cdma)
{
	int err;
	struct push_buffer *pb = &cdma->push_buffer;

	init_rwsem(&cdma->lock);
	sema_init(&cdma->sem, 0);
	mutex_init(&cdma->push_buffer_lock);
	mutex_init(&cdma->sync_queue_lock);
	mutex_init(&cdma->timeout_lock);

	INIT_LIST_HEAD(&cdma->sync_queue);

	cdma->event = CDMA_EVENT_NONE;
	cdma->running = false;
	cdma->torndown = false;
	cdma->pdev = pdev;

	err = cdma_pb_op().init(pb);
	if (err)
		return err;

	return 0;
}

/**
 * Destroy a cdma
 */
void nvhost_cdma_deinit(struct nvhost_cdma *cdma)
{
	struct push_buffer *pb = &cdma->push_buffer;

	WARN_ON(cdma->running);

	nvhost_push_buffer_destroy(pb);
	cdma_op().timeout_destroy(cdma);
}

/**
 * Begin a cdma submit
 */
int nvhost_cdma_begin(struct nvhost_cdma *cdma, struct nvhost_job *job)
{
	down_read(&cdma->lock);

	if (job->timeout) {
		/* init state on first submit with timeout value */
		if (!cdma->timeout.initialized) {
			int err;

			err = cdma_op().timeout_init(cdma, job->sp->id);
			if (err) {
				up_read(&cdma->lock);
				return err;
			}
		}
	}

	if (!cdma->running)
		cdma_op().start(cdma);

	cdma->slots_free = 0;
	cdma->slots_used = 0;
	cdma->first_get = nvhost_push_buffer_putptr(&cdma->push_buffer);

	return 0;
}

static void trace_write_gather(struct nvhost_cdma *cdma,
			       u32 *cpuva, dma_addr_t iova,
			       u32 offset, u32 words)
{
	if (iova) {
		u32 i;

		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
			trace_nvhost_cdma_push_gather(
				cdma_to_channel(cdma)->dev->name,
				(u32)((uintptr_t)iova),
				min(words - i, TRACE_MAX_LENGTH),
				offset + i * sizeof(u32),
				cpuva);
		}
	}
}

/**
 * Push two words into a push buffer slot
 * Blocks as necessary if the push buffer is full.
 */
void nvhost_cdma_push(struct nvhost_cdma *cdma, u32 op1, u32 op2)
{
	if (nvhost_debug_trace_cmdbuf)
		trace_nvhost_cdma_push(cdma_to_channel(cdma)->dev->name,
				       op1, op2);

	nvhost_cdma_push_gather(cdma, NULL, 0, 0, op1, op2);
}

/**
 * Push two words into a push buffer slot, tracing the contents of the gather
 * they reference.
 * Blocks as necessary if the push buffer is full.
 */
void nvhost_cdma_push_gather(struct nvhost_cdma *cdma,
			     u32 *cpuva, dma_addr_t iova,
			     u32 offset, u32 op1, u32 op2)
{
	u32 slots_free = cdma->slots_free;
	struct push_buffer *pb = &cdma->push_buffer;

	if (cpuva)
		trace_write_gather(cdma, cpuva, iova, offset, op1 & 0x1fff);

	if (slots_free == 0)
		slots_free = nvhost_cdma_wait_locked(cdma,
				CDMA_EVENT_PUSH_BUFFER_SPACE);

	cdma->slots_free = slots_free - 1;
	cdma->slots_used++;

	mutex_lock(&cdma->push_buffer_lock);
	nvhost_push_buffer_push_to(pb, op1, op2);
	mutex_unlock(&cdma->push_buffer_lock);
}

/**
 * End a cdma submit
 * Kick off DMA, add the job to the sync queue, and record the number of slots
 * to be freed from the push buffer. The handles for a submit must all be
 * pinned at the same time, but they can be unpinned in smaller chunks.
 */
void nvhost_cdma_end(struct nvhost_cdma *cdma, struct nvhost_job *job)
{
	bool was_idle;

	mutex_lock(&cdma->sync_queue_lock);
	was_idle = list_empty(&cdma->sync_queue);
	mutex_unlock(&cdma->sync_queue_lock);

	add_to_sync_queue(cdma, job, cdma->slots_used, cdma->first_get);

	cdma_op().kick(cdma);

	/* start the timer on idle -> active transitions */
	if (was_idle)
		cdma_start_timer_locked(cdma, job);

	trace_nvhost_cdma_end(job->ch->dev->name);

	up_read(&cdma->lock);
}

/**
 * Update cdma state according to current sync point values
 */
void nvhost_cdma_update(struct nvhost_cdma *cdma)
{
	down_read(&cdma->lock);
	update_cdma_locked(cdma);
	up_read(&cdma->lock);
}
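/*
 * Reading aid (hypothetical sketch, not called anywhere in this driver):
 * the expected lifetime of a single submit using the helpers above. The
 * function name and the bare two-word push are made up for illustration;
 * real callers typically push gather opcodes that reference job buffers.
 */
static inline int example_cdma_submit(struct nvhost_cdma *cdma,
				      struct nvhost_job *job,
				      u32 op1, u32 op2)
{
	int err;

	/* takes cdma->lock for read and starts the channel if needed */
	err = nvhost_cdma_begin(cdma, job);
	if (err)
		return err;

	/* consumes one two-word push buffer slot, blocking if it is full */
	nvhost_cdma_push(cdma, op1, op2);

	/* kicks DMA, queues the job for completion and drops cdma->lock */
	nvhost_cdma_end(cdma, job);

	return 0;
}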