Lines Matching +full:4 +full:- +full:ring

41 static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)  in si_dma_ring_get_rptr()  argument
43 return *ring->rptr_cpu_addr; in si_dma_ring_get_rptr()
46 static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) in si_dma_ring_get_wptr() argument
48 struct amdgpu_device *adev = ring->adev; in si_dma_ring_get_wptr()
49 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; in si_dma_ring_get_wptr()
54 static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) in si_dma_ring_set_wptr() argument
56 struct amdgpu_device *adev = ring->adev; in si_dma_ring_set_wptr()
57 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; in si_dma_ring_set_wptr()
59 WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); in si_dma_ring_set_wptr()
62 static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, in si_dma_ring_emit_ib() argument
68 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. in si_dma_ring_emit_ib()
71 while ((lower_32_bits(ring->wptr) & 7) != 5) in si_dma_ring_emit_ib()
72 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); in si_dma_ring_emit_ib()
73 amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0)); in si_dma_ring_emit_ib()
74 amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); in si_dma_ring_emit_ib()
75 amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); in si_dma_ring_emit_ib()
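
The NOP padding above works because the INDIRECT_BUFFER packet itself is three dwords: once (lower_32_bits(ring->wptr) & 7) == 5, the header plus the two address/length dwords land the write pointer back on an 8-dword boundary. A minimal standalone sketch of that arithmetic (plain userspace C, not driver code):

#include <stdint.h>
#include <stdio.h>

/* NOP dwords needed so that a 3-dword IB packet written right after the
 * padding ends on an 8-dword boundary; closed form of the while loop above. */
static unsigned int ib_pad_dw(uint64_t wptr)
{
        return (5u - ((unsigned int)wptr & 7u)) & 7u;
}

int main(void)
{
        for (uint64_t wptr = 0; wptr < 8; wptr++) {
                unsigned int pad = ib_pad_dw(wptr);
                printf("wptr %llu -> %u NOPs, IB packet ends at dword %llu\n",
                       (unsigned long long)wptr, pad,
                       (unsigned long long)(wptr + pad + 3));
        }
        return 0;
}
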
80 * si_dma_ring_emit_fence - emit a fence on the DMA ring
82 * @ring: amdgpu ring pointer
87 * Add a DMA fence packet to the ring to write
91 static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, in si_dma_ring_emit_fence() argument
97 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); in si_dma_ring_emit_fence()
98 amdgpu_ring_write(ring, addr & 0xfffffffc); in si_dma_ring_emit_fence()
99 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); in si_dma_ring_emit_fence()
100 amdgpu_ring_write(ring, seq); in si_dma_ring_emit_fence()
103 addr += 4; in si_dma_ring_emit_fence()
104 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); in si_dma_ring_emit_fence()
105 amdgpu_ring_write(ring, addr & 0xfffffffc); in si_dma_ring_emit_fence()
106 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); in si_dma_ring_emit_fence()
107 amdgpu_ring_write(ring, upper_32_bits(seq)); in si_dma_ring_emit_fence()
110 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0)); in si_dma_ring_emit_fence()
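
The 64-bit sequence number is split across two FENCE packets, one writing the low dword at addr and one writing the high dword at addr + 4, followed by a TRAP packet that raises the interrupt. A small sketch of that split, assuming the low-then-high order shown above (standalone C, not driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t wb[2] = { 0, 0 };          /* stands in for the fence address  */
        uint64_t seq = 0x100000002ULL;      /* illustrative sequence number     */

        wb[0] = (uint32_t)seq;              /* first FENCE packet: addr         */
        wb[1] = (uint32_t)(seq >> 32);      /* second FENCE packet: addr + 4    */

        uint64_t readback = ((uint64_t)wb[1] << 32) | wb[0];
        printf("fence readback: 0x%llx\n", (unsigned long long)readback);
        return 0;
}
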
118 for (i = 0; i < adev->sdma.num_instances; i++) { in si_dma_stop()
128 struct amdgpu_ring *ring; in si_dma_start() local
133 for (i = 0; i < adev->sdma.num_instances; i++) { in si_dma_start()
134 ring = &adev->sdma.instance[i].ring; in si_dma_start()
139 /* Set ring buffer size in dwords */ in si_dma_start()
140 rb_bufsz = order_base_2(ring->ring_size / 4); in si_dma_start()
147 /* Initialize the ring buffer's read and write pointers */ in si_dma_start()
151 rptr_addr = ring->rptr_gpu_addr; in si_dma_start()
158 WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); in si_dma_start()
171 ring->wptr = 0; in si_dma_start()
172 WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); in si_dma_start()
175 r = amdgpu_ring_test_helper(ring); in si_dma_start()
184 * si_dma_ring_test_ring - simple async dma engine test
186 * @ring: amdgpu_ring structure holding ring information
192 static int si_dma_ring_test_ring(struct amdgpu_ring *ring) in si_dma_ring_test_ring() argument
194 struct amdgpu_device *adev = ring->adev; in si_dma_ring_test_ring()
205 gpu_addr = adev->wb.gpu_addr + (index * 4); in si_dma_ring_test_ring()
207 adev->wb.wb[index] = cpu_to_le32(tmp); in si_dma_ring_test_ring()
209 r = amdgpu_ring_alloc(ring, 4); in si_dma_ring_test_ring()
213 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); in si_dma_ring_test_ring()
214 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); in si_dma_ring_test_ring()
215 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); in si_dma_ring_test_ring()
216 amdgpu_ring_write(ring, 0xDEADBEEF); in si_dma_ring_test_ring()
217 amdgpu_ring_commit(ring); in si_dma_ring_test_ring()
219 for (i = 0; i < adev->usec_timeout; i++) { in si_dma_ring_test_ring()
220 tmp = le32_to_cpu(adev->wb.wb[index]); in si_dma_ring_test_ring()
226 if (i >= adev->usec_timeout) in si_dma_ring_test_ring()
227 r = -ETIMEDOUT; in si_dma_ring_test_ring()
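
The test pattern is: write 0xDEADBEEF to a writeback slot through the ring, then busy-poll that slot for up to adev->usec_timeout iterations. A standalone sketch of the poll loop (the helper name and the usleep() granularity are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Poll a memory slot until it holds 'expected' or the timeout expires. */
static int poll_writeback(volatile uint32_t *slot, uint32_t expected,
                          unsigned int usec_timeout)
{
        unsigned int i;

        for (i = 0; i < usec_timeout; i++) {
                if (*slot == expected)
                        return 0;
                usleep(1);
        }
        return -1;                          /* the driver returns -ETIMEDOUT */
}

int main(void)
{
        volatile uint32_t slot = 0xDEADBEEF; /* pretend the engine already wrote it */

        printf("%s\n", poll_writeback(&slot, 0xDEADBEEF, 1000) ? "timeout" : "ok");
        return 0;
}
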
235 * si_dma_ring_test_ib - test an IB on the DMA engine
237 * @ring: amdgpu_ring structure holding ring information
240 * Test a simple IB in the DMA ring (SI). in si_dma_ring_test_ib()
243 static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) in si_dma_ring_test_ib() argument
245 struct amdgpu_device *adev = ring->adev; in si_dma_ring_test_ib()
257 gpu_addr = adev->wb.gpu_addr + (index * 4); in si_dma_ring_test_ib()
259 adev->wb.wb[index] = cpu_to_le32(tmp); in si_dma_ring_test_ib()
270 ib.length_dw = 4; in si_dma_ring_test_ib()
271 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); in si_dma_ring_test_ib()
277 r = -ETIMEDOUT; in si_dma_ring_test_ib()
282 tmp = le32_to_cpu(adev->wb.wb[index]); in si_dma_ring_test_ib()
286 r = -EINVAL; in si_dma_ring_test_ib()
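
The IB test submits a similar 4-dword WRITE and then waits on the returned fence; dma_fence_wait_timeout() returns 0 on timeout, a negative error code, or a positive value when the fence signaled, which is why the timeout path above sets -ETIMEDOUT for the zero case. A compact sketch of that three-way result handling:

#include <stdio.h>

static const char *fence_wait_result(long r)
{
        if (r == 0)
                return "timeout (-ETIMEDOUT)";
        if (r < 0)
                return "error, propagate r";
        return "signaled, compare the writeback value against 0xDEADBEEF";
}

int main(void)
{
        printf("%s\n", fence_wait_result(0));
        printf("%s\n", fence_wait_result(-22));
        printf("%s\n", fence_wait_result(1));
        return 0;
}
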
297 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
312 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, in si_dma_vm_copy_pte()
314 ib->ptr[ib->length_dw++] = lower_32_bits(pe); in si_dma_vm_copy_pte()
315 ib->ptr[ib->length_dw++] = lower_32_bits(src); in si_dma_vm_copy_pte()
316 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; in si_dma_vm_copy_pte()
317 ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; in si_dma_vm_copy_pte()
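
Both the destination and source addresses are emitted as a full low dword plus only the low byte of the upper half (the & 0xff masks), i.e. 40-bit GPU addresses. A tiny sketch of that split, with a made-up address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t pe = 0x12ABCD1234ULL;      /* illustrative GPU address */
        uint32_t lo = (uint32_t)pe;
        uint32_t hi = (uint32_t)(pe >> 32) & 0xff;

        printf("lo=0x%08x hi=0x%02x -> 0x%llx\n", lo, hi,
               (unsigned long long)(((uint64_t)hi << 32) | lo));
        return 0;
}
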
321 * si_dma_vm_write_pte - update PTEs by writing them manually
337 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); in si_dma_vm_write_pte()
338 ib->ptr[ib->length_dw++] = lower_32_bits(pe); in si_dma_vm_write_pte()
339 ib->ptr[ib->length_dw++] = upper_32_bits(pe); in si_dma_vm_write_pte()
340 for (; ndw > 0; ndw -= 2) { in si_dma_vm_write_pte()
341 ib->ptr[ib->length_dw++] = lower_32_bits(value); in si_dma_vm_write_pte()
342 ib->ptr[ib->length_dw++] = upper_32_bits(value); in si_dma_vm_write_pte()
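
Each PTE is a 64-bit value written as two dwords; in the full driver function the value is advanced by the page increment between entries. A standalone sketch of that inner loop, with a made-up starting value and increment:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t ib[8];
        unsigned int dw = 0;
        uint64_t value = 0x100000ULL;       /* illustrative address/flags */
        uint64_t incr  = 0x1000;            /* 4 KiB pages                */

        for (unsigned int ndw = 8; ndw > 0; ndw -= 2) {
                ib[dw++] = (uint32_t)value;
                ib[dw++] = (uint32_t)(value >> 32);
                value += incr;
        }
        for (unsigned int i = 0; i < dw; i += 2)
                printf("pte[%u] = 0x%08x%08x\n", i / 2, ib[i + 1], ib[i]);
        return 0;
}
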
348 * si_dma_vm_set_pte_pde - update the page tables using sDMA
378 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); in si_dma_vm_set_pte_pde()
379 ib->ptr[ib->length_dw++] = pe; /* dst addr */ in si_dma_vm_set_pte_pde()
380 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; in si_dma_vm_set_pte_pde()
381 ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ in si_dma_vm_set_pte_pde()
382 ib->ptr[ib->length_dw++] = upper_32_bits(flags); in si_dma_vm_set_pte_pde()
383 ib->ptr[ib->length_dw++] = value; /* value */ in si_dma_vm_set_pte_pde()
384 ib->ptr[ib->length_dw++] = upper_32_bits(value); in si_dma_vm_set_pte_pde()
385 ib->ptr[ib->length_dw++] = incr; /* increment size */ in si_dma_vm_set_pte_pde()
386 ib->ptr[ib->length_dw++] = 0; in si_dma_vm_set_pte_pde()
387 pe += ndw * 4; in si_dma_vm_set_pte_pde()
389 count -= ndw / 2; in si_dma_vm_set_pte_pde()
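
A single PTE_PDE packet covers a bounded number of dwords, so large updates are chunked: ndw counts dwords (two per 8-byte entry), the destination advances by ndw * 4 bytes, and count drops by ndw / 2. A worked sketch of that chunking; the 0xFFFFE per-packet limit here is an assumption for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t pe = 0;                        /* illustrative table offset */
        unsigned int count = 700000;            /* entries to update         */
        const unsigned int max_ndw = 0xFFFFE;   /* assumed per-packet limit  */

        while (count) {
                unsigned int ndw = count * 2;

                if (ndw > max_ndw)
                        ndw = max_ndw;
                printf("packet: pe=0x%llx, %u dwords (%u entries)\n",
                       (unsigned long long)pe, ndw, ndw / 2);
                pe += (uint64_t)ndw * 4;
                count -= ndw / 2;
        }
        return 0;
}
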
394 * si_dma_ring_pad_ib - pad the IB to the required number of dw
396 * @ring: amdgpu_ring pointer
400 static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) in si_dma_ring_pad_ib() argument
402 while (ib->length_dw & 0x7) in si_dma_ring_pad_ib()
403 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); in si_dma_ring_pad_ib()
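
The padding loop rounds the IB length up to the next multiple of 8 dwords with NOPs; the equivalent closed form, as a small sketch:

#include <stdio.h>

int main(void)
{
        for (unsigned int len = 1; len <= 8; len++)
                printf("length %u -> %u NOP dwords\n", len, (8 - (len & 7)) & 7);
        return 0;
}
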
407 * si_dma_ring_emit_pipeline_sync - sync the pipeline
409 * @ring: amdgpu_ring pointer
413 static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) in si_dma_ring_emit_pipeline_sync() argument
415 uint32_t seq = ring->fence_drv.sync_seq; in si_dma_ring_emit_pipeline_sync()
416 uint64_t addr = ring->fence_drv.gpu_addr; in si_dma_ring_emit_pipeline_sync()
419 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) | in si_dma_ring_emit_pipeline_sync()
421 amdgpu_ring_write(ring, lower_32_bits(addr)); in si_dma_ring_emit_pipeline_sync()
422 amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */ in si_dma_ring_emit_pipeline_sync()
423 amdgpu_ring_write(ring, 0xffffffff); /* mask */ in si_dma_ring_emit_pipeline_sync()
424 amdgpu_ring_write(ring, seq); /* value */ in si_dma_ring_emit_pipeline_sync()
425 amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */ in si_dma_ring_emit_pipeline_sync()
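
POLL_REG_MEM keeps re-reading the fence address until the compare function passes; per the comments above, (3 << 28) selects "equal" and (0 << 28) selects "always". A minimal sketch of those two compare functions (the numeric encodings are taken from the comments, the helper itself is hypothetical):

#include <stdint.h>
#include <stdio.h>

static int poll_func(unsigned int func, uint32_t mem, uint32_t ref, uint32_t mask)
{
        switch (func) {
        case 0: return 1;                       /* always */
        case 3: return (mem & mask) == ref;     /* equal  */
        default: return 0;                      /* other encodings unused here */
        }
}

int main(void)
{
        printf("%d %d\n", poll_func(3, 42, 42, 0xffffffff),
                          poll_func(0, 0, 99, 0xffffffff));
        return 0;
}
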
429 * si_dma_ring_emit_vm_flush - si vm flush using sDMA in si_dma_ring_emit_vm_flush()
431 * @ring: amdgpu_ring pointer
438 static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, in si_dma_ring_emit_vm_flush() argument
441 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); in si_dma_ring_emit_vm_flush()
444 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); in si_dma_ring_emit_vm_flush()
445 amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); in si_dma_ring_emit_vm_flush()
446 amdgpu_ring_write(ring, 0xff << 16); /* retry */ in si_dma_ring_emit_vm_flush()
447 amdgpu_ring_write(ring, 1 << vmid); /* mask */ in si_dma_ring_emit_vm_flush()
448 amdgpu_ring_write(ring, 0); /* value */ in si_dma_ring_emit_vm_flush()
449 amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ in si_dma_ring_emit_vm_flush()
452 static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, in si_dma_ring_emit_wreg() argument
455 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); in si_dma_ring_emit_wreg()
456 amdgpu_ring_write(ring, (0xf << 16) | reg); in si_dma_ring_emit_wreg()
457 amdgpu_ring_write(ring, val); in si_dma_ring_emit_wreg()
464 adev->sdma.num_instances = 2; in si_dma_early_init()
476 struct amdgpu_ring *ring; in si_dma_sw_init() local
482 &adev->sdma.trap_irq); in si_dma_sw_init()
488 &adev->sdma.trap_irq); in si_dma_sw_init()
492 for (i = 0; i < adev->sdma.num_instances; i++) { in si_dma_sw_init()
493 ring = &adev->sdma.instance[i].ring; in si_dma_sw_init()
494 ring->ring_obj = NULL; in si_dma_sw_init()
495 ring->use_doorbell = false; in si_dma_sw_init()
496 sprintf(ring->name, "sdma%d", i); in si_dma_sw_init()
497 r = amdgpu_ring_init(adev, ring, 1024, in si_dma_sw_init()
498 &adev->sdma.trap_irq, in si_dma_sw_init()
514 for (i = 0; i < adev->sdma.num_instances; i++) in si_dma_sw_fini()
515 amdgpu_ring_fini(&adev->sdma.instance[i].ring); in si_dma_sw_fini()
566 for (i = 0; i < adev->usec_timeout; i++) { in si_dma_wait_for_idle()
571 return -ETIMEDOUT; in si_dma_wait_for_idle()
576 DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n"); in si_dma_soft_reset()
630 if (entry->src_id == 224) in si_dma_process_trap_irq()
631 amdgpu_fence_process(&adev->sdma.instance[0].ring); in si_dma_process_trap_irq()
633 amdgpu_fence_process(&adev->sdma.instance[1].ring); in si_dma_process_trap_irq()
647 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { in si_dma_set_clockgating_state()
648 for (i = 0; i < adev->sdma.num_instances; i++) { in si_dma_set_clockgating_state()
660 for (i = 0; i < adev->sdma.num_instances; i++) { in si_dma_set_clockgating_state()
744 for (i = 0; i < adev->sdma.num_instances; i++) in si_dma_set_ring_funcs()
745 adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs; in si_dma_set_ring_funcs()
755 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; in si_dma_set_irq_funcs()
756 adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; in si_dma_set_irq_funcs()
760 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
778 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, in si_dma_emit_copy_buffer()
780 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); in si_dma_emit_copy_buffer()
781 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); in si_dma_emit_copy_buffer()
782 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff; in si_dma_emit_copy_buffer()
783 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff; in si_dma_emit_copy_buffer()
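
The COPY packet carries a bounded byte count, so amdgpu's buffer-copy helper splits larger transfers into several of these 5-dword packets. A sketch of that chunking; MAX_COPY_BYTES here is a made-up value, not the hardware limit:

#include <stdint.h>
#include <stdio.h>

#define MAX_COPY_BYTES 0x1000               /* illustrative per-packet limit */

int main(void)
{
        uint64_t src = 0x100000, dst = 0x200000;
        uint32_t remaining = 0x2800;

        while (remaining) {
                uint32_t chunk = remaining > MAX_COPY_BYTES ? MAX_COPY_BYTES
                                                            : remaining;

                printf("COPY %u bytes: 0x%llx -> 0x%llx\n", chunk,
                       (unsigned long long)src, (unsigned long long)dst);
                src += chunk;
                dst += chunk;
                remaining -= chunk;
        }
        return 0;
}
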
787 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
801 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL, in si_dma_emit_fill_buffer()
802 0, 0, 0, byte_count / 4); in si_dma_emit_fill_buffer()
803 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); in si_dma_emit_fill_buffer()
804 ib->ptr[ib->length_dw++] = src_data; in si_dma_emit_fill_buffer()
805 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16; in si_dma_emit_fill_buffer()
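
CONSTANT_FILL repeats a single 32-bit pattern, and the count field is byte_count / 4, so the fill operates on whole dwords. A trivial userspace equivalent:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t buf[8];
        uint32_t src_data = 0xCAFEBABE;     /* illustrative fill pattern */
        unsigned int byte_count = sizeof(buf);
        unsigned int i;

        for (i = 0; i < byte_count / 4; i++)
                buf[i] = src_data;
        printf("filled %u dwords with 0x%08x\n", byte_count / 4, buf[0]);
        return 0;
}
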
815 .fill_num_dw = 4,
821 adev->mman.buffer_funcs = &si_dma_buffer_funcs; in si_dma_set_buffer_funcs()
822 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; in si_dma_set_buffer_funcs()
837 adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; in si_dma_set_vm_pte_funcs()
838 for (i = 0; i < adev->sdma.num_instances; i++) { in si_dma_set_vm_pte_funcs()
839 adev->vm_manager.vm_pte_scheds[i] = in si_dma_set_vm_pte_funcs()
840 &adev->sdma.instance[i].ring.sched; in si_dma_set_vm_pte_funcs()
842 adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; in si_dma_set_vm_pte_funcs()