/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_vm.h"
#include "amdgpu_job.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"

#define AMDGPU_VM_SDMA_MIN_NUM_DW	256u
#define AMDGPU_VM_SDMA_MAX_NUM_DW	(16u * 1024u)

/**
 * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
 *
 * @table: newly allocated or validated PD/PT
 */
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
	return amdgpu_ttm_alloc_gart(&table->bo.tbo);
}

/* Allocate a new job for @count PTE updates */
static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
				    unsigned int count)
{
	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
		: AMDGPU_IB_POOL_DELAYED;
	struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
		: &p->vm->delayed;
	unsigned int ndw;
	int r;

	/* estimate how many dw we need */
	ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
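	/* copied PTEs are staged in the IB as 64-bit values, i.e. two dw each */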
	if (p->pages_addr)
		ndw += count * 2;
	ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);

	r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
				     ndw * 4, pool, &p->job);
	if (r)
		return r;

	p->num_dw_left = ndw;
	return 0;
}

/**
 * amdgpu_vm_sdma_prepare - prepare SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @sync: amdgpu_sync object with fences to wait for
 *
 * Returns:
 * Negative errno, 0 for success.
 */
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
				  struct amdgpu_sync *sync)
{
	int r;

	r = amdgpu_vm_sdma_alloc_job(p, 0);
	if (r)
		return r;

	if (!sync)
		return 0;

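	/* Add the fences from the sync object as dependencies of the new job */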
	r = amdgpu_sync_push_to_job(sync, p->job);
	if (r) {
		p->num_dw_left = 0;
		amdgpu_job_free(p->job);
	}
	return r;
}

/**
 * amdgpu_vm_sdma_commit - commit SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @fence: resulting fence
 *
 * Returns:
 * Negative errno, 0 for success.
 */
static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
				 struct dma_fence **fence)
{
	struct amdgpu_ib *ib = p->job->ibs;
	struct amdgpu_ring *ring;
	struct dma_fence *f;

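	/* Look up the ring the page table update entity is scheduled on */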
	ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
			    sched);

	WARN_ON(ib->length_dw == 0);
	amdgpu_ring_pad_ib(ring, ib);

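	/*
	 * If the update invalidated existing translations, bump the TLB
	 * sequence so the next use of this VM triggers a TLB flush.
	 */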
	if (p->needs_flush)
		atomic64_inc(&p->vm->tlb_seq);

	WARN_ON(ib->length_dw > p->num_dw_left);
	f = amdgpu_job_submit(p->job);

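	/*
	 * Unlocked updates only remember the last submitted fence instead of
	 * adding it to the root PD reservation object.
	 */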
	if (p->unlocked) {
		struct dma_fence *tmp = dma_fence_get(f);

		swap(p->vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	} else {
		dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
				   DMA_RESV_USAGE_BOOKKEEP);
	}

	if (fence && !p->immediate) {
		/*
		 * Most hw generations now have a separate queue for page table
		 * updates, but when the queue is shared with userspace we need
		 * the extra CPU round trip to correctly flush the TLB.
		 */
		set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
		swap(*fence, f);
	}
	dma_fence_put(f);
	return 0;
}

/**
 * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @count: number of page entries to copy
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
				     struct amdgpu_bo *bo, uint64_t pe,
				     unsigned count)
{
	struct amdgpu_ib *ib = p->job->ibs;
	uint64_t src = ib->gpu_addr;

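	/* The PTE values were staged at the tail of the IB, copy from there */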
	src += p->num_dw_left * 4;

	pe += amdgpu_bo_gpu_offset_no_check(bo);
	trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);

	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}

/**
 * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to set up the page table using the DMA.
 */
static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
				    struct amdgpu_bo *bo, uint64_t pe,
				    uint64_t addr, unsigned count,
				    uint32_t incr, uint64_t flags)
{
	struct amdgpu_ib *ib = p->job->ibs;

	pe += amdgpu_bo_gpu_offset_no_check(bo);
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
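	/* Write very small runs directly, use the set_pte_pde packet otherwise */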
	if (count < 3) {
		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
				    count, incr);
	} else {
		amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_sdma_update - execute VM update
 *
 * @p: see amdgpu_vm_update_params definition
 * @vmbo: PD/PT to update
 * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Reserve space in the IB, set up the mapping buffer on demand and write the
 * commands to the IB.
 */
static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
				 struct amdgpu_bo_vm *vmbo, uint64_t pe,
				 uint64_t addr, unsigned count, uint32_t incr,
				 uint64_t flags)
{
	struct amdgpu_bo *bo = &vmbo->bo;
	struct dma_resv_iter cursor;
	unsigned int i, ndw, nptes;
	struct dma_fence *fence;
	uint64_t *pte;
	int r;

	/* Wait for PD/PT moves to be completed */
	dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&p->job->base, fence);
		if (r) {
			dma_fence_put(fence);
			dma_resv_iter_end(&cursor);
			return r;
		}
	}
	dma_resv_iter_end(&cursor);

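	/*
	 * Emit the update in chunks, limited by the space left in the
	 * current IB.
	 */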
	do {
		ndw = p->num_dw_left;
		ndw -= p->job->ibs->length_dw;

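		/*
		 * Commit the current job and start a new one when the IB
		 * is nearly full.
		 */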
		if (ndw < 32) {
			r = amdgpu_vm_sdma_commit(p, NULL);
			if (r)
				return r;

			r = amdgpu_vm_sdma_alloc_job(p, count);
			if (r)
				return r;
		}

		if (!p->pages_addr) {
			/* set page commands needed */
			amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
						incr, flags);
			return 0;
		}

		/* copy commands needed */
		ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;

		/* for padding */
		ndw -= 7;

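		/* Each staged PTE occupies two dw at the end of the IB */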
		nptes = min(count, ndw / 2);

		/* Put the PTEs at the end of the IB. */
		p->num_dw_left -= nptes * 2;
		pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
		for (i = 0; i < nptes; ++i, addr += incr) {
			pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
			pte[i] |= flags;
		}

		amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);

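		/* Advance past the entries just written, 8 bytes per PTE */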
		pe += nptes * 8;
		count -= nptes;
	} while (count);

	return 0;
}

const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
	.map_table = amdgpu_vm_sdma_map_table,
	.prepare = amdgpu_vm_sdma_prepare,
	.update = amdgpu_vm_sdma_update,
	.commit = amdgpu_vm_sdma_commit
};