1  /*
2   * Copyright 2009 Jerome Glisse.
3   * All Rights Reserved.
4   *
5   * Permission is hereby granted, free of charge, to any person obtaining a
6   * copy of this software and associated documentation files (the
7   * "Software"), to deal in the Software without restriction, including
8   * without limitation the rights to use, copy, modify, merge, publish,
9   * distribute, sub license, and/or sell copies of the Software, and to
10   * permit persons to whom the Software is furnished to do so, subject to
11   * the following conditions:
12   *
13   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16   * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19   * USE OR OTHER DEALINGS IN THE SOFTWARE.
20   *
21   * The above copyright notice and this permission notice (including the
22   * next paragraph) shall be included in all copies or substantial portions
23   * of the Software.
24   *
25   */
26  /*
27   * Authors:
28   *    Jerome Glisse <glisse@freedesktop.org>
29   *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30   *    Dave Airlie
31   */
32  
33  #include <linux/dma-mapping.h>
34  #include <linux/iommu.h>
35  #include <linux/pagemap.h>
36  #include <linux/sched/task.h>
37  #include <linux/sched/mm.h>
38  #include <linux/seq_file.h>
39  #include <linux/slab.h>
40  #include <linux/swap.h>
41  #include <linux/dma-buf.h>
42  #include <linux/sizes.h>
43  #include <linux/module.h>
44  
45  #include <drm/drm_drv.h>
46  #include <drm/ttm/ttm_bo.h>
47  #include <drm/ttm/ttm_placement.h>
48  #include <drm/ttm/ttm_range_manager.h>
49  #include <drm/ttm/ttm_tt.h>
50  
51  #include <drm/amdgpu_drm.h>
52  
53  #include "amdgpu.h"
54  #include "amdgpu_object.h"
55  #include "amdgpu_trace.h"
56  #include "amdgpu_amdkfd.h"
57  #include "amdgpu_sdma.h"
58  #include "amdgpu_ras.h"
59  #include "amdgpu_hmm.h"
60  #include "amdgpu_atomfirmware.h"
61  #include "amdgpu_res_cursor.h"
62  #include "bif/bif_4_1_d.h"
63  
64  MODULE_IMPORT_NS(DMA_BUF);
65  
/*
 * Not referenced in the part of the file reviewed here.
 * NOTE(review): the name suggests an upper bound, in dwords, for a single
 * VRAM read - confirm against the user of this macro.
 */
#define AMDGPU_TTM_VRAM_MAX_DW_READ	((size_t)128)

/*
 * Forward declarations: amdgpu_bo_move() calls both helpers before their
 * definitions appear further down in this file.
 */
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem);
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm);
73  
/*
 * amdgpu_ttm_init_on_chip - Register a range manager for one memory type
 *
 * @adev: amdgpu device whose TTM device (adev->mman.bdev) gets the manager
 * @type: memory type id, handed through unchanged
 * @size_in_page: size handed through unchanged to ttm_range_man_init()
 *
 * Thin wrapper around ttm_range_man_init(); the third argument of that call
 * is always false here.
 *
 * Returns: whatever ttm_range_man_init() returns.
 */
static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
				    unsigned int type,
				    uint64_t size_in_page)
{
	struct ttm_device *bdev = &adev->mman.bdev;

	return ttm_range_man_init(bdev, type, false, size_in_page);
}
81  
82  /**
83   * amdgpu_evict_flags - Compute placement flags
84   *
85   * @bo: The buffer object to evict
86   * @placement: Possible destination(s) for evicted BO
87   *
88   * Fill in placement data when ttm_bo_evict() is called
89   */
amdgpu_evict_flags(struct ttm_buffer_object * bo,struct ttm_placement * placement)90  static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
91  				struct ttm_placement *placement)
92  {
93  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
94  	struct amdgpu_bo *abo;
95  	static const struct ttm_place placements = {
96  		.fpfn = 0,
97  		.lpfn = 0,
98  		.mem_type = TTM_PL_SYSTEM,
99  		.flags = 0
100  	};
101  
102  	/* Don't handle scatter gather BOs */
103  	if (bo->type == ttm_bo_type_sg) {
104  		placement->num_placement = 0;
105  		return;
106  	}
107  
108  	/* Object isn't an AMDGPU object so ignore */
109  	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
110  		placement->placement = &placements;
111  		placement->num_placement = 1;
112  		return;
113  	}
114  
115  	abo = ttm_to_amdgpu_bo(bo);
116  	if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
117  		placement->num_placement = 0;
118  		return;
119  	}
120  
121  	switch (bo->resource->mem_type) {
122  	case AMDGPU_PL_GDS:
123  	case AMDGPU_PL_GWS:
124  	case AMDGPU_PL_OA:
125  	case AMDGPU_PL_DOORBELL:
126  		placement->num_placement = 0;
127  		return;
128  
129  	case TTM_PL_VRAM:
130  		if (!adev->mman.buffer_funcs_enabled) {
131  			/* Move to system memory */
132  			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
133  
134  		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
135  			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
136  			   amdgpu_res_cpu_visible(adev, bo->resource)) {
137  
138  			/* Try evicting to the CPU inaccessible part of VRAM
139  			 * first, but only set GTT as busy placement, so this
140  			 * BO will be evicted to GTT rather than causing other
141  			 * BOs to be evicted from VRAM
142  			 */
143  			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
144  							AMDGPU_GEM_DOMAIN_GTT |
145  							AMDGPU_GEM_DOMAIN_CPU);
146  			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
147  			abo->placements[0].lpfn = 0;
148  			abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
149  		} else {
150  			/* Move to GTT memory */
151  			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
152  							AMDGPU_GEM_DOMAIN_CPU);
153  		}
154  		break;
155  	case TTM_PL_TT:
156  	case AMDGPU_PL_PREEMPT:
157  	default:
158  		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
159  		break;
160  	}
161  	*placement = abo->placement;
162  }
163  
/**
 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 * @bo: buffer object to map
 * @mem: memory object to map
 * @mm_cur: range to map
 * @window: which GART window to use
 * @ring: DMA ring to use for the copy
 * @tmz: if we should setup a TMZ enabled mapping
 * @size: in number of bytes to map, out number of bytes mapped
 * @addr: resulting address inside the MC address space
 *
 * Setup one of the GART windows to access a specific piece of memory or return
 * the physical address for local memory.
 *
 * If the memory can be addressed directly (no TMZ and @mem->start is not
 * AMDGPU_BO_INVALID_OFFSET) only @addr is written and @size is left as is.
 * Otherwise a job is submitted that writes the page table entries for the
 * window; the job's fence is dropped immediately and not waited for here.
 *
 * Returns:
 * 0 on success, -EINVAL for AMDGPU_PL_PREEMPT memory, or the error code of
 * amdgpu_job_alloc_with_ib().
 */
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
				 struct ttm_resource *mem,
				 struct amdgpu_res_cursor *mm_cur,
				 unsigned int window, struct amdgpu_ring *ring,
				 bool tmz, uint64_t *size, uint64_t *addr)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offset, num_pages, num_dw, num_bytes;
	uint64_t src_addr, dst_addr;
	struct amdgpu_job *job;
	void *cpu_addr;
	uint64_t flags;
	unsigned int i;
	int r;

	/*
	 * The page table update below is issued as one copy of up to
	 * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8 bytes, so the copy engine has to
	 * support at least that much per command.
	 */
	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);

	if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
		return -EINVAL;

	/* Map only what can't be accessed directly */
	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
		return 0;
	}


	/*
	 * If start begins at an offset inside the page, then adjust the size
	 * and addr accordingly
	 */
	offset = mm_cur->start & ~PAGE_MASK;

	/* Whole pages needed, capped to what one window can hold */
	num_pages = PFN_UP(*size + offset);
	num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);

	/* Report back how many bytes this mapping really covers */
	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);

	/* Address of the selected window inside the GART aperture */
	*addr = adev->gmc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;
	*addr += offset;

	/*
	 * IB layout: num_dw dwords reserved for the copy command, followed by
	 * num_bytes of page table data (8 bytes per GPU page).
	 */
	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;

	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	/* Copy source: the page table data placed behind the command */
	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	/* Copy destination: this window's slot inside the GART table BO */
	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, 0);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	/* The padded command must still fit into the reserved num_dw dwords */
	WARN_ON(job->ibs[0].length_dw > num_dw);

	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
	if (tmz)
		flags |= AMDGPU_PTE_TMZ;

	/* CPU view of the page table data area inside the IB */
	cpu_addr = &job->ibs[0].ptr[num_dw];

	if (mem->mem_type == TTM_PL_TT) {
		dma_addr_t *dma_addr;

		/* GTT: take the DMA addresses recorded in the ttm_tt */
		dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
		amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
	} else {
		dma_addr_t dma_address;

		/*
		 * Everything else: addresses are consecutive, starting at the
		 * cursor position plus vram_base_offset; map page by page.
		 */
		dma_address = mm_cur->start;
		dma_address += adev->vm_manager.vram_base_offset;

		for (i = 0; i < num_pages; ++i) {
			amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
					flags, cpu_addr);
			dma_address += PAGE_SIZE;
		}
	}

	/* Fire and forget: the returned fence reference is dropped right away */
	dma_fence_put(amdgpu_job_submit(job));
	return 0;
}
271  
/**
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @tmz: if a secure copy should be used
 * @resv: resv object to sync to
 * @f: Returns the last fence if multiple jobs are submitted.
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
 * move and different for a BO to BO copy.
 *
 * The copy is split into chunks; adev->mman.gtt_window_lock is held while
 * the chunks are mapped and submitted. @f may be NULL. When it is not, it is
 * also written on failure and then holds the fence of the last chunk that
 * was submitted successfully (NULL if there was none).
 *
 * Returns:
 * 0 on success, -EINVAL if the buffer functions are disabled, or the error
 * code of amdgpu_ttm_map_buffer() / amdgpu_copy_buffer().
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
			       const struct amdgpu_copy_mem *src,
			       const struct amdgpu_copy_mem *dst,
			       uint64_t size, bool tmz,
			       struct dma_resv *resv,
			       struct dma_fence **f)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_res_cursor src_mm, dst_mm;
	struct dma_fence *fence = NULL;
	int r = 0;
	/* Declared outside the loop: bits set for one chunk stay set */
	uint32_t copy_flags = 0;
	struct amdgpu_bo *abo_src, *abo_dst;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);

	/* Held across map + submit of every chunk (windows 0 and 1 below) */
	mutex_lock(&adev->mman.gtt_window_lock);
	while (src_mm.remaining) {
		uint64_t from, to, cur_size, tiling_flags;
		uint32_t num_type, data_format, max_com;
		struct dma_fence *next;

		/* Never copy more than 256MiB at once to avoid a timeout */
		cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);

		/*
		 * Map src to window 0 and dst to window 1. Either call may
		 * shrink cur_size to what fits into its window.
		 */
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
					  0, ring, tmz, &cur_size, &from);
		if (r)
			goto error;

		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
					  1, ring, tmz, &cur_size, &to);
		if (r)
			goto error;

		abo_src = ttm_to_amdgpu_bo(src->bo);
		abo_dst = ttm_to_amdgpu_bo(dst->bo);
		if (tmz)
			copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
		/* GFX12 DCC source currently in VRAM */
		if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
		    (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
			copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
		/*
		 * GFX12 DCC destination in VRAM: the compression parameters
		 * are taken from the destination BO's tiling flags.
		 */
		if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
		    (dst->mem->mem_type == TTM_PL_VRAM)) {
			copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
			amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
			max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
			num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
			data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
			copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
				       AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
				       AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format));
		}

		r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
				       &next, false, true, copy_flags);
		if (r)
			goto error;

		/* Only the fence of the most recent chunk is kept */
		dma_fence_put(fence);
		fence = next;

		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
	}
error:
	/* Reached on success as well as on failure */
	mutex_unlock(&adev->mman.gtt_window_lock);
	/* Give the caller its own reference, then drop the local one */
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}
366  
367  /*
368   * amdgpu_move_blit - Copy an entire buffer to another buffer
369   *
370   * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
371   * help move buffers to and from VRAM.
372   */
amdgpu_move_blit(struct ttm_buffer_object * bo,bool evict,struct ttm_resource * new_mem,struct ttm_resource * old_mem)373  static int amdgpu_move_blit(struct ttm_buffer_object *bo,
374  			    bool evict,
375  			    struct ttm_resource *new_mem,
376  			    struct ttm_resource *old_mem)
377  {
378  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
379  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
380  	struct amdgpu_copy_mem src, dst;
381  	struct dma_fence *fence = NULL;
382  	int r;
383  
384  	src.bo = bo;
385  	dst.bo = bo;
386  	src.mem = old_mem;
387  	dst.mem = new_mem;
388  	src.offset = 0;
389  	dst.offset = 0;
390  
391  	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
392  				       new_mem->size,
393  				       amdgpu_bo_encrypted(abo),
394  				       bo->base.resv, &fence);
395  	if (r)
396  		goto error;
397  
398  	/* clear the space being freed */
399  	if (old_mem->mem_type == TTM_PL_VRAM &&
400  	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
401  		struct dma_fence *wipe_fence = NULL;
402  
403  		r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
404  				       false);
405  		if (r) {
406  			goto error;
407  		} else if (wipe_fence) {
408  			amdgpu_vram_mgr_set_cleared(bo->resource);
409  			dma_fence_put(fence);
410  			fence = wipe_fence;
411  		}
412  	}
413  
414  	/* Always block for VM page tables before committing the new location */
415  	if (bo->type == ttm_bo_type_kernel)
416  		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
417  	else
418  		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
419  	dma_fence_put(fence);
420  	return r;
421  
422  error:
423  	if (fence)
424  		dma_fence_wait(fence, false);
425  	dma_fence_put(fence);
426  	return r;
427  }
428  
429  /**
430   * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
431   * @adev: amdgpu device
432   * @res: the resource to check
433   *
434   * Returns: true if the full resource is CPU visible, false otherwise.
435   */
amdgpu_res_cpu_visible(struct amdgpu_device * adev,struct ttm_resource * res)436  bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
437  			    struct ttm_resource *res)
438  {
439  	struct amdgpu_res_cursor cursor;
440  
441  	if (!res)
442  		return false;
443  
444  	if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
445  	    res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
446  		return true;
447  
448  	if (res->mem_type != TTM_PL_VRAM)
449  		return false;
450  
451  	amdgpu_res_first(res, 0, res->size, &cursor);
452  	while (cursor.remaining) {
453  		if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
454  			return false;
455  		amdgpu_res_next(&cursor, cursor.size);
456  	}
457  
458  	return true;
459  }
460  
461  /*
462   * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
463   *
464   * Called by amdgpu_bo_move()
465   */
amdgpu_res_copyable(struct amdgpu_device * adev,struct ttm_resource * mem)466  static bool amdgpu_res_copyable(struct amdgpu_device *adev,
467  				struct ttm_resource *mem)
468  {
469  	if (!amdgpu_res_cpu_visible(adev, mem))
470  		return false;
471  
472  	/* ttm_resource_ioremap only supports contiguous memory */
473  	if (mem->mem_type == TTM_PL_VRAM &&
474  	    !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
475  		return false;
476  
477  	return true;
478  }
479  
/*
 * amdgpu_bo_move - Move a buffer object to a new memory location
 *
 * Called by ttm_bo_handle_move_mem()
 *
 * @bo: buffer object being moved; bo->resource is the current location
 * @evict: passed on to amdgpu_bo_move_notify() and amdgpu_move_blit(), and
 *         used to count evictions
 * @ctx: operation context, used for waiting and for the memcpy fallback
 * @new_mem: destination resource
 * @hop: filled in with a temporary GTT placement when -EMULTIHOP is returned
 *
 * Returns 0 on success, -EMULTIHOP when the move has to go through GTT
 * first, or a negative error code.
 */
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
			  struct ttm_resource *new_mem,
			  struct ttm_place *hop)
{
	struct amdgpu_device *adev;
	struct amdgpu_bo *abo;
	struct ttm_resource *old_mem = bo->resource;
	int r;

	/* Bind the backing pages first when the destination is GTT/preempt */
	if (new_mem->mem_type == TTM_PL_TT ||
	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
		if (r)
			return r;
	}

	abo = ttm_to_amdgpu_bo(bo);
	adev = amdgpu_ttm_adev(bo->bdev);

	/* No previous resource, or system memory without a ttm: nothing to copy */
	if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
			 bo->ttm == NULL)) {
		amdgpu_bo_move_notify(bo, evict, new_mem);
		ttm_bo_move_null(bo, new_mem);
		return 0;
	}
	/* System -> GTT/preempt: the bind above did the work, no copy needed */
	if (old_mem->mem_type == TTM_PL_SYSTEM &&
	    (new_mem->mem_type == TTM_PL_TT ||
	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
		amdgpu_bo_move_notify(bo, evict, new_mem);
		ttm_bo_move_null(bo, new_mem);
		return 0;
	}
	/* GTT/preempt -> system: wait for the BO, then unbind, no copy needed */
	if ((old_mem->mem_type == TTM_PL_TT ||
	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
	    new_mem->mem_type == TTM_PL_SYSTEM) {
		r = ttm_bo_wait_ctx(bo, ctx);
		if (r)
			return r;

		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
		amdgpu_bo_move_notify(bo, evict, new_mem);
		ttm_resource_free(bo, &bo->resource);
		ttm_bo_assign_mem(bo, new_mem);
		return 0;
	}

	if (old_mem->mem_type == AMDGPU_PL_GDS ||
	    old_mem->mem_type == AMDGPU_PL_GWS ||
	    old_mem->mem_type == AMDGPU_PL_OA ||
	    old_mem->mem_type == AMDGPU_PL_DOORBELL ||
	    new_mem->mem_type == AMDGPU_PL_GDS ||
	    new_mem->mem_type == AMDGPU_PL_GWS ||
	    new_mem->mem_type == AMDGPU_PL_OA ||
	    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
		/* Nothing to save here */
		amdgpu_bo_move_notify(bo, evict, new_mem);
		ttm_bo_move_null(bo, new_mem);
		return 0;
	}

	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

	/*
	 * With the buffer functions enabled, system <-> VRAM is not done in
	 * one step: ask the caller to hop through a temporary GTT placement.
	 */
	if (adev->mman.buffer_funcs_enabled &&
	    ((old_mem->mem_type == TTM_PL_SYSTEM &&
	      new_mem->mem_type == TTM_PL_VRAM) ||
	     (old_mem->mem_type == TTM_PL_VRAM &&
	      new_mem->mem_type == TTM_PL_SYSTEM))) {
		hop->fpfn = 0;
		hop->lpfn = 0;
		hop->mem_type = TTM_PL_TT;
		hop->flags = TTM_PL_FLAG_TEMPORARY;
		return -EMULTIHOP;
	}

	/* Preferred path: blit; -ENODEV selects the CPU fallback below */
	amdgpu_bo_move_notify(bo, evict, new_mem);
	if (adev->mman.buffer_funcs_enabled)
		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
	else
		r = -ENODEV;

	if (r) {
		/* Check that all memory is CPU accessible */
		if (!amdgpu_res_copyable(adev, old_mem) ||
		    !amdgpu_res_copyable(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
			/* Report the original blit error (or -ENODEV) */
			return r;
		}

		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
			return r;
	}

	/* update statistics after the move */
	if (evict)
		atomic64_inc(&adev->num_evictions);
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
	return 0;
}
592  
593  /*
594   * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
595   *
596   * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
597   */
amdgpu_ttm_io_mem_reserve(struct ttm_device * bdev,struct ttm_resource * mem)598  static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
599  				     struct ttm_resource *mem)
600  {
601  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
602  
603  	switch (mem->mem_type) {
604  	case TTM_PL_SYSTEM:
605  		/* system memory */
606  		return 0;
607  	case TTM_PL_TT:
608  	case AMDGPU_PL_PREEMPT:
609  		break;
610  	case TTM_PL_VRAM:
611  		mem->bus.offset = mem->start << PAGE_SHIFT;
612  
613  		if (adev->mman.aper_base_kaddr &&
614  		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
615  			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
616  					mem->bus.offset;
617  
618  		mem->bus.offset += adev->gmc.aper_base;
619  		mem->bus.is_iomem = true;
620  		break;
621  	case AMDGPU_PL_DOORBELL:
622  		mem->bus.offset = mem->start << PAGE_SHIFT;
623  		mem->bus.offset += adev->doorbell.base;
624  		mem->bus.is_iomem = true;
625  		mem->bus.caching = ttm_uncached;
626  		break;
627  	default:
628  		return -EINVAL;
629  	}
630  	return 0;
631  }
632  
amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object * bo,unsigned long page_offset)633  static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
634  					   unsigned long page_offset)
635  {
636  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
637  	struct amdgpu_res_cursor cursor;
638  
639  	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
640  			 &cursor);
641  
642  	if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
643  		return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
644  
645  	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
646  }
647  
648  /**
649   * amdgpu_ttm_domain_start - Returns GPU start address
650   * @adev: amdgpu device object
651   * @type: type of the memory
652   *
653   * Returns:
654   * GPU start address of a memory domain
655   */
656  
amdgpu_ttm_domain_start(struct amdgpu_device * adev,uint32_t type)657  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
658  {
659  	switch (type) {
660  	case TTM_PL_TT:
661  		return adev->gmc.gart_start;
662  	case TTM_PL_VRAM:
663  		return adev->gmc.vram_start;
664  	}
665  
666  	return 0;
667  }
668  
/*
 * TTM backend functions.
 */

/*
 * struct amdgpu_ttm_tt - amdgpu specific state wrapped around a struct ttm_tt
 *
 * The generic ttm_tt must remain the first member: besides the container_of
 * based ttm_to_amdgpu_ttm_tt() below, some code in this file converts a
 * struct ttm_tt pointer with a plain cast.
 */
struct amdgpu_ttm_tt {
	struct ttm_tt	ttm;		/* embedded generic TTM object */
	struct drm_gem_object	*gobj;	/* GEM object; its import_attach is mapped for external BOs */
	u64			offset;		/* GART offset in bytes, or AMDGPU_BO_INVALID_OFFSET */
	uint64_t		userptr;	/* user address backing the BO; 0 if not a userptr BO */
	struct task_struct	*usertask;	/* not used in this part of the file; presumably the owning task - TODO confirm */
	uint32_t		userflags;	/* AMDGPU_GEM_USERPTR_* flags */
	bool			bound;		/* true once the pages are bound into the GART */
	int32_t			pool_id;	/* not used in this part of the file - TODO confirm meaning */
};

/* Get the enclosing amdgpu_ttm_tt from a pointer to its embedded ttm_tt */
#define ttm_to_amdgpu_ttm_tt(ptr)	container_of(ptr, struct amdgpu_ttm_tt, ttm)
684  
685  #ifdef CONFIG_DRM_AMDGPU_USERPTR
686  /*
687   * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
688   * memory and start HMM tracking CPU page table update
689   *
690   * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
691   * once afterwards to stop HMM tracking
692   */
amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo * bo,struct page ** pages,struct hmm_range ** range)693  int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
694  				 struct hmm_range **range)
695  {
696  	struct ttm_tt *ttm = bo->tbo.ttm;
697  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
698  	unsigned long start = gtt->userptr;
699  	struct vm_area_struct *vma;
700  	struct mm_struct *mm;
701  	bool readonly;
702  	int r = 0;
703  
704  	/* Make sure get_user_pages_done() can cleanup gracefully */
705  	*range = NULL;
706  
707  	mm = bo->notifier.mm;
708  	if (unlikely(!mm)) {
709  		DRM_DEBUG_DRIVER("BO is not registered?\n");
710  		return -EFAULT;
711  	}
712  
713  	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
714  		return -ESRCH;
715  
716  	mmap_read_lock(mm);
717  	vma = vma_lookup(mm, start);
718  	if (unlikely(!vma)) {
719  		r = -EFAULT;
720  		goto out_unlock;
721  	}
722  	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
723  		vma->vm_file)) {
724  		r = -EPERM;
725  		goto out_unlock;
726  	}
727  
728  	readonly = amdgpu_ttm_tt_is_readonly(ttm);
729  	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
730  				       readonly, NULL, pages, range);
731  out_unlock:
732  	mmap_read_unlock(mm);
733  	if (r)
734  		pr_debug("failed %d to get user pages 0x%lx\n", r, start);
735  
736  	mmput(mm);
737  
738  	return r;
739  }
740  
741  /* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
742   */
amdgpu_ttm_tt_discard_user_pages(struct ttm_tt * ttm,struct hmm_range * range)743  void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
744  				      struct hmm_range *range)
745  {
746  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
747  
748  	if (gtt && gtt->userptr && range)
749  		amdgpu_hmm_range_get_pages_done(range);
750  }
751  
752  /*
753   * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
754   * Check if the pages backing this ttm range have been invalidated
755   *
756   * Returns: true if pages are still valid
757   */
amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt * ttm,struct hmm_range * range)758  bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
759  				       struct hmm_range *range)
760  {
761  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
762  
763  	if (!gtt || !gtt->userptr || !range)
764  		return false;
765  
766  	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
767  		gtt->userptr, ttm->num_pages);
768  
769  	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
770  
771  	return !amdgpu_hmm_range_get_pages_done(range);
772  }
773  #endif
774  
775  /*
776   * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
777   *
778   * Called by amdgpu_cs_list_validate(). This creates the page list
779   * that backs user memory and will ultimately be mapped into the device
780   * address space.
781   */
amdgpu_ttm_tt_set_user_pages(struct ttm_tt * ttm,struct page ** pages)782  void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
783  {
784  	unsigned long i;
785  
786  	for (i = 0; i < ttm->num_pages; ++i)
787  		ttm->pages[i] = pages ? pages[i] : NULL;
788  }
789  
790  /*
791   * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
792   *
793   * Called by amdgpu_ttm_backend_bind()
794   **/
amdgpu_ttm_tt_pin_userptr(struct ttm_device * bdev,struct ttm_tt * ttm)795  static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
796  				     struct ttm_tt *ttm)
797  {
798  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
799  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
800  	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
801  	enum dma_data_direction direction = write ?
802  		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
803  	int r;
804  
805  	/* Allocate an SG array and squash pages into it */
806  	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
807  				      (u64)ttm->num_pages << PAGE_SHIFT,
808  				      GFP_KERNEL);
809  	if (r)
810  		goto release_sg;
811  
812  	/* Map SG to device */
813  	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
814  	if (r)
815  		goto release_sg;
816  
817  	/* convert SG to linear array of pages and dma addresses */
818  	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
819  				       ttm->num_pages);
820  
821  	return 0;
822  
823  release_sg:
824  	kfree(ttm->sg);
825  	ttm->sg = NULL;
826  	return r;
827  }
828  
829  /*
830   * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
831   */
amdgpu_ttm_tt_unpin_userptr(struct ttm_device * bdev,struct ttm_tt * ttm)832  static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
833  					struct ttm_tt *ttm)
834  {
835  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
836  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
837  	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
838  	enum dma_data_direction direction = write ?
839  		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
840  
841  	/* double check that we don't free the table twice */
842  	if (!ttm->sg || !ttm->sg->sgl)
843  		return;
844  
845  	/* unmap the pages mapped to the device */
846  	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
847  	sg_free_table(ttm->sg);
848  }
849  
850  /*
851   * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
852   * MQDn+CtrlStackn where n is the number of XCCs per partition.
853   * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
854   * and uses memory type default, UC. The rest of pages_per_xcc are
855   * Ctrl stack and modify their memory type to NC.
856   */
amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device * adev,struct ttm_tt * ttm,uint64_t flags)857  static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
858  				struct ttm_tt *ttm, uint64_t flags)
859  {
860  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
861  	uint64_t total_pages = ttm->num_pages;
862  	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
863  	uint64_t page_idx, pages_per_xcc;
864  	int i;
865  	uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
866  
867  	pages_per_xcc = total_pages;
868  	do_div(pages_per_xcc, num_xcc);
869  
870  	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
871  		/* MQD page: use default flags */
872  		amdgpu_gart_bind(adev,
873  				gtt->offset + (page_idx << PAGE_SHIFT),
874  				1, &gtt->ttm.dma_address[page_idx], flags);
875  		/*
876  		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
877  		 * the second page of the BO onward.
878  		 */
879  		amdgpu_gart_bind(adev,
880  				gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
881  				pages_per_xcc - 1,
882  				&gtt->ttm.dma_address[page_idx + 1],
883  				ctrl_flags);
884  	}
885  }
886  
/*
 * amdgpu_ttm_gart_bind - Bind the pages of a BO into the GART
 *
 * @adev: amdgpu device
 * @tbo: buffer object whose ttm pages get bound at the ttm's GART offset
 * @flags: PTE flags; AMDGPU_PTE_TMZ is added for encrypted BOs
 *
 * BOs created with AMDGPU_GEM_CREATE_CP_MQD_GFX9 go through
 * amdgpu_ttm_gart_bind_gfx9_mqd(), all others are bound in one piece.
 * The ttm is marked as bound afterwards.
 */
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
				 struct ttm_buffer_object *tbo,
				 uint64_t flags)
{
	struct ttm_tt *ttm = tbo->ttm;
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
	uint64_t pte_flags = flags;

	if (amdgpu_bo_encrypted(abo))
		pte_flags |= AMDGPU_PTE_TMZ;

	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9)
		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, pte_flags);
	else
		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
				 gtt->ttm.dma_address, pte_flags);

	gtt->bound = true;
}
906  
907  /*
908   * amdgpu_ttm_backend_bind - Bind GTT memory
909   *
910   * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
911   * This handles binding GTT memory to the device address space.
912   */
amdgpu_ttm_backend_bind(struct ttm_device * bdev,struct ttm_tt * ttm,struct ttm_resource * bo_mem)913  static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
914  				   struct ttm_tt *ttm,
915  				   struct ttm_resource *bo_mem)
916  {
917  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
918  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
919  	uint64_t flags;
920  	int r;
921  
922  	if (!bo_mem)
923  		return -EINVAL;
924  
925  	if (gtt->bound)
926  		return 0;
927  
928  	if (gtt->userptr) {
929  		r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
930  		if (r) {
931  			DRM_ERROR("failed to pin userptr\n");
932  			return r;
933  		}
934  	} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
935  		if (!ttm->sg) {
936  			struct dma_buf_attachment *attach;
937  			struct sg_table *sgt;
938  
939  			attach = gtt->gobj->import_attach;
940  			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
941  			if (IS_ERR(sgt))
942  				return PTR_ERR(sgt);
943  
944  			ttm->sg = sgt;
945  		}
946  
947  		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
948  					       ttm->num_pages);
949  	}
950  
951  	if (!ttm->num_pages) {
952  		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
953  		     ttm->num_pages, bo_mem, ttm);
954  	}
955  
956  	if (bo_mem->mem_type != TTM_PL_TT ||
957  	    !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
958  		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
959  		return 0;
960  	}
961  
962  	/* compute PTE flags relevant to this BO memory */
963  	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
964  
965  	/* bind pages into GART page tables */
966  	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
967  	amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
968  			 gtt->ttm.dma_address, flags);
969  	gtt->bound = true;
970  	return 0;
971  }
972  
973  /*
974   * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
975   * through AGP or GART aperture.
976   *
977   * If bo is accessible through AGP aperture, then use AGP aperture
978   * to access bo; otherwise allocate logical space in GART aperture
979   * and map bo to GART aperture.
980   */
amdgpu_ttm_alloc_gart(struct ttm_buffer_object * bo)981  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
982  {
983  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
984  	struct ttm_operation_ctx ctx = { false, false };
985  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
986  	struct ttm_placement placement;
987  	struct ttm_place placements;
988  	struct ttm_resource *tmp;
989  	uint64_t addr, flags;
990  	int r;
991  
992  	if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
993  		return 0;
994  
995  	addr = amdgpu_gmc_agp_addr(bo);
996  	if (addr != AMDGPU_BO_INVALID_OFFSET)
997  		return 0;
998  
999  	/* allocate GART space */
1000  	placement.num_placement = 1;
1001  	placement.placement = &placements;
1002  	placements.fpfn = 0;
1003  	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
1004  	placements.mem_type = TTM_PL_TT;
1005  	placements.flags = bo->resource->placement;
1006  
1007  	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
1008  	if (unlikely(r))
1009  		return r;
1010  
1011  	/* compute PTE flags for this buffer object */
1012  	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
1013  
1014  	/* Bind pages */
1015  	gtt->offset = (u64)tmp->start << PAGE_SHIFT;
1016  	amdgpu_ttm_gart_bind(adev, bo, flags);
1017  	amdgpu_gart_invalidate_tlb(adev);
1018  	ttm_resource_free(bo, &bo->resource);
1019  	ttm_bo_assign_mem(bo, tmp);
1020  
1021  	return 0;
1022  }
1023  
1024  /*
1025   * amdgpu_ttm_recover_gart - Rebind GTT pages
1026   *
1027   * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
1028   * rebind GTT pages during a GPU reset.
1029   */
amdgpu_ttm_recover_gart(struct ttm_buffer_object * tbo)1030  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1031  {
1032  	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1033  	uint64_t flags;
1034  
1035  	if (!tbo->ttm)
1036  		return;
1037  
1038  	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1039  	amdgpu_ttm_gart_bind(adev, tbo, flags);
1040  }
1041  
1042  /*
1043   * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1044   *
1045   * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
1046   * ttm_tt_destroy().
1047   */
amdgpu_ttm_backend_unbind(struct ttm_device * bdev,struct ttm_tt * ttm)1048  static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
1049  				      struct ttm_tt *ttm)
1050  {
1051  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
1052  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1053  
1054  	/* if the pages have userptr pinning then clear that first */
1055  	if (gtt->userptr) {
1056  		amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
1057  	} else if (ttm->sg && gtt->gobj->import_attach) {
1058  		struct dma_buf_attachment *attach;
1059  
1060  		attach = gtt->gobj->import_attach;
1061  		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
1062  		ttm->sg = NULL;
1063  	}
1064  
1065  	if (!gtt->bound)
1066  		return;
1067  
1068  	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1069  		return;
1070  
1071  	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
1072  	amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1073  	gtt->bound = false;
1074  }
1075  
amdgpu_ttm_backend_destroy(struct ttm_device * bdev,struct ttm_tt * ttm)1076  static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
1077  				       struct ttm_tt *ttm)
1078  {
1079  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1080  
1081  	if (gtt->usertask)
1082  		put_task_struct(gtt->usertask);
1083  
1084  	ttm_tt_fini(&gtt->ttm);
1085  	kfree(gtt);
1086  }
1087  
1088  /**
1089   * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1090   *
1091   * @bo: The buffer object to create a GTT ttm_tt object around
1092   * @page_flags: Page flags to be added to the ttm_tt object
1093   *
1094   * Called by ttm_tt_create().
1095   */
amdgpu_ttm_tt_create(struct ttm_buffer_object * bo,uint32_t page_flags)1096  static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1097  					   uint32_t page_flags)
1098  {
1099  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
1100  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1101  	struct amdgpu_ttm_tt *gtt;
1102  	enum ttm_caching caching;
1103  
1104  	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
1105  	if (!gtt)
1106  		return NULL;
1107  
1108  	gtt->gobj = &bo->base;
1109  	if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
1110  		gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
1111  	else
1112  		gtt->pool_id = abo->xcp_id;
1113  
1114  	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
1115  		caching = ttm_write_combined;
1116  	else
1117  		caching = ttm_cached;
1118  
1119  	/* allocate space for the uninitialized page entries */
1120  	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
1121  		kfree(gtt);
1122  		return NULL;
1123  	}
1124  	return &gtt->ttm;
1125  }
1126  
1127  /*
1128   * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1129   *
1130   * Map the pages of a ttm_tt object to an address space visible
1131   * to the underlying device.
1132   */
amdgpu_ttm_tt_populate(struct ttm_device * bdev,struct ttm_tt * ttm,struct ttm_operation_ctx * ctx)1133  static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
1134  				  struct ttm_tt *ttm,
1135  				  struct ttm_operation_ctx *ctx)
1136  {
1137  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
1138  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1139  	struct ttm_pool *pool;
1140  	pgoff_t i;
1141  	int ret;
1142  
1143  	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1144  	if (gtt->userptr) {
1145  		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1146  		if (!ttm->sg)
1147  			return -ENOMEM;
1148  		return 0;
1149  	}
1150  
1151  	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
1152  		return 0;
1153  
1154  	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
1155  		pool = &adev->mman.ttm_pools[gtt->pool_id];
1156  	else
1157  		pool = &adev->mman.bdev.pool;
1158  	ret = ttm_pool_alloc(pool, ttm, ctx);
1159  	if (ret)
1160  		return ret;
1161  
1162  	for (i = 0; i < ttm->num_pages; ++i)
1163  		ttm->pages[i]->mapping = bdev->dev_mapping;
1164  
1165  	return 0;
1166  }
1167  
1168  /*
1169   * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1170   *
1171   * Unmaps pages of a ttm_tt object from the device address space and
1172   * unpopulates the page array backing it.
1173   */
amdgpu_ttm_tt_unpopulate(struct ttm_device * bdev,struct ttm_tt * ttm)1174  static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
1175  				     struct ttm_tt *ttm)
1176  {
1177  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1178  	struct amdgpu_device *adev;
1179  	struct ttm_pool *pool;
1180  	pgoff_t i;
1181  
1182  	amdgpu_ttm_backend_unbind(bdev, ttm);
1183  
1184  	if (gtt->userptr) {
1185  		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1186  		kfree(ttm->sg);
1187  		ttm->sg = NULL;
1188  		return;
1189  	}
1190  
1191  	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
1192  		return;
1193  
1194  	for (i = 0; i < ttm->num_pages; ++i)
1195  		ttm->pages[i]->mapping = NULL;
1196  
1197  	adev = amdgpu_ttm_adev(bdev);
1198  
1199  	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
1200  		pool = &adev->mman.ttm_pools[gtt->pool_id];
1201  	else
1202  		pool = &adev->mman.bdev.pool;
1203  
1204  	return ttm_pool_free(pool, ttm);
1205  }
1206  
1207  /**
1208   * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
1209   * task
1210   *
1211   * @tbo: The ttm_buffer_object that contains the userptr
1212   * @user_addr:  The returned value
1213   */
amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object * tbo,uint64_t * user_addr)1214  int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
1215  			      uint64_t *user_addr)
1216  {
1217  	struct amdgpu_ttm_tt *gtt;
1218  
1219  	if (!tbo->ttm)
1220  		return -EINVAL;
1221  
1222  	gtt = (void *)tbo->ttm;
1223  	*user_addr = gtt->userptr;
1224  	return 0;
1225  }
1226  
1227  /**
1228   * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
1229   * task
1230   *
1231   * @bo: The ttm_buffer_object to bind this userptr to
1232   * @addr:  The address in the current tasks VM space to use
1233   * @flags: Requirements of userptr object.
1234   *
1235   * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
1236   * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
1237   * initialize GPU VM for a KFD process.
1238   */
amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object * bo,uint64_t addr,uint32_t flags)1239  int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
1240  			      uint64_t addr, uint32_t flags)
1241  {
1242  	struct amdgpu_ttm_tt *gtt;
1243  
1244  	if (!bo->ttm) {
1245  		/* TODO: We want a separate TTM object type for userptrs */
1246  		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
1247  		if (bo->ttm == NULL)
1248  			return -ENOMEM;
1249  	}
1250  
1251  	/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
1252  	bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
1253  
1254  	gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
1255  	gtt->userptr = addr;
1256  	gtt->userflags = flags;
1257  
1258  	if (gtt->usertask)
1259  		put_task_struct(gtt->usertask);
1260  	gtt->usertask = current->group_leader;
1261  	get_task_struct(gtt->usertask);
1262  
1263  	return 0;
1264  }
1265  
1266  /*
1267   * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
1268   */
amdgpu_ttm_tt_get_usermm(struct ttm_tt * ttm)1269  struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1270  {
1271  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1272  
1273  	if (gtt == NULL)
1274  		return NULL;
1275  
1276  	if (gtt->usertask == NULL)
1277  		return NULL;
1278  
1279  	return gtt->usertask->mm;
1280  }
1281  
1282  /*
1283   * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an
1284   * address range for the current task.
1285   *
1286   */
amdgpu_ttm_tt_affect_userptr(struct ttm_tt * ttm,unsigned long start,unsigned long end,unsigned long * userptr)1287  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1288  				  unsigned long end, unsigned long *userptr)
1289  {
1290  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1291  	unsigned long size;
1292  
1293  	if (gtt == NULL || !gtt->userptr)
1294  		return false;
1295  
1296  	/* Return false if no part of the ttm_tt object lies within
1297  	 * the range
1298  	 */
1299  	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
1300  	if (gtt->userptr > end || gtt->userptr + size <= start)
1301  		return false;
1302  
1303  	if (userptr)
1304  		*userptr = gtt->userptr;
1305  	return true;
1306  }
1307  
1308  /*
1309   * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
1310   */
amdgpu_ttm_tt_is_userptr(struct ttm_tt * ttm)1311  bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1312  {
1313  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1314  
1315  	if (gtt == NULL || !gtt->userptr)
1316  		return false;
1317  
1318  	return true;
1319  }
1320  
1321  /*
1322   * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1323   */
amdgpu_ttm_tt_is_readonly(struct ttm_tt * ttm)1324  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1325  {
1326  	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
1327  
1328  	if (gtt == NULL)
1329  		return false;
1330  
1331  	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1332  }
1333  
1334  /**
1335   * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1336   *
1337   * @ttm: The ttm_tt object to compute the flags for
1338   * @mem: The memory registry backing this ttm_tt object
1339   *
1340   * Figure out the flags to use for a VM PDE (Page Directory Entry).
1341   */
amdgpu_ttm_tt_pde_flags(struct ttm_tt * ttm,struct ttm_resource * mem)1342  uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
1343  {
1344  	uint64_t flags = 0;
1345  
1346  	if (mem && mem->mem_type != TTM_PL_SYSTEM)
1347  		flags |= AMDGPU_PTE_VALID;
1348  
1349  	if (mem && (mem->mem_type == TTM_PL_TT ||
1350  		    mem->mem_type == AMDGPU_PL_DOORBELL ||
1351  		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
1352  		flags |= AMDGPU_PTE_SYSTEM;
1353  
1354  		if (ttm->caching == ttm_cached)
1355  			flags |= AMDGPU_PTE_SNOOPED;
1356  	}
1357  
1358  	if (mem && mem->mem_type == TTM_PL_VRAM &&
1359  			mem->bus.caching == ttm_cached)
1360  		flags |= AMDGPU_PTE_SNOOPED;
1361  
1362  	return flags;
1363  }
1364  
1365  /**
1366   * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1367   *
1368   * @adev: amdgpu_device pointer
1369   * @ttm: The ttm_tt object to compute the flags for
1370   * @mem: The memory registry backing this ttm_tt object
1371   *
1372   * Figure out the flags to use for a VM PTE (Page Table Entry).
1373   */
amdgpu_ttm_tt_pte_flags(struct amdgpu_device * adev,struct ttm_tt * ttm,struct ttm_resource * mem)1374  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1375  				 struct ttm_resource *mem)
1376  {
1377  	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
1378  
1379  	flags |= adev->gart.gart_pte_flags;
1380  	flags |= AMDGPU_PTE_READABLE;
1381  
1382  	if (!amdgpu_ttm_tt_is_readonly(ttm))
1383  		flags |= AMDGPU_PTE_WRITEABLE;
1384  
1385  	return flags;
1386  }
1387  
1388  /*
1389   * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
1390   * object.
1391   *
1392   * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
1393   * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
1394   * it can find space for a new object and by ttm_bo_force_list_clean() which is
1395   * used to clean out a memory space.
1396   */
amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object * bo,const struct ttm_place * place)1397  static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1398  					    const struct ttm_place *place)
1399  {
1400  	struct dma_resv_iter resv_cursor;
1401  	struct dma_fence *f;
1402  
1403  	if (!amdgpu_bo_is_amdgpu_bo(bo))
1404  		return ttm_bo_eviction_valuable(bo, place);
1405  
1406  	/* Swapout? */
1407  	if (bo->resource->mem_type == TTM_PL_SYSTEM)
1408  		return true;
1409  
1410  	if (bo->type == ttm_bo_type_kernel &&
1411  	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1412  		return false;
1413  
1414  	/* If bo is a KFD BO, check if the bo belongs to the current process.
1415  	 * If true, then return false as any KFD process needs all its BOs to
1416  	 * be resident to run successfully
1417  	 */
1418  	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
1419  				DMA_RESV_USAGE_BOOKKEEP, f) {
1420  		if (amdkfd_fence_check_mm(f, current->mm) &&
1421  		    !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
1422  			return false;
1423  	}
1424  
1425  	/* Preemptible BOs don't own system resources managed by the
1426  	 * driver (pages, VRAM, GART space). They point to resources
1427  	 * owned by someone else (e.g. pageable memory in user mode
1428  	 * or a DMABuf). They are used in a preemptible context so we
1429  	 * can guarantee no deadlocks and good QoS in case of MMU
1430  	 * notifiers or DMABuf move notifiers from the resource owner.
1431  	 */
1432  	if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
1433  		return false;
1434  
1435  	if (bo->resource->mem_type == TTM_PL_TT &&
1436  	    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
1437  		return false;
1438  
1439  	return ttm_bo_eviction_valuable(bo, place);
1440  }
1441  
amdgpu_ttm_vram_mm_access(struct amdgpu_device * adev,loff_t pos,void * buf,size_t size,bool write)1442  static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
1443  				      void *buf, size_t size, bool write)
1444  {
1445  	while (size) {
1446  		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
1447  		uint64_t bytes = 4 - (pos & 0x3);
1448  		uint32_t shift = (pos & 0x3) * 8;
1449  		uint32_t mask = 0xffffffff << shift;
1450  		uint32_t value = 0;
1451  
1452  		if (size < bytes) {
1453  			mask &= 0xffffffff >> (bytes - size) * 8;
1454  			bytes = size;
1455  		}
1456  
1457  		if (mask != 0xffffffff) {
1458  			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
1459  			if (write) {
1460  				value &= ~mask;
1461  				value |= (*(uint32_t *)buf << shift) & mask;
1462  				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
1463  			} else {
1464  				value = (value & mask) >> shift;
1465  				memcpy(buf, &value, bytes);
1466  			}
1467  		} else {
1468  			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
1469  		}
1470  
1471  		pos += bytes;
1472  		buf += bytes;
1473  		size -= bytes;
1474  	}
1475  }
1476  
amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object * bo,unsigned long offset,void * buf,int len,int write)1477  static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
1478  					unsigned long offset, void *buf,
1479  					int len, int write)
1480  {
1481  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1482  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1483  	struct amdgpu_res_cursor src_mm;
1484  	struct amdgpu_job *job;
1485  	struct dma_fence *fence;
1486  	uint64_t src_addr, dst_addr;
1487  	unsigned int num_dw;
1488  	int r, idx;
1489  
1490  	if (len != PAGE_SIZE)
1491  		return -EINVAL;
1492  
1493  	if (!adev->mman.sdma_access_ptr)
1494  		return -EACCES;
1495  
1496  	if (!drm_dev_enter(adev_to_drm(adev), &idx))
1497  		return -ENODEV;
1498  
1499  	if (write)
1500  		memcpy(adev->mman.sdma_access_ptr, buf, len);
1501  
1502  	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
1503  	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
1504  				     AMDGPU_FENCE_OWNER_UNDEFINED,
1505  				     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
1506  				     &job);
1507  	if (r)
1508  		goto out;
1509  
1510  	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
1511  	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
1512  		src_mm.start;
1513  	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
1514  	if (write)
1515  		swap(src_addr, dst_addr);
1516  
1517  	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
1518  				PAGE_SIZE, 0);
1519  
1520  	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
1521  	WARN_ON(job->ibs[0].length_dw > num_dw);
1522  
1523  	fence = amdgpu_job_submit(job);
1524  
1525  	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
1526  		r = -ETIMEDOUT;
1527  	dma_fence_put(fence);
1528  
1529  	if (!(r || write))
1530  		memcpy(buf, adev->mman.sdma_access_ptr, len);
1531  out:
1532  	drm_dev_exit(idx);
1533  	return r;
1534  }
1535  
1536  /**
1537   * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1538   *
1539   * @bo:  The buffer object to read/write
1540   * @offset:  Offset into buffer object
1541   * @buf:  Secondary buffer to write/read from
1542   * @len: Length in bytes of access
1543   * @write:  true if writing
1544   *
1545   * This is used to access VRAM that backs a buffer object via MMIO
1546   * access for debugging purposes.
1547   */
amdgpu_ttm_access_memory(struct ttm_buffer_object * bo,unsigned long offset,void * buf,int len,int write)1548  static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1549  				    unsigned long offset, void *buf, int len,
1550  				    int write)
1551  {
1552  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1553  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1554  	struct amdgpu_res_cursor cursor;
1555  	int ret = 0;
1556  
1557  	if (bo->resource->mem_type != TTM_PL_VRAM)
1558  		return -EIO;
1559  
1560  	if (amdgpu_device_has_timeouts_enabled(adev) &&
1561  			!amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
1562  		return len;
1563  
1564  	amdgpu_res_first(bo->resource, offset, len, &cursor);
1565  	while (cursor.remaining) {
1566  		size_t count, size = cursor.size;
1567  		loff_t pos = cursor.start;
1568  
1569  		count = amdgpu_device_aper_access(adev, pos, buf, size, write);
1570  		size -= count;
1571  		if (size) {
1572  			/* using MM to access rest vram and handle un-aligned address */
1573  			pos += count;
1574  			buf += count;
1575  			amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
1576  		}
1577  
1578  		ret += cursor.size;
1579  		buf += cursor.size;
1580  		amdgpu_res_next(&cursor, cursor.size);
1581  	}
1582  
1583  	return ret;
1584  }
1585  
1586  static void
amdgpu_bo_delete_mem_notify(struct ttm_buffer_object * bo)1587  amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
1588  {
1589  	amdgpu_bo_move_notify(bo, false, NULL);
1590  }
1591  
1592  static struct ttm_device_funcs amdgpu_bo_driver = {
1593  	.ttm_tt_create = &amdgpu_ttm_tt_create,
1594  	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
1595  	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1596  	.ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
1597  	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1598  	.evict_flags = &amdgpu_evict_flags,
1599  	.move = &amdgpu_bo_move,
1600  	.delete_mem_notify = &amdgpu_bo_delete_mem_notify,
1601  	.release_notify = &amdgpu_bo_release_notify,
1602  	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1603  	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1604  	.access_memory = &amdgpu_ttm_access_memory,
1605  };
1606  
1607  /*
1608   * Firmware Reservation functions
1609   */
1610  /**
1611   * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1612   *
1613   * @adev: amdgpu_device pointer
1614   *
1615   * free fw reserved vram if it has been reserved.
1616   */
amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device * adev)1617  static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1618  {
1619  	amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
1620  		NULL, &adev->mman.fw_vram_usage_va);
1621  }
1622  
1623  /*
1624   * Driver Reservation functions
1625   */
1626  /**
1627   * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
1628   *
1629   * @adev: amdgpu_device pointer
1630   *
1631   * free drv reserved vram if it has been reserved.
1632   */
amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device * adev)1633  static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
1634  {
1635  	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
1636  						  NULL,
1637  						  &adev->mman.drv_vram_usage_va);
1638  }
1639  
1640  /**
1641   * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1642   *
1643   * @adev: amdgpu_device pointer
1644   *
1645   * create bo vram reservation from fw.
1646   */
amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device * adev)1647  static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1648  {
1649  	uint64_t vram_size = adev->gmc.visible_vram_size;
1650  
1651  	adev->mman.fw_vram_usage_va = NULL;
1652  	adev->mman.fw_vram_usage_reserved_bo = NULL;
1653  
1654  	if (adev->mman.fw_vram_usage_size == 0 ||
1655  	    adev->mman.fw_vram_usage_size > vram_size)
1656  		return 0;
1657  
1658  	return amdgpu_bo_create_kernel_at(adev,
1659  					  adev->mman.fw_vram_usage_start_offset,
1660  					  adev->mman.fw_vram_usage_size,
1661  					  &adev->mman.fw_vram_usage_reserved_bo,
1662  					  &adev->mman.fw_vram_usage_va);
1663  }
1664  
1665  /**
1666   * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
1667   *
1668   * @adev: amdgpu_device pointer
1669   *
1670   * create bo vram reservation from drv.
1671   */
amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device * adev)1672  static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
1673  {
1674  	u64 vram_size = adev->gmc.visible_vram_size;
1675  
1676  	adev->mman.drv_vram_usage_va = NULL;
1677  	adev->mman.drv_vram_usage_reserved_bo = NULL;
1678  
1679  	if (adev->mman.drv_vram_usage_size == 0 ||
1680  	    adev->mman.drv_vram_usage_size > vram_size)
1681  		return 0;
1682  
1683  	return amdgpu_bo_create_kernel_at(adev,
1684  					  adev->mman.drv_vram_usage_start_offset,
1685  					  adev->mman.drv_vram_usage_size,
1686  					  &adev->mman.drv_vram_usage_reserved_bo,
1687  					  &adev->mman.drv_vram_usage_va);
1688  }
1689  
1690  /*
 * Memory training reservation functions
1692   */
1693  
1694  /**
1695   * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
1696   *
1697   * @adev: amdgpu_device pointer
1698   *
1699   * free memory training reserved vram if it has been reserved.
1700   */
amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device * adev)1701  static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
1702  {
1703  	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1704  
1705  	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
1706  	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1707  	ctx->c2p_bo = NULL;
1708  
1709  	return 0;
1710  }
1711  
amdgpu_ttm_training_data_block_init(struct amdgpu_device * adev,uint32_t reserve_size)1712  static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
1713  						uint32_t reserve_size)
1714  {
1715  	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1716  
1717  	memset(ctx, 0, sizeof(*ctx));
1718  
1719  	ctx->c2p_train_data_offset =
1720  		ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
1721  	ctx->p2c_train_data_offset =
1722  		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
1723  	ctx->train_data_size =
1724  		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1725  
1726  	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
1727  			ctx->train_data_size,
1728  			ctx->p2c_train_data_offset,
1729  			ctx->c2p_train_data_offset);
1730  }
1731  
1732  /*
1733   * reserve TMR memory at the top of VRAM which holds
1734   * IP Discovery data and is protected by PSP.
1735   */
amdgpu_ttm_reserve_tmr(struct amdgpu_device * adev)1736  static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1737  {
1738  	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1739  	bool mem_train_support = false;
1740  	uint32_t reserve_size = 0;
1741  	int ret;
1742  
1743  	if (adev->bios && !amdgpu_sriov_vf(adev)) {
1744  		if (amdgpu_atomfirmware_mem_training_supported(adev))
1745  			mem_train_support = true;
1746  		else
1747  			DRM_DEBUG("memory training does not support!\n");
1748  	}
1749  
1750  	/*
1751  	 * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
1752  	 * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc)
1753  	 *
1754  	 * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
1755  	 * discovery data and G6 memory training data respectively
1756  	 */
1757  	if (adev->bios)
1758  		reserve_size =
1759  			amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1760  
1761  	if (!adev->bios &&
1762  	    (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1763  	     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)))
1764  		reserve_size = max(reserve_size, (uint32_t)280 << 20);
1765  	else if (!reserve_size)
1766  		reserve_size = DISCOVERY_TMR_OFFSET;
1767  
1768  	if (mem_train_support) {
1769  		/* reserve vram for mem train according to TMR location */
1770  		amdgpu_ttm_training_data_block_init(adev, reserve_size);
1771  		ret = amdgpu_bo_create_kernel_at(adev,
1772  						 ctx->c2p_train_data_offset,
1773  						 ctx->train_data_size,
1774  						 &ctx->c2p_bo,
1775  						 NULL);
1776  		if (ret) {
1777  			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1778  			amdgpu_ttm_training_reserve_vram_fini(adev);
1779  			return ret;
1780  		}
1781  		ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1782  	}
1783  
1784  	if (!adev->gmc.is_app_apu) {
1785  		ret = amdgpu_bo_create_kernel_at(
1786  			adev, adev->gmc.real_vram_size - reserve_size,
1787  			reserve_size, &adev->mman.fw_reserved_memory, NULL);
1788  		if (ret) {
1789  			DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1790  			amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
1791  					      NULL, NULL);
1792  			return ret;
1793  		}
1794  	} else {
1795  		DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
1796  	}
1797  
1798  	return 0;
1799  }
1800  
amdgpu_ttm_pools_init(struct amdgpu_device * adev)1801  static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
1802  {
1803  	int i;
1804  
1805  	if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
1806  		return 0;
1807  
1808  	adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
1809  				       sizeof(*adev->mman.ttm_pools),
1810  				       GFP_KERNEL);
1811  	if (!adev->mman.ttm_pools)
1812  		return -ENOMEM;
1813  
1814  	for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
1815  		ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
1816  			      adev->gmc.mem_partitions[i].numa.node,
1817  			      false, false);
1818  	}
1819  	return 0;
1820  }
1821  
amdgpu_ttm_pools_fini(struct amdgpu_device * adev)1822  static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
1823  {
1824  	int i;
1825  
1826  	if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
1827  		return;
1828  
1829  	for (i = 0; i < adev->gmc.num_mem_partitions; i++)
1830  		ttm_pool_fini(&adev->mman.ttm_pools[i]);
1831  
1832  	kfree(adev->mman.ttm_pools);
1833  	adev->mman.ttm_pools = NULL;
1834  }
1835  
/*
 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
 * gtt/vram related fields.
 *
 * @adev: amdgpu device whose memory managers are set up
 *
 * This initializes all of the memory space pools that the TTM layer
 * will need such as the GTT space (system memory mapped to the device),
 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
 * can be mapped per VMID.
 *
 * Steps, in order: TTM device, per-partition pools, VRAM manager, CPU
 * mapping of the visible VRAM aperture, firmware/driver/TMR/stolen VRAM
 * reservations, GTT manager, doorbell manager and kernel doorbells,
 * preemptible manager, GDS/GWS/OA managers, and finally the optional
 * one-page SDMA access buffer.
 *
 * Returns 0 on success or the negative error code of the first step that
 * failed.  Failure paths return immediately without undoing the earlier
 * steps in this function.
 * NOTE(review): presumably the caller runs amdgpu_ttm_fini() to clean up
 * after a failure - confirm.
 */
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
	uint64_t gtt_size;
	int r;

	mutex_init(&adev->mman.gtt_window_lock);

	/*
	 * Bring up the TTM device on top of the DRM device's anon inode
	 * address space and VMA offset manager.
	 */
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
			       adev_to_drm(adev)->anon_inode->i_mapping,
			       adev_to_drm(adev)->vma_offset_manager,
			       adev->need_swiotlb,
			       dma_addressing_limited(adev->dev));
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}

	r = amdgpu_ttm_pools_init(adev);
	if (r) {
		DRM_ERROR("failed to init ttm pools(%d).\n", r);
		return r;
	}
	/* amdgpu_ttm_fini() bails out early while this flag is still false */
	adev->mman.initialized = true;

	/* Initialize VRAM pool with all of VRAM divided into pages */
	r = amdgpu_vram_mgr_init(adev);
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}

	/* Change the size here instead of the init above so only lpfn is affected */
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
	/*
	 * Map the visible VRAM aperture for CPU access (64-bit kernels only).
	 * On x86 a cached mapping is used when the GPU is connected to the CPU
	 * over XGMI, and app APUs skip the mapping entirely; every other case
	 * gets a write-combined mapping.
	 */
#ifdef CONFIG_64BIT
#ifdef CONFIG_X86
	if (adev->gmc.xgmi.connected_to_cpu)
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);

	else if (adev->gmc.is_app_apu)
		DRM_DEBUG_DRIVER(
			"No need to ioremap when real vram size is 0\n");
	else
#endif
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
#endif

	/*
	 * The reserved VRAM for firmware must be pinned to the specified
	 * place in VRAM, so reserve it early.
	 */
	r = amdgpu_ttm_fw_reserve_vram_init(adev);
	if (r)
		return r;

	/*
	 * The reserved VRAM for the driver must be pinned to the specified
	 * place in VRAM, so reserve it early.
	 */
	r = amdgpu_ttm_drv_reserve_vram_init(adev);
	if (r)
		return r;

	/*
	 * Only NAVI10 and newer ASICs support IP discovery.
	 * If IP discovery is enabled, a block of memory should be
	 * reserved for IP discovery.
	 */
	if (adev->mman.discovery_bin) {
		r = amdgpu_ttm_reserve_tmr(adev);
		if (r)
			return r;
	}

	/*
	 * Allocate memory as required for VGA.
	 * This is used for VGA emulation and pre-OS scanout buffers to
	 * avoid display artifacts while transitioning between pre-OS
	 * and driver.
	 */
	if (!adev->gmc.is_app_apu) {
		/* stolen VGA region, placed at VRAM offset 0 */
		r = amdgpu_bo_create_kernel_at(adev, 0,
					       adev->mman.stolen_vga_size,
					       &adev->mman.stolen_vga_memory,
					       NULL);
		if (r)
			return r;

		/* extended stolen region, directly after the VGA region */
		r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
					       adev->mman.stolen_extended_size,
					       &adev->mman.stolen_extended_memory,
					       NULL);

		if (r)
			return r;

		/* additional reserved region at its own offset */
		r = amdgpu_bo_create_kernel_at(adev,
					       adev->mman.stolen_reserved_offset,
					       adev->mman.stolen_reserved_size,
					       &adev->mman.stolen_reserved_memory,
					       NULL);
		if (r)
			return r;
	} else {
		DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
	}

	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
		 (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));

	/*
	 * Compute GTT size, either based on TTM limit
	 * or whatever the user passed on module init
	 * (amdgpu_gtt_size is in MiB, -1 selects the TTM limit).
	 */
	if (amdgpu_gtt_size == -1)
		gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
	else
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;

	/* Initialize GTT memory pool */
	r = amdgpu_gtt_mgr_init(adev, gtt_size);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned int)(gtt_size / (1024 * 1024)));

	/* Initialize doorbell pool on PCI BAR; its size is given in pages */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
	if (r) {
		DRM_ERROR("Failed initializing doorbell heap.\n");
		return r;
	}

	/* Create a doorbell page for kernel usage */
	r = amdgpu_doorbell_create_kernel_doorbells(adev);
	if (r) {
		DRM_ERROR("Failed to initialize kernel doorbells.\n");
		return r;
	}

	/* Initialize preemptible memory pool */
	r = amdgpu_preempt_mgr_init(adev);
	if (r) {
		DRM_ERROR("Failed initializing PREEMPT heap.\n");
		return r;
	}

	/* Initialize various on-chip memory pools */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
		return r;
	}
	/*
	 * One page in the GTT domain for debug VRAM access.  A failure here
	 * is not fatal: only a warning is logged and init still succeeds.
	 */
	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT,
				&adev->mman.sdma_access_bo, NULL,
				&adev->mman.sdma_access_ptr))
		DRM_WARN("Debug VRAM access will use slowpath MM access\n");

	return 0;
}
2020  
2021  /*
2022   * amdgpu_ttm_fini - De-initialize the TTM memory pools
2023   */
amdgpu_ttm_fini(struct amdgpu_device * adev)2024  void amdgpu_ttm_fini(struct amdgpu_device *adev)
2025  {
2026  	int idx;
2027  
2028  	if (!adev->mman.initialized)
2029  		return;
2030  
2031  	amdgpu_ttm_pools_fini(adev);
2032  
2033  	amdgpu_ttm_training_reserve_vram_fini(adev);
2034  	/* return the stolen vga memory back to VRAM */
2035  	if (!adev->gmc.is_app_apu) {
2036  		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
2037  		amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
2038  		/* return the FW reserved memory back to VRAM */
2039  		amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
2040  				      NULL);
2041  		if (adev->mman.stolen_reserved_size)
2042  			amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
2043  					      NULL, NULL);
2044  	}
2045  	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
2046  					&adev->mman.sdma_access_ptr);
2047  	amdgpu_ttm_fw_reserve_vram_fini(adev);
2048  	amdgpu_ttm_drv_reserve_vram_fini(adev);
2049  
2050  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
2051  
2052  		if (adev->mman.aper_base_kaddr)
2053  			iounmap(adev->mman.aper_base_kaddr);
2054  		adev->mman.aper_base_kaddr = NULL;
2055  
2056  		drm_dev_exit(idx);
2057  	}
2058  
2059  	amdgpu_vram_mgr_fini(adev);
2060  	amdgpu_gtt_mgr_fini(adev);
2061  	amdgpu_preempt_mgr_fini(adev);
2062  	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
2063  	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
2064  	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
2065  	ttm_device_fini(&adev->mman.bdev);
2066  	adev->mman.initialized = false;
2067  	DRM_INFO("amdgpu: ttm finalized\n");
2068  }
2069  
2070  /**
2071   * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
2072   *
2073   * @adev: amdgpu_device pointer
2074   * @enable: true when we can use buffer functions.
2075   *
2076   * Enable/disable use of buffer functions during suspend/resume. This should
2077   * only be called at bootup or when userspace isn't running.
2078   */
amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device * adev,bool enable)2079  void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
2080  {
2081  	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
2082  	uint64_t size;
2083  	int r;
2084  
2085  	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
2086  	    adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
2087  		return;
2088  
2089  	if (enable) {
2090  		struct amdgpu_ring *ring;
2091  		struct drm_gpu_scheduler *sched;
2092  
2093  		ring = adev->mman.buffer_funcs_ring;
2094  		sched = &ring->sched;
2095  		r = drm_sched_entity_init(&adev->mman.high_pr,
2096  					  DRM_SCHED_PRIORITY_KERNEL, &sched,
2097  					  1, NULL);
2098  		if (r) {
2099  			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
2100  				  r);
2101  			return;
2102  		}
2103  
2104  		r = drm_sched_entity_init(&adev->mman.low_pr,
2105  					  DRM_SCHED_PRIORITY_NORMAL, &sched,
2106  					  1, NULL);
2107  		if (r) {
2108  			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
2109  				  r);
2110  			goto error_free_entity;
2111  		}
2112  	} else {
2113  		drm_sched_entity_destroy(&adev->mman.high_pr);
2114  		drm_sched_entity_destroy(&adev->mman.low_pr);
2115  		dma_fence_put(man->move);
2116  		man->move = NULL;
2117  	}
2118  
2119  	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
2120  	if (enable)
2121  		size = adev->gmc.real_vram_size;
2122  	else
2123  		size = adev->gmc.visible_vram_size;
2124  	man->size = size;
2125  	adev->mman.buffer_funcs_enabled = enable;
2126  
2127  	return;
2128  
2129  error_free_entity:
2130  	drm_sched_entity_destroy(&adev->mman.high_pr);
2131  }
2132  
/*
 * amdgpu_ttm_prepare_job - allocate a job with one IB for a TTM copy/fill
 *
 * @adev: amdgpu device
 * @direct_submit: selects the direct IB pool instead of the delayed one
 * @num_dw: IB size in dwords
 * @resv: optional reservation object whose fences become job dependencies
 * @vm_needs_flush: request a VM flush, using pdb0 if present, else the GART BO
 * @job: receives the allocated job
 * @delayed: use the low priority entity instead of the high priority one
 *
 * Returns 0 on success or a negative error code from job allocation or from
 * adding the reservation dependencies.
 */
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
				  bool direct_submit,
				  unsigned int num_dw,
				  struct dma_resv *resv,
				  bool vm_needs_flush,
				  struct amdgpu_job **job,
				  bool delayed)
{
	struct drm_sched_entity *entity;
	enum amdgpu_ib_pool_type pool;
	int r;

	if (direct_submit)
		pool = AMDGPU_IB_POOL_DIRECT;
	else
		pool = AMDGPU_IB_POOL_DELAYED;

	if (delayed)
		entity = &adev->mman.low_pr;
	else
		entity = &adev->mman.high_pr;

	/* the IB size is passed in bytes, hence the dword count times four */
	r = amdgpu_job_alloc_with_ib(adev, entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, pool, job);
	if (r)
		return r;

	if (vm_needs_flush) {
		(*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
							adev->gmc.pdb0_bo :
							adev->gart.bo);
		(*job)->vm_needs_flush = true;
	}

	if (resv)
		return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
							   DMA_RESV_USAGE_BOOKKEEP);

	return 0;
}
2165  
/**
 * amdgpu_copy_buffer - copy a range of GPU addresses with the buffer funcs
 *
 * @ring: ring used to emit and submit the copy
 * @src_offset: source GPU address
 * @dst_offset: destination GPU address
 * @byte_count: number of bytes to copy
 * @resv: optional reservation object to synchronize against
 * @fence: receives the fence of the submitted job; written unconditionally
 *         on the scheduled (non-direct) path
 * @direct_submit: submit directly to the ring instead of via the scheduler
 * @vm_needs_flush: request a VM flush before the copy
 * @copy_flags: flags handed to the copy emit callback
 *
 * The copy is split into chunks of at most copy_max_bytes, all emitted into
 * a single IB.
 *
 * Returns:
 * 0 for success or a negative error code on failure.
 */
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
		       uint64_t dst_offset, uint32_t byte_count,
		       struct dma_resv *resv,
		       struct dma_fence **fence, bool direct_submit,
		       bool vm_needs_flush, uint32_t copy_flags)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int num_loops, num_dw;
	struct amdgpu_job *job;
	uint32_t max_bytes;
	unsigned int i;
	int r;

	if (!direct_submit && !ring->sched.ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	/*
	 * Round up in 64 bit: with two 32-bit operands the intermediate
	 * "byte_count + max_bytes - 1" can wrap for large counts, which would
	 * yield too few loops and silently truncate the copy.
	 */
	num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
	r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
				   resv, vm_needs_flush, &job, false);
	if (r)
		return r;

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
					dst_offset, cur_size_in_bytes, copy_flags);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
		byte_count -= cur_size_in_bytes;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	/* the emitted commands must fit the dword budget computed above */
	WARN_ON(job->ibs[0].length_dw > num_dw);
	if (direct_submit)
		r = amdgpu_job_submit_direct(job, ring, fence);
	else
		*fence = amdgpu_job_submit(job);
	if (r)
		goto error_free;

	return r;

error_free:
	/* only reachable when the direct submission failed */
	amdgpu_job_free(job);
	DRM_ERROR("Error scheduling IBs (%d)\n", r);
	return r;
}
2218  
/*
 * amdgpu_ttm_fill_mem - fill a GPU address range with a 32-bit pattern
 *
 * @ring: ring used for padding the IB
 * @src_data: value written by the fill commands
 * @dst_addr: destination GPU address
 * @byte_count: number of bytes to fill
 * @resv: optional reservation object to synchronize against
 * @fence: receives the fence of the submitted job
 * @vm_needs_flush: request a VM flush before the fill
 * @delayed: submit through the low priority entity
 *
 * The range is split into chunks of at most fill_max_bytes, all emitted into
 * one IB that is submitted through the scheduler.
 *
 * Returns 0 on success or the error from job preparation.
 */
static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
			       uint64_t dst_addr, uint32_t byte_count,
			       struct dma_resv *resv,
			       struct dma_fence **fence,
			       bool vm_needs_flush, bool delayed)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t chunk_limit = adev->mman.buffer_funcs->fill_max_bytes;
	unsigned int chunks_left, ib_dw;
	struct amdgpu_job *job;
	int r;

	chunks_left = DIV_ROUND_UP_ULL(byte_count, chunk_limit);
	ib_dw = ALIGN(chunks_left * adev->mman.buffer_funcs->fill_num_dw, 8);

	r = amdgpu_ttm_prepare_job(adev, false, ib_dw, resv, vm_needs_flush,
				   &job, delayed);
	if (r)
		return r;

	for (; chunks_left; chunks_left--) {
		uint32_t chunk = min(byte_count, chunk_limit);

		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
					chunk);

		byte_count -= chunk;
		dst_addr += chunk;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	/* the emitted commands must fit the dword budget computed above */
	WARN_ON(job->ibs[0].length_dw > ib_dw);

	*fence = amdgpu_job_submit(job);
	return 0;
}
2255  
2256  /**
2257   * amdgpu_ttm_clear_buffer - clear memory buffers
2258   * @bo: amdgpu buffer object
2259   * @resv: reservation object
2260   * @fence: dma_fence associated with the operation
2261   *
2262   * Clear the memory buffer resource.
2263   *
2264   * Returns:
2265   * 0 for success or a negative error code on failure.
2266   */
amdgpu_ttm_clear_buffer(struct amdgpu_bo * bo,struct dma_resv * resv,struct dma_fence ** fence)2267  int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
2268  			    struct dma_resv *resv,
2269  			    struct dma_fence **fence)
2270  {
2271  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2272  	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2273  	struct amdgpu_res_cursor cursor;
2274  	u64 addr;
2275  	int r;
2276  
2277  	if (!adev->mman.buffer_funcs_enabled)
2278  		return -EINVAL;
2279  
2280  	if (!fence)
2281  		return -EINVAL;
2282  
2283  	*fence = dma_fence_get_stub();
2284  
2285  	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
2286  
2287  	mutex_lock(&adev->mman.gtt_window_lock);
2288  	while (cursor.remaining) {
2289  		struct dma_fence *next = NULL;
2290  		u64 size;
2291  
2292  		if (amdgpu_res_cleared(&cursor)) {
2293  			amdgpu_res_next(&cursor, cursor.size);
2294  			continue;
2295  		}
2296  
2297  		/* Never clear more than 256MiB at once to avoid timeouts */
2298  		size = min(cursor.size, 256ULL << 20);
2299  
2300  		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
2301  					  1, ring, false, &size, &addr);
2302  		if (r)
2303  			goto err;
2304  
2305  		r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
2306  					&next, true, true);
2307  		if (r)
2308  			goto err;
2309  
2310  		dma_fence_put(*fence);
2311  		*fence = next;
2312  
2313  		amdgpu_res_next(&cursor, size);
2314  	}
2315  err:
2316  	mutex_unlock(&adev->mman.gtt_window_lock);
2317  
2318  	return r;
2319  }
2320  
/**
 * amdgpu_fill_buffer - fill a buffer object with a 32-bit pattern
 * @bo: amdgpu buffer object
 * @src_data: value to fill the buffer with
 * @resv: reservation object to synchronize against
 * @f: optional; receives a reference to the fence of the last submitted
 *     fill, or NULL if nothing was submitted
 * @delayed: submit through the low priority entity
 *
 * Walks the BO's resource and fills it in pieces of at most 256MiB.
 *
 * Returns:
 * 0 for success or a negative error code on failure.
 */
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
			uint32_t src_data,
			struct dma_resv *resv,
			struct dma_fence **f,
			bool delayed)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct dma_fence *fence = NULL;
	struct amdgpu_res_cursor dst;
	/* must start at 0: r is never assigned if the loop body does not run */
	int r = 0;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to clear memory with ring turned off.\n");
		return -EINVAL;
	}

	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);

	/* held across the whole map + fill sequence */
	mutex_lock(&adev->mman.gtt_window_lock);
	while (dst.remaining) {
		struct dma_fence *next;
		uint64_t cur_size, to;

		/* Never fill more than 256MiB at once to avoid timeouts */
		cur_size = min(dst.size, 256ULL << 20);

		/* may adjust cur_size and yields the address to fill in to */
		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
					  1, ring, false, &cur_size, &to);
		if (r)
			goto error;

		r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
					&next, true, delayed);
		if (r)
			goto error;

		/* only the fence of the latest fill is kept */
		dma_fence_put(fence);
		fence = next;

		amdgpu_res_next(&dst, cur_size);
	}
error:
	mutex_unlock(&adev->mman.gtt_window_lock);
	/* the caller gets its own reference; the local one is dropped */
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}
2370  
2371  /**
2372   * amdgpu_ttm_evict_resources - evict memory buffers
2373   * @adev: amdgpu device object
2374   * @mem_type: evicted BO's memory type
2375   *
2376   * Evicts all @mem_type buffers on the lru list of the memory type.
2377   *
2378   * Returns:
2379   * 0 for success or a negative error code on failure.
2380   */
amdgpu_ttm_evict_resources(struct amdgpu_device * adev,int mem_type)2381  int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
2382  {
2383  	struct ttm_resource_manager *man;
2384  
2385  	switch (mem_type) {
2386  	case TTM_PL_VRAM:
2387  	case TTM_PL_TT:
2388  	case AMDGPU_PL_GWS:
2389  	case AMDGPU_PL_GDS:
2390  	case AMDGPU_PL_OA:
2391  		man = ttm_manager_type(&adev->mman.bdev, mem_type);
2392  		break;
2393  	default:
2394  		DRM_ERROR("Trying to evict invalid memory type\n");
2395  		return -EINVAL;
2396  	}
2397  
2398  	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
2399  }
2400  
2401  #if defined(CONFIG_DEBUG_FS)
2402  
amdgpu_ttm_page_pool_show(struct seq_file * m,void * unused)2403  static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
2404  {
2405  	struct amdgpu_device *adev = m->private;
2406  
2407  	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
2408  }
2409  
2410  DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
2411  
2412  /*
2413   * amdgpu_ttm_vram_read - Linear read access to VRAM
2414   *
2415   * Accesses VRAM via MMIO for debugging purposes.
2416   */
amdgpu_ttm_vram_read(struct file * f,char __user * buf,size_t size,loff_t * pos)2417  static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
2418  				    size_t size, loff_t *pos)
2419  {
2420  	struct amdgpu_device *adev = file_inode(f)->i_private;
2421  	ssize_t result = 0;
2422  
2423  	if (size & 0x3 || *pos & 0x3)
2424  		return -EINVAL;
2425  
2426  	if (*pos >= adev->gmc.mc_vram_size)
2427  		return -ENXIO;
2428  
2429  	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
2430  	while (size) {
2431  		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
2432  		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
2433  
2434  		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
2435  		if (copy_to_user(buf, value, bytes))
2436  			return -EFAULT;
2437  
2438  		result += bytes;
2439  		buf += bytes;
2440  		*pos += bytes;
2441  		size -= bytes;
2442  	}
2443  
2444  	return result;
2445  }
2446  
2447  /*
2448   * amdgpu_ttm_vram_write - Linear write access to VRAM
2449   *
2450   * Accesses VRAM via MMIO for debugging purposes.
2451   */
amdgpu_ttm_vram_write(struct file * f,const char __user * buf,size_t size,loff_t * pos)2452  static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
2453  				    size_t size, loff_t *pos)
2454  {
2455  	struct amdgpu_device *adev = file_inode(f)->i_private;
2456  	ssize_t result = 0;
2457  	int r;
2458  
2459  	if (size & 0x3 || *pos & 0x3)
2460  		return -EINVAL;
2461  
2462  	if (*pos >= adev->gmc.mc_vram_size)
2463  		return -ENXIO;
2464  
2465  	while (size) {
2466  		uint32_t value;
2467  
2468  		if (*pos >= adev->gmc.mc_vram_size)
2469  			return result;
2470  
2471  		r = get_user(value, (uint32_t *)buf);
2472  		if (r)
2473  			return r;
2474  
2475  		amdgpu_device_mm_access(adev, *pos, &value, 4, true);
2476  
2477  		result += 4;
2478  		buf += 4;
2479  		*pos += 4;
2480  		size -= 4;
2481  	}
2482  
2483  	return result;
2484  }
2485  
2486  static const struct file_operations amdgpu_ttm_vram_fops = {
2487  	.owner = THIS_MODULE,
2488  	.read = amdgpu_ttm_vram_read,
2489  	.write = amdgpu_ttm_vram_write,
2490  	.llseek = default_llseek,
2491  };
2492  
2493  /*
2494   * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2495   *
2496   * This function is used to read memory that has been mapped to the
2497   * GPU and the known addresses are not physical addresses but instead
2498   * bus addresses (e.g., what you'd put in an IB or ring buffer).
2499   */
amdgpu_iomem_read(struct file * f,char __user * buf,size_t size,loff_t * pos)2500  static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2501  				 size_t size, loff_t *pos)
2502  {
2503  	struct amdgpu_device *adev = file_inode(f)->i_private;
2504  	struct iommu_domain *dom;
2505  	ssize_t result = 0;
2506  	int r;
2507  
2508  	/* retrieve the IOMMU domain if any for this device */
2509  	dom = iommu_get_domain_for_dev(adev->dev);
2510  
2511  	while (size) {
2512  		phys_addr_t addr = *pos & PAGE_MASK;
2513  		loff_t off = *pos & ~PAGE_MASK;
2514  		size_t bytes = PAGE_SIZE - off;
2515  		unsigned long pfn;
2516  		struct page *p;
2517  		void *ptr;
2518  
2519  		bytes = min(bytes, size);
2520  
2521  		/* Translate the bus address to a physical address.  If
2522  		 * the domain is NULL it means there is no IOMMU active
2523  		 * and the address translation is the identity
2524  		 */
2525  		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2526  
2527  		pfn = addr >> PAGE_SHIFT;
2528  		if (!pfn_valid(pfn))
2529  			return -EPERM;
2530  
2531  		p = pfn_to_page(pfn);
2532  		if (p->mapping != adev->mman.bdev.dev_mapping)
2533  			return -EPERM;
2534  
2535  		ptr = kmap_local_page(p);
2536  		r = copy_to_user(buf, ptr + off, bytes);
2537  		kunmap_local(ptr);
2538  		if (r)
2539  			return -EFAULT;
2540  
2541  		size -= bytes;
2542  		*pos += bytes;
2543  		result += bytes;
2544  	}
2545  
2546  	return result;
2547  }
2548  
2549  /*
2550   * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2551   *
2552   * This function is used to write memory that has been mapped to the
2553   * GPU and the known addresses are not physical addresses but instead
2554   * bus addresses (e.g., what you'd put in an IB or ring buffer).
2555   */
amdgpu_iomem_write(struct file * f,const char __user * buf,size_t size,loff_t * pos)2556  static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2557  				 size_t size, loff_t *pos)
2558  {
2559  	struct amdgpu_device *adev = file_inode(f)->i_private;
2560  	struct iommu_domain *dom;
2561  	ssize_t result = 0;
2562  	int r;
2563  
2564  	dom = iommu_get_domain_for_dev(adev->dev);
2565  
2566  	while (size) {
2567  		phys_addr_t addr = *pos & PAGE_MASK;
2568  		loff_t off = *pos & ~PAGE_MASK;
2569  		size_t bytes = PAGE_SIZE - off;
2570  		unsigned long pfn;
2571  		struct page *p;
2572  		void *ptr;
2573  
2574  		bytes = min(bytes, size);
2575  
2576  		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2577  
2578  		pfn = addr >> PAGE_SHIFT;
2579  		if (!pfn_valid(pfn))
2580  			return -EPERM;
2581  
2582  		p = pfn_to_page(pfn);
2583  		if (p->mapping != adev->mman.bdev.dev_mapping)
2584  			return -EPERM;
2585  
2586  		ptr = kmap_local_page(p);
2587  		r = copy_from_user(ptr + off, buf, bytes);
2588  		kunmap_local(ptr);
2589  		if (r)
2590  			return -EFAULT;
2591  
2592  		size -= bytes;
2593  		*pos += bytes;
2594  		result += bytes;
2595  	}
2596  
2597  	return result;
2598  }
2599  
2600  static const struct file_operations amdgpu_ttm_iomem_fops = {
2601  	.owner = THIS_MODULE,
2602  	.read = amdgpu_iomem_read,
2603  	.write = amdgpu_iomem_write,
2604  	.llseek = default_llseek
2605  };
2606  
2607  #endif
2608  
/*
 * amdgpu_ttm_debugfs_init - create the TTM related debugfs files
 *
 * Registers the VRAM, iomem and page pool files plus one file per resource
 * manager (VRAM, GTT, GDS, GWS, OA) under the primary minor's debugfs root.
 * Compiles to an empty function without CONFIG_DEBUG_FS.
 */
void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	/* resource managers that get a debugfs file, in creation order */
	static const struct {
		int mem_type;
		const char *name;
	} managers[] = {
		{ TTM_PL_VRAM,   "amdgpu_vram_mm" },
		{ TTM_PL_TT,     "amdgpu_gtt_mm" },
		{ AMDGPU_PL_GDS, "amdgpu_gds_mm" },
		{ AMDGPU_PL_GWS, "amdgpu_gws_mm" },
		{ AMDGPU_PL_OA,  "amdgpu_oa_mm" },
		{ 0, NULL },	/* end of table */
	};
	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
	unsigned int i;

	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);

	for (i = 0; managers[i].name; i++)
		ttm_resource_manager_create_debugfs(
			ttm_manager_type(&adev->mman.bdev,
					 managers[i].mem_type),
			root, managers[i].name);

#endif
}
2639