1  /*
2   * Copyright 2008 Advanced Micro Devices, Inc.
3   * Copyright 2008 Red Hat Inc.
4   * Copyright 2009 Jerome Glisse.
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a
7   * copy of this software and associated documentation files (the "Software"),
8   * to deal in the Software without restriction, including without limitation
9   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10   * and/or sell copies of the Software, and to permit persons to whom the
11   * Software is furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19   * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22   * OTHER DEALINGS IN THE SOFTWARE.
23   *
24   * Authors: Dave Airlie
25   *          Alex Deucher
26   *          Jerome Glisse
27   */
28  
29  #include <drm/radeon_drm.h>
30  #include "radeon.h"
31  #include "radeon_trace.h"
32  
33  /*
34   * GPUVM
35   * GPUVM is similar to the legacy gart on older asics; however,
36   * rather than there being a single global gart table
37   * for the entire GPU, there are multiple VM page tables active
38   * at any given time.  The VM page tables can contain a mix of
39   * VRAM pages and system memory pages, and system memory pages
40   * can be mapped as snooped (cached system pages) or unsnooped
41   * (uncached system pages).
42   * Each VM has an ID associated with it and there is a page table
43   * associated with each VMID.  When executing a command buffer,
44   * the kernel tells the ring what VMID to use for that command
45   * buffer.  VMIDs are allocated dynamically as commands are submitted.
46   * The userspace drivers maintain their own address space and the kernel
47   * sets up their page tables accordingly when they submit their
48   * command buffers and a VMID is assigned.
49   * Cayman/Trinity support up to 8 active VMs at any given time;
50   * SI supports 16.
51   */
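/*
 * Sketch of the translation layout used below (assuming, for example, a
 * 9 bit block size, i.e. RADEON_VM_PTE_COUNT == 512):
 *
 *   pfn     = gpu_addr >> RADEON_GPU_PAGE_SHIFT   (4KB GPU pages)
 *   pde_idx = pfn >> radeon_vm_block_size         (selects a page table)
 *   pte_idx = pfn & (RADEON_VM_PTE_COUNT - 1)     (selects the entry)
 *
 * With those numbers each page table covers 512 * 4KB = 2MB of address
 * space and the page directory holds one 8 byte entry per page table.
 */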
52  
53  /**
54   * radeon_vm_num_pdes - return the number of page directory entries
55   *
56   * @rdev: radeon_device pointer
57   *
58   * Calculate the number of page directory entries (cayman+).
59   */
60  static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
61  {
62  	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
63  }
64  
65  /**
66   * radeon_vm_directory_size - returns the size of the page directory in bytes
67   *
68   * @rdev: radeon_device pointer
69   *
70   * Calculate the size of the page directory in bytes (cayman+).
71   */
72  static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
73  {
74  	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
75  }
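/*
 * Worked example (illustrative numbers, not fixed by the hardware): with a
 * 4GB per-VM address space, max_pfn is 4GB / 4KB = 0x100000 pages; a 9 bit
 * block size then gives 0x100000 >> 9 = 2048 PDEs, i.e. a 2048 * 8 = 16KB
 * page directory.
 */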
76  
77  /**
78   * radeon_vm_manager_init - init the vm manager
79   *
80   * @rdev: radeon_device pointer
81   *
82   * Init the vm manager (cayman+).
83   * Returns 0 for success, error for failure.
84   */
85  int radeon_vm_manager_init(struct radeon_device *rdev)
86  {
87  	int r;
88  
89  	if (!rdev->vm_manager.enabled) {
90  		r = radeon_asic_vm_init(rdev);
91  		if (r)
92  			return r;
93  
94  		rdev->vm_manager.enabled = true;
95  	}
96  	return 0;
97  }
98  
99  /**
100   * radeon_vm_manager_fini - tear down the vm manager
101   *
102   * @rdev: radeon_device pointer
103   *
104   * Tear down the VM manager (cayman+).
105   */
106  void radeon_vm_manager_fini(struct radeon_device *rdev)
107  {
108  	int i;
109  
110  	if (!rdev->vm_manager.enabled)
111  		return;
112  
113  	for (i = 0; i < RADEON_NUM_VM; ++i)
114  		radeon_fence_unref(&rdev->vm_manager.active[i]);
115  	radeon_asic_vm_fini(rdev);
116  	rdev->vm_manager.enabled = false;
117  }
118  
119  /**
120   * radeon_vm_get_bos - add the vm BOs to a validation list
121   *
122   * @rdev: radeon_device pointer
123   * @vm: vm providing the BOs
124   * @head: head of validation list
125   *
126   * Add the page directory to the list of BOs to
127   * validate for command submission (cayman+).
128   */
129  struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
130  					  struct radeon_vm *vm,
131  					  struct list_head *head)
132  {
133  	struct radeon_bo_list *list;
134  	unsigned i, idx;
135  
136  	list = kvmalloc_array(vm->max_pde_used + 2,
137  			     sizeof(struct radeon_bo_list), GFP_KERNEL);
138  	if (!list)
139  		return NULL;
140  
141  	/* add the vm page table to the list */
142  	list[0].robj = vm->page_directory;
143  	list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
144  	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
145  	list[0].tv.bo = &vm->page_directory->tbo;
146  	list[0].tv.num_shared = 1;
147  	list[0].tiling_flags = 0;
148  	list_add(&list[0].tv.head, head);
149  
150  	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
151  		if (!vm->page_tables[i].bo)
152  			continue;
153  
154  		list[idx].robj = vm->page_tables[i].bo;
155  		list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
156  		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
157  		list[idx].tv.bo = &list[idx].robj->tbo;
158  		list[idx].tv.num_shared = 1;
159  		list[idx].tiling_flags = 0;
160  		list_add(&list[idx++].tv.head, head);
161  	}
162  
163  	return list;
164  }
165  
166  /**
167   * radeon_vm_grab_id - allocate the next free VMID
168   *
169   * @rdev: radeon_device pointer
170   * @vm: vm to allocate id for
171   * @ring: ring we want to submit job to
172   *
173   * Allocate an id for the vm (cayman+).
174   * Returns the fence we need to sync to (if any).
175   *
176   * Global and local mutex must be locked!
177   */
178  struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
179  				       struct radeon_vm *vm, int ring)
180  {
181  	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
182  	struct radeon_vm_id *vm_id = &vm->ids[ring];
183  
184  	unsigned choices[2] = {};
185  	unsigned i;
186  
187  	/* check if the id is still valid */
188  	if (vm_id->id && vm_id->last_id_use &&
189  	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
190  		return NULL;
191  
192  	/* we definitely need to flush */
193  	vm_id->pd_gpu_addr = ~0ll;
194  
195  	/* skip over VMID 0, since it is the system VM */
196  	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
197  		struct radeon_fence *fence = rdev->vm_manager.active[i];
198  
199  		if (fence == NULL) {
200  			/* found a free one */
201  			vm_id->id = i;
202  			trace_radeon_vm_grab_id(i, ring);
203  			return NULL;
204  		}
205  
206  		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
207  			best[fence->ring] = fence;
208  			choices[fence->ring == ring ? 0 : 1] = i;
209  		}
210  	}
211  
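	/* no free VMID was found: choices[0] holds an id whose last use was
	 * on the ring we are submitting to, choices[1] one last used on a
	 * different ring; reusing an id from the same ring is preferred */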
212  	for (i = 0; i < 2; ++i) {
213  		if (choices[i]) {
214  			vm_id->id = choices[i];
215  			trace_radeon_vm_grab_id(choices[i], ring);
216  			return rdev->vm_manager.active[choices[i]];
217  		}
218  	}
219  
220  	/* should never happen */
221  	BUG();
222  	return NULL;
223  }
224  
225  /**
226   * radeon_vm_flush - hardware flush the vm
227   *
228   * @rdev: radeon_device pointer
229   * @vm: vm we want to flush
230   * @ring: ring to use for flush
231   * @updates: last vm update that is waited for
232   *
233   * Flush the vm (cayman+).
234   *
235   * Global and local mutex must be locked!
236   */
237  void radeon_vm_flush(struct radeon_device *rdev,
238  		     struct radeon_vm *vm,
239  		     int ring, struct radeon_fence *updates)
240  {
241  	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
242  	struct radeon_vm_id *vm_id = &vm->ids[ring];
243  
244  	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
245  	    radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
246  
247  		trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
248  		radeon_fence_unref(&vm_id->flushed_updates);
249  		vm_id->flushed_updates = radeon_fence_ref(updates);
250  		vm_id->pd_gpu_addr = pd_addr;
251  		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
252  				     vm_id->id, vm_id->pd_gpu_addr);
253  
254  	}
255  }
256  
257  /**
258   * radeon_vm_fence - remember fence for vm
259   *
260   * @rdev: radeon_device pointer
261   * @vm: vm we want to fence
262   * @fence: fence to remember
263   *
264   * Fence the vm (cayman+).
265   * Set the fence used to protect page table and id.
266   *
267   * Global and local mutex must be locked!
268   */
269  void radeon_vm_fence(struct radeon_device *rdev,
270  		     struct radeon_vm *vm,
271  		     struct radeon_fence *fence)
272  {
273  	unsigned vm_id = vm->ids[fence->ring].id;
274  
275  	radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
276  	rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
277  
278  	radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
279  	vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
280  }
281  
282  /**
283   * radeon_vm_bo_find - find the bo_va for a specific vm & bo
284   *
285   * @vm: requested vm
286   * @bo: requested buffer object
287   *
288   * Find @bo inside the requested vm (cayman+).
289   * Search inside the @bo's vm list for the requested vm.
290   * Returns the found bo_va or NULL if none is found.
291   *
292   * Object has to be reserved!
293   */
294  struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
295  				       struct radeon_bo *bo)
296  {
297  	struct radeon_bo_va *bo_va;
298  
299  	list_for_each_entry(bo_va, &bo->va, bo_list) {
300  		if (bo_va->vm == vm)
301  			return bo_va;
302  
303  	}
304  	return NULL;
305  }
306  
307  /**
308   * radeon_vm_bo_add - add a bo to a specific vm
309   *
310   * @rdev: radeon_device pointer
311   * @vm: requested vm
312   * @bo: radeon buffer object
313   *
314   * Add @bo into the requested vm (cayman+).
315   * Add @bo to the list of bos associated with the vm.
316   * Returns the newly added bo_va or NULL on failure.
317   *
318   * Object has to be reserved!
319   */
320  struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
321  				      struct radeon_vm *vm,
322  				      struct radeon_bo *bo)
323  {
324  	struct radeon_bo_va *bo_va;
325  
326  	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
327  	if (bo_va == NULL)
328  		return NULL;
329  
330  	bo_va->vm = vm;
331  	bo_va->bo = bo;
332  	bo_va->it.start = 0;
333  	bo_va->it.last = 0;
334  	bo_va->flags = 0;
335  	bo_va->ref_count = 1;
336  	INIT_LIST_HEAD(&bo_va->bo_list);
337  	INIT_LIST_HEAD(&bo_va->vm_status);
338  
339  	mutex_lock(&vm->mutex);
340  	list_add_tail(&bo_va->bo_list, &bo->va);
341  	mutex_unlock(&vm->mutex);
342  
343  	return bo_va;
344  }
345  
346  /**
347   * radeon_vm_set_pages - helper to call the right asic function
348   *
349   * @rdev: radeon_device pointer
350   * @ib: indirect buffer to fill with commands
351   * @pe: addr of the page entry
352   * @addr: dst addr to write into pe
353   * @count: number of page entries to update
354   * @incr: increase next addr by incr bytes
355   * @flags: hw access flags
356   *
357   * Traces the parameters and calls the right asic functions
358   * to set up the page table using DMA.
359   */
360  static void radeon_vm_set_pages(struct radeon_device *rdev,
361  				struct radeon_ib *ib,
362  				uint64_t pe,
363  				uint64_t addr, unsigned count,
364  				uint32_t incr, uint32_t flags)
365  {
366  	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
367  
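	/* three update paths: PTEs carrying all R600_PTE_GART_MASK bits are
	 * copied straight from the GART table; system pages and very small
	 * updates (count < 3) are written entry by entry; everything else
	 * uses the set-pages path, which generates the entries from a base
	 * address and an increment in one fixed-size command */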
368  	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
369  		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
370  		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);
371  
372  	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
373  		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
374  					   count, incr, flags);
375  
376  	} else {
377  		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
378  					 count, incr, flags);
379  	}
380  }
381  
382  /**
383   * radeon_vm_clear_bo - initially clear the page dir/table
384   *
385   * @rdev: radeon_device pointer
386   * @bo: bo to clear
387   */
388  static int radeon_vm_clear_bo(struct radeon_device *rdev,
389  			      struct radeon_bo *bo)
390  {
391  	struct ttm_operation_ctx ctx = { true, false };
392  	struct radeon_ib ib;
393  	unsigned entries;
394  	uint64_t addr;
395  	int r;
396  
397  	r = radeon_bo_reserve(bo, false);
398  	if (r)
399  		return r;
400  
401  	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
402  	if (r)
403  		goto error_unreserve;
404  
405  	addr = radeon_bo_gpu_offset(bo);
406  	entries = radeon_bo_size(bo) / 8;
407  
408  	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
409  	if (r)
410  		goto error_unreserve;
411  
412  	ib.length_dw = 0;
413  
414  	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
415  	radeon_asic_vm_pad_ib(rdev, &ib);
416  	WARN_ON(ib.length_dw > 64);
417  
418  	r = radeon_ib_schedule(rdev, &ib, NULL, false);
419  	if (r)
420  		goto error_free;
421  
422  	ib.fence->is_vm_update = true;
423  	radeon_bo_fence(bo, ib.fence, false);
424  
425  error_free:
426  	radeon_ib_free(rdev, &ib);
427  
428  error_unreserve:
429  	radeon_bo_unreserve(bo);
430  	return r;
431  }
432  
433  /**
434   * radeon_vm_bo_set_addr - set bo's virtual address inside a vm
435   *
436   * @rdev: radeon_device pointer
437   * @bo_va: bo_va to store the address
438   * @soffset: requested offset of the buffer in the VM address space
439   * @flags: attributes of pages (read/write/valid/etc.)
440   *
441   * Set offset of @bo_va (cayman+).
442   * Validate and set the offset requested within the vm address space.
443   * Returns 0 for success, error for failure.
444   *
445   * Object has to be reserved and gets unreserved by this function!
446   */
447  int radeon_vm_bo_set_addr(struct radeon_device *rdev,
448  			  struct radeon_bo_va *bo_va,
449  			  uint64_t soffset,
450  			  uint32_t flags)
451  {
452  	uint64_t size = radeon_bo_size(bo_va->bo);
453  	struct radeon_vm *vm = bo_va->vm;
454  	unsigned last_pfn, pt_idx;
455  	uint64_t eoffset;
456  	int r;
457  
458  	if (soffset) {
459  		/* make sure the object fits at this offset */
460  		eoffset = soffset + size - 1;
461  		if (soffset >= eoffset) {
462  			r = -EINVAL;
463  			goto error_unreserve;
464  		}
465  
466  		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
467  		if (last_pfn >= rdev->vm_manager.max_pfn) {
468  			dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n",
469  				last_pfn, rdev->vm_manager.max_pfn);
470  			r = -EINVAL;
471  			goto error_unreserve;
472  		}
473  
474  	} else {
475  		eoffset = last_pfn = 0;
476  	}
477  
478  	mutex_lock(&vm->mutex);
479  	soffset /= RADEON_GPU_PAGE_SIZE;
480  	eoffset /= RADEON_GPU_PAGE_SIZE;
481  	if (soffset || eoffset) {
482  		struct interval_tree_node *it;
483  		it = interval_tree_iter_first(&vm->va, soffset, eoffset);
484  		if (it && it != &bo_va->it) {
485  			struct radeon_bo_va *tmp;
486  			tmp = container_of(it, struct radeon_bo_va, it);
487  			/* bo and tmp overlap, invalid offset */
488  			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
489  				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
490  				soffset, tmp->bo, tmp->it.start, tmp->it.last);
491  			mutex_unlock(&vm->mutex);
492  			r = -EINVAL;
493  			goto error_unreserve;
494  		}
495  	}
496  
497  	if (bo_va->it.start || bo_va->it.last) {
498  		/* add a clone of the bo_va to clear the old address */
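		/* the clone keeps the old range alive on vm->freed (holding a
		 * BO reference) so radeon_vm_clear_freed() can clear its PTEs
		 * later, while bo_va itself is reused for the new address */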
499  		struct radeon_bo_va *tmp;
500  		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
501  		if (!tmp) {
502  			mutex_unlock(&vm->mutex);
503  			r = -ENOMEM;
504  			goto error_unreserve;
505  		}
506  		tmp->it.start = bo_va->it.start;
507  		tmp->it.last = bo_va->it.last;
508  		tmp->vm = vm;
509  		tmp->bo = radeon_bo_ref(bo_va->bo);
510  
511  		interval_tree_remove(&bo_va->it, &vm->va);
512  		spin_lock(&vm->status_lock);
513  		bo_va->it.start = 0;
514  		bo_va->it.last = 0;
515  		list_del_init(&bo_va->vm_status);
516  		list_add(&tmp->vm_status, &vm->freed);
517  		spin_unlock(&vm->status_lock);
518  	}
519  
520  	if (soffset || eoffset) {
521  		spin_lock(&vm->status_lock);
522  		bo_va->it.start = soffset;
523  		bo_va->it.last = eoffset;
524  		list_add(&bo_va->vm_status, &vm->cleared);
525  		spin_unlock(&vm->status_lock);
526  		interval_tree_insert(&bo_va->it, &vm->va);
527  	}
528  
529  	bo_va->flags = flags;
530  
531  	soffset >>= radeon_vm_block_size;
532  	eoffset >>= radeon_vm_block_size;
533  
534  	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
535  
536  	if (eoffset > vm->max_pde_used)
537  		vm->max_pde_used = eoffset;
538  
539  	radeon_bo_unreserve(bo_va->bo);
540  
541  	/* walk over the address space and allocate the page tables */
542  	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
543  		struct radeon_bo *pt;
544  
545  		if (vm->page_tables[pt_idx].bo)
546  			continue;
547  
548  		/* drop mutex to allocate and clear page table */
549  		mutex_unlock(&vm->mutex);
550  
551  		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
552  				     RADEON_GPU_PAGE_SIZE, true,
553  				     RADEON_GEM_DOMAIN_VRAM, 0,
554  				     NULL, NULL, &pt);
555  		if (r)
556  			return r;
557  
558  		r = radeon_vm_clear_bo(rdev, pt);
559  		if (r) {
560  			radeon_bo_unref(&pt);
561  			return r;
562  		}
563  
564  		/* acquire mutex again */
565  		mutex_lock(&vm->mutex);
566  		if (vm->page_tables[pt_idx].bo) {
567  			/* someone else allocated the pt in the meantime */
568  			mutex_unlock(&vm->mutex);
569  			radeon_bo_unref(&pt);
570  			mutex_lock(&vm->mutex);
571  			continue;
572  		}
573  
574  		vm->page_tables[pt_idx].addr = 0;
575  		vm->page_tables[pt_idx].bo = pt;
576  	}
577  
578  	mutex_unlock(&vm->mutex);
579  	return 0;
580  
581  error_unreserve:
582  	radeon_bo_unreserve(bo_va->bo);
583  	return r;
584  }
585  
586  /**
587   * radeon_vm_map_gart - get the physical address of a gart page
588   *
589   * @rdev: radeon_device pointer
590   * @addr: the unmapped addr
591   *
592   * Look up the physical address of the page that the pte resolves
593   * to (cayman+).
594   * Returns the physical address of the page.
595   */
596  uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
597  {
598  	uint64_t result;
599  
600  	/* page table offset */
601  	result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
602  	result &= ~RADEON_GPU_PAGE_MASK;
603  
604  	return result;
605  }
606  
607  /**
608   * radeon_vm_page_flags - translate page flags to what the hw uses
609   *
610   * @flags: flags coming from userspace
611   *
612   * Translate the flags the userspace ABI uses to hw flags.
613   */
614  static uint32_t radeon_vm_page_flags(uint32_t flags)
615  {
616  	uint32_t hw_flags = 0;
617  
618  	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
619  	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
620  	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
621  	if (flags & RADEON_VM_PAGE_SYSTEM) {
622  		hw_flags |= R600_PTE_SYSTEM;
623  		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
624  	}
625  	return hw_flags;
626  }
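/*
 * For example, a userspace request of RADEON_VM_PAGE_VALID |
 * RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED
 * becomes R600_PTE_VALID | R600_PTE_READABLE | R600_PTE_SYSTEM |
 * R600_PTE_SNOOPED in the hardware PTE.
 */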
627  
628  /**
629   * radeon_vm_update_page_directory - make sure that page directory is valid
630   *
631   * @rdev: radeon_device pointer
632   * @vm: requested vm
633   *
634   * Allocates new page tables if necessary
635   * and updates the page directory (cayman+).
636   * Returns 0 for success, error for failure.
637   *
638   * Global and local mutex must be locked!
639   */
640  int radeon_vm_update_page_directory(struct radeon_device *rdev,
641  				    struct radeon_vm *vm)
642  {
643  	struct radeon_bo *pd = vm->page_directory;
644  	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
645  	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
646  	uint64_t last_pde = ~0, last_pt = ~0;
647  	unsigned count = 0, pt_idx, ndw;
648  	struct radeon_ib ib;
649  	int r;
650  
651  	/* padding, etc. */
652  	ndw = 64;
653  
654  	/* assume the worst case */
655  	ndw += vm->max_pde_used * 6;
656  
657  	/* update too big for an IB */
658  	if (ndw > 0xfffff)
659  		return -ENOMEM;
660  
661  	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
662  	if (r)
663  		return r;
664  	ib.length_dw = 0;
665  
666  	/* walk over the address space and update the page directory */
667  	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
668  		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
669  		uint64_t pde, pt;
670  
671  		if (bo == NULL)
672  			continue;
673  
674  		pt = radeon_bo_gpu_offset(bo);
675  		if (vm->page_tables[pt_idx].addr == pt)
676  			continue;
677  		vm->page_tables[pt_idx].addr = pt;
678  
679  		pde = pd_addr + pt_idx * 8;
680  		if (((last_pde + 8 * count) != pde) ||
681  		    ((last_pt + incr * count) != pt)) {
682  
683  			if (count) {
684  				radeon_vm_set_pages(rdev, &ib, last_pde,
685  						    last_pt, count, incr,
686  						    R600_PTE_VALID);
687  			}
688  
689  			count = 1;
690  			last_pde = pde;
691  			last_pt = pt;
692  		} else {
693  			++count;
694  		}
695  	}
696  
697  	if (count)
698  		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
699  				    incr, R600_PTE_VALID);
700  
701  	if (ib.length_dw != 0) {
702  		radeon_asic_vm_pad_ib(rdev, &ib);
703  
704  		radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true);
705  		WARN_ON(ib.length_dw > ndw);
706  		r = radeon_ib_schedule(rdev, &ib, NULL, false);
707  		if (r) {
708  			radeon_ib_free(rdev, &ib);
709  			return r;
710  		}
711  		ib.fence->is_vm_update = true;
712  		radeon_bo_fence(pd, ib.fence, false);
713  	}
714  	radeon_ib_free(rdev, &ib);
715  
716  	return 0;
717  }
718  
719  /**
720   * radeon_vm_frag_ptes - add fragment information to PTEs
721   *
722   * @rdev: radeon_device pointer
723   * @ib: IB for the update
724   * @pe_start: first PTE to handle
725   * @pe_end: last PTE to handle
726   * @addr: addr those PTEs should point to
727   * @flags: hw mapping flags
728   *
729   * Global and local mutex must be locked!
730   */
731  static void radeon_vm_frag_ptes(struct radeon_device *rdev,
732  				struct radeon_ib *ib,
733  				uint64_t pe_start, uint64_t pe_end,
734  				uint64_t addr, uint32_t flags)
735  {
736  	/**
737  	 * The MC L1 TLB supports variable sized pages, based on a fragment
738  	 * field in the PTE. When this field is set to a non-zero value, page
739  	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
740  	 * flags are considered valid for all PTEs within the fragment range
741  	 * and corresponding mappings are assumed to be physically contiguous.
742  	 *
743  	 * The L1 TLB can store a single PTE for the whole fragment,
744  	 * significantly increasing the space available for translation
745  	 * caching. This leads to large improvements in throughput when the
746  	 * TLB is under pressure.
747  	 *
748  	 * The L2 TLB distributes small and large fragments into two
749  	 * asymmetric partitions. The large fragment cache is significantly
750  	 * larger. Thus, we try to use large fragments wherever possible.
751  	 * Userspace can support this by aligning virtual base address and
752  	 * allocation size to the fragment size.
753  	 */
754  
755  	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
756  	uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) ||
757  			       (rdev->family == CHIP_ARUBA)) ?
758  			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
759  	uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) ||
760  			       (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80;
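	/* pe_start/pe_end are byte addresses of PTEs, so an alignment of
	 * 0x80 bytes is 16 PTEs = 64KB of address space and 0x200 bytes is
	 * 64 PTEs = 256KB, matching the fragment sizes chosen above */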
761  
762  	uint64_t frag_start = ALIGN(pe_start, frag_align);
763  	uint64_t frag_end = pe_end & ~(frag_align - 1);
764  
765  	unsigned count;
766  
767  	/* system pages are not physically contiguous */
768  	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
769  	    (frag_start >= frag_end)) {
770  
771  		count = (pe_end - pe_start) / 8;
772  		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
773  				    RADEON_GPU_PAGE_SIZE, flags);
774  		return;
775  	}
776  
777  	/* handle the 4K area at the beginning */
778  	if (pe_start != frag_start) {
779  		count = (frag_start - pe_start) / 8;
780  		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
781  				    RADEON_GPU_PAGE_SIZE, flags);
782  		addr += RADEON_GPU_PAGE_SIZE * count;
783  	}
784  
785  	/* handle the area in the middle */
786  	count = (frag_end - frag_start) / 8;
787  	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
788  			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);
789  
790  	/* handle the 4K area at the end */
791  	if (frag_end != pe_end) {
792  		addr += RADEON_GPU_PAGE_SIZE * count;
793  		count = (pe_end - frag_end) / 8;
794  		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
795  				    RADEON_GPU_PAGE_SIZE, flags);
796  	}
797  }
798  
799  /**
800   * radeon_vm_update_ptes - make sure that page tables are valid
801   *
802   * @rdev: radeon_device pointer
803   * @vm: requested vm
804   * @ib: indirect buffer to use for the update
805   * @start: start of GPU address range
806   * @end: end of GPU address range
807   * @dst: destination address to map to
808   * @flags: mapping flags
809   *
810   * Update the page tables in the range @start - @end (cayman+).
811   *
812   * Global and local mutex must be locked!
813   */
814  static int radeon_vm_update_ptes(struct radeon_device *rdev,
815  				 struct radeon_vm *vm,
816  				 struct radeon_ib *ib,
817  				 uint64_t start, uint64_t end,
818  				 uint64_t dst, uint32_t flags)
819  {
820  	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
821  	uint64_t last_pte = ~0, last_dst = ~0;
822  	unsigned count = 0;
823  	uint64_t addr;
824  
825  	/* walk over the address space and update the page tables */
826  	for (addr = start; addr < end; ) {
827  		uint64_t pt_idx = addr >> radeon_vm_block_size;
828  		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
829  		unsigned nptes;
830  		uint64_t pte;
831  		int r;
832  
833  		radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
834  		r = dma_resv_reserve_fences(pt->tbo.base.resv, 1);
835  		if (r)
836  			return r;
837  
838  		if ((addr & ~mask) == (end & ~mask))
839  			nptes = end - addr;
840  		else
841  			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
842  
843  		pte = radeon_bo_gpu_offset(pt);
844  		pte += (addr & mask) * 8;
845  
846  		if ((last_pte + 8 * count) != pte) {
847  
848  			if (count) {
849  				radeon_vm_frag_ptes(rdev, ib, last_pte,
850  						    last_pte + 8 * count,
851  						    last_dst, flags);
852  			}
853  
854  			count = nptes;
855  			last_pte = pte;
856  			last_dst = dst;
857  		} else {
858  			count += nptes;
859  		}
860  
861  		addr += nptes;
862  		dst += nptes * RADEON_GPU_PAGE_SIZE;
863  	}
864  
865  	if (count) {
866  		radeon_vm_frag_ptes(rdev, ib, last_pte,
867  				    last_pte + 8 * count,
868  				    last_dst, flags);
869  	}
870  
871  	return 0;
872  }
873  
874  /**
875   * radeon_vm_fence_pts - fence page tables after an update
876   *
877   * @vm: requested vm
878   * @start: start of GPU address range
879   * @end: end of GPU address range
880   * @fence: fence to use
881   *
882   * Fence the page tables in the range @start - @end (cayman+).
883   *
884   * Global and local mutex must be locked!
885   */
886  static void radeon_vm_fence_pts(struct radeon_vm *vm,
887  				uint64_t start, uint64_t end,
888  				struct radeon_fence *fence)
889  {
890  	unsigned i;
891  
892  	start >>= radeon_vm_block_size;
893  	end = (end - 1) >> radeon_vm_block_size;
894  
895  	for (i = start; i <= end; ++i)
896  		radeon_bo_fence(vm->page_tables[i].bo, fence, true);
897  }
898  
899  /**
900   * radeon_vm_bo_update - map a bo into the vm page table
901   *
902   * @rdev: radeon_device pointer
903   * @bo_va: radeon buffer virtual address object
904   * @mem: ttm mem
905   *
906   * Fill in the page table entries for @bo (cayman+).
907   * Returns 0 for success, -EINVAL for failure.
908   *
909   * Object has to be reserved and mutex must be locked!
910   */
911  int radeon_vm_bo_update(struct radeon_device *rdev,
912  			struct radeon_bo_va *bo_va,
913  			struct ttm_resource *mem)
914  {
915  	struct radeon_vm *vm = bo_va->vm;
916  	struct radeon_ib ib;
917  	unsigned nptes, ncmds, ndw;
918  	uint64_t addr;
919  	uint32_t flags;
920  	int r;
921  
922  	if (!bo_va->it.start) {
923  		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
924  			bo_va->bo, vm);
925  		return -EINVAL;
926  	}
927  
928  	spin_lock(&vm->status_lock);
929  	if (mem) {
930  		if (list_empty(&bo_va->vm_status)) {
931  			spin_unlock(&vm->status_lock);
932  			return 0;
933  		}
934  		list_del_init(&bo_va->vm_status);
935  	} else {
936  		list_del(&bo_va->vm_status);
937  		list_add(&bo_va->vm_status, &vm->cleared);
938  	}
939  	spin_unlock(&vm->status_lock);
940  
941  	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
942  	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
943  	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
944  	if (bo_va->bo && radeon_ttm_tt_is_readonly(rdev, bo_va->bo->tbo.ttm))
945  		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;
946  
947  	if (mem) {
948  		addr = (u64)mem->start << PAGE_SHIFT;
949  		if (mem->mem_type != TTM_PL_SYSTEM)
950  			bo_va->flags |= RADEON_VM_PAGE_VALID;
951  
952  		if (mem->mem_type == TTM_PL_TT) {
953  			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
954  			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
955  				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;
956  
957  		} else {
958  			addr += rdev->vm_manager.vram_base_offset;
959  		}
960  	} else {
961  		addr = 0;
962  	}
963  
964  	trace_radeon_vm_bo_update(bo_va);
965  
966  	nptes = bo_va->it.last - bo_va->it.start + 1;
967  
968  	/* reserve space for one command every (1 << BLOCK_SIZE) entries
969  	   or 2k entries (whichever is smaller) */
970  	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;
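	/* e.g. remapping 16MB (4096 pages) with a 9 bit block size needs at
	 * most (4096 >> 9) + 1 = 9 commands */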
971  
972  	/* padding, etc. */
973  	ndw = 64;
974  
975  	flags = radeon_vm_page_flags(bo_va->flags);
976  	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
977  		/* only copy commands needed */
978  		ndw += ncmds * 7;
979  
980  	} else if (flags & R600_PTE_SYSTEM) {
981  		/* header for write data commands */
982  		ndw += ncmds * 4;
983  
984  		/* body of write data command */
985  		ndw += nptes * 2;
986  
987  	} else {
988  		/* set page commands needed */
989  		ndw += ncmds * 10;
990  
991  		/* two extra commands for begin/end of fragment */
992  		ndw += 2 * 10;
993  	}
994  
995  	/* update too big for an IB */
996  	if (ndw > 0xfffff)
997  		return -ENOMEM;
998  
999  	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
1000  	if (r)
1001  		return r;
1002  	ib.length_dw = 0;
1003  
1004  	if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
1005  		unsigned i;
1006  
1007  		for (i = 0; i < RADEON_NUM_RINGS; ++i)
1008  			radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
1009  	}
1010  
1011  	r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
1012  				  bo_va->it.last + 1, addr,
1013  				  radeon_vm_page_flags(bo_va->flags));
1014  	if (r) {
1015  		radeon_ib_free(rdev, &ib);
1016  		return r;
1017  	}
1018  
1019  	radeon_asic_vm_pad_ib(rdev, &ib);
1020  	WARN_ON(ib.length_dw > ndw);
1021  
1022  	r = radeon_ib_schedule(rdev, &ib, NULL, false);
1023  	if (r) {
1024  		radeon_ib_free(rdev, &ib);
1025  		return r;
1026  	}
1027  	ib.fence->is_vm_update = true;
1028  	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
1029  	radeon_fence_unref(&bo_va->last_pt_update);
1030  	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
1031  	radeon_ib_free(rdev, &ib);
1032  
1033  	return 0;
1034  }
1035  
1036  /**
1037   * radeon_vm_clear_freed - clear freed BOs in the PT
1038   *
1039   * @rdev: radeon_device pointer
1040   * @vm: requested vm
1041   *
1042   * Make sure all freed BOs are cleared in the PT.
1043   * Returns 0 for success.
1044   *
1045   * PTs have to be reserved and mutex must be locked!
1046   */
1047  int radeon_vm_clear_freed(struct radeon_device *rdev,
1048  			  struct radeon_vm *vm)
1049  {
1050  	struct radeon_bo_va *bo_va;
1051  	int r = 0;
1052  
1053  	spin_lock(&vm->status_lock);
1054  	while (!list_empty(&vm->freed)) {
1055  		bo_va = list_first_entry(&vm->freed,
1056  			struct radeon_bo_va, vm_status);
1057  		spin_unlock(&vm->status_lock);
1058  
1059  		r = radeon_vm_bo_update(rdev, bo_va, NULL);
1060  		radeon_bo_unref(&bo_va->bo);
1061  		radeon_fence_unref(&bo_va->last_pt_update);
1062  		spin_lock(&vm->status_lock);
1063  		list_del(&bo_va->vm_status);
1064  		kfree(bo_va);
1065  		if (r)
1066  			break;
1067  
1068  	}
1069  	spin_unlock(&vm->status_lock);
1070  	return r;
1071  
1072  }
1073  
1074  /**
1075   * radeon_vm_clear_invalids - clear invalidated BOs in the PT
1076   *
1077   * @rdev: radeon_device pointer
1078   * @vm: requested vm
1079   *
1080   * Make sure all invalidated BOs are cleared in the PT.
1081   * Returns 0 for success.
1082   *
1083   * PTs have to be reserved and mutex must be locked!
1084   */
1085  int radeon_vm_clear_invalids(struct radeon_device *rdev,
1086  			     struct radeon_vm *vm)
1087  {
1088  	struct radeon_bo_va *bo_va;
1089  	int r;
1090  
1091  	spin_lock(&vm->status_lock);
1092  	while (!list_empty(&vm->invalidated)) {
1093  		bo_va = list_first_entry(&vm->invalidated,
1094  			struct radeon_bo_va, vm_status);
1095  		spin_unlock(&vm->status_lock);
1096  
1097  		r = radeon_vm_bo_update(rdev, bo_va, NULL);
1098  		if (r)
1099  			return r;
1100  
1101  		spin_lock(&vm->status_lock);
1102  	}
1103  	spin_unlock(&vm->status_lock);
1104  
1105  	return 0;
1106  }
1107  
1108  /**
1109   * radeon_vm_bo_rmv - remove a bo from a specific vm
1110   *
1111   * @rdev: radeon_device pointer
1112   * @bo_va: requested bo_va
1113   *
1114   * Remove @bo_va->bo from the requested vm (cayman+).
1115   *
1116   * Object has to be reserved!
1117   */
1118  void radeon_vm_bo_rmv(struct radeon_device *rdev,
1119  		      struct radeon_bo_va *bo_va)
1120  {
1121  	struct radeon_vm *vm = bo_va->vm;
1122  
1123  	list_del(&bo_va->bo_list);
1124  
1125  	mutex_lock(&vm->mutex);
1126  	if (bo_va->it.start || bo_va->it.last)
1127  		interval_tree_remove(&bo_va->it, &vm->va);
1128  
1129  	spin_lock(&vm->status_lock);
1130  	list_del(&bo_va->vm_status);
1131  	if (bo_va->it.start || bo_va->it.last) {
1132  		bo_va->bo = radeon_bo_ref(bo_va->bo);
1133  		list_add(&bo_va->vm_status, &vm->freed);
1134  	} else {
1135  		radeon_fence_unref(&bo_va->last_pt_update);
1136  		kfree(bo_va);
1137  	}
1138  	spin_unlock(&vm->status_lock);
1139  
1140  	mutex_unlock(&vm->mutex);
1141  }
1142  
1143  /**
1144   * radeon_vm_bo_invalidate - mark the bo as invalid
1145   *
1146   * @rdev: radeon_device pointer
1147   * @bo: radeon buffer object
1148   *
1149   * Mark @bo as invalid (cayman+).
1150   */
1151  void radeon_vm_bo_invalidate(struct radeon_device *rdev,
1152  			     struct radeon_bo *bo)
1153  {
1154  	struct radeon_bo_va *bo_va;
1155  
1156  	list_for_each_entry(bo_va, &bo->va, bo_list) {
1157  		spin_lock(&bo_va->vm->status_lock);
1158  		if (list_empty(&bo_va->vm_status) &&
1159  		    (bo_va->it.start || bo_va->it.last))
1160  			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
1161  		spin_unlock(&bo_va->vm->status_lock);
1162  	}
1163  }
1164  
1165  /**
1166   * radeon_vm_init - initialize a vm instance
1167   *
1168   * @rdev: radeon_device pointer
1169   * @vm: requested vm
1170   *
1171   * Init @vm fields (cayman+).
1172   */
1173  int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
1174  {
1175  	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
1176  		RADEON_VM_PTE_COUNT * 8);
1177  	unsigned pd_size, pd_entries, pts_size;
1178  	int i, r;
1179  
1180  	vm->ib_bo_va = NULL;
1181  	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
1182  		vm->ids[i].id = 0;
1183  		vm->ids[i].flushed_updates = NULL;
1184  		vm->ids[i].last_id_use = NULL;
1185  	}
1186  	mutex_init(&vm->mutex);
1187  	vm->va = RB_ROOT_CACHED;
1188  	spin_lock_init(&vm->status_lock);
1189  	INIT_LIST_HEAD(&vm->invalidated);
1190  	INIT_LIST_HEAD(&vm->freed);
1191  	INIT_LIST_HEAD(&vm->cleared);
1192  
1193  	pd_size = radeon_vm_directory_size(rdev);
1194  	pd_entries = radeon_vm_num_pdes(rdev);
1195  
1196  	/* allocate page table array */
1197  	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
1198  	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
1199  	if (vm->page_tables == NULL) {
1200  		DRM_ERROR("Cannot allocate memory for page table array\n");
1201  		return -ENOMEM;
1202  	}
1203  
1204  	r = radeon_bo_create(rdev, pd_size, align, true,
1205  			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
1206  			     NULL, &vm->page_directory);
1207  	if (r) {
1208  		kfree(vm->page_tables);
1209  		vm->page_tables = NULL;
1210  		return r;
1211  	}
1212  	r = radeon_vm_clear_bo(rdev, vm->page_directory);
1213  	if (r) {
1214  		radeon_bo_unref(&vm->page_directory);
1215  		vm->page_directory = NULL;
1216  		kfree(vm->page_tables);
1217  		vm->page_tables = NULL;
1218  		return r;
1219  	}
1220  
1221  	return 0;
1222  }
1223  
1224  /**
1225   * radeon_vm_fini - tear down a vm instance
1226   *
1227   * @rdev: radeon_device pointer
1228   * @vm: requested vm
1229   *
1230   * Tear down @vm (cayman+).
1231   * Unbind the VM and remove all bos from the vm bo list
1232   */
1233  void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
1234  {
1235  	struct radeon_bo_va *bo_va, *tmp;
1236  	int i, r;
1237  
1238  	if (!RB_EMPTY_ROOT(&vm->va.rb_root))
1239  		dev_err(rdev->dev, "still active bo inside vm\n");
1240  
1241  	rbtree_postorder_for_each_entry_safe(bo_va, tmp,
1242  					     &vm->va.rb_root, it.rb) {
1243  		interval_tree_remove(&bo_va->it, &vm->va);
1244  		r = radeon_bo_reserve(bo_va->bo, false);
1245  		if (!r) {
1246  			list_del_init(&bo_va->bo_list);
1247  			radeon_bo_unreserve(bo_va->bo);
1248  			radeon_fence_unref(&bo_va->last_pt_update);
1249  			kfree(bo_va);
1250  		}
1251  	}
1252  	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
1253  		radeon_bo_unref(&bo_va->bo);
1254  		radeon_fence_unref(&bo_va->last_pt_update);
1255  		kfree(bo_va);
1256  	}
1257  
1258  	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
1259  		radeon_bo_unref(&vm->page_tables[i].bo);
1260  	kfree(vm->page_tables);
1261  
1262  	radeon_bo_unref(&vm->page_directory);
1263  
1264  	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
1265  		radeon_fence_unref(&vm->ids[i].flushed_updates);
1266  		radeon_fence_unref(&vm->ids[i].last_id_use);
1267  	}
1268  
1269  	mutex_destroy(&vm->mutex);
1270  }
1271