// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

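/*
 * Migrate @bo to VRAM, optionally clear both it and its CCS metadata there,
 * then evict it to system memory and check the CCS data that was copied out
 * alongside the payload: @get_val is the value expected after eviction,
 * @assign_val is written back through the CPU map for the next round to read.
 */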
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource,
					 XE_MIGRATE_CLEAR_FLAG_FULL);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

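	/*
	 * The CCS backing pages follow the payload pages in the TTM page
	 * array; xe_bo_ccs_pages_start() returns their byte offset into the
	 * backing store, so shifting by PAGE_SHIFT yields the page index of
	 * the first CCS page.
	 */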
	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

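/*
 * Run the CCS migration checks on a single tile: a freshly created bo must
 * read back zeroed CCS data, CCS data written in the previous round must
 * survive another migration, and a full migrate-clear must zero it again.
 */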
static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

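/*
 * Exercise CCS migration on the applicable tiles of a device. Skipped on
 * devices without flat CCS and on xe2+ discrete parts, where the driver
 * does not handle CCS metadata.
 */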
static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_skip(test, "non-flat-ccs device\n");
		return 0;
	}

	/* For xe2+ dgfx, we don't handle ccs metadata */
	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
		kunit_skip(test, "xe2+ dgfx device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for primary tile */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_ccs_migrate_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	ccs_test_run_device(xe);
}

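/*
 * Create a vm-private bo and a pinned external bo on @tile, evict all of
 * VRAM, then restore and verify that the pinned external bo made it back to
 * VRAM while the unpinned one stayed in system memory. Runs two rounds; the
 * second additionally re-validates both bos after the restore.
 */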
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * in flight. Also, snapshotting the ADS object and copying
		 * it back results in serious breakage. Normally when calling
		 * xe_bo_restore_kernel() we always fully restart the GT,
		 * which re-initializes such things. We could potentially skip
		 * saving and restoring such objects in xe_bo_evict_all(),
		 * but it seems fragile not to also restart the GT. Try to do
		 * that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

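		/*
		 * Second round only: explicitly validate both bos again to
		 * move them back to their preferred placements after the
		 * restore.
		 */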
		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

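/*
 * Run the eviction test on every VRAM tile of a discrete device. Integrated
 * devices have no VRAM to evict from, so they are skipped.
 */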
static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_skip(test, "non-discrete device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_bo_evict_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	evict_test_run_device(xe);
}

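/* Both cases are parameterized to run once per live xe device found. */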
static struct kunit_case xe_bo_tests[] = {
	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
	{}
};

VISIBLE_IF_KUNIT
struct kunit_suite xe_bo_test_suite = {
	.name = "xe_bo",
	.test_cases = xe_bo_tests,
	.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);