// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

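/*
 * Migrate @bo to VRAM (optionally clearing it there), evict it back to
 * system memory and verify that the first and last CCS values read back
 * as @get_val. Before unmapping, write @assign_val into those slots so
 * the next round can check that the data survives further migration.
 */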
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource,
					 XE_MIGRATE_CLEAR_FLAG_FULL);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

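	/*
	 * The CCS backing store is appended after the bo's payload pages
	 * in the ttm_tt, starting at xe_bo_ccs_pages_start().
	 */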
	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

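/*
 * Run three migration rounds against a freshly created bo: first check
 * that CCS data is zeroed on creation, then that it survives a
 * VRAM <-> system round trip, and finally that an explicit clear
 * zeroes it again.
 */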
static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_skip(test, "non-flat-ccs device\n");
		return 0;
	}

	/* For xe2+ dgfx, we don't handle CCS metadata */
	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
		kunit_skip(test, "xe2+ dgfx device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for primary tile */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_ccs_migrate_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	ccs_test_run_device(xe);
}

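/*
 * Exercise the suspend/resume-style eviction path: pin an external bo,
 * evict everything to system memory, restore kernel and user bos, and
 * verify that the pinned external bo comes back in VRAM while the
 * unpinned one stays in system memory.
 */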
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

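	/* Two rounds: the second also revalidates the bos after restore. */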
	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * in flight. Also it seems snapshotting the ADS object and
		 * copying back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart the
		 * GT, which re-initializes such things. We could potentially
		 * skip saving and restoring such objects in xe_bo_evict_all()
		 * however it seems quite fragile not to also restart the GT.
		 * Try to do that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

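		/*
		 * Second round only: check that the evicted bos can be
		 * validated back into their respective placements.
		 */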
		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_skip(test, "non-discrete device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_bo_evict_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	evict_test_run_device(xe);
}

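/* Each case is parameterized to run against every bound xe device. */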
static struct kunit_case xe_bo_tests[] = {
	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
	{}
};

VISIBLE_IF_KUNIT
struct kunit_suite xe_bo_test_suite = {
	.name = "xe_bo",
	.test_cases = xe_bo_tests,
	.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);