// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

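/*
 * Migrate a bo to VRAM (optionally clearing it and its CCS metadata),
 * evict it back to system memory and verify that the CCS data in the
 * TTM backing store matches @get_val, then write @assign_val there so
 * the next round can check that the data survives further migrations.
 */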
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource,
					 XE_MIGRATE_CLEAR_FLAG_FULL);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

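/*
 * Run the CCS migration checks on a single tile: verify that CCS data is
 * cleared on bo creation, that it survives migration to and from system
 * memory, and that it can be explicitly cleared again.
 */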
static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_skip(test, "non-flat-ccs device\n");
		return 0;
	}

	/* For xe2+ dgfx, CCS metadata is not handled by the driver. */
	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
		kunit_skip(test, "xe2+ dgfx device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx, run only on the primary tile. */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_ccs_migrate_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	ccs_test_run_device(xe);
}

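/*
 * Exercise the full VRAM evict/restore path on a single tile: create a
 * vm-private bo and a pinned external bo, evict everything with
 * xe_bo_evict_all(), restore kernel and user bos again, and check that
 * both bos end up in the expected placements.
 */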
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * inflight. Also, snapshotting the ADS object and copying it
		 * back results in serious breakage. Normally when calling
		 * xe_bo_restore_kernel() we always fully restart the GT,
		 * which re-initializes such things. We could potentially skip
		 * saving and restoring such objects in xe_bo_evict_all(), but
		 * not also restarting the GT seems quite fragile. Try to do
		 * that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

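		/*
		 * On the second iteration, additionally check that both bos
		 * can still be validated after the evict/restore cycle.
		 */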
		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_skip(test, "non-discrete device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_bo_evict_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	evict_test_run_device(xe);
}

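/* Both cases are parameterized to run once per live xe device. */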
static struct kunit_case xe_bo_tests[] = {
	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
	{}
};

VISIBLE_IF_KUNIT
struct kunit_suite xe_bo_test_suite = {
	.name = "xe_bo",
	.test_cases = xe_bo_tests,
	.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
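
/*
 * Usage sketch (an assumption, not part of this file): the exported suite is
 * expected to be registered by a separate live-test module, roughly as
 *
 *	kunit_test_suite(xe_bo_test_suite);
 *
 * so that the tests run against real hardware once that module is loaded with
 * the xe driver bound.
 */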