// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_bars.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"

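/*
 * struct xe_ttm_stolen_mgr - Manager for the stolen memory region.
 *
 * Wraps the VRAM manager and additionally records where the stolen region
 * lives from the CPU side (io_base) and the GPU side (stolen_base), plus an
 * optional write-combined CPU mapping of the region.
 */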
struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
 * stolen, can we then fall back to mapping through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable CPU access to stolen;
 * however, on such hardware we can always use the mappable part of the GGTT
 * for CPU access. Check if that's the case for this device.
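 *
 * Return: true if CPU access to stolen needs to go through the GGTT, false if
 * stolen can be accessed directly.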
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

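/*
 * On discrete devices the stolen region (DSM) sits at the top of the root
 * tile's VRAM. Read its base from the DSMBASE register and return the size
 * of the region, aligned down to the 1M DSM granularity.
 */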
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct xe_gt *mmio = xe_root_mmio_gt(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
	tile_size = tile->mem.vram.actual_physical_size;

	/* Use DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * There may be a few KiB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion is
	 * always less than the DSM granularity, so align down the stolen_size
	 * to DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}

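/*
 * Decode the WOPCM size from the STOLEN_RESERVED register. The field encodes
 * a power-of-two number of megabytes: 0x0-0x3 map to 1M/2M/4M/8M and
 * 0x5-0x6 map to 16M/32M; anything else is unexpected and treated as an
 * error.
 */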
static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}

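/*
 * On newer integrated devices the DSM follows the 8M GSM at the start of
 * the BAR. Size it from the GMS field of GGC and carve out the WOPCM and,
 * if needed, the Wa_14019821291 region at its top. Returns 0 if no usable
 * stolen memory is detected.
 */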
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);

	/*
	 * Check GGMS: it should be the fixed value 0x3 (8MB), which
	 * corresponds to the GTT size.
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as the address in
	 * the PTEs, together with the DM flag being set. Previously there was
	 * no such flag, so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode GMS; treat an unexpected encoding as no stolen memory */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x04:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS.  We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse.  The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}

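/* Stolen range as discovered by the x86 early graphics quirks code */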
extern struct resource intel_graphics_stolen_res;

static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

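/**
 * xe_ttm_stolen_mgr_init() - Initialize the stolen memory manager
 * @xe: xe device
 *
 * Detect the stolen region for this platform (none for VFs, BAR2-based for
 * discrete and newer integrated parts, the x86 stolen resource otherwise),
 * register a TTM manager for XE_PL_STOLEN and, when the region is directly
 * CPU accessible, set up a write-combined mapping for it.
 */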
void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, io_size;
	int err;

	if (!mgr) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
		return;
	}

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return;
	}

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen is always at the end of vram and, with small BAR, the BAR
	 * size is pretty much always 256M.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, PAGE_SIZE);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
}

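/**
 * xe_ttm_stolen_io_offset() - CPU visible offset of a stolen buffer object
 * @bo: buffer object placed in stolen memory
 * @offset: byte offset into the buffer object
 *
 * Return: the physical address through which the CPU can reach @offset of
 * @bo, either directly within the stolen part of the BAR or via the mappable
 * GGTT on platforms that need it.
 */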
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

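/* CPU access directly through the stolen part of the BAR */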
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}

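/* CPU access via the mappable GGTT; the BO must have a GGTT mapping */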
static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

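/**
 * xe_ttm_stolen_io_mem_reserve() - Reserve CPU access to a stolen resource
 * @xe: xe device
 * @mem: TTM resource placed in stolen memory
 *
 * Fill in the bus placement of @mem for CPU access, going through the GGTT
 * on platforms that require it and directly through the BAR otherwise.
 *
 * Return: 0 on success, -EIO if stolen memory cannot be accessed by the CPU.
 */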
int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

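/**
 * xe_ttm_stolen_gpu_offset() - GPU base offset of the stolen region
 * @xe: xe device
 *
 * Return: the base of stolen memory as seen from the GPU.
 */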
u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}