1  /*
2   * Copyright 2008 Advanced Micro Devices, Inc.
3   * Copyright 2008 Red Hat Inc.
4   * Copyright 2009 Jerome Glisse.
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a
7   * copy of this software and associated documentation files (the "Software"),
8   * to deal in the Software without restriction, including without limitation
9   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10   * and/or sell copies of the Software, and to permit persons to whom the
11   * Software is furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19   * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22   * OTHER DEALINGS IN THE SOFTWARE.
23   *
24   * Authors: Dave Airlie
25   *          Alex Deucher
26   *          Jerome Glisse
27   */
28  #include <linux/power_supply.h>
29  #include <linux/kthread.h>
30  #include <linux/module.h>
31  #include <linux/console.h>
32  #include <linux/slab.h>
33  #include <linux/iommu.h>
34  #include <linux/pci.h>
35  #include <linux/pci-p2pdma.h>
36  #include <linux/apple-gmux.h>
37  
38  #include <drm/drm_aperture.h>
39  #include <drm/drm_atomic_helper.h>
40  #include <drm/drm_crtc_helper.h>
41  #include <drm/drm_fb_helper.h>
42  #include <drm/drm_probe_helper.h>
43  #include <drm/amdgpu_drm.h>
44  #include <linux/device.h>
45  #include <linux/vgaarb.h>
46  #include <linux/vga_switcheroo.h>
47  #include <linux/efi.h>
48  #include "amdgpu.h"
49  #include "amdgpu_trace.h"
50  #include "amdgpu_i2c.h"
51  #include "atom.h"
52  #include "amdgpu_atombios.h"
53  #include "amdgpu_atomfirmware.h"
54  #include "amd_pcie.h"
55  #ifdef CONFIG_DRM_AMDGPU_SI
56  #include "si.h"
57  #endif
58  #ifdef CONFIG_DRM_AMDGPU_CIK
59  #include "cik.h"
60  #endif
61  #include "vi.h"
62  #include "soc15.h"
63  #include "nv.h"
64  #include "bif/bif_4_1_d.h"
65  #include <linux/firmware.h>
66  #include "amdgpu_vf_error.h"
67  
68  #include "amdgpu_amdkfd.h"
69  #include "amdgpu_pm.h"
70  
71  #include "amdgpu_xgmi.h"
72  #include "amdgpu_ras.h"
73  #include "amdgpu_pmu.h"
74  #include "amdgpu_fru_eeprom.h"
75  #include "amdgpu_reset.h"
76  #include "amdgpu_virt.h"
77  #include "amdgpu_dev_coredump.h"
78  
79  #include <linux/suspend.h>
80  #include <drm/task_barrier.h>
81  #include <linux/pm_runtime.h>
82  
83  #include <drm/drm_drv.h>
84  
85  #if IS_ENABLED(CONFIG_X86)
86  #include <asm/intel-family.h>
87  #endif
88  
89  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92  MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93  MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94  MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95  MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96  
97  #define AMDGPU_RESUME_MS		2000
98  #define AMDGPU_MAX_RETRY_LIMIT		2
99  #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
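/*
 * Dword offsets of a PCIE index/data register pair, used as a fallback for
 * indirect register access before the NBIO callbacks are available (see
 * amdgpu_device_indirect_rreg_ext()). The exact registers are ASIC dependent.
 */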
100  #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101  #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102  #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
103  
104  static const struct drm_driver amdgpu_kms_driver;
105  
106  const char *amdgpu_asic_name[] = {
107  	"TAHITI",
108  	"PITCAIRN",
109  	"VERDE",
110  	"OLAND",
111  	"HAINAN",
112  	"BONAIRE",
113  	"KAVERI",
114  	"KABINI",
115  	"HAWAII",
116  	"MULLINS",
117  	"TOPAZ",
118  	"TONGA",
119  	"FIJI",
120  	"CARRIZO",
121  	"STONEY",
122  	"POLARIS10",
123  	"POLARIS11",
124  	"POLARIS12",
125  	"VEGAM",
126  	"VEGA10",
127  	"VEGA12",
128  	"VEGA20",
129  	"RAVEN",
130  	"ARCTURUS",
131  	"RENOIR",
132  	"ALDEBARAN",
133  	"NAVI10",
134  	"CYAN_SKILLFISH",
135  	"NAVI14",
136  	"NAVI12",
137  	"SIENNA_CICHLID",
138  	"NAVY_FLOUNDER",
139  	"VANGOGH",
140  	"DIMGREY_CAVEFISH",
141  	"BEIGE_GOBY",
142  	"YELLOW_CARP",
143  	"IP DISCOVERY",
144  	"LAST",
145  };
146  
147  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
148  
149  /**
150   * DOC: pcie_replay_count
151   *
152   * The amdgpu driver provides a sysfs API for reporting the total number
153   * of PCIe replays (NAKs).
154   * The file pcie_replay_count is used for this and returns the total
155   * number of replays as the sum of the NAKs generated and the NAKs received.
156   */
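/*
 * For example, the count can be read from the device's sysfs directory
 * (the card index below is illustrative and system dependent):
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 */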
157  
158  static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
159  		struct device_attribute *attr, char *buf)
160  {
161  	struct drm_device *ddev = dev_get_drvdata(dev);
162  	struct amdgpu_device *adev = drm_to_adev(ddev);
163  	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
164  
165  	return sysfs_emit(buf, "%llu\n", cnt);
166  }
167  
168  static DEVICE_ATTR(pcie_replay_count, 0444,
169  		amdgpu_device_get_pcie_replay_count, NULL);
170  
171  static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
172  					  struct bin_attribute *attr, char *buf,
173  					  loff_t ppos, size_t count)
174  {
175  	struct device *dev = kobj_to_dev(kobj);
176  	struct drm_device *ddev = dev_get_drvdata(dev);
177  	struct amdgpu_device *adev = drm_to_adev(ddev);
178  	ssize_t bytes_read;
179  
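	/* The read offset (ppos) selects which register state partition to dump. */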
180  	switch (ppos) {
181  	case AMDGPU_SYS_REG_STATE_XGMI:
182  		bytes_read = amdgpu_asic_get_reg_state(
183  			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
184  		break;
185  	case AMDGPU_SYS_REG_STATE_WAFL:
186  		bytes_read = amdgpu_asic_get_reg_state(
187  			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
188  		break;
189  	case AMDGPU_SYS_REG_STATE_PCIE:
190  		bytes_read = amdgpu_asic_get_reg_state(
191  			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
192  		break;
193  	case AMDGPU_SYS_REG_STATE_USR:
194  		bytes_read = amdgpu_asic_get_reg_state(
195  			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
196  		break;
197  	case AMDGPU_SYS_REG_STATE_USR_1:
198  		bytes_read = amdgpu_asic_get_reg_state(
199  			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
200  		break;
201  	default:
202  		return -EINVAL;
203  	}
204  
205  	return bytes_read;
206  }
207  
208  BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
209  	 AMDGPU_SYS_REG_STATE_END);
210  
211  int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
212  {
213  	int ret;
214  
215  	if (!amdgpu_asic_get_reg_state_supported(adev))
216  		return 0;
217  
218  	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
219  
220  	return ret;
221  }
222  
223  void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
224  {
225  	if (!amdgpu_asic_get_reg_state_supported(adev))
226  		return;
227  	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
228  }
229  
230  /**
231   * DOC: board_info
232   *
233   * The amdgpu driver provides a sysfs API for reporting board-related information.
234   * It provides the form factor information in the format
235   *
236   *   type : form factor
237   *
238   * Possible form factor values:
239   *
240   * - "cem"		- PCIE CEM card
241   * - "oam"		- Open Compute Accelerator Module
242   * - "unknown"	- Not known
243   *
244   */
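/*
 * Reading board_info on an OAM part, for example, would produce:
 *
 *   type : oam
 */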
245  
246  static ssize_t amdgpu_device_get_board_info(struct device *dev,
247  					    struct device_attribute *attr,
248  					    char *buf)
249  {
250  	struct drm_device *ddev = dev_get_drvdata(dev);
251  	struct amdgpu_device *adev = drm_to_adev(ddev);
252  	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
253  	const char *pkg;
254  
255  	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
256  		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
257  
258  	switch (pkg_type) {
259  	case AMDGPU_PKG_TYPE_CEM:
260  		pkg = "cem";
261  		break;
262  	case AMDGPU_PKG_TYPE_OAM:
263  		pkg = "oam";
264  		break;
265  	default:
266  		pkg = "unknown";
267  		break;
268  	}
269  
270  	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
271  }
272  
273  static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
274  
275  static struct attribute *amdgpu_board_attrs[] = {
276  	&dev_attr_board_info.attr,
277  	NULL,
278  };
279  
280  static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
281  					     struct attribute *attr, int n)
282  {
283  	struct device *dev = kobj_to_dev(kobj);
284  	struct drm_device *ddev = dev_get_drvdata(dev);
285  	struct amdgpu_device *adev = drm_to_adev(ddev);
286  
287  	if (adev->flags & AMD_IS_APU)
288  		return 0;
289  
290  	return attr->mode;
291  }
292  
293  static const struct attribute_group amdgpu_board_attrs_group = {
294  	.attrs = amdgpu_board_attrs,
295  	.is_visible = amdgpu_board_attrs_is_visible
296  };
297  
298  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
299  
300  
301  /**
302   * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
303   *
304   * @dev: drm_device pointer
305   *
306   * Returns true if the device is a dGPU with ATPX power control,
307   * otherwise returns false.
308   */
309  bool amdgpu_device_supports_px(struct drm_device *dev)
310  {
311  	struct amdgpu_device *adev = drm_to_adev(dev);
312  
313  	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
314  		return true;
315  	return false;
316  }
317  
318  /**
319   * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
320   *
321   * @dev: drm_device pointer
322   *
323   * Returns true if the device is a dGPU with ACPI power control,
324   * otherwise returns false.
325   */
326  bool amdgpu_device_supports_boco(struct drm_device *dev)
327  {
328  	struct amdgpu_device *adev = drm_to_adev(dev);
329  
330  	if (adev->has_pr3 ||
331  	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
332  		return true;
333  	return false;
334  }
335  
336  /**
337   * amdgpu_device_supports_baco - Does the device support BACO
338   *
339   * @dev: drm_device pointer
340   *
341   * Return:
342   * 1 if the device supports BACO;
343   * 3 if the device supports MACO (only works if BACO is supported);
344   * otherwise returns 0.
345   */
346  int amdgpu_device_supports_baco(struct drm_device *dev)
347  {
348  	struct amdgpu_device *adev = drm_to_adev(dev);
349  
350  	return amdgpu_asic_supports_baco(adev);
351  }
352  
353  void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
354  {
355  	struct drm_device *dev;
356  	int bamaco_support;
357  
358  	dev = adev_to_drm(adev);
359  
360  	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
361  	bamaco_support = amdgpu_device_supports_baco(dev);
362  
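	/*
	 * amdgpu_runtime_pm module parameter: 2 forces BAMACO, 1 forces BACO,
	 * -1/-2 pick the best mode automatically, 0 disables runtime PM.
	 */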
363  	switch (amdgpu_runtime_pm) {
364  	case 2:
365  		if (bamaco_support & MACO_SUPPORT) {
366  			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
367  			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
368  		} else if (bamaco_support == BACO_SUPPORT) {
369  			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
370  			dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
371  		}
372  		break;
373  	case 1:
374  		if (bamaco_support & BACO_SUPPORT) {
375  			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
376  			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
377  		}
378  		break;
379  	case -1:
380  	case -2:
381  		if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
382  			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
383  			dev_info(adev->dev, "Using ATPX for runtime pm\n");
384  		} else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
385  			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
386  			dev_info(adev->dev, "Using BOCO for runtime pm\n");
387  		} else {
388  			if (!bamaco_support)
389  				goto no_runtime_pm;
390  
391  			switch (adev->asic_type) {
392  			case CHIP_VEGA20:
393  			case CHIP_ARCTURUS:
394  				/* BACO is not supported on vega20 and arcturus */
395  				break;
396  			case CHIP_VEGA10:
397  				/* enable BACO as runpm mode if noretry=0 */
398  				if (!adev->gmc.noretry)
399  					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
400  				break;
401  			default:
402  				/* enable BACO as runpm mode on CI+ */
403  				adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
404  				break;
405  			}
406  
407  			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
408  				if (bamaco_support & MACO_SUPPORT) {
409  					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
410  					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
411  				} else {
412  					dev_info(adev->dev, "Using BACO for runtime pm\n");
413  				}
414  			}
415  		}
416  		break;
417  	case 0:
418  		dev_info(adev->dev, "runtime pm is manually disabled\n");
419  		break;
420  	default:
421  		break;
422  	}
423  
424  no_runtime_pm:
425  	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
426  		dev_info(adev->dev, "Runtime PM not available\n");
427  }
428  /**
429   * amdgpu_device_supports_smart_shift - Is the device a dGPU with
430   * Smart Shift support
431   *
432   * @dev: drm_device pointer
433   *
434   * Returns true if the device is a dGPU with Smart Shift support,
435   * otherwise returns false.
436   */
437  bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
438  {
439  	return (amdgpu_device_supports_boco(dev) &&
440  		amdgpu_acpi_is_power_shift_control_supported());
441  }
442  
443  /*
444   * VRAM access helper functions
445   */
446  
447  /**
448   * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
449   *
450   * @adev: amdgpu_device pointer
451   * @pos: offset of the buffer in vram
452   * @buf: virtual address of the buffer in system memory
453   * @size: read/write size; the buffer at @buf must be at least @size bytes
454   * @write: true - write to vram, otherwise - read from vram
455   */
456  void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
457  			     void *buf, size_t size, bool write)
458  {
459  	unsigned long flags;
460  	uint32_t hi = ~0, tmp = 0;
461  	uint32_t *data = buf;
462  	uint64_t last;
463  	int idx;
464  
465  	if (!drm_dev_enter(adev_to_drm(adev), &idx))
466  		return;
467  
468  	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
469  
470  	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
471  	for (last = pos + size; pos < last; pos += 4) {
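		/*
		 * Select the current dword in VRAM: the low bits of the address
		 * go into MM_INDEX (with the top bit set) and the upper bits
		 * into MM_INDEX_HI, which is only rewritten when it changes.
		 */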
472  		tmp = pos >> 31;
473  
474  		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
475  		if (tmp != hi) {
476  			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
477  			hi = tmp;
478  		}
479  		if (write)
480  			WREG32_NO_KIQ(mmMM_DATA, *data++);
481  		else
482  			*data++ = RREG32_NO_KIQ(mmMM_DATA);
483  	}
484  
485  	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
486  	drm_dev_exit(idx);
487  }
488  
489  /**
490   * amdgpu_device_aper_access - access vram via the vram aperture
491   *
492   * @adev: amdgpu_device pointer
493   * @pos: offset of the buffer in vram
494   * @buf: virtual address of the buffer in system memory
495   * @size: read/write size; the buffer at @buf must be at least @size bytes
496   * @write: true - write to vram, otherwise - read from vram
497   *
498   * Returns the number of bytes transferred.
499   */
500  size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
501  				 void *buf, size_t size, bool write)
502  {
503  #ifdef CONFIG_64BIT
504  	void __iomem *addr;
505  	size_t count = 0;
506  	uint64_t last;
507  
508  	if (!adev->mman.aper_base_kaddr)
509  		return 0;
510  
511  	last = min(pos + size, adev->gmc.visible_vram_size);
512  	if (last > pos) {
513  		addr = adev->mman.aper_base_kaddr + pos;
514  		count = last - pos;
515  
516  		if (write) {
517  			memcpy_toio(addr, buf, count);
518  			/* Make sure HDP write cache flush happens without any reordering
519  			 * after the system memory contents are sent over PCIe to the device
520  			 */
521  			mb();
522  			amdgpu_device_flush_hdp(adev, NULL);
523  		} else {
524  			amdgpu_device_invalidate_hdp(adev, NULL);
525  			/* Make sure HDP read cache is invalidated before issuing a read
526  			 * to the PCIe device
527  			 */
528  			mb();
529  			memcpy_fromio(buf, addr, count);
530  		}
531  
532  	}
533  
534  	return count;
535  #else
536  	return 0;
537  #endif
538  }
539  
540  /**
541   * amdgpu_device_vram_access - read/write a buffer in vram
542   *
543   * @adev: amdgpu_device pointer
544   * @pos: offset of the buffer in vram
545   * @buf: virtual address of the buffer in system memory
546   * @size: read/write size; the buffer at @buf must be at least @size bytes
547   * @write: true - write to vram, otherwise - read from vram
548   */
549  void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
550  			       void *buf, size_t size, bool write)
551  {
552  	size_t count;
553  
554  	/* try using the vram aperture to access vram first */
555  	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
556  	size -= count;
557  	if (size) {
558  		/* use MM to access the rest of vram */
559  		pos += count;
560  		buf += count;
561  		amdgpu_device_mm_access(adev, pos, buf, size, write);
562  	}
563  }
564  
565  /*
566   * register access helper functions.
567   */
568  
569  /* Check if hw access should be skipped because of hotplug or device error */
570  bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
571  {
572  	if (adev->no_hw_access)
573  		return true;
574  
575  #ifdef CONFIG_LOCKDEP
576  	/*
577  	 * This is a bit complicated to understand, so worth a comment. What we assert
578  	 * here is that the GPU reset is not running on another thread in parallel.
579  	 *
580  	 * For this we trylock the read side of the reset semaphore; if that succeeds
581  	 * we know that the reset is not running in parallel.
582  	 *
583  	 * If the trylock fails we assert that we are either already holding the read
584  	 * side of the lock or are the reset thread itself and hold the write side of
585  	 * the lock.
586  	 */
587  	if (in_task()) {
588  		if (down_read_trylock(&adev->reset_domain->sem))
589  			up_read(&adev->reset_domain->sem);
590  		else
591  			lockdep_assert_held(&adev->reset_domain->sem);
592  	}
593  #endif
594  	return false;
595  }
596  
597  /**
598   * amdgpu_device_rreg - read a memory mapped IO or indirect register
599   *
600   * @adev: amdgpu_device pointer
601   * @reg: dword aligned register offset
602   * @acc_flags: access flags which require special behavior
603   *
604   * Returns the 32 bit value from the offset specified.
605   */
606  uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
607  			    uint32_t reg, uint32_t acc_flags)
608  {
609  	uint32_t ret;
610  
611  	if (amdgpu_device_skip_hw_access(adev))
612  		return 0;
613  
614  	if ((reg * 4) < adev->rmmio_size) {
615  		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
616  		    amdgpu_sriov_runtime(adev) &&
617  		    down_read_trylock(&adev->reset_domain->sem)) {
618  			ret = amdgpu_kiq_rreg(adev, reg, 0);
619  			up_read(&adev->reset_domain->sem);
620  		} else {
621  			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
622  		}
623  	} else {
624  		ret = adev->pcie_rreg(adev, reg * 4);
625  	}
626  
627  	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
628  
629  	return ret;
630  }
631  
632  /*
633   * MMIO register read with bytes helper functions
634   * @offset: byte offset from MMIO start
635   */
636  
637  /**
638   * amdgpu_mm_rreg8 - read a memory mapped IO register
639   *
640   * @adev: amdgpu_device pointer
641   * @offset: byte aligned register offset
642   *
643   * Returns the 8 bit value from the offset specified.
644   */
645  uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
646  {
647  	if (amdgpu_device_skip_hw_access(adev))
648  		return 0;
649  
650  	if (offset < adev->rmmio_size)
651  		return (readb(adev->rmmio + offset));
652  	BUG();
653  }
654  
655  
656  /**
657   * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
658   *
659   * @adev: amdgpu_device pointer
660   * @reg: dword aligned register offset
661   * @acc_flags: access flags which require special behavior
662   * @xcc_id: xcc accelerated compute core id
663   *
664   * Returns the 32 bit value from the offset specified.
665   */
666  uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
667  				uint32_t reg, uint32_t acc_flags,
668  				uint32_t xcc_id)
669  {
670  	uint32_t ret, rlcg_flag;
671  
672  	if (amdgpu_device_skip_hw_access(adev))
673  		return 0;
674  
675  	if ((reg * 4) < adev->rmmio_size) {
676  		if (amdgpu_sriov_vf(adev) &&
677  		    !amdgpu_sriov_runtime(adev) &&
678  		    adev->gfx.rlc.rlcg_reg_access_supported &&
679  		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
680  							 GC_HWIP, false,
681  							 &rlcg_flag)) {
682  			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
683  		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
684  		    amdgpu_sriov_runtime(adev) &&
685  		    down_read_trylock(&adev->reset_domain->sem)) {
686  			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
687  			up_read(&adev->reset_domain->sem);
688  		} else {
689  			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
690  		}
691  	} else {
692  		ret = adev->pcie_rreg(adev, reg * 4);
693  	}
694  
695  	return ret;
696  }
697  
698  /*
699   * MMIO register write with bytes helper functions
700   * @offset: byte offset from MMIO start
701   * @value: the value to be written to the register
702   */
703  
704  /**
705   * amdgpu_mm_wreg8 - write a memory mapped IO register
706   *
707   * @adev: amdgpu_device pointer
708   * @offset: byte aligned register offset
709   * @value: 8 bit value to write
710   *
711   * Writes the value specified to the offset specified.
712   */
713  void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
714  {
715  	if (amdgpu_device_skip_hw_access(adev))
716  		return;
717  
718  	if (offset < adev->rmmio_size)
719  		writeb(value, adev->rmmio + offset);
720  	else
721  		BUG();
722  }
723  
724  /**
725   * amdgpu_device_wreg - write to a memory mapped IO or indirect register
726   *
727   * @adev: amdgpu_device pointer
728   * @reg: dword aligned register offset
729   * @v: 32 bit value to write to the register
730   * @acc_flags: access flags which require special behavior
731   *
732   * Writes the value specified to the offset specified.
733   */
734  void amdgpu_device_wreg(struct amdgpu_device *adev,
735  			uint32_t reg, uint32_t v,
736  			uint32_t acc_flags)
737  {
738  	if (amdgpu_device_skip_hw_access(adev))
739  		return;
740  
741  	if ((reg * 4) < adev->rmmio_size) {
742  		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
743  		    amdgpu_sriov_runtime(adev) &&
744  		    down_read_trylock(&adev->reset_domain->sem)) {
745  			amdgpu_kiq_wreg(adev, reg, v, 0);
746  			up_read(&adev->reset_domain->sem);
747  		} else {
748  			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
749  		}
750  	} else {
751  		adev->pcie_wreg(adev, reg * 4, v);
752  	}
753  
754  	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
755  }
756  
757  /**
758   * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with the RLC path if in range
759   *
760   * @adev: amdgpu_device pointer
761   * @reg: mmio/rlc register
762   * @v: value to write
763   * @xcc_id: xcc accelerated compute core id
764   *
765   * This function is invoked only for debugfs register access.
766   */
767  void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
768  			     uint32_t reg, uint32_t v,
769  			     uint32_t xcc_id)
770  {
771  	if (amdgpu_device_skip_hw_access(adev))
772  		return;
773  
774  	if (amdgpu_sriov_fullaccess(adev) &&
775  	    adev->gfx.rlc.funcs &&
776  	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
777  		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
778  			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
779  	} else if ((reg * 4) >= adev->rmmio_size) {
780  		adev->pcie_wreg(adev, reg * 4, v);
781  	} else {
782  		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
783  	}
784  }
785  
786  /**
787   * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
788   *
789   * @adev: amdgpu_device pointer
790   * @reg: dword aligned register offset
791   * @v: 32 bit value to write to the register
792   * @acc_flags: access flags which require special behavior
793   * @xcc_id: xcc accelerated compute core id
794   *
795   * Writes the value specified to the offset specified.
796   */
797  void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
798  			uint32_t reg, uint32_t v,
799  			uint32_t acc_flags, uint32_t xcc_id)
800  {
801  	uint32_t rlcg_flag;
802  
803  	if (amdgpu_device_skip_hw_access(adev))
804  		return;
805  
806  	if ((reg * 4) < adev->rmmio_size) {
807  		if (amdgpu_sriov_vf(adev) &&
808  		    !amdgpu_sriov_runtime(adev) &&
809  		    adev->gfx.rlc.rlcg_reg_access_supported &&
810  		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
811  							 GC_HWIP, true,
812  							 &rlcg_flag)) {
813  			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
814  		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
815  		    amdgpu_sriov_runtime(adev) &&
816  		    down_read_trylock(&adev->reset_domain->sem)) {
817  			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
818  			up_read(&adev->reset_domain->sem);
819  		} else {
820  			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
821  		}
822  	} else {
823  		adev->pcie_wreg(adev, reg * 4, v);
824  	}
825  }
826  
827  /**
828   * amdgpu_device_indirect_rreg - read an indirect register
829   *
830   * @adev: amdgpu_device pointer
831   * @reg_addr: indirect register address to read from
832   *
833   * Returns the value of indirect register @reg_addr
834   */
835  u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
836  				u32 reg_addr)
837  {
838  	unsigned long flags, pcie_index, pcie_data;
839  	void __iomem *pcie_index_offset;
840  	void __iomem *pcie_data_offset;
841  	u32 r;
842  
843  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
844  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
845  
846  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
847  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
848  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
849  
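	/*
	 * Index/data pair access: latch the target address in the index
	 * register and read it back so the posted write lands before the
	 * data register is read.
	 */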
850  	writel(reg_addr, pcie_index_offset);
851  	readl(pcie_index_offset);
852  	r = readl(pcie_data_offset);
853  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
854  
855  	return r;
856  }
857  
858  u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
859  				    u64 reg_addr)
860  {
861  	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
862  	u32 r;
863  	void __iomem *pcie_index_offset;
864  	void __iomem *pcie_index_hi_offset;
865  	void __iomem *pcie_data_offset;
866  
867  	if (unlikely(!adev->nbio.funcs)) {
868  		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
869  		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
870  	} else {
871  		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
872  		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
873  	}
874  
875  	if (reg_addr >> 32) {
876  		if (unlikely(!adev->nbio.funcs))
877  			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
878  		else
879  			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
880  	} else {
881  		pcie_index_hi = 0;
882  	}
883  
884  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
885  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
886  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
887  	if (pcie_index_hi != 0)
888  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
889  				pcie_index_hi * 4;
890  
891  	writel(reg_addr, pcie_index_offset);
892  	readl(pcie_index_offset);
893  	if (pcie_index_hi != 0) {
894  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
895  		readl(pcie_index_hi_offset);
896  	}
897  	r = readl(pcie_data_offset);
898  
899  	/* clear the high bits */
900  	if (pcie_index_hi != 0) {
901  		writel(0, pcie_index_hi_offset);
902  		readl(pcie_index_hi_offset);
903  	}
904  
905  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
906  
907  	return r;
908  }
909  
910  /**
911   * amdgpu_device_indirect_rreg64 - read a 64-bit indirect register
912   *
913   * @adev: amdgpu_device pointer
914   * @reg_addr: indirect register address to read from
915   *
916   * Returns the value of indirect register @reg_addr
917   */
918  u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
919  				  u32 reg_addr)
920  {
921  	unsigned long flags, pcie_index, pcie_data;
922  	void __iomem *pcie_index_offset;
923  	void __iomem *pcie_data_offset;
924  	u64 r;
925  
926  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
927  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
928  
929  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
930  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
931  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
932  
933  	/* read low 32 bits */
934  	writel(reg_addr, pcie_index_offset);
935  	readl(pcie_index_offset);
936  	r = readl(pcie_data_offset);
937  	/* read high 32 bits */
938  	writel(reg_addr + 4, pcie_index_offset);
939  	readl(pcie_index_offset);
940  	r |= ((u64)readl(pcie_data_offset) << 32);
941  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
942  
943  	return r;
944  }
945  
946  u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
947  				  u64 reg_addr)
948  {
949  	unsigned long flags, pcie_index, pcie_data;
950  	unsigned long pcie_index_hi = 0;
951  	void __iomem *pcie_index_offset;
952  	void __iomem *pcie_index_hi_offset;
953  	void __iomem *pcie_data_offset;
954  	u64 r;
955  
956  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
957  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
958  	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
959  		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
960  
961  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
962  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
963  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
964  	if (pcie_index_hi != 0)
965  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
966  			pcie_index_hi * 4;
967  
968  	/* read low 32 bits */
969  	writel(reg_addr, pcie_index_offset);
970  	readl(pcie_index_offset);
971  	if (pcie_index_hi != 0) {
972  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
973  		readl(pcie_index_hi_offset);
974  	}
975  	r = readl(pcie_data_offset);
976  	/* read high 32 bits */
977  	writel(reg_addr + 4, pcie_index_offset);
978  	readl(pcie_index_offset);
979  	if (pcie_index_hi != 0) {
980  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
981  		readl(pcie_index_hi_offset);
982  	}
983  	r |= ((u64)readl(pcie_data_offset) << 32);
984  
985  	/* clear the high bits */
986  	if (pcie_index_hi != 0) {
987  		writel(0, pcie_index_hi_offset);
988  		readl(pcie_index_hi_offset);
989  	}
990  
991  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
992  
993  	return r;
994  }
995  
996  /**
997   * amdgpu_device_indirect_wreg - write to an indirect register
998   *
999   * @adev: amdgpu_device pointer
1000   * @reg_addr: indirect register offset
1001   * @reg_data: indirect register data
1002   *
1003   */
1004  void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1005  				 u32 reg_addr, u32 reg_data)
1006  {
1007  	unsigned long flags, pcie_index, pcie_data;
1008  	void __iomem *pcie_index_offset;
1009  	void __iomem *pcie_data_offset;
1010  
1011  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1012  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1013  
1014  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1015  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1016  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1017  
1018  	writel(reg_addr, pcie_index_offset);
1019  	readl(pcie_index_offset);
1020  	writel(reg_data, pcie_data_offset);
1021  	readl(pcie_data_offset);
1022  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1023  }
1024  
1025  void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1026  				     u64 reg_addr, u32 reg_data)
1027  {
1028  	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1029  	void __iomem *pcie_index_offset;
1030  	void __iomem *pcie_index_hi_offset;
1031  	void __iomem *pcie_data_offset;
1032  
1033  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1034  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1035  	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1036  		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1037  	else
1038  		pcie_index_hi = 0;
1039  
1040  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1041  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1042  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1043  	if (pcie_index_hi != 0)
1044  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1045  				pcie_index_hi * 4;
1046  
1047  	writel(reg_addr, pcie_index_offset);
1048  	readl(pcie_index_offset);
1049  	if (pcie_index_hi != 0) {
1050  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1051  		readl(pcie_index_hi_offset);
1052  	}
1053  	writel(reg_data, pcie_data_offset);
1054  	readl(pcie_data_offset);
1055  
1056  	/* clear the high bits */
1057  	if (pcie_index_hi != 0) {
1058  		writel(0, pcie_index_hi_offset);
1059  		readl(pcie_index_hi_offset);
1060  	}
1061  
1062  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1063  }
1064  
1065  /**
1066   * amdgpu_device_indirect_wreg64 - write to a 64-bit indirect register
1067   *
1068   * @adev: amdgpu_device pointer
1069   * @reg_addr: indirect register offset
1070   * @reg_data: indirect register data
1071   *
1072   */
1073  void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1074  				   u32 reg_addr, u64 reg_data)
1075  {
1076  	unsigned long flags, pcie_index, pcie_data;
1077  	void __iomem *pcie_index_offset;
1078  	void __iomem *pcie_data_offset;
1079  
1080  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1081  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1082  
1083  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1084  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1085  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1086  
1087  	/* write low 32 bits */
1088  	writel(reg_addr, pcie_index_offset);
1089  	readl(pcie_index_offset);
1090  	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1091  	readl(pcie_data_offset);
1092  	/* write high 32 bits */
1093  	writel(reg_addr + 4, pcie_index_offset);
1094  	readl(pcie_index_offset);
1095  	writel((u32)(reg_data >> 32), pcie_data_offset);
1096  	readl(pcie_data_offset);
1097  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1098  }
1099  
1100  void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1101  				   u64 reg_addr, u64 reg_data)
1102  {
1103  	unsigned long flags, pcie_index, pcie_data;
1104  	unsigned long pcie_index_hi = 0;
1105  	void __iomem *pcie_index_offset;
1106  	void __iomem *pcie_index_hi_offset;
1107  	void __iomem *pcie_data_offset;
1108  
1109  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1110  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1111  	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1112  		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1113  
1114  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1115  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1116  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1117  	if (pcie_index_hi != 0)
1118  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1119  				pcie_index_hi * 4;
1120  
1121  	/* write low 32 bits */
1122  	writel(reg_addr, pcie_index_offset);
1123  	readl(pcie_index_offset);
1124  	if (pcie_index_hi != 0) {
1125  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1126  		readl(pcie_index_hi_offset);
1127  	}
1128  	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1129  	readl(pcie_data_offset);
1130  	/* write high 32 bits */
1131  	writel(reg_addr + 4, pcie_index_offset);
1132  	readl(pcie_index_offset);
1133  	if (pcie_index_hi != 0) {
1134  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1135  		readl(pcie_index_hi_offset);
1136  	}
1137  	writel((u32)(reg_data >> 32), pcie_data_offset);
1138  	readl(pcie_data_offset);
1139  
1140  	/* clear the high bits */
1141  	if (pcie_index_hi != 0) {
1142  		writel(0, pcie_index_hi_offset);
1143  		readl(pcie_index_hi_offset);
1144  	}
1145  
1146  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1147  }
1148  
1149  /**
1150   * amdgpu_device_get_rev_id - query device rev_id
1151   *
1152   * @adev: amdgpu_device pointer
1153   *
1154   * Return device rev_id
1155   */
1156  u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1157  {
1158  	return adev->nbio.funcs->get_rev_id(adev);
1159  }
1160  
1161  /**
1162   * amdgpu_invalid_rreg - dummy reg read function
1163   *
1164   * @adev: amdgpu_device pointer
1165   * @reg: offset of register
1166   *
1167   * Dummy register read function.  Used for register blocks
1168   * that certain asics don't have (all asics).
1169   * Returns the value in the register.
1170   */
1171  static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1172  {
1173  	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1174  	BUG();
1175  	return 0;
1176  }
1177  
1178  static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1179  {
1180  	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1181  	BUG();
1182  	return 0;
1183  }
1184  
1185  /**
1186   * amdgpu_invalid_wreg - dummy reg write function
1187   *
1188   * @adev: amdgpu_device pointer
1189   * @reg: offset of register
1190   * @v: value to write to the register
1191   *
1192   * Dummy register write function.  Used for register blocks
1193   * that certain asics don't have (all asics).
1194   */
1195  static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1196  {
1197  	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1198  		  reg, v);
1199  	BUG();
1200  }
1201  
1202  static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1203  {
1204  	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1205  		  reg, v);
1206  	BUG();
1207  }
1208  
1209  /**
1210   * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1211   *
1212   * @adev: amdgpu_device pointer
1213   * @reg: offset of register
1214   *
1215   * Dummy register read function.  Used for register blocks
1216   * that certain asics don't have (all asics).
1217   * Returns the value in the register.
1218   */
1219  static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1220  {
1221  	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1222  	BUG();
1223  	return 0;
1224  }
1225  
1226  static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1227  {
1228  	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1229  	BUG();
1230  	return 0;
1231  }
1232  
1233  /**
1234   * amdgpu_invalid_wreg64 - dummy reg write function
1235   *
1236   * @adev: amdgpu_device pointer
1237   * @reg: offset of register
1238   * @v: value to write to the register
1239   *
1240   * Dummy register write function.  Used for register blocks
1241   * that certain asics don't have (all asics).
1242   */
1243  static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1244  {
1245  	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1246  		  reg, v);
1247  	BUG();
1248  }
1249  
1250  static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1251  {
1252  	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1253  		  reg, v);
1254  	BUG();
1255  }
1256  
1257  /**
1258   * amdgpu_block_invalid_rreg - dummy reg read function
1259   *
1260   * @adev: amdgpu_device pointer
1261   * @block: offset of instance
1262   * @reg: offset of register
1263   *
1264   * Dummy register read function.  Used for register blocks
1265   * that certain asics don't have (all asics).
1266   * Returns the value in the register.
1267   */
1268  static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1269  					  uint32_t block, uint32_t reg)
1270  {
1271  	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1272  		  reg, block);
1273  	BUG();
1274  	return 0;
1275  }
1276  
1277  /**
1278   * amdgpu_block_invalid_wreg - dummy reg write function
1279   *
1280   * @adev: amdgpu_device pointer
1281   * @block: offset of instance
1282   * @reg: offset of register
1283   * @v: value to write to the register
1284   *
1285   * Dummy register write function.  Used for register blocks
1286   * that certain asics don't have (all asics).
1287   */
1288  static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1289  				      uint32_t block,
1290  				      uint32_t reg, uint32_t v)
1291  {
1292  	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1293  		  reg, block, v);
1294  	BUG();
1295  }
1296  
1297  /**
1298   * amdgpu_device_asic_init - Wrapper for atom asic_init
1299   *
1300   * @adev: amdgpu_device pointer
1301   *
1302   * Does any asic specific work and then calls atom asic init.
1303   */
1304  static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1305  {
1306  	int ret;
1307  
1308  	amdgpu_asic_pre_asic_init(adev);
1309  
1310  	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1311  	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1312  	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1313  		amdgpu_psp_wait_for_bootloader(adev);
1314  		ret = amdgpu_atomfirmware_asic_init(adev, true);
1315  		return ret;
1316  	} else {
1317  		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1318  	}
1319  
1320  	return 0;
1321  }
1322  
1323  /**
1324   * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1325   *
1326   * @adev: amdgpu_device pointer
1327   *
1328   * Allocates a scratch page of VRAM for use by various things in the
1329   * driver.
1330   */
1331  static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1332  {
1333  	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1334  				       AMDGPU_GEM_DOMAIN_VRAM |
1335  				       AMDGPU_GEM_DOMAIN_GTT,
1336  				       &adev->mem_scratch.robj,
1337  				       &adev->mem_scratch.gpu_addr,
1338  				       (void **)&adev->mem_scratch.ptr);
1339  }
1340  
1341  /**
1342   * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1343   *
1344   * @adev: amdgpu_device pointer
1345   *
1346   * Frees the VRAM scratch page.
1347   */
1348  static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1349  {
1350  	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1351  }
1352  
1353  /**
1354   * amdgpu_device_program_register_sequence - program an array of registers.
1355   *
1356   * @adev: amdgpu_device pointer
1357   * @registers: pointer to the register array
1358   * @array_size: size of the register array
1359   *
1360   * Programs an array of registers with AND/OR masks.
1361   * This is a helper for setting golden registers.
1362   */
1363  void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1364  					     const u32 *registers,
1365  					     const u32 array_size)
1366  {
1367  	u32 tmp, reg, and_mask, or_mask;
1368  	int i;
1369  
1370  	if (array_size % 3)
1371  		return;
1372  
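	/*
	 * The array is consumed as { reg, and_mask, or_mask } triplets. For
	 * example, a hypothetical entry { mmFOO_CTRL, 0x0000000f, 0x00000002 }
	 * clears the low nibble of FOO_CTRL and then sets bit 1, while an
	 * and_mask of 0xffffffff writes or_mask to the register directly.
	 */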
1373  	for (i = 0; i < array_size; i += 3) {
1374  		reg = registers[i + 0];
1375  		and_mask = registers[i + 1];
1376  		or_mask = registers[i + 2];
1377  
1378  		if (and_mask == 0xffffffff) {
1379  			tmp = or_mask;
1380  		} else {
1381  			tmp = RREG32(reg);
1382  			tmp &= ~and_mask;
1383  			if (adev->family >= AMDGPU_FAMILY_AI)
1384  				tmp |= (or_mask & and_mask);
1385  			else
1386  				tmp |= or_mask;
1387  		}
1388  		WREG32(reg, tmp);
1389  	}
1390  }
1391  
1392  /**
1393   * amdgpu_device_pci_config_reset - reset the GPU
1394   *
1395   * @adev: amdgpu_device pointer
1396   *
1397   * Resets the GPU using the pci config reset sequence.
1398   * Only applicable to asics prior to vega10.
1399   */
1400  void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1401  {
1402  	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1403  }
1404  
1405  /**
1406   * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1407   *
1408   * @adev: amdgpu_device pointer
1409   *
1410   * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1411   */
1412  int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1413  {
1414  	return pci_reset_function(adev->pdev);
1415  }
1416  
1417  /*
1418   * amdgpu_device_wb_*()
1419   * Writeback is the method by which the GPU updates special pages in memory
1420   * with the status of certain GPU events (fences, ring pointers, etc.).
1421   */
1422  
1423  /**
1424   * amdgpu_device_wb_fini - Disable Writeback and free memory
1425   *
1426   * @adev: amdgpu_device pointer
1427   *
1428   * Disables Writeback and frees the Writeback memory (all asics).
1429   * Used at driver shutdown.
1430   */
1431  static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1432  {
1433  	if (adev->wb.wb_obj) {
1434  		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1435  				      &adev->wb.gpu_addr,
1436  				      (void **)&adev->wb.wb);
1437  		adev->wb.wb_obj = NULL;
1438  	}
1439  }
1440  
1441  /**
1442   * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1443   *
1444   * @adev: amdgpu_device pointer
1445   *
1446   * Initializes writeback and allocates writeback memory (all asics).
1447   * Used at driver startup.
1448   * Returns 0 on success or a negative error code on failure.
1449   */
1450  static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1451  {
1452  	int r;
1453  
1454  	if (adev->wb.wb_obj == NULL) {
1455  		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 bytes = AMDGPU_MAX_WB 256-bit slots */
1456  		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1457  					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1458  					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1459  					    (void **)&adev->wb.wb);
1460  		if (r) {
1461  			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1462  			return r;
1463  		}
1464  
1465  		adev->wb.num_wb = AMDGPU_MAX_WB;
1466  		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1467  
1468  		/* clear wb memory */
1469  		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1470  	}
1471  
1472  	return 0;
1473  }
1474  
1475  /**
1476   * amdgpu_device_wb_get - Allocate a wb entry
1477   *
1478   * @adev: amdgpu_device pointer
1479   * @wb: wb index
1480   *
1481   * Allocate a wb slot for use by the driver (all asics).
1482   * Returns 0 on success or -EINVAL on failure.
1483   */
1484  int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1485  {
1486  	unsigned long flags, offset;
1487  
1488  	spin_lock_irqsave(&adev->wb.lock, flags);
1489  	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1490  	if (offset < adev->wb.num_wb) {
1491  		__set_bit(offset, adev->wb.used);
1492  		spin_unlock_irqrestore(&adev->wb.lock, flags);
1493  		*wb = offset << 3; /* convert to dw offset */
1494  		return 0;
1495  	} else {
1496  		spin_unlock_irqrestore(&adev->wb.lock, flags);
1497  		return -EINVAL;
1498  	}
1499  }
1500  
1501  /**
1502   * amdgpu_device_wb_free - Free a wb entry
1503   *
1504   * @adev: amdgpu_device pointer
1505   * @wb: wb index
1506   *
1507   * Free a wb slot allocated for use by the driver (all asics)
1508   */
1509  void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1510  {
1511  	unsigned long flags;
1512  
1513  	wb >>= 3;
1514  	spin_lock_irqsave(&adev->wb.lock, flags);
1515  	if (wb < adev->wb.num_wb)
1516  		__clear_bit(wb, adev->wb.used);
1517  	spin_unlock_irqrestore(&adev->wb.lock, flags);
1518  }
1519  
1520  /**
1521   * amdgpu_device_resize_fb_bar - try to resize FB BAR
1522   *
1523   * @adev: amdgpu_device pointer
1524   *
1525   * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1526   * to fail, but if any of the BARs is not accessible after the resize we abort
1527   * driver loading by returning -ENODEV.
1528   */
1529  int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1530  {
1531  	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1532  	struct pci_bus *root;
1533  	struct resource *res;
1534  	unsigned int i;
1535  	u16 cmd;
1536  	int r;
1537  
1538  	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1539  		return 0;
1540  
1541  	/* Bypass for VF */
1542  	if (amdgpu_sriov_vf(adev))
1543  		return 0;
1544  
1545  	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1546  	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1547  		DRM_WARN("System can't access extended configuration space, please check!!\n");
1548  
1549  	/* skip if the bios has already enabled large BAR */
1550  	if (adev->gmc.real_vram_size &&
1551  	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1552  		return 0;
1553  
1554  	/* Check if the root BUS has 64bit memory resources */
1555  	root = adev->pdev->bus;
1556  	while (root->parent)
1557  		root = root->parent;
1558  
1559  	pci_bus_for_each_resource(root, res, i) {
1560  		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1561  		    res->start > 0x100000000ull)
1562  			break;
1563  	}
1564  
1565  	/* Trying to resize is pointless without a root hub window above 4GB */
1566  	if (!res)
1567  		return 0;
1568  
1569  	/* Limit the BAR size to what is available */
1570  	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1571  			rbar_size);
1572  
1573  	/* Disable memory decoding while we change the BAR addresses and size */
1574  	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1575  	pci_write_config_word(adev->pdev, PCI_COMMAND,
1576  			      cmd & ~PCI_COMMAND_MEMORY);
1577  
1578  	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1579  	amdgpu_doorbell_fini(adev);
1580  	if (adev->asic_type >= CHIP_BONAIRE)
1581  		pci_release_resource(adev->pdev, 2);
1582  
1583  	pci_release_resource(adev->pdev, 0);
1584  
1585  	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1586  	if (r == -ENOSPC)
1587  		DRM_INFO("Not enough PCI address space for a large BAR.");
1588  	else if (r && r != -ENOTSUPP)
1589  		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1590  
1591  	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1592  
1593  	/* When the doorbell or fb BAR isn't available we have no chance of
1594  	 * using the device.
1595  	 */
1596  	r = amdgpu_doorbell_init(adev);
1597  	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1598  		return -ENODEV;
1599  
1600  	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1601  
1602  	return 0;
1603  }
1604  
1605  static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1606  {
1607  	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1608  		return false;
1609  
1610  	return true;
1611  }
1612  
1613  /*
1614   * GPU helpers function.
1615   */
1616  /**
1617   * amdgpu_device_need_post - check if the hw need post or not
1618   *
1619   * @adev: amdgpu_device pointer
1620   *
1621   * Check if the asic has been initialized (all asics) at driver startup,
1622   * or if post is needed because a hw reset was performed.
1623   * Returns true if post is needed, false if not.
1624   */
1625  bool amdgpu_device_need_post(struct amdgpu_device *adev)
1626  {
1627  	uint32_t reg;
1628  
1629  	if (amdgpu_sriov_vf(adev))
1630  		return false;
1631  
1632  	if (!amdgpu_device_read_bios(adev))
1633  		return false;
1634  
1635  	if (amdgpu_passthrough(adev)) {
1636  		/* For FIJI: In the whole-GPU pass-through virtualization case, after a VM
1637  		 * reboot some old SMC firmware still needs the driver to do a vPost or the
1638  		 * GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
1639  		 * force vPost for SMC versions below 22.15.
1640  		 */
1641  		if (adev->asic_type == CHIP_FIJI) {
1642  			int err;
1643  			uint32_t fw_ver;
1644  
1645  			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1646  			/* force vPost if an error occurred */
1647  			if (err)
1648  				return true;
1649  
1650  			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1651  			release_firmware(adev->pm.fw);
1652  			if (fw_ver < 0x00160e00)
1653  				return true;
1654  		}
1655  	}
1656  
1657  	/* Don't post if we need to reset whole hive on init */
1658  	if (adev->gmc.xgmi.pending_reset)
1659  		return false;
1660  
1661  	if (adev->has_hw_reset) {
1662  		adev->has_hw_reset = false;
1663  		return true;
1664  	}
1665  
1666  	/* bios scratch used on CIK+ */
1667  	if (adev->asic_type >= CHIP_BONAIRE)
1668  		return amdgpu_atombios_scratch_need_asic_init(adev);
1669  
1670  	/* check MEM_SIZE for older asics */
1671  	reg = amdgpu_asic_get_config_memsize(adev);
1672  
1673  	if ((reg != 0) && (reg != 0xffffffff))
1674  		return false;
1675  
1676  	return true;
1677  }
1678  
1679  /*
1680   * Check whether seamless boot is supported.
1681   *
1682   * So far we only support seamless boot on DCE 3.0 or later.
1683   * If users report that it works on older ASICs as well, we may
1684   * loosen this.
1685   */
1686  bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1687  {
1688  	switch (amdgpu_seamless) {
1689  	case -1:
1690  		break;
1691  	case 1:
1692  		return true;
1693  	case 0:
1694  		return false;
1695  	default:
1696  		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1697  			  amdgpu_seamless);
1698  		return false;
1699  	}
1700  
1701  	if (!(adev->flags & AMD_IS_APU))
1702  		return false;
1703  
1704  	if (adev->mman.keep_stolen_vga_memory)
1705  		return false;
1706  
1707  	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1708  }
1709  
1710  /*
1711   * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1712   * don't support dynamic speed switching. Until we have confirmation from Intel
1713   * that a specific host supports it, it's safer that we keep it disabled for all.
1714   *
1715   * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1716   * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1717   */
1718  static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1719  {
1720  #if IS_ENABLED(CONFIG_X86)
1721  	struct cpuinfo_x86 *c = &cpu_data(0);
1722  
1723  	/* eGPUs change speed based on USB4 fabric conditions */
1724  	if (dev_is_removable(adev->dev))
1725  		return true;
1726  
1727  	if (c->x86_vendor == X86_VENDOR_INTEL)
1728  		return false;
1729  #endif
1730  	return true;
1731  }
1732  
1733  /**
1734   * amdgpu_device_should_use_aspm - check if the device should program ASPM
1735   *
1736   * @adev: amdgpu_device pointer
1737   *
1738   * Confirm whether the module parameter and pcie bridge agree that ASPM should
1739   * be set for this device.
1740   *
1741   * Returns true if it should be used or false if not.
1742   */
1743  bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1744  {
1745  	switch (amdgpu_aspm) {
1746  	case -1:
1747  		break;
1748  	case 0:
1749  		return false;
1750  	case 1:
1751  		return true;
1752  	default:
1753  		return false;
1754  	}
1755  	if (adev->flags & AMD_IS_APU)
1756  		return false;
1757  	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1758  		return false;
1759  	return pcie_aspm_enabled(adev->pdev);
1760  }
1761  
1762  /* if we get transitioned to only one device, take VGA back */
1763  /**
1764   * amdgpu_device_vga_set_decode - enable/disable vga decode
1765   *
1766   * @pdev: PCI device pointer
1767   * @state: enable/disable vga decode
1768   *
1769   * Enable/disable vga decode (all asics).
1770   * Returns VGA resource flags.
1771   */
1772  static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1773  		bool state)
1774  {
1775  	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1776  
1777  	amdgpu_asic_set_vga_state(adev, state);
1778  	if (state)
1779  		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1780  		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1781  	else
1782  		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1783  }
1784  
1785  /**
1786   * amdgpu_device_check_block_size - validate the vm block size
1787   *
1788   * @adev: amdgpu_device pointer
1789   *
1790   * Validates the vm block size specified via module parameter.
1791   * The vm block size defines number of bits in page table versus page directory,
1792   * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1793   * page table and the remaining bits are in the page directory.
1794   */
1795  static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1796  {
1797  	/* defines number of bits in page table versus page directory,
1798  	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1799  	 * page table and the remaining bits are in the page directory
1800  	 */
1801  	if (amdgpu_vm_block_size == -1)
1802  		return;
1803  
1804  	if (amdgpu_vm_block_size < 9) {
1805  		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1806  			 amdgpu_vm_block_size);
1807  		amdgpu_vm_block_size = -1;
1808  	}
1809  }
1810  
1811  /**
1812   * amdgpu_device_check_vm_size - validate the vm size
1813   *
1814   * @adev: amdgpu_device pointer
1815   *
1816   * Validates the vm size in GB specified via module parameter.
1817   * The VM size is the size of the GPU virtual memory space in GB.
1818   */
1819  static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1820  {
1821  	/* no need to check the default value */
1822  	if (amdgpu_vm_size == -1)
1823  		return;
1824  
1825  	if (amdgpu_vm_size < 1) {
1826  		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1827  			 amdgpu_vm_size);
1828  		amdgpu_vm_size = -1;
1829  	}
1830  }
1831  
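/**
 * amdgpu_device_check_smu_prv_buffer_size - validate the SMU memory pool size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the amdgpu_smu_memory_pool_size module parameter (in units of
 * 256MB) against the amount of installed system memory and falls back to a
 * disabled pool (size 0) when the request cannot be satisfied.
 */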
1832  static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1833  {
1834  	struct sysinfo si;
1835  	bool is_os_64 = (sizeof(void *) == 8);
1836  	uint64_t total_memory;
1837  	uint64_t dram_size_seven_GB = 0x1B8000000;
1838  	uint64_t dram_size_three_GB = 0xB8000000;
1839  
1840  	if (amdgpu_smu_memory_pool_size == 0)
1841  		return;
1842  
1843  	if (!is_os_64) {
1844  		DRM_WARN("Not 64-bit OS, feature not supported\n");
1845  		goto def_value;
1846  	}
1847  	si_meminfo(&si);
1848  	total_memory = (uint64_t)si.totalram * si.mem_unit;
1849  
1850  	if ((amdgpu_smu_memory_pool_size == 1) ||
1851  		(amdgpu_smu_memory_pool_size == 2)) {
1852  		if (total_memory < dram_size_three_GB)
1853  			goto def_value1;
1854  	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1855  		(amdgpu_smu_memory_pool_size == 8)) {
1856  		if (total_memory < dram_size_seven_GB)
1857  			goto def_value1;
1858  	} else {
1859  		DRM_WARN("Smu memory pool size not supported\n");
1860  		goto def_value;
1861  	}
1862  	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1863  
1864  	return;
1865  
1866  def_value1:
1867  	DRM_WARN("Not enough system memory\n");
1868  def_value:
1869  	adev->pm.smu_prv_buffer_size = 0;
1870  }
1871  
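/**
 * amdgpu_device_init_apu_flags - set the APU identification flags
 *
 * @adev: amdgpu_device pointer
 *
 * Sets apu_flags based on the APU asic type and PCI device ID so that later
 * code can tell e.g. Raven from Picasso or Renoir from Green Sardine apart.
 * Does nothing for dGPUs and pre-Raven APUs.
 */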
1872  static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1873  {
1874  	if (!(adev->flags & AMD_IS_APU) ||
1875  	    adev->asic_type < CHIP_RAVEN)
1876  		return 0;
1877  
1878  	switch (adev->asic_type) {
1879  	case CHIP_RAVEN:
1880  		if (adev->pdev->device == 0x15dd)
1881  			adev->apu_flags |= AMD_APU_IS_RAVEN;
1882  		if (adev->pdev->device == 0x15d8)
1883  			adev->apu_flags |= AMD_APU_IS_PICASSO;
1884  		break;
1885  	case CHIP_RENOIR:
1886  		if ((adev->pdev->device == 0x1636) ||
1887  		    (adev->pdev->device == 0x164c))
1888  			adev->apu_flags |= AMD_APU_IS_RENOIR;
1889  		else
1890  			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1891  		break;
1892  	case CHIP_VANGOGH:
1893  		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1894  		break;
1895  	case CHIP_YELLOW_CARP:
1896  		break;
1897  	case CHIP_CYAN_SKILLFISH:
1898  		if ((adev->pdev->device == 0x13FE) ||
1899  		    (adev->pdev->device == 0x143F))
1900  			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1901  		break;
1902  	default:
1903  		break;
1904  	}
1905  
1906  	return 0;
1907  }
1908  
1909  /**
1910   * amdgpu_device_check_arguments - validate module params
1911   *
1912   * @adev: amdgpu_device pointer
1913   *
1914   * Validates certain module parameters and updates
1915   * the associated values used by the driver (all asics).
1916   */
1917  static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1918  {
1919  	int i;
1920  
1921  	if (amdgpu_sched_jobs < 4) {
1922  		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1923  			 amdgpu_sched_jobs);
1924  		amdgpu_sched_jobs = 4;
1925  	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1926  		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1927  			 amdgpu_sched_jobs);
1928  		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1929  	}
1930  
1931  	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1932  		/* gart size must be greater or equal to 32M */
1933  		dev_warn(adev->dev, "gart size (%d) too small\n",
1934  			 amdgpu_gart_size);
1935  		amdgpu_gart_size = -1;
1936  	}
1937  
1938  	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1939  		/* gtt size must be greater or equal to 32M */
1940  		dev_warn(adev->dev, "gtt size (%d) too small\n",
1941  				 amdgpu_gtt_size);
1942  		amdgpu_gtt_size = -1;
1943  	}
1944  
1945  	/* valid range is between 4 and 9 inclusive */
1946  	if (amdgpu_vm_fragment_size != -1 &&
1947  	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1948  		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1949  		amdgpu_vm_fragment_size = -1;
1950  	}
1951  
1952  	if (amdgpu_sched_hw_submission < 2) {
1953  		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1954  			 amdgpu_sched_hw_submission);
1955  		amdgpu_sched_hw_submission = 2;
1956  	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1957  		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1958  			 amdgpu_sched_hw_submission);
1959  		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1960  	}
1961  
1962  	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1963  		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1964  		amdgpu_reset_method = -1;
1965  	}
1966  
1967  	amdgpu_device_check_smu_prv_buffer_size(adev);
1968  
1969  	amdgpu_device_check_vm_size(adev);
1970  
1971  	amdgpu_device_check_block_size(adev);
1972  
1973  	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1974  
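	/* Propagate the global enforce_isolation setting to every partition (XCP) */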
1975  	for (i = 0; i < MAX_XCP; i++)
1976  		adev->enforce_isolation[i] = !!enforce_isolation;
1977  
1978  	return 0;
1979  }
1980  
1981  /**
1982   * amdgpu_switcheroo_set_state - set switcheroo state
1983   *
1984   * @pdev: pci dev pointer
1985   * @state: vga_switcheroo state
1986   *
1987   * Callback for the switcheroo driver.  Suspends or resumes
1988   * the asic before or after it is powered up or down using ACPI methods.
1989   */
1990  static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1991  					enum vga_switcheroo_state state)
1992  {
1993  	struct drm_device *dev = pci_get_drvdata(pdev);
1994  	int r;
1995  
1996  	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1997  		return;
1998  
1999  	if (state == VGA_SWITCHEROO_ON) {
2000  		pr_info("switched on\n");
2001  		/* don't suspend or resume card normally */
2002  		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2003  
2004  		pci_set_power_state(pdev, PCI_D0);
2005  		amdgpu_device_load_pci_state(pdev);
2006  		r = pci_enable_device(pdev);
2007  		if (r)
2008  			DRM_WARN("pci_enable_device failed (%d)\n", r);
2009  		amdgpu_device_resume(dev, true);
2010  
2011  		dev->switch_power_state = DRM_SWITCH_POWER_ON;
2012  	} else {
2013  		pr_info("switched off\n");
2014  		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2015  		amdgpu_device_prepare(dev);
2016  		amdgpu_device_suspend(dev, true);
2017  		amdgpu_device_cache_pci_state(pdev);
2018  		/* Shut down the device */
2019  		pci_disable_device(pdev);
2020  		pci_set_power_state(pdev, PCI_D3cold);
2021  		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2022  	}
2023  }
2024  
2025  /**
2026   * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2027   *
2028   * @pdev: pci dev pointer
2029   *
2030   * Callback for the switcheroo driver.  Check if the switcheroo
2031   * state can be changed.
2032   * Returns true if the state can be changed, false if not.
2033   */
2034  static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2035  {
2036  	struct drm_device *dev = pci_get_drvdata(pdev);
2037  
2038         /*
2039  	* FIXME: open_count is protected by drm_global_mutex but that would lead to
2040  	* locking inversion with the driver load path. And the access here is
2041  	* completely racy anyway. So don't bother with locking for now.
2042  	*/
2043  	return atomic_read(&dev->open_count) == 0;
2044  }
2045  
2046  static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2047  	.set_gpu_state = amdgpu_switcheroo_set_state,
2048  	.reprobe = NULL,
2049  	.can_switch = amdgpu_switcheroo_can_switch,
2050  };
2051  
2052  /**
2053   * amdgpu_device_ip_set_clockgating_state - set the CG state
2054   *
2055   * @dev: amdgpu_device pointer
2056   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2057   * @state: clockgating state (gate or ungate)
2058   *
2059   * Sets the requested clockgating state for all instances of
2060   * the hardware IP specified.
2061   * Returns the error code from the last instance.
2062   */
2063  int amdgpu_device_ip_set_clockgating_state(void *dev,
2064  					   enum amd_ip_block_type block_type,
2065  					   enum amd_clockgating_state state)
2066  {
2067  	struct amdgpu_device *adev = dev;
2068  	int i, r = 0;
2069  
2070  	for (i = 0; i < adev->num_ip_blocks; i++) {
2071  		if (!adev->ip_blocks[i].status.valid)
2072  			continue;
2073  		if (adev->ip_blocks[i].version->type != block_type)
2074  			continue;
2075  		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2076  			continue;
2077  		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2078  			(void *)adev, state);
2079  		if (r)
2080  			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2081  				  adev->ip_blocks[i].version->funcs->name, r);
2082  	}
2083  	return r;
2084  }
2085  
2086  /**
2087   * amdgpu_device_ip_set_powergating_state - set the PG state
2088   *
2089   * @dev: amdgpu_device pointer
2090   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2091   * @state: powergating state (gate or ungate)
2092   *
2093   * Sets the requested powergating state for all instances of
2094   * the hardware IP specified.
2095   * Returns the error code from the last instance.
2096   */
2097  int amdgpu_device_ip_set_powergating_state(void *dev,
2098  					   enum amd_ip_block_type block_type,
2099  					   enum amd_powergating_state state)
2100  {
2101  	struct amdgpu_device *adev = dev;
2102  	int i, r = 0;
2103  
2104  	for (i = 0; i < adev->num_ip_blocks; i++) {
2105  		if (!adev->ip_blocks[i].status.valid)
2106  			continue;
2107  		if (adev->ip_blocks[i].version->type != block_type)
2108  			continue;
2109  		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2110  			continue;
2111  		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2112  			(void *)adev, state);
2113  		if (r)
2114  			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2115  				  adev->ip_blocks[i].version->funcs->name, r);
2116  	}
2117  	return r;
2118  }
2119  
2120  /**
2121   * amdgpu_device_ip_get_clockgating_state - get the CG state
2122   *
2123   * @adev: amdgpu_device pointer
2124   * @flags: clockgating feature flags
2125   *
2126   * Walks the list of IPs on the device and updates the clockgating
2127   * flags for each IP.
2128   * Updates @flags with the feature flags for each hardware IP where
2129   * clockgating is enabled.
2130   */
2131  void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2132  					    u64 *flags)
2133  {
2134  	int i;
2135  
2136  	for (i = 0; i < adev->num_ip_blocks; i++) {
2137  		if (!adev->ip_blocks[i].status.valid)
2138  			continue;
2139  		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2140  			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2141  	}
2142  }
2143  
2144  /**
2145   * amdgpu_device_ip_wait_for_idle - wait for idle
2146   *
2147   * @adev: amdgpu_device pointer
2148   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2149   *
2150   * Waits for the requested hardware IP to be idle.
2151   * Returns 0 for success or a negative error code on failure.
2152   */
2153  int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2154  				   enum amd_ip_block_type block_type)
2155  {
2156  	int i, r;
2157  
2158  	for (i = 0; i < adev->num_ip_blocks; i++) {
2159  		if (!adev->ip_blocks[i].status.valid)
2160  			continue;
2161  		if (adev->ip_blocks[i].version->type == block_type) {
2162  			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2163  			if (r)
2164  				return r;
2165  			break;
2166  		}
2167  	}
2168  	return 0;
2169  
2170  }
2171  
2172  /**
2173   * amdgpu_device_ip_is_idle - is the hardware IP idle
2174   *
2175   * @adev: amdgpu_device pointer
2176   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2177   *
2178   * Check if the hardware IP is idle or not.
2179   * Returns true if the IP is idle, false if not.
2180   */
2181  bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2182  			      enum amd_ip_block_type block_type)
2183  {
2184  	int i;
2185  
2186  	for (i = 0; i < adev->num_ip_blocks; i++) {
2187  		if (!adev->ip_blocks[i].status.valid)
2188  			continue;
2189  		if (adev->ip_blocks[i].version->type == block_type)
2190  			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2191  	}
2192  	return true;
2193  
2194  }
2195  
2196  /**
2197   * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2198   *
2199   * @adev: amdgpu_device pointer
2200   * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2201   *
2202   * Returns a pointer to the hardware IP block structure
2203   * if it exists for the asic, otherwise NULL.
2204   */
2205  struct amdgpu_ip_block *
2206  amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2207  			      enum amd_ip_block_type type)
2208  {
2209  	int i;
2210  
2211  	for (i = 0; i < adev->num_ip_blocks; i++)
2212  		if (adev->ip_blocks[i].version->type == type)
2213  			return &adev->ip_blocks[i];
2214  
2215  	return NULL;
2216  }
2217  
2218  /**
2219   * amdgpu_device_ip_block_version_cmp
2220   *
2221   * @adev: amdgpu_device pointer
2222   * @type: enum amd_ip_block_type
2223   * @major: major version
2224   * @minor: minor version
2225   *
2226   * return 0 if equal or greater
2227   * return 1 if smaller or the ip_block doesn't exist
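 *
 * For example, amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 0)
 * returns 0 when the asic's SMC IP block is at least version 7.0.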
2228   */
2229  int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2230  				       enum amd_ip_block_type type,
2231  				       u32 major, u32 minor)
2232  {
2233  	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2234  
2235  	if (ip_block && ((ip_block->version->major > major) ||
2236  			((ip_block->version->major == major) &&
2237  			(ip_block->version->minor >= minor))))
2238  		return 0;
2239  
2240  	return 1;
2241  }
2242  
2243  /**
2244   * amdgpu_device_ip_block_add
2245   *
2246   * @adev: amdgpu_device pointer
2247   * @ip_block_version: pointer to the IP to add
2248   *
2249   * Adds the IP block driver information to the collection of IPs
2250   * on the asic.
2251   */
2252  int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2253  			       const struct amdgpu_ip_block_version *ip_block_version)
2254  {
2255  	if (!ip_block_version)
2256  		return -EINVAL;
2257  
2258  	switch (ip_block_version->type) {
2259  	case AMD_IP_BLOCK_TYPE_VCN:
2260  		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2261  			return 0;
2262  		break;
2263  	case AMD_IP_BLOCK_TYPE_JPEG:
2264  		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2265  			return 0;
2266  		break;
2267  	default:
2268  		break;
2269  	}
2270  
2271  	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2272  		  ip_block_version->funcs->name);
2273  
2274  	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2275  
2276  	return 0;
2277  }
2278  
2279  /**
2280   * amdgpu_device_enable_virtual_display - enable virtual display feature
2281   *
2282   * @adev: amdgpu_device pointer
2283   *
2284   * Enables the virtual display feature if the user has enabled it via
2285   * the module parameter virtual_display.  This feature provides virtual
2286   * display hardware on headless boards or in virtualized environments.
2287   * This function parses and validates the configuration string specified by
2288   * the user and configures the virtual display configuration (number of
2289   * virtual connectors, crtcs, etc.) specified.
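 *
 * For example, setting amdgpu.virtual_display=0000:03:00.0,2 (the PCI address
 * here is purely illustrative) enables two virtual crtcs on that device; the
 * keyword "all" matches every amdgpu device.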
2290   */
2291  static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2292  {
2293  	adev->enable_virtual_display = false;
2294  
2295  	if (amdgpu_virtual_display) {
2296  		const char *pci_address_name = pci_name(adev->pdev);
2297  		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2298  
2299  		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2300  		pciaddstr_tmp = pciaddstr;
2301  		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2302  			pciaddname = strsep(&pciaddname_tmp, ",");
2303  			if (!strcmp("all", pciaddname)
2304  			    || !strcmp(pci_address_name, pciaddname)) {
2305  				long num_crtc;
2306  				int res = -1;
2307  
2308  				adev->enable_virtual_display = true;
2309  
2310  				if (pciaddname_tmp)
2311  					res = kstrtol(pciaddname_tmp, 10,
2312  						      &num_crtc);
2313  
2314  				if (!res) {
2315  					if (num_crtc < 1)
2316  						num_crtc = 1;
2317  					if (num_crtc > 6)
2318  						num_crtc = 6;
2319  					adev->mode_info.num_crtc = num_crtc;
2320  				} else {
2321  					adev->mode_info.num_crtc = 1;
2322  				}
2323  				break;
2324  			}
2325  		}
2326  
2327  		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2328  			 amdgpu_virtual_display, pci_address_name,
2329  			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2330  
2331  		kfree(pciaddstr);
2332  	}
2333  }
2334  
2335  void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2336  {
2337  	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2338  		adev->mode_info.num_crtc = 1;
2339  		adev->enable_virtual_display = true;
2340  		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2341  			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2342  	}
2343  }
2344  
2345  /**
2346   * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2347   *
2348   * @adev: amdgpu_device pointer
2349   *
2350   * Parses the asic configuration parameters specified in the gpu info
2351   * firmware and makes them available to the driver for use in configuring
2352   * the asic.
2353   * Returns 0 on success, -EINVAL on failure.
2354   */
2355  static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2356  {
2357  	const char *chip_name;
2358  	int err;
2359  	const struct gpu_info_firmware_header_v1_0 *hdr;
2360  
2361  	adev->firmware.gpu_info_fw = NULL;
2362  
2363  	if (adev->mman.discovery_bin)
2364  		return 0;
2365  
2366  	switch (adev->asic_type) {
2367  	default:
2368  		return 0;
2369  	case CHIP_VEGA10:
2370  		chip_name = "vega10";
2371  		break;
2372  	case CHIP_VEGA12:
2373  		chip_name = "vega12";
2374  		break;
2375  	case CHIP_RAVEN:
2376  		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2377  			chip_name = "raven2";
2378  		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2379  			chip_name = "picasso";
2380  		else
2381  			chip_name = "raven";
2382  		break;
2383  	case CHIP_ARCTURUS:
2384  		chip_name = "arcturus";
2385  		break;
2386  	case CHIP_NAVI12:
2387  		chip_name = "navi12";
2388  		break;
2389  	}
2390  
2391  	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
2392  				   "amdgpu/%s_gpu_info.bin", chip_name);
2393  	if (err) {
2394  		dev_err(adev->dev,
2395  			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2396  			chip_name);
2397  		goto out;
2398  	}
2399  
2400  	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2401  	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2402  
2403  	switch (hdr->version_major) {
2404  	case 1:
2405  	{
2406  		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2407  			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2408  								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2409  
2410  		/*
2411  		 * Should be dropped when DAL no longer needs it.
2412  		 */
2413  		if (adev->asic_type == CHIP_NAVI12)
2414  			goto parse_soc_bounding_box;
2415  
2416  		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2417  		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2418  		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2419  		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2420  		adev->gfx.config.max_texture_channel_caches =
2421  			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2422  		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2423  		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2424  		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2425  		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2426  		adev->gfx.config.double_offchip_lds_buf =
2427  			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2428  		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2429  		adev->gfx.cu_info.max_waves_per_simd =
2430  			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2431  		adev->gfx.cu_info.max_scratch_slots_per_cu =
2432  			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2433  		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2434  		if (hdr->version_minor >= 1) {
2435  			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2436  				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2437  									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2438  			adev->gfx.config.num_sc_per_sh =
2439  				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2440  			adev->gfx.config.num_packer_per_sc =
2441  				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2442  		}
2443  
2444  parse_soc_bounding_box:
2445  		/*
2446  		 * soc bounding box info is not integrated into the discovery table,
2447  		 * so we always need to parse it from the gpu info firmware when needed.
2448  		 */
2449  		if (hdr->version_minor == 2) {
2450  			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2451  				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2452  									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2453  			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2454  		}
2455  		break;
2456  	}
2457  	default:
2458  		dev_err(adev->dev,
2459  			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2460  		err = -EINVAL;
2461  		goto out;
2462  	}
2463  out:
2464  	return err;
2465  }
2466  
2467  /**
2468   * amdgpu_device_ip_early_init - run early init for hardware IPs
2469   *
2470   * @adev: amdgpu_device pointer
2471   *
2472   * Early initialization pass for hardware IPs.  The hardware IPs that make
2473   * up each asic are discovered and each IP's early_init callback is run.  This
2474   * is the first stage in initializing the asic.
2475   * Returns 0 on success, negative error code on failure.
2476   */
2477  static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2478  {
2479  	struct amdgpu_ip_block *ip_block;
2480  	struct pci_dev *parent;
2481  	int i, r;
2482  	bool total;
2483  
2484  	amdgpu_device_enable_virtual_display(adev);
2485  
2486  	if (amdgpu_sriov_vf(adev)) {
2487  		r = amdgpu_virt_request_full_gpu(adev, true);
2488  		if (r)
2489  			return r;
2490  	}
2491  
2492  	switch (adev->asic_type) {
2493  #ifdef CONFIG_DRM_AMDGPU_SI
2494  	case CHIP_VERDE:
2495  	case CHIP_TAHITI:
2496  	case CHIP_PITCAIRN:
2497  	case CHIP_OLAND:
2498  	case CHIP_HAINAN:
2499  		adev->family = AMDGPU_FAMILY_SI;
2500  		r = si_set_ip_blocks(adev);
2501  		if (r)
2502  			return r;
2503  		break;
2504  #endif
2505  #ifdef CONFIG_DRM_AMDGPU_CIK
2506  	case CHIP_BONAIRE:
2507  	case CHIP_HAWAII:
2508  	case CHIP_KAVERI:
2509  	case CHIP_KABINI:
2510  	case CHIP_MULLINS:
2511  		if (adev->flags & AMD_IS_APU)
2512  			adev->family = AMDGPU_FAMILY_KV;
2513  		else
2514  			adev->family = AMDGPU_FAMILY_CI;
2515  
2516  		r = cik_set_ip_blocks(adev);
2517  		if (r)
2518  			return r;
2519  		break;
2520  #endif
2521  	case CHIP_TOPAZ:
2522  	case CHIP_TONGA:
2523  	case CHIP_FIJI:
2524  	case CHIP_POLARIS10:
2525  	case CHIP_POLARIS11:
2526  	case CHIP_POLARIS12:
2527  	case CHIP_VEGAM:
2528  	case CHIP_CARRIZO:
2529  	case CHIP_STONEY:
2530  		if (adev->flags & AMD_IS_APU)
2531  			adev->family = AMDGPU_FAMILY_CZ;
2532  		else
2533  			adev->family = AMDGPU_FAMILY_VI;
2534  
2535  		r = vi_set_ip_blocks(adev);
2536  		if (r)
2537  			return r;
2538  		break;
2539  	default:
2540  		r = amdgpu_discovery_set_ip_blocks(adev);
2541  		if (r)
2542  			return r;
2543  		break;
2544  	}
2545  
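	/* A dGPU with ATPX power control that is not on a removable (eGPU) link
	 * is treated as a PX (PowerXpress/hybrid graphics) platform.
	 */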
2546  	if (amdgpu_has_atpx() &&
2547  	    (amdgpu_is_atpx_hybrid() ||
2548  	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2549  	    ((adev->flags & AMD_IS_APU) == 0) &&
2550  	    !dev_is_removable(&adev->pdev->dev))
2551  		adev->flags |= AMD_IS_PX;
2552  
2553  	if (!(adev->flags & AMD_IS_APU)) {
2554  		parent = pcie_find_root_port(adev->pdev);
2555  		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2556  	}
2557  
2558  
2559  	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2560  	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2561  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2562  	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2563  		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2564  	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2565  		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2566  
2567  	total = true;
2568  	for (i = 0; i < adev->num_ip_blocks; i++) {
2569  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2570  			DRM_WARN("disabled ip block: %d <%s>\n",
2571  				  i, adev->ip_blocks[i].version->funcs->name);
2572  			adev->ip_blocks[i].status.valid = false;
2573  		} else {
2574  			if (adev->ip_blocks[i].version->funcs->early_init) {
2575  				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2576  				if (r == -ENOENT) {
2577  					adev->ip_blocks[i].status.valid = false;
2578  				} else if (r) {
2579  					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2580  						  adev->ip_blocks[i].version->funcs->name, r);
2581  					total = false;
2582  				} else {
2583  					adev->ip_blocks[i].status.valid = true;
2584  				}
2585  			} else {
2586  				adev->ip_blocks[i].status.valid = true;
2587  			}
2588  		}
2589  		/* get the vbios after the asic_funcs are set up */
2590  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2591  			r = amdgpu_device_parse_gpu_info_fw(adev);
2592  			if (r)
2593  				return r;
2594  
2595  			/* Read BIOS */
2596  			if (amdgpu_device_read_bios(adev)) {
2597  				if (!amdgpu_get_bios(adev))
2598  					return -EINVAL;
2599  
2600  				r = amdgpu_atombios_init(adev);
2601  				if (r) {
2602  					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2603  					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2604  					return r;
2605  				}
2606  			}
2607  
2608  			/* get pf2vf msg info at its earliest time */
2609  			if (amdgpu_sriov_vf(adev))
2610  				amdgpu_virt_init_data_exchange(adev);
2611  
2612  		}
2613  	}
2614  	if (!total)
2615  		return -ENODEV;
2616  
2617  	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2618  	if (ip_block->status.valid != false)
2619  		amdgpu_amdkfd_device_probe(adev);
2620  
2621  	adev->cg_flags &= amdgpu_cg_mask;
2622  	adev->pg_flags &= amdgpu_pg_mask;
2623  
2624  	return 0;
2625  }
2626  
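/* Phase 1 brings up only the IP blocks everything else depends on: the COMMON
 * block, the interrupt handler (IH) and, under SR-IOV, the PSP block.
 */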
2627  static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2628  {
2629  	int i, r;
2630  
2631  	for (i = 0; i < adev->num_ip_blocks; i++) {
2632  		if (!adev->ip_blocks[i].status.sw)
2633  			continue;
2634  		if (adev->ip_blocks[i].status.hw)
2635  			continue;
2636  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2637  		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2638  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2639  			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2640  			if (r) {
2641  				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2642  					  adev->ip_blocks[i].version->funcs->name, r);
2643  				return r;
2644  			}
2645  			adev->ip_blocks[i].status.hw = true;
2646  		}
2647  	}
2648  
2649  	return 0;
2650  }
2651  
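/* Phase 2 runs hw_init for every remaining software-initialized block that has
 * not already been brought up in phase 1 or during firmware loading.
 */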
2652  static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2653  {
2654  	int i, r;
2655  
2656  	for (i = 0; i < adev->num_ip_blocks; i++) {
2657  		if (!adev->ip_blocks[i].status.sw)
2658  			continue;
2659  		if (adev->ip_blocks[i].status.hw)
2660  			continue;
2661  		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2662  		if (r) {
2663  			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2664  				  adev->ip_blocks[i].version->funcs->name, r);
2665  			return r;
2666  		}
2667  		adev->ip_blocks[i].status.hw = true;
2668  	}
2669  
2670  	return 0;
2671  }
2672  
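/* On VEGA10 and newer parts firmware loading goes through the PSP block, so
 * bring up (or resume) PSP before the remaining IP blocks; the SMU firmware is
 * then loaded on bare metal (and on Tonga even under SR-IOV).
 */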
2673  static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2674  {
2675  	int r = 0;
2676  	int i;
2677  	uint32_t smu_version;
2678  
2679  	if (adev->asic_type >= CHIP_VEGA10) {
2680  		for (i = 0; i < adev->num_ip_blocks; i++) {
2681  			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2682  				continue;
2683  
2684  			if (!adev->ip_blocks[i].status.sw)
2685  				continue;
2686  
2687  			/* no need to do the fw loading again if already done*/
2688  			if (adev->ip_blocks[i].status.hw == true)
2689  				break;
2690  
2691  			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2692  				r = adev->ip_blocks[i].version->funcs->resume(adev);
2693  				if (r) {
2694  					DRM_ERROR("resume of IP block <%s> failed %d\n",
2695  							  adev->ip_blocks[i].version->funcs->name, r);
2696  					return r;
2697  				}
2698  			} else {
2699  				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2700  				if (r) {
2701  					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2702  							  adev->ip_blocks[i].version->funcs->name, r);
2703  					return r;
2704  				}
2705  			}
2706  
2707  			adev->ip_blocks[i].status.hw = true;
2708  			break;
2709  		}
2710  	}
2711  
2712  	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2713  		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2714  
2715  	return r;
2716  }
2717  
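/* Create one DRM GPU scheduler per ring, using the timeout configured for that
 * ring type, and set up the UVD/VCE scheduling entities where applicable.
 */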
2718  static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2719  {
2720  	long timeout;
2721  	int r, i;
2722  
2723  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2724  		struct amdgpu_ring *ring = adev->rings[i];
2725  
2726  		/* No need to setup the GPU scheduler for rings that don't need it */
2727  		if (!ring || ring->no_scheduler)
2728  			continue;
2729  
2730  		switch (ring->funcs->type) {
2731  		case AMDGPU_RING_TYPE_GFX:
2732  			timeout = adev->gfx_timeout;
2733  			break;
2734  		case AMDGPU_RING_TYPE_COMPUTE:
2735  			timeout = adev->compute_timeout;
2736  			break;
2737  		case AMDGPU_RING_TYPE_SDMA:
2738  			timeout = adev->sdma_timeout;
2739  			break;
2740  		default:
2741  			timeout = adev->video_timeout;
2742  			break;
2743  		}
2744  
2745  		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2746  				   DRM_SCHED_PRIORITY_COUNT,
2747  				   ring->num_hw_submission, 0,
2748  				   timeout, adev->reset_domain->wq,
2749  				   ring->sched_score, ring->name,
2750  				   adev->dev);
2751  		if (r) {
2752  			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2753  				  ring->name);
2754  			return r;
2755  		}
2756  		r = amdgpu_uvd_entity_init(adev, ring);
2757  		if (r) {
2758  			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2759  				  ring->name);
2760  			return r;
2761  		}
2762  		r = amdgpu_vce_entity_init(adev, ring);
2763  		if (r) {
2764  			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2765  				  ring->name);
2766  			return r;
2767  		}
2768  	}
2769  
2770  	amdgpu_xcp_update_partition_sched_list(adev);
2771  
2772  	return 0;
2773  }
2774  
2775  
2776  /**
2777   * amdgpu_device_ip_init - run init for hardware IPs
2778   *
2779   * @adev: amdgpu_device pointer
2780   *
2781   * Main initialization pass for hardware IPs.  The list of all the hardware
2782   * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2783   * are run.  sw_init initializes the software state associated with each IP
2784   * and hw_init initializes the hardware associated with each IP.
2785   * Returns 0 on success, negative error code on failure.
2786   */
2787  static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2788  {
2789  	int i, r;
2790  
2791  	r = amdgpu_ras_init(adev);
2792  	if (r)
2793  		return r;
2794  
2795  	for (i = 0; i < adev->num_ip_blocks; i++) {
2796  		if (!adev->ip_blocks[i].status.valid)
2797  			continue;
2798  		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2799  		if (r) {
2800  			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2801  				  adev->ip_blocks[i].version->funcs->name, r);
2802  			goto init_failed;
2803  		}
2804  		adev->ip_blocks[i].status.sw = true;
2805  
2806  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2807  			/* need to do common hw init early so everything is set up for gmc */
2808  			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2809  			if (r) {
2810  				DRM_ERROR("hw_init %d failed %d\n", i, r);
2811  				goto init_failed;
2812  			}
2813  			adev->ip_blocks[i].status.hw = true;
2814  		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2815  			/* need to do gmc hw init early so we can allocate gpu mem */
2816  			/* Try to reserve bad pages early */
2817  			if (amdgpu_sriov_vf(adev))
2818  				amdgpu_virt_exchange_data(adev);
2819  
2820  			r = amdgpu_device_mem_scratch_init(adev);
2821  			if (r) {
2822  				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2823  				goto init_failed;
2824  			}
2825  			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2826  			if (r) {
2827  				DRM_ERROR("hw_init %d failed %d\n", i, r);
2828  				goto init_failed;
2829  			}
2830  			r = amdgpu_device_wb_init(adev);
2831  			if (r) {
2832  				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2833  				goto init_failed;
2834  			}
2835  			adev->ip_blocks[i].status.hw = true;
2836  
2837  			/* right after GMC hw init, we create CSA */
2838  			if (adev->gfx.mcbp) {
2839  				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2840  							       AMDGPU_GEM_DOMAIN_VRAM |
2841  							       AMDGPU_GEM_DOMAIN_GTT,
2842  							       AMDGPU_CSA_SIZE);
2843  				if (r) {
2844  					DRM_ERROR("allocate CSA failed %d\n", r);
2845  					goto init_failed;
2846  				}
2847  			}
2848  
2849  			r = amdgpu_seq64_init(adev);
2850  			if (r) {
2851  				DRM_ERROR("allocate seq64 failed %d\n", r);
2852  				goto init_failed;
2853  			}
2854  		}
2855  	}
2856  
2857  	if (amdgpu_sriov_vf(adev))
2858  		amdgpu_virt_init_data_exchange(adev);
2859  
2860  	r = amdgpu_ib_pool_init(adev);
2861  	if (r) {
2862  		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2863  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2864  		goto init_failed;
2865  	}
2866  
2867  	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2868  	if (r)
2869  		goto init_failed;
2870  
2871  	r = amdgpu_device_ip_hw_init_phase1(adev);
2872  	if (r)
2873  		goto init_failed;
2874  
2875  	r = amdgpu_device_fw_loading(adev);
2876  	if (r)
2877  		goto init_failed;
2878  
2879  	r = amdgpu_device_ip_hw_init_phase2(adev);
2880  	if (r)
2881  		goto init_failed;
2882  
2883  	/*
2884  	 * retired pages will be loaded from eeprom and reserved here,
2885  	 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2886  	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2887  	 * functional for I2C communication, which is only true at this point.
2888  	 *
2889  	 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2890  	 * about failures caused by a bad gpu, and stop the amdgpu init process
2891  	 * accordingly. For other failures, it still releases all the resources
2892  	 * and prints an error message, rather than returning a negative value
2893  	 * to the upper level.
2894  	 *
2895  	 * Note: theoretically, this should be called before all vram allocations
2896  	 * to protect retired pages from being abused
2897  	 */
2898  	r = amdgpu_ras_recovery_init(adev);
2899  	if (r)
2900  		goto init_failed;
2901  
2902  	/*
2903  	 * In the case of XGMI, grab an extra reference on the reset domain for this device
2904  	 */
2905  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2906  		if (amdgpu_xgmi_add_device(adev) == 0) {
2907  			if (!amdgpu_sriov_vf(adev)) {
2908  				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2909  
2910  				if (WARN_ON(!hive)) {
2911  					r = -ENOENT;
2912  					goto init_failed;
2913  				}
2914  
2915  				if (!hive->reset_domain ||
2916  				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2917  					r = -ENOENT;
2918  					amdgpu_put_xgmi_hive(hive);
2919  					goto init_failed;
2920  				}
2921  
2922  				/* Drop the early temporary reset domain we created for device */
2923  				amdgpu_reset_put_reset_domain(adev->reset_domain);
2924  				adev->reset_domain = hive->reset_domain;
2925  				amdgpu_put_xgmi_hive(hive);
2926  			}
2927  		}
2928  	}
2929  
2930  	r = amdgpu_device_init_schedulers(adev);
2931  	if (r)
2932  		goto init_failed;
2933  
2934  	if (adev->mman.buffer_funcs_ring->sched.ready)
2935  		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2936  
2937  	/* Don't init kfd if whole hive need to be reset during init */
2938  	if (!adev->gmc.xgmi.pending_reset) {
2939  		kgd2kfd_init_zone_device(adev);
2940  		amdgpu_amdkfd_device_init(adev);
2941  	}
2942  
2943  	amdgpu_fru_get_product_info(adev);
2944  
2945  init_failed:
2946  
2947  	return r;
2948  }
2949  
2950  /**
2951   * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2952   *
2953   * @adev: amdgpu_device pointer
2954   *
2955   * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2956   * this function before a GPU reset.  If the value is retained after a
2957   * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2958   */
2959  static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2960  {
2961  	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2962  }
2963  
2964  /**
2965   * amdgpu_device_check_vram_lost - check if vram is valid
2966   *
2967   * @adev: amdgpu_device pointer
2968   *
2969   * Checks the reset magic value written to the gart pointer in VRAM.
2970   * The driver calls this after a GPU reset to see if the contents of
2971   * VRAM have been lost or not.
2972   * returns true if vram is lost, false if not.
2973   */
2974  static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2975  {
2976  	if (memcmp(adev->gart.ptr, adev->reset_magic,
2977  			AMDGPU_RESET_MAGIC_NUM))
2978  		return true;
2979  
2980  	if (!amdgpu_in_reset(adev))
2981  		return false;
2982  
2983  	/*
2984  	 * For all ASICs with baco/mode1 reset, the VRAM is
2985  	 * always assumed to be lost.
2986  	 */
2987  	switch (amdgpu_asic_reset_method(adev)) {
2988  	case AMD_RESET_METHOD_BACO:
2989  	case AMD_RESET_METHOD_MODE1:
2990  		return true;
2991  	default:
2992  		return false;
2993  	}
2994  }
2995  
2996  /**
2997   * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2998   *
2999   * @adev: amdgpu_device pointer
3000   * @state: clockgating state (gate or ungate)
3001   *
3002   * The list of all the hardware IPs that make up the asic is walked and the
3003   * set_clockgating_state callbacks are run.
3004   * The late initialization pass enables clockgating for the hardware IPs;
3005   * the fini or suspend pass disables it.
3006   * Returns 0 on success, negative error code on failure.
3007   */
3008  
3009  int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3010  			       enum amd_clockgating_state state)
3011  {
3012  	int i, j, r;
3013  
3014  	if (amdgpu_emu_mode == 1)
3015  		return 0;
3016  
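	/* Gate in normal IP order, ungate in reverse order */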
3017  	for (j = 0; j < adev->num_ip_blocks; j++) {
3018  		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3019  		if (!adev->ip_blocks[i].status.late_initialized)
3020  			continue;
3021  		/* skip CG for GFX, SDMA on S0ix */
3022  		if (adev->in_s0ix &&
3023  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3024  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3025  			continue;
3026  		/* skip CG for VCE/UVD, it's handled specially */
3027  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3028  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3029  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3030  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3031  		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3032  			/* enable clockgating to save power */
3033  			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3034  										     state);
3035  			if (r) {
3036  				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3037  					  adev->ip_blocks[i].version->funcs->name, r);
3038  				return r;
3039  			}
3040  		}
3041  	}
3042  
3043  	return 0;
3044  }
3045  
3046  int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3047  			       enum amd_powergating_state state)
3048  {
3049  	int i, j, r;
3050  
3051  	if (amdgpu_emu_mode == 1)
3052  		return 0;
3053  
3054  	for (j = 0; j < adev->num_ip_blocks; j++) {
3055  		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3056  		if (!adev->ip_blocks[i].status.late_initialized)
3057  			continue;
3058  		/* skip PG for GFX, SDMA on S0ix */
3059  		if (adev->in_s0ix &&
3060  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3061  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3062  			continue;
3063  		/* skip PG for VCE/UVD, it's handled specially */
3064  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3065  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3066  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3067  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3068  		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
3069  			/* enable powergating to save power */
3070  			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3071  											state);
3072  			if (r) {
3073  				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3074  					  adev->ip_blocks[i].version->funcs->name, r);
3075  				return r;
3076  			}
3077  		}
3078  	}
3079  	return 0;
3080  }
3081  
3082  static int amdgpu_device_enable_mgpu_fan_boost(void)
3083  {
3084  	struct amdgpu_gpu_instance *gpu_ins;
3085  	struct amdgpu_device *adev;
3086  	int i, ret = 0;
3087  
3088  	mutex_lock(&mgpu_info.mutex);
3089  
3090  	/*
3091  	 * MGPU fan boost feature should be enabled
3092  	 * only when there are two or more dGPUs in
3093  	 * the system
3094  	 */
3095  	if (mgpu_info.num_dgpu < 2)
3096  		goto out;
3097  
3098  	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3099  		gpu_ins = &(mgpu_info.gpu_ins[i]);
3100  		adev = gpu_ins->adev;
3101  		if (!(adev->flags & AMD_IS_APU) &&
3102  		    !gpu_ins->mgpu_fan_enabled) {
3103  			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3104  			if (ret)
3105  				break;
3106  
3107  			gpu_ins->mgpu_fan_enabled = 1;
3108  		}
3109  	}
3110  
3111  out:
3112  	mutex_unlock(&mgpu_info.mutex);
3113  
3114  	return ret;
3115  }
3116  
3117  /**
3118   * amdgpu_device_ip_late_init - run late init for hardware IPs
3119   *
3120   * @adev: amdgpu_device pointer
3121   *
3122   * Late initialization pass for hardware IPs.  The list of all the hardware
3123   * IPs that make up the asic is walked and the late_init callbacks are run.
3124   * late_init covers any special initialization that an IP requires
3125   * after all of the IPs have been initialized or something that needs to happen
3126   * late in the init process.
3127   * Returns 0 on success, negative error code on failure.
3128   */
3129  static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3130  {
3131  	struct amdgpu_gpu_instance *gpu_instance;
3132  	int i = 0, r;
3133  
3134  	for (i = 0; i < adev->num_ip_blocks; i++) {
3135  		if (!adev->ip_blocks[i].status.hw)
3136  			continue;
3137  		if (adev->ip_blocks[i].version->funcs->late_init) {
3138  			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3139  			if (r) {
3140  				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3141  					  adev->ip_blocks[i].version->funcs->name, r);
3142  				return r;
3143  			}
3144  		}
3145  		adev->ip_blocks[i].status.late_initialized = true;
3146  	}
3147  
3148  	r = amdgpu_ras_late_init(adev);
3149  	if (r) {
3150  		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3151  		return r;
3152  	}
3153  
3154  	if (!amdgpu_in_reset(adev))
3155  		amdgpu_ras_set_error_query_ready(adev, true);
3156  
3157  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3158  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3159  
3160  	amdgpu_device_fill_reset_magic(adev);
3161  
3162  	r = amdgpu_device_enable_mgpu_fan_boost();
3163  	if (r)
3164  		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3165  
3166  	/* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
3167  	if (amdgpu_passthrough(adev) &&
3168  	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3169  	     adev->asic_type == CHIP_ALDEBARAN))
3170  		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3171  
3172  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3173  		mutex_lock(&mgpu_info.mutex);
3174  
3175  		/*
3176  		 * Reset the device p-state to low, as it was booted with the high p-state.
3177  		 *
3178  		 * This should be performed only after all devices from the same
3179  		 * hive get initialized.
3180  		 *
3181  		 * However, the number of devices in the hive is not known in advance,
3182  		 * as it is counted one by one as the devices are initialized.
3183  		 *
3184  		 * So, we wait for all XGMI interlinked devices initialized.
3185  		 * This may bring some delays as those devices may come from
3186  		 * different hives. But that should be OK.
3187  		 */
3188  		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3189  			for (i = 0; i < mgpu_info.num_gpu; i++) {
3190  				gpu_instance = &(mgpu_info.gpu_ins[i]);
3191  				if (gpu_instance->adev->flags & AMD_IS_APU)
3192  					continue;
3193  
3194  				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3195  						AMDGPU_XGMI_PSTATE_MIN);
3196  				if (r) {
3197  					DRM_ERROR("pstate setting failed (%d).\n", r);
3198  					break;
3199  				}
3200  			}
3201  		}
3202  
3203  		mutex_unlock(&mgpu_info.mutex);
3204  	}
3205  
3206  	return 0;
3207  }
3208  
3209  /**
3210   * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3211   *
3212   * @adev: amdgpu_device pointer
3213   *
3214   * For ASICs that need to disable the SMC first
3215   */
3216  static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3217  {
3218  	int i, r;
3219  
3220  	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3221  		return;
3222  
3223  	for (i = 0; i < adev->num_ip_blocks; i++) {
3224  		if (!adev->ip_blocks[i].status.hw)
3225  			continue;
3226  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3227  			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3228  			/* XXX handle errors */
3229  			if (r) {
3230  				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3231  					  adev->ip_blocks[i].version->funcs->name, r);
3232  			}
3233  			adev->ip_blocks[i].status.hw = false;
3234  			break;
3235  		}
3236  	}
3237  }
3238  
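/* Early teardown: run early_fini and hw_fini for all IP blocks, ungate clock
 * and power gating, suspend KFD and, under SR-IOV, release exclusive GPU
 * access.  sw_fini is left to amdgpu_device_ip_fini().
 */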
3239  static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3240  {
3241  	int i, r;
3242  
3243  	for (i = 0; i < adev->num_ip_blocks; i++) {
3244  		if (!adev->ip_blocks[i].version->funcs->early_fini)
3245  			continue;
3246  
3247  		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3248  		if (r) {
3249  			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3250  				  adev->ip_blocks[i].version->funcs->name, r);
3251  		}
3252  	}
3253  
3254  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3255  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3256  
3257  	amdgpu_amdkfd_suspend(adev, false);
3258  
3259  	/* Workaround for ASICs that need to disable the SMC first */
3260  	amdgpu_device_smu_fini_early(adev);
3261  
3262  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3263  		if (!adev->ip_blocks[i].status.hw)
3264  			continue;
3265  
3266  		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3267  		/* XXX handle errors */
3268  		if (r) {
3269  			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3270  				  adev->ip_blocks[i].version->funcs->name, r);
3271  		}
3272  
3273  		adev->ip_blocks[i].status.hw = false;
3274  	}
3275  
3276  	if (amdgpu_sriov_vf(adev)) {
3277  		if (amdgpu_virt_release_full_gpu(adev, false))
3278  			DRM_ERROR("failed to release exclusive mode on fini\n");
3279  	}
3280  
3281  	return 0;
3282  }
3283  
3284  /**
3285   * amdgpu_device_ip_fini - run fini for hardware IPs
3286   *
3287   * @adev: amdgpu_device pointer
3288   *
3289   * Main teardown pass for hardware IPs.  The list of all the hardware
3290   * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3291   * are run.  hw_fini tears down the hardware associated with each IP
3292   * and sw_fini tears down any software state associated with each IP.
3293   * Returns 0 on success, negative error code on failure.
3294   */
3295  static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3296  {
3297  	int i, r;
3298  
3299  	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3300  		amdgpu_virt_release_ras_err_handler_data(adev);
3301  
3302  	if (adev->gmc.xgmi.num_physical_nodes > 1)
3303  		amdgpu_xgmi_remove_device(adev);
3304  
3305  	amdgpu_amdkfd_device_fini_sw(adev);
3306  
3307  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3308  		if (!adev->ip_blocks[i].status.sw)
3309  			continue;
3310  
3311  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3312  			amdgpu_ucode_free_bo(adev);
3313  			amdgpu_free_static_csa(&adev->virt.csa_obj);
3314  			amdgpu_device_wb_fini(adev);
3315  			amdgpu_device_mem_scratch_fini(adev);
3316  			amdgpu_ib_pool_fini(adev);
3317  			amdgpu_seq64_fini(adev);
3318  		}
3319  
3320  		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3321  		/* XXX handle errors */
3322  		if (r) {
3323  			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3324  				  adev->ip_blocks[i].version->funcs->name, r);
3325  		}
3326  		adev->ip_blocks[i].status.sw = false;
3327  		adev->ip_blocks[i].status.valid = false;
3328  	}
3329  
3330  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3331  		if (!adev->ip_blocks[i].status.late_initialized)
3332  			continue;
3333  		if (adev->ip_blocks[i].version->funcs->late_fini)
3334  			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3335  		adev->ip_blocks[i].status.late_initialized = false;
3336  	}
3337  
3338  	amdgpu_ras_fini(adev);
3339  
3340  	return 0;
3341  }
3342  
3343  /**
3344   * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3345   *
3346   * @work: work_struct.
3347   */
3348  static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3349  {
3350  	struct amdgpu_device *adev =
3351  		container_of(work, struct amdgpu_device, delayed_init_work.work);
3352  	int r;
3353  
3354  	r = amdgpu_ib_ring_tests(adev);
3355  	if (r)
3356  		DRM_ERROR("ib ring test failed (%d).\n", r);
3357  }
3358  
3359  static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3360  {
3361  	struct amdgpu_device *adev =
3362  		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3363  
3364  	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3365  	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3366  
3367  	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3368  		adev->gfx.gfx_off_state = true;
3369  }
3370  
3371  /**
3372   * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3373   *
3374   * @adev: amdgpu_device pointer
3375   *
3376   * Main suspend function for hardware IPs.  The list of all the hardware
3377   * IPs that make up the asic is walked, clockgating is disabled and the
3378   * suspend callbacks are run.  suspend puts the hardware and software state
3379   * in each IP into a state suitable for suspend.
3380   * Returns 0 on success, negative error code on failure.
3381   */
3382  static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3383  {
3384  	int i, r;
3385  
3386  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3387  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3388  
3389  	/*
3390  	 * Per the PMFW team's suggestion, the driver needs to handle gfxoff
3391  	 * and df cstate feature disablement for gpu reset (e.g. Mode1Reset)
3392  	 * scenarios. Add the missing df cstate disablement here.
3393  	 */
3394  	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3395  		dev_warn(adev->dev, "Failed to disallow df cstate");
3396  
3397  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3398  		if (!adev->ip_blocks[i].status.valid)
3399  			continue;
3400  
3401  		/* displays are handled separately */
3402  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3403  			continue;
3404  
3405  		/* XXX handle errors */
3406  		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3407  		/* XXX handle errors */
3408  		if (r) {
3409  			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3410  				  adev->ip_blocks[i].version->funcs->name, r);
3411  			return r;
3412  		}
3413  
3414  		adev->ip_blocks[i].status.hw = false;
3415  	}
3416  
3417  	return 0;
3418  }
3419  
3420  /**
3421   * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3422   *
3423   * @adev: amdgpu_device pointer
3424   *
3425   * Main suspend function for hardware IPs.  The list of all the hardware
3426   * IPs that make up the asic is walked, clockgating is disabled and the
3427   * suspend callbacks are run.  suspend puts the hardware and software state
3428   * in each IP into a state suitable for suspend.
3429   * Returns 0 on success, negative error code on failure.
3430   */
3431  static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3432  {
3433  	int i, r;
3434  
3435  	if (adev->in_s0ix)
3436  		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3437  
3438  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3439  		if (!adev->ip_blocks[i].status.valid)
3440  			continue;
3441  		/* displays are handled in phase1 */
3442  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3443  			continue;
3444  		/* PSP lost connection when err_event_athub occurs */
3445  		if (amdgpu_ras_intr_triggered() &&
3446  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3447  			adev->ip_blocks[i].status.hw = false;
3448  			continue;
3449  		}
3450  
3451  		/* skip unnecessary suspend if we have not initialized them yet */
3452  		if (adev->gmc.xgmi.pending_reset &&
3453  		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3454  		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3455  		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3456  		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3457  			adev->ip_blocks[i].status.hw = false;
3458  			continue;
3459  		}
3460  
3461  		/* Skip suspend of gfx/mes and psp for S0ix.
3462  		 * gfx is in the gfxoff state, so on resume it will exit gfxoff just
3463  		 * like at runtime. PSP is also part of the always-on hardware,
3464  		 * so there is no need to suspend it.
3465  		 */
3466  		if (adev->in_s0ix &&
3467  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3468  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3469  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3470  			continue;
3471  
3472  		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3473  		if (adev->in_s0ix &&
3474  		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3475  		     IP_VERSION(5, 0, 0)) &&
3476  		    (adev->ip_blocks[i].version->type ==
3477  		     AMD_IP_BLOCK_TYPE_SDMA))
3478  			continue;
3479  
3480  		/* SwPSP provides the IMU and RLC FW binaries to TOS during cold boot.
3481  		 * These live in TMR and are expected to be reused by PSP-TOS to reload
3482  		 * from that location; RLC autoload is also loaded from there based on
3483  		 * the PMFW -> PSP message during the re-init sequence.
3484  		 * Therefore, PSP suspend & resume should be skipped to avoid destroying
3485  		 * the TMR and reloading the FWs again on IMU-enabled APU ASICs.
3486  		 */
3487  		if (amdgpu_in_reset(adev) &&
3488  		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3489  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3490  			continue;
3491  
3492  		/* XXX handle errors */
3493  		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3494  		/* XXX handle errors */
3495  		if (r) {
3496  			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3497  				  adev->ip_blocks[i].version->funcs->name, r);
3498  		}
3499  		adev->ip_blocks[i].status.hw = false;
3500  		/* handle putting the SMC in the appropriate state */
3501  		if (!amdgpu_sriov_vf(adev)) {
3502  			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3503  				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3504  				if (r) {
3505  					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3506  							adev->mp1_state, r);
3507  					return r;
3508  				}
3509  			}
3510  		}
3511  	}
3512  
3513  	return 0;
3514  }
3515  
3516  /**
3517   * amdgpu_device_ip_suspend - run suspend for hardware IPs
3518   *
3519   * @adev: amdgpu_device pointer
3520   *
3521   * Main suspend function for hardware IPs.  The list of all the hardware
3522   * IPs that make up the asic is walked, clockgating is disabled and the
3523   * suspend callbacks are run.  suspend puts the hardware and software state
3524   * in each IP into a state suitable for suspend.
3525   * Returns 0 on success, negative error code on failure.
3526   */
3527  int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3528  {
3529  	int r;
3530  
3531  	if (amdgpu_sriov_vf(adev)) {
3532  		amdgpu_virt_fini_data_exchange(adev);
3533  		amdgpu_virt_request_full_gpu(adev, false);
3534  	}
3535  
3536  	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3537  
3538  	r = amdgpu_device_ip_suspend_phase1(adev);
3539  	if (r)
3540  		return r;
3541  	r = amdgpu_device_ip_suspend_phase2(adev);
3542  
3543  	if (amdgpu_sriov_vf(adev))
3544  		amdgpu_virt_release_full_gpu(adev, false);
3545  
3546  	return r;
3547  }
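
/*
 * Illustrative sketch only (not driver code): reset and teardown paths
 * quiesce the IPs with the helper above before touching the ASIC. The
 * surrounding error handling is simplified for illustration:
 *
 *	r = amdgpu_device_ip_suspend(adev);
 *	if (r)
 *		dev_err(adev->dev, "IP suspend failed (%d)\n", r);
 */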
3548  
3549  static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3550  {
3551  	int i, r;
3552  
3553  	static enum amd_ip_block_type ip_order[] = {
3554  		AMD_IP_BLOCK_TYPE_COMMON,
3555  		AMD_IP_BLOCK_TYPE_GMC,
3556  		AMD_IP_BLOCK_TYPE_PSP,
3557  		AMD_IP_BLOCK_TYPE_IH,
3558  	};
3559  
3560  	for (i = 0; i < adev->num_ip_blocks; i++) {
3561  		int j;
3562  		struct amdgpu_ip_block *block;
3563  
3564  		block = &adev->ip_blocks[i];
3565  		block->status.hw = false;
3566  
3567  		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3568  
3569  			if (block->version->type != ip_order[j] ||
3570  				!block->status.valid)
3571  				continue;
3572  
3573  			r = block->version->funcs->hw_init(adev);
3574  			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3575  			if (r)
3576  				return r;
3577  			block->status.hw = true;
3578  		}
3579  	}
3580  
3581  	return 0;
3582  }
3583  
3584  static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3585  {
3586  	int i, r;
3587  
3588  	static enum amd_ip_block_type ip_order[] = {
3589  		AMD_IP_BLOCK_TYPE_SMC,
3590  		AMD_IP_BLOCK_TYPE_DCE,
3591  		AMD_IP_BLOCK_TYPE_GFX,
3592  		AMD_IP_BLOCK_TYPE_SDMA,
3593  		AMD_IP_BLOCK_TYPE_MES,
3594  		AMD_IP_BLOCK_TYPE_UVD,
3595  		AMD_IP_BLOCK_TYPE_VCE,
3596  		AMD_IP_BLOCK_TYPE_VCN,
3597  		AMD_IP_BLOCK_TYPE_JPEG
3598  	};
3599  
3600  	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3601  		int j;
3602  		struct amdgpu_ip_block *block;
3603  
3604  		for (j = 0; j < adev->num_ip_blocks; j++) {
3605  			block = &adev->ip_blocks[j];
3606  
3607  			if (block->version->type != ip_order[i] ||
3608  				!block->status.valid ||
3609  				block->status.hw)
3610  				continue;
3611  
3612  			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3613  				r = block->version->funcs->resume(adev);
3614  			else
3615  				r = block->version->funcs->hw_init(adev);
3616  
3617  			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3618  			if (r)
3619  				return r;
3620  			block->status.hw = true;
3621  		}
3622  	}
3623  
3624  	return 0;
3625  }
3626  
3627  /**
3628   * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3629   *
3630   * @adev: amdgpu_device pointer
3631   *
3632   * First resume function for hardware IPs.  The list of all the hardware
3633   * IPs that make up the asic is walked and the resume callbacks are run for
3634   * COMMON, GMC, IH, and PSP (for SR-IOV).  resume puts the hardware into a functional state
3635   * after a suspend and updates the software state as necessary.  This
3636   * function is also used for restoring the GPU after a GPU reset.
3637   * Returns 0 on success, negative error code on failure.
3638   */
3639  static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3640  {
3641  	int i, r;
3642  
3643  	for (i = 0; i < adev->num_ip_blocks; i++) {
3644  		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3645  			continue;
3646  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3647  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3648  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3649  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3650  
3651  			r = adev->ip_blocks[i].version->funcs->resume(adev);
3652  			if (r) {
3653  				DRM_ERROR("resume of IP block <%s> failed %d\n",
3654  					  adev->ip_blocks[i].version->funcs->name, r);
3655  				return r;
3656  			}
3657  			adev->ip_blocks[i].status.hw = true;
3658  		}
3659  	}
3660  
3661  	return 0;
3662  }
3663  
3664  /**
3665   * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3666   *
3667   * @adev: amdgpu_device pointer
3668   *
3669   * Second resume function for hardware IPs.  The list of all the hardware
3670   * IPs that make up the asic is walked and the resume callbacks are run for
3671   * all blocks except COMMON, GMC, IH, and PSP.  resume puts the hardware into a
3672   * functional state after a suspend and updates the software state as
3673   * necessary.  This function is also used for restoring the GPU after a GPU
3674   * reset.
3675   * Returns 0 on success, negative error code on failure.
3676   */
3677  static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3678  {
3679  	int i, r;
3680  
3681  	for (i = 0; i < adev->num_ip_blocks; i++) {
3682  		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3683  			continue;
3684  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3685  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3686  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3687  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3688  			continue;
3689  		r = adev->ip_blocks[i].version->funcs->resume(adev);
3690  		if (r) {
3691  			DRM_ERROR("resume of IP block <%s> failed %d\n",
3692  				  adev->ip_blocks[i].version->funcs->name, r);
3693  			return r;
3694  		}
3695  		adev->ip_blocks[i].status.hw = true;
3696  	}
3697  
3698  	return 0;
3699  }
3700  
3701  /**
3702   * amdgpu_device_ip_resume - run resume for hardware IPs
3703   *
3704   * @adev: amdgpu_device pointer
3705   *
3706   * Main resume function for hardware IPs.  The hardware IPs
3707   * are split into two resume functions because they are
3708   * also used in recovering from a GPU reset and some additional
3709   * steps need to be taken between them.  In this case (S3/S4) they are
3710   * run sequentially.
3711   * Returns 0 on success, negative error code on failure.
3712   */
3713  static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3714  {
3715  	int r;
3716  
3717  	r = amdgpu_device_ip_resume_phase1(adev);
3718  	if (r)
3719  		return r;
3720  
3721  	r = amdgpu_device_fw_loading(adev);
3722  	if (r)
3723  		return r;
3724  
3725  	r = amdgpu_device_ip_resume_phase2(adev);
3726  
3727  	if (adev->mman.buffer_funcs_ring->sched.ready)
3728  		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3729  
3730  	return r;
3731  }
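
/*
 * Minimal sketch of the ordering encoded above; the annotations are hedged
 * summaries rather than an exhaustive description of each phase:
 *
 *	amdgpu_device_ip_resume_phase1(adev);	// COMMON, GMC, IH (and PSP on SR-IOV)
 *	amdgpu_device_fw_loading(adev);		// firmware must be loaded in between
 *	amdgpu_device_ip_resume_phase2(adev);	// all remaining IP blocks
 */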
3732  
3733  /**
3734   * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3735   *
3736   * @adev: amdgpu_device pointer
3737   *
3738   * Query the VBIOS data tables to determine if the board supports SR-IOV.
3739   */
3740  static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3741  {
3742  	if (amdgpu_sriov_vf(adev)) {
3743  		if (adev->is_atom_fw) {
3744  			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3745  				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3746  		} else {
3747  			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3748  				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3749  		}
3750  
3751  		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3752  			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3753  	}
3754  }
3755  
3756  /**
3757   * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3758   *
3759   * @asic_type: AMD asic type
3760   *
3761   * Check if there is DC (new modesetting infrastructure) support for an asic.
3762   * Returns true if DC has support, false if not.
3763   */
3764  bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3765  {
3766  	switch (asic_type) {
3767  #ifdef CONFIG_DRM_AMDGPU_SI
3768  	case CHIP_HAINAN:
3769  #endif
3770  	case CHIP_TOPAZ:
3771  		/* chips with no display hardware */
3772  		return false;
3773  #if defined(CONFIG_DRM_AMD_DC)
3774  	case CHIP_TAHITI:
3775  	case CHIP_PITCAIRN:
3776  	case CHIP_VERDE:
3777  	case CHIP_OLAND:
3778  		/*
3779  		 * We have systems in the wild with these ASICs that require
3780  		 * LVDS and VGA support which is not supported with DC.
3781  		 *
3782  		 * Fallback to the non-DC driver here by default so as not to
3783  		 * cause regressions.
3784  		 */
3785  #if defined(CONFIG_DRM_AMD_DC_SI)
3786  		return amdgpu_dc > 0;
3787  #else
3788  		return false;
3789  #endif
3790  	case CHIP_BONAIRE:
3791  	case CHIP_KAVERI:
3792  	case CHIP_KABINI:
3793  	case CHIP_MULLINS:
3794  		/*
3795  		 * We have systems in the wild with these ASICs that require
3796  		 * VGA support which is not supported with DC.
3797  		 *
3798  		 * Fallback to the non-DC driver here by default so as not to
3799  		 * cause regressions.
3800  		 */
3801  		return amdgpu_dc > 0;
3802  	default:
3803  		return amdgpu_dc != 0;
3804  #else
3805  	default:
3806  		if (amdgpu_dc > 0)
3807  			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3808  		return false;
3809  #endif
3810  	}
3811  }
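
/*
 * Worked example of the checks above (the kernel command line values are
 * illustrative only): with the default amdgpu.dc=-1, DC is used on every
 * ASIC that reaches the "amdgpu_dc != 0" test; amdgpu.dc=1 additionally
 * opts the listed SI/CIK parts into DC; amdgpu.dc=0 forces the legacy
 * (non-DC) display path everywhere.
 */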
3812  
3813  /**
3814   * amdgpu_device_has_dc_support - check if dc is supported
3815   *
3816   * @adev: amdgpu_device pointer
3817   *
3818   * Returns true for supported, false for not supported
3819   */
3820  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3821  {
3822  	if (adev->enable_virtual_display ||
3823  	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3824  		return false;
3825  
3826  	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3827  }
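
/*
 * Illustrative usage sketch, mirroring how this helper is consumed later
 * in this file (e.g. around atombios i2c init during device init/fini):
 *
 *	if (!amdgpu_device_has_dc_support(adev))
 *		amdgpu_atombios_i2c_init(adev);
 */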
3828  
3829  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3830  {
3831  	struct amdgpu_device *adev =
3832  		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3833  	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3834  
3835  	/* It's a bug to not have a hive within this function */
3836  	if (WARN_ON(!hive))
3837  		return;
3838  
3839  	/*
3840  	 * Use task barrier to synchronize all xgmi reset works across the
3841  	 * hive. task_barrier_enter and task_barrier_exit will block
3842  	 * until all the threads running the xgmi reset works reach
3843  	 * those points. task_barrier_full will do both blocks.
3844  	 */
3845  	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3846  
3847  		task_barrier_enter(&hive->tb);
3848  		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3849  
3850  		if (adev->asic_reset_res)
3851  			goto fail;
3852  
3853  		task_barrier_exit(&hive->tb);
3854  		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3855  
3856  		if (adev->asic_reset_res)
3857  			goto fail;
3858  
3859  		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3860  	} else {
3861  
3862  		task_barrier_full(&hive->tb);
3863  		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3864  	}
3865  
3866  fail:
3867  	if (adev->asic_reset_res)
3868  		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3869  			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3870  	amdgpu_put_xgmi_hive(hive);
3871  }
3872  
3873  static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3874  {
3875  	char *input = amdgpu_lockup_timeout;
3876  	char *timeout_setting = NULL;
3877  	int index = 0;
3878  	long timeout;
3879  	int ret = 0;
3880  
3881  	/*
3882  	 * By default timeout for non compute jobs is 10000
3883  	 * and 60000 for compute jobs.
3884  	 * In SR-IOV or passthrough mode, the timeout for compute
3885  	 * jobs is 60000 by default.
3886  	 */
3887  	adev->gfx_timeout = msecs_to_jiffies(10000);
3888  	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3889  	if (amdgpu_sriov_vf(adev))
3890  		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3891  					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3892  	else
3893  		adev->compute_timeout =  msecs_to_jiffies(60000);
3894  
3895  	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3896  		while ((timeout_setting = strsep(&input, ",")) &&
3897  				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3898  			ret = kstrtol(timeout_setting, 0, &timeout);
3899  			if (ret)
3900  				return ret;
3901  
3902  			if (timeout == 0) {
3903  				index++;
3904  				continue;
3905  			} else if (timeout < 0) {
3906  				timeout = MAX_SCHEDULE_TIMEOUT;
3907  				dev_warn(adev->dev, "lockup timeout disabled");
3908  				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3909  			} else {
3910  				timeout = msecs_to_jiffies(timeout);
3911  			}
3912  
3913  			switch (index++) {
3914  			case 0:
3915  				adev->gfx_timeout = timeout;
3916  				break;
3917  			case 1:
3918  				adev->compute_timeout = timeout;
3919  				break;
3920  			case 2:
3921  				adev->sdma_timeout = timeout;
3922  				break;
3923  			case 3:
3924  				adev->video_timeout = timeout;
3925  				break;
3926  			default:
3927  				break;
3928  			}
3929  		}
3930  		/*
3931  		 * There is only one value specified and
3932  		 * it should apply to all non-compute jobs.
3933  		 */
3934  		if (index == 1) {
3935  			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3936  			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3937  				adev->compute_timeout = adev->gfx_timeout;
3938  		}
3939  	}
3940  
3941  	return ret;
3942  }
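
/*
 * Worked example of the parsing above (the values are hypothetical):
 * amdgpu.lockup_timeout=10000,60000,10000,10000 assigns, in order, the
 * gfx, compute, sdma and video timeouts; a single value such as
 * amdgpu.lockup_timeout=20000 applies to all non-compute jobs (and to
 * compute as well under SR-IOV/passthrough); 0 keeps the default and a
 * negative value disables the timeout for that queue type.
 */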
3943  
3944  /**
3945   * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3946   *
3947   * @adev: amdgpu_device pointer
3948   *
3949   * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3950   */
3951  static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3952  {
3953  	struct iommu_domain *domain;
3954  
3955  	domain = iommu_get_domain_for_dev(adev->dev);
3956  	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3957  		adev->ram_is_direct_mapped = true;
3958  }
3959  
3960  #if defined(CONFIG_HSA_AMD_P2P)
3961  /**
3962   * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
3963   *
3964   * @adev: amdgpu_device pointer
3965   *
3966   * Returns true if the IOMMU is remapping the BAR address, false otherwise.
3967   */
3968  static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
3969  {
3970  	struct iommu_domain *domain;
3971  
3972  	domain = iommu_get_domain_for_dev(adev->dev);
3973  	if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
3974  		domain->type ==	IOMMU_DOMAIN_DMA_FQ))
3975  		return true;
3976  
3977  	return false;
3978  }
3979  #endif
3980  
3981  static const struct attribute *amdgpu_dev_attributes[] = {
3982  	&dev_attr_pcie_replay_count.attr,
3983  	NULL
3984  };
3985  
3986  static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3987  {
3988  	if (amdgpu_mcbp == 1)
3989  		adev->gfx.mcbp = true;
3990  	else if (amdgpu_mcbp == 0)
3991  		adev->gfx.mcbp = false;
3992  
3993  	if (amdgpu_sriov_vf(adev))
3994  		adev->gfx.mcbp = true;
3995  
3996  	if (adev->gfx.mcbp)
3997  		DRM_INFO("MCBP is enabled\n");
3998  }
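
/*
 * Hedged summary of the policy above (module parameter naming assumed to
 * follow the usual amdgpu.<name> convention): amdgpu.mcbp=1 force-enables
 * mid-command-buffer preemption, amdgpu.mcbp=0 disables it, the default
 * (-1) leaves the per-ASIC setting untouched, and SR-IOV always enables
 * it regardless of the parameter.
 */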
3999  
4000  /**
4001   * amdgpu_device_init - initialize the driver
4002   *
4003   * @adev: amdgpu_device pointer
4004   * @flags: driver flags
4005   *
4006   * Initializes the driver info and hw (all asics).
4007   * Returns 0 for success or an error on failure.
4008   * Called at driver startup.
4009   */
4010  int amdgpu_device_init(struct amdgpu_device *adev,
4011  		       uint32_t flags)
4012  {
4013  	struct drm_device *ddev = adev_to_drm(adev);
4014  	struct pci_dev *pdev = adev->pdev;
4015  	int r, i;
4016  	bool px = false;
4017  	u32 max_MBps;
4018  	int tmp;
4019  
4020  	adev->shutdown = false;
4021  	adev->flags = flags;
4022  
4023  	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4024  		adev->asic_type = amdgpu_force_asic_type;
4025  	else
4026  		adev->asic_type = flags & AMD_ASIC_MASK;
4027  
4028  	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4029  	if (amdgpu_emu_mode == 1)
4030  		adev->usec_timeout *= 10;
4031  	adev->gmc.gart_size = 512 * 1024 * 1024;
4032  	adev->accel_working = false;
4033  	adev->num_rings = 0;
4034  	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4035  	adev->mman.buffer_funcs = NULL;
4036  	adev->mman.buffer_funcs_ring = NULL;
4037  	adev->vm_manager.vm_pte_funcs = NULL;
4038  	adev->vm_manager.vm_pte_num_scheds = 0;
4039  	adev->gmc.gmc_funcs = NULL;
4040  	adev->harvest_ip_mask = 0x0;
4041  	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4042  	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4043  
4044  	adev->smc_rreg = &amdgpu_invalid_rreg;
4045  	adev->smc_wreg = &amdgpu_invalid_wreg;
4046  	adev->pcie_rreg = &amdgpu_invalid_rreg;
4047  	adev->pcie_wreg = &amdgpu_invalid_wreg;
4048  	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4049  	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4050  	adev->pciep_rreg = &amdgpu_invalid_rreg;
4051  	adev->pciep_wreg = &amdgpu_invalid_wreg;
4052  	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4053  	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4054  	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4055  	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4056  	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4057  	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4058  	adev->didt_rreg = &amdgpu_invalid_rreg;
4059  	adev->didt_wreg = &amdgpu_invalid_wreg;
4060  	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4061  	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4062  	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4063  	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4064  
4065  	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4066  		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4067  		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4068  
4069  	/* mutex initialization are all done here so we
4070  	 * can recall function without having locking issues
4071  	 */
4072  	mutex_init(&adev->firmware.mutex);
4073  	mutex_init(&adev->pm.mutex);
4074  	mutex_init(&adev->gfx.gpu_clock_mutex);
4075  	mutex_init(&adev->srbm_mutex);
4076  	mutex_init(&adev->gfx.pipe_reserve_mutex);
4077  	mutex_init(&adev->gfx.gfx_off_mutex);
4078  	mutex_init(&adev->gfx.partition_mutex);
4079  	mutex_init(&adev->grbm_idx_mutex);
4080  	mutex_init(&adev->mn_lock);
4081  	mutex_init(&adev->virt.vf_errors.lock);
4082  	mutex_init(&adev->virt.rlcg_reg_lock);
4083  	hash_init(adev->mn_hash);
4084  	mutex_init(&adev->psp.mutex);
4085  	mutex_init(&adev->notifier_lock);
4086  	mutex_init(&adev->pm.stable_pstate_ctx_lock);
4087  	mutex_init(&adev->benchmark_mutex);
4088  	mutex_init(&adev->gfx.reset_sem_mutex);
4089  	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4090  	mutex_init(&adev->enforce_isolation_mutex);
4091  	mutex_init(&adev->gfx.kfd_sch_mutex);
4092  
4093  	amdgpu_device_init_apu_flags(adev);
4094  
4095  	r = amdgpu_device_check_arguments(adev);
4096  	if (r)
4097  		return r;
4098  
4099  	spin_lock_init(&adev->mmio_idx_lock);
4100  	spin_lock_init(&adev->smc_idx_lock);
4101  	spin_lock_init(&adev->pcie_idx_lock);
4102  	spin_lock_init(&adev->uvd_ctx_idx_lock);
4103  	spin_lock_init(&adev->didt_idx_lock);
4104  	spin_lock_init(&adev->gc_cac_idx_lock);
4105  	spin_lock_init(&adev->se_cac_idx_lock);
4106  	spin_lock_init(&adev->audio_endpt_idx_lock);
4107  	spin_lock_init(&adev->mm_stats.lock);
4108  	spin_lock_init(&adev->wb.lock);
4109  
4110  	INIT_LIST_HEAD(&adev->reset_list);
4111  
4112  	INIT_LIST_HEAD(&adev->ras_list);
4113  
4114  	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4115  
4116  	INIT_DELAYED_WORK(&adev->delayed_init_work,
4117  			  amdgpu_device_delayed_init_work_handler);
4118  	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4119  			  amdgpu_device_delay_enable_gfx_off);
4120  	/*
4121  	 * Initialize the enforce_isolation work structures for each XCP
4122  	 * partition.  This work handler is responsible for enforcing shader
4123  	 * isolation on AMD GPUs.  It counts the number of emitted fences for
4124  	 * each GFX and compute ring.  If there are any fences, it schedules
4125  	 * the `enforce_isolation_work` to be run after a delay.  If there are
4126  	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4127  	 * runqueue.
4128  	 */
4129  	for (i = 0; i < MAX_XCP; i++) {
4130  		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4131  				  amdgpu_gfx_enforce_isolation_handler);
4132  		adev->gfx.enforce_isolation[i].adev = adev;
4133  		adev->gfx.enforce_isolation[i].xcp_id = i;
4134  	}
4135  
4136  	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4137  
4138  	adev->gfx.gfx_off_req_count = 1;
4139  	adev->gfx.gfx_off_residency = 0;
4140  	adev->gfx.gfx_off_entrycount = 0;
4141  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4142  
4143  	atomic_set(&adev->throttling_logging_enabled, 1);
4144  	/*
4145  	 * If throttling continues, logging will be performed every minute
4146  	 * to avoid log flooding. "-1" is subtracted since the thermal
4147  	 * throttling interrupt comes every second. Thus, the total logging
4148  	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4149  	 * for throttling interrupt) = 60 seconds.
4150  	 */
4151  	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4152  	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4153  
4154  	/* Registers mapping */
4155  	/* TODO: block userspace mapping of io register */
4156  	if (adev->asic_type >= CHIP_BONAIRE) {
4157  		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4158  		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4159  	} else {
4160  		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4161  		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4162  	}
4163  
4164  	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4165  		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4166  
4167  	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4168  	if (!adev->rmmio)
4169  		return -ENOMEM;
4170  
4171  	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4172  	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4173  
4174  	/*
4175  	 * Reset domain needs to be present early, before XGMI hive discovered
4176  	 * (if any) and initialized to use the reset sem and in_gpu reset flag
4177  	 * early on during init and before calling to RREG32.
4178  	 */
4179  	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4180  	if (!adev->reset_domain)
4181  		return -ENOMEM;
4182  
4183  	/* detect hw virtualization here */
4184  	amdgpu_detect_virtualization(adev);
4185  
4186  	amdgpu_device_get_pcie_info(adev);
4187  
4188  	r = amdgpu_device_get_job_timeout_settings(adev);
4189  	if (r) {
4190  		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4191  		return r;
4192  	}
4193  
4194  	amdgpu_device_set_mcbp(adev);
4195  
4196  	/* early init functions */
4197  	r = amdgpu_device_ip_early_init(adev);
4198  	if (r)
4199  		return r;
4200  
4201  	/* Get rid of things like offb */
4202  	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4203  	if (r)
4204  		return r;
4205  
4206  	/* Enable TMZ based on IP_VERSION */
4207  	amdgpu_gmc_tmz_set(adev);
4208  
4209  	if (amdgpu_sriov_vf(adev) &&
4210  	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4211  		/* VF MMIO access (except mailbox range) from CPU
4212  		 * will be blocked during sriov runtime
4213  		 */
4214  		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4215  
4216  	amdgpu_gmc_noretry_set(adev);
4217  	/* Need to get xgmi info early to decide the reset behavior*/
4218  	if (adev->gmc.xgmi.supported) {
4219  		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4220  		if (r)
4221  			return r;
4222  	}
4223  
4224  	/* enable PCIE atomic ops */
4225  	if (amdgpu_sriov_vf(adev)) {
4226  		if (adev->virt.fw_reserve.p_pf2vf)
4227  			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4228  						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4229  				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4230  	/* APUs with GFX9 onwards don't rely on PCIe atomics; rather, their
4231  	 * internal path natively supports atomics, so set have_atomics_support to true.
4232  	 */
4233  	} else if ((adev->flags & AMD_IS_APU) &&
4234  		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4235  		    IP_VERSION(9, 0, 0))) {
4236  		adev->have_atomics_support = true;
4237  	} else {
4238  		adev->have_atomics_support =
4239  			!pci_enable_atomic_ops_to_root(adev->pdev,
4240  					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4241  					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4242  	}
4243  
4244  	if (!adev->have_atomics_support)
4245  		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4246  
4247  	/* doorbell bar mapping and doorbell index init*/
4248  	amdgpu_doorbell_init(adev);
4249  
4250  	if (amdgpu_emu_mode == 1) {
4251  		/* post the asic on emulation mode */
4252  		emu_soc_asic_init(adev);
4253  		goto fence_driver_init;
4254  	}
4255  
4256  	amdgpu_reset_init(adev);
4257  
4258  	/* detect if we are with an SRIOV vbios */
4259  	if (adev->bios)
4260  		amdgpu_device_detect_sriov_bios(adev);
4261  
4262  	/* check if we need to reset the asic
4263  	 *  E.g., driver was not cleanly unloaded previously, etc.
4264  	 */
4265  	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4266  		if (adev->gmc.xgmi.num_physical_nodes) {
4267  			dev_info(adev->dev, "Pending hive reset.\n");
4268  			adev->gmc.xgmi.pending_reset = true;
4269  			/* Only need to init necessary block for SMU to handle the reset */
4270  			for (i = 0; i < adev->num_ip_blocks; i++) {
4271  				if (!adev->ip_blocks[i].status.valid)
4272  					continue;
4273  				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4274  				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4275  				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4276  				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4277  					DRM_DEBUG("IP %s disabled for hw_init.\n",
4278  						adev->ip_blocks[i].version->funcs->name);
4279  					adev->ip_blocks[i].status.hw = true;
4280  				}
4281  			}
4282  		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4283  			   !amdgpu_device_has_display_hardware(adev)) {
4284  			r = psp_gpu_reset(adev);
4285  		} else {
4286  			tmp = amdgpu_reset_method;
4287  			/* It should do a default reset when loading or reloading the driver,
4288  			 * regardless of the module parameter reset_method.
4289  			 */
4290  			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4291  			r = amdgpu_asic_reset(adev);
4292  			amdgpu_reset_method = tmp;
4293  		}
4294  
4295  		if (r) {
4296  			dev_err(adev->dev, "asic reset on init failed\n");
4297  			goto failed;
4298  		}
4299  	}
4300  
4301  	/* Post card if necessary */
4302  	if (amdgpu_device_need_post(adev)) {
4303  		if (!adev->bios) {
4304  			dev_err(adev->dev, "no vBIOS found\n");
4305  			r = -EINVAL;
4306  			goto failed;
4307  		}
4308  		DRM_INFO("GPU posting now...\n");
4309  		r = amdgpu_device_asic_init(adev);
4310  		if (r) {
4311  			dev_err(adev->dev, "gpu post error!\n");
4312  			goto failed;
4313  		}
4314  	}
4315  
4316  	if (adev->bios) {
4317  		if (adev->is_atom_fw) {
4318  			/* Initialize clocks */
4319  			r = amdgpu_atomfirmware_get_clock_info(adev);
4320  			if (r) {
4321  				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4322  				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4323  				goto failed;
4324  			}
4325  		} else {
4326  			/* Initialize clocks */
4327  			r = amdgpu_atombios_get_clock_info(adev);
4328  			if (r) {
4329  				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4330  				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4331  				goto failed;
4332  			}
4333  			/* init i2c buses */
4334  			if (!amdgpu_device_has_dc_support(adev))
4335  				amdgpu_atombios_i2c_init(adev);
4336  		}
4337  	}
4338  
4339  fence_driver_init:
4340  	/* Fence driver */
4341  	r = amdgpu_fence_driver_sw_init(adev);
4342  	if (r) {
4343  		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4344  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4345  		goto failed;
4346  	}
4347  
4348  	/* init the mode config */
4349  	drm_mode_config_init(adev_to_drm(adev));
4350  
4351  	r = amdgpu_device_ip_init(adev);
4352  	if (r) {
4353  		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4354  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4355  		goto release_ras_con;
4356  	}
4357  
4358  	amdgpu_fence_driver_hw_init(adev);
4359  
4360  	dev_info(adev->dev,
4361  		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4362  			adev->gfx.config.max_shader_engines,
4363  			adev->gfx.config.max_sh_per_se,
4364  			adev->gfx.config.max_cu_per_sh,
4365  			adev->gfx.cu_info.number);
4366  
4367  	adev->accel_working = true;
4368  
4369  	amdgpu_vm_check_compute_bug(adev);
4370  
4371  	/* Initialize the buffer migration limit. */
4372  	if (amdgpu_moverate >= 0)
4373  		max_MBps = amdgpu_moverate;
4374  	else
4375  		max_MBps = 8; /* Allow 8 MB/s. */
4376  	/* Get a log2 for easy divisions. */
4377  	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4378  
4379  	/*
4380  	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4381  	 * Otherwise the mgpu fan boost feature will be skipped because the
4382  	 * gpu instance count would be too low.
4383  	 */
4384  	amdgpu_register_gpu_instance(adev);
4385  
4386  	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4387  	 * explicit gating rather than handling it automatically.
4388  	 */
4389  	if (!adev->gmc.xgmi.pending_reset) {
4390  		r = amdgpu_device_ip_late_init(adev);
4391  		if (r) {
4392  			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4393  			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4394  			goto release_ras_con;
4395  		}
4396  		/* must succeed. */
4397  		amdgpu_ras_resume(adev);
4398  		queue_delayed_work(system_wq, &adev->delayed_init_work,
4399  				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4400  	}
4401  
4402  	if (amdgpu_sriov_vf(adev)) {
4403  		amdgpu_virt_release_full_gpu(adev, true);
4404  		flush_delayed_work(&adev->delayed_init_work);
4405  	}
4406  
4407  	/*
4408  	 * Place the sysfs registration after `late_init`, as some of the
4409  	 * operations performed in `late_init` might affect the creation of
4410  	 * those sysfs interfaces.
4411  	 */
4412  	r = amdgpu_atombios_sysfs_init(adev);
4413  	if (r)
4414  		drm_err(&adev->ddev,
4415  			"registering atombios sysfs failed (%d).\n", r);
4416  
4417  	r = amdgpu_pm_sysfs_init(adev);
4418  	if (r)
4419  		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4420  
4421  	r = amdgpu_ucode_sysfs_init(adev);
4422  	if (r) {
4423  		adev->ucode_sysfs_en = false;
4424  		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4425  	} else
4426  		adev->ucode_sysfs_en = true;
4427  
4428  	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4429  	if (r)
4430  		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4431  
4432  	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4433  	if (r)
4434  		dev_err(adev->dev,
4435  			"Could not create amdgpu board attributes\n");
4436  
4437  	amdgpu_fru_sysfs_init(adev);
4438  	amdgpu_reg_state_sysfs_init(adev);
4439  
4440  	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
4441  		r = amdgpu_pmu_init(adev);
4442  		if (r)
4443  			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
4444  
4445  	/* Have stored pci confspace at hand for restore in sudden PCI error */
4446  	if (amdgpu_device_cache_pci_state(adev->pdev))
4447  		pci_restore_state(pdev);
4448  
4449  	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4450  	/* this will fail for cards that aren't VGA class devices, just
4451  	 * ignore it
4452  	 */
4453  	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4454  		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4455  
4456  	px = amdgpu_device_supports_px(ddev);
4457  
4458  	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4459  				apple_gmux_detect(NULL, NULL)))
4460  		vga_switcheroo_register_client(adev->pdev,
4461  					       &amdgpu_switcheroo_ops, px);
4462  
4463  	if (px)
4464  		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4465  
4466  	if (adev->gmc.xgmi.pending_reset)
4467  		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4468  				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4469  
4470  	amdgpu_device_check_iommu_direct_map(adev);
4471  
4472  	return 0;
4473  
4474  release_ras_con:
4475  	if (amdgpu_sriov_vf(adev))
4476  		amdgpu_virt_release_full_gpu(adev, true);
4477  
4478  	/* failed in exclusive mode due to timeout */
4479  	if (amdgpu_sriov_vf(adev) &&
4480  		!amdgpu_sriov_runtime(adev) &&
4481  		amdgpu_virt_mmio_blocked(adev) &&
4482  		!amdgpu_virt_wait_reset(adev)) {
4483  		dev_err(adev->dev, "VF exclusive mode timeout\n");
4484  		/* Don't send request since VF is inactive. */
4485  		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4486  		adev->virt.ops = NULL;
4487  		r = -EAGAIN;
4488  	}
4489  	amdgpu_release_ras_context(adev);
4490  
4491  failed:
4492  	amdgpu_vf_error_trans_all(adev);
4493  
4494  	return r;
4495  }
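
/*
 * Illustrative sketch only: the KMS load path hands the freshly allocated
 * device to amdgpu_device_init(); the surrounding probe code lives
 * elsewhere in the driver and is simplified here:
 *
 *	r = amdgpu_device_init(adev, flags);
 *	if (r)
 *		return r;
 */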
4496  
4497  static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4498  {
4499  
4500  	/* Clear all CPU mappings pointing to this device */
4501  	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4502  
4503  	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4504  	amdgpu_doorbell_fini(adev);
4505  
4506  	iounmap(adev->rmmio);
4507  	adev->rmmio = NULL;
4508  	if (adev->mman.aper_base_kaddr)
4509  		iounmap(adev->mman.aper_base_kaddr);
4510  	adev->mman.aper_base_kaddr = NULL;
4511  
4512  	/* Memory manager related */
4513  	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4514  		arch_phys_wc_del(adev->gmc.vram_mtrr);
4515  		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4516  	}
4517  }
4518  
4519  /**
4520   * amdgpu_device_fini_hw - tear down the driver
4521   *
4522   * @adev: amdgpu_device pointer
4523   *
4524   * Tear down the driver info (all asics).
4525   * Called at driver shutdown.
4526   */
4527  void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4528  {
4529  	dev_info(adev->dev, "amdgpu: finishing device.\n");
4530  	flush_delayed_work(&adev->delayed_init_work);
4531  
4532  	if (adev->mman.initialized)
4533  		drain_workqueue(adev->mman.bdev.wq);
4534  	adev->shutdown = true;
4535  
4536  	/* make sure IB test finished before entering exclusive mode
4537  	 * to avoid preemption on IB test
4538  	 */
4539  	if (amdgpu_sriov_vf(adev)) {
4540  		amdgpu_virt_request_full_gpu(adev, false);
4541  		amdgpu_virt_fini_data_exchange(adev);
4542  	}
4543  
4544  	/* disable all interrupts */
4545  	amdgpu_irq_disable_all(adev);
4546  	if (adev->mode_info.mode_config_initialized) {
4547  		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4548  			drm_helper_force_disable_all(adev_to_drm(adev));
4549  		else
4550  			drm_atomic_helper_shutdown(adev_to_drm(adev));
4551  	}
4552  	amdgpu_fence_driver_hw_fini(adev);
4553  
4554  	if (adev->pm.sysfs_initialized)
4555  		amdgpu_pm_sysfs_fini(adev);
4556  	if (adev->ucode_sysfs_en)
4557  		amdgpu_ucode_sysfs_fini(adev);
4558  	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4559  	amdgpu_fru_sysfs_fini(adev);
4560  
4561  	amdgpu_reg_state_sysfs_fini(adev);
4562  
4563  	/* disable ras feature must before hw fini */
4564  	amdgpu_ras_pre_fini(adev);
4565  
4566  	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4567  
4568  	amdgpu_device_ip_fini_early(adev);
4569  
4570  	amdgpu_irq_fini_hw(adev);
4571  
4572  	if (adev->mman.initialized)
4573  		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4574  
4575  	amdgpu_gart_dummy_page_fini(adev);
4576  
4577  	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4578  		amdgpu_device_unmap_mmio(adev);
4579  
4580  }
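
/*
 * Teardown is intentionally split in two stages: amdgpu_device_fini_hw()
 * above quiesces and detaches the hardware, while amdgpu_device_fini_sw()
 * below releases the remaining software state once the last users are
 * gone. A minimal sketch of the expected ordering (call sites hedged as
 * illustration only):
 *
 *	amdgpu_device_fini_hw(adev);
 *	...
 *	amdgpu_device_fini_sw(adev);
 */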
4581  
4582  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4583  {
4584  	int idx;
4585  	bool px;
4586  
4587  	amdgpu_fence_driver_sw_fini(adev);
4588  	amdgpu_device_ip_fini(adev);
4589  	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4590  	adev->accel_working = false;
4591  	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4592  
4593  	amdgpu_reset_fini(adev);
4594  
4595  	/* free i2c buses */
4596  	if (!amdgpu_device_has_dc_support(adev))
4597  		amdgpu_i2c_fini(adev);
4598  
4599  	if (amdgpu_emu_mode != 1)
4600  		amdgpu_atombios_fini(adev);
4601  
4602  	kfree(adev->bios);
4603  	adev->bios = NULL;
4604  
4605  	kfree(adev->fru_info);
4606  	adev->fru_info = NULL;
4607  
4608  	px = amdgpu_device_supports_px(adev_to_drm(adev));
4609  
4610  	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4611  				apple_gmux_detect(NULL, NULL)))
4612  		vga_switcheroo_unregister_client(adev->pdev);
4613  
4614  	if (px)
4615  		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4616  
4617  	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4618  		vga_client_unregister(adev->pdev);
4619  
4620  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4621  
4622  		iounmap(adev->rmmio);
4623  		adev->rmmio = NULL;
4624  		amdgpu_doorbell_fini(adev);
4625  		drm_dev_exit(idx);
4626  	}
4627  
4628  	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4629  		amdgpu_pmu_fini(adev);
4630  	if (adev->mman.discovery_bin)
4631  		amdgpu_discovery_fini(adev);
4632  
4633  	amdgpu_reset_put_reset_domain(adev->reset_domain);
4634  	adev->reset_domain = NULL;
4635  
4636  	kfree(adev->pci_state);
4637  
4638  }
4639  
4640  /**
4641   * amdgpu_device_evict_resources - evict device resources
4642   * @adev: amdgpu device object
4643   *
4644   * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4645   * of the vram memory type. Mainly used for evicting device resources
4646   * at suspend time.
4647   *
4648   */
4649  static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4650  {
4651  	int ret;
4652  
4653  	/* No need to evict vram on APUs for suspend to ram or s2idle */
4654  	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4655  		return 0;
4656  
4657  	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4658  	if (ret)
4659  		DRM_WARN("evicting device resources failed\n");
4660  	return ret;
4661  }
4662  
4663  /*
4664   * Suspend & resume.
4665   */
4666  /**
4667   * amdgpu_device_prepare - prepare for device suspend
4668   *
4669   * @dev: drm dev pointer
4670   *
4671   * Prepare to put the hw in the suspend state (all asics).
4672   * Returns 0 for success or an error on failure.
4673   * Called at driver suspend.
4674   */
4675  int amdgpu_device_prepare(struct drm_device *dev)
4676  {
4677  	struct amdgpu_device *adev = drm_to_adev(dev);
4678  	int i, r;
4679  
4680  	amdgpu_choose_low_power_state(adev);
4681  
4682  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4683  		return 0;
4684  
4685  	/* Evict the majority of BOs before starting suspend sequence */
4686  	r = amdgpu_device_evict_resources(adev);
4687  	if (r)
4688  		goto unprepare;
4689  
4690  	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4691  
4692  	for (i = 0; i < adev->num_ip_blocks; i++) {
4693  		if (!adev->ip_blocks[i].status.valid)
4694  			continue;
4695  		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4696  			continue;
4697  		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4698  		if (r)
4699  			goto unprepare;
4700  	}
4701  
4702  	return 0;
4703  
4704  unprepare:
4705  	adev->in_s0ix = adev->in_s3 = false;
4706  
4707  	return r;
4708  }
4709  
4710  /**
4711   * amdgpu_device_suspend - initiate device suspend
4712   *
4713   * @dev: drm dev pointer
4714   * @fbcon: notify the fbdev of suspend
4715   *
4716   * Puts the hw in the suspend state (all asics).
4717   * Returns 0 for success or an error on failure.
4718   * Called at driver suspend.
4719   */
4720  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4721  {
4722  	struct amdgpu_device *adev = drm_to_adev(dev);
4723  	int r = 0;
4724  
4725  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4726  		return 0;
4727  
4728  	adev->in_suspend = true;
4729  
4730  	if (amdgpu_sriov_vf(adev)) {
4731  		amdgpu_virt_fini_data_exchange(adev);
4732  		r = amdgpu_virt_request_full_gpu(adev, false);
4733  		if (r)
4734  			return r;
4735  	}
4736  
4737  	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4738  		DRM_WARN("smart shift update failed\n");
4739  
4740  	if (fbcon)
4741  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4742  
4743  	cancel_delayed_work_sync(&adev->delayed_init_work);
4744  
4745  	amdgpu_ras_suspend(adev);
4746  
4747  	amdgpu_device_ip_suspend_phase1(adev);
4748  
4749  	if (!adev->in_s0ix)
4750  		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4751  
4752  	r = amdgpu_device_evict_resources(adev);
4753  	if (r)
4754  		return r;
4755  
4756  	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4757  
4758  	amdgpu_fence_driver_hw_fini(adev);
4759  
4760  	amdgpu_device_ip_suspend_phase2(adev);
4761  
4762  	if (amdgpu_sriov_vf(adev))
4763  		amdgpu_virt_release_full_gpu(adev, false);
4764  
4765  	r = amdgpu_dpm_notify_rlc_state(adev, false);
4766  	if (r)
4767  		return r;
4768  
4769  	return 0;
4770  }
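
/*
 * Minimal sketch of how the system PM callbacks are expected to drive the
 * suspend/resume helpers (simplified; the real callbacks also set
 * adev->in_s3/in_s0ix as appropriate before calling in):
 *
 *	r = amdgpu_device_prepare(drm_dev);
 *	if (!r)
 *		r = amdgpu_device_suspend(drm_dev, true);
 *	...
 *	r = amdgpu_device_resume(drm_dev, true);
 */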
4771  
4772  /**
4773   * amdgpu_device_resume - initiate device resume
4774   *
4775   * @dev: drm dev pointer
4776   * @fbcon: notify the fbdev of resume
4777   *
4778   * Bring the hw back to operating state (all asics).
4779   * Returns 0 for success or an error on failure.
4780   * Called at driver resume.
4781   */
4782  int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4783  {
4784  	struct amdgpu_device *adev = drm_to_adev(dev);
4785  	int r = 0;
4786  
4787  	if (amdgpu_sriov_vf(adev)) {
4788  		r = amdgpu_virt_request_full_gpu(adev, true);
4789  		if (r)
4790  			return r;
4791  	}
4792  
4793  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4794  		return 0;
4795  
4796  	if (adev->in_s0ix)
4797  		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4798  
4799  	/* post card */
4800  	if (amdgpu_device_need_post(adev)) {
4801  		r = amdgpu_device_asic_init(adev);
4802  		if (r)
4803  			dev_err(adev->dev, "amdgpu asic init failed\n");
4804  	}
4805  
4806  	r = amdgpu_device_ip_resume(adev);
4807  
4808  	if (r) {
4809  		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4810  		goto exit;
4811  	}
4812  	amdgpu_fence_driver_hw_init(adev);
4813  
4814  	if (!adev->in_s0ix) {
4815  		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4816  		if (r)
4817  			goto exit;
4818  	}
4819  
4820  	r = amdgpu_device_ip_late_init(adev);
4821  	if (r)
4822  		goto exit;
4823  
4824  	queue_delayed_work(system_wq, &adev->delayed_init_work,
4825  			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4826  exit:
4827  	if (amdgpu_sriov_vf(adev)) {
4828  		amdgpu_virt_init_data_exchange(adev);
4829  		amdgpu_virt_release_full_gpu(adev, true);
4830  	}
4831  
4832  	if (r)
4833  		return r;
4834  
4835  	/* Make sure IB tests flushed */
4836  	flush_delayed_work(&adev->delayed_init_work);
4837  
4838  	if (fbcon)
4839  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4840  
4841  	amdgpu_ras_resume(adev);
4842  
4843  	if (adev->mode_info.num_crtc) {
4844  		/*
4845  		 * Most of the connector probing functions try to acquire runtime pm
4846  		 * refs to ensure that the GPU is powered on when connector polling is
4847  		 * performed. Since we're calling this from a runtime PM callback,
4848  		 * trying to acquire rpm refs will cause us to deadlock.
4849  		 *
4850  		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4851  		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4852  		 */
4853  #ifdef CONFIG_PM
4854  		dev->dev->power.disable_depth++;
4855  #endif
4856  		if (!adev->dc_enabled)
4857  			drm_helper_hpd_irq_event(dev);
4858  		else
4859  			drm_kms_helper_hotplug_event(dev);
4860  #ifdef CONFIG_PM
4861  		dev->dev->power.disable_depth--;
4862  #endif
4863  	}
4864  	adev->in_suspend = false;
4865  
4866  	if (adev->enable_mes)
4867  		amdgpu_mes_self_test(adev);
4868  
4869  	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4870  		DRM_WARN("smart shift update failed\n");
4871  
4872  	return 0;
4873  }
4874  
4875  /**
4876   * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4877   *
4878   * @adev: amdgpu_device pointer
4879   *
4880   * The list of all the hardware IPs that make up the asic is walked and
4881   * the check_soft_reset callbacks are run.  check_soft_reset determines
4882   * if the asic is still hung or not.
4883   * Returns true if any of the IPs are still in a hung state, false if not.
4884   */
4885  static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4886  {
4887  	int i;
4888  	bool asic_hang = false;
4889  
4890  	if (amdgpu_sriov_vf(adev))
4891  		return true;
4892  
4893  	if (amdgpu_asic_need_full_reset(adev))
4894  		return true;
4895  
4896  	for (i = 0; i < adev->num_ip_blocks; i++) {
4897  		if (!adev->ip_blocks[i].status.valid)
4898  			continue;
4899  		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4900  			adev->ip_blocks[i].status.hang =
4901  				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4902  		if (adev->ip_blocks[i].status.hang) {
4903  			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4904  			asic_hang = true;
4905  		}
4906  	}
4907  	return asic_hang;
4908  }
4909  
4910  /**
4911   * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4912   *
4913   * @adev: amdgpu_device pointer
4914   *
4915   * The list of all the hardware IPs that make up the asic is walked and the
4916   * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4917   * handles any IP specific hardware or software state changes that are
4918   * necessary for a soft reset to succeed.
4919   * Returns 0 on success, negative error code on failure.
4920   */
4921  static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4922  {
4923  	int i, r = 0;
4924  
4925  	for (i = 0; i < adev->num_ip_blocks; i++) {
4926  		if (!adev->ip_blocks[i].status.valid)
4927  			continue;
4928  		if (adev->ip_blocks[i].status.hang &&
4929  		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4930  			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4931  			if (r)
4932  				return r;
4933  		}
4934  	}
4935  
4936  	return 0;
4937  }
4938  
4939  /**
4940   * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4941   *
4942   * @adev: amdgpu_device pointer
4943   *
4944   * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4945   * reset is necessary to recover.
4946   * Returns true if a full asic reset is required, false if not.
4947   */
4948  static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4949  {
4950  	int i;
4951  
4952  	if (amdgpu_asic_need_full_reset(adev))
4953  		return true;
4954  
4955  	for (i = 0; i < adev->num_ip_blocks; i++) {
4956  		if (!adev->ip_blocks[i].status.valid)
4957  			continue;
4958  		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4959  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4960  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4961  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4962  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4963  			if (adev->ip_blocks[i].status.hang) {
4964  				dev_info(adev->dev, "Some block need full reset!\n");
4965  				return true;
4966  			}
4967  		}
4968  	}
4969  	return false;
4970  }
4971  
4972  /**
4973   * amdgpu_device_ip_soft_reset - do a soft reset
4974   *
4975   * @adev: amdgpu_device pointer
4976   *
4977   * The list of all the hardware IPs that make up the asic is walked and the
4978   * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4979   * IP specific hardware or software state changes that are necessary to soft
4980   * reset the IP.
4981   * Returns 0 on success, negative error code on failure.
4982   */
4983  static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4984  {
4985  	int i, r = 0;
4986  
4987  	for (i = 0; i < adev->num_ip_blocks; i++) {
4988  		if (!adev->ip_blocks[i].status.valid)
4989  			continue;
4990  		if (adev->ip_blocks[i].status.hang &&
4991  		    adev->ip_blocks[i].version->funcs->soft_reset) {
4992  			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4993  			if (r)
4994  				return r;
4995  		}
4996  	}
4997  
4998  	return 0;
4999  }
5000  
5001  /**
5002   * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5003   *
5004   * @adev: amdgpu_device pointer
5005   *
5006   * The list of all the hardware IPs that make up the asic is walked and the
5007   * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
5008   * handles any IP specific hardware or software state changes that are
5009   * necessary after the IP has been soft reset.
5010   * Returns 0 on success, negative error code on failure.
5011   */
5012  static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5013  {
5014  	int i, r = 0;
5015  
5016  	for (i = 0; i < adev->num_ip_blocks; i++) {
5017  		if (!adev->ip_blocks[i].status.valid)
5018  			continue;
5019  		if (adev->ip_blocks[i].status.hang &&
5020  		    adev->ip_blocks[i].version->funcs->post_soft_reset)
5021  			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
5022  		if (r)
5023  			return r;
5024  	}
5025  
5026  	return 0;
5027  }
5028  
5029  /**
5030   * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5031   *
5032   * @adev: amdgpu_device pointer
5033   * @reset_context: amdgpu reset context pointer
5034   *
5035   * Do a VF FLR and reinitialize the ASIC.
5036   * Returns 0 on success, negative error code on failure.
5037   */
5038  static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5039  				     struct amdgpu_reset_context *reset_context)
5040  {
5041  	int r;
5042  	struct amdgpu_hive_info *hive = NULL;
5043  
5044  	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5045  		if (!amdgpu_ras_get_fed_status(adev))
5046  			amdgpu_virt_ready_to_reset(adev);
5047  		amdgpu_virt_wait_reset(adev);
5048  		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5049  		r = amdgpu_virt_request_full_gpu(adev, true);
5050  	} else {
5051  		r = amdgpu_virt_reset_gpu(adev);
5052  	}
5053  	if (r)
5054  		return r;
5055  
5056  	amdgpu_ras_set_fed(adev, false);
5057  	amdgpu_irq_gpu_reset_resume_helper(adev);
5058  
5059  	/* some SW cleanup the VF needs to do before recovery */
5060  	amdgpu_virt_post_reset(adev);
5061  
5062  	/* Resume IP prior to SMC */
5063  	r = amdgpu_device_ip_reinit_early_sriov(adev);
5064  	if (r)
5065  		return r;
5066  
5067  	amdgpu_virt_init_data_exchange(adev);
5068  
5069  	r = amdgpu_device_fw_loading(adev);
5070  	if (r)
5071  		return r;
5072  
5073  	/* now we are okay to resume SMC/CP/SDMA */
5074  	r = amdgpu_device_ip_reinit_late_sriov(adev);
5075  	if (r)
5076  		return r;
5077  
5078  	hive = amdgpu_get_xgmi_hive(adev);
5079  	/* Update PSP FW topology after reset */
5080  	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5081  		r = amdgpu_xgmi_update_topology(hive, adev);
5082  	if (hive)
5083  		amdgpu_put_xgmi_hive(hive);
5084  	if (r)
5085  		return r;
5086  
5087  	r = amdgpu_ib_ring_tests(adev);
5088  	if (r)
5089  		return r;
5090  
5091  	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5092  		amdgpu_inc_vram_lost(adev);
5093  
5094  	/* This needs to be called while we still have full access to the GPU,
5095  	 * so it can't be deferred like on bare metal.
5096  	 */
5097  	amdgpu_amdkfd_post_reset(adev);
5098  	amdgpu_virt_release_full_gpu(adev, true);
5099  
5100  	/* Aldebaran, GC 9.4.3/9.4.4 and gfx_11_0_3 support RAS in SR-IOV, so RAS needs to be resumed during reset */
5101  	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5102  	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5103  	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5104  	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5105  		amdgpu_ras_resume(adev);
5106  	return 0;
5107  }
5108  
5109  /**
5110   * amdgpu_device_has_job_running - check if there is any job in the pending list
5111   *
5112   * @adev: amdgpu_device pointer
5113   *
5114   * Check whether any scheduler ring still has a job in its pending list.
5115   */
5116  bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5117  {
5118  	int i;
5119  	struct drm_sched_job *job;
5120  
5121  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5122  		struct amdgpu_ring *ring = adev->rings[i];
5123  
5124  		if (!amdgpu_ring_sched_ready(ring))
5125  			continue;
5126  
5127  		spin_lock(&ring->sched.job_list_lock);
5128  		job = list_first_entry_or_null(&ring->sched.pending_list,
5129  					       struct drm_sched_job, list);
5130  		spin_unlock(&ring->sched.job_list_lock);
5131  		if (job)
5132  			return true;
5133  	}
5134  	return false;
5135  }
5136  
5137  /**
5138   * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5139   *
5140   * @adev: amdgpu_device pointer
5141   *
5142   * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5143   * a hung GPU.
5144   */
5145  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5146  {
5147  
5148  	if (amdgpu_gpu_recovery == 0)
5149  		goto disabled;
5150  
5151  	/* Skip soft reset check in fatal error mode */
5152  	if (!amdgpu_ras_is_poison_mode_supported(adev))
5153  		return true;
5154  
5155  	if (amdgpu_sriov_vf(adev))
5156  		return true;
5157  
5158  	if (amdgpu_gpu_recovery == -1) {
5159  		switch (adev->asic_type) {
5160  #ifdef CONFIG_DRM_AMDGPU_SI
5161  		case CHIP_VERDE:
5162  		case CHIP_TAHITI:
5163  		case CHIP_PITCAIRN:
5164  		case CHIP_OLAND:
5165  		case CHIP_HAINAN:
5166  #endif
5167  #ifdef CONFIG_DRM_AMDGPU_CIK
5168  		case CHIP_KAVERI:
5169  		case CHIP_KABINI:
5170  		case CHIP_MULLINS:
5171  #endif
5172  		case CHIP_CARRIZO:
5173  		case CHIP_STONEY:
5174  		case CHIP_CYAN_SKILLFISH:
5175  			goto disabled;
5176  		default:
5177  			break;
5178  		}
5179  	}
5180  
5181  	return true;
5182  
5183  disabled:
5184  		dev_info(adev->dev, "GPU recovery disabled.\n");
5185  		return false;
5186  }
5187  
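/**
 * amdgpu_device_mode1_reset - perform a full ASIC mode1 reset
 *
 * @adev: amdgpu_device pointer
 *
 * Cache the PCI config space, disable bus mastering and trigger a mode1
 * reset through the SMU if supported, otherwise through the PSP, then
 * wait for the ASIC to come back out of reset.
 * Returns 0 on success, negative error code on failure.
 */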
5188  int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5189  {
5190  	u32 i;
5191  	int ret = 0;
5192  
5193  	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5194  
5195  	dev_info(adev->dev, "GPU mode1 reset\n");
5196  
5197  	/* Cache the state before bus master disable. The saved config space
5198  	 * values are used in other cases like restore after mode-2 reset.
5199  	 */
5200  	amdgpu_device_cache_pci_state(adev->pdev);
5201  
5202  	/* disable BM */
5203  	pci_clear_master(adev->pdev);
5204  
5205  	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5206  		dev_info(adev->dev, "GPU smu mode1 reset\n");
5207  		ret = amdgpu_dpm_mode1_reset(adev);
5208  	} else {
5209  		dev_info(adev->dev, "GPU psp mode1 reset\n");
5210  		ret = psp_gpu_reset(adev);
5211  	}
5212  
5213  	if (ret)
5214  		goto mode1_reset_failed;
5215  
5216  	amdgpu_device_load_pci_state(adev->pdev);
5217  	ret = amdgpu_psp_wait_for_bootloader(adev);
5218  	if (ret)
5219  		goto mode1_reset_failed;
5220  
5221  	/* wait for asic to come out of reset */
5222  	for (i = 0; i < adev->usec_timeout; i++) {
5223  		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5224  
5225  		if (memsize != 0xffffffff)
5226  			break;
5227  		udelay(1);
5228  	}
5229  
5230  	if (i >= adev->usec_timeout) {
5231  		ret = -ETIMEDOUT;
5232  		goto mode1_reset_failed;
5233  	}
5234  
5235  	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5236  
5237  	return 0;
5238  
5239  mode1_reset_failed:
5240  	dev_err(adev->dev, "GPU mode1 reset failed\n");
5241  	return ret;
5242  }
5243  
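/**
 * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * Block the schedulers, clear pending job fences and force-complete the HW
 * fences, try a soft reset of the hung IP blocks where possible, dump the
 * IP state for debugging, and suspend the IP blocks when a full reset is
 * required.
 * Returns 0 on success, negative error code on failure.
 */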
5244  int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5245  				 struct amdgpu_reset_context *reset_context)
5246  {
5247  	int i, r = 0;
5248  	struct amdgpu_job *job = NULL;
5249  	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5250  	bool need_full_reset =
5251  		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5252  
5253  	if (reset_context->reset_req_dev == adev)
5254  		job = reset_context->job;
5255  
5256  	if (amdgpu_sriov_vf(adev))
5257  		amdgpu_virt_pre_reset(adev);
5258  
5259  	amdgpu_fence_driver_isr_toggle(adev, true);
5260  
5261  	/* block all schedulers and reset given job's ring */
5262  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5263  		struct amdgpu_ring *ring = adev->rings[i];
5264  
5265  		if (!amdgpu_ring_sched_ready(ring))
5266  			continue;
5267  
5268  		/* Clear the job fences from the fence driver so that after
5269  		 * force_completion only the NULL and VM flush fences are left.
5270  		 */
5271  		amdgpu_fence_driver_clear_job_fences(ring);
5272  
5273  		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5274  		amdgpu_fence_driver_force_completion(ring);
5275  	}
5276  
5277  	amdgpu_fence_driver_isr_toggle(adev, false);
5278  
5279  	if (job && job->vm)
5280  		drm_sched_increase_karma(&job->base);
5281  
5282  	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5283  	/* If reset handler not implemented, continue; otherwise return */
5284  	if (r == -EOPNOTSUPP)
5285  		r = 0;
5286  	else
5287  		return r;
5288  
5289  	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5290  	if (!amdgpu_sriov_vf(adev)) {
5291  
5292  		if (!need_full_reset)
5293  			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5294  
5295  		if (!need_full_reset && amdgpu_gpu_recovery &&
5296  		    amdgpu_device_ip_check_soft_reset(adev)) {
5297  			amdgpu_device_ip_pre_soft_reset(adev);
5298  			r = amdgpu_device_ip_soft_reset(adev);
5299  			amdgpu_device_ip_post_soft_reset(adev);
5300  			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5301  				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5302  				need_full_reset = true;
5303  			}
5304  		}
5305  
5306  		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5307  			dev_info(tmp_adev->dev, "Dumping IP State\n");
5308  			/* Trigger ip dump before we reset the asic */
5309  			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5310  				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5311  					tmp_adev->ip_blocks[i].version->funcs
5312  						->dump_ip_state((void *)tmp_adev);
5313  			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5314  		}
5315  
5316  		if (need_full_reset)
5317  			r = amdgpu_device_ip_suspend(adev);
5318  		if (need_full_reset)
5319  			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5320  		else
5321  			clear_bit(AMDGPU_NEED_FULL_RESET,
5322  				  &reset_context->flags);
5323  	}
5324  
5325  	return r;
5326  }
5327  
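/**
 * amdgpu_do_asic_reset - perform the actual ASIC reset and reinitialization
 *
 * @device_list_handle: list of devices to reset (all XGMI hive members or a single device)
 * @reset_context: amdgpu reset context pointer
 *
 * Try the registered reset handler first; if none is implemented, fall back
 * to a full ASIC reset (run in parallel for XGMI hives), re-post the card,
 * resume the IP blocks and re-run the IB ring tests.
 * Returns 0 on success, negative error code on failure.
 */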
5328  int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5329  			 struct amdgpu_reset_context *reset_context)
5330  {
5331  	struct amdgpu_device *tmp_adev = NULL;
5332  	bool need_full_reset, skip_hw_reset, vram_lost = false;
5333  	int r = 0;
5334  
5335  	/* Try reset handler method first */
5336  	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5337  				    reset_list);
5338  
5339  	reset_context->reset_device_list = device_list_handle;
5340  	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5341  	/* If reset handler not implemented, continue; otherwise return */
5342  	if (r == -EOPNOTSUPP)
5343  		r = 0;
5344  	else
5345  		return r;
5346  
5347  	/* Reset handler not implemented, use the default method */
5348  	need_full_reset =
5349  		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5350  	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5351  
5352  	/*
5353  	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5354  	 * to allow proper link negotiation in FW (within 1 sec)
5355  	 */
5356  	if (!skip_hw_reset && need_full_reset) {
5357  		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5358  			/* For XGMI run all resets in parallel to speed up the process */
5359  			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5360  				tmp_adev->gmc.xgmi.pending_reset = false;
5361  				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5362  					r = -EALREADY;
5363  			} else
5364  				r = amdgpu_asic_reset(tmp_adev);
5365  
5366  			if (r) {
5367  				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5368  					 r, adev_to_drm(tmp_adev)->unique);
5369  				goto out;
5370  			}
5371  		}
5372  
5373  		/* For XGMI wait for all resets to complete before proceeding */
5374  		if (!r) {
5375  			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5376  				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5377  					flush_work(&tmp_adev->xgmi_reset_work);
5378  					r = tmp_adev->asic_reset_res;
5379  					if (r)
5380  						break;
5381  				}
5382  			}
5383  		}
5384  	}
5385  
5386  	if (!r && amdgpu_ras_intr_triggered()) {
5387  		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5388  			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5389  		}
5390  
5391  		amdgpu_ras_intr_cleared();
5392  	}
5393  
5394  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5395  		if (need_full_reset) {
5396  			/* post card */
5397  			amdgpu_ras_set_fed(tmp_adev, false);
5398  			r = amdgpu_device_asic_init(tmp_adev);
5399  			if (r) {
5400  				dev_warn(tmp_adev->dev, "asic atom init failed!");
5401  			} else {
5402  				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5403  
5404  				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5405  				if (r)
5406  					goto out;
5407  
5408  				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5409  
5410  				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5411  					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5412  
5413  				if (vram_lost) {
5414  					DRM_INFO("VRAM is lost due to GPU reset!\n");
5415  					amdgpu_inc_vram_lost(tmp_adev);
5416  				}
5417  
5418  				r = amdgpu_device_fw_loading(tmp_adev);
5419  				if (r)
5420  					return r;
5421  
5422  				r = amdgpu_xcp_restore_partition_mode(
5423  					tmp_adev->xcp_mgr);
5424  				if (r)
5425  					goto out;
5426  
5427  				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5428  				if (r)
5429  					goto out;
5430  
5431  				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5432  					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5433  
5434  				if (vram_lost)
5435  					amdgpu_device_fill_reset_magic(tmp_adev);
5436  
5437  				/*
5438  				 * Add this ASIC back as tracked since the reset
5439  				 * has already completed successfully.
5440  				 */
5441  				amdgpu_register_gpu_instance(tmp_adev);
5442  
5443  				if (!reset_context->hive &&
5444  				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5445  					amdgpu_xgmi_add_device(tmp_adev);
5446  
5447  				r = amdgpu_device_ip_late_init(tmp_adev);
5448  				if (r)
5449  					goto out;
5450  
5451  				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5452  
5453  				/*
5454  				 * The GPU enters a bad state once the number of
5455  				 * faulty pages caused by ECC errors reaches the
5456  				 * threshold, and RAS recovery is scheduled next.
5457  				 * So check here whether the bad page threshold
5458  				 * has indeed been exceeded and, if so, abort the
5459  				 * recovery and remind the user to either retire
5460  				 * this GPU or set a bigger bad_page_threshold
5461  				 * value when probing the driver again.
5462  				 */
5463  				if (!amdgpu_ras_is_rma(tmp_adev)) {
5464  					/* must succeed. */
5465  					amdgpu_ras_resume(tmp_adev);
5466  				} else {
5467  					r = -EINVAL;
5468  					goto out;
5469  				}
5470  
5471  				/* Update PSP FW topology after reset */
5472  				if (reset_context->hive &&
5473  				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5474  					r = amdgpu_xgmi_update_topology(
5475  						reset_context->hive, tmp_adev);
5476  			}
5477  		}
5478  
5479  out:
5480  		if (!r) {
5481  			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5482  			r = amdgpu_ib_ring_tests(tmp_adev);
5483  			if (r) {
5484  				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5485  				need_full_reset = true;
5486  				r = -EAGAIN;
5487  				goto end;
5488  			}
5489  		}
5490  
5491  		if (r)
5492  			tmp_adev->asic_reset_res = r;
5493  	}
5494  
5495  end:
5496  	if (need_full_reset)
5497  		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5498  	else
5499  		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5500  	return r;
5501  }
5502  
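/* Set adev->mp1_state to match the reset method that is about to be used. */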
5503  static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5504  {
5505  
5506  	switch (amdgpu_asic_reset_method(adev)) {
5507  	case AMD_RESET_METHOD_MODE1:
5508  		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5509  		break;
5510  	case AMD_RESET_METHOD_MODE2:
5511  		adev->mp1_state = PP_MP1_STATE_RESET;
5512  		break;
5513  	default:
5514  		adev->mp1_state = PP_MP1_STATE_NONE;
5515  		break;
5516  	}
5517  }
5518  
5519  static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5520  {
5521  	amdgpu_vf_error_trans_all(adev);
5522  	adev->mp1_state = PP_MP1_STATE_NONE;
5523  }
5524  
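/* Re-enable runtime PM on the audio function (function 1) of the GPU's
 * bus/slot and resume it once the reset has completed.
 */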
5525  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5526  {
5527  	struct pci_dev *p = NULL;
5528  
5529  	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5530  			adev->pdev->bus->number, 1);
5531  	if (p) {
5532  		pm_runtime_enable(&(p->dev));
5533  		pm_runtime_resume(&(p->dev));
5534  	}
5535  
5536  	pci_dev_put(p);
5537  }
5538  
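/* Put the HDA controller function that shares the GPU's power domain into
 * runtime suspend before a BACO or mode1 reset, waiting at most for the
 * audio autosuspend delay (or 4 seconds if that cannot be determined).
 */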
5539  static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5540  {
5541  	enum amd_reset_method reset_method;
5542  	struct pci_dev *p = NULL;
5543  	u64 expires;
5544  
5545  	/*
5546  	 * For now, only BACO and mode1 reset are confirmed to suffer
5547  	 * from the audio issue if audio is not properly suspended.
5548  	 */
5549  	reset_method = amdgpu_asic_reset_method(adev);
5550  	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5551  	     (reset_method != AMD_RESET_METHOD_MODE1))
5552  		return -EINVAL;
5553  
5554  	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5555  			adev->pdev->bus->number, 1);
5556  	if (!p)
5557  		return -ENODEV;
5558  
5559  	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5560  	if (!expires)
5561  		/*
5562  		 * If we cannot get the audio device autosuspend delay,
5563  		 * use a fixed 4s interval. Since 3s is the audio
5564  		 * controller's default autosuspend delay, the 4s used
5565  		 * here is guaranteed to cover it.
5566  		 */
5567  		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5568  
5569  	while (!pm_runtime_status_suspended(&(p->dev))) {
5570  		if (!pm_runtime_suspend(&(p->dev)))
5571  			break;
5572  
5573  		if (expires < ktime_get_mono_fast_ns()) {
5574  			dev_warn(adev->dev, "failed to suspend display audio\n");
5575  			pci_dev_put(p);
5576  			/* TODO: abort the succeeding gpu reset? */
5577  			return -ETIMEDOUT;
5578  		}
5579  	}
5580  
5581  	pm_runtime_disable(&(p->dev));
5582  
5583  	pci_dev_put(p);
5584  	return 0;
5585  }
5586  
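/* Cancel any non-scheduler reset work that was queued before the reset completed. */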
5587  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5588  {
5589  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5590  
5591  #if defined(CONFIG_DEBUG_FS)
5592  	if (!amdgpu_sriov_vf(adev))
5593  		cancel_work(&adev->reset_work);
5594  #endif
5595  
5596  	if (adev->kfd.dev)
5597  		cancel_work(&adev->kfd.reset_work);
5598  
5599  	if (amdgpu_sriov_vf(adev))
5600  		cancel_work(&adev->virt.flr_work);
5601  
5602  	if (con && adev->ras_enabled)
5603  		cancel_work(&con->recovery_work);
5604  
5605  }
5606  
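/* Read a config register from each device in the list to make sure it is
 * still reachable on the bus before attempting a reset.
 */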
5607  static int amdgpu_device_health_check(struct list_head *device_list_handle)
5608  {
5609  	struct amdgpu_device *tmp_adev;
5610  	int ret = 0;
5611  	u32 status;
5612  
5613  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5614  		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5615  		if (PCI_POSSIBLE_ERROR(status)) {
5616  			dev_err(tmp_adev->dev, "device lost from bus!");
5617  			ret = -ENODEV;
5618  		}
5619  	}
5620  
5621  	return ret;
5622  }
5623  
5624  /**
5625   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5626   *
5627   * @adev: amdgpu_device pointer
5628   * @job: which job trigger hang
5629   * @reset_context: amdgpu reset context pointer
5630   *
5631   * Attempt to reset the GPU if it has hung (all asics):
5632   * do a soft reset or a full reset and reinitialize the ASIC.
5633   * Returns 0 for success or an error on failure.
5634   */
5635  
5636  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5637  			      struct amdgpu_job *job,
5638  			      struct amdgpu_reset_context *reset_context)
5639  {
5640  	struct list_head device_list, *device_list_handle =  NULL;
5641  	bool job_signaled = false;
5642  	struct amdgpu_hive_info *hive = NULL;
5643  	struct amdgpu_device *tmp_adev = NULL;
5644  	int i, r = 0;
5645  	bool need_emergency_restart = false;
5646  	bool audio_suspended = false;
5647  	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5648  
5649  	/*
5650  	 * Special case: RAS triggered and full reset isn't supported
5651  	 */
5652  	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5653  
5654  	/*
5655  	 * Flush RAM to disk so that after reboot
5656  	 * the user can read log and see why the system rebooted.
5657  	 */
5658  	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5659  		amdgpu_ras_get_context(adev)->reboot) {
5660  		DRM_WARN("Emergency reboot.");
5661  
5662  		ksys_sync_helper();
5663  		emergency_restart();
5664  	}
5665  
5666  	dev_info(adev->dev, "GPU %s begin!\n",
5667  		need_emergency_restart ? "jobs stop":"reset");
5668  
5669  	if (!amdgpu_sriov_vf(adev))
5670  		hive = amdgpu_get_xgmi_hive(adev);
5671  	if (hive)
5672  		mutex_lock(&hive->hive_lock);
5673  
5674  	reset_context->job = job;
5675  	reset_context->hive = hive;
5676  	/*
5677  	 * Build list of devices to reset.
5678  	 * In case we are in XGMI hive mode, resort the device list
5679  	 * to put adev in the 1st position.
5680  	 */
5681  	INIT_LIST_HEAD(&device_list);
5682  	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5683  		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5684  			list_add_tail(&tmp_adev->reset_list, &device_list);
5685  			if (adev->shutdown)
5686  				tmp_adev->shutdown = true;
5687  		}
5688  		if (!list_is_first(&adev->reset_list, &device_list))
5689  			list_rotate_to_front(&adev->reset_list, &device_list);
5690  		device_list_handle = &device_list;
5691  	} else {
5692  		list_add_tail(&adev->reset_list, &device_list);
5693  		device_list_handle = &device_list;
5694  	}
5695  
5696  	if (!amdgpu_sriov_vf(adev)) {
5697  		r = amdgpu_device_health_check(device_list_handle);
5698  		if (r)
5699  			goto end_reset;
5700  	}
5701  
5702  	/* We need to lock reset domain only once both for XGMI and single device */
5703  	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5704  				    reset_list);
5705  	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5706  
5707  	/* block all schedulers and reset given job's ring */
5708  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5709  
5710  		amdgpu_device_set_mp1_state(tmp_adev);
5711  
5712  		/*
5713  		 * Try to put the audio codec into suspend state
5714  		 * before the gpu reset starts.
5715  		 *
5716  		 * The power domain of the graphics device is
5717  		 * shared with the AZ power domain. Without this,
5718  		 * we may change the audio hardware from behind
5719  		 * the audio driver's back. That would trigger
5720  		 * some audio codec errors.
5721  		 */
5722  		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5723  			audio_suspended = true;
5724  
5725  		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5726  
5727  		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5728  
5729  		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5730  
5731  		/*
5732  		 * Mark these ASICs to be reset as untracked first,
5733  		 * and add them back after the reset has completed.
5734  		 */
5735  		amdgpu_unregister_gpu_instance(tmp_adev);
5736  
5737  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5738  
5739  		/* disable ras on ALL IPs */
5740  		if (!need_emergency_restart &&
5741  		      amdgpu_device_ip_need_full_reset(tmp_adev))
5742  			amdgpu_ras_suspend(tmp_adev);
5743  
5744  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5745  			struct amdgpu_ring *ring = tmp_adev->rings[i];
5746  
5747  			if (!amdgpu_ring_sched_ready(ring))
5748  				continue;
5749  
5750  			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5751  
5752  			if (need_emergency_restart)
5753  				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5754  		}
5755  		atomic_inc(&tmp_adev->gpu_reset_counter);
5756  	}
5757  
5758  	if (need_emergency_restart)
5759  		goto skip_sched_resume;
5760  
5761  	/*
5762  	 * Must check guilty signal here since after this point all old
5763  	 * HW fences are force signaled.
5764  	 *
5765  	 * job->base holds a reference to parent fence
5766  	 */
5767  	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5768  		job_signaled = true;
5769  		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5770  		goto skip_hw_reset;
5771  	}
5772  
5773  retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5774  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5775  		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5776  		/*TODO Should we stop ?*/
5777  		if (r) {
5778  			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5779  				  r, adev_to_drm(tmp_adev)->unique);
5780  			tmp_adev->asic_reset_res = r;
5781  		}
5782  	}
5783  
5784  	/* Actual ASIC resets if needed.*/
5785  	/* Host driver will handle XGMI hive reset for SRIOV */
5786  	if (amdgpu_sriov_vf(adev)) {
5787  		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5788  			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5789  			amdgpu_ras_set_fed(adev, true);
5790  			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5791  		}
5792  
5793  		r = amdgpu_device_reset_sriov(adev, reset_context);
5794  		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5795  			amdgpu_virt_release_full_gpu(adev, true);
5796  			goto retry;
5797  		}
5798  		if (r)
5799  			adev->asic_reset_res = r;
5800  	} else {
5801  		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5802  		if (r && r == -EAGAIN)
5803  			goto retry;
5804  	}
5805  
5806  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5807  		/*
5808  		 * Drop any pending non scheduler resets queued before reset is done.
5809  		 * Any reset scheduled after this point would be valid. Scheduler resets
5810  		 * were already dropped during drm_sched_stop and no new ones can come
5811  		 * in before drm_sched_start.
5812  		 */
5813  		amdgpu_device_stop_pending_resets(tmp_adev);
5814  	}
5815  
5816  skip_hw_reset:
5817  
5818  	/* Post ASIC reset for all devs .*/
5819  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5820  
5821  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5822  			struct amdgpu_ring *ring = tmp_adev->rings[i];
5823  
5824  			if (!amdgpu_ring_sched_ready(ring))
5825  				continue;
5826  
5827  			drm_sched_start(&ring->sched);
5828  		}
5829  
5830  		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5831  			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5832  
5833  		if (tmp_adev->asic_reset_res)
5834  			r = tmp_adev->asic_reset_res;
5835  
5836  		tmp_adev->asic_reset_res = 0;
5837  
5838  		if (r) {
5839  			/* Bad news: how do we tell userspace?
5840  			 * For a RAS error, we should report the GPU's bad status
5841  			 * instead of a reset failure.
5842  			 */
5843  			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5844  			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5845  				dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
5846  					atomic_read(&tmp_adev->gpu_reset_counter));
5847  			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5848  		} else {
5849  			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5850  			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5851  				DRM_WARN("smart shift update failed\n");
5852  		}
5853  	}
5854  
5855  skip_sched_resume:
5856  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5857  		/* unlock kfd: SRIOV would do it separately */
5858  		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5859  			amdgpu_amdkfd_post_reset(tmp_adev);
5860  
5861  		/* kfd_post_reset will do nothing if the kfd device is not initialized,
5862  		 * so bring up kfd here if it was not initialized before.
5863  		 */
5864  		if (!adev->kfd.init_complete)
5865  			amdgpu_amdkfd_device_init(adev);
5866  
5867  		if (audio_suspended)
5868  			amdgpu_device_resume_display_audio(tmp_adev);
5869  
5870  		amdgpu_device_unset_mp1_state(tmp_adev);
5871  
5872  		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5873  	}
5874  
5875  	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5876  					    reset_list);
5877  	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5878  
5879  end_reset:
5880  	if (hive) {
5881  		mutex_unlock(&hive->hive_lock);
5882  		amdgpu_put_xgmi_hive(hive);
5883  	}
5884  
5885  	if (r)
5886  		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5887  
5888  	atomic_set(&adev->reset_domain->reset_res, r);
5889  	return r;
5890  }
5891  
5892  /**
5893   * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5894   *
5895   * @adev: amdgpu_device pointer
5896   * @speed: pointer to the speed of the link
5897   * @width: pointer to the width of the link
5898   *
5899   * Evaluate the hierarchy to find the speed and width capabilities of the
5900   * first physical partner to an AMD dGPU.
5901   * This will exclude any virtual switches and links.
5902   */
5903  static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5904  					    enum pci_bus_speed *speed,
5905  					    enum pcie_link_width *width)
5906  {
5907  	struct pci_dev *parent = adev->pdev;
5908  
5909  	if (!speed || !width)
5910  		return;
5911  
5912  	*speed = PCI_SPEED_UNKNOWN;
5913  	*width = PCIE_LNK_WIDTH_UNKNOWN;
5914  
5915  	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5916  		while ((parent = pci_upstream_bridge(parent))) {
5917  			/* skip upstream/downstream switches internal to the dGPU */
5918  			if (parent->vendor == PCI_VENDOR_ID_ATI)
5919  				continue;
5920  			*speed = pcie_get_speed_cap(parent);
5921  			*width = pcie_get_width_cap(parent);
5922  			break;
5923  		}
5924  	} else {
5925  		/* use the current speeds rather than max if switching is not supported */
5926  		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5927  	}
5928  }
5929  
5930  /**
5931   * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
5932   *
5933   * @adev: amdgpu_device pointer
5934   *
5935   * Fetches and stores in the driver the PCIE capabilities (gen speed
5936   * and lanes) of the slot the device is in. Handles APUs and
5937   * virtualized environments where PCIE config space may not be available.
5938   */
5939  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5940  {
5941  	struct pci_dev *pdev;
5942  	enum pci_bus_speed speed_cap, platform_speed_cap;
5943  	enum pcie_link_width platform_link_width;
5944  
5945  	if (amdgpu_pcie_gen_cap)
5946  		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5947  
5948  	if (amdgpu_pcie_lane_cap)
5949  		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5950  
5951  	/* covers APUs as well */
5952  	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5953  		if (adev->pm.pcie_gen_mask == 0)
5954  			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5955  		if (adev->pm.pcie_mlw_mask == 0)
5956  			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5957  		return;
5958  	}
5959  
5960  	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5961  		return;
5962  
5963  	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5964  					&platform_link_width);
5965  
5966  	if (adev->pm.pcie_gen_mask == 0) {
5967  		/* asic caps */
5968  		pdev = adev->pdev;
5969  		speed_cap = pcie_get_speed_cap(pdev);
5970  		if (speed_cap == PCI_SPEED_UNKNOWN) {
5971  			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5972  						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5973  						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5974  		} else {
5975  			if (speed_cap == PCIE_SPEED_32_0GT)
5976  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5977  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5978  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5979  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5980  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5981  			else if (speed_cap == PCIE_SPEED_16_0GT)
5982  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5983  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5984  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5985  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5986  			else if (speed_cap == PCIE_SPEED_8_0GT)
5987  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5988  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5989  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5990  			else if (speed_cap == PCIE_SPEED_5_0GT)
5991  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5992  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5993  			else
5994  				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5995  		}
5996  		/* platform caps */
5997  		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5998  			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5999  						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6000  		} else {
6001  			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6002  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6003  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6004  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6005  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6006  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6007  			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6008  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6009  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6010  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6011  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6012  			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6013  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6014  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6015  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6016  			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6017  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6018  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6019  			else
6020  				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6021  
6022  		}
6023  	}
6024  	if (adev->pm.pcie_mlw_mask == 0) {
6025  		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6026  			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6027  		} else {
6028  			switch (platform_link_width) {
6029  			case PCIE_LNK_X32:
6030  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6031  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6032  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6033  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6034  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6035  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6036  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6037  				break;
6038  			case PCIE_LNK_X16:
6039  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6040  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6041  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6042  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6043  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6044  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6045  				break;
6046  			case PCIE_LNK_X12:
6047  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6048  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6049  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6050  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6051  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6052  				break;
6053  			case PCIE_LNK_X8:
6054  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6055  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6056  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6057  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6058  				break;
6059  			case PCIE_LNK_X4:
6060  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6061  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6062  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6063  				break;
6064  			case PCIE_LNK_X2:
6065  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6066  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6067  				break;
6068  			case PCIE_LNK_X1:
6069  				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6070  				break;
6071  			default:
6072  				break;
6073  			}
6074  		}
6075  	}
6076  }
6077  
6078  /**
6079   * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6080   *
6081   * @adev: amdgpu_device pointer
6082   * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6083   *
6084   * Return true if @peer_adev can access (DMA) @adev through the PCIe
6085   * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6086   * @peer_adev.
6087   */
6088  bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6089  				      struct amdgpu_device *peer_adev)
6090  {
6091  #ifdef CONFIG_HSA_AMD_P2P
6092  	bool p2p_access =
6093  		!adev->gmc.xgmi.connected_to_cpu &&
6094  		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6095  
6096  	bool is_large_bar = adev->gmc.visible_vram_size &&
6097  		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6098  	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6099  
6100  	if (!p2p_addressable) {
6101  		uint64_t address_mask = peer_adev->dev->dma_mask ?
6102  			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6103  		resource_size_t aper_limit =
6104  			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6105  
6106  		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6107  				     aper_limit & address_mask);
6108  	}
6109  	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6110  #else
6111  	return false;
6112  #endif
6113  }
6114  
6115  int amdgpu_device_baco_enter(struct drm_device *dev)
6116  {
6117  	struct amdgpu_device *adev = drm_to_adev(dev);
6118  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6119  
6120  	if (!amdgpu_device_supports_baco(dev))
6121  		return -ENOTSUPP;
6122  
6123  	if (ras && adev->ras_enabled &&
6124  	    adev->nbio.funcs->enable_doorbell_interrupt)
6125  		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6126  
6127  	return amdgpu_dpm_baco_enter(adev);
6128  }
6129  
6130  int amdgpu_device_baco_exit(struct drm_device *dev)
6131  {
6132  	struct amdgpu_device *adev = drm_to_adev(dev);
6133  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6134  	int ret = 0;
6135  
6136  	if (!amdgpu_device_supports_baco(dev))
6137  		return -ENOTSUPP;
6138  
6139  	ret = amdgpu_dpm_baco_exit(adev);
6140  	if (ret)
6141  		return ret;
6142  
6143  	if (ras && adev->ras_enabled &&
6144  	    adev->nbio.funcs->enable_doorbell_interrupt)
6145  		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6146  
6147  	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6148  	    adev->nbio.funcs->clear_doorbell_interrupt)
6149  		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6150  
6151  	return 0;
6152  }
6153  
6154  /**
6155   * amdgpu_pci_error_detected - Called when a PCI error is detected.
6156   * @pdev: PCI device struct
6157   * @state: PCI channel state
6158   *
6159   * Description: Called when a PCI error is detected.
6160   *
6161   * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6162   */
6163  pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6164  {
6165  	struct drm_device *dev = pci_get_drvdata(pdev);
6166  	struct amdgpu_device *adev = drm_to_adev(dev);
6167  	int i;
6168  
6169  	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6170  
6171  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6172  		DRM_WARN("No support for XGMI hive yet...");
6173  		return PCI_ERS_RESULT_DISCONNECT;
6174  	}
6175  
6176  	adev->pci_channel_state = state;
6177  
6178  	switch (state) {
6179  	case pci_channel_io_normal:
6180  		return PCI_ERS_RESULT_CAN_RECOVER;
6181  	/* Fatal error, prepare for slot reset */
6182  	case pci_channel_io_frozen:
6183  		/*
6184  		 * Locking adev->reset_domain->sem will prevent any external access
6185  		 * to GPU during PCI error recovery
6186  		 */
6187  		amdgpu_device_lock_reset_domain(adev->reset_domain);
6188  		amdgpu_device_set_mp1_state(adev);
6189  
6190  		/*
6191  		 * Block any work scheduling as we do for regular GPU reset
6192  		 * for the duration of the recovery
6193  		 */
6194  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6195  			struct amdgpu_ring *ring = adev->rings[i];
6196  
6197  			if (!amdgpu_ring_sched_ready(ring))
6198  				continue;
6199  
6200  			drm_sched_stop(&ring->sched, NULL);
6201  		}
6202  		atomic_inc(&adev->gpu_reset_counter);
6203  		return PCI_ERS_RESULT_NEED_RESET;
6204  	case pci_channel_io_perm_failure:
6205  		/* Permanent error, prepare for device removal */
6206  		return PCI_ERS_RESULT_DISCONNECT;
6207  	}
6208  
6209  	return PCI_ERS_RESULT_NEED_RESET;
6210  }
6211  
6212  /**
6213   * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6214   * @pdev: pointer to PCI device
6215   */
6216  pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6217  {
6218  
6219  	DRM_INFO("PCI error: mmio enabled callback!!\n");
6220  
6221  	/* TODO - dump whatever for debugging purposes */
6222  
6223  	/* This is called only if amdgpu_pci_error_detected returns
6224  	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6225  	 * works, so there is no need to reset the slot.
6226  	 */
6227  
6228  	return PCI_ERS_RESULT_RECOVERED;
6229  }
6230  
6231  /**
6232   * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6233   * @pdev: PCI device struct
6234   *
6235   * Description: This routine is called by the pci error recovery
6236   * code after the PCI slot has been reset, just before we
6237   * should resume normal operations.
6238   */
6239  pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6240  {
6241  	struct drm_device *dev = pci_get_drvdata(pdev);
6242  	struct amdgpu_device *adev = drm_to_adev(dev);
6243  	int r, i;
6244  	struct amdgpu_reset_context reset_context;
6245  	u32 memsize;
6246  	struct list_head device_list;
6247  
6248  	/* PCI error slot reset should be skipped during RAS recovery */
6249  	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
6250  	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6251  	    amdgpu_ras_in_recovery(adev))
6252  		return PCI_ERS_RESULT_RECOVERED;
6253  
6254  	DRM_INFO("PCI error: slot reset callback!!\n");
6255  
6256  	memset(&reset_context, 0, sizeof(reset_context));
6257  
6258  	INIT_LIST_HEAD(&device_list);
6259  	list_add_tail(&adev->reset_list, &device_list);
6260  
6261  	/* wait for asic to come out of reset */
6262  	msleep(500);
6263  
6264  	/* Restore PCI confspace */
6265  	amdgpu_device_load_pci_state(pdev);
6266  
6267  	/* confirm ASIC came out of reset */
6268  	for (i = 0; i < adev->usec_timeout; i++) {
6269  		memsize = amdgpu_asic_get_config_memsize(adev);
6270  
6271  		if (memsize != 0xffffffff)
6272  			break;
6273  		udelay(1);
6274  	}
6275  	if (memsize == 0xffffffff) {
6276  		r = -ETIME;
6277  		goto out;
6278  	}
6279  
6280  	reset_context.method = AMD_RESET_METHOD_NONE;
6281  	reset_context.reset_req_dev = adev;
6282  	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6283  	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6284  
6285  	adev->no_hw_access = true;
6286  	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6287  	adev->no_hw_access = false;
6288  	if (r)
6289  		goto out;
6290  
6291  	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6292  
6293  out:
6294  	if (!r) {
6295  		if (amdgpu_device_cache_pci_state(adev->pdev))
6296  			pci_restore_state(adev->pdev);
6297  
6298  		DRM_INFO("PCIe error recovery succeeded\n");
6299  	} else {
6300  		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6301  		amdgpu_device_unset_mp1_state(adev);
6302  		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6303  	}
6304  
6305  	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6306  }
6307  
6308  /**
6309   * amdgpu_pci_resume() - resume normal ops after PCI reset
6310   * @pdev: pointer to PCI device
6311   *
6312   * Called when the error recovery driver tells us that it's
6313   * OK to resume normal operation.
6314   */
6315  void amdgpu_pci_resume(struct pci_dev *pdev)
6316  {
6317  	struct drm_device *dev = pci_get_drvdata(pdev);
6318  	struct amdgpu_device *adev = drm_to_adev(dev);
6319  	int i;
6320  
6321  
6322  	DRM_INFO("PCI error: resume callback!!\n");
6323  
6324  	/* Only continue execution for the case of pci_channel_io_frozen */
6325  	if (adev->pci_channel_state != pci_channel_io_frozen)
6326  		return;
6327  
6328  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6329  		struct amdgpu_ring *ring = adev->rings[i];
6330  
6331  		if (!amdgpu_ring_sched_ready(ring))
6332  			continue;
6333  
6334  		drm_sched_start(&ring->sched);
6335  	}
6336  
6337  	amdgpu_device_unset_mp1_state(adev);
6338  	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6339  }
6340  
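/**
 * amdgpu_device_cache_pci_state - cache the PCI config space of the device
 *
 * @pdev: PCI device struct
 *
 * Save the PCI config space and store it in the driver so it can be
 * restored later, e.g. after a GPU reset or PCI error recovery.
 * Returns true on success, false on failure.
 */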
6341  bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6342  {
6343  	struct drm_device *dev = pci_get_drvdata(pdev);
6344  	struct amdgpu_device *adev = drm_to_adev(dev);
6345  	int r;
6346  
6347  	r = pci_save_state(pdev);
6348  	if (!r) {
6349  		kfree(adev->pci_state);
6350  
6351  		adev->pci_state = pci_store_saved_state(pdev);
6352  
6353  		if (!adev->pci_state) {
6354  			DRM_ERROR("Failed to store PCI saved state");
6355  			return false;
6356  		}
6357  	} else {
6358  		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6359  		return false;
6360  	}
6361  
6362  	return true;
6363  }
6364  
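/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 *
 * @pdev: PCI device struct
 *
 * Reload the PCI config space previously cached by
 * amdgpu_device_cache_pci_state() and restore it to the device.
 * Returns true on success, false on failure.
 */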
6365  bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6366  {
6367  	struct drm_device *dev = pci_get_drvdata(pdev);
6368  	struct amdgpu_device *adev = drm_to_adev(dev);
6369  	int r;
6370  
6371  	if (!adev->pci_state)
6372  		return false;
6373  
6374  	r = pci_load_saved_state(pdev, adev->pci_state);
6375  
6376  	if (!r) {
6377  		pci_restore_state(pdev);
6378  	} else {
6379  		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6380  		return false;
6381  	}
6382  
6383  	return true;
6384  }
6385  
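/* Flush the HDP write queue so that CPU writes to VRAM become visible to
 * the GPU. Skipped on APUs (unless passthrough) and when the GPU is
 * connected to the CPU via XGMI.
 */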
6386  void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6387  		struct amdgpu_ring *ring)
6388  {
6389  #ifdef CONFIG_X86_64
6390  	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6391  		return;
6392  #endif
6393  	if (adev->gmc.xgmi.connected_to_cpu)
6394  		return;
6395  
6396  	if (ring && ring->funcs->emit_hdp_flush)
6397  		amdgpu_ring_emit_hdp_flush(ring);
6398  	else
6399  		amdgpu_asic_flush_hdp(adev, ring);
6400  }
6401  
6402  void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6403  		struct amdgpu_ring *ring)
6404  {
6405  #ifdef CONFIG_X86_64
6406  	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6407  		return;
6408  #endif
6409  	if (adev->gmc.xgmi.connected_to_cpu)
6410  		return;
6411  
6412  	amdgpu_asic_invalidate_hdp(adev, ring);
6413  }
6414  
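/* Return non-zero while a GPU reset is in progress in this device's reset domain. */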
6415  int amdgpu_in_reset(struct amdgpu_device *adev)
6416  {
6417  	return atomic_read(&adev->reset_domain->in_gpu_reset);
6418  }
6419  
6420  /**
6421   * amdgpu_device_halt() - bring hardware to some kind of halt state
6422   *
6423   * @adev: amdgpu_device pointer
6424   *
6425   * Bring hardware to some kind of halt state so that no one can touch it
6426   * any more. It will help to maintain error context when error occurred.
6427   * Compare to a simple hang, the system will keep stable at least for SSH
6428   * access. Then it should be trivial to inspect the hardware state and
6429   * see what's going on. Implemented as following:
6430   *
6431   * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6432   *    clears all CPU mappings to device, disallows remappings through page faults
6433   * 2. amdgpu_irq_disable_all() disables all interrupts
6434   * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6435   * 4. set adev->no_hw_access to avoid potential crashes after step 5
6436   * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6437   * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6438   *    flush any in flight DMA operations
6439   */
6440  void amdgpu_device_halt(struct amdgpu_device *adev)
6441  {
6442  	struct pci_dev *pdev = adev->pdev;
6443  	struct drm_device *ddev = adev_to_drm(adev);
6444  
6445  	amdgpu_xcp_dev_unplug(adev);
6446  	drm_dev_unplug(ddev);
6447  
6448  	amdgpu_irq_disable_all(adev);
6449  
6450  	amdgpu_fence_driver_hw_fini(adev);
6451  
6452  	adev->no_hw_access = true;
6453  
6454  	amdgpu_device_unmap_mmio(adev);
6455  
6456  	pci_disable_device(pdev);
6457  	pci_wait_for_pending_transaction(pdev);
6458  }
6459  
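/* Read a PCIE port register through the NBIO index/data pair, serialized
 * by the pcie_idx_lock spinlock.
 */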
6460  u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6461  				u32 reg)
6462  {
6463  	unsigned long flags, address, data;
6464  	u32 r;
6465  
6466  	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6467  	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6468  
6469  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6470  	WREG32(address, reg * 4);
6471  	(void)RREG32(address);
6472  	r = RREG32(data);
6473  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6474  	return r;
6475  }

/**
 * amdgpu_device_pcie_port_wreg - write a PCIE port register
 *
 * @adev: amdgpu_device pointer
 * @reg: port register offset, in dwords
 * @v: 32-bit value to write
 *
 * Writes a PCIE port register through the NBIO index/data register pair,
 * serialized by the pcie_idx_lock spinlock.
 */
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				u32 reg, u32 v)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	WREG32(data, v);
	(void)RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
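
/*
 * Illustrative sketch only, not used by the driver: a hypothetical
 * read-modify-write of a PCIE port register built on the two helpers above.
 * amdgpu_example_pcie_port_rmw() and its parameters are made up for this
 * example.
 */
static void __maybe_unused
amdgpu_example_pcie_port_rmw(struct amdgpu_device *adev, u32 reg,
			     u32 mask, u32 value)
{
	u32 tmp;

	tmp = amdgpu_device_pcie_port_rreg(adev, reg);
	tmp &= ~mask;
	tmp |= value & mask;
	amdgpu_device_pcie_port_wreg(adev, reg, tmp);
}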

/**
 * amdgpu_device_get_gang - return a reference to the current gang
 * @adev: amdgpu_device pointer
 *
 * Returns: A new reference to the current gang leader.
 */
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
	rcu_read_unlock();
	return fence;
}

/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang.
 * Returns: NULL if we switched to the new gang or a reference to the current
 * gang leader.
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	do {
		dma_fence_put(old);
		old = amdgpu_device_get_gang(adev);
		if (old == gang)
			break;

		if (!dma_fence_is_signaled(old))
			return old;

	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	dma_fence_put(old);
	return NULL;
}
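
/*
 * Illustrative sketch only, not used by the driver: a hypothetical caller
 * that wants its fence to become the new gang leader. When
 * amdgpu_device_switch_gang() returns a fence, the previous gang has not
 * signaled yet; one option is to wait for it and retry, as sketched here.
 * amdgpu_example_become_gang_leader() is made up for this example.
 */
static int __maybe_unused
amdgpu_example_become_gang_leader(struct amdgpu_device *adev,
				  struct dma_fence *gang)
{
	struct dma_fence *old;
	long r;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		/* Wait for the previous gang leader, then try again. */
		r = dma_fence_wait(old, true);
		dma_fence_put(old);
		if (r)
			return r;
	}
	return 0;
}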

/**
 * amdgpu_device_has_display_hardware - check whether the ASIC has display hardware
 *
 * @adev: amdgpu_device pointer
 *
 * Returns: true if the ASIC has (non-harvested) display hardware, false for
 * chips without any display block or with the DMU harvested.
 */
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* chips with display hardware */
		return true;
	default:
		/* IP discovery */
		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
			return false;
		return true;
	}
}

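/**
 * amdgpu_device_wait_on_rreg - poll a register until it matches an expected value
 *
 * @adev: amdgpu_device pointer
 * @inst: instance number, only used in the timeout warning
 * @reg_addr: offset of the register to poll
 * @reg_name: register name, only used in the timeout warning
 * @expected_value: value the masked register is expected to reach
 * @mask: bits of the register that are compared
 *
 * Polls the register with a 1 us delay between reads. The adev->usec_timeout
 * budget is restarted whenever the register value changes.
 *
 * Returns: 0 on success, -ETIMEDOUT if the register never reached the
 * expected value.
 */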
uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
		uint32_t inst, uint32_t reg_addr, char reg_name[],
		uint32_t expected_value, uint32_t mask)
{
	uint32_t ret = 0;
	uint32_t old_ = 0;
	uint32_t tmp_ = RREG32(reg_addr);
	uint32_t loop = adev->usec_timeout;

	while ((tmp_ & (mask)) != (expected_value)) {
		if (old_ != tmp_) {
			loop = adev->usec_timeout;
			old_ = tmp_;
		} else
			udelay(1);
		tmp_ = RREG32(reg_addr);
		loop--;
		if (!loop) {
			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
				  inst, reg_name, (uint32_t)expected_value,
				  (uint32_t)(tmp_ & (mask)));
			ret = -ETIMEDOUT;
			break;
		}
	}
	return ret;
}
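
/*
 * Illustrative sketch only, not used by the driver: polling a status
 * register until a "done" bit is set. The register offset, bit and name
 * below are made-up placeholders for this example.
 */
#define EXAMPLE_STATUS_REG	0x1234	/* hypothetical register offset */
#define EXAMPLE_STATUS_DONE	0x1	/* hypothetical "done" bit */

static int __maybe_unused amdgpu_example_wait_done(struct amdgpu_device *adev)
{
	return amdgpu_device_wait_on_rreg(adev, 0, EXAMPLE_STATUS_REG,
					  "EXAMPLE_STATUS", EXAMPLE_STATUS_DONE,
					  EXAMPLE_STATUS_DONE);
}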