/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <linux/debugfs.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_vblank.h>
#include <drm/radeon_drm.h>

#include "atom.h"
#include "r100_reg_safe.h"
#include "r100d.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_reg.h"
#include "rn50_reg_safe.h"
#include "rs100d.h"
#include "rv200d.h"
#include "rv250d.h"

/* Firmware Names */
#define FIRMWARE_R100		"radeon/R100_cp.bin"
#define FIRMWARE_R200		"radeon/R200_cp.bin"
#define FIRMWARE_R300		"radeon/R300_cp.bin"
#define FIRMWARE_R420		"radeon/R420_cp.bin"
#define FIRMWARE_RS690		"radeon/RS690_cp.bin"
#define FIRMWARE_RS600		"radeon/RS600_cp.bin"
#define FIRMWARE_R520		"radeon/R520_cp.bin"

MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);

#include "r100_track.h"

/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 * and, in some cases, others.
 */

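/**
 * r100_is_in_vblank - check if a crtc is currently in vblank
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to query
 *
 * Reads the CRTC status register and returns true if the
 * requested crtc is currently inside its vblank period.
 */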
static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0) {
		if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
			return true;
		else
			return false;
	} else {
		if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
			return true;
		else
			return false;
	}
}

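/**
 * r100_is_counter_moving - check if the vline counter is advancing
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to query
 *
 * Samples the current vline counter twice and returns true if the
 * two reads differ, i.e. the display timing is still running.
 */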
static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
{
	u32 vline1, vline2;

	if (crtc == 0) {
		vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	} else {
		vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	}
	if (vline1 != vline2)
		return true;
	else
		return false;
}

/**
 * r100_wait_for_vblank - vblank wait asic callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to wait for vblank on
 *
 * Wait for vblank on the requested crtc (r1xx-r4xx).
 */
void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
{
	unsigned i = 0;

	if (crtc >= rdev->num_crtc)
		return;

	if (crtc == 0) {
		if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
			return;
	} else {
		if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
			return;
	}

	/* depending on when we hit vblank, we may be close to active; if so,
	 * wait for another frame.
	 */
	while (r100_is_in_vblank(rdev, crtc)) {
		if (i++ % 100 == 0) {
			if (!r100_is_counter_moving(rdev, crtc))
				break;
		}
	}

	while (!r100_is_in_vblank(rdev, crtc)) {
		if (i++ % 100 == 0) {
			if (!r100_is_counter_moving(rdev, crtc))
				break;
		}
	}
}

/**
 * r100_page_flip - pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc_id: crtc to cleanup pageflip on
 * @crtc_base: new address of the crtc (GPU MC address)
 * @async: asynchronous flip
 *
 * Does the actual pageflip (r1xx-r4xx).
 * During vblank we take the crtc lock and wait for the update_pending
 * bit to go high, when it does, we release the lock, and allow the
 * double buffered update to take place.
 */
void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	uint32_t crtc_pitch, pitch_pixels;
	struct drm_framebuffer *fb = radeon_crtc->base.primary->fb;
	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
	int i;

	/* Lock the graphics update lock */
	/* update the scanout addresses */
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* update pitch */
	pitch_pixels = fb->pitches[0] / fb->format->cpp[0];
	crtc_pitch = DIV_ROUND_UP(pitch_pixels * fb->format->cpp[0] * 8,
				  fb->format->cpp[0] * 8 * 8);
	crtc_pitch |= crtc_pitch << 16;
	WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
}

/**
 * r100_page_flip_pending - check if page flip is still pending
 *
 * @rdev: radeon_device pointer
 * @crtc_id: crtc to check
 *
 * Check if the last pageflip is still pending (r1xx-r4xx).
 * Returns the current update pending status.
 */
bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];

	/* Return current update_pending status: */
	return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
		RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
}

/**
 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 *
 * @rdev: radeon_device pointer
 *
 * Look up the optimal power state based on the
 * current state of the GPU (r1xx-r5xx).
 * Used for dynpm only.
 */
void r100_pm_get_dynpm_state(struct radeon_device *rdev)
{
	int i;
	rdev->pm.dynpm_can_upclock = true;
	rdev->pm.dynpm_can_downclock = true;

	switch (rdev->pm.dynpm_planned_action) {
	case DYNPM_ACTION_MINIMUM:
		rdev->pm.requested_power_state_index = 0;
		rdev->pm.dynpm_can_downclock = false;
		break;
	case DYNPM_ACTION_DOWNCLOCK:
		if (rdev->pm.current_power_state_index == 0) {
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_downclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				for (i = 0; i < rdev->pm.num_power_states; i++) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i >= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index - 1;
		}
		/* don't use the power state if crtcs are active and no display flag is set */
		if ((rdev->pm.active_crtc_count > 0) &&
		    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
		     RADEON_PM_MODE_NO_DISPLAY)) {
			rdev->pm.requested_power_state_index++;
		}
		break;
	case DYNPM_ACTION_UPCLOCK:
		if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_upclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i <= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index + 1;
		}
		break;
	case DYNPM_ACTION_DEFAULT:
		rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
		rdev->pm.dynpm_can_upclock = false;
		break;
	case DYNPM_ACTION_NONE:
	default:
		DRM_ERROR("Requested mode for undefined action\n");
		return;
	}
	/* only one clock mode per power state */
	rdev->pm.requested_clock_mode_index = 0;

	DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  clock_info[rdev->pm.requested_clock_mode_index].sclk,
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  clock_info[rdev->pm.requested_clock_mode_index].mclk,
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  pcie_lanes);
}

/**
 * r100_pm_init_profile - Initialize power profiles callback.
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the power states used in profile mode
 * (r1xx-r3xx).
 * Used for profile mode only.
 */
void r100_pm_init_profile(struct radeon_device *rdev)
{
	/* default */
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
	/* low sh */
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
	/* mid sh */
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
	/* high sh */
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
	/* low mh */
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
	/* mid mh */
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
	/* high mh */
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
}

/**
 * r100_pm_misc - set additional pm hw parameters callback.
 *
 * @rdev: radeon_device pointer
 *
 * Set non-clock parameters associated with a power state
 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 */
void r100_pm_misc(struct radeon_device *rdev)
{
	int requested_index = rdev->pm.requested_power_state_index;
	struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
	struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
	u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;

	if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
		if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp |= voltage->gpio.mask;
			else
				tmp &= ~(voltage->gpio.mask);
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		} else {
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp &= ~voltage->gpio.mask;
			else
				tmp |= voltage->gpio.mask;
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		}
	}

	sclk_cntl = RREG32_PLL(SCLK_CNTL);
	sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
	sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
	sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
	sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
	if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
		sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
		if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
		else
			sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
		if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
		else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
	} else
		sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;

	if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
		sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
		if (voltage->delay) {
			sclk_more_cntl |= VOLTAGE_DROP_SYNC;
			switch (voltage->delay) {
			case 33:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
				break;
			case 66:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
				break;
			case 99:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
				break;
			case 132:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
				break;
			}
		} else
			sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
	} else
		sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;

	if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
		sclk_cntl &= ~FORCE_HDP;
	else
		sclk_cntl |= FORCE_HDP;

	WREG32_PLL(SCLK_CNTL, sclk_cntl);
	WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
	WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);

	/* set pcie lanes */
	if ((rdev->flags & RADEON_IS_PCIE) &&
	    !(rdev->flags & RADEON_IS_IGP) &&
	    rdev->asic->pm.set_pcie_lanes &&
	    (ps->pcie_lanes !=
	     rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
		radeon_set_pcie_lanes(rdev,
				      ps->pcie_lanes);
		DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
	}
}

/**
 * r100_pm_prepare - pre-power state change callback.
 *
 * @rdev: radeon_device pointer
 *
 * Prepare for a power state change (r1xx-r4xx).
 */
void r100_pm_prepare(struct radeon_device *rdev)
{
	struct drm_device *ddev = rdev_to_drm(rdev);
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	u32 tmp;

	/* disable any active CRTCs */
	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
		radeon_crtc = to_radeon_crtc(crtc);
		if (radeon_crtc->enabled) {
			if (radeon_crtc->crtc_id) {
				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
				tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
			} else {
				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
				tmp |= RADEON_CRTC_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
			}
		}
	}
}

/**
 * r100_pm_finish - post-power state change callback.
 *
 * @rdev: radeon_device pointer
 *
 * Clean up after a power state change (r1xx-r4xx).
 */
void r100_pm_finish(struct radeon_device *rdev)
{
	struct drm_device *ddev = rdev_to_drm(rdev);
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	u32 tmp;

	/* enable any active CRTCs */
	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
		radeon_crtc = to_radeon_crtc(crtc);
		if (radeon_crtc->enabled) {
			if (radeon_crtc->crtc_id) {
				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
				tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
			} else {
				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
				tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
			}
		}
	}
}

/**
 * r100_gui_idle - gui idle callback.
 *
 * @rdev: radeon_device pointer
 *
 * Check if the GUI (2D/3D engines) is idle (r1xx-r5xx).
 * Returns true if idle, false if not.
 */
bool r100_gui_idle(struct radeon_device *rdev)
{
	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
		return false;
	else
		return true;
}

/* hpd for digital panel detect/disconnect */
/**
 * r100_hpd_sense - hpd sense callback.
 *
 * @rdev: radeon_device pointer
 * @hpd: hpd (hotplug detect) pin
 *
 * Checks if a digital monitor is connected (r1xx-r4xx).
 * Returns true if connected, false if not connected.
 */
bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
{
	bool connected = false;

	switch (hpd) {
	case RADEON_HPD_1:
		if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
			connected = true;
		break;
	case RADEON_HPD_2:
		if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
			connected = true;
		break;
	default:
		break;
	}
	return connected;
}

/**
 * r100_hpd_set_polarity - hpd set polarity callback.
 *
 * @rdev: radeon_device pointer
 * @hpd: hpd (hotplug detect) pin
 *
 * Set the polarity of the hpd pin (r1xx-r4xx).
 */
void r100_hpd_set_polarity(struct radeon_device *rdev,
			   enum radeon_hpd_id hpd)
{
	u32 tmp;
	bool connected = r100_hpd_sense(rdev, hpd);

	switch (hpd) {
	case RADEON_HPD_1:
		tmp = RREG32(RADEON_FP_GEN_CNTL);
		if (connected)
			tmp &= ~RADEON_FP_DETECT_INT_POL;
		else
			tmp |= RADEON_FP_DETECT_INT_POL;
		WREG32(RADEON_FP_GEN_CNTL, tmp);
		break;
	case RADEON_HPD_2:
		tmp = RREG32(RADEON_FP2_GEN_CNTL);
		if (connected)
			tmp &= ~RADEON_FP2_DETECT_INT_POL;
		else
			tmp |= RADEON_FP2_DETECT_INT_POL;
		WREG32(RADEON_FP2_GEN_CNTL, tmp);
		break;
	default:
		break;
	}
}

/**
 * r100_hpd_init - hpd setup callback.
 *
 * @rdev: radeon_device pointer
 *
 * Set up the hpd pins used by the card (r1xx-r4xx).
 * Set the polarity, and enable the hpd interrupts.
 */
void r100_hpd_init(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev_to_drm(rdev);
	struct drm_connector *connector;
	unsigned enable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
			enable |= 1 << radeon_connector->hpd.hpd;
		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
	}
	radeon_irq_kms_enable_hpd(rdev, enable);
}

/**
 * r100_hpd_fini - hpd tear down callback.
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the hpd pins used by the card (r1xx-r4xx).
 * Disable the hpd interrupts.
 */
void r100_hpd_fini(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev_to_drm(rdev);
	struct drm_connector *connector;
	unsigned disable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
			disable |= 1 << radeon_connector->hpd.hpd;
	}
	radeon_irq_kms_disable_hpd(rdev, disable);
}

/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* The hw seems to cache only one entry, so we should discard
	 * that entry; otherwise a first GPU GART read hitting it could
	 * end up at the wrong address. */
}

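/**
 * r100_pci_gart_init - initialize the PCI GART
 *
 * @rdev: radeon_device pointer
 *
 * Set up the common GART structure, install the r100 PCI GART
 * callbacks and allocate the page table in system RAM.
 * Returns 0 on success, error code on failure.
 */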
int r100_pci_gart_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.ptr) {
		WARN(1, "R100 PCI GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r)
		return r;
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
	rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
	return radeon_gart_table_ram_alloc(rdev);
}

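/**
 * r100_pci_gart_enable - enable GART address translation
 *
 * @rdev: radeon_device pointer
 *
 * Program the GART aperture range and page-table base address,
 * then turn on PCI GART translation.
 * Returns 0 on success.
 */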
int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translate */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

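/**
 * r100_pci_gart_disable - disable GART address translation
 *
 * @rdev: radeon_device pointer
 *
 * Turn off PCI GART translation and clear the aperture range.
 */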
void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
{
	return addr;
}

void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
			    uint64_t entry)
{
	u32 *gtt = rdev->gart.ptr;
	gtt[i] = cpu_to_le32(lower_32_bits(entry));
}

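/**
 * r100_pci_gart_fini - tear down the PCI GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable translation, then free the common GART structure and
 * the page table allocated in system RAM.
 */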
void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}

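/**
 * r100_irq_set - program the interrupt mask register
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt sources currently requested (SW interrupt,
 * vblank, hpd) and disable all others.
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */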
int r100_irq_set(struct radeon_device *rdev)
{
	uint32_t tmp = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		WREG32(R_000040_GEN_INT_CNTL, 0);
		return -EINVAL;
	}
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		tmp |= RADEON_SW_INT_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		tmp |= RADEON_CRTC_VBLANK_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		tmp |= RADEON_CRTC2_VBLANK_MASK;
	}
	if (rdev->irq.hpd[0]) {
		tmp |= RADEON_FP_DETECT_MASK;
	}
	if (rdev->irq.hpd[1]) {
		tmp |= RADEON_FP2_DETECT_MASK;
	}
	WREG32(RADEON_GEN_INT_CNTL, tmp);

	/* read back to post the write */
	RREG32(RADEON_GEN_INT_CNTL);

	return 0;
}

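/**
 * r100_irq_disable - disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear the interrupt mask, then acknowledge any interrupt
 * status bits still pending.
 */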
void r100_irq_disable(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(R_000040_GEN_INT_CNTL, 0);
	/* Wait and acknowledge irq */
	mdelay(1);
	tmp = RREG32(R_000044_GEN_INT_STATUS);
	WREG32(R_000044_GEN_INT_STATUS, tmp);
}

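/**
 * r100_irq_ack - acknowledge pending interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Read and acknowledge the interrupt status register.
 * Returns the pending status masked to the sources we handle.
 */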
static uint32_t r100_irq_ack(struct radeon_device *rdev)
{
	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
	uint32_t irq_mask = RADEON_SW_INT_TEST |
		RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
		RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;

	if (irqs) {
		WREG32(RADEON_GEN_INT_STATUS, irqs);
	}
	return irqs & irq_mask;
}

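/**
 * r100_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Ack and dispatch pending interrupts (fences, vblank/pageflip,
 * hotplug) until the status reads clear, then rearm MSI if needed.
 * Returns IRQ_HANDLED if anything was serviced, IRQ_NONE otherwise.
 */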
int r100_irq_process(struct radeon_device *rdev)
{
	uint32_t status, msi_rearm;
	bool queue_hotplug = false;

	status = r100_irq_ack(rdev);
	if (!status) {
		return IRQ_NONE;
	}
	if (rdev->shutdown) {
		return IRQ_NONE;
	}
	while (status) {
		/* SW interrupt */
		if (status & RADEON_SW_INT_TEST) {
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
		}
		/* Vertical blank interrupts */
		if (status & RADEON_CRTC_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[0]) {
				drm_handle_vblank(rdev_to_drm(rdev), 0);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[0]))
				radeon_crtc_handle_vblank(rdev, 0);
		}
		if (status & RADEON_CRTC2_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[1]) {
				drm_handle_vblank(rdev_to_drm(rdev), 1);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[1]))
				radeon_crtc_handle_vblank(rdev, 1);
		}
		if (status & RADEON_FP_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD1\n");
		}
		if (status & RADEON_FP2_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD2\n");
		}
		status = r100_irq_ack(rdev);
	}
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (rdev->msi_enabled) {
		switch (rdev->family) {
		case CHIP_RS400:
		case CHIP_RS480:
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
			WREG32(RADEON_AIC_CNTL, msi_rearm);
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
			break;
		default:
			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
			break;
		}
	}
	return IRQ_HANDLED;
}

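/**
 * r100_get_vblank_counter - get the frame counter for a crtc
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to query
 *
 * Returns the current frame count from the CRTC frame register.
 */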
u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0)
		return RREG32(RADEON_CRTC_CRNT_FRAME);
	else
		return RREG32(RADEON_CRTC2_CRNT_FRAME);
}

/**
 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
 * @rdev: radeon device structure
 * @ring: ring buffer struct for emitting packets
 */
static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
{
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
				RADEON_HDP_READ_BUFFER_INVALIDATE);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
}

/* Whoever calls radeon_fence_emit should call ring_lock and ask
 * for enough space (today the callers are ib schedule and buffer move) */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];

	/* We have to make sure that caches are flushed before
	 * CPU might read something from VRAM. */
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
	r100_ring_hdp_flush(rdev, ring);
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
}

bool r100_semaphore_ring_emit(struct radeon_device *rdev,
			      struct radeon_ring *ring,
			      struct radeon_semaphore *semaphore,
			      bool emit_wait)
{
	/* Unused on older asics, since we don't have semaphores or multiple rings */
	BUG();
	return false;
}

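/**
 * r100_copy_blit - copy pages using the 2D blitter
 *
 * @rdev: radeon_device pointer
 * @src_offset: source GPU address
 * @dst_offset: destination GPU address
 * @num_gpu_pages: number of GPU pages to copy
 * @resv: reservation object
 *
 * Emit BITBLT_MULTI packets to copy the pages, followed by a
 * 2D cache flush and a fence.
 * Returns the fence on success, an ERR_PTR on failure.
 */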
struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct dma_resv *resv)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	struct radeon_fence *fence;
	uint32_t cur_pages;
	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon limited to 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is /64 */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ring, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return ERR_PTR(-EINVAL);
	}
	while (num_gpu_pages > 0) {
		cur_pages = num_gpu_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_gpu_pages -= cur_pages;

		/* pages are in Y direction - height
		   page width in X direction - width */
		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(ring,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return ERR_PTR(r);
	}
	radeon_ring_unlock_commit(rdev, ring, false);
	return fence;
}

static int r100_cp_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(R_000E40_RBBM_STATUS);
		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
			return 0;
		}
		udelay(1);
	}
	return -1;
}

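/**
 * r100_ring_start - kick off the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to start
 *
 * Emit the initial ISYNC_CNTL setup so the 2D and 3D engines
 * idle-sync correctly before the ring is used.
 */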
void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
{
	int r;

	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		return;
	}
	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(ring,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev, ring, false);
}

/* Load the microcode for the CP */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
	const char *fw_name = NULL;
	int err;

	DRM_DEBUG_KMS("\n");

	switch (rdev->family) {
	case CHIP_R100:
	case CHIP_RV100:
	case CHIP_RV200:
	case CHIP_RS100:
	case CHIP_RS200:
		DRM_INFO("Loading R100 Microcode\n");
		fw_name = FIRMWARE_R100;
		break;

	case CHIP_R200:
	case CHIP_RV250:
	case CHIP_RV280:
	case CHIP_RS300:
		DRM_INFO("Loading R200 Microcode\n");
		fw_name = FIRMWARE_R200;
		break;

	case CHIP_R300:
	case CHIP_R350:
	case CHIP_RV350:
	case CHIP_RV380:
	case CHIP_RS400:
	case CHIP_RS480:
		DRM_INFO("Loading R300 Microcode\n");
		fw_name = FIRMWARE_R300;
		break;

	case CHIP_R420:
	case CHIP_R423:
	case CHIP_RV410:
		DRM_INFO("Loading R400 Microcode\n");
		fw_name = FIRMWARE_R420;
		break;

	case CHIP_RS690:
	case CHIP_RS740:
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		fw_name = FIRMWARE_RS690;
		break;

	case CHIP_RS600:
		DRM_INFO("Loading RS600 Microcode\n");
		fw_name = FIRMWARE_RS600;
		break;

	case CHIP_RV515:
	case CHIP_R520:
	case CHIP_RV530:
	case CHIP_R580:
	case CHIP_RV560:
	case CHIP_RV570:
		DRM_INFO("Loading R500 Microcode\n");
		fw_name = FIRMWARE_R520;
		break;

	default:
		DRM_ERROR("Unsupported Radeon family %u\n", rdev->family);
		return -EINVAL;
	}

	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
	} else if (rdev->me_fw->size % 8) {
		pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
	}
	return err;
}

u32 r100_gfx_get_rptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled)
		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
	else
		rptr = RREG32(RADEON_CP_RB_RPTR);

	return rptr;
}

u32 r100_gfx_get_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	return RREG32(RADEON_CP_RB_WPTR);
}

void r100_gfx_set_wptr(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);
	(void)RREG32(RADEON_CP_RB_WPTR);
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i, size;

	if (r100_gui_wait_for_idle(rdev)) {
		pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
	}

	if (rdev->me_fw) {
		size = rdev->me_fw->size / 4;
		fw_data = (const __be32 *)&rdev->me_fw->data[0];
		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
		for (i = 0; i < size; i += 2) {
			WREG32(RADEON_CP_ME_RAM_DATAH,
			       be32_to_cpup(&fw_data[i]));
			WREG32(RADEON_CP_ME_RAM_DATAL,
			       be32_to_cpup(&fw_data[i + 1]));
		}
	}
}

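/**
 * r100_cp_init - initialize the command processor
 *
 * @rdev: radeon_device pointer
 * @ring_size: requested ring size in bytes
 *
 * Load the CP microcode if needed, allocate and program the ring
 * buffer, set up writeback and bus mastering, then test that the
 * ring is working.
 * Returns 0 on success, error code on failure.
 */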
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	r100_debugfs_cp_init(rdev);
	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r) {
		return r;
	}
	/* Each time the CP reads 1024 bytes (16 dword/quadword), update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* CP will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	ring->align_mask = 16 - 1;
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force CP_RB_WPTR write if written more than one time before the
	 * delay expires
	 */
	pre_write_limit = 0;
	/* Set up the CP cache like this (cache size is 96 dwords):
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * The idea is that most GPU commands go through the indirect1 buffer,
	 * so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
	tmp |= RADEON_BUF_SWAP_32BIT;
#endif
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
	/* Force read & write ptr to 0 */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	ring->wptr = 0;
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
		S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

	if (rdev->wb.enabled)
		WREG32(R_000770_SCRATCH_UMSK, 0xff);
	else {
		tmp |= RADEON_RB_NO_UPDATE;
		WREG32(R_000770_SCRATCH_UMSK, 0);
	}

	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	/* Set cp mode to bus mastering & enable cp */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

	/* at this point everything should be set up correctly to enable master */
	pci_set_master(rdev->pdev);

	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	ring->ready = true;
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	if (!ring->rptr_save_reg /* not resuming from suspend */
	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
		if (r) {
			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
			ring->rptr_save_reg = 0;
		}
	}
	return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
	if (r100_cp_wait_for_idle(rdev)) {
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
	}
	/* Disable ring */
	r100_cp_disable(rdev);
	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(R_000770_SCRATCH_UMSK, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
	}
}

/*
 * CS functions
 */
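/**
 * r100_reloc_pitch_offset - relocate a pitch/offset register write
 *
 * @p: parser structure holding parsing context
 * @pkt: packet being processed
 * @idx: index of the dword to patch
 * @reg: register being written
 *
 * Patch the offset with the relocated GPU address and apply the
 * buffer's tiling flags, rejecting src blits from microtiled
 * surfaces.
 * Returns 0 on success, error code on failure.
 */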
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
			    struct radeon_cs_packet *pkt,
			    unsigned idx,
			    unsigned reg)
{
	int r;
	u32 tile_flags = 0;
	u32 tmp;
	struct radeon_bo_list *reloc;
	u32 value;

	r = radeon_cs_packet_next_reloc(p, &reloc, 0);
	if (r) {
		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
			  idx, reg);
		radeon_cs_dump_packet(p, pkt);
		return r;
	}

	value = radeon_get_ib_value(p, idx);
	tmp = value & 0x003fffff;
	tmp += (((u32)reloc->gpu_offset) >> 10);

	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
		if (reloc->tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_DST_TILE_MACRO;
		if (reloc->tiling_flags & RADEON_TILING_MICRO) {
			if (reg == RADEON_SRC_PITCH_OFFSET) {
				DRM_ERROR("Cannot src blit from microtiled surface\n");
				radeon_cs_dump_packet(p, pkt);
				return -EINVAL;
			}
			tile_flags |= RADEON_DST_TILE_MICRO;
		}

		tmp |= tile_flags;
		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
	} else
		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
	return 0;
}

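/**
 * r100_packet3_load_vbpntr - relocate vertex buffer pointers
 *
 * @p: parser structure holding parsing context
 * @pkt: PACKET3 being processed
 * @idx: index of the first dword after the header
 *
 * Walk the vertex array descriptors, relocating each buffer
 * address and recording the arrays in the CS track state for
 * later bounds checking.
 * Returns 0 on success, error code on failure.
 */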
int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int idx)
{
	unsigned c, i;
	struct radeon_bo_list *reloc;
	struct r100_cs_track *track;
	int r = 0;
	volatile uint32_t *ib;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;
	c = radeon_get_ib_value(p, idx++) & 0x1F;
	if (c > 16) {
		DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
			  pkt->opcode);
		radeon_cs_dump_packet(p, pkt);
		return -EINVAL;
	}
	track->num_arrays = c;
	for (i = 0; i < (c - 1); i += 2, idx += 3) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);

		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize &= 0x7F;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
		track->arrays[i + 1].robj = reloc->robj;
		track->arrays[i + 1].esize = idx_value >> 24;
		track->arrays[i + 1].esize &= 0x7F;
	}
	if (c & 1) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].esize &= 0x7F;
	}
	return r;
}

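/**
 * r100_cs_parse_packet0 - check a PACKET0 against the safe-register bitmap
 *
 * @p: parser structure holding parsing context
 * @pkt: PACKET0 to check
 * @auth: safe register bitmap
 * @n: number of entries in the bitmap
 * @check: callback invoked for registers flagged in the bitmap
 *
 * Verify that every register touched by the packet falls inside
 * the safe register range, calling @check for registers that need
 * extra validation (e.g. relocation).
 * Returns 0 on success, error code on failure.
 */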
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the register falls into the register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check it's a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = R100_CP_PACKET0_GET_REG(header);
	crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id);
	if (!crtc) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -ENOENT;
	}
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}

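/**
 * r100_get_vtx_size - compute vertex size from a vertex format
 *
 * @vtx_fmt: SE_VTX_FMT register value
 *
 * Returns the number of dwords per vertex implied by the bits set
 * in the vertex format, ordered as in the register spec.
 */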
static int r100_get_vtx_size(uint32_t vtx_fmt)
{
	int vtx_size;
	vtx_size = 2;
	/* ordered according to bits in spec */
	if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
		vtx_size++;
	/* blend weight */
	if (vtx_fmt & (0x7 << 15))
		vtx_size += (vtx_fmt >> 15) & 0x7;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
		vtx_size++;
	return vtx_size;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_bo_list *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		break;
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		track->zb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		track->cb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		break;
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_TXO_MACRO_TILE;
			if (reloc->tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

			tmp = idx_value & ~(0x7 << 2);
			tmp |= tile_flags;
			ib[idx] = tmp + ((u32)reloc->gpu_offset);
		} else
			ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1709  		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1710  		if (r) {
1711  			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1712  				  idx, reg);
1713  			radeon_cs_dump_packet(p, pkt);
1714  			return r;
1715  		}
1716  		track->textures[2].cube_info[i].offset = idx_value;
1717  		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1718  		track->textures[2].cube_info[i].robj = reloc->robj;
1719  		track->tex_dirty = true;
1720  		break;
1721  	case RADEON_RE_WIDTH_HEIGHT:
1722  		track->maxy = ((idx_value >> 16) & 0x7FF);
1723  		track->cb_dirty = true;
1724  		track->zb_dirty = true;
1725  		break;
1726  	case RADEON_RB3D_COLORPITCH:
1727  		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1728  		if (r) {
1729  			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1730  				  idx, reg);
1731  			radeon_cs_dump_packet(p, pkt);
1732  			return r;
1733  		}
1734  		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1735  			if (reloc->tiling_flags & RADEON_TILING_MACRO)
1736  				tile_flags |= RADEON_COLOR_TILE_ENABLE;
1737  			if (reloc->tiling_flags & RADEON_TILING_MICRO)
1738  				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1739  
1740  			tmp = idx_value & ~(0x7 << 16);
1741  			tmp |= tile_flags;
1742  			ib[idx] = tmp;
1743  		} else
1744  			ib[idx] = idx_value;
1745  
1746  		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1747  		track->cb_dirty = true;
1748  		break;
1749  	case RADEON_RB3D_DEPTHPITCH:
1750  		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1751  		track->zb_dirty = true;
1752  		break;
1753  	case RADEON_RB3D_CNTL:
1754  		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1755  		case 7:
1756  		case 8:
1757  		case 9:
1758  		case 11:
1759  		case 12:
1760  			track->cb[0].cpp = 1;
1761  			break;
1762  		case 3:
1763  		case 4:
1764  		case 15:
1765  			track->cb[0].cpp = 2;
1766  			break;
1767  		case 6:
1768  			track->cb[0].cpp = 4;
1769  			break;
1770  		default:
1771  			DRM_ERROR("Invalid color buffer format (%d) !\n",
1772  				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1773  			return -EINVAL;
1774  		}
1775  		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1776  		track->cb_dirty = true;
1777  		track->zb_dirty = true;
1778  		break;
1779  	case RADEON_RB3D_ZSTENCILCNTL:
1780  		switch (idx_value & 0xf) {
1781  		case 0:
1782  			track->zb.cpp = 2;
1783  			break;
1784  		case 2:
1785  		case 3:
1786  		case 4:
1787  		case 5:
1788  		case 9:
1789  		case 11:
1790  			track->zb.cpp = 4;
1791  			break;
1792  		default:
1793  			break;
1794  		}
1795  		track->zb_dirty = true;
1796  		break;
1797  	case RADEON_RB3D_ZPASS_ADDR:
1798  		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1799  		if (r) {
1800  			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1801  				  idx, reg);
1802  			radeon_cs_dump_packet(p, pkt);
1803  			return r;
1804  		}
1805  		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1806  		break;
1807  	case RADEON_PP_CNTL:
1808  		{
1809  			uint32_t temp = idx_value >> 4;
1810  			for (i = 0; i < track->num_texture; i++)
1811  				track->textures[i].enabled = !!(temp & (1 << i));
1812  			track->tex_dirty = true;
1813  		}
1814  		break;
1815  	case RADEON_SE_VF_CNTL:
1816  		track->vap_vf_cntl = idx_value;
1817  		break;
1818  	case RADEON_SE_VTX_FMT:
1819  		track->vtx_size = r100_get_vtx_size(idx_value);
1820  		break;
1821  	case RADEON_PP_TEX_SIZE_0:
1822  	case RADEON_PP_TEX_SIZE_1:
1823  	case RADEON_PP_TEX_SIZE_2:
1824  		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1825  		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1826  		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1827  		track->tex_dirty = true;
1828  		break;
1829  	case RADEON_PP_TEX_PITCH_0:
1830  	case RADEON_PP_TEX_PITCH_1:
1831  	case RADEON_PP_TEX_PITCH_2:
1832  		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1833  		track->textures[i].pitch = idx_value + 32;
1834  		track->tex_dirty = true;
1835  		break;
1836  	case RADEON_PP_TXFILTER_0:
1837  	case RADEON_PP_TXFILTER_1:
1838  	case RADEON_PP_TXFILTER_2:
1839  		i = (reg - RADEON_PP_TXFILTER_0) / 24;
1840  		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
1841  						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
1842  		tmp = (idx_value >> 23) & 0x7;
1843  		if (tmp == 2 || tmp == 6)
1844  			track->textures[i].roundup_w = false;
1845  		tmp = (idx_value >> 27) & 0x7;
1846  		if (tmp == 2 || tmp == 6)
1847  			track->textures[i].roundup_h = false;
1848  		track->tex_dirty = true;
1849  		break;
1850  	case RADEON_PP_TXFORMAT_0:
1851  	case RADEON_PP_TXFORMAT_1:
1852  	case RADEON_PP_TXFORMAT_2:
1853  		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1854  		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1855  			track->textures[i].use_pitch = true;
1856  		} else {
1857  			track->textures[i].use_pitch = false;
1858  			track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
1859  			track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
1860  		}
1861  		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1862  			track->textures[i].tex_coord_type = 2;
1863  		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1864  		case RADEON_TXFORMAT_I8:
1865  		case RADEON_TXFORMAT_RGB332:
1866  		case RADEON_TXFORMAT_Y8:
1867  			track->textures[i].cpp = 1;
1868  			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1869  			break;
1870  		case RADEON_TXFORMAT_AI88:
1871  		case RADEON_TXFORMAT_ARGB1555:
1872  		case RADEON_TXFORMAT_RGB565:
1873  		case RADEON_TXFORMAT_ARGB4444:
1874  		case RADEON_TXFORMAT_VYUY422:
1875  		case RADEON_TXFORMAT_YVYU422:
1876  		case RADEON_TXFORMAT_SHADOW16:
1877  		case RADEON_TXFORMAT_LDUDV655:
1878  		case RADEON_TXFORMAT_DUDV88:
1879  			track->textures[i].cpp = 2;
1880  			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1881  			break;
1882  		case RADEON_TXFORMAT_ARGB8888:
1883  		case RADEON_TXFORMAT_RGBA8888:
1884  		case RADEON_TXFORMAT_SHADOW32:
1885  		case RADEON_TXFORMAT_LDUDUV8888:
1886  			track->textures[i].cpp = 4;
1887  			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1888  			break;
1889  		case RADEON_TXFORMAT_DXT1:
1890  			track->textures[i].cpp = 1;
1891  			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1892  			break;
1893  		case RADEON_TXFORMAT_DXT23:
1894  		case RADEON_TXFORMAT_DXT45:
1895  			track->textures[i].cpp = 1;
1896  			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1897  			break;
1898  		}
1899  		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1900  		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1901  		track->tex_dirty = true;
1902  		break;
1903  	case RADEON_PP_CUBIC_FACES_0:
1904  	case RADEON_PP_CUBIC_FACES_1:
1905  	case RADEON_PP_CUBIC_FACES_2:
1906  		tmp = idx_value;
1907  		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1908  		for (face = 0; face < 4; face++) {
1909  			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1910  			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1911  		}
1912  		track->tex_dirty = true;
1913  		break;
1914  	default:
1915  		pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1916  		return -EINVAL;
1917  	}
1918  	return 0;
1919  }
1920  
1921  int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1922  					 struct radeon_cs_packet *pkt,
1923  					 struct radeon_bo *robj)
1924  {
1925  	unsigned idx;
1926  	u32 value;
1927  	idx = pkt->idx + 1;
1928  	value = radeon_get_ib_value(p, idx + 2);
1929  	if ((value + 1) > radeon_bo_size(robj)) {
1930  		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1931  			  "(need %u have %lu) !\n",
1932  			  value + 1,
1933  			  radeon_bo_size(robj));
1934  		return -EINVAL;
1935  	}
1936  	return 0;
1937  }
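/*
 * Reading of the check above (inferred from the code, not documented):
 * the third dword of a PACKET3_INDX_BUFFER packet appears to hold the
 * offset of the last byte addressed, hence "value + 1" bytes are
 * required - e.g. a last byte at offset 4095 needs a buffer object of
 * at least 4096 bytes.
 */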
1938  
1939  static int r100_packet3_check(struct radeon_cs_parser *p,
1940  			      struct radeon_cs_packet *pkt)
1941  {
1942  	struct radeon_bo_list *reloc;
1943  	struct r100_cs_track *track;
1944  	unsigned idx;
1945  	volatile uint32_t *ib;
1946  	int r;
1947  
1948  	ib = p->ib.ptr;
1949  	idx = pkt->idx + 1;
1950  	track = (struct r100_cs_track *)p->track;
1951  	switch (pkt->opcode) {
1952  	case PACKET3_3D_LOAD_VBPNTR:
1953  		r = r100_packet3_load_vbpntr(p, pkt, idx);
1954  		if (r)
1955  			return r;
1956  		break;
1957  	case PACKET3_INDX_BUFFER:
1958  		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1959  		if (r) {
1960  			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1961  			radeon_cs_dump_packet(p, pkt);
1962  			return r;
1963  		}
1964  		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
1965  		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1966  		if (r) {
1967  			return r;
1968  		}
1969  		break;
1970  	case 0x23:
1971  		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1972  		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1973  		if (r) {
1974  			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1975  			radeon_cs_dump_packet(p, pkt);
1976  			return r;
1977  		}
1978  		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
1979  		track->num_arrays = 1;
1980  		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1981  
1982  		track->arrays[0].robj = reloc->robj;
1983  		track->arrays[0].esize = track->vtx_size;
1984  
1985  		track->max_indx = radeon_get_ib_value(p, idx+1);
1986  
1987  		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1988  		track->immd_dwords = pkt->count - 1;
1989  		r = r100_cs_track_check(p->rdev, track);
1990  		if (r)
1991  			return r;
1992  		break;
1993  	case PACKET3_3D_DRAW_IMMD:
1994  		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1995  			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1996  			return -EINVAL;
1997  		}
1998  		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1999  		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2000  		track->immd_dwords = pkt->count - 1;
2001  		r = r100_cs_track_check(p->rdev, track);
2002  		if (r)
2003  			return r;
2004  		break;
2005  		/* triggers drawing using in-packet vertex data */
2006  	case PACKET3_3D_DRAW_IMMD_2:
2007  		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
2008  			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
2009  			return -EINVAL;
2010  		}
2011  		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2012  		track->immd_dwords = pkt->count;
2013  		r = r100_cs_track_check(p->rdev, track);
2014  		if (r)
2015  			return r;
2016  		break;
2017  		/* triggers drawing using in-packet vertex data */
2018  	case PACKET3_3D_DRAW_VBUF_2:
2019  		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2020  		r = r100_cs_track_check(p->rdev, track);
2021  		if (r)
2022  			return r;
2023  		break;
2024  		/* triggers drawing of vertex buffers setup elsewhere */
2025  	case PACKET3_3D_DRAW_INDX_2:
2026  		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2027  		r = r100_cs_track_check(p->rdev, track);
2028  		if (r)
2029  			return r;
2030  		break;
2031  		/* triggers drawing using indices to vertex buffer */
2032  	case PACKET3_3D_DRAW_VBUF:
2033  		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2034  		r = r100_cs_track_check(p->rdev, track);
2035  		if (r)
2036  			return r;
2037  		break;
2038  		/* triggers drawing of vertex buffers setup elsewhere */
2039  	case PACKET3_3D_DRAW_INDX:
2040  		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2041  		r = r100_cs_track_check(p->rdev, track);
2042  		if (r)
2043  			return r;
2044  		break;
2045  		/* triggers drawing using indices to vertex buffer */
2046  	case PACKET3_3D_CLEAR_HIZ:
2047  	case PACKET3_3D_CLEAR_ZMASK:
2048  		if (p->rdev->hyperz_filp != p->filp)
2049  			return -EINVAL;
2050  		break;
2051  	case PACKET3_NOP:
2052  		break;
2053  	default:
2054  		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2055  		return -EINVAL;
2056  	}
2057  	return 0;
2058  }
2059  
2060  int r100_cs_parse(struct radeon_cs_parser *p)
2061  {
2062  	struct radeon_cs_packet pkt;
2063  	struct r100_cs_track *track;
2064  	int r;
2065  
2066  	track = kzalloc(sizeof(*track), GFP_KERNEL);
2067  	if (!track)
2068  		return -ENOMEM;
2069  	r100_cs_track_clear(p->rdev, track);
2070  	p->track = track;
2071  	do {
2072  		r = radeon_cs_packet_parse(p, &pkt, p->idx);
2073  		if (r) {
2074  			return r;
2075  		}
2076  		p->idx += pkt.count + 2;
2077  		switch (pkt.type) {
2078  		case RADEON_PACKET_TYPE0:
2079  			if (p->rdev->family >= CHIP_R200)
2080  				r = r100_cs_parse_packet0(p, &pkt,
2081  					p->rdev->config.r100.reg_safe_bm,
2082  					p->rdev->config.r100.reg_safe_bm_size,
2083  					&r200_packet0_check);
2084  			else
2085  				r = r100_cs_parse_packet0(p, &pkt,
2086  					p->rdev->config.r100.reg_safe_bm,
2087  					p->rdev->config.r100.reg_safe_bm_size,
2088  					&r100_packet0_check);
2089  			break;
2090  		case RADEON_PACKET_TYPE2:
2091  			break;
2092  		case RADEON_PACKET_TYPE3:
2093  			r = r100_packet3_check(p, &pkt);
2094  			break;
2095  		default:
2096  			DRM_ERROR("Unknown packet type %d !\n",
2097  				  pkt.type);
2098  			return -EINVAL;
2099  		}
2100  		if (r)
2101  			return r;
2102  	} while (p->idx < p->chunk_ib->length_dw);
2103  	return 0;
2104  }
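/*
 * Packet framing used by the loop above (illustrative): a type-0 or
 * type-3 packet is one header dword followed by "count + 1" payload
 * dwords, so the parser advances by pkt.count + 2.  A type-0 packet
 * writing three consecutive registers, for instance, has count == 2
 * and consumes four dwords of the IB; type-2 filler packets are parsed
 * with count == -1 so the same arithmetic holds.
 */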
2105  
2106  static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2107  {
2108  	DRM_ERROR("pitch                      %d\n", t->pitch);
2109  	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
2110  	DRM_ERROR("width                      %d\n", t->width);
2111  	DRM_ERROR("width_11                   %d\n", t->width_11);
2112  	DRM_ERROR("height                     %d\n", t->height);
2113  	DRM_ERROR("height_11                  %d\n", t->height_11);
2114  	DRM_ERROR("num levels                 %d\n", t->num_levels);
2115  	DRM_ERROR("depth                      %d\n", t->txdepth);
2116  	DRM_ERROR("bpp                        %d\n", t->cpp);
2117  	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2118  	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2119  	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2120  	DRM_ERROR("compress format            %d\n", t->compress_format);
2121  }
2122  
2123  static int r100_track_compress_size(int compress_format, int w, int h)
2124  {
2125  	int block_width, block_height, block_bytes;
2126  	int wblocks, hblocks;
2127  	int min_wblocks;
2128  	int sz;
2129  
2130  	block_width = 4;
2131  	block_height = 4;
2132  
2133  	switch (compress_format) {
2134  	case R100_TRACK_COMP_DXT1:
2135  		block_bytes = 8;
2136  		min_wblocks = 4;
2137  		break;
2138  	default:
2139  	case R100_TRACK_COMP_DXT35:
2140  		block_bytes = 16;
2141  		min_wblocks = 2;
2142  		break;
2143  	}
2144  
2145  	hblocks = (h + block_height - 1) / block_height;
2146  	wblocks = (w + block_width - 1) / block_width;
2147  	if (wblocks < min_wblocks)
2148  		wblocks = min_wblocks;
2149  	sz = wblocks * hblocks * block_bytes;
2150  	return sz;
2151  }
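/*
 * Worked example (illustrative only): a 16x16 DXT1 texture is 4x4
 * blocks of 8 bytes, i.e. 4 * 4 * 8 = 128 bytes.  A 4x4 DXT1 texture
 * would be a single block, but min_wblocks clamps the row to 4 blocks
 * (32 bytes), presumably matching the hardware's minimum row pitch for
 * compressed formats.
 */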
2152  
2153  static int r100_cs_track_cube(struct radeon_device *rdev,
2154  			      struct r100_cs_track *track, unsigned idx)
2155  {
2156  	unsigned face, w, h;
2157  	struct radeon_bo *cube_robj;
2158  	unsigned long size;
2159  	unsigned compress_format = track->textures[idx].compress_format;
2160  
2161  	for (face = 0; face < 5; face++) {
2162  		cube_robj = track->textures[idx].cube_info[face].robj;
2163  		w = track->textures[idx].cube_info[face].width;
2164  		h = track->textures[idx].cube_info[face].height;
2165  
2166  		if (compress_format) {
2167  			size = r100_track_compress_size(compress_format, w, h);
2168  		} else
2169  			size = w * h;
2170  		size *= track->textures[idx].cpp;
2171  
2172  		size += track->textures[idx].cube_info[face].offset;
2173  
2174  		if (size > radeon_bo_size(cube_robj)) {
2175  			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2176  				  size, radeon_bo_size(cube_robj));
2177  			r100_cs_track_texture_print(&track->textures[idx]);
2178  			return -1;
2179  		}
2180  	}
2181  	return 0;
2182  }
2183  
2184  static int r100_cs_track_texture_check(struct radeon_device *rdev,
2185  				       struct r100_cs_track *track)
2186  {
2187  	struct radeon_bo *robj;
2188  	unsigned long size;
2189  	unsigned u, i, w, h, d;
2190  	int ret;
2191  
2192  	for (u = 0; u < track->num_texture; u++) {
2193  		if (!track->textures[u].enabled)
2194  			continue;
2195  		if (track->textures[u].lookup_disable)
2196  			continue;
2197  		robj = track->textures[u].robj;
2198  		if (robj == NULL) {
2199  			DRM_ERROR("No texture bound to unit %u\n", u);
2200  			return -EINVAL;
2201  		}
2202  		size = 0;
2203  		for (i = 0; i <= track->textures[u].num_levels; i++) {
2204  			if (track->textures[u].use_pitch) {
2205  				if (rdev->family < CHIP_R300)
2206  					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2207  				else
2208  					w = track->textures[u].pitch / (1 << i);
2209  			} else {
2210  				w = track->textures[u].width;
2211  				if (rdev->family >= CHIP_RV515)
2212  					w |= track->textures[u].width_11;
2213  				w = w / (1 << i);
2214  				if (track->textures[u].roundup_w)
2215  					w = roundup_pow_of_two(w);
2216  			}
2217  			h = track->textures[u].height;
2218  			if (rdev->family >= CHIP_RV515)
2219  				h |= track->textures[u].height_11;
2220  			h = h / (1 << i);
2221  			if (track->textures[u].roundup_h)
2222  				h = roundup_pow_of_two(h);
2223  			if (track->textures[u].tex_coord_type == 1) {
2224  				d = (1 << track->textures[u].txdepth) / (1 << i);
2225  				if (!d)
2226  					d = 1;
2227  			} else {
2228  				d = 1;
2229  			}
2230  			if (track->textures[u].compress_format) {
2231  
2232  				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2233  				/* compressed textures are block based */
2234  			} else
2235  				size += w * h * d;
2236  		}
2237  		size *= track->textures[u].cpp;
2238  
2239  		switch (track->textures[u].tex_coord_type) {
2240  		case 0:
2241  		case 1:
2242  			break;
2243  		case 2:
2244  			if (track->separate_cube) {
2245  				ret = r100_cs_track_cube(rdev, track, u);
2246  				if (ret)
2247  					return ret;
2248  			} else
2249  				size *= 6;
2250  			break;
2251  		default:
2252  			DRM_ERROR("Invalid texture coordinate type %u for unit "
2253  				  "%u\n", track->textures[u].tex_coord_type, u);
2254  			return -EINVAL;
2255  		}
2256  		if (size > radeon_bo_size(robj)) {
2257  			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2258  				  "%lu\n", u, size, radeon_bo_size(robj));
2259  			r100_cs_track_texture_print(&track->textures[u]);
2260  			return -EINVAL;
2261  		}
2262  	}
2263  	return 0;
2264  }
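/*
 * Size accounting example (illustrative): a 256x256 2D texture with
 * num_levels == 2 sums three mip levels, 256^2 + 128^2 + 64^2 = 86016
 * texels, then multiplies by cpp; a cube map (tex_coord_type == 2) on
 * chips without separate per-face objects simply multiplies that total
 * by 6 faces.
 */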
2265  
2266  int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2267  {
2268  	unsigned i;
2269  	unsigned long size;
2270  	unsigned prim_walk;
2271  	unsigned nverts;
2272  	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2273  
2274  	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2275  	    !track->blend_read_enable)
2276  		num_cb = 0;
2277  
2278  	for (i = 0; i < num_cb; i++) {
2279  		if (track->cb[i].robj == NULL) {
2280  			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2281  			return -EINVAL;
2282  		}
2283  		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2284  		size += track->cb[i].offset;
2285  		if (size > radeon_bo_size(track->cb[i].robj)) {
2286  			DRM_ERROR("[drm] Buffer too small for color buffer %d "
2287  				  "(need %lu have %lu) !\n", i, size,
2288  				  radeon_bo_size(track->cb[i].robj));
2289  			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2290  				  i, track->cb[i].pitch, track->cb[i].cpp,
2291  				  track->cb[i].offset, track->maxy);
2292  			return -EINVAL;
2293  		}
2294  	}
2295  	track->cb_dirty = false;
2296  
2297  	if (track->zb_dirty && track->z_enabled) {
2298  		if (track->zb.robj == NULL) {
2299  			DRM_ERROR("[drm] No buffer for z buffer !\n");
2300  			return -EINVAL;
2301  		}
2302  		size = track->zb.pitch * track->zb.cpp * track->maxy;
2303  		size += track->zb.offset;
2304  		if (size > radeon_bo_size(track->zb.robj)) {
2305  			DRM_ERROR("[drm] Buffer too small for z buffer "
2306  				  "(need %lu have %lu) !\n", size,
2307  				  radeon_bo_size(track->zb.robj));
2308  			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2309  				  track->zb.pitch, track->zb.cpp,
2310  				  track->zb.offset, track->maxy);
2311  			return -EINVAL;
2312  		}
2313  	}
2314  	track->zb_dirty = false;
2315  
2316  	if (track->aa_dirty && track->aaresolve) {
2317  		if (track->aa.robj == NULL) {
2318  			DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2319  			return -EINVAL;
2320  		}
2321  		/* I believe the format comes from colorbuffer0. */
2322  		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2323  		size += track->aa.offset;
2324  		if (size > radeon_bo_size(track->aa.robj)) {
2325  			DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2326  				  "(need %lu have %lu) !\n", i, size,
2327  				  radeon_bo_size(track->aa.robj));
2328  			DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2329  				  i, track->aa.pitch, track->cb[0].cpp,
2330  				  track->aa.offset, track->maxy);
2331  			return -EINVAL;
2332  		}
2333  	}
2334  	track->aa_dirty = false;
2335  
2336  	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2337  	if (track->vap_vf_cntl & (1 << 14)) {
2338  		nverts = track->vap_alt_nverts;
2339  	} else {
2340  		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2341  	}
2342  	switch (prim_walk) {
2343  	case 1:
2344  		for (i = 0; i < track->num_arrays; i++) {
2345  			size = track->arrays[i].esize * track->max_indx * 4UL;
2346  			if (track->arrays[i].robj == NULL) {
2347  				DRM_ERROR("(PW %u) Vertex array %u no buffer "
2348  					  "bound\n", prim_walk, i);
2349  				return -EINVAL;
2350  			}
2351  			if (size > radeon_bo_size(track->arrays[i].robj)) {
2352  				dev_err(rdev->dev, "(PW %u) Vertex array %u "
2353  					"need %lu dwords have %lu dwords\n",
2354  					prim_walk, i, size >> 2,
2355  					radeon_bo_size(track->arrays[i].robj)
2356  					>> 2);
2357  				DRM_ERROR("Max indices %u\n", track->max_indx);
2358  				return -EINVAL;
2359  			}
2360  		}
2361  		break;
2362  	case 2:
2363  		for (i = 0; i < track->num_arrays; i++) {
2364  			size = track->arrays[i].esize * (nverts - 1) * 4UL;
2365  			if (track->arrays[i].robj == NULL) {
2366  				DRM_ERROR("(PW %u) Vertex array %u no buffer "
2367  					  "bound\n", prim_walk, i);
2368  				return -EINVAL;
2369  			}
2370  			if (size > radeon_bo_size(track->arrays[i].robj)) {
2371  				dev_err(rdev->dev, "(PW %u) Vertex array %u "
2372  					"need %lu dwords have %lu dwords\n",
2373  					prim_walk, i, size >> 2,
2374  					radeon_bo_size(track->arrays[i].robj)
2375  					>> 2);
2376  				return -EINVAL;
2377  			}
2378  		}
2379  		break;
2380  	case 3:
2381  		size = track->vtx_size * nverts;
2382  		if (size != track->immd_dwords) {
2383  			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
2384  				  track->immd_dwords, size);
2385  			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2386  				  nverts, track->vtx_size);
2387  			return -EINVAL;
2388  		}
2389  		break;
2390  	default:
2391  		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2392  			  prim_walk);
2393  		return -EINVAL;
2394  	}
2395  
2396  	if (track->tex_dirty) {
2397  		track->tex_dirty = false;
2398  		return r100_cs_track_texture_check(rdev, track);
2399  	}
2400  	return 0;
2401  }
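/*
 * PRIM_WALK example (illustrative): for in-packet vertex data
 * (prim_walk == 3) with VAP_VF_CNTL.NUM_VERTICES == 4 and a 6-dword
 * vertex format, the draw packet must carry exactly 4 * 6 = 24
 * immediate dwords or the command stream is rejected.
 */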
2402  
2403  void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2404  {
2405  	unsigned i, face;
2406  
2407  	track->cb_dirty = true;
2408  	track->zb_dirty = true;
2409  	track->tex_dirty = true;
2410  	track->aa_dirty = true;
2411  
2412  	if (rdev->family < CHIP_R300) {
2413  		track->num_cb = 1;
2414  		if (rdev->family <= CHIP_RS200)
2415  			track->num_texture = 3;
2416  		else
2417  			track->num_texture = 6;
2418  		track->maxy = 2048;
2419  		track->separate_cube = true;
2420  	} else {
2421  		track->num_cb = 4;
2422  		track->num_texture = 16;
2423  		track->maxy = 4096;
2424  		track->separate_cube = false;
2425  		track->aaresolve = false;
2426  		track->aa.robj = NULL;
2427  	}
2428  
2429  	for (i = 0; i < track->num_cb; i++) {
2430  		track->cb[i].robj = NULL;
2431  		track->cb[i].pitch = 8192;
2432  		track->cb[i].cpp = 16;
2433  		track->cb[i].offset = 0;
2434  	}
2435  	track->z_enabled = true;
2436  	track->zb.robj = NULL;
2437  	track->zb.pitch = 8192;
2438  	track->zb.cpp = 4;
2439  	track->zb.offset = 0;
2440  	track->vtx_size = 0x7F;
2441  	track->immd_dwords = 0xFFFFFFFFUL;
2442  	track->num_arrays = 11;
2443  	track->max_indx = 0x00FFFFFFUL;
2444  	for (i = 0; i < track->num_arrays; i++) {
2445  		track->arrays[i].robj = NULL;
2446  		track->arrays[i].esize = 0x7F;
2447  	}
2448  	for (i = 0; i < track->num_texture; i++) {
2449  		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2450  		track->textures[i].pitch = 16536;
2451  		track->textures[i].width = 16536;
2452  		track->textures[i].height = 16536;
2453  		track->textures[i].width_11 = 1 << 11;
2454  		track->textures[i].height_11 = 1 << 11;
2455  		track->textures[i].num_levels = 12;
2456  		if (rdev->family <= CHIP_RS200) {
2457  			track->textures[i].tex_coord_type = 0;
2458  			track->textures[i].txdepth = 0;
2459  		} else {
2460  			track->textures[i].txdepth = 16;
2461  			track->textures[i].tex_coord_type = 1;
2462  		}
2463  		track->textures[i].cpp = 64;
2464  		track->textures[i].robj = NULL;
2465  		/* CS IB emission code makes sure texture units are disabled */
2466  		track->textures[i].enabled = false;
2467  		track->textures[i].lookup_disable = false;
2468  		track->textures[i].roundup_w = true;
2469  		track->textures[i].roundup_h = true;
2470  		if (track->separate_cube)
2471  			for (face = 0; face < 5; face++) {
2472  				track->textures[i].cube_info[face].robj = NULL;
2473  				track->textures[i].cube_info[face].width = 16536;
2474  				track->textures[i].cube_info[face].height = 16536;
2475  				track->textures[i].cube_info[face].offset = 0;
2476  			}
2477  	}
2478  }
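/*
 * Reading of intent (not documented in the original source): the
 * defaults above look deliberately pessimistic - maximal pitch, cpp
 * and index counts - so that any state a command stream fails to
 * program trips the size checks in r100_cs_track_check() instead of
 * slipping through unvalidated.
 */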
2479  
2480  /*
2481   * Global GPU functions
2482   */
2483  static void r100_errata(struct radeon_device *rdev)
2484  {
2485  	rdev->pll_errata = 0;
2486  
2487  	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2488  		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2489  	}
2490  
2491  	if (rdev->family == CHIP_RV100 ||
2492  	    rdev->family == CHIP_RS100 ||
2493  	    rdev->family == CHIP_RS200) {
2494  		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2495  	}
2496  }
2497  
2498  static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2499  {
2500  	unsigned i;
2501  	uint32_t tmp;
2502  
2503  	for (i = 0; i < rdev->usec_timeout; i++) {
2504  		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2505  		if (tmp >= n) {
2506  			return 0;
2507  		}
2508  		udelay(1);
2509  	}
2510  	return -1;
2511  }
2512  
2513  int r100_gui_wait_for_idle(struct radeon_device *rdev)
2514  {
2515  	unsigned i;
2516  	uint32_t tmp;
2517  
2518  	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2519  		pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
2520  	}
2521  	for (i = 0; i < rdev->usec_timeout; i++) {
2522  		tmp = RREG32(RADEON_RBBM_STATUS);
2523  		if (!(tmp & RADEON_RBBM_ACTIVE)) {
2524  			return 0;
2525  		}
2526  		udelay(1);
2527  	}
2528  	return -1;
2529  }
2530  
2531  int r100_mc_wait_for_idle(struct radeon_device *rdev)
2532  {
2533  	unsigned i;
2534  	uint32_t tmp;
2535  
2536  	for (i = 0; i < rdev->usec_timeout; i++) {
2537  		/* read MC_STATUS */
2538  		tmp = RREG32(RADEON_MC_STATUS);
2539  		if (tmp & RADEON_MC_IDLE) {
2540  			return 0;
2541  		}
2542  		udelay(1);
2543  	}
2544  	return -1;
2545  }
2546  
2547  bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2548  {
2549  	u32 rbbm_status;
2550  
2551  	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2552  	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2553  		radeon_ring_lockup_update(rdev, ring);
2554  		return false;
2555  	}
2556  	return radeon_ring_test_lockup(rdev, ring);
2557  }
2558  
2559  /* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2560  void r100_enable_bm(struct radeon_device *rdev)
2561  {
2562  	uint32_t tmp;
2563  	/* Enable bus mastering */
2564  	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2565  	WREG32(RADEON_BUS_CNTL, tmp);
2566  }
2567  
2568  void r100_bm_disable(struct radeon_device *rdev)
2569  {
2570  	u32 tmp;
2571  
2572  	/* disable bus mastering */
2573  	tmp = RREG32(R_000030_BUS_CNTL);
2574  	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2575  	mdelay(1);
2576  	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2577  	mdelay(1);
2578  	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2579  	tmp = RREG32(RADEON_BUS_CNTL);
2580  	mdelay(1);
2581  	pci_clear_master(rdev->pdev);
2582  	mdelay(1);
2583  }
2584  
2585  int r100_asic_reset(struct radeon_device *rdev, bool hard)
2586  {
2587  	struct r100_mc_save save;
2588  	u32 status, tmp;
2589  	int ret = 0;
2590  
2591  	status = RREG32(R_000E40_RBBM_STATUS);
2592  	if (!G_000E40_GUI_ACTIVE(status)) {
2593  		return 0;
2594  	}
2595  	r100_mc_stop(rdev, &save);
2596  	status = RREG32(R_000E40_RBBM_STATUS);
2597  	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2598  	/* stop CP */
2599  	WREG32(RADEON_CP_CSQ_CNTL, 0);
2600  	tmp = RREG32(RADEON_CP_RB_CNTL);
2601  	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
2602  	WREG32(RADEON_CP_RB_RPTR_WR, 0);
2603  	WREG32(RADEON_CP_RB_WPTR, 0);
2604  	WREG32(RADEON_CP_RB_CNTL, tmp);
2605  	/* save PCI state */
2606  	pci_save_state(rdev->pdev);
2607  	/* disable bus mastering */
2608  	r100_bm_disable(rdev);
2609  	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
2610  					S_0000F0_SOFT_RESET_RE(1) |
2611  					S_0000F0_SOFT_RESET_PP(1) |
2612  					S_0000F0_SOFT_RESET_RB(1));
2613  	RREG32(R_0000F0_RBBM_SOFT_RESET);
2614  	mdelay(500);
2615  	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2616  	mdelay(1);
2617  	status = RREG32(R_000E40_RBBM_STATUS);
2618  	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2619  	/* reset CP */
2620  	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
2621  	RREG32(R_0000F0_RBBM_SOFT_RESET);
2622  	mdelay(500);
2623  	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2624  	mdelay(1);
2625  	status = RREG32(R_000E40_RBBM_STATUS);
2626  	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2627  	/* restore PCI & busmastering */
2628  	pci_restore_state(rdev->pdev);
2629  	r100_enable_bm(rdev);
2630  	/* Check if GPU is idle */
2631  	if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2632  		G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2633  		dev_err(rdev->dev, "failed to reset GPU\n");
2634  		ret = -1;
2635  	} else
2636  		dev_info(rdev->dev, "GPU reset succeeded\n");
2637  	r100_mc_resume(rdev, &save);
2638  	return ret;
2639  }
2640  
2641  void r100_set_common_regs(struct radeon_device *rdev)
2642  {
2643  	bool force_dac2 = false;
2644  	u32 tmp;
2645  
2646  	/* set these so they don't interfere with anything */
2647  	WREG32(RADEON_OV0_SCALE_CNTL, 0);
2648  	WREG32(RADEON_SUBPIC_CNTL, 0);
2649  	WREG32(RADEON_VIPH_CONTROL, 0);
2650  	WREG32(RADEON_I2C_CNTL_1, 0);
2651  	WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2652  	WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2653  	WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2654  
2655  	/* always set up dac2 on rn50 and some rv100 as lots
2656  	 * of servers seem to wire it up to a VGA port but
2657  	 * don't report it in the bios connector
2658  	 * table.
2659  	 */
2660  	switch (rdev->pdev->device) {
2661  		/* RN50 */
2662  	case 0x515e:
2663  	case 0x5969:
2664  		force_dac2 = true;
2665  		break;
2666  		/* RV100*/
2667  	case 0x5159:
2668  	case 0x515a:
2669  		/* DELL triple head servers */
2670  		if ((rdev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2671  		    ((rdev->pdev->subsystem_device == 0x016c) ||
2672  		     (rdev->pdev->subsystem_device == 0x016d) ||
2673  		     (rdev->pdev->subsystem_device == 0x016e) ||
2674  		     (rdev->pdev->subsystem_device == 0x016f) ||
2675  		     (rdev->pdev->subsystem_device == 0x0170) ||
2676  		     (rdev->pdev->subsystem_device == 0x017d) ||
2677  		     (rdev->pdev->subsystem_device == 0x017e) ||
2678  		     (rdev->pdev->subsystem_device == 0x0183) ||
2679  		     (rdev->pdev->subsystem_device == 0x018a) ||
2680  		     (rdev->pdev->subsystem_device == 0x019a)))
2681  			force_dac2 = true;
2682  		break;
2683  	}
2684  
2685  	if (force_dac2) {
2686  		u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2687  		u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2688  		u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2689  
2690  		/* For CRT on DAC2, don't turn it on if BIOS didn't
2691  		   enable it, even if it's detected.
2692  		*/
2693  
2694  		/* force it to crtc0 */
2695  		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2696  		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2697  		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2698  
2699  		/* set up the TV DAC */
2700  		tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2701  				 RADEON_TV_DAC_STD_MASK |
2702  				 RADEON_TV_DAC_RDACPD |
2703  				 RADEON_TV_DAC_GDACPD |
2704  				 RADEON_TV_DAC_BDACPD |
2705  				 RADEON_TV_DAC_BGADJ_MASK |
2706  				 RADEON_TV_DAC_DACADJ_MASK);
2707  		tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2708  				RADEON_TV_DAC_NHOLD |
2709  				RADEON_TV_DAC_STD_PS2 |
2710  				(0x58 << 16));
2711  
2712  		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2713  		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2714  		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2715  	}
2716  
2717  	/* switch PM block to ACPI mode */
2718  	tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2719  	tmp &= ~RADEON_PM_MODE_SEL;
2720  	WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2721  
2722  }
2723  
2724  /*
2725   * VRAM info
2726   */
2727  static void r100_vram_get_type(struct radeon_device *rdev)
2728  {
2729  	uint32_t tmp;
2730  
2731  	rdev->mc.vram_is_ddr = false;
2732  	if (rdev->flags & RADEON_IS_IGP)
2733  		rdev->mc.vram_is_ddr = true;
2734  	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2735  		rdev->mc.vram_is_ddr = true;
2736  	if ((rdev->family == CHIP_RV100) ||
2737  	    (rdev->family == CHIP_RS100) ||
2738  	    (rdev->family == CHIP_RS200)) {
2739  		tmp = RREG32(RADEON_MEM_CNTL);
2740  		if (tmp & RV100_HALF_MODE) {
2741  			rdev->mc.vram_width = 32;
2742  		} else {
2743  			rdev->mc.vram_width = 64;
2744  		}
2745  		if (rdev->flags & RADEON_SINGLE_CRTC) {
2746  			rdev->mc.vram_width /= 4;
2747  			rdev->mc.vram_is_ddr = true;
2748  		}
2749  	} else if (rdev->family <= CHIP_RV280) {
2750  		tmp = RREG32(RADEON_MEM_CNTL);
2751  		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2752  			rdev->mc.vram_width = 128;
2753  		} else {
2754  			rdev->mc.vram_width = 64;
2755  		}
2756  	} else {
2757  		/* newer IGPs */
2758  		rdev->mc.vram_width = 128;
2759  	}
2760  }
2761  
2762  static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2763  {
2764  	u32 aper_size;
2765  	u8 byte;
2766  
2767  	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2768  
2769  	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
2770  	 * that is, have the 2nd generation multifunction PCI interface
2771  	 */
2772  	if (rdev->family == CHIP_RV280 ||
2773  	    rdev->family >= CHIP_RV350) {
2774  		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2775  		       ~RADEON_HDP_APER_CNTL);
2776  		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2777  		return aper_size * 2;
2778  	}
2779  
2780  	/* Older cards have all sorts of funny issues to deal with. First
2781  	 * check if it's a multifunction card by reading the PCI config
2782  	 * header type... Limit those to one aperture size
2783  	 */
2784  	pci_read_config_byte(rdev->pdev, 0xe, &byte);
2785  	if (byte & 0x80) {
2786  		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2787  		DRM_INFO("Limiting VRAM to one aperture\n");
2788  		return aper_size;
2789  	}
2790  
2791  	/* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2792  	 * has set it up. We don't write this as it's broken on some ASICs but
2793  	 * we expect the BIOS to have done the right thing (might be too optimistic...)
2794  	 */
2795  	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2796  		return aper_size * 2;
2797  	return aper_size;
2798  }
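/*
 * Example outcome (illustrative): with CONFIG_APER_SIZE reporting
 * 64 MB, a generation-2 part (RV280 or >= RV350) is treated as having
 * 128 MB of CPU-accessible VRAM, while a generation-1 multifunction
 * card is limited to the single 64 MB aperture.
 */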
2799  
2800  void r100_vram_init_sizes(struct radeon_device *rdev)
2801  {
2802  	u64 config_aper_size;
2803  
2804  	/* work out accessible VRAM */
2805  	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2806  	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2807  	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2808  	/* FIXME we don't use the second aperture yet when we could use it */
2809  	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2810  		rdev->mc.visible_vram_size = rdev->mc.aper_size;
2811  	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2812  	if (rdev->flags & RADEON_IS_IGP) {
2813  		uint32_t tom;
2814  		/* read NB_TOM to get the amount of ram stolen for the GPU */
2815  		tom = RREG32(RADEON_NB_TOM);
2816  		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2817  		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2818  		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2819  	} else {
2820  		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2821  		/* Some production boards of m6 will report 0
2822  		 * if it's 8 MB
2823  		 */
2824  		if (rdev->mc.real_vram_size == 0) {
2825  			rdev->mc.real_vram_size = 8192 * 1024;
2826  			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2827  		}
2828  		/* Fix for RN50, M6, M7 with 8/16/32(??) MB of VRAM -
2829  		 * Novell bug 204882, along with lots of Ubuntu ones
2830  		 */
2831  		if (rdev->mc.aper_size > config_aper_size)
2832  			config_aper_size = rdev->mc.aper_size;
2833  
2834  		if (config_aper_size > rdev->mc.real_vram_size)
2835  			rdev->mc.mc_vram_size = config_aper_size;
2836  		else
2837  			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2838  	}
2839  }
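/*
 * NB_TOM decode example (illustrative): the register packs the top of
 * the stolen-memory range in bits 31:16 and the bottom in bits 15:0,
 * both in 64 KB units, so top == 0x0fff with bottom == 0x0c00 gives
 * ((0x0fff - 0x0c00 + 1) << 16) = 64 MB of carve-out for the IGP.
 */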
2840  
2841  void r100_vga_set_state(struct radeon_device *rdev, bool state)
2842  {
2843  	uint32_t temp;
2844  
2845  	temp = RREG32(RADEON_CONFIG_CNTL);
2846  	if (!state) {
2847  		temp &= ~RADEON_CFG_VGA_RAM_EN;
2848  		temp |= RADEON_CFG_VGA_IO_DIS;
2849  	} else {
2850  		temp &= ~RADEON_CFG_VGA_IO_DIS;
2851  	}
2852  	WREG32(RADEON_CONFIG_CNTL, temp);
2853  }
2854  
2855  static void r100_mc_init(struct radeon_device *rdev)
2856  {
2857  	u64 base;
2858  
2859  	r100_vram_get_type(rdev);
2860  	r100_vram_init_sizes(rdev);
2861  	base = rdev->mc.aper_base;
2862  	if (rdev->flags & RADEON_IS_IGP)
2863  		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2864  	radeon_vram_location(rdev, &rdev->mc, base);
2865  	rdev->mc.gtt_base_align = 0;
2866  	if (!(rdev->flags & RADEON_IS_AGP))
2867  		radeon_gtt_location(rdev, &rdev->mc);
2868  	radeon_update_bandwidth_info(rdev);
2869  }
2870  
2871  
2872  /*
2873   * Indirect registers accessor
2874   */
2875  void r100_pll_errata_after_index(struct radeon_device *rdev)
2876  {
2877  	if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2878  		(void)RREG32(RADEON_CLOCK_CNTL_DATA);
2879  		(void)RREG32(RADEON_CRTC_GEN_CNTL);
2880  	}
2881  }
2882  
2883  static void r100_pll_errata_after_data(struct radeon_device *rdev)
2884  {
2885  	/* This workaround is necessary on RV100, RS100 and RS200 chips,
2886  	 * or the chip could hang on a subsequent access
2887  	 */
2888  	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2889  		mdelay(5);
2890  	}
2891  
2892  	/* This function is required to work around a hardware bug in some (all?)
2893  	 * revisions of the R300.  This workaround should be called after every
2894  	 * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
2895  	 * may not be correct.
2896  	 */
2897  	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2898  		uint32_t save, tmp;
2899  
2900  		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2901  		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2902  		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2903  		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2904  		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2905  	}
2906  }
2907  
2908  uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
2909  {
2910  	unsigned long flags;
2911  	uint32_t data;
2912  
2913  	spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2914  	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2915  	r100_pll_errata_after_index(rdev);
2916  	data = RREG32(RADEON_CLOCK_CNTL_DATA);
2917  	r100_pll_errata_after_data(rdev);
2918  	spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2919  	return data;
2920  }
2921  
2922  void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
2923  {
2924  	unsigned long flags;
2925  
2926  	spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2927  	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2928  	r100_pll_errata_after_index(rdev);
2929  	WREG32(RADEON_CLOCK_CNTL_DATA, v);
2930  	r100_pll_errata_after_data(rdev);
2931  	spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2932  }
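/*
 * Usage sketch for the indirect PLL accessors (illustrative only):
 *
 *	u32 ref_div = r100_pll_rreg(rdev, RADEON_PPLL_REF_DIV);
 *	r100_pll_wreg(rdev, RADEON_PPLL_REF_DIV, ref_div);
 *
 * The index/data pair is serialized under pll_idx_lock, and the errata
 * hooks run after every index and data access so the chip-specific PLL
 * workarounds above are never skipped.
 */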
2933  
2934  static void r100_set_safe_registers(struct radeon_device *rdev)
2935  {
2936  	if (ASIC_IS_RN50(rdev)) {
2937  		rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2938  		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2939  	} else if (rdev->family < CHIP_R200) {
2940  		rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2941  		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2942  	} else {
2943  		r200_set_safe_registers(rdev);
2944  	}
2945  }
2946  
2947  /*
2948   * Debugfs info
2949   */
2950  #if defined(CONFIG_DEBUG_FS)
2951  static int r100_debugfs_rbbm_info_show(struct seq_file *m, void *unused)
2952  {
2953  	struct radeon_device *rdev = m->private;
2954  	uint32_t reg, value;
2955  	unsigned i;
2956  
2957  	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2958  	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2959  	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2960  	for (i = 0; i < 64; i++) {
2961  		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2962  		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2963  		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2964  		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2965  		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2966  	}
2967  	return 0;
2968  }
2969  
2970  static int r100_debugfs_cp_ring_info_show(struct seq_file *m, void *unused)
2971  {
2972  	struct radeon_device *rdev = m->private;
2973  	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2974  	uint32_t rdp, wdp;
2975  	unsigned count, i, j;
2976  
2977  	radeon_ring_free_size(rdev, ring);
2978  	rdp = RREG32(RADEON_CP_RB_RPTR);
2979  	wdp = RREG32(RADEON_CP_RB_WPTR);
2980  	count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
2981  	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2982  	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2983  	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2984  	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
2985  	seq_printf(m, "%u dwords in ring\n", count);
2986  	if (ring->ready) {
2987  		for (j = 0; j <= count; j++) {
2988  			i = (rdp + j) & ring->ptr_mask;
2989  			seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
2990  		}
2991  	}
2992  	return 0;
2993  }
2994  
2995  
2996  static int r100_debugfs_cp_csq_fifo_show(struct seq_file *m, void *unused)
2997  {
2998  	struct radeon_device *rdev = m->private;
2999  	uint32_t csq_stat, csq2_stat, tmp;
3000  	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
3001  	unsigned i;
3002  
3003  	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
3004  	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
3005  	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
3006  	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
3007  	r_rptr = (csq_stat >> 0) & 0x3ff;
3008  	r_wptr = (csq_stat >> 10) & 0x3ff;
3009  	ib1_rptr = (csq_stat >> 20) & 0x3ff;
3010  	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
3011  	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
3012  	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
3013  	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
3014  	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
3015  	seq_printf(m, "Ring rptr %u\n", r_rptr);
3016  	seq_printf(m, "Ring wptr %u\n", r_wptr);
3017  	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
3018  	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
3019  	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
3020  	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
3021  	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
3022  	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
3023  	seq_printf(m, "Ring fifo:\n");
3024  	for (i = 0; i < 256; i++) {
3025  		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3026  		tmp = RREG32(RADEON_CP_CSQ_DATA);
3027  		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3028  	}
3029  	seq_printf(m, "Indirect1 fifo:\n");
3030  	for (i = 256; i <= 512; i++) {
3031  		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3032  		tmp = RREG32(RADEON_CP_CSQ_DATA);
3033  		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3034  	}
3035  	seq_printf(m, "Indirect2 fifo:\n");
3036  	for (i = 640; i < ib2_wptr; i++) {
3037  		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3038  		tmp = RREG32(RADEON_CP_CSQ_DATA);
3039  		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3040  	}
3041  	return 0;
3042  }
3043  
3044  static int r100_debugfs_mc_info_show(struct seq_file *m, void *unused)
3045  {
3046  	struct radeon_device *rdev = m->private;
3047  	uint32_t tmp;
3048  
3049  	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3050  	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3051  	tmp = RREG32(RADEON_MC_FB_LOCATION);
3052  	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3053  	tmp = RREG32(RADEON_BUS_CNTL);
3054  	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3055  	tmp = RREG32(RADEON_MC_AGP_LOCATION);
3056  	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3057  	tmp = RREG32(RADEON_AGP_BASE);
3058  	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3059  	tmp = RREG32(RADEON_HOST_PATH_CNTL);
3060  	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3061  	tmp = RREG32(0x01D0);
3062  	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3063  	tmp = RREG32(RADEON_AIC_LO_ADDR);
3064  	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3065  	tmp = RREG32(RADEON_AIC_HI_ADDR);
3066  	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3067  	tmp = RREG32(0x01E4);
3068  	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3069  	return 0;
3070  }
3071  
3072  DEFINE_SHOW_ATTRIBUTE(r100_debugfs_rbbm_info);
3073  DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_ring_info);
3074  DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_csq_fifo);
3075  DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info);
3076  
3077  #endif
3078  
3079  void r100_debugfs_rbbm_init(struct radeon_device *rdev)
3080  {
3081  #if defined(CONFIG_DEBUG_FS)
3082  	struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root;
3083  
3084  	debugfs_create_file("r100_rbbm_info", 0444, root, rdev,
3085  			    &r100_debugfs_rbbm_info_fops);
3086  #endif
3087  }
3088  
3089  void r100_debugfs_cp_init(struct radeon_device *rdev)
3090  {
3091  #if defined(CONFIG_DEBUG_FS)
3092  	struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root;
3093  
3094  	debugfs_create_file("r100_cp_ring_info", 0444, root, rdev,
3095  			    &r100_debugfs_cp_ring_info_fops);
3096  	debugfs_create_file("r100_cp_csq_fifo", 0444, root, rdev,
3097  			    &r100_debugfs_cp_csq_fifo_fops);
3098  #endif
3099  }
3100  
3101  void r100_debugfs_mc_info_init(struct radeon_device *rdev)
3102  {
3103  #if defined(CONFIG_DEBUG_FS)
3104  	struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root;
3105  
3106  	debugfs_create_file("r100_mc_info", 0444, root, rdev,
3107  			    &r100_debugfs_mc_info_fops);
3108  #endif
3109  }
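/*
 * With debugfs mounted at the usual location, the files registered
 * above appear as, e.g. (paths assume the standard mount point and
 * DRM minor 0):
 *
 *	cat /sys/kernel/debug/dri/0/r100_rbbm_info
 *	cat /sys/kernel/debug/dri/0/r100_cp_ring_info
 *	cat /sys/kernel/debug/dri/0/r100_cp_csq_fifo
 *	cat /sys/kernel/debug/dri/0/r100_mc_info
 */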
3110  
3111  int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3112  			 uint32_t tiling_flags, uint32_t pitch,
3113  			 uint32_t offset, uint32_t obj_size)
3114  {
3115  	int surf_index = reg * 16;
3116  	int flags = 0;
3117  
3118  	if (rdev->family <= CHIP_RS200) {
3119  		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3120  				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3121  			flags |= RADEON_SURF_TILE_COLOR_BOTH;
3122  		if (tiling_flags & RADEON_TILING_MACRO)
3123  			flags |= RADEON_SURF_TILE_COLOR_MACRO;
3124  		/* setting pitch to 0 disables tiling */
3125  		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3126  				== 0)
3127  			pitch = 0;
3128  	} else if (rdev->family <= CHIP_RV280) {
3129  		if (tiling_flags & (RADEON_TILING_MACRO))
3130  			flags |= R200_SURF_TILE_COLOR_MACRO;
3131  		if (tiling_flags & RADEON_TILING_MICRO)
3132  			flags |= R200_SURF_TILE_COLOR_MICRO;
3133  	} else {
3134  		if (tiling_flags & RADEON_TILING_MACRO)
3135  			flags |= R300_SURF_TILE_MACRO;
3136  		if (tiling_flags & RADEON_TILING_MICRO)
3137  			flags |= R300_SURF_TILE_MICRO;
3138  	}
3139  
3140  	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3141  		flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3142  	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3143  		flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3144  
3145  	/* r100/r200 divide by 16 */
3146  	if (rdev->family < CHIP_R300)
3147  		flags |= pitch / 16;
3148  	else
3149  		flags |= pitch / 8;
3150  
3151  
3152  	DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3153  	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3154  	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3155  	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3156  	return 0;
3157  }
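/*
 * Pitch encoding example (illustrative): pre-R300 parts store the
 * surface pitch in 16-byte units, so a 1024-byte pitch contributes 64
 * to the low bits of the SURFACE0_INFO flags; R300 and later use
 * 8-byte units, giving 128 for the same pitch.
 */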
3158  
3159  void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3160  {
3161  	int surf_index = reg * 16;
3162  	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3163  }
3164  
3165  void r100_bandwidth_update(struct radeon_device *rdev)
3166  {
3167  	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
3168  	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
3169  	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
3170  	fixed20_12 crit_point_ff = {0};
3171  	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
3172  	fixed20_12 memtcas_ff[8] = {
3173  		dfixed_init(1),
3174  		dfixed_init(2),
3175  		dfixed_init(3),
3176  		dfixed_init(0),
3177  		dfixed_init_half(1),
3178  		dfixed_init_half(2),
3179  		dfixed_init(0),
3180  	};
3181  	fixed20_12 memtcas_rs480_ff[8] = {
3182  		dfixed_init(0),
3183  		dfixed_init(1),
3184  		dfixed_init(2),
3185  		dfixed_init(3),
3186  		dfixed_init(0),
3187  		dfixed_init_half(1),
3188  		dfixed_init_half(2),
3189  		dfixed_init_half(3),
3190  	};
3191  	fixed20_12 memtcas2_ff[8] = {
3192  		dfixed_init(0),
3193  		dfixed_init(1),
3194  		dfixed_init(2),
3195  		dfixed_init(3),
3196  		dfixed_init(4),
3197  		dfixed_init(5),
3198  		dfixed_init(6),
3199  		dfixed_init(7),
3200  	};
3201  	fixed20_12 memtrbs[8] = {
3202  		dfixed_init(1),
3203  		dfixed_init_half(1),
3204  		dfixed_init(2),
3205  		dfixed_init_half(2),
3206  		dfixed_init(3),
3207  		dfixed_init_half(3),
3208  		dfixed_init(4),
3209  		dfixed_init_half(4)
3210  	};
3211  	fixed20_12 memtrbs_r4xx[8] = {
3212  		dfixed_init(4),
3213  		dfixed_init(5),
3214  		dfixed_init(6),
3215  		dfixed_init(7),
3216  		dfixed_init(8),
3217  		dfixed_init(9),
3218  		dfixed_init(10),
3219  		dfixed_init(11)
3220  	};
3221  	fixed20_12 min_mem_eff;
3222  	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
3223  	fixed20_12 cur_latency_mclk, cur_latency_sclk;
3224  	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
3225  		disp_drain_rate2, read_return_rate;
3226  	fixed20_12 time_disp1_drop_priority;
3227  	int c;
3228  	int cur_size = 16;       /* in octawords */
3229  	int critical_point = 0, critical_point2;
3230  /* 	uint32_t read_return_rate, time_disp1_drop_priority; */
3231  	int stop_req, max_stop_req;
3232  	struct drm_display_mode *mode1 = NULL;
3233  	struct drm_display_mode *mode2 = NULL;
3234  	uint32_t pixel_bytes1 = 0;
3235  	uint32_t pixel_bytes2 = 0;
3236  
3237  	/* Guess line buffer size to be 8192 pixels */
3238  	u32 lb_size = 8192;
3239  
3240  	if (!rdev->mode_info.mode_config_initialized)
3241  		return;
3242  
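	/*
	 * Derive display watermarks: compare the peak bandwidth the enabled
	 * CRTCs demand against what the memory controller can deliver, then
	 * program the GRPH_BUFFER_CNTL critical points from the worst-case
	 * memory-controller latency and the display FIFO drain rate.
	 */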
	radeon_update_display_priority(rdev);

	if (rdev->mode_info.crtcs[0]->base.enabled) {
		const struct drm_framebuffer *fb =
			rdev->mode_info.crtcs[0]->base.primary->fb;

		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = fb->format->cpp[0];
	}
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		if (rdev->mode_info.crtcs[1]->base.enabled) {
			const struct drm_framebuffer *fb =
				rdev->mode_info.crtcs[1]->base.primary->fb;

			mode2 = &rdev->mode_info.crtcs[1]->base.mode;
			pixel_bytes2 = fb->format->cpp[0];
		}
	}

	min_mem_eff.full = dfixed_const_8(0);
	/* raise the MC display read latency priority on r300+ if requested */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);

		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * Determine if there is enough bandwidth for the current mode.
	 */
	sclk_ff = rdev->pm.sclk;
	mclk_ff = rdev->pm.mclk;

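	/*
	 * Peak memory bandwidth is roughly mclk times the bus width in
	 * bytes, doubled for DDR.
	 */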
	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = dfixed_const(temp);
	mem_bw.full = dfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = dfixed_const(1000);
		pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = dfixed_div(pix_clk, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes1);
		peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = dfixed_const(1000);
		pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes2);
		peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
	}

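	/*
	 * Derate the raw bandwidth by the minimum memory efficiency factor
	 * (0.8) before comparing it against the combined display demand.
	 */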
	mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for the current mode\n"
			  "If you see flickering, try lowering the resolution, refresh rate, or color depth\n");
	}

	/* Get the memory timings from the MEM_TIMING_CNTL register and convert them to clocks. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp  = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family == CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to fixed point */
	trcd_ff.full = dfixed_const(mem_trcd);
	trp_ff.full = dfixed_const(mem_trp);
	tras_ff.full = dfixed_const(mem_tras);

	/* Get the CAS latency from the MEM_SDRAM_MODE_REG register and convert it to clocks. */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_RS480) /* rs400 doesn't seem to use this */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25, 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += dfixed_const(data);
	}

	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}

	sclk_eff_ff.full = sclk_ff.full;

	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;

		agpmode_ff.full = dfixed_const(radeon_agpmode);
		temp_ff.full = dfixed_const_666(16);
		sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO: PCIE lanes may affect this - agpmode == 16?? */

	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = dfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = dfixed_const(41);
			else
				sclk_delay_ff.full = dfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = dfixed_const(57);
			else
				sclk_delay_ff.full = dfixed_const(41);
		}
	}

	mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = dfixed_const(40);
			c  = 3;
		} else {
			k1.full = dfixed_const(20);
			c  = 1;
		}
	} else {
		k1.full = dfixed_const(40);
		c  = 3;
	}

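	/*
	 * Worst-case memory-controller latency, in memory clocks:
	 * 2 * tRCD + c * tCAS + 4 * (tRAS + tRP) + K1, converted to time by
	 * dividing by mclk, plus four engine clocks of overhead.
	 */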
	temp_ff.full = dfixed_const(2);
	mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = dfixed_const(c);
	mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = dfixed_const(4);
	mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);

	/* HW cursor time, assuming the worst case of a full-size colour cursor */
	temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);

	temp_ff.full = dfixed_const(cur_size);
	cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);

	/* Find the total latency for the display data. */
	disp_latency_overhead.full = dfixed_const(8);
	disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* set up the max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/*
		 * CRTC1: set the GRPH_BUFFER_CNTL register using h/w defined
		 * optimal values:
		 * GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		 */
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/* Find the drain rate of the display buffer. */
		temp_ff.full = dfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);

		/* Find the critical point of the display buffer. */
		crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += dfixed_const_half(0);

		critical_point = dfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2)
			critical_point = 0;

		/*
		 * The critical point should never be above max_stop_req - 4.
		 * Setting GRPH_CRITICAL_CNTL = 0 will thus force high priority
		 * all the time.
		 */
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problems with this set to 0 when CRTC2 is enabled */
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/* Write the result into the register. */
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG_KMS("GRPH_BUFFER_CNTL set to %x\n",
			      (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;

		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/* Find the drain rate of the display buffer. */
		temp_ff.full = dfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
				RADEON_GRPH_CRITICAL_AT_SOF |
				RADEON_GRPH_STOP_CNTL);

		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
			temp_ff.full = dfixed_const(temp);
			temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

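			/*
			 * CRTC2's critical point also has to cover the time
			 * CRTC1 can hold priority: the time disp1 takes to
			 * drop priority is its critical-point depth divided
			 * by the surplus of read-return rate over its drain
			 * rate.
			 */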
			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += dfixed_const_half(0);

			critical_point2 = dfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2)
				critical_point2 = 0;

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;
		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problems with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL set to %x\n",
			      (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}

	/* Save the number of lines by which the line buffer leads the scanout */
	if (mode1)
		rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);

	if (mode2)
		rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
}

int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
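	/*
	 * Seed the scratch register with a known value, then ask the CP to
	 * overwrite it through the ring; if the new value shows up, the ring
	 * is being fetched and executed.
	 */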
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %d usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

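	/*
	 * Record where the read pointer will be once this submission has
	 * been consumed: 2 dwords for the save-register write below plus
	 * 3 dwords for the IB dispatch packet.
	 */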
	if (ring->rptr_save_reg) {
		u32 next_rptr = ring->wptr + 2 + 3;

		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}

int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		goto free_scratch;
	}
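	/*
	 * Same idea as the ring test, but routed through an indirect buffer:
	 * one register write followed by type-2 NOP padding.
	 */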
	ib.ptr[0] = PACKET0(scratch, 0);
	ib.ptr[1] = 0xDEADBEEF;
	ib.ptr[2] = PACKET2(0);
	ib.ptr[3] = PACKET2(0);
	ib.ptr[4] = PACKET2(0);
	ib.ptr[5] = PACKET2(0);
	ib.ptr[6] = PACKET2(0);
	ib.ptr[7] = PACKET2(0);
	ib.length_dw = 8;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		goto free_ib;
	}
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		goto free_ib;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		r = -ETIMEDOUT;
		goto free_ib;
	}
	r = 0;
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
free_ib:
	radeon_ib_free(rdev, &ib);
free_scratch:
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/*
	 * Shut down the CP. We shouldn't need to do this, but better safe
	 * than sorry.
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save a few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}

void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update the base address for the crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}

void r100_vga_render_disable(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG8(R_0003C2_GENMO_WT);
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
}

static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stop all MC clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for the MC to go idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program the MC; the FB aperture is limited to a 32-bit address space */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}

static void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force some blocks on */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}

static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM) */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r100_irq_set(rdev);
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}

int r100_resume(struct radeon_device *rdev)
{
	int r;

	/* Make sure the GART is disabled */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset the GPU before posting, otherwise ATOM will enter an infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev_to_drm(rdev));
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r)
		rdev->accel_working = false;

	return r;
}

int r100_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}

void r100_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	r100_cp_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/*
 * Due to how kexec works, it can leave the hw fully initialised when it
 * boots the new kernel. However, doing our init sequence with the CP and
 * WB stuff set up causes GPU hangs on the RN50 at least. So at startup
 * do some quick sanity checks and restore sane values to avoid this
 * problem.
 */
void r100_restore_sanity(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(RADEON_CP_CSQ_CNTL);
	if (tmp)
		WREG32(RADEON_CP_CSQ_CNTL, 0);

	tmp = RREG32(RADEON_CP_RB_CNTL);
	if (tmp)
		WREG32(RADEON_CP_RB_CNTL, 0);

	tmp = RREG32(RADEON_SCRATCH_UMSK);
	if (tmp)
		WREG32(RADEON_SCRATCH_UMSK, 0);
}

int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs files specific to this group of asics */
	r100_debugfs_mc_info_init(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some registers to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disabling VGA needs to use the VGA request */
	/* BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset the GPU before posting, otherwise ATOM will enter an infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check whether the card is posted */
	if (!radeon_boot_test_post_card(rdev))
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev_to_drm(rdev));
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r)
			radeon_agp_disable(rdev);
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	radeon_fence_driver_init(rdev);
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	/* Initialize power management */
	radeon_pm_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init, so stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_cp_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}

uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
{
	unsigned long flags;
	uint32_t ret;

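	/*
	 * Registers beyond the directly mapped MMIO aperture are reached
	 * indirectly: write the register offset to MM_INDEX, then access the
	 * value through MM_DATA. The spinlock keeps the index/data pair
	 * atomic against concurrent users.
	 */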
	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
	writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
	ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
	return ret;
}

void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
	writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
	writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
}

u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
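	/*
	 * Same indirection over the PCI I/O BAR: registers inside the I/O
	 * aperture are accessed directly, anything beyond it goes through
	 * the MM_INDEX/MM_DATA pair.
	 */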
	if (reg < rdev->rio_mem_size)
		return ioread32(rdev->rio_mem + reg);

	iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
	return ioread32(rdev->rio_mem + RADEON_MM_DATA);
}

void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	if (reg < rdev->rio_mem_size) {
		iowrite32(v, rdev->rio_mem + reg);
	} else {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
	}
}