/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/adreno-smmu-priv.h>
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/interconnect.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;
struct msm_file_private;

struct msm_gpu_config {
	const char *ioname;
	unsigned int nr_rings;
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t *value, uint32_t *len);
	int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t value, uint32_t len);
	int (*hw_init)(struct msm_gpu *gpu);

	/**
	 * @ucode_load: Optional hook to upload fw to GEM objs
	 */
	int (*ucode_load)(struct msm_gpu *gpu);

	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
			struct drm_printer *p);
	/* for generation specific debugfs: */
	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	/* note: gpu_busy() can assume that we have been pm_resumed */
	u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	int (*gpu_state_put)(struct msm_gpu_state *state);
	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
	/* note: gpu_set_freq() can assume that we have been pm_resumed */
	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
			     bool suspended);
	struct msm_gem_address_space *(*create_address_space)
		(struct msm_gpu *gpu, struct platform_device *pdev);
	struct msm_gem_address_space *(*create_private_address_space)
		(struct msm_gpu *gpu);
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);

	/**
	 * progress: Has the GPU made progress?
	 *
	 * Return true if GPU position in cmdstream has advanced (or changed)
	 * since the last call.  To avoid false negatives, this should account
	 * for cmdstream that is buffered in this FIFO upstream of the CP fw.
	 */
	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};
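
/*
 * A GPU backend fills in a subset of the hooks above and hands its table to
 * msm_gpu_init().  A rough sketch (the function names below are hypothetical,
 * not taken from any particular backend):
 *
 *	static const struct msm_gpu_funcs foo_gpu_funcs = {
 *		.hw_init = foo_hw_init,
 *		.pm_suspend = foo_pm_suspend,
 *		.pm_resume = foo_pm_resume,
 *		.submit = foo_submit,
 *		.flush = foo_flush,
 *		.irq = foo_irq,
 *		.active_ring = foo_active_ring,
 *		.recover = foo_recover,
 *		.destroy = foo_destroy,
 *	};
 *
 * Hooks documented as optional (eg. @ucode_load) are only wired up by the
 * generations that need them.
 */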

/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;
	unsigned long iova;
	int flags;
	const char *type;
	const char *block;
};

/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/** lock: lock for "suspended", "busy_cycles", and "time" */
	struct mutex lock;

	/**
	 * idle_freq:
	 *
	 * Shadow frequency used while the GPU is idle.  From the PoV of
	 * the devfreq governor, we are continuing to sample busyness and
	 * adjust frequency while the GPU is idle, but we use this shadow
	 * value as the GPU is actually clamped to minimum frequency while
	 * it is inactive.
	 */
	unsigned long idle_freq;

	/**
	 * boost_freq:
	 *
	 * A PM QoS constraint to boost min freq for a period of time
	 * until the boost expires.
	 */
	struct dev_pm_qos_request boost_freq;

	/**
	 * busy_cycles: Last busy counter value, for calculating elapsed busy
	 * cycles since last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle. */
	ktime_t idle_time;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;

	/**
	 * boost_work:
	 *
	 * Used to reset the boost_freq request after the boost period has
	 * elapsed.
	 */
	struct msm_hrtimer_work boost_work;

	/** suspended: tracks if we're suspended */
	bool suspended;
};
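
/*
 * Rough flow for the state above (a sketch; msm_gpu_devfreq.c is
 * authoritative):  msm_devfreq_active() is called when the GPU picks up
 * work and cancels any pending clamp to idle_freq; when the last submit
 * retires, msm_devfreq_idle() arms idle_work, which after a short delay
 * clamps the GPU while the governor continues sampling against idle_freq.
 * msm_devfreq_boost() bumps the boost_freq QoS request and arms boost_work
 * to drop the boost again once the boost period has elapsed.
 */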

struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	struct adreno_smmu_priv adreno_smmu;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/**
	 * sysprof_active:
	 *
	 * The count of contexts that have enabled system profiling.
	 */
	refcount_t sysprof_active;

	/**
	 * cur_ctx_seqno:
	 *
	 * The ctx->seqno value of the last context to submit rendering,
	 * and the one with current pgtables installed (for generations
	 * that support per-context pgtables).  Tracked by seqno rather
	 * than pointer value to avoid dangling pointers, and cases where
	 * a ctx can be freed and a new one created with the same address.
	 */
	int cur_ctx_seqno;

	/**
	 * lock:
	 *
	 * General lock for serializing all the gpu things.
	 *
	 * TODO move to per-ring locking where feasible (ie. submit/retire
	 * path, etc)
	 */
	struct mutex lock;

	/**
	 * active_submits:
	 *
	 * The number of submitted but not yet retired submits, used to
	 * determine transitions between active and idle.
	 *
	 * Protected by active_lock
	 */
	int active_submits;

	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/**
	 * global_faults: number of GPU hangs not attributed to a particular
	 * address space
	 */
	int global_faults;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection: */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/** retire_event: notified when submits are retired: */
	wait_queue_head_t retire_event;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	struct msm_gpu_state *crashstate;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	/**
	 * @allow_relocs: allow relocs in SUBMIT ioctl
	 *
	 * Mesa won't use relocs for driver version 1.4.0 and later.  This
	 * switch-over happened early enough in mesa a6xx bringup that we
	 * can disallow relocs for a6xx and newer.
	 */
	bool allow_relocs;

	struct thermal_cooling_device *cooling;
};

static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);

	if (!adreno_smmu)
		return NULL;

	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}

/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
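
/*
 * Worked example of the encoding above: the fields are log2 encoded in units
 * of 8 bytes, so with the sizes above BUFSZ = ilog2(SZ_32K / 8) =
 * ilog2(4096) = 12 and BLKSZ = ilog2(32 / 8) = ilog2(4) = 2.
 */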

static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
			return true;
	}

	return false;
}
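
/*
 * Example: if ring->fctx->last_fence (the seqno of the last submitted job) is
 * 105 while ring->memptrs->fence (the last seqno the GPU has written back on
 * completion) is 103, two submits are still in flight on that ring and
 * msm_gpu_active() returns true.
 */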

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the child
 * class that actually enables the perf counter..  but msm_gpu base class
 * will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;
	uint32_t sample_reg;
	uint32_t select_val;
	const char *name;
};

/*
 * The number of priority levels provided by drm gpu scheduler.  The
 * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
 * cases, so we don't use it (no need for kernel generated jobs).
 */
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_LOW - DRM_SCHED_PRIORITY_HIGH)
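
/*
 * With the scheduler's current HIGH/NORMAL/LOW levels this works out to 3,
 * ie. nr_rings * 3 distinct userspace-visible priority levels.
 */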

/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock:    synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid:      counter incremented each time a submitqueue is created,
 *                used to assign &msm_gpu_submitqueue.id
 * @aspace:       the per-process GPU address-space
 * @ref:          reference count
 * @seqno:        unique per process seqno
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * sysprof:
	 *
	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
	 * intended to be used by system profiling tools like Mesa's
	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
	 *
	 * Setting a value of 1 will preserve performance counters across
	 * context switches.  Setting a value of 2 will in addition
	 * suppress suspend.  (Performance counters lose state across
	 * power collapse, which is undesirable for profiling in some
	 * cases.)
	 *
	 * The value automatically reverts to zero when the drm device
	 * file is closed.
	 */
	int sysprof;

	/**
	 * comm: Overridden task comm, see MSM_PARAM_COMM
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *comm;

	/**
	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *cmdline;

	/**
	 * elapsed_ns:
	 *
	 * The total (cumulative) elapsed time GPU was busy with rendering
	 * from this context in ns.
	 */
	uint64_t elapsed_ns;

	/**
	 * cycles:
	 *
	 * The total (cumulative) GPU cycles elapsed attributed to this
	 * context.
	 */
	uint64_t cycles;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];

	/**
	 * ctx_mem:
	 *
	 * Total amount of memory of GEM buffers with handles attached for
	 * this context.
	 */
	atomic64_t ctx_mem;
};

/**
 * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
 *
 * @gpu:        the gpu instance
 * @prio:       the userspace priority level
 * @ring_nr:    [out] the ringbuffer the userspace priority maps to
 * @sched_prio: [out] the gpu scheduler priority level which the userspace
 *              priority maps to
 *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
 * UABI is that lower numerical values are higher priority.  So mapping the
 * single userspace priority level into ring_nr and sched_prio takes some
 * care.  The userspace provided priority (when a submitqueue is created)
 * is mapped to ring nr and scheduler priority as such:
 *
 *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
 *   sched_prio = NR_SCHED_PRIORITIES -
 *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
 *
 * This allows generations without preemption (nr_rings==1) to have some
 * amount of prioritization, and provides more priority levels for gens
 * that do have preemption.
 */
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
	unsigned rn, sp;

	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);

	/* invert sched priority to map to higher-numeric-is-higher-
	 * priority convention
	 */
	sp = NR_SCHED_PRIORITIES - sp - 1;

	if (rn >= gpu->nr_rings)
		return -EINVAL;

	*ring_nr = rn;
	*sched_prio = sp;

	return 0;
}
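
/*
 * Worked example, assuming NR_SCHED_PRIORITIES evaluates to 3 and a GPU with
 * nr_rings == 4:  userspace prio 0 maps to ring_nr 0 / sched_prio 2 (the
 * highest), prio 5 maps to ring_nr 1 / sched_prio 0, and prio 12 or above is
 * rejected with -EINVAL since it would land beyond the last ring.
 */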

/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:        userspace id for the submitqueue, unique within the drm_file
 * @flags:     userspace flags for the submitqueue, specified at creation
 *             (currently unused)
 * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
 *             by the submitqueue's priority
 * @faults:    the number of GPU hangs associated with this submitqueue
 * @last_fence: the sequence number of the last allocated fence (for error
 *             checking)
 * @ctx:       the per-drm_file context associated with the submitqueue (ie.
 *             which set of pgtables the jobs submitted via this queue use)
 * @node:      node in the context's list of submitqueues
 * @fence_idr: maps fence-id to dma_fence for userspace visible fence
 *             seqno, protected by submitqueue lock
 * @idr_lock:  for serializing access to fence_idr
 * @lock:      submitqueue lock for serializing submits on a queue
 * @ref:       reference count
 * @entity:    the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	uint32_t last_fence;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct spinlock idr_lock;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	u32 flags;
	void *data;
	bool encoded;
	char name[32];
};

struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;

	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;
		int data_size;
		bool encoded;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;

	u32 rbbm_status;

	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return readl(gpu->mmio + (reg << 2));
}

static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned.  We could do a series of validity checks if we
	 * wanted to, but really is a readq() that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is latched
	 * when the lo is read, so make sure to read the lo first to trigger
	 * that
	 */
	val = (u64) readl(gpu->mmio + (reg << 2));
	val |= ((u64) readl(gpu->mmio + ((reg + 1) << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	writel(lower_32_bits(val), gpu->mmio + (reg << 2));
	writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
}
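
/*
 * Note that 'reg' in the accessors above is a dword index, ie. the byte
 * offset into the mmio region is (reg << 2).  For example, a hypothetical
 * 64-bit register pair at dword index 0x400 is written by gpu_write64() at
 * byte offsets 0x1000 and 0x1004.
 */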

int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 struct drm_printer *p);

int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

int msm_file_private_set_sysprof(struct msm_file_private *ctx,
				 struct msm_gpu *gpu, int sysprof);
void __msm_file_private_destroy(struct kref *kref);

static inline void msm_file_private_put(struct msm_file_private *ctx)
{
	kref_put(&ctx->ref, __msm_file_private_destroy);
}

static inline struct msm_file_private *msm_file_private_get(
	struct msm_file_private *ctx)
{
	kref_get(&ctx->ref);
	return ctx;
}

void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = NULL;

	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		kref_get(&gpu->crashstate->ref);
		state = gpu->crashstate;
	}

	mutex_unlock(&gpu->lock);

	return state;
}

static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
{
	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		if (gpu->funcs->gpu_state_put(gpu->crashstate))
			gpu->crashstate = NULL;
	}

	mutex_unlock(&gpu->lock);
}

/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))
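
/*
 * A kernel-internal buffer allocation would use it roughly like this
 * (a sketch, not lifted from a specific call-site):
 *
 *	bo = msm_gem_new(dev, size, check_apriv(gpu, MSM_BO_WC));
 *
 * so MSM_BO_MAP_PRIV is only added on hardware with expanded apriv.
 */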

#endif /* __MSM_GPU_H__ */