// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <linux/anon_inodes.h>
#include <linux/delay.h>
#include <linux/nospec.h>
#include <linux/poll.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>

#include "abi/guc_actions_slpc_abi.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "regs/xe_oa_regs.h"
#include "xe_assert.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc_pc.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_oa.h"
#include "xe_observation.h"
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"

#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX

struct xe_oa_reg {
	struct xe_reg addr;
	u32 value;
};

struct xe_oa_config {
	struct xe_oa *oa;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct xe_oa_reg *regs;
	u32 regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct kobj_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};

struct flex {
	struct xe_reg reg;
	u32 offset;
	u32 value;
};

struct xe_oa_open_param {
	u32 oa_unit_id;
	bool sample;
	u32 metric_set;
	enum xe_oa_format_name oa_format;
	int period_exponent;
	bool disabled;
	int exec_queue_id;
	int engine_instance;
	struct xe_exec_queue *exec_q;
	struct xe_hw_engine *hwe;
	bool no_preempt;
};

struct xe_oa_config_bo {
	struct llist_node node;

	struct xe_oa_config *oa_config;
	struct xe_bb *bb;
};

#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x

static const struct xe_oa_format oa_formats[] = {
	[XE_OA_FORMAT_C4_B8]			= { 7, 64,  DRM_FMT(OAG) },
	[XE_OA_FORMAT_A12]			= { 0, 64,  DRM_FMT(OAG) },
	[XE_OA_FORMAT_A12_B8_C8]		= { 2, 128, DRM_FMT(OAG) },
	[XE_OA_FORMAT_A32u40_A4u32_B8_C8]	= { 5, 256, DRM_FMT(OAG) },
	[XE_OAR_FORMAT_A32u40_A4u32_B8_C8]	= { 5, 256, DRM_FMT(OAR) },
	[XE_OA_FORMAT_A24u40_A14u32_B8_C8]	= { 5, 256, DRM_FMT(OAG) },
	[XE_OAC_FORMAT_A24u64_B8_C8]		= { 1, 320, DRM_FMT(OAC), HDR_64_BIT },
	[XE_OAC_FORMAT_A22u32_R2u32_B8_C8]	= { 2, 192, DRM_FMT(OAC), HDR_64_BIT },
	[XE_OAM_FORMAT_MPEC8u64_B8_C8]		= { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT },
	[XE_OAM_FORMAT_MPEC8u32_B8_C8]		= { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC64u64]			= { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC64u64_B8_C8]		= { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 },
	[XE_OA_FORMAT_PEC64u32]			= { 1, 320, DRM_FMT(PEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC32u64_G1]		= { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC32u32_G1]		= { 5, 192, DRM_FMT(PEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC32u64_G2]		= { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC32u32_G2]		= { 6, 192, DRM_FMT(PEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC36u64_G1_32_G2_4]	= { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC36u64_G1_4_G2_32]	= { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
};
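
/*
 * Each initializer above fills struct xe_oa_format in declaration order:
 * counter_select, report size in bytes, report format type and, for the
 * newer formats, the HDR_64_BIT header layout plus counter_size/bc_report
 * (field order inferred from the initializers; xe_oa_types.h is
 * authoritative).
 */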

static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
	return tail >= head ? tail - head :
		tail + stream->oa_buffer.circ_size - head;
}

static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
{
	return ptr + n >= stream->oa_buffer.circ_size ?
		ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
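
/*
 * Illustrative example of the two circular helpers above (made-up values):
 * with circ_size = 16M, head = 16M - 64 and tail = 64,
 * xe_oa_circ_diff(stream, 64, 16M - 64) = 64 + 16M - (16M - 64) = 128,
 * the distance across the wrap point, and
 * xe_oa_circ_incr(stream, 16M - 64, 128) wraps around to 64.
 */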

static void xe_oa_config_release(struct kref *ref)
{
	struct xe_oa_config *oa_config =
		container_of(ref, typeof(*oa_config), ref);

	kfree(oa_config->regs);

	kfree_rcu(oa_config, rcu);
}

static void xe_oa_config_put(struct xe_oa_config *oa_config)
{
	if (!oa_config)
		return;

	kref_put(&oa_config->ref, xe_oa_config_release);
}

static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config)
{
	return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL;
}

static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set)
{
	struct xe_oa_config *oa_config;

	rcu_read_lock();
	oa_config = idr_find(&oa->metrics_idr, metrics_set);
	if (oa_config)
		oa_config = xe_oa_config_get(oa_config);
	rcu_read_unlock();

	return oa_config;
}

static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
{
	xe_oa_config_put(oa_bo->oa_config);
	xe_bb_free(oa_bo->bb, NULL);
	kfree(oa_bo);
}

static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
{
	return &stream->hwe->oa_unit->regs;
}

static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
{
	return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) &
		OAG_OATAILPTR_MASK;
}

#define oa_report_header_64bit(__s) \
	((__s)->oa_buffer.format->header == HDR_64_BIT)

static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
{
	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
}

static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
{
	if (oa_report_header_64bit(stream))
		*(u64 *)report = 0;
	else
		*report = 0;
}

static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
{
	return oa_report_header_64bit(stream) ?
		*((u64 *)report + 1) :
		*((u32 *)report + 1);
}

static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
{
	if (oa_report_header_64bit(stream))
		*(u64 *)&report[2] = 0;
	else
		report[1] = 0;
}
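
/*
 * Note on the accessors above: a 32-bit header keeps the report id in
 * dword 0 and the timestamp in dword 1, while HDR_64_BIT formats keep the
 * report id in qword 0 and the timestamp in qword 1 (hence &report[2],
 * i.e. byte offset 8, in oa_timestamp_clear()).
 */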

static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
{
	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
	int report_size = stream->oa_buffer.format->size;
	u32 tail, hw_tail;
	unsigned long flags;
	bool pollin;
	u32 partial_report_size;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	hw_tail = xe_oa_hw_tail_read(stream);
	hw_tail -= gtt_offset;

	/*
	 * The tail pointer increases in 64-byte (cacheline size) increments, not in
	 * report_size increments. Also report size may not be a power of 2. Compute
	 * the size of a potential partially landed report in the OA buffer.
	 */
	partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail);
	partial_report_size %= report_size;

	/* Subtract partial amount off the tail */
	hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size);

	tail = hw_tail;

	/*
	 * Walk the stream backward until we find a report with report id and timestamp
	 * not 0. We can't tell whether a report has fully landed in memory before the
	 * report id and timestamp of the following report have landed.
	 *
	 * This assumes that the writes of the OA unit land in memory in the order
	 * they were written.  If not : (╯°□°)╯︵ ┻━┻
	 */
	while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
		void *report = stream->oa_buffer.vaddr + tail;

		if (oa_report_id(stream, report) || oa_timestamp(stream, report))
			break;

		tail = xe_oa_circ_diff(stream, tail, report_size);
	}

	if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size)
		drm_dbg(&stream->oa->xe->drm,
			"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
			stream->oa_buffer.head, tail, hw_tail);

	stream->oa_buffer.tail = tail;

	pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail,
				 stream->oa_buffer.head) >= report_size;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	return pollin;
}

static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct xe_oa_stream *stream =
		container_of(hrtimer, typeof(*stream), poll_check_timer);

	if (xe_oa_buffer_check_unlocked(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns));

	return HRTIMER_RESTART;
}

static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
			       size_t count, size_t *offset, const u8 *report)
{
	int report_size = stream->oa_buffer.format->size;
	int report_size_partial;
	u8 *oa_buf_end;

	if ((count - *offset) < report_size)
		return -ENOSPC;

	buf += *offset;

	oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
	report_size_partial = oa_buf_end - report;

	if (report_size_partial < report_size) {
		if (copy_to_user(buf, report, report_size_partial))
			return -EFAULT;
		buf += report_size_partial;

		if (copy_to_user(buf, stream->oa_buffer.vaddr,
				 report_size - report_size_partial))
			return -EFAULT;
	} else if (copy_to_user(buf, report, report_size)) {
		return -EFAULT;
	}

	*offset += report_size;

	return 0;
}
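
/*
 * Worked example of the split copy above (illustrative numbers): with a
 * 256 byte report that starts 64 bytes before the end of the circular
 * buffer, report_size_partial = 64, so the first copy_to_user() moves the
 * 64 tail bytes and the second moves the remaining 192 bytes from the
 * start of the buffer.
 */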

static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
				size_t count, size_t *offset)
{
	int report_size = stream->oa_buffer.format->size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
	size_t start_offset = *offset;
	unsigned long flags;
	u32 head, tail;
	int ret = 0;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
	head = stream->oa_buffer.head;
	tail = stream->oa_buffer.tail;
	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	xe_assert(stream->oa->xe,
		  head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);

	for (; xe_oa_circ_diff(stream, tail, head);
	     head = xe_oa_circ_incr(stream, head, report_size)) {
		u8 *report = oa_buf_base + head;

		ret = xe_oa_append_report(stream, buf, count, offset, report);
		if (ret)
			break;

		if (!(stream->oa_buffer.circ_size % report_size)) {
			/* Clear out report id and timestamp to detect unlanded reports */
			oa_report_id_clear(stream, (void *)report);
			oa_timestamp_clear(stream, (void *)report);
		} else {
			u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
			u32 part = oa_buf_end - report;

			/* Zero out the entire report */
			if (report_size <= part) {
				memset(report, 0, report_size);
			} else {
				memset(report, 0, part);
				memset(oa_buf_base, 0, report_size - part);
			}
		}
	}

	if (start_offset != *offset) {
		struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;

		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
		xe_mmio_write32(stream->gt, oaheadptr,
				(head + gtt_offset) & OAG_OAHEADPTR_MASK);
		stream->oa_buffer.head = head;
		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
	u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
			gtt_offset & OAG_OAHEADPTR_MASK);
	stream->oa_buffer.head = 0;
	/*
	 * PRM says: "This MMIO must be set before the OATAILPTR register and after the
	 * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
	 */
	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf);
	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr,
			gtt_offset & OAG_OATAILPTR_MASK);

	/* Mark that we need updated tail pointer to read from */
	stream->oa_buffer.tail = 0;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size);
}

static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
{
	return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) |
		REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) |
		REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size);
}
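
/*
 * Example of the shift in __format_to_oactrl() with a hypothetical
 * counter_sel_mask of 0x7c (bits [6:2]): ffs(0x7c) - 1 = 2, so
 * counter_select = 5 is placed at (5 << 2) & 0x7c = 0x14.
 */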

static u32 __oa_ccs_select(struct xe_oa_stream *stream)
{
	u32 val;

	if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE)
		return 0;

	val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance);
	xe_assert(stream->oa->xe,
		  REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance);
	return val;
}

static void xe_oa_enable(struct xe_oa_stream *stream)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	const struct xe_oa_regs *regs;
	u32 val;

	/*
	 * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA
	 * buffer must be correctly initialized
	 */
	xe_oa_init_oa_buffer(stream);

	regs = __oa_regs(stream);
	val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) |
		__oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE;

	if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
	    stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
		val |= OAG_OACONTROL_OA_PES_DISAG_EN;

	xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
}

static void xe_oa_disable(struct xe_oa_stream *stream)
{
	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0);
	if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl,
			   OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
		drm_err(&stream->oa->xe->drm,
			"wait for OA to be disabled timed out\n");

	if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
		/* <= XE_METEORLAKE except XE_PVC */
		xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
		if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
			drm_err(&stream->oa->xe->drm,
				"wait for OA tlb invalidate timed out\n");
	}
}

static int xe_oa_wait_unlocked(struct xe_oa_stream *stream)
{
	/* We might wait indefinitely if periodic sampling is not enabled */
	if (!stream->periodic)
		return -EINVAL;

	return wait_event_interruptible(stream->poll_wq,
					xe_oa_buffer_check_unlocked(stream));
}

#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \
				OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST)

static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
			size_t count, size_t *offset)
{
	/* Only clear our bits to avoid side-effects */
	stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
					  OASTATUS_RELEVANT_BITS, 0);
	/*
	 * Signal to userspace that there is non-zero OA status to read via
	 * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl
	 */
	if (stream->oa_status & OASTATUS_RELEVANT_BITS)
		return -EIO;

	return xe_oa_append_reports(stream, buf, count, offset);
}

static ssize_t xe_oa_read(struct file *file, char __user *buf,
			  size_t count, loff_t *ppos)
{
	struct xe_oa_stream *stream = file->private_data;
	size_t offset = 0;
	int ret;

	/* Can't read from disabled streams */
	if (!stream->enabled || !stream->sample)
		return -EINVAL;

	if (!(file->f_flags & O_NONBLOCK)) {
		do {
			ret = xe_oa_wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&stream->stream_lock);
			ret = __xe_oa_read(stream, buf, count, &offset);
			mutex_unlock(&stream->stream_lock);
		} while (!offset && !ret);
	} else {
		mutex_lock(&stream->stream_lock);
		ret = __xe_oa_read(stream, buf, count, &offset);
		mutex_unlock(&stream->stream_lock);
	}

	/*
	 * Typically we clear pollin here in order to wait for the new hrtimer callback
	 * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC,
	 * which means that more OA data is available than could fit in the user provided
	 * buffer. In this case we want the next poll() call to not block.
	 *
	 * Also in case of -EIO, we have already waited for data before returning
	 * -EIO, so there is no need to wait again
	 */
	if (ret != -ENOSPC && ret != -EIO)
		stream->pollin = false;

	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */
	return offset ?: (ret ?: -EAGAIN);
}

static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream,
				  struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &stream->poll_wq, wait);

	/*
	 * We don't explicitly check whether there's something to read here since this
	 * path may be hot depending on what else userspace is polling, or on the timeout
	 * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there
	 * are samples to read
	 */
	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
{
	struct xe_oa_stream *stream = file->private_data;
	__poll_t ret;

	mutex_lock(&stream->stream_lock);
	ret = xe_oa_poll_locked(stream, file, wait);
	mutex_unlock(&stream->stream_lock);

	return ret;
}

static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
{
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;
	int err = 0;

	/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
	job = xe_bb_create_job(stream->k_exec_q, bb);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto exit;
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	if (timeout < 0)
		err = timeout;
	else if (!timeout)
		err = -ETIME;
exit:
	return err;
}

static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
{
	u32 i;

#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)

	for (i = 0; i < n_regs; i++) {
		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
			u32 n_lri = min_t(u32, n_regs - i,
					  MI_LOAD_REGISTER_IMM_MAX_REGS);

			bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri);
		}
		bb->cs[bb->len++] = reg_data[i].addr.addr;
		bb->cs[bb->len++] = reg_data[i].value;
	}
}

static int num_lri_dwords(int num_regs)
{
	int count = 0;

	if (num_regs > 0) {
		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
		count += num_regs * 2;
	}

	return count;
}
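
/*
 * For example, an OA config with 200 registers needs
 * DIV_ROUND_UP(200, 126) = 2 MI_LOAD_REGISTER_IMM headers plus
 * 200 * 2 = 400 register/value dwords, so num_lri_dwords(200) = 402.
 */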

static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
}

static void xe_oa_free_configs(struct xe_oa_stream *stream)
{
	struct xe_oa_config_bo *oa_bo, *tmp;

	xe_oa_config_put(stream->oa_config);
	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
		free_oa_config_bo(oa_bo);
}

static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
			     struct xe_bb *bb, const struct flex *flex, u32 count)
{
	u32 offset = xe_bo_ggtt_addr(lrc->bo);

	do {
		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
		bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
		bb->cs[bb->len++] = 0;
		bb->cs[bb->len++] = flex->value;

	} while (flex++, --count);
}
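
/*
 * Each loop iteration above emits one 4-dword MI_STORE_DATA_IMM packet:
 * the command header, the lower and upper halves of the GGTT address of
 * the target dword in the context image (the upper half is 0 here), and
 * the value to store.
 */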

static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
				  const struct flex *flex, u32 count)
{
	struct xe_bb *bb;
	int err;

	bb = xe_bb_new(stream->gt, 4 * count, false);
	if (IS_ERR(bb)) {
		err = PTR_ERR(bb);
		goto exit;
	}

	xe_oa_store_flex(stream, lrc, bb, flex, count);

	err = xe_oa_submit_bb(stream, bb);
	xe_bb_free(bb, NULL);
exit:
	return err;
}

static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
{
	struct xe_bb *bb;
	int err;

	bb = xe_bb_new(stream->gt, 3, false);
	if (IS_ERR(bb)) {
		err = PTR_ERR(bb);
		goto exit;
	}

	write_cs_mi_lri(bb, reg_lri, 1);

	err = xe_oa_submit_bb(stream, bb);
	xe_bb_free(bb, NULL);
exit:
	return err;
}

static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	struct xe_lrc *lrc = stream->exec_q->lrc[0];
	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);

	struct flex regs_context[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			regs_offset + CTX_CONTEXT_CONTROL,
			_MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE),
		},
	};
	struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
	int err;

	/* Modify stream hwe context image with regs_context */
	err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
				     regs_context, ARRAY_SIZE(regs_context));
	if (err)
		return err;

	/* Apply reg_lri using LRI */
	return xe_oa_load_with_lri(stream, &reg_lri);
}

static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	struct xe_lrc *lrc = stream->exec_q->lrc[0];
	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
	struct flex regs_context[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			regs_offset + CTX_CONTEXT_CONTROL,
			_MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) |
			_MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
		},
	};
	struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
	int err;

	/* Set ccs select to enable programming of OAC_OACONTROL */
	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));

	/* Modify stream hwe context image with regs_context */
	err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
				     regs_context, ARRAY_SIZE(regs_context));
	if (err)
		return err;

	/* Apply reg_lri using LRI */
	return xe_oa_load_with_lri(stream, &reg_lri);
}

static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
{
	switch (stream->hwe->class) {
	case XE_ENGINE_CLASS_RENDER:
		return xe_oa_configure_oar_context(stream, enable);
	case XE_ENGINE_CLASS_COMPUTE:
		return xe_oa_configure_oac_context(stream, enable);
	default:
		/* Video engines do not support MI_REPORT_PERF_COUNT */
		return 0;
	}
}

#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)

static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable)
{
	return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG,
			     enable && stream && stream->sample ?
			     0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG);
}
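
/*
 * Reminder on _MASKED_FIELD() semantics as used above: the write-enable
 * mask goes in the upper 16 bits of the written value, so only the
 * DISABLE_MMIO_TRG bit is touched and the other OAG_OA_DEBUG bits retain
 * their current values.
 */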

static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
{
	u32 sqcnt1;

	/*
	 * Wa_1508761755:xehpsdv, dg2
	 * Enable thread stall DOP gating and EU DOP gating.
	 */
	if (stream->oa->xe->info.platform == XE_DG2) {
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
					  _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
	}

	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
			oag_configure_mmio_trigger(stream, false));

	/* Disable the context save/restore of OAR counters */
	if (stream->exec_q)
		xe_oa_configure_oa_context(stream, false);

	/* Make sure we disable noa to save power. */
	xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);

	sqcnt1 = SQCNT1_PMON_ENABLE |
		 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);

	/* Reset PMON Enable to save power. */
	xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0);
}

static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
{
	struct xe_oa_unit *u = stream->hwe->oa_unit;
	struct xe_gt *gt = stream->hwe->gt;

	if (WARN_ON(stream != u->exclusive_stream))
		return;

	WRITE_ONCE(u->exclusive_stream, NULL);

	mutex_destroy(&stream->stream_lock);

	xe_oa_disable_metric_set(stream);
	xe_exec_queue_put(stream->k_exec_q);

	xe_oa_free_oa_buffer(stream);

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_pm_runtime_put(stream->oa->xe);

	/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
	if (stream->override_gucrc)
		xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));

	xe_oa_free_configs(stream);
}

static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
{
	struct xe_bo *bo;

	BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE);
	BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M);

	bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
				  XE_OA_BUFFER_SIZE, ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	stream->oa_buffer.bo = bo;
	/* mmap implementation requires OA buffer to be in system memory */
	xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
	stream->oa_buffer.vaddr = bo->vmap.vaddr;
	return 0;
}

static struct xe_oa_config_bo *
__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config)
{
	struct xe_oa_config_bo *oa_bo;
	size_t config_length;
	struct xe_bb *bb;

	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
	if (!oa_bo)
		return ERR_PTR(-ENOMEM);

	config_length = num_lri_dwords(oa_config->regs_len);
	config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32);

	bb = xe_bb_new(stream->gt, config_length, false);
	if (IS_ERR(bb))
		goto err_free;

	write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len);

	oa_bo->bb = bb;
	oa_bo->oa_config = xe_oa_config_get(oa_config);
	llist_add(&oa_bo->node, &stream->oa_config_bos);

	return oa_bo;
err_free:
	kfree(oa_bo);
	return ERR_CAST(bb);
}
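
/*
 * Arithmetic note for __xe_oa_alloc_config_buffer(): config_length is
 * computed in dwords, converted to bytes, rounded up to a whole page and
 * converted back to dwords. E.g. 402 LRI dwords = 1608 bytes round up to
 * one 4K page = 1024 dwords (assuming a 4K XE_PAGE_SIZE).
 */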

static struct xe_oa_config_bo *
xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config)
{
	struct xe_oa_config_bo *oa_bo;

	/* Look for the buffer in the already allocated BOs attached to the stream */
	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
		if (oa_bo->oa_config == oa_config &&
		    memcmp(oa_bo->oa_config->uuid, oa_config->uuid,
			   sizeof(oa_config->uuid)) == 0)
			goto out;
	}

	oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config);
out:
	return oa_bo;
}

static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
{
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
	struct xe_oa_config_bo *oa_bo;
	int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;

	oa_bo = xe_oa_alloc_config_buffer(stream, config);
	if (IS_ERR(oa_bo)) {
		err = PTR_ERR(oa_bo);
		goto exit;
	}

	err = xe_oa_submit_bb(stream, oa_bo->bb);

	/* Additional empirical delay needed for NOA programming after registers are written */
	usleep_range(us, 2 * us);
exit:
	return err;
}

static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
{
	/* If user didn't require OA reports, ask HW not to emit ctx switch reports */
	return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
			     stream->sample ?
			     0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}

static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
	u32 oa_debug, sqcnt1;
	int ret;

	/*
	 * Wa_1508761755:xehpsdv, dg2
	 * EU NOA signals behave incorrectly if EU clock gating is enabled.
	 * Disable thread stall DOP gating and EU DOP gating.
	 */
	if (stream->oa->xe->info.platform == XE_DG2) {
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
					  _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
	}

	/* Disable clk ratio reports */
	oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
		OAG_OA_DEBUG_INCLUDE_CLK_RATIO;

	if (GRAPHICS_VER(stream->oa->xe) >= 20)
		oa_debug |=
			/* The three bits below are needed to get PEC counters running */
			OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL |
			OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
			OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;

	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
			_MASKED_BIT_ENABLE(oa_debug) |
			oag_report_ctx_switches(stream) |
			oag_configure_mmio_trigger(stream, true));

	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
			(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
			 OAG_OAGLBCTXCTRL_TIMER_ENABLE |
			 REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
					stream->period_exponent)) : 0);

	/*
	 * Initialize Super Queue Internal Cnt Register
	 * Set PMON Enable in order to collect valid metrics
	 * Enable bytes per clock reporting
	 */
	sqcnt1 = SQCNT1_PMON_ENABLE |
		 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);

	xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);

	/* Configure OAR/OAC */
	if (stream->exec_q) {
		ret = xe_oa_configure_oa_context(stream, true);
		if (ret)
			return ret;
	}

	return xe_oa_emit_oa_config(stream, stream->oa_config);
}

static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
	stream->pollin = false;

	xe_oa_enable(stream);

	if (stream->sample)
		hrtimer_start(&stream->poll_check_timer,
			      ns_to_ktime(stream->poll_period_ns),
			      HRTIMER_MODE_REL_PINNED);
}

static void xe_oa_stream_disable(struct xe_oa_stream *stream)
{
	xe_oa_disable(stream);

	if (stream->sample)
		hrtimer_cancel(&stream->poll_check_timer);
}

static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream)
{
	struct xe_exec_queue *q = stream->exec_q;
	int ret1, ret2;

	/* Best effort recovery: try to revert both to original, irrespective of error */
	ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us);
	ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us);
	if (ret1 || ret2)
		goto err;
	return 0;
err:
	drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2);
	return ret1 ?: ret2;
}

static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream)
{
	struct xe_exec_queue *q = stream->exec_q;
	int ret;

	/* Setting values to 0 will disable timeslice and preempt_timeout */
	ret = q->ops->set_timeslice(q, 0);
	if (ret)
		goto err;

	ret = q->ops->set_preempt_timeout(q, 0);
	if (ret)
		goto err;

	return 0;
err:
	xe_oa_enable_preempt_timeslice(stream);
	drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret);
	return ret;
}

static int xe_oa_enable_locked(struct xe_oa_stream *stream)
{
	if (stream->enabled)
		return 0;

	if (stream->no_preempt) {
		int ret = xe_oa_disable_preempt_timeslice(stream);

		if (ret)
			return ret;
	}

	xe_oa_stream_enable(stream);

	stream->enabled = true;
	return 0;
}

static int xe_oa_disable_locked(struct xe_oa_stream *stream)
{
	int ret = 0;

	if (!stream->enabled)
		return 0;

	xe_oa_stream_disable(stream);

	if (stream->no_preempt)
		ret = xe_oa_enable_preempt_timeslice(stream);

	stream->enabled = false;
	return ret;
}

static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
	struct drm_xe_ext_set_property ext;
	long ret = stream->oa_config->id;
	struct xe_oa_config *config;
	int err;

	err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext));
	if (XE_IOCTL_DBG(stream->oa->xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) ||
	    XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) ||
	    XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) ||
	    XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET))
		return -EINVAL;

	config = xe_oa_get_oa_config(stream->oa, ext.value);
	if (!config)
		return -ENODEV;

	if (config != stream->oa_config) {
		err = xe_oa_emit_oa_config(stream, config);
		if (!err)
			config = xchg(&stream->oa_config, config);
		else
			ret = err;
	}

	xe_oa_config_put(config);

	return ret;
}

static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
{
	struct drm_xe_oa_stream_status status = {};
	void __user *uaddr = (void __user *)arg;

	/* Map from register to uapi bits */
	if (stream->oa_status & OASTATUS_REPORT_LOST)
		status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST;
	if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW)
		status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW;
	if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW)
		status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW;
	if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL)
		status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL;

	if (copy_to_user(uaddr, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
{
	struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, };
	void __user *uaddr = (void __user *)arg;

	if (copy_to_user(uaddr, &info, sizeof(info)))
		return -EFAULT;

	return 0;
}

static long xe_oa_ioctl_locked(struct xe_oa_stream *stream,
			       unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
		return xe_oa_enable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
		return xe_oa_disable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_CONFIG:
		return xe_oa_config_locked(stream, arg);
	case DRM_XE_OBSERVATION_IOCTL_STATUS:
		return xe_oa_status_locked(stream, arg);
	case DRM_XE_OBSERVATION_IOCTL_INFO:
		return xe_oa_info_locked(stream, arg);
	}

	return -EINVAL;
}

static long xe_oa_ioctl(struct file *file,
			unsigned int cmd,
			unsigned long arg)
{
	struct xe_oa_stream *stream = file->private_data;
	long ret;

	mutex_lock(&stream->stream_lock);
	ret = xe_oa_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&stream->stream_lock);

	return ret;
}

static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
{
	if (stream->enabled)
		xe_oa_disable_locked(stream);

	xe_oa_stream_destroy(stream);

	if (stream->exec_q)
		xe_exec_queue_put(stream->exec_q);

	kfree(stream);
}

static int xe_oa_release(struct inode *inode, struct file *file)
{
	struct xe_oa_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	xe_pm_runtime_get(gt_to_xe(gt));
	mutex_lock(&gt->oa.gt_lock);
	xe_oa_destroy_locked(stream);
	mutex_unlock(&gt->oa.gt_lock);
	xe_pm_runtime_put(gt_to_xe(gt));

	/* Release the reference the OA stream kept on the driver */
	drm_dev_put(&gt_to_xe(gt)->drm);

	return 0;
}

static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct xe_oa_stream *stream = file->private_data;
	struct xe_bo *bo = stream->oa_buffer.bo;
	unsigned long start = vma->vm_start;
	int i, ret;

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
		return -EACCES;
	}

	/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
	if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) {
		drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
		return -EINVAL;
	}

	/*
	 * Only support VM_READ, enforce MAP_PRIVATE by checking for
	 * VM_MAYSHARE, don't copy the vma on fork
	 */
	if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) {
		drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n");
		return -EINVAL;
	}
	vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
		     VM_MAYWRITE | VM_MAYEXEC);

	xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma));
	for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
		ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
				      PAGE_SIZE, vma->vm_page_prot);
		if (ret)
			break;

		start += PAGE_SIZE;
	}

	return ret;
}

static const struct file_operations xe_oa_fops = {
	.owner		= THIS_MODULE,
	.release	= xe_oa_release,
	.poll		= xe_oa_poll,
	.read		= xe_oa_read,
	.unlocked_ioctl	= xe_oa_ioctl,
	.mmap		= xe_oa_mmap,
};

static bool engine_supports_mi_query(struct xe_hw_engine *hwe)
{
	return hwe->class == XE_ENGINE_CLASS_RENDER ||
		hwe->class == XE_ENGINE_CLASS_COMPUTE;
}

static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
	u32 idx = *offset;
	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
	bool found = false;

	idx++;
	for (; idx < len; idx += 2) {
		if (state[idx] == reg) {
			found = true;
			break;
		}
	}

	*offset = idx;
	return found;
}

#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \
			  REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM))

static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
{
	struct xe_lrc *lrc = stream->exec_q->lrc[0];
	u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) +
		   lrc->ring.size) / sizeof(u32);
	u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
	u32 *state = (u32 *)lrc->bo->vmap.vaddr;

	if (drm_WARN_ON(&stream->oa->xe->drm, !state))
		return U32_MAX;

	for (; offset < len; ) {
		if (IS_MI_LRI_CMD(state[offset])) {
			/*
			 * We expect reg-value pairs in MI_LRI command, so
			 * MI_LRI_LEN() should be even
			 */
			drm_WARN_ON(&stream->oa->xe->drm,
				    MI_LRI_LEN(state[offset]) & 0x1);

			if (xe_oa_find_reg_in_lri(state, reg, &offset, len))
				break;
		} else {
			offset++;
		}
	}

	return offset < len ? offset : U32_MAX;
}

static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream)
{
	struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base);
	u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class];

	/* Do this only once. Failure is stored as offset of U32_MAX */
	if (offset)
		goto exit;

	offset = xe_oa_context_image_offset(stream, reg.addr);
	stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset;

	drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n",
		stream->hwe->name, offset);
exit:
	return offset && offset != U32_MAX ? 0 : -ENODEV;
}

static int xe_oa_stream_init(struct xe_oa_stream *stream,
			     struct xe_oa_open_param *param)
{
	struct xe_oa_unit *u = param->hwe->oa_unit;
	struct xe_gt *gt = param->hwe->gt;
	int ret;

	stream->exec_q = param->exec_q;
	stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS;
	stream->hwe = param->hwe;
	stream->gt = stream->hwe->gt;
	stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format];

	stream->sample = param->sample;
	stream->periodic = param->period_exponent > 0;
	stream->period_exponent = param->period_exponent;
	stream->no_preempt = param->no_preempt;

	/*
	 * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
	 * of buffer, making the OA buffer effectively a non-power-of-2 size circular
	 * buffer whose size, circ_size, is a multiple of the report size
	 */
	if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
	    stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
		stream->oa_buffer.circ_size =
			XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
	else
		stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;

	if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
		/* If we don't find the context offset, just return error */
		ret = xe_oa_set_ctx_ctrl_offset(stream);
		if (ret) {
			drm_err(&stream->oa->xe->drm,
				"xe_oa_set_ctx_ctrl_offset failed for %s\n",
				stream->hwe->name);
			goto exit;
		}
	}

	stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
	if (!stream->oa_config) {
		drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
		ret = -EINVAL;
		goto exit;
	}

	/*
	 * Wa_1509372804:pvc
	 *
	 * GuC reset of engines causes OA to lose configuration
	 * state. Prevent this by overriding GUCRC mode.
	 */
	if (stream->oa->xe->info.platform == XE_PVC) {
		ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
						    SLPC_GUCRC_MODE_GUCRC_NO_RC6);
		if (ret)
			goto err_free_configs;

		stream->override_gucrc = true;
	}

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(stream->oa->xe);
	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));

	ret = xe_oa_alloc_oa_buffer(stream);
	if (ret)
		goto err_fw_put;

	stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL,
						BIT(stream->hwe->logical_instance), 1,
						stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
	if (IS_ERR(stream->k_exec_q)) {
		ret = PTR_ERR(stream->k_exec_q);
		drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d",
			stream->gt->info.id, stream->hwe->name, ret);
		goto err_free_oa_buf;
	}

	ret = xe_oa_enable_metric_set(stream);
	if (ret) {
		drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n");
		goto err_put_k_exec_q;
	}

	drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n",
		stream->oa_config->uuid);

	WRITE_ONCE(u->exclusive_stream, stream);

	hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	stream->poll_check_timer.function = xe_oa_poll_check_timer_cb;
	init_waitqueue_head(&stream->poll_wq);

	spin_lock_init(&stream->oa_buffer.ptr_lock);
	mutex_init(&stream->stream_lock);

	return 0;

err_put_k_exec_q:
	xe_oa_disable_metric_set(stream);
	xe_exec_queue_put(stream->k_exec_q);
err_free_oa_buf:
	xe_oa_free_oa_buffer(stream);
err_fw_put:
	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_pm_runtime_put(stream->oa->xe);
	if (stream->override_gucrc)
		xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
	xe_oa_free_configs(stream);
exit:
	return ret;
}

static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
					  struct xe_oa_open_param *param)
{
	struct xe_oa_stream *stream;
	int stream_fd;
	int ret;

	/* We currently only allow exclusive access */
	if (param->hwe->oa_unit->exclusive_stream) {
		drm_dbg(&oa->xe->drm, "OA unit already in use\n");
		ret = -EBUSY;
		goto exit;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto exit;
	}

	stream->oa = oa;
	ret = xe_oa_stream_init(stream, param);
	if (ret)
		goto err_free;

	if (!param->disabled) {
		ret = xe_oa_enable_locked(stream);
		if (ret)
			goto err_destroy;
	}

	stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_disable;
	}

	/* Hold a reference on the drm device till stream_fd is released */
	drm_dev_get(&stream->oa->xe->drm);

	return stream_fd;
err_disable:
	if (!param->disabled)
		xe_oa_disable_locked(stream);
err_destroy:
	xe_oa_stream_destroy(stream);
err_free:
	kfree(stream);
exit:
	return ret;
}

/**
 * xe_oa_timestamp_frequency - Return OA timestamp frequency
 * @gt: @xe_gt
 *
 * OA timestamp frequency = CS timestamp frequency in most platforms. On some
 * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such
 * cases, return the adjusted CS timestamp frequency to the user.
 */
u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
{
	u32 reg, shift;

	/*
	 * Wa_18013179988:dg2
	 * Wa_14015568240:pvc
	 * Wa_14015846243:mtl
	 */
	switch (gt_to_xe(gt)->info.platform) {
	case XE_DG2:
	case XE_PVC:
	case XE_METEORLAKE:
		xe_pm_runtime_get(gt_to_xe(gt));
		reg = xe_mmio_read32(gt, RPM_CONFIG0);
		xe_pm_runtime_put(gt_to_xe(gt));

		shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
		return gt->info.reference_clock << (3 - shift);

	default:
		return gt->info.reference_clock;
	}
}
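
/*
 * Illustration of the workaround path above: with a 19.2 MHz reference
 * clock and CTC_SHIFT = 1, the reported OA timestamp frequency is
 * 19.2 MHz << (3 - 1) = 76.8 MHz (illustrative values, not taken from a
 * specific part).
 */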

static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent)
{
	u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
	u32 den = xe_oa_timestamp_frequency(gt);

	return div_u64(nom + den - 1, den);
}
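
/*
 * oa_exponent_to_ns() computes the sampling period as
 * 2^(exponent + 1) * NSEC_PER_SEC / freq, rounded up. For example (with
 * an assumed 19.2 MHz OA timestamp frequency), exponent = 5 gives
 * 64 * 1e9 / 19.2e6 ~= 3334 ns.
 */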
1556  
engine_supports_oa_format(const struct xe_hw_engine * hwe,int type)1557  static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
1558  {
1559  	switch (hwe->oa_unit->type) {
1560  	case DRM_XE_OA_UNIT_TYPE_OAG:
1561  		return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
1562  			type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
1563  	case DRM_XE_OA_UNIT_TYPE_OAM:
1564  		return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
1565  	default:
1566  		return false;
1567  	}
1568  }
1569  
decode_oa_format(struct xe_oa * oa,u64 fmt,enum xe_oa_format_name * name)1570  static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
1571  {
1572  	u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
1573  	u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
1574  	u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
1575  	u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
1576  	int idx;
1577  
1578  	for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
1579  		const struct xe_oa_format *f = &oa->oa_formats[idx];
1580  
1581  		if (counter_size == f->counter_size && bc_report == f->bc_report &&
1582  		    type == f->type && counter_sel == f->counter_select) {
1583  			*name = idx;
1584  			return 0;
1585  		}
1586  	}
1587  
1588  	return -EINVAL;
1589  }
1590  
1591  /**
1592   * xe_oa_unit_id - Return OA unit ID for a hardware engine
1593   * @hwe: @xe_hw_engine
1594   *
1595   * Return OA unit ID for a hardware engine when available
1596   */
1597  u16 xe_oa_unit_id(struct xe_hw_engine *hwe)
1598  {
1599  	return hwe->oa_unit && hwe->oa_unit->num_engines ?
1600  		hwe->oa_unit->oa_unit_id : U16_MAX;
1601  }
1602  
1603  static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
1604  {
1605  	struct xe_gt *gt;
1606  	int i, ret = 0;
1607  
1608  	if (param->exec_q) {
1609  		/* When we have an exec_q, get hwe from the exec_q */
1610  		param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
1611  					     param->engine_instance, true);
1612  	} else {
1613  		struct xe_hw_engine *hwe;
1614  		enum xe_hw_engine_id id;
1615  
1616  		/* Else just get the first hwe attached to the oa unit */
1617  		for_each_gt(gt, oa->xe, i) {
1618  			for_each_hw_engine(hwe, gt, id) {
1619  				if (xe_oa_unit_id(hwe) == param->oa_unit_id) {
1620  					param->hwe = hwe;
1621  					goto out;
1622  				}
1623  			}
1624  		}
1625  	}
1626  out:
1627  	if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) {
1628  		drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
1629  			param->exec_q ? param->exec_q->class : -1,
1630  			param->engine_instance, param->oa_unit_id);
1631  		ret = -EINVAL;
1632  	}
1633  
1634  	return ret;
1635  }
1636  
1637  static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
1638  				     struct xe_oa_open_param *param)
1639  {
1640  	if (value >= oa->oa_unit_ids) {
1641  		drm_dbg(&oa->xe->drm, "OA unit ID out of range %llu\n", value);
1642  		return -EINVAL;
1643  	}
1644  	param->oa_unit_id = value;
1645  	return 0;
1646  }
1647  
1648  static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
1649  				    struct xe_oa_open_param *param)
1650  {
1651  	param->sample = value;
1652  	return 0;
1653  }
1654  
1655  static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
1656  				     struct xe_oa_open_param *param)
1657  {
1658  	param->metric_set = value;
1659  	return 0;
1660  }
1661  
1662  static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
1663  				    struct xe_oa_open_param *param)
1664  {
1665  	int ret = decode_oa_format(oa, value, &param->oa_format);
1666  
1667  	if (ret) {
1668  		drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
1669  		return ret;
1670  	}
1671  	return 0;
1672  }
1673  
1674  static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
1675  				      struct xe_oa_open_param *param)
1676  {
1677  #define OA_EXPONENT_MAX 31
1678  
1679  	if (value > OA_EXPONENT_MAX) {
1680  		drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
1681  		return -EINVAL;
1682  	}
1683  	param->period_exponent = value;
1684  	return 0;
1685  }
1686  
1687  static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
1688  				   struct xe_oa_open_param *param)
1689  {
1690  	param->disabled = value;
1691  	return 0;
1692  }
1693  
1694  static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
1695  					struct xe_oa_open_param *param)
1696  {
1697  	param->exec_queue_id = value;
1698  	return 0;
1699  }
1700  
1701  static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
1702  					  struct xe_oa_open_param *param)
1703  {
1704  	param->engine_instance = value;
1705  	return 0;
1706  }
1707  
1708  static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
1709  				struct xe_oa_open_param *param)
1710  {
1711  	param->no_preempt = value;
1712  	return 0;
1713  }
1714  
1715  typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
1716  				     struct xe_oa_open_param *param);
1717  static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
1718  	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
1719  	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
1720  	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
1721  	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
1722  	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
1723  	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
1724  	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
1725  	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
1726  	[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
1727  };
1728  
1729  static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
1730  				       struct xe_oa_open_param *param)
1731  {
1732  	u64 __user *address = u64_to_user_ptr(extension);
1733  	struct drm_xe_ext_set_property ext;
1734  	int err;
1735  	u32 idx;
1736  
1737  	err = __copy_from_user(&ext, address, sizeof(ext));
1738  	if (XE_IOCTL_DBG(oa->xe, err))
1739  		return -EFAULT;
1740  
1741  	if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
1742  	    XE_IOCTL_DBG(oa->xe, ext.pad))
1743  		return -EINVAL;
1744  
1745  	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
1746  	return xe_oa_set_property_funcs[idx](oa, ext.value, param);
1747  }
1748  
1749  typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
1750  				       struct xe_oa_open_param *param);
1751  static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
1752  	[DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
1753  };
1754  
1755  #define MAX_USER_EXTENSIONS	16
1756  static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number,
1757  				 struct xe_oa_open_param *param)
1758  {
1759  	u64 __user *address = u64_to_user_ptr(extension);
1760  	struct drm_xe_user_extension ext;
1761  	int err;
1762  	u32 idx;
1763  
1764  	if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
1765  		return -E2BIG;
1766  
1767  	err = __copy_from_user(&ext, address, sizeof(ext));
1768  	if (XE_IOCTL_DBG(oa->xe, err))
1769  		return -EFAULT;
1770  
1771  	if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
1772  	    XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
1773  		return -EINVAL;
1774  
1775  	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
1776  	err = xe_oa_user_extension_funcs[idx](oa, extension, param);
1777  	if (XE_IOCTL_DBG(oa->xe, err))
1778  		return err;
1779  
1780  	if (ext.next_extension)
1781  		return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param);
1782  
1783  	return 0;
1784  }
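
/*
 * A minimal userspace sketch of the extension chain walked above (names from
 * uapi xe_drm.h; values illustrative, error handling omitted):
 *
 *	struct drm_xe_ext_set_property fmt = {
 *		.base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_OA_PROPERTY_OA_FORMAT,
 *		.value = oa_fmt,	// packed as sketched above
 *	};
 *	struct drm_xe_ext_set_property sample = {
 *		.base.next_extension = (uintptr_t)&fmt,
 *		.base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_OA_PROPERTY_SAMPLE_OA,
 *		.value = 1,
 *	};
 *
 * Passing &sample as the first extension visits both entries; the walk stops
 * when next_extension is 0, or after MAX_USER_EXTENSIONS entries (-E2BIG).
 */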
1785  
1786  /**
1787   * xe_oa_stream_open_ioctl - Opens an OA stream
1788   * @dev: @drm_device
1789   * @data: first extension in the chain of @drm_xe_ext_set_property extensions
1790   * @file: @drm_file
1791   *
1792   * The function opens an OA stream. An OA stream, opened with specified
1793   * properties, enables OA counter samples to be collected, either
1794   * periodically (time-based sampling) or on request (using OA queries)
1795   */
1796  int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
1797  {
1798  	struct xe_device *xe = to_xe_device(dev);
1799  	struct xe_oa *oa = &xe->oa;
1800  	struct xe_file *xef = to_xe_file(file);
1801  	struct xe_oa_open_param param = {};
1802  	const struct xe_oa_format *f;
1803  	bool privileged_op = true;
1804  	int ret;
1805  
1806  	if (!oa->xe) {
1807  		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
1808  		return -ENODEV;
1809  	}
1810  
1811  	ret = xe_oa_user_extensions(oa, data, 0, &param);
1812  	if (ret)
1813  		return ret;
1814  
1815  	if (param.exec_queue_id > 0) {
1816  		param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
1817  		if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
1818  			return -ENOENT;
1819  
1820  		if (param.exec_q->width > 1)
1821  			drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n");
1822  	}
1823  
1824  	/*
1825  	 * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC,
1826  	 * without global stream access, can be an unprivileged operation
1827  	 */
1828  	if (param.exec_q && !param.sample)
1829  		privileged_op = false;
1830  
1831  	if (param.no_preempt) {
1832  		if (!param.exec_q) {
1833  			drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n");
1834  			ret = -EINVAL;
1835  			goto err_exec_q;
1836  		}
1837  		privileged_op = true;
1838  	}
1839  
1840  	if (privileged_op && xe_observation_paranoid && !perfmon_capable()) {
1841  		drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n");
1842  		ret = -EACCES;
1843  		goto err_exec_q;
1844  	}
1845  
1846  	if (!param.exec_q && !param.sample) {
1847  		drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
1848  		ret = -EINVAL;
1849  		goto err_exec_q;
1850  	}
1851  
1852  	ret = xe_oa_assign_hwe(oa, &param);
1853  	if (ret)
1854  		goto err_exec_q;
1855  
1856  	f = &oa->oa_formats[param.oa_format];
1857  	if (!param.oa_format || !f->size ||
1858  	    !engine_supports_oa_format(param.hwe, f->type)) {
1859  		drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
1860  			param.oa_format, f->type, f->size, param.hwe->class);
1861  		ret = -EINVAL;
1862  		goto err_exec_q;
1863  	}
1864  
1865  	if (param.period_exponent > 0) {
1866  		u64 oa_period, oa_freq_hz;
1867  
1868  		/* Requesting samples from OAG buffer is a privileged operation */
1869  		if (!param.sample) {
1870  			drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n");
1871  			ret = -EINVAL;
1872  			goto err_exec_q;
1873  		}
1874  		oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent);
1875  		oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period);
1876  		drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
1877  	}
1878  
1879  	mutex_lock(&param.hwe->gt->oa.gt_lock);
1880  	ret = xe_oa_stream_open_ioctl_locked(oa, &param);
1881  	mutex_unlock(&param.hwe->gt->oa.gt_lock);
1882  err_exec_q:
1883  	if (ret < 0 && param.exec_q)
1884  		xe_exec_queue_put(param.exec_q);
1885  	return ret;
1886  }
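
/*
 * Rough userspace sketch of reaching this ioctl through the observation
 * entry point (names from uapi xe_drm.h; illustrative only):
 *
 *	struct drm_xe_observation_param p = {
 *		.observation_type = DRM_XE_OBSERVATION_TYPE_OA,
 *		.observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
 *		.param = (uintptr_t)&sample,	// chain sketched above
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &p);
 *
 * On success the returned fd supports read()/poll() for OA reports plus the
 * stream enable/disable/configure ioctls served by xe_oa_fops.
 */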
1887  
1888  static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr)
1889  {
1890  	static const struct xe_reg flex_eu_regs[] = {
1891  		EU_PERF_CNTL0,
1892  		EU_PERF_CNTL1,
1893  		EU_PERF_CNTL2,
1894  		EU_PERF_CNTL3,
1895  		EU_PERF_CNTL4,
1896  		EU_PERF_CNTL5,
1897  		EU_PERF_CNTL6,
1898  	};
1899  	int i;
1900  
1901  	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
1902  		if (flex_eu_regs[i].addr == addr)
1903  			return true;
1904  	}
1905  	return false;
1906  }
1907  
1908  static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table)
1909  {
1910  	while (table->start && table->end) {
1911  		if (addr >= table->start && addr <= table->end)
1912  			return true;
1913  
1914  		table++;
1915  	}
1916  
1917  	return false;
1918  }
1919  
1920  static const struct xe_mmio_range xehp_oa_b_counters[] = {
1921  	{ .start = 0xdc48, .end = 0xdc48 },	/* OAA_ENABLE_REG */
1922  	{ .start = 0xdd00, .end = 0xdd48 },	/* OAG_LCE0_0 - OAA_LENABLE_REG */
1923  	{}
1924  };
1925  
1926  static const struct xe_mmio_range gen12_oa_b_counters[] = {
1927  	{ .start = 0x2b2c, .end = 0x2b2c },	/* OAG_OA_PESS */
1928  	{ .start = 0xd900, .end = 0xd91c },	/* OAG_OASTARTTRIG[1-8] */
1929  	{ .start = 0xd920, .end = 0xd93c },	/* OAG_OAREPORTTRIG1[1-8] */
1930  	{ .start = 0xd940, .end = 0xd97c },	/* OAG_CEC[0-7][0-1] */
1931  	{ .start = 0xdc00, .end = 0xdc3c },	/* OAG_SCEC[0-7][0-1] */
1932  	{ .start = 0xdc40, .end = 0xdc40 },	/* OAG_SPCTR_CNF */
1933  	{ .start = 0xdc44, .end = 0xdc44 },	/* OAA_DBG_REG */
1934  	{}
1935  };
1936  
1937  static const struct xe_mmio_range mtl_oam_b_counters[] = {
1938  	{ .start = 0x393000, .end = 0x39301c },	/* OAM_STARTTRIG1[1-8] */
1939  	{ .start = 0x393020, .end = 0x39303c },	/* OAM_REPORTTRIG1[1-8] */
1940  	{ .start = 0x393040, .end = 0x39307c },	/* OAM_CEC[0-7][0-1] */
1941  	{ .start = 0x393200, .end = 0x39323C },	/* MPES[0-7] */
1942  	{}
1943  };
1944  
1945  static const struct xe_mmio_range xe2_oa_b_counters[] = {
1946  	{ .start = 0x393200, .end = 0x39323C },	/* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */
1947  	{ .start = 0x394200, .end = 0x39423C },	/* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */
1948  	{ .start = 0x394A00, .end = 0x394A3C },	/* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */
1949  	{},
1950  };
1951  
1952  static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr)
1953  {
1954  	return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) ||
1955  		xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) ||
1956  		xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) ||
1957  		(GRAPHICS_VER(oa->xe) >= 20 &&
1958  		 xe_oa_reg_in_range_table(addr, xe2_oa_b_counters));
1959  }
1960  
1961  static const struct xe_mmio_range mtl_oa_mux_regs[] = {
1962  	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
1963  	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
1964  	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
1965  	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
1966  	{ .start = 0x38d100, .end = 0x38d114},	/* VISACTL */
1967  	{}
1968  };
1969  
1970  static const struct xe_mmio_range gen12_oa_mux_regs[] = {
1971  	{ .start = 0x0d00, .end = 0x0d04 },     /* RPM_CONFIG[0-1] */
1972  	{ .start = 0x0d0c, .end = 0x0d2c },     /* NOA_CONFIG[0-8] */
1973  	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
1974  	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
1975  	{ .start = 0x20cc, .end = 0x20cc },	/* WAIT_FOR_RC6_EXIT */
1976  	{}
1977  };
1978  
1979  static const struct xe_mmio_range xe2_oa_mux_regs[] = {
1980  	{ .start = 0x5194, .end = 0x5194 },	/* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */
1981  	{ .start = 0x8704, .end = 0x8704 },	/* LMEM_LAT_MEASURE_MCFG_GRP */
1982  	{ .start = 0xB1BC, .end = 0xB1BC },	/* L3_BANK_LAT_MEASURE_LBCF_GFX */
1983  	{ .start = 0xE18C, .end = 0xE18C },	/* SAMPLER_MODE */
1984  	{ .start = 0xE590, .end = 0xE590 },	/* TDL_LSC_LAT_MEASURE_TDL_GFX */
1985  	{ .start = 0x13000, .end = 0x137FC },	/* PES_0_PESL0 - PES_63_UPPER_PESL3 */
1986  	{},
1987  };
1988  
1989  static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr)
1990  {
1991  	if (GRAPHICS_VER(oa->xe) >= 20)
1992  		return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs);
1993  	else if (GRAPHICS_VERx100(oa->xe) >= 1270)
1994  		return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs);
1995  	else
1996  		return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs);
1997  }
1998  
1999  static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr)
2000  {
2001  	return xe_oa_is_valid_flex_addr(oa, addr) ||
2002  		xe_oa_is_valid_b_counter_addr(oa, addr) ||
2003  		xe_oa_is_valid_mux_addr(oa, addr);
2004  }
2005  
2006  static struct xe_oa_reg *
2007  xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr),
2008  		 u32 __user *regs, u32 n_regs)
2009  {
2010  	struct xe_oa_reg *oa_regs;
2011  	int err;
2012  	u32 i;
2013  
2014  	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
2015  	if (!oa_regs)
2016  		return ERR_PTR(-ENOMEM);
2017  
2018  	for (i = 0; i < n_regs; i++) {
2019  		u32 addr, value;
2020  
2021  		err = get_user(addr, regs);
2022  		if (err)
2023  			goto addr_err;
2024  
2025  		if (!is_valid(oa, addr)) {
2026  			drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr);
2027  			err = -EINVAL;
2028  			goto addr_err;
2029  		}
2030  
2031  		err = get_user(value, regs + 1);
2032  		if (err)
2033  			goto addr_err;
2034  
2035  		oa_regs[i].addr = XE_REG(addr);
2036  		oa_regs[i].value = value;
2037  
2038  		regs += 2;
2039  	}
2040  
2041  	return oa_regs;
2042  
2043  addr_err:
2044  	kfree(oa_regs);
2045  	return ERR_PTR(err);
2046  }
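
/*
 * The 'regs' user pointer above is a flat array of (address, value) u32
 * pairs, n_regs entries long:
 *
 *	u32 regs[] = { addr0, val0, addr1, val1, ... };
 *
 * hence the loop reads two u32s per entry and advances the user pointer
 * by 2 each iteration.
 */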
2047  
2048  static ssize_t show_dynamic_id(struct kobject *kobj,
2049  			       struct kobj_attribute *attr,
2050  			       char *buf)
2051  {
2052  	struct xe_oa_config *oa_config =
2053  		container_of(attr, typeof(*oa_config), sysfs_metric_id);
2054  
2055  	return sysfs_emit(buf, "%d\n", oa_config->id);
2056  }
2057  
2058  static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa,
2059  					 struct xe_oa_config *oa_config)
2060  {
2061  	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
2062  	oa_config->sysfs_metric_id.attr.name = "id";
2063  	oa_config->sysfs_metric_id.attr.mode = 0444;
2064  	oa_config->sysfs_metric_id.show = show_dynamic_id;
2065  	oa_config->sysfs_metric_id.store = NULL;
2066  
2067  	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
2068  	oa_config->attrs[1] = NULL;
2069  
2070  	oa_config->sysfs_metric.name = oa_config->uuid;
2071  	oa_config->sysfs_metric.attrs = oa_config->attrs;
2072  
2073  	return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric);
2074  }
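
/*
 * The group created above appears under the device's metrics directory,
 * e.g. (illustrative path, card index varies):
 *
 *	/sys/class/drm/card0/metrics/<uuid>/id
 *
 * Reading 'id' yields the integer config id that userspace passes as
 * DRM_XE_OA_PROPERTY_OA_METRIC_SET when opening a stream.
 */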
2075  
2076  /**
2077   * xe_oa_add_config_ioctl - Adds one OA config
2078   * @dev: @drm_device
2079   * @data: pointer to struct @drm_xe_oa_config
2080   * @file: @drm_file
2081   *
2082   * The function adds an OA config to the set of OA configs maintained in
2083   * the kernel. The config determines which OA metrics are collected for an
2084   * OA stream.
2085   */
2086  int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
2087  {
2088  	struct xe_device *xe = to_xe_device(dev);
2089  	struct xe_oa *oa = &xe->oa;
2090  	struct drm_xe_oa_config param;
2091  	struct drm_xe_oa_config *arg = &param;
2092  	struct xe_oa_config *oa_config, *tmp;
2093  	struct xe_oa_reg *regs;
2094  	int err, id;
2095  
2096  	if (!oa->xe) {
2097  		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
2098  		return -ENODEV;
2099  	}
2100  
2101  	if (xe_observation_paranoid && !perfmon_capable()) {
2102  		drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n");
2103  		return -EACCES;
2104  	}
2105  
2106  	err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param));
2107  	if (XE_IOCTL_DBG(oa->xe, err))
2108  		return -EFAULT;
2109  
2110  	if (XE_IOCTL_DBG(oa->xe, arg->extensions) ||
2111  	    XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) ||
2112  	    XE_IOCTL_DBG(oa->xe, !arg->n_regs))
2113  		return -EINVAL;
2114  
2115  	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
2116  	if (!oa_config)
2117  		return -ENOMEM;
2118  
2119  	oa_config->oa = oa;
2120  	kref_init(&oa_config->ref);
2121  
2122  	if (!uuid_is_valid(arg->uuid)) {
2123  		drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n");
2124  		err = -EINVAL;
2125  		goto reg_err;
2126  	}
2127  
2128  	/* Last character in oa_config->uuid will be 0 because oa_config was kzalloc'd */
2129  	memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid));
2130  
2131  	oa_config->regs_len = arg->n_regs;
2132  	regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr,
2133  				u64_to_user_ptr(arg->regs_ptr),
2134  				arg->n_regs);
2135  	if (IS_ERR(regs)) {
2136  		drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n");
2137  		err = PTR_ERR(regs);
2138  		goto reg_err;
2139  	}
2140  	oa_config->regs = regs;
2141  
2142  	err = mutex_lock_interruptible(&oa->metrics_lock);
2143  	if (err)
2144  		goto reg_err;
2145  
2146  	/* We shouldn't have too many configs, so this iteration shouldn't be too costly */
2147  	idr_for_each_entry(&oa->metrics_idr, tmp, id) {
2148  		if (!strcmp(tmp->uuid, oa_config->uuid)) {
2149  			drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n");
2150  			err = -EADDRINUSE;
2151  			goto sysfs_err;
2152  		}
2153  	}
2154  
2155  	err = create_dynamic_oa_sysfs_entry(oa, oa_config);
2156  	if (err) {
2157  		drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n");
2158  		goto sysfs_err;
2159  	}
2160  
2161  	oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL);
2162  	if (oa_config->id < 0) {
2163  		drm_dbg(&oa->xe->drm, "Failed to allocate id for OA config\n");
2164  		err = oa_config->id;
2165  		goto sysfs_err;
2166  	}
2167  
2168  	mutex_unlock(&oa->metrics_lock);
2169  
2170  	drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id);
2171  
2172  	return oa_config->id;
2173  
2174  sysfs_err:
2175  	mutex_unlock(&oa->metrics_lock);
2176  reg_err:
2177  	xe_oa_config_put(oa_config);
2178  	drm_dbg(&oa->xe->drm, "Failed to add new OA config\n");
2179  	return err;
2180  }
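
/*
 * Userspace sketch for adding a config (names from uapi xe_drm.h; uuid and
 * register list illustrative, error handling omitted):
 *
 *	u32 regs[] = { 0xdc40, 0x1 };	// (address, value) pairs
 *	struct drm_xe_oa_config cfg = {
 *		.uuid = "01234567-0123-0123-0123-0123456789ab",
 *		.n_regs = sizeof(regs) / sizeof(regs[0]) / 2,
 *		.regs_ptr = (uintptr_t)regs,
 *	};
 *	struct drm_xe_observation_param p = {
 *		.observation_type = DRM_XE_OBSERVATION_TYPE_OA,
 *		.observation_op = DRM_XE_OBSERVATION_OP_ADD_CONFIG,
 *		.param = (uintptr_t)&cfg,
 *	};
 *	int config_id = ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &p);
 */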
2181  
2182  /**
2183   * xe_oa_remove_config_ioctl - Removes one OA config
2184   * @dev: @drm_device
2185   * @data: pointer to a u64 holding the id of the OA config to remove
2186   * @file: @drm_file
2187   */
2188  int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
2189  {
2190  	struct xe_device *xe = to_xe_device(dev);
2191  	struct xe_oa *oa = &xe->oa;
2192  	struct xe_oa_config *oa_config;
2193  	u64 arg, *ptr = u64_to_user_ptr(data);
2194  	int ret;
2195  
2196  	if (!oa->xe) {
2197  		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
2198  		return -ENODEV;
2199  	}
2200  
2201  	if (xe_observation_paranoid && !perfmon_capable()) {
2202  		drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n");
2203  		return -EACCES;
2204  	}
2205  
2206  	ret = get_user(arg, ptr);
2207  	if (XE_IOCTL_DBG(oa->xe, ret))
2208  		return ret;
2209  
2210  	ret = mutex_lock_interruptible(&oa->metrics_lock);
2211  	if (ret)
2212  		return ret;
2213  
2214  	oa_config = idr_find(&oa->metrics_idr, arg);
2215  	if (!oa_config) {
2216  		drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n");
2217  		ret = -ENOENT;
2218  		goto err_unlock;
2219  	}
2220  
2221  	WARN_ON(arg != oa_config->id);
2222  
2223  	sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric);
2224  	idr_remove(&oa->metrics_idr, arg);
2225  
2226  	mutex_unlock(&oa->metrics_lock);
2227  
2228  	drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
2229  
2230  	xe_oa_config_put(oa_config);
2231  
2232  	return 0;
2233  
2234  err_unlock:
2235  	mutex_unlock(&oa->metrics_lock);
2236  	return ret;
2237  }
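
/*
 * Removal is symmetric to the add sketch above: DRM_XE_OBSERVATION_OP_REMOVE_CONFIG
 * with 'param' pointing at a u64 holding the config id returned when it was added.
 */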
2238  
2239  /**
2240   * xe_oa_register - Xe OA registration
2241   * @xe: @xe_device
2242   *
2243   * Exposes the metrics sysfs directory upon completion of module initialization
2244   */
2245  void xe_oa_register(struct xe_device *xe)
2246  {
2247  	struct xe_oa *oa = &xe->oa;
2248  
2249  	if (!oa->xe)
2250  		return;
2251  
2252  	oa->metrics_kobj = kobject_create_and_add("metrics",
2253  						  &xe->drm.primary->kdev->kobj);
2254  }
2255  
2256  /**
2257   * xe_oa_unregister - Xe OA de-registration
2258   * @xe: @xe_device
2259   */
2260  void xe_oa_unregister(struct xe_device *xe)
2261  {
2262  	struct xe_oa *oa = &xe->oa;
2263  
2264  	if (!oa->metrics_kobj)
2265  		return;
2266  
2267  	kobject_put(oa->metrics_kobj);
2268  	oa->metrics_kobj = NULL;
2269  }
2270  
2271  static u32 num_oa_units_per_gt(struct xe_gt *gt)
2272  {
2273  	return 1;
2274  }
2275  
2276  static u32 __hwe_oam_unit(struct xe_hw_engine *hwe)
2277  {
2278  	if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
2279  		/*
2280  		 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
2281  		 * within the gt use the same OAM. All MTL/LNL SKUs list 1 SAMEDIA gt.
2282  		 */
2283  		xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA);
2284  
2285  		return 0;
2286  	}
2287  
2288  	return XE_OA_UNIT_INVALID;
2289  }
2290  
2291  static u32 __hwe_oa_unit(struct xe_hw_engine *hwe)
2292  {
2293  	switch (hwe->class) {
2294  	case XE_ENGINE_CLASS_RENDER:
2295  	case XE_ENGINE_CLASS_COMPUTE:
2296  		return 0;
2297  
2298  	case XE_ENGINE_CLASS_VIDEO_DECODE:
2299  	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
2300  		return __hwe_oam_unit(hwe);
2301  
2302  	default:
2303  		return XE_OA_UNIT_INVALID;
2304  	}
2305  }
2306  
2307  static struct xe_oa_regs __oam_regs(u32 base)
2308  {
2309  	return (struct xe_oa_regs) {
2310  		base,
2311  		OAM_HEAD_POINTER(base),
2312  		OAM_TAIL_POINTER(base),
2313  		OAM_BUFFER(base),
2314  		OAM_CONTEXT_CONTROL(base),
2315  		OAM_CONTROL(base),
2316  		OAM_DEBUG(base),
2317  		OAM_STATUS(base),
2318  		OAM_CONTROL_COUNTER_SEL_MASK,
2319  	};
2320  }
2321  
2322  static struct xe_oa_regs __oag_regs(void)
2323  {
2324  	return (struct xe_oa_regs) {
2325  		0,
2326  		OAG_OAHEADPTR,
2327  		OAG_OATAILPTR,
2328  		OAG_OABUFFER,
2329  		OAG_OAGLBCTXCTRL,
2330  		OAG_OACONTROL,
2331  		OAG_OA_DEBUG,
2332  		OAG_OASTATUS,
2333  		OAG_OACONTROL_OA_COUNTER_SEL_MASK,
2334  	};
2335  }
2336  
2337  static void __xe_oa_init_oa_units(struct xe_gt *gt)
2338  {
2339  	const u32 mtl_oa_base[] = { 0x13000 };
2340  	int i, num_units = gt->oa.num_oa_units;
2341  
2342  	for (i = 0; i < num_units; i++) {
2343  		struct xe_oa_unit *u = &gt->oa.oa_unit[i];
2344  
2345  		if (gt->info.type != XE_GT_TYPE_MEDIA) {
2346  			u->regs = __oag_regs();
2347  			u->type = DRM_XE_OA_UNIT_TYPE_OAG;
2348  		} else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
2349  			u->regs = __oam_regs(mtl_oa_base[i]);
2350  			u->type = DRM_XE_OA_UNIT_TYPE_OAM;
2351  		}
2352  
2353  		/* Ensure MMIO trigger remains disabled till there is a stream */
2354  		xe_mmio_write32(gt, u->regs.oa_debug,
2355  				oag_configure_mmio_trigger(NULL, false));
2356  
2357  		/* Set oa_unit_ids now to ensure ids remain contiguous */
2358  		u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++;
2359  	}
2360  }
2361  
2362  static int xe_oa_init_gt(struct xe_gt *gt)
2363  {
2364  	u32 num_oa_units = num_oa_units_per_gt(gt);
2365  	struct xe_hw_engine *hwe;
2366  	enum xe_hw_engine_id id;
2367  	struct xe_oa_unit *u;
2368  
2369  	u = drmm_kcalloc(&gt_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL);
2370  	if (!u)
2371  		return -ENOMEM;
2372  
2373  	for_each_hw_engine(hwe, gt, id) {
2374  		u32 index = __hwe_oa_unit(hwe);
2375  
2376  		hwe->oa_unit = NULL;
2377  		if (index < num_oa_units) {
2378  			u[index].num_engines++;
2379  			hwe->oa_unit = &u[index];
2380  		}
2381  	}
2382  
2383  	/*
2384  	 * Fused-off engines can result in oa_units with num_engines == 0. These units
2385  	 * will appear in OA unit query, but no OA streams can be opened on them.
2386  	 */
2387  	gt->oa.num_oa_units = num_oa_units;
2388  	gt->oa.oa_unit = u;
2389  
2390  	__xe_oa_init_oa_units(gt);
2391  
2392  	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->oa.gt_lock);
2393  
2394  	return 0;
2395  }
2396  
2397  static int xe_oa_init_oa_units(struct xe_oa *oa)
2398  {
2399  	struct xe_gt *gt;
2400  	int i, ret;
2401  
2402  	for_each_gt(gt, oa->xe, i) {
2403  		ret = xe_oa_init_gt(gt);
2404  		if (ret)
2405  			return ret;
2406  	}
2407  
2408  	return 0;
2409  }
2410  
2411  static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format)
2412  {
2413  	__set_bit(format, oa->format_mask);
2414  }
2415  
2416  static void xe_oa_init_supported_formats(struct xe_oa *oa)
2417  {
2418  	if (GRAPHICS_VER(oa->xe) >= 20) {
2419  		/* Xe2+ */
2420  		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
2421  		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
2422  		oa_format_add(oa, XE_OA_FORMAT_PEC64u64);
2423  		oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8);
2424  		oa_format_add(oa, XE_OA_FORMAT_PEC64u32);
2425  		oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1);
2426  		oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1);
2427  		oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2);
2428  		oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2);
2429  		oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4);
2430  		oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32);
2431  	} else if (GRAPHICS_VERx100(oa->xe) >= 1270) {
2432  		/* XE_METEORLAKE */
2433  		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
2434  		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
2435  		oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8);
2436  		oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8);
2437  		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
2438  		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
2439  	} else if (GRAPHICS_VERx100(oa->xe) >= 1255) {
2440  		/* XE_DG2, XE_PVC */
2441  		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
2442  		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
2443  		oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8);
2444  		oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8);
2445  	} else {
2446  		/* Gen12+ */
2447  		xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12);
2448  		oa_format_add(oa, XE_OA_FORMAT_A12);
2449  		oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8);
2450  		oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8);
2451  		oa_format_add(oa, XE_OA_FORMAT_C4_B8);
2452  	}
2453  }
2454  
2455  /**
2456   * xe_oa_init - OA initialization during device probe
2457   * @xe: @xe_device
2458   *
2459   * Return: 0 on success or a negative error code on failure
2460   */
2461  int xe_oa_init(struct xe_device *xe)
2462  {
2463  	struct xe_oa *oa = &xe->oa;
2464  	int ret;
2465  
2466  	/* Support OA only with GuC submission and Gen12+ */
2467  	if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12)
2468  		return 0;
2469  
2470  	if (IS_SRIOV_VF(xe))
2471  		return 0;
2472  
2473  	oa->xe = xe;
2474  	oa->oa_formats = oa_formats;
2475  
2476  	drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock);
2477  	idr_init_base(&oa->metrics_idr, 1);
2478  
2479  	ret = xe_oa_init_oa_units(oa);
2480  	if (ret) {
2481  		drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret));
2482  		goto exit;
2483  	}
2484  
2485  	xe_oa_init_supported_formats(oa);
2486  	return 0;
2487  exit:
2488  	oa->xe = NULL;
2489  	return ret;
2490  }
2491  
2492  static int destroy_config(int id, void *p, void *data)
2493  {
2494  	xe_oa_config_put(p);
2495  	return 0;
2496  }
2497  
2498  /**
2499   * xe_oa_fini - OA de-initialization during device remove
2500   * @xe: @xe_device
2501   */
2502  void xe_oa_fini(struct xe_device *xe)
2503  {
2504  	struct xe_oa *oa = &xe->oa;
2505  
2506  	if (!oa->xe)
2507  		return;
2508  
2509  	idr_for_each(&oa->metrics_idr, destroy_config, oa);
2510  	idr_destroy(&oa->metrics_idr);
2511  
2512  	oa->xe = NULL;
2513  }
2514