Lines Matching +full:disable +full:- +full:report +full:- +full:mask

2  * Copyright © 2015-2016 Intel Corporation
44 * without special privileges. Access to system-wide metrics requires root
58 * might sample sets of tightly-coupled counters, depending on the
70 * interleaved with event-type specific members.
76 * would be acceptable to expose them to unprivileged applications - to hide
96 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
102 * For posterity, in case we might re-visit trying to adapt core perf to be
106 * - The perf based OA PMU driver broke some significant design assumptions:
110 * implications, the need to fake cpu-related data (such as user/kernel
112 * as a way to forward device-specific status records.
117 * Given the way we were periodically forward data from the GPU-mapped, OA
124 * and while we could pull a report out of the OA buffer we can't
125 * trigger a report from the cpu on demand.
127 * Related to being report based; the OA counters are configured in HW as a
130 * opened, there's no clear precedent for being able to provide group-wide
132 * OA unit report format used to capture all counters in a set, or specify a
139 * for combining with the side-band raw reports it captures using
142 * - As a side note on perf's grouping feature; there was also some concern
148 * With the OA unit's report formats, counters are packed together as 32
149 * or 40bit values, with the largest report size being 256 bytes.
158 * one time. The OA unit is not designed to allow re-configuration while in
173 * ring buffer. To include more than the OA report we'd have to copy the
174 * report into an intermediate larger buffer. I'd been considering allowing a
178 * - It felt like our perf based PMU was making some technical compromises
184 * that process - so not appropriate for us. When an event is related to a
188 * majority of the OA driver ran in atomic context, including all OA report
229 #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
240 * by checking for a zeroed report-id field in tail reports, we want to account
246 * report with its first 2 dwords not 0 meaning its previous report is
249 * first dword is the reason for this report while the second is the timestamp,
260 * non-periodic reports (such as on context switch) or the OA unit may be
281 * overflow in OA report timestamps.
314 /* XXX: beware if future OA HW adds new report formats that the current
315 * code assumes all reports have a power-of-two size and ~(size - 1) can
316 * be used as a mask to align the OA tail pointer.
343 * struct perf_open_properties - for validated properties given to open a stream
350 * @oa_format: An OA unit HW report format
402 kfree(oa_config->flex_regs); in i915_oa_config_release()
403 kfree(oa_config->b_counter_regs); in i915_oa_config_release()
404 kfree(oa_config->mux_regs); in i915_oa_config_release()
415 oa_config = idr_find(&perf->metrics_idr, metrics_set); in i915_perf_get_oa_config()
425 i915_oa_config_put(oa_bo->oa_config); in free_oa_config_bo()
426 i915_vma_put(oa_bo->vma); in free_oa_config_bo()
433 return &stream->engine->oa_group->regs; in __oa_regs()
438 struct intel_uncore *uncore = stream->uncore; in gen12_oa_hw_tail_read()
440 return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) & in gen12_oa_hw_tail_read()
446 struct intel_uncore *uncore = stream->uncore; in gen8_oa_hw_tail_read()
453 struct intel_uncore *uncore = stream->uncore; in gen7_oa_hw_tail_read()
460 ((__s)->oa_buffer.format->header == HDR_64_BIT)
462 static u64 oa_report_id(struct i915_perf_stream *stream, void *report) in oa_report_id() argument
464 return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; in oa_report_id()
467 static u64 oa_report_reason(struct i915_perf_stream *stream, void *report) in oa_report_reason() argument
469 return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) & in oa_report_reason()
470 (GRAPHICS_VER(stream->perf->i915) == 12 ? in oa_report_reason()
475 static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report) in oa_report_id_clear() argument
478 *(u64 *)report = 0; in oa_report_id_clear()
480 *report = 0; in oa_report_id_clear()
483 static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report) in oa_report_ctx_invalid() argument
485 return !(oa_report_id(stream, report) & in oa_report_ctx_invalid()
486 stream->perf->gen8_valid_ctx_bit); in oa_report_ctx_invalid()
489 static u64 oa_timestamp(struct i915_perf_stream *stream, void *report) in oa_timestamp() argument
492 *((u64 *)report + 1) : in oa_timestamp()
493 *((u32 *)report + 1); in oa_timestamp()
496 static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report) in oa_timestamp_clear() argument
499 *(u64 *)&report[2] = 0; in oa_timestamp_clear()
501 report[1] = 0; in oa_timestamp_clear()
504 static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report) in oa_context_id() argument
506 u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2]; in oa_context_id()
508 return ctx_id & stream->specific_ctx_id_mask; in oa_context_id()
511 static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report) in oa_context_id_squash() argument
514 report[4] = INVALID_CTX_ID; in oa_context_id_squash()
516 report[2] = INVALID_CTX_ID; in oa_context_id_squash()
520 * oa_buffer_check_unlocked - check for data and update tail ptr state
544 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); in oa_buffer_check_unlocked()
545 int report_size = stream->oa_buffer.format->size; in oa_buffer_check_unlocked()
555 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in oa_buffer_check_unlocked()
557 hw_tail = stream->perf->ops.oa_hw_tail_read(stream); in oa_buffer_check_unlocked()
558 hw_tail -= gtt_offset; in oa_buffer_check_unlocked()
561 * steps. Also the report size may not be a power of 2. Compute in oa_buffer_check_unlocked()
562 * potentially partially landed report in the OA buffer in oa_buffer_check_unlocked()
564 partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); in oa_buffer_check_unlocked()
572 /* Walk the stream backward until we find a report with report in oa_buffer_check_unlocked()
575 * to 256 bytes long, we can't tell whether a report has fully in oa_buffer_check_unlocked()
576 * landed in memory before the report id and timestamp of the in oa_buffer_check_unlocked()
577 * following report have effectively landed. in oa_buffer_check_unlocked()
583 while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { in oa_buffer_check_unlocked()
584 void *report = stream->oa_buffer.vaddr + tail; in oa_buffer_check_unlocked() local
586 if (oa_report_id(stream, report) || in oa_buffer_check_unlocked()
587 oa_timestamp(stream, report)) in oa_buffer_check_unlocked()
590 tail = (tail - report_size) & (OA_BUFFER_SIZE - 1); in oa_buffer_check_unlocked()
594 __ratelimit(&stream->perf->tail_pointer_race)) in oa_buffer_check_unlocked()
595 drm_notice(&stream->uncore->i915->drm, in oa_buffer_check_unlocked()
596 "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", in oa_buffer_check_unlocked()
597 stream->oa_buffer.head, tail, hw_tail); in oa_buffer_check_unlocked()
599 stream->oa_buffer.tail = tail; in oa_buffer_check_unlocked()
601 pollin = OA_TAKEN(stream->oa_buffer.tail, in oa_buffer_check_unlocked()
602 stream->oa_buffer.head) >= report_size; in oa_buffer_check_unlocked()
604 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in oa_buffer_check_unlocked()
610 * append_oa_status - Appends a status record to a userspace read() buffer.
611 * @stream: An i915-perf stream opened for OA metrics
615 * @type: The kind of status to report to userspace
632 if ((count - *offset) < header.size) in append_oa_status()
633 return -ENOSPC; in append_oa_status()
636 return -EFAULT; in append_oa_status()
644 * append_oa_sample - Copies single OA report into userspace read() buffer.
645 * @stream: An i915-perf stream opened for OA metrics
649 * @report: A single OA report to (optionally) include as part of the sample
652 * properties when opening a stream, tracked as `stream->sample_flags`. This
664 const u8 *report) in append_oa_sample() argument
666 int report_size = stream->oa_buffer.format->size; in append_oa_sample()
673 header.size = stream->sample_size; in append_oa_sample()
675 if ((count - *offset) < header.size) in append_oa_sample()
676 return -ENOSPC; in append_oa_sample()
680 return -EFAULT; in append_oa_sample()
683 oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE; in append_oa_sample()
684 report_size_partial = oa_buf_end - report; in append_oa_sample()
687 if (copy_to_user(buf, report, report_size_partial)) in append_oa_sample()
688 return -EFAULT; in append_oa_sample()
691 if (copy_to_user(buf, stream->oa_buffer.vaddr, in append_oa_sample()
692 report_size - report_size_partial)) in append_oa_sample()
693 return -EFAULT; in append_oa_sample()
694 } else if (copy_to_user(buf, report, report_size)) { in append_oa_sample()
695 return -EFAULT; in append_oa_sample()
704 * gen8_append_oa_reports - Copies all buffered OA reports into
706 * @stream: An i915-perf stream opened for OA metrics
711 * Notably any error condition resulting in a short read (-%ENOSPC or
712 * -%EFAULT) will be returned even though one or more records may
719 * and back-to-front you're not alone, but this follows the
729 struct intel_uncore *uncore = stream->uncore; in gen8_append_oa_reports()
730 int report_size = stream->oa_buffer.format->size; in gen8_append_oa_reports()
731 u8 *oa_buf_base = stream->oa_buffer.vaddr; in gen8_append_oa_reports()
732 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); in gen8_append_oa_reports()
733 u32 mask = (OA_BUFFER_SIZE - 1); in gen8_append_oa_reports() local
739 if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled)) in gen8_append_oa_reports()
740 return -EIO; in gen8_append_oa_reports()
742 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen8_append_oa_reports()
744 head = stream->oa_buffer.head; in gen8_append_oa_reports()
745 tail = stream->oa_buffer.tail; in gen8_append_oa_reports()
747 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen8_append_oa_reports()
753 * only be incremented by multiples of the report size. in gen8_append_oa_reports()
755 if (drm_WARN_ONCE(&uncore->i915->drm, in gen8_append_oa_reports()
760 return -EIO; in gen8_append_oa_reports()
765 head = (head + report_size) & mask) { in gen8_append_oa_reports()
766 u8 *report = oa_buf_base + head; in gen8_append_oa_reports() local
767 u32 *report32 = (void *)report; in gen8_append_oa_reports()
773 * triggered this specific report (mostly timer in gen8_append_oa_reports()
776 reason = oa_report_reason(stream, report); in gen8_append_oa_reports()
781 * invalid to be sure we avoid false-positive, single-context in gen8_append_oa_reports()
793 * context-switch-report: This is a report with the reason type in gen8_append_oa_reports()
794 * being context-switch. It is generated when a context switches in gen8_append_oa_reports()
797 * context-valid-bit: A bit that is set in the report ID field in gen8_append_oa_reports()
800 * gpu-idle: A condition characterized by a in gen8_append_oa_reports()
801 * context-switch-report with context-valid-bit set to 0. in gen8_append_oa_reports()
803 * On prior platforms, context-id-valid bit is set to 0 only in gen8_append_oa_reports()
806 * On XEHP platforms, context-valid-bit is set to 1 in a context in gen8_append_oa_reports()
807 * switch report if a new context switched in. For all other in gen8_append_oa_reports()
812 * context ID field and the context-valid-bit is 0. The logic in gen8_append_oa_reports()
813 * below to squash the context ID would render the report in gen8_append_oa_reports()
819 if (oa_report_ctx_invalid(stream, report) && in gen8_append_oa_reports()
820 GRAPHICS_VER_FULL(stream->engine->i915) < IP_VER(12, 55)) { in gen8_append_oa_reports()
828 * stop the counters from updating as system-wide / global in gen8_append_oa_reports()
836 * provide a side-band view of the real values. in gen8_append_oa_reports()
840 * needs be forwarded bookend context-switch reports so that it in gen8_append_oa_reports()
853 * switches since it's not-uncommon for periodic samples to in gen8_append_oa_reports()
854 * identify a switch before any 'context switch' report. in gen8_append_oa_reports()
856 if (!stream->ctx || in gen8_append_oa_reports()
857 stream->specific_ctx_id == ctx_id || in gen8_append_oa_reports()
858 stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || in gen8_append_oa_reports()
865 if (stream->ctx && in gen8_append_oa_reports()
866 stream->specific_ctx_id != ctx_id) { in gen8_append_oa_reports()
871 report); in gen8_append_oa_reports()
875 stream->oa_buffer.last_ctx_id = ctx_id; in gen8_append_oa_reports()
880 * Clear out the report id and timestamp as a means in gen8_append_oa_reports()
886 u8 *oa_buf_end = stream->oa_buffer.vaddr + in gen8_append_oa_reports()
888 u32 part = oa_buf_end - (u8 *)report32; in gen8_append_oa_reports()
890 /* Zero out the entire report */ in gen8_append_oa_reports()
895 memset(oa_buf_base, 0, report_size - part); in gen8_append_oa_reports()
903 oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ? in gen8_append_oa_reports()
904 __oa_regs(stream)->oa_head_ptr : in gen8_append_oa_reports()
907 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen8_append_oa_reports()
915 stream->oa_buffer.head = head; in gen8_append_oa_reports()
917 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen8_append_oa_reports()
924 * gen8_oa_read - copy status records then buffered OA reports
925 * @stream: An i915-perf stream opened for OA metrics
948 struct intel_uncore *uncore = stream->uncore; in gen8_oa_read()
953 if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr)) in gen8_oa_read()
954 return -EIO; in gen8_oa_read()
956 oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ? in gen8_oa_read()
957 __oa_regs(stream)->oa_status : in gen8_oa_read()
982 drm_dbg(&stream->perf->i915->drm, in gen8_oa_read()
984 stream->period_exponent); in gen8_oa_read()
986 stream->perf->ops.oa_disable(stream); in gen8_oa_read()
987 stream->perf->ops.oa_enable(stream); in gen8_oa_read()
990 * Note: .oa_enable() is expected to re-init the oabuffer and in gen8_oa_read()
1005 IS_GRAPHICS_VER(uncore->i915, 8, 11) ? in gen8_oa_read()
1014 * gen7_append_oa_reports - Copies all buffered OA reports into
1016 * @stream: An i915-perf stream opened for OA metrics
1021 * Notably any error condition resulting in a short read (-%ENOSPC or
1022 * -%EFAULT) will be returned even though one or more records may
1029 * and back-to-front you're not alone, but this follows the
1039 struct intel_uncore *uncore = stream->uncore; in gen7_append_oa_reports()
1040 int report_size = stream->oa_buffer.format->size; in gen7_append_oa_reports()
1041 u8 *oa_buf_base = stream->oa_buffer.vaddr; in gen7_append_oa_reports()
1042 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); in gen7_append_oa_reports()
1043 u32 mask = (OA_BUFFER_SIZE - 1); in gen7_append_oa_reports() local
1049 if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled)) in gen7_append_oa_reports()
1050 return -EIO; in gen7_append_oa_reports()
1052 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen7_append_oa_reports()
1054 head = stream->oa_buffer.head; in gen7_append_oa_reports()
1055 tail = stream->oa_buffer.tail; in gen7_append_oa_reports()
1057 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen7_append_oa_reports()
1062 * only be incremented by multiples of the report size (notably also in gen7_append_oa_reports()
1065 if (drm_WARN_ONCE(&uncore->i915->drm, in gen7_append_oa_reports()
1070 return -EIO; in gen7_append_oa_reports()
1075 head = (head + report_size) & mask) { in gen7_append_oa_reports()
1076 u8 *report = oa_buf_base + head; in gen7_append_oa_reports() local
1077 u32 *report32 = (void *)report; in gen7_append_oa_reports()
1079 /* All the report sizes factor neatly into the buffer in gen7_append_oa_reports()
1080 * size so we never expect to see a report split in gen7_append_oa_reports()
1087 if (drm_WARN_ON(&uncore->i915->drm, in gen7_append_oa_reports()
1088 (OA_BUFFER_SIZE - head) < report_size)) { in gen7_append_oa_reports()
1089 drm_err(&uncore->i915->drm, in gen7_append_oa_reports()
1090 "Spurious OA head ptr: non-integral report offset\n"); in gen7_append_oa_reports()
1094 /* The report-ID field for periodic samples includes in gen7_append_oa_reports()
1096 * the report and is never expected to be zero so we in gen7_append_oa_reports()
1097 * can check that the report isn't invalid before in gen7_append_oa_reports()
1101 if (__ratelimit(&stream->perf->spurious_report_rs)) in gen7_append_oa_reports()
1102 drm_notice(&uncore->i915->drm, in gen7_append_oa_reports()
1103 "Skipping spurious, invalid OA report\n"); in gen7_append_oa_reports()
1107 ret = append_oa_sample(stream, buf, count, offset, report); in gen7_append_oa_reports()
1119 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen7_append_oa_reports()
1124 stream->oa_buffer.head = head; in gen7_append_oa_reports()
1126 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen7_append_oa_reports()
1133 * gen7_oa_read - copy status records then buffered OA reports
1134 * @stream: An i915-perf stream opened for OA metrics
1153 struct intel_uncore *uncore = stream->uncore; in gen7_oa_read()
1157 if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr)) in gen7_oa_read()
1158 return -EIO; in gen7_oa_read()
1167 oastatus1 &= ~stream->perf->gen7_latched_oastatus1; in gen7_oa_read()
1171 * - The status can be interpreted to mean that the buffer is in gen7_oa_read()
1173 * which will start to report a near-empty buffer after an in gen7_oa_read()
1178 * - Since it also implies the HW has started overwriting old in gen7_oa_read()
1183 * - In the future we may want to introduce a flight recorder in gen7_oa_read()
1195 drm_dbg(&stream->perf->i915->drm, in gen7_oa_read()
1197 stream->period_exponent); in gen7_oa_read()
1199 stream->perf->ops.oa_disable(stream); in gen7_oa_read()
1200 stream->perf->ops.oa_enable(stream); in gen7_oa_read()
1210 stream->perf->gen7_latched_oastatus1 |= in gen7_oa_read()
1218 * i915_oa_wait_unlocked - handles blocking IO until OA data available
1219 * @stream: An i915-perf stream opened for OA metrics
1222 * for OA metrics. It waits until the hrtimer callback finds a non-empty
1226 * since any subsequent read handling will return -EAGAIN if there isn't
1234 if (!stream->periodic) in i915_oa_wait_unlocked()
1235 return -EIO; in i915_oa_wait_unlocked()
1237 return wait_event_interruptible(stream->poll_wq, in i915_oa_wait_unlocked()
1242 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
1243 * @stream: An i915-perf stream opened for OA metrics
1255 poll_wait(file, &stream->poll_wq, wait); in i915_oa_poll_wait()
1259 * i915_oa_read - just calls through to &i915_oa_ops->read
1260 * @stream: An i915-perf stream opened for OA metrics
1275 return stream->perf->ops.read(stream, buf, count, offset); in i915_oa_read()
1281 struct i915_gem_context *ctx = stream->ctx; in oa_pin_context()
1284 int err = -ENODEV; in oa_pin_context()
1287 if (ce->engine != stream->engine) /* first match! */ in oa_pin_context()
1305 if (err == -EDEADLK) { in oa_pin_context()
1315 stream->pinned_ctx = ce; in oa_pin_context()
1316 return stream->pinned_ctx; in oa_pin_context()
1325 if (GRAPHICS_VER(rq->i915) >= 8) in __store_reg_to_mem()
1358 err = -ETIME; in __read_reg()
1372 scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4); in gen12_guc_sw_ctx_id()
1380 err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base), in gen12_guc_sw_ctx_id()
1385 val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); in gen12_guc_sw_ctx_id()
1392 i915_gem_object_unpin_map(scratch->obj); in gen12_guc_sw_ctx_id()
1401 * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
1412 u32 ctx_id, mask; in gen12_get_render_context_id() local
1415 if (intel_engine_uses_guc(stream->engine)) { in gen12_get_render_context_id()
1416 ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id); in gen12_get_render_context_id()
1420 mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) << in gen12_get_render_context_id()
1421 (GEN12_GUC_SW_CTX_ID_SHIFT - 32); in gen12_get_render_context_id()
1422 } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 55)) { in gen12_get_render_context_id()
1423 ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) << in gen12_get_render_context_id()
1424 (XEHP_SW_CTX_ID_SHIFT - 32); in gen12_get_render_context_id()
1426 mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << in gen12_get_render_context_id()
1427 (XEHP_SW_CTX_ID_SHIFT - 32); in gen12_get_render_context_id()
1429 ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << in gen12_get_render_context_id()
1430 (GEN11_SW_CTX_ID_SHIFT - 32); in gen12_get_render_context_id()
1432 mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << in gen12_get_render_context_id()
1433 (GEN11_SW_CTX_ID_SHIFT - 32); in gen12_get_render_context_id()
1435 stream->specific_ctx_id = ctx_id & mask; in gen12_get_render_context_id()
1436 stream->specific_ctx_id_mask = mask; in gen12_get_render_context_id()
1461 u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; in oa_context_image_offset()
1462 u32 *state = ce->lrc_reg_state; in oa_context_image_offset()
1464 if (drm_WARN_ON(&ce->engine->i915->drm, !state)) in oa_context_image_offset()
1470 * We expect reg-value pairs in MI_LRI command, so in oa_context_image_offset()
1473 drm_WARN_ON(&ce->engine->i915->drm, in oa_context_image_offset()
1488 i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base); in set_oa_ctx_ctrl_offset()
1489 struct i915_perf *perf = &ce->engine->i915->perf; in set_oa_ctx_ctrl_offset()
1490 u32 offset = perf->ctx_oactxctrl_offset; in set_oa_ctx_ctrl_offset()
1497 perf->ctx_oactxctrl_offset = offset; in set_oa_ctx_ctrl_offset()
1499 drm_dbg(&ce->engine->i915->drm, in set_oa_ctx_ctrl_offset()
1501 ce->engine->name, offset); in set_oa_ctx_ctrl_offset()
1504 return offset && offset != U32_MAX ? 0 : -ENODEV; in set_oa_ctx_ctrl_offset()
1509 return engine->class == RENDER_CLASS; in engine_supports_mi_query()
1513 * oa_get_render_ctx_id - determine and hold ctx hw id
1514 * @stream: An i915-perf stream opened for OA metrics
1531 if (engine_supports_mi_query(stream->engine) && in oa_get_render_ctx_id()
1532 HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) { in oa_get_render_ctx_id()
1540 drm_err(&stream->perf->i915->drm, in oa_get_render_ctx_id()
1542 stream->engine->name); in oa_get_render_ctx_id()
1547 switch (GRAPHICS_VER(ce->engine->i915)) { in oa_get_render_ctx_id()
1551 * and don't need to use the mask. in oa_get_render_ctx_id()
1553 stream->specific_ctx_id = i915_ggtt_offset(ce->state); in oa_get_render_ctx_id()
1554 stream->specific_ctx_id_mask = 0; in oa_get_render_ctx_id()
1560 if (intel_engine_uses_guc(ce->engine)) { in oa_get_render_ctx_id()
1571 stream->specific_ctx_id = ce->lrc.lrca >> 12; in oa_get_render_ctx_id()
1577 stream->specific_ctx_id_mask = in oa_get_render_ctx_id()
1578 (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; in oa_get_render_ctx_id()
1580 stream->specific_ctx_id_mask = in oa_get_render_ctx_id()
1581 (1U << GEN8_CTX_ID_WIDTH) - 1; in oa_get_render_ctx_id()
1582 stream->specific_ctx_id = stream->specific_ctx_id_mask; in oa_get_render_ctx_id()
1592 MISSING_CASE(GRAPHICS_VER(ce->engine->i915)); in oa_get_render_ctx_id()
1595 ce->tag = stream->specific_ctx_id; in oa_get_render_ctx_id()
1597 drm_dbg(&stream->perf->i915->drm, in oa_get_render_ctx_id()
1599 stream->specific_ctx_id, in oa_get_render_ctx_id()
1600 stream->specific_ctx_id_mask); in oa_get_render_ctx_id()
1606 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
1607 * @stream: An i915-perf stream opened for OA metrics
1616 ce = fetch_and_zero(&stream->pinned_ctx); in oa_put_render_ctx_id()
1618 ce->tag = 0; /* recomputed on next submission after parking */ in oa_put_render_ctx_id()
1622 stream->specific_ctx_id = INVALID_CTX_ID; in oa_put_render_ctx_id()
1623 stream->specific_ctx_id_mask = 0; in oa_put_render_ctx_id()
1629 i915_vma_unpin_and_release(&stream->oa_buffer.vma, in free_oa_buffer()
1632 stream->oa_buffer.vaddr = NULL; in free_oa_buffer()
1640 i915_oa_config_put(stream->oa_config); in free_oa_configs()
1641 llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) in free_oa_configs()
1648 i915_vma_unpin_and_release(&stream->noa_wait, 0); in free_noa_wait()
1653 return engine->oa_group; in engine_supports_oa()
1658 return engine->oa_group && engine->oa_group->type == type; in engine_supports_oa_format()
1663 struct i915_perf *perf = stream->perf; in i915_oa_stream_destroy()
1664 struct intel_gt *gt = stream->engine->gt; in i915_oa_stream_destroy()
1665 struct i915_perf_group *g = stream->engine->oa_group; in i915_oa_stream_destroy()
1667 if (WARN_ON(stream != g->exclusive_stream)) in i915_oa_stream_destroy()
1676 WRITE_ONCE(g->exclusive_stream, NULL); in i915_oa_stream_destroy()
1677 perf->ops.disable_metric_set(stream); in i915_oa_stream_destroy()
1681 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); in i915_oa_stream_destroy()
1682 intel_engine_pm_put(stream->engine); in i915_oa_stream_destroy()
1684 if (stream->ctx) in i915_oa_stream_destroy()
1690 if (perf->spurious_report_rs.missed) { in i915_oa_stream_destroy()
1691 gt_notice(gt, "%d spurious OA report notices suppressed due to ratelimiting\n", in i915_oa_stream_destroy()
1692 perf->spurious_report_rs.missed); in i915_oa_stream_destroy()
1698 struct intel_uncore *uncore = stream->uncore; in gen7_init_oa_buffer()
1699 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); in gen7_init_oa_buffer()
1702 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen7_init_oa_buffer()
1704 /* Pre-DevBDW: OABUFFER must be set with counters off, in gen7_init_oa_buffer()
1709 stream->oa_buffer.head = 0; in gen7_init_oa_buffer()
1717 stream->oa_buffer.tail = 0; in gen7_init_oa_buffer()
1719 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen7_init_oa_buffer()
1725 stream->perf->gen7_latched_oastatus1 = 0; in gen7_init_oa_buffer()
1729 * first allocating), we may re-init the OA buffer, either in gen7_init_oa_buffer()
1730 * when re-enabling a stream or in error/reset paths. in gen7_init_oa_buffer()
1732 * The reason we clear the buffer for each re-init is for the in gen7_init_oa_buffer()
1734 * report-id field to make sure it's non-zero which relies on in gen7_init_oa_buffer()
1738 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); in gen7_init_oa_buffer()
1743 struct intel_uncore *uncore = stream->uncore; in gen8_init_oa_buffer()
1744 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); in gen8_init_oa_buffer()
1747 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen8_init_oa_buffer()
1751 stream->oa_buffer.head = 0; in gen8_init_oa_buffer()
1768 stream->oa_buffer.tail = 0; in gen8_init_oa_buffer()
1775 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; in gen8_init_oa_buffer()
1777 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen8_init_oa_buffer()
1782 * first allocating), we may re-init the OA buffer, either in gen8_init_oa_buffer()
1783 * when re-enabling a stream or in error/reset paths. in gen8_init_oa_buffer()
1785 * The reason we clear the buffer for each re-init is for the in gen8_init_oa_buffer()
1787 * reason field to make sure it's non-zero which relies on in gen8_init_oa_buffer()
1791 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); in gen8_init_oa_buffer()
1796 struct intel_uncore *uncore = stream->uncore; in gen12_init_oa_buffer()
1797 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); in gen12_init_oa_buffer()
1800 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); in gen12_init_oa_buffer()
1802 intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0); in gen12_init_oa_buffer()
1803 intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr, in gen12_init_oa_buffer()
1805 stream->oa_buffer.head = 0; in gen12_init_oa_buffer()
1815 intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset | in gen12_init_oa_buffer()
1817 intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr, in gen12_init_oa_buffer()
1821 stream->oa_buffer.tail = 0; in gen12_init_oa_buffer()
1828 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; in gen12_init_oa_buffer()
1830 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); in gen12_init_oa_buffer()
1835 * first allocating), we may re-init the OA buffer, either in gen12_init_oa_buffer()
1836 * when re-enabling a stream or in error/reset paths. in gen12_init_oa_buffer()
1838 * The reason we clear the buffer for each re-init is for the in gen12_init_oa_buffer()
1840 * reason field to make sure it's non-zero which relies on in gen12_init_oa_buffer()
1844 memset(stream->oa_buffer.vaddr, 0, in gen12_init_oa_buffer()
1845 stream->oa_buffer.vma->size); in gen12_init_oa_buffer()
1850 struct drm_i915_private *i915 = stream->perf->i915; in alloc_oa_buffer()
1851 struct intel_gt *gt = stream->engine->gt; in alloc_oa_buffer()
1856 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma)) in alloc_oa_buffer()
1857 return -ENODEV; in alloc_oa_buffer()
1862 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); in alloc_oa_buffer()
1864 drm_err(&i915->drm, "Failed to allocate OA buffer\n"); in alloc_oa_buffer()
1871 vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); in alloc_oa_buffer()
1887 stream->oa_buffer.vma = vma; in alloc_oa_buffer()
1889 stream->oa_buffer.vaddr = in alloc_oa_buffer()
1891 if (IS_ERR(stream->oa_buffer.vaddr)) { in alloc_oa_buffer()
1892 ret = PTR_ERR(stream->oa_buffer.vaddr); in alloc_oa_buffer()
1904 stream->oa_buffer.vaddr = NULL; in alloc_oa_buffer()
1905 stream->oa_buffer.vma = NULL; in alloc_oa_buffer()
1919 if (GRAPHICS_VER(stream->perf->i915) >= 8) in save_restore_register()
1925 *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d; in save_restore_register()
1934 struct drm_i915_private *i915 = stream->perf->i915; in alloc_noa_wait()
1935 struct intel_gt *gt = stream->engine->gt; in alloc_noa_wait()
1938 const u64 delay_ticks = 0xffffffffffffffff - in alloc_noa_wait()
1939 intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915), in alloc_noa_wait()
1940 atomic64_read(&stream->perf->noa_programming_delay)); in alloc_noa_wait()
1941 const u32 base = stream->engine->mmio_base; in alloc_noa_wait()
1959 * gt->scratch was being used to save/restore the GPR registers, but on in alloc_noa_wait()
1966 drm_err(&i915->drm, in alloc_noa_wait()
1982 vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); in alloc_noa_wait()
1998 stream->noa_wait = vma; in alloc_noa_wait()
2023 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); in alloc_noa_wait()
2041 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); in alloc_noa_wait()
2061 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); in alloc_noa_wait()
2073 *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; in alloc_noa_wait()
2081 * (((1 * << 64) - 1) - delay_ns) in alloc_noa_wait()
2104 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); in alloc_noa_wait()
2116 *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; in alloc_noa_wait()
2134 GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); in alloc_noa_wait()
2144 if (ret == -EDEADLK) { in alloc_noa_wait()
2164 n_regs - i, in write_cs_mi_lri()
2201 return ERR_PTR(-ENOMEM); in alloc_oa_config_buffer()
2203 config_length += num_lri_dwords(oa_config->mux_regs_len); in alloc_oa_config_buffer()
2204 config_length += num_lri_dwords(oa_config->b_counter_regs_len); in alloc_oa_config_buffer()
2205 config_length += num_lri_dwords(oa_config->flex_regs_len); in alloc_oa_config_buffer()
2209 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); in alloc_oa_config_buffer()
2228 oa_config->mux_regs, in alloc_oa_config_buffer()
2229 oa_config->mux_regs_len); in alloc_oa_config_buffer()
2231 oa_config->b_counter_regs, in alloc_oa_config_buffer()
2232 oa_config->b_counter_regs_len); in alloc_oa_config_buffer()
2234 oa_config->flex_regs, in alloc_oa_config_buffer()
2235 oa_config->flex_regs_len); in alloc_oa_config_buffer()
2238 *cs++ = (GRAPHICS_VER(stream->perf->i915) < 8 ? in alloc_oa_config_buffer()
2241 *cs++ = i915_ggtt_offset(stream->noa_wait); in alloc_oa_config_buffer()
2247 oa_bo->vma = i915_vma_instance(obj, in alloc_oa_config_buffer()
2248 &stream->engine->gt->ggtt->vm, in alloc_oa_config_buffer()
2250 if (IS_ERR(oa_bo->vma)) { in alloc_oa_config_buffer()
2251 err = PTR_ERR(oa_bo->vma); in alloc_oa_config_buffer()
2255 oa_bo->oa_config = i915_oa_config_get(oa_config); in alloc_oa_config_buffer()
2256 llist_add(&oa_bo->node, &stream->oa_config_bos); in alloc_oa_config_buffer()
2259 if (err == -EDEADLK) { in alloc_oa_config_buffer()
2285 llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { in get_oa_vma()
2286 if (oa_bo->oa_config == oa_config && in get_oa_vma()
2287 memcmp(oa_bo->oa_config->uuid, in get_oa_vma()
2288 oa_config->uuid, in get_oa_vma()
2289 sizeof(oa_config->uuid)) == 0) in get_oa_vma()
2298 return i915_vma_get(oa_bo->vma); in get_oa_vma()
2318 err = i915_gem_object_lock(vma->obj, &ww); in emit_oa_config()
2326 intel_engine_pm_get(ce->engine); in emit_oa_config()
2328 intel_engine_pm_put(ce->engine); in emit_oa_config()
2350 err = rq->engine->emit_bb_start(rq, in emit_oa_config()
2361 if (err == -EDEADLK) { in emit_oa_config()
2374 return stream->pinned_ctx ?: stream->engine->kernel_context; in oa_context()
2381 struct intel_uncore *uncore = stream->uncore; in hsw_enable_metric_set()
2388 * unable to count the events from non-render clock domain. in hsw_enable_metric_set()
2390 * count the events from non-render domain. Unit level clock in hsw_enable_metric_set()
2399 stream->oa_config, oa_context(stream), in hsw_enable_metric_set()
2405 struct intel_uncore *uncore = stream->uncore; in hsw_disable_metric_set()
2429 for (i = 0; i < oa_config->flex_regs_len; i++) { in oa_config_flex_reg()
2430 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) in oa_config_flex_reg()
2431 return oa_config->flex_regs[i].value; in oa_config_flex_reg()
2440 * It's fine to put out-of-date values into these per-context registers
2447 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; in gen8_update_reg_state_unlocked()
2448 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; in gen8_update_reg_state_unlocked()
2459 u32 *reg_state = ce->lrc_reg_state; in gen8_update_reg_state_unlocked()
2463 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | in gen8_update_reg_state_unlocked()
2464 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | in gen8_update_reg_state_unlocked()
2469 oa_config_flex_reg(stream->oa_config, flex_regs[i]); in gen8_update_reg_state_unlocked()
2490 offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET; in gen8_store_flex()
2493 *cs++ = offset + flex->offset * sizeof(u32); in gen8_store_flex()
2495 *cs++ = flex->value; in gen8_store_flex()
2496 } while (flex++, --count); in gen8_store_flex()
2518 *cs++ = i915_mmio_reg_offset(flex->reg); in gen8_load_flex()
2519 *cs++ = flex->value; in gen8_load_flex()
2520 } while (flex++, --count); in gen8_load_flex()
2534 rq = intel_engine_create_kernel_request(ce->engine); in gen8_modify_context()
2555 intel_engine_pm_get(ce->engine); in gen8_modify_self()
2557 intel_engine_pm_put(ce->engine); in gen8_modify_self()
2585 GEM_BUG_ON(ce == ce->engine->kernel_context); in gen8_configure_context()
2587 if (ce->engine->class != RENDER_CLASS) in gen8_configure_context()
2594 flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu); in gen8_configure_context()
2610 struct intel_context *ce = stream->pinned_ctx; in gen12_configure_oar_context()
2611 u32 format = stream->oa_buffer.format->format; in gen12_configure_oar_context()
2612 u32 offset = stream->perf->ctx_oactxctrl_offset; in gen12_configure_oar_context()
2632 RING_CONTEXT_CONTROL(ce->engine->mmio_base), in gen12_configure_oar_context()
2657 * Manages updating the per-context aspects of the OA stream
2667 * won't automatically reload an out-of-date timer exponent even
2671 * - Ensure the currently running context's per-context OA state is
2673 * - Ensure that all existing contexts will have the correct per-context
2675 * - Ensure any new contexts will be initialized with the correct
2676 * per-context OA state.
2687 struct drm_i915_private *i915 = stream->perf->i915; in oa_configure_all_contexts()
2689 struct intel_gt *gt = stream->engine->gt; in oa_configure_all_contexts()
2693 lockdep_assert_held(&gt->perf.lock); in oa_configure_all_contexts()
2698 * lite-restore). This means we can't safely update a context's image, in oa_configure_all_contexts()
2711 spin_lock(&i915->gem.contexts.lock); in oa_configure_all_contexts()
2712 list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { in oa_configure_all_contexts()
2713 if (!kref_get_unless_zero(&ctx->ref)) in oa_configure_all_contexts()
2716 spin_unlock(&i915->gem.contexts.lock); in oa_configure_all_contexts()
2724 spin_lock(&i915->gem.contexts.lock); in oa_configure_all_contexts()
2728 spin_unlock(&i915->gem.contexts.lock); in oa_configure_all_contexts()
2736 struct intel_context *ce = engine->kernel_context; in oa_configure_all_contexts()
2738 if (engine->class != RENDER_CLASS) in oa_configure_all_contexts()
2741 regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu); in oa_configure_all_contexts()
2756 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; in lrc_configure_all_contexts()
2758 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; in lrc_configure_all_contexts()
2781 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | in lrc_configure_all_contexts()
2782 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | in lrc_configure_all_contexts()
2797 struct intel_uncore *uncore = stream->uncore; in gen8_enable_metric_set()
2798 struct i915_oa_config *oa_config = stream->oa_config; in gen8_enable_metric_set()
2802 * We disable slice/unslice clock ratio change reports on SKL since in gen8_enable_metric_set()
2808 * Although we don't currently use the 'disable overrun' OABUFFER in gen8_enable_metric_set()
2813 * Currently none of the high-level metrics we have depend on knowing in gen8_enable_metric_set()
2817 * that's OK considering that we disable RC6 while the OA unit is in gen8_enable_metric_set()
2824 if (IS_GRAPHICS_VER(stream->perf->i915, 9, 11)) { in gen8_enable_metric_set()
2840 stream->oa_config, oa_context(stream), in gen8_enable_metric_set()
2847 (stream->sample_flags & SAMPLE_OA_REPORT) ? in oag_report_ctx_switches()
2855 struct drm_i915_private *i915 = stream->perf->i915; in gen12_enable_metric_set()
2856 struct intel_uncore *uncore = stream->uncore; in gen12_enable_metric_set()
2857 bool periodic = stream->periodic; in gen12_enable_metric_set()
2858 u32 period_exponent = stream->period_exponent; in gen12_enable_metric_set()
2865 * Disable thread stall DOP gating and EU DOP gating. in gen12_enable_metric_set()
2868 intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN, in gen12_enable_metric_set()
2874 intel_uncore_write(uncore, __oa_regs(stream)->oa_debug, in gen12_enable_metric_set()
2875 /* Disable clk ratio reports, like previous Gens. */ in gen12_enable_metric_set()
2884 intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ? in gen12_enable_metric_set()
2905 if (stream->ctx) { in gen12_enable_metric_set()
2912 stream->oa_config, oa_context(stream), in gen12_enable_metric_set()
2918 struct intel_uncore *uncore = stream->uncore; in gen8_disable_metric_set()
2928 struct intel_uncore *uncore = stream->uncore; in gen11_disable_metric_set()
2933 /* Make sure we disable noa to save power. */ in gen11_disable_metric_set()
2939 struct intel_uncore *uncore = stream->uncore; in gen12_disable_metric_set()
2940 struct drm_i915_private *i915 = stream->perf->i915; in gen12_disable_metric_set()
2947 intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN, in gen12_disable_metric_set()
2953 /* disable the context save/restore or OAR counters */ in gen12_disable_metric_set()
2954 if (stream->ctx) in gen12_disable_metric_set()
2957 /* Make sure we disable noa to save power. */ in gen12_disable_metric_set()
2969 struct intel_uncore *uncore = stream->uncore; in gen7_oa_enable()
2970 struct i915_gem_context *ctx = stream->ctx; in gen7_oa_enable()
2971 u32 ctx_id = stream->specific_ctx_id; in gen7_oa_enable()
2972 bool periodic = stream->periodic; in gen7_oa_enable()
2973 u32 period_exponent = stream->period_exponent; in gen7_oa_enable()
2974 u32 report_format = stream->oa_buffer.format->format; in gen7_oa_enable()
2999 struct intel_uncore *uncore = stream->uncore; in gen8_oa_enable()
3000 u32 report_format = stream->oa_buffer.format->format; in gen8_oa_enable()
3015 * filtering and instead filter on the cpu based on the context-id in gen8_oa_enable()
3032 if (!(stream->sample_flags & SAMPLE_OA_REPORT)) in gen12_oa_enable()
3038 val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) | in gen12_oa_enable()
3041 intel_uncore_write(stream->uncore, regs->oa_ctrl, val); in gen12_oa_enable()
3045 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
3055 stream->pollin = false; in i915_oa_stream_enable()
3057 stream->perf->ops.oa_enable(stream); in i915_oa_stream_enable()
3059 if (stream->sample_flags & SAMPLE_OA_REPORT) in i915_oa_stream_enable()
3060 hrtimer_start(&stream->poll_check_timer, in i915_oa_stream_enable()
3061 ns_to_ktime(stream->poll_oa_period), in i915_oa_stream_enable()
3067 struct intel_uncore *uncore = stream->uncore; in gen7_oa_disable()
3073 drm_err(&stream->perf->i915->drm, in gen7_oa_disable()
3079 struct intel_uncore *uncore = stream->uncore; in gen8_oa_disable()
3085 drm_err(&stream->perf->i915->drm, in gen8_oa_disable()
3091 struct intel_uncore *uncore = stream->uncore; in gen12_oa_disable()
3093 intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0); in gen12_oa_disable()
3095 __oa_regs(stream)->oa_ctrl, in gen12_oa_disable()
3098 drm_err(&stream->perf->i915->drm, in gen12_oa_disable()
3106 drm_err(&stream->perf->i915->drm, in gen12_oa_disable()
3111 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
3120 stream->perf->ops.oa_disable(stream); in i915_oa_stream_disable()
3122 if (stream->sample_flags & SAMPLE_OA_REPORT) in i915_oa_stream_disable()
3123 hrtimer_cancel(&stream->poll_check_timer); in i915_oa_stream_disable()
3129 .disable = i915_oa_stream_disable,
3142 return -ENOMEM; in i915_perf_stream_enable_sync()
3144 err = stream->perf->ops.enable_metric_set(stream, active); in i915_perf_stream_enable_sync()
3156 const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu; in get_default_sseu_config()
3160 if (GRAPHICS_VER(engine->i915) == 11) { in get_default_sseu_config()
3163 * we select - just turn off low bits in the amount of half of in get_default_sseu_config()
3166 out_sseu->subslice_mask = in get_default_sseu_config()
3167 ~(~0 << (hweight8(out_sseu->subslice_mask) / 2)); in get_default_sseu_config()
3168 out_sseu->slice_mask = 0x1; in get_default_sseu_config()
3177 if (drm_sseu->engine.engine_class != engine->uabi_class || in get_sseu_config()
3178 drm_sseu->engine.engine_instance != engine->uabi_instance) in get_sseu_config()
3179 return -EINVAL; in get_sseu_config()
3181 return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu); in get_sseu_config()
3198 with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) in i915_perf_oa_timestamp_frequency()
3199 reg = intel_uncore_read(to_gt(i915)->uncore, RPM_CONFIG0); in i915_perf_oa_timestamp_frequency()
3204 return to_gt(i915)->clock_frequency << (3 - shift); in i915_perf_oa_timestamp_frequency()
3207 return to_gt(i915)->clock_frequency; in i915_perf_oa_timestamp_frequency()
3211 * i915_oa_stream_init - validate combined props for OA stream and init
3232 struct drm_i915_private *i915 = stream->perf->i915; in i915_oa_stream_init()
3233 struct i915_perf *perf = stream->perf; in i915_oa_stream_init()
3237 if (!props->engine) { in i915_oa_stream_init()
3238 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3240 return -EINVAL; in i915_oa_stream_init()
3242 g = props->engine->oa_group; in i915_oa_stream_init()
3249 if (!perf->metrics_kobj) { in i915_oa_stream_init()
3250 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3252 return -EINVAL; in i915_oa_stream_init()
3255 if (!(props->sample_flags & SAMPLE_OA_REPORT) && in i915_oa_stream_init()
3256 (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) { in i915_oa_stream_init()
3257 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3258 "Only OA report sampling supported\n"); in i915_oa_stream_init()
3259 return -EINVAL; in i915_oa_stream_init()
3262 if (!perf->ops.enable_metric_set) { in i915_oa_stream_init()
3263 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3265 return -ENODEV; in i915_oa_stream_init()
3273 if (g->exclusive_stream) { in i915_oa_stream_init()
3274 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3276 return -EBUSY; in i915_oa_stream_init()
3279 if (!props->oa_format) { in i915_oa_stream_init()
3280 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3281 "OA report format not specified\n"); in i915_oa_stream_init()
3282 return -EINVAL; in i915_oa_stream_init()
3285 stream->engine = props->engine; in i915_oa_stream_init()
3286 stream->uncore = stream->engine->gt->uncore; in i915_oa_stream_init()
3288 stream->sample_size = sizeof(struct drm_i915_perf_record_header); in i915_oa_stream_init()
3290 stream->oa_buffer.format = &perf->oa_formats[props->oa_format]; in i915_oa_stream_init()
3291 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0)) in i915_oa_stream_init()
3292 return -EINVAL; in i915_oa_stream_init()
3294 stream->sample_flags = props->sample_flags; in i915_oa_stream_init()
3295 stream->sample_size += stream->oa_buffer.format->size; in i915_oa_stream_init()
3297 stream->hold_preemption = props->hold_preemption; in i915_oa_stream_init()
3299 stream->periodic = props->oa_periodic; in i915_oa_stream_init()
3300 if (stream->periodic) in i915_oa_stream_init()
3301 stream->period_exponent = props->oa_period_exponent; in i915_oa_stream_init()
3303 if (stream->ctx) { in i915_oa_stream_init()
3306 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3314 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3319 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); in i915_oa_stream_init()
3320 if (!stream->oa_config) { in i915_oa_stream_init()
3321 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3322 "Invalid OA config id=%i\n", props->metrics_set); in i915_oa_stream_init()
3323 ret = -EINVAL; in i915_oa_stream_init()
3327 /* PRM - observability performance counters: in i915_oa_stream_init()
3337 * references will effectively disable RC6. in i915_oa_stream_init()
3339 intel_engine_pm_get(stream->engine); in i915_oa_stream_init()
3340 intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); in i915_oa_stream_init()
3346 stream->ops = &i915_oa_stream_ops; in i915_oa_stream_init()
3348 stream->engine->gt->perf.sseu = props->sseu; in i915_oa_stream_init()
3349 WRITE_ONCE(g->exclusive_stream, stream); in i915_oa_stream_init()
3353 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3358 drm_dbg(&stream->perf->i915->drm, in i915_oa_stream_init()
3360 stream->oa_config->uuid); in i915_oa_stream_init()
3362 hrtimer_init(&stream->poll_check_timer, in i915_oa_stream_init()
3364 stream->poll_check_timer.function = oa_poll_check_timer_cb; in i915_oa_stream_init()
3365 init_waitqueue_head(&stream->poll_wq); in i915_oa_stream_init()
3366 spin_lock_init(&stream->oa_buffer.ptr_lock); in i915_oa_stream_init()
3367 mutex_init(&stream->lock); in i915_oa_stream_init()
3372 WRITE_ONCE(g->exclusive_stream, NULL); in i915_oa_stream_init()
3373 perf->ops.disable_metric_set(stream); in i915_oa_stream_init()
3378 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); in i915_oa_stream_init()
3379 intel_engine_pm_put(stream->engine); in i915_oa_stream_init()
3387 if (stream->ctx) in i915_oa_stream_init()
3398 if (engine->class != RENDER_CLASS) in i915_oa_init_reg_state()
3402 stream = READ_ONCE(engine->oa_group->exclusive_stream); in i915_oa_init_reg_state()
3403 if (stream && GRAPHICS_VER(stream->perf->i915) < 12) in i915_oa_init_reg_state()
3408 * i915_perf_read - handles read() FOP for i915 perf stream FDs
3416 * &i915_perf_stream_ops->read but to save having stream implementations (of
3430 struct i915_perf_stream *stream = file->private_data; in i915_perf_read()
3438 if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT)) in i915_perf_read()
3439 return -EIO; in i915_perf_read()
3441 if (!(file->f_flags & O_NONBLOCK)) { in i915_perf_read()
3443 * stream->ops->wait_unlocked. in i915_perf_read()
3446 * oabuffer has >= 1 report we don't immediately know whether in i915_perf_read()
3450 ret = stream->ops->wait_unlocked(stream); in i915_perf_read()
3454 mutex_lock(&stream->lock); in i915_perf_read()
3455 ret = stream->ops->read(stream, buf, count, &offset); in i915_perf_read()
3456 mutex_unlock(&stream->lock); in i915_perf_read()
3459 mutex_lock(&stream->lock); in i915_perf_read()
3460 ret = stream->ops->read(stream, buf, count, &offset); in i915_perf_read()
3461 mutex_unlock(&stream->lock); in i915_perf_read()
3464 /* We allow the poll checking to sometimes report false positive EPOLLIN in i915_perf_read()
3465 * events where we might actually report EAGAIN on read() if there's in i915_perf_read()
3468 * and read() returning -EAGAIN. Clearing the oa.pollin state here in i915_perf_read()
3471 * The exception to this is if ops->read() returned -ENOSPC which means in i915_perf_read()
3475 if (ret != -ENOSPC) in i915_perf_read()
3476 stream->pollin = false; in i915_perf_read()
3478 /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */ in i915_perf_read()
3479 return offset ?: (ret ?: -EAGAIN); in i915_perf_read()
3488 stream->pollin = true; in oa_poll_check_timer_cb()
3489 wake_up(&stream->poll_wq); in oa_poll_check_timer_cb()
3493 ns_to_ktime(stream->poll_oa_period)); in oa_poll_check_timer_cb()
3499 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
3505 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
3516 stream->ops->poll_wait(stream, file, wait); in i915_perf_poll_locked()
3524 if (stream->pollin) in i915_perf_poll_locked()
3531 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
3545 struct i915_perf_stream *stream = file->private_data; in i915_perf_poll()
3548 mutex_lock(&stream->lock); in i915_perf_poll()
3550 mutex_unlock(&stream->lock); in i915_perf_poll()
3556 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
3567 if (stream->enabled) in i915_perf_enable_locked()
3570 /* Allow stream->ops->enable() to refer to this */ in i915_perf_enable_locked()
3571 stream->enabled = true; in i915_perf_enable_locked()
3573 if (stream->ops->enable) in i915_perf_enable_locked()
3574 stream->ops->enable(stream); in i915_perf_enable_locked()
3576 if (stream->hold_preemption) in i915_perf_enable_locked()
3577 intel_context_set_nopreempt(stream->pinned_ctx); in i915_perf_enable_locked()
3581 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
3586 * The intention is that disabling an re-enabling a stream will ideally be
3587 * cheaper than destroying and re-opening a stream with the same configuration,
3589 * must be retained between disabling and re-enabling a stream.
3592 * to attempt to read from the stream (-EIO).
3596 if (!stream->enabled) in i915_perf_disable_locked()
3599 /* Allow stream->ops->disable() to refer to this */ in i915_perf_disable_locked()
3600 stream->enabled = false; in i915_perf_disable_locked()
3602 if (stream->hold_preemption) in i915_perf_disable_locked()
3603 intel_context_clear_nopreempt(stream->pinned_ctx); in i915_perf_disable_locked()
3605 if (stream->ops->disable) in i915_perf_disable_locked()
3606 stream->ops->disable(stream); in i915_perf_disable_locked()
3613 long ret = stream->oa_config->id; in i915_perf_config_locked()
3615 config = i915_perf_get_oa_config(stream->perf, metrics_set); in i915_perf_config_locked()
3617 return -EINVAL; in i915_perf_config_locked()
3619 if (config != stream->oa_config) { in i915_perf_config_locked()
3633 config = xchg(&stream->oa_config, config); in i915_perf_config_locked()
3644 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
3649 * Returns: zero on success or a negative error code. Returns -EINVAL for
3667 return -EINVAL; in i915_perf_ioctl_locked()
3671 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3678 * Returns: zero on success or a negative error code. Returns -EINVAL for
3685 struct i915_perf_stream *stream = file->private_data; in i915_perf_ioctl()
3688 mutex_lock(&stream->lock); in i915_perf_ioctl()
3690 mutex_unlock(&stream->lock); in i915_perf_ioctl()
3696 * i915_perf_destroy_locked - destroy an i915 perf stream
3702 * Note: The &gt->perf.lock mutex has been taken to serialize
3703 * with any non-file-operation driver hooks.
3707 if (stream->enabled) in i915_perf_destroy_locked()
3710 if (stream->ops->destroy) in i915_perf_destroy_locked()
3711 stream->ops->destroy(stream); in i915_perf_destroy_locked()
3713 if (stream->ctx) in i915_perf_destroy_locked()
3714 i915_gem_context_put(stream->ctx); in i915_perf_destroy_locked()
3720 * i915_perf_release - handles userspace close() of a stream file
3732 struct i915_perf_stream *stream = file->private_data; in i915_perf_release()
3733 struct i915_perf *perf = stream->perf; in i915_perf_release()
3734 struct intel_gt *gt = stream->engine->gt; in i915_perf_release()
3738 * other user of stream->lock. Use the perf lock to destroy the stream in i915_perf_release()
3741 mutex_lock(&gt->perf.lock); in i915_perf_release()
3743 mutex_unlock(&gt->perf.lock); in i915_perf_release()
3746 drm_dev_put(&perf->i915->drm); in i915_perf_release()
3766 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
3775 * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
3776 * taken to serialize with any non-file-operation driver hooks.
3802 if (props->single_context) { in i915_perf_open_ioctl_locked()
3803 u32 ctx_handle = props->ctx_handle; in i915_perf_open_ioctl_locked()
3804 struct drm_i915_file_private *file_priv = file->driver_priv; in i915_perf_open_ioctl_locked()
3808 drm_dbg(&perf->i915->drm, in i915_perf_open_ioctl_locked()
3820 * non-privileged client. in i915_perf_open_ioctl_locked()
3822 * For Gen8->11 the OA unit no longer supports clock gating off for a in i915_perf_open_ioctl_locked()
3824 * from updating as system-wide / global values. Even though we can in i915_perf_open_ioctl_locked()
3835 if (IS_HASWELL(perf->i915) && specific_ctx) in i915_perf_open_ioctl_locked()
3837 else if (GRAPHICS_VER(perf->i915) == 12 && specific_ctx && in i915_perf_open_ioctl_locked()
3838 (props->sample_flags & SAMPLE_OA_REPORT) == 0) in i915_perf_open_ioctl_locked()
3841 if (props->hold_preemption) { in i915_perf_open_ioctl_locked()
3842 if (!props->single_context) { in i915_perf_open_ioctl_locked()
3843 drm_dbg(&perf->i915->drm, in i915_perf_open_ioctl_locked()
3844 "preemption disable with no context\n"); in i915_perf_open_ioctl_locked()
3845 ret = -EINVAL; in i915_perf_open_ioctl_locked()
3854 if (props->has_sseu) in i915_perf_open_ioctl_locked()
3857 get_default_sseu_config(&props->sseu, props->engine); in i915_perf_open_ioctl_locked()
3866 drm_dbg(&perf->i915->drm, in i915_perf_open_ioctl_locked()
3868 ret = -EACCES; in i915_perf_open_ioctl_locked()
3874 ret = -ENOMEM; in i915_perf_open_ioctl_locked()
3878 stream->perf = perf; in i915_perf_open_ioctl_locked()
3879 stream->ctx = specific_ctx; in i915_perf_open_ioctl_locked()
3880 stream->poll_oa_period = props->poll_oa_period; in i915_perf_open_ioctl_locked()
3886 /* we avoid simply assigning stream->sample_flags = props->sample_flags in i915_perf_open_ioctl_locked()
3890 if (WARN_ON(stream->sample_flags != props->sample_flags)) { in i915_perf_open_ioctl_locked()
3891 ret = -ENODEV; in i915_perf_open_ioctl_locked()
3895 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) in i915_perf_open_ioctl_locked()
3897 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) in i915_perf_open_ioctl_locked()
3906 if (!(param->flags & I915_PERF_FLAG_DISABLED)) in i915_perf_open_ioctl_locked()
3912 drm_dev_get(&perf->i915->drm); in i915_perf_open_ioctl_locked()
3917 if (stream->ops->destroy) in i915_perf_open_ioctl_locked()
3918 stream->ops->destroy(stream); in i915_perf_open_ioctl_locked()
3931 u32 den = i915_perf_oa_timestamp_frequency(perf->i915); in oa_exponent_to_ns()
3933 return div_u64(nom + den - 1, den); in oa_exponent_to_ns()
3939 return test_bit(format, perf->format_mask); in oa_format_valid()
3945 __set_bit(format, perf->format_mask); in oa_format_add()
3949 * read_properties_unlocked - validate + copy userspace stream open properties
3979 props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; in read_properties_unlocked()
3988 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
3990 return -EINVAL; in read_properties_unlocked()
4010 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4012 return -EINVAL; in read_properties_unlocked()
4017 props->single_context = 1; in read_properties_unlocked()
4018 props->ctx_handle = value; in read_properties_unlocked()
4022 props->sample_flags |= SAMPLE_OA_REPORT; in read_properties_unlocked()
4026 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4028 return -EINVAL; in read_properties_unlocked()
4030 props->metrics_set = value; in read_properties_unlocked()
4034 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4035 "Out-of-range OA report format %llu\n", in read_properties_unlocked()
4037 return -EINVAL; in read_properties_unlocked()
4040 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4041 "Unsupported OA report format %llu\n", in read_properties_unlocked()
4043 return -EINVAL; in read_properties_unlocked()
4045 props->oa_format = value; in read_properties_unlocked()
4049 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4052 return -EINVAL; in read_properties_unlocked()
4078 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4081 return -EACCES; in read_properties_unlocked()
4084 props->oa_periodic = true; in read_properties_unlocked()
4085 props->oa_period_exponent = value; in read_properties_unlocked()
4088 props->hold_preemption = !!value; in read_properties_unlocked()
4091 if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 55)) { in read_properties_unlocked()
4092 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4094 GRAPHICS_VER_FULL(perf->i915)); in read_properties_unlocked()
4095 return -ENODEV; in read_properties_unlocked()
4101 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4103 return -EFAULT; in read_properties_unlocked()
4110 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4113 return -EINVAL; in read_properties_unlocked()
4115 props->poll_oa_period = value; in read_properties_unlocked()
4127 return -EINVAL; in read_properties_unlocked()
4135 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4136 "OA engine-class and engine-instance parameters must be passed together\n"); in read_properties_unlocked()
4137 return -EINVAL; in read_properties_unlocked()
4140 props->engine = intel_engine_lookup_user(perf->i915, class, instance); in read_properties_unlocked()
4141 if (!props->engine) { in read_properties_unlocked()
4142 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4145 return -EINVAL; in read_properties_unlocked()
4148 if (!engine_supports_oa(props->engine)) { in read_properties_unlocked()
4149 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4152 return -EINVAL; in read_properties_unlocked()
4157 * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM in read_properties_unlocked()
4160 if (IS_MEDIA_GT_IP_STEP(props->engine->gt, IP_VER(13, 0), STEP_A0, STEP_C0) && in read_properties_unlocked()
4161 props->engine->oa_group->type == TYPE_OAM && in read_properties_unlocked()
4162 intel_check_bios_c6_setup(&props->engine->gt->rc6)) { in read_properties_unlocked()
4163 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4165 return -EINVAL; in read_properties_unlocked()
4168 i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX); in read_properties_unlocked()
4169 f = &perf->oa_formats[i]; in read_properties_unlocked()
4170 if (!engine_supports_oa_format(props->engine, f->type)) { in read_properties_unlocked()
4171 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4173 f->type, props->engine->class); in read_properties_unlocked()
4174 return -EINVAL; in read_properties_unlocked()
4178 ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); in read_properties_unlocked()
4180 drm_dbg(&perf->i915->drm, in read_properties_unlocked()
4184 props->has_sseu = true; in read_properties_unlocked()
4191 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
4201 * i915-perf stream is expected to be a suitable interface for other forms of
4208 * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
4209 * mutex for serializing with any non-file-operation driver hooks.
4217 struct i915_perf *perf = &to_i915(dev)->perf; in i915_perf_open_ioctl()
4224 if (!perf->i915) in i915_perf_open_ioctl()
4225 return -ENOTSUPP; in i915_perf_open_ioctl()
4230 if (param->flags & ~known_open_flags) { in i915_perf_open_ioctl()
4231 drm_dbg(&perf->i915->drm, in i915_perf_open_ioctl()
4233 return -EINVAL; in i915_perf_open_ioctl()
4237 u64_to_user_ptr(param->properties_ptr), in i915_perf_open_ioctl()
4238 param->num_properties, in i915_perf_open_ioctl()
4243 gt = props.engine->gt; in i915_perf_open_ioctl()
4245 mutex_lock(&gt->perf.lock); in i915_perf_open_ioctl()
4247 mutex_unlock(&gt->perf.lock); in i915_perf_open_ioctl()
4253 * i915_perf_register - exposes i915-perf to userspace
4258 * used to open an i915-perf stream.
4262 struct i915_perf *perf = &i915->perf; in i915_perf_register()
4265 if (!perf->i915) in i915_perf_register()
4272 mutex_lock(&gt->perf.lock); in i915_perf_register()
4274 perf->metrics_kobj = in i915_perf_register()
4276 &i915->drm.primary->kdev->kobj); in i915_perf_register()
4278 mutex_unlock(&gt->perf.lock); in i915_perf_register()
4282 * i915_perf_unregister - hide i915-perf from userspace
4285 * i915-perf state cleanup is split up into an 'unregister' and
4292 struct i915_perf *perf = &i915->perf; in i915_perf_unregister()
4294 if (!perf->metrics_kobj) in i915_perf_unregister()
4297 kobject_put(perf->metrics_kobj); in i915_perf_unregister()
4298 perf->metrics_kobj = NULL; in i915_perf_unregister()
4323 while (table->start || table->end) { in reg_in_range_table()
4324 if (addr >= table->start && addr <= table->end) in reg_in_range_table()
4337 { .start = 0x2710, .end = 0x272c }, /* OASTARTTRIG[1-8] */
4338 { .start = 0x2740, .end = 0x275c }, /* OAREPORTTRIG[1-8] */
4339 { .start = 0x2770, .end = 0x27ac }, /* OACEC[0-7][0-1] */
4345 { .start = 0xd900, .end = 0xd91c }, /* GEN12_OAG_OASTARTTRIG[1-8] */
4346 { .start = 0xd920, .end = 0xd93c }, /* GEN12_OAG_OAREPORTTRIG1[1-8] */
4347 { .start = 0xd940, .end = 0xd97c }, /* GEN12_OAG_CEC[0-7][0-1] */
4348 { .start = 0xdc00, .end = 0xdc3c }, /* GEN12_OAG_SCEC[0-7][0-1] */
4355 { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
4356 { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
4357 { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
4358 { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
4364 { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
4369 { .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */
4370 { .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */
4376 { .start = 0x09e80, .end = 0x09ea4 }, /* HSW_MBVID2_NOA[0-9] */
4388 { .start = 0x0d00, .end = 0x0d2c }, /* RPM_CONFIG[0-1], NOA_CONFIG[0-8] */
4394 { .start = 0x91c8, .end = 0x91dc }, /* OA_PERFCNT[3-4] */
4399 { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
4400 { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
4412 { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
4413 { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
4457 if (HAS_OAM(perf->i915) && in mtl_is_valid_oam_b_counter_addr()
4458 GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) in mtl_is_valid_oam_b_counter_addr()
4473 if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) in gen12_is_valid_mux_addr()
4513 return ERR_PTR(-EINVAL); in alloc_oa_regs()
4517 return ERR_PTR(-ENOMEM); in alloc_oa_regs()
4527 drm_dbg(&perf->i915->drm, in alloc_oa_regs()
4529 err = -EINVAL; in alloc_oa_regs()
4557 return sprintf(buf, "%d\n", oa_config->id); in show_dynamic_id()
4563 sysfs_attr_init(&oa_config->sysfs_metric_id.attr); in create_dynamic_oa_sysfs_entry()
4564 oa_config->sysfs_metric_id.attr.name = "id"; in create_dynamic_oa_sysfs_entry()
4565 oa_config->sysfs_metric_id.attr.mode = S_IRUGO; in create_dynamic_oa_sysfs_entry()
4566 oa_config->sysfs_metric_id.show = show_dynamic_id; in create_dynamic_oa_sysfs_entry()
4567 oa_config->sysfs_metric_id.store = NULL; in create_dynamic_oa_sysfs_entry()
4569 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; in create_dynamic_oa_sysfs_entry()
4570 oa_config->attrs[1] = NULL; in create_dynamic_oa_sysfs_entry()
4572 oa_config->sysfs_metric.name = oa_config->uuid; in create_dynamic_oa_sysfs_entry()
4573 oa_config->sysfs_metric.attrs = oa_config->attrs; in create_dynamic_oa_sysfs_entry()
4575 return sysfs_create_group(perf->metrics_kobj, in create_dynamic_oa_sysfs_entry()
4576 &oa_config->sysfs_metric); in create_dynamic_oa_sysfs_entry()
4580 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
4595 struct i915_perf *perf = &to_i915(dev)->perf; in i915_perf_add_config_ioctl()
4601 if (!perf->i915) in i915_perf_add_config_ioctl()
4602 return -ENOTSUPP; in i915_perf_add_config_ioctl()
4604 if (!perf->metrics_kobj) { in i915_perf_add_config_ioctl()
4605 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4607 return -EINVAL; in i915_perf_add_config_ioctl()
4611 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4613 return -EACCES; in i915_perf_add_config_ioctl()
4616 if ((!args->mux_regs_ptr || !args->n_mux_regs) && in i915_perf_add_config_ioctl()
4617 (!args->boolean_regs_ptr || !args->n_boolean_regs) && in i915_perf_add_config_ioctl()
4618 (!args->flex_regs_ptr || !args->n_flex_regs)) { in i915_perf_add_config_ioctl()
4619 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4621 return -EINVAL; in i915_perf_add_config_ioctl()
4626 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4628 return -ENOMEM; in i915_perf_add_config_ioctl()
4631 oa_config->perf = perf; in i915_perf_add_config_ioctl()
4632 kref_init(&oa_config->ref); in i915_perf_add_config_ioctl()
4634 if (!uuid_is_valid(args->uuid)) { in i915_perf_add_config_ioctl()
4635 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4637 err = -EINVAL; in i915_perf_add_config_ioctl()
4641 /* Last character in oa_config->uuid will be 0 because oa_config is in i915_perf_add_config_ioctl()
4644 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); in i915_perf_add_config_ioctl()
4646 oa_config->mux_regs_len = args->n_mux_regs; in i915_perf_add_config_ioctl()
4648 perf->ops.is_valid_mux_reg, in i915_perf_add_config_ioctl()
4649 u64_to_user_ptr(args->mux_regs_ptr), in i915_perf_add_config_ioctl()
4650 args->n_mux_regs); in i915_perf_add_config_ioctl()
4653 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4658 oa_config->mux_regs = regs; in i915_perf_add_config_ioctl()
4660 oa_config->b_counter_regs_len = args->n_boolean_regs; in i915_perf_add_config_ioctl()
4662 perf->ops.is_valid_b_counter_reg, in i915_perf_add_config_ioctl()
4663 u64_to_user_ptr(args->boolean_regs_ptr), in i915_perf_add_config_ioctl()
4664 args->n_boolean_regs); in i915_perf_add_config_ioctl()
4667 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4672 oa_config->b_counter_regs = regs; in i915_perf_add_config_ioctl()
4674 if (GRAPHICS_VER(perf->i915) < 8) { in i915_perf_add_config_ioctl()
4675 if (args->n_flex_regs != 0) { in i915_perf_add_config_ioctl()
4676 err = -EINVAL; in i915_perf_add_config_ioctl()
4680 oa_config->flex_regs_len = args->n_flex_regs; in i915_perf_add_config_ioctl()
4682 perf->ops.is_valid_flex_reg, in i915_perf_add_config_ioctl()
4683 u64_to_user_ptr(args->flex_regs_ptr), in i915_perf_add_config_ioctl()
4684 args->n_flex_regs); in i915_perf_add_config_ioctl()
4687 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4692 oa_config->flex_regs = regs; in i915_perf_add_config_ioctl()
4695 err = mutex_lock_interruptible(&perf->metrics_lock); in i915_perf_add_config_ioctl()
4702 idr_for_each_entry(&perf->metrics_idr, tmp, id) { in i915_perf_add_config_ioctl()
4703 if (!strcmp(tmp->uuid, oa_config->uuid)) { in i915_perf_add_config_ioctl()
4704 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4706 err = -EADDRINUSE; in i915_perf_add_config_ioctl()
4713 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4719 oa_config->id = idr_alloc(&perf->metrics_idr, in i915_perf_add_config_ioctl()
4722 if (oa_config->id < 0) { in i915_perf_add_config_ioctl()
4723 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4725 err = oa_config->id; in i915_perf_add_config_ioctl()
4728 id = oa_config->id; in i915_perf_add_config_ioctl()
4730 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4731 "Added config %s id=%i\n", oa_config->uuid, oa_config->id); in i915_perf_add_config_ioctl()
4732 mutex_unlock(&perf->metrics_lock); in i915_perf_add_config_ioctl()
4737 mutex_unlock(&perf->metrics_lock); in i915_perf_add_config_ioctl()
4740 drm_dbg(&perf->i915->drm, in i915_perf_add_config_ioctl()
4746 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
4759 struct i915_perf *perf = &to_i915(dev)->perf; in i915_perf_remove_config_ioctl()
4764 if (!perf->i915) in i915_perf_remove_config_ioctl()
4765 return -ENOTSUPP; in i915_perf_remove_config_ioctl()
4768 drm_dbg(&perf->i915->drm, in i915_perf_remove_config_ioctl()
4770 return -EACCES; in i915_perf_remove_config_ioctl()
4773 ret = mutex_lock_interruptible(&perf->metrics_lock); in i915_perf_remove_config_ioctl()
4777 oa_config = idr_find(&perf->metrics_idr, *arg); in i915_perf_remove_config_ioctl()
4779 drm_dbg(&perf->i915->drm, in i915_perf_remove_config_ioctl()
4781 ret = -ENOENT; in i915_perf_remove_config_ioctl()
4785 GEM_BUG_ON(*arg != oa_config->id); in i915_perf_remove_config_ioctl()
4787 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); in i915_perf_remove_config_ioctl()
4789 idr_remove(&perf->metrics_idr, *arg); in i915_perf_remove_config_ioctl()
4791 mutex_unlock(&perf->metrics_lock); in i915_perf_remove_config_ioctl()
4793 drm_dbg(&perf->i915->drm, in i915_perf_remove_config_ioctl()
4794 "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); in i915_perf_remove_config_ioctl()
4801 mutex_unlock(&perf->metrics_lock); in i915_perf_remove_config_ioctl()
4833 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) { in __oam_engine_group()
4838 drm_WARN_ON(&engine->i915->drm, in __oam_engine_group()
4839 engine->gt->type != GT_MEDIA); in __oam_engine_group()
4849 switch (engine->class) { in __oa_engine_group()
4894 int i, num_groups = gt->perf.num_perf_groups; in oa_init_groups()
4897 struct i915_perf_group *g = &gt->perf.group[i]; in oa_init_groups()
4900 if (g->num_engines == 0) in oa_init_groups()
4903 if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) { in oa_init_groups()
4904 g->regs = __oag_regs(); in oa_init_groups()
4905 g->type = TYPE_OAG; in oa_init_groups()
4906 } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { in oa_init_groups()
4907 g->regs = __oam_regs(mtl_oa_base[i]); in oa_init_groups()
4908 g->type = TYPE_OAM; in oa_init_groups()
4922 return -ENOMEM; in oa_init_gt()
4927 engine->oa_group = NULL; in oa_init_gt()
4930 engine->oa_group = &g[index]; in oa_init_gt()
4934 gt->perf.num_perf_groups = num_groups; in oa_init_gt()
4935 gt->perf.group = g; in oa_init_gt()
4947 for_each_gt(gt, perf->i915, i) { in oa_init_engine_groups()
4958 struct drm_i915_private *i915 = perf->i915; in oa_init_supported_formats()
4959 enum intel_platform platform = INTEL_INFO(i915)->platform; in oa_init_supported_formats()
5014 struct i915_perf *perf = &i915->perf; in i915_perf_init_info()
5018 perf->ctx_oactxctrl_offset = 0x120; in i915_perf_init_info()
5019 perf->ctx_flexeu0_offset = 0x2ce; in i915_perf_init_info()
5020 perf->gen8_valid_ctx_bit = BIT(25); in i915_perf_init_info()
5023 perf->ctx_oactxctrl_offset = 0x128; in i915_perf_init_info()
5024 perf->ctx_flexeu0_offset = 0x3de; in i915_perf_init_info()
5025 perf->gen8_valid_ctx_bit = BIT(16); in i915_perf_init_info()
5028 perf->ctx_oactxctrl_offset = 0x124; in i915_perf_init_info()
5029 perf->ctx_flexeu0_offset = 0x78e; in i915_perf_init_info()
5030 perf->gen8_valid_ctx_bit = BIT(16); in i915_perf_init_info()
5033 perf->gen8_valid_ctx_bit = BIT(16); in i915_perf_init_info()
5036 * cache the value in perf->ctx_oactxctrl_offset. in i915_perf_init_info()
5045 * i915_perf_init - initialize i915-perf state on module bind
5048 * Initializes i915-perf state without exposing anything to userspace.
5050 * Note: i915-perf initialization is split into an 'init' and 'register'
5055 struct i915_perf *perf = &i915->perf; in i915_perf_init()
5057 perf->oa_formats = oa_formats; in i915_perf_init()
5059 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; in i915_perf_init()
5060 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; in i915_perf_init()
5061 perf->ops.is_valid_flex_reg = NULL; in i915_perf_init()
5062 perf->ops.enable_metric_set = hsw_enable_metric_set; in i915_perf_init()
5063 perf->ops.disable_metric_set = hsw_disable_metric_set; in i915_perf_init()
5064 perf->ops.oa_enable = gen7_oa_enable; in i915_perf_init()
5065 perf->ops.oa_disable = gen7_oa_disable; in i915_perf_init()
5066 perf->ops.read = gen7_oa_read; in i915_perf_init()
5067 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; in i915_perf_init()
5075 perf->ops.read = gen8_oa_read; in i915_perf_init()
5079 perf->ops.is_valid_b_counter_reg = in i915_perf_init()
5081 perf->ops.is_valid_mux_reg = in i915_perf_init()
5083 perf->ops.is_valid_flex_reg = in i915_perf_init()
5087 perf->ops.is_valid_mux_reg = in i915_perf_init()
5091 perf->ops.oa_enable = gen8_oa_enable; in i915_perf_init()
5092 perf->ops.oa_disable = gen8_oa_disable; in i915_perf_init()
5093 perf->ops.enable_metric_set = gen8_enable_metric_set; in i915_perf_init()
5094 perf->ops.disable_metric_set = gen8_disable_metric_set; in i915_perf_init()
5095 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; in i915_perf_init()
5097 perf->ops.is_valid_b_counter_reg = in i915_perf_init()
5099 perf->ops.is_valid_mux_reg = in i915_perf_init()
5101 perf->ops.is_valid_flex_reg = in i915_perf_init()
5104 perf->ops.oa_enable = gen8_oa_enable; in i915_perf_init()
5105 perf->ops.oa_disable = gen8_oa_disable; in i915_perf_init()
5106 perf->ops.enable_metric_set = gen8_enable_metric_set; in i915_perf_init()
5107 perf->ops.disable_metric_set = gen11_disable_metric_set; in i915_perf_init()
5108 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; in i915_perf_init()
5110 perf->ops.is_valid_b_counter_reg = in i915_perf_init()
5114 perf->ops.is_valid_mux_reg = in i915_perf_init()
5116 perf->ops.is_valid_flex_reg = in i915_perf_init()
5119 perf->ops.oa_enable = gen12_oa_enable; in i915_perf_init()
5120 perf->ops.oa_disable = gen12_oa_disable; in i915_perf_init()
5121 perf->ops.enable_metric_set = gen12_enable_metric_set; in i915_perf_init()
5122 perf->ops.disable_metric_set = gen12_disable_metric_set; in i915_perf_init()
5123 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; in i915_perf_init()
5127 if (perf->ops.enable_metric_set) { in i915_perf_init()
5132 mutex_init(&gt->perf.lock); in i915_perf_init()
5135 oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2; in i915_perf_init()
5137 mutex_init(&perf->metrics_lock); in i915_perf_init()
5138 idr_init_base(&perf->metrics_idr, 1); in i915_perf_init()
5150 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10); in i915_perf_init()
5155 ratelimit_set_flags(&perf->spurious_report_rs, in i915_perf_init()
5158 ratelimit_state_init(&perf->tail_pointer_race, in i915_perf_init()
5160 ratelimit_set_flags(&perf->tail_pointer_race, in i915_perf_init()
5163 atomic64_set(&perf->noa_programming_delay, in i915_perf_init()
5166 perf->i915 = i915; in i915_perf_init()
5170 drm_err(&i915->drm, in i915_perf_init()
5199 * i915_perf_fini - Counterpart to i915_perf_init()
5204 struct i915_perf *perf = &i915->perf; in i915_perf_fini()
5208 if (!perf->i915) in i915_perf_fini()
5211 for_each_gt(gt, perf->i915, i) in i915_perf_fini()
5212 kfree(gt->perf.group); in i915_perf_fini()
5214 idr_for_each(&perf->metrics_idr, destroy_config, perf); in i915_perf_fini()
5215 idr_destroy(&perf->metrics_idr); in i915_perf_fini()
5217 memset(&perf->ops, 0, sizeof(perf->ops)); in i915_perf_fini()
5218 perf->i915 = NULL; in i915_perf_fini()
5222 * i915_perf_ioctl_version - Version of the i915-perf subsystem
5257 * C6 disable in BIOS. If Media C6 is enabled in BIOS, return version 6 in i915_perf_ioctl_version()
5260 if (IS_MEDIA_GT_IP_STEP(i915->media_gt, IP_VER(13, 0), STEP_A0, STEP_C0) && in i915_perf_ioctl_version()
5261 intel_check_bios_c6_setup(&i915->media_gt->rc6)) in i915_perf_ioctl_version()