AMD IBS perf driver excerpts (arch/x86/events/amd/ibs.c)

/*
 * Performance events - AMD IBS
 *
 * For licensing details see kernel-base/COPYING
 */

#include <asm/amd-ibs.h>
/* perf_event_set_period() */
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;

	/* If we are way outside a reasonable range then just skip forward: */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
	}
	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
	}
	/*
	 * If the hw period that triggers the sw overflow is too short we
	 * might hit the irq handler. This biases the results. Thus we
	 * shorten the next-to-last period and set the last period to the
	 * max period.
	 */
	if (left > max)
		left -= max;
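/*
 * Worked example (hypothetical numbers, min = 0x10): with left = max + 0x90,
 * the branch above programs 0x90 as the next-to-last hardware period and
 * leaves one full max-sized period in period_left for the final chunk, so
 * the interrupt handler never sees a dangerously short period.
 */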
/* perf_event_try_update() */
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count, delta;

	/*
	 * Careful: an NMI might modify the previous event value. Our tactic
	 * is to first atomically read and exchange a new raw count - then
	 * add that new-prev delta count to the generic event atomically.
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (!local64_try_cmpxchg(&hwc->prev_count,
				 &prev_raw_count, new_raw_count))
		return 0;

	/*
	 * Now we can calculate the elapsed delta (event-)time and add that
	 * to the generic event. Careful, not all hw sign-extends above the
	 * physical width of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
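/*
 * Standalone sketch (not kernel code) of the shift trick used above: for a
 * counter 'width' bits wide, shifting both samples up by 64 - width makes
 * bit width-1 the sign bit, so the subtraction wraps correctly and the
 * arithmetic shift back down sign-extends the delta even across counter
 * rollover. Relies on the usual two's-complement conversion behavior.
 */
#include <stdint.h>

static int64_t counter_delta(uint64_t prev, uint64_t now, int width)
{
	int shift = 64 - width;

	return ((int64_t)(now << shift) - (int64_t)(prev << shift)) >> shift;
}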
/*
 * core pmu config -> IBS config
 *
 *  perf record -a -e cpu-cycles:p ...  # use ibs op counting cycle count
 *  perf record -a -e r076:p ...        # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...        # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of the IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting mode.
 */
/* core_pmu_ibs_config() */
	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		switch (event->attr.config) {
		...	/* cpu-cycles maps to IBS op cycle counting */
		}
		break;
	case PERF_TYPE_RAW:
		switch (event->attr.config) {
		...	/* r076: cycles; r0C1: sets IbsOpCntCtl for micro-ops */
		}
		break;
	default:
		return -ENOENT;
	}

	return -EOPNOTSUPP;
/* forward_event_to_ibs() */
	if (!event->attr.precise_ip || event->attr.precise_ip > 2)
		return -EOPNOTSUPP;

	if (!core_pmu_ibs_config(event, &config)) {
		event->attr.type = perf_ibs_op.pmu.type;
		event->attr.config = config;
	}
	/* always -ENOENT, so the core retries init with the new type */
	return -ENOENT;
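/*
 * Hypothetical userspace sketch: an event opened like this (the :p modifier
 * in perf sets attr.precise_ip) is what the forwarding path above redirects
 * to the IBS op PMU. The helper name is illustrative, not part of the driver.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_precise_cycles(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.precise_ip = 1;	/* cpu-cycles:p -> kernel may forward to ibs_op */

	/* pid 0, cpu -1: measure this thread on any CPU */
	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}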
/* validate_group(): IBS can have only one event active at any point in time */
	if (event->group_leader == event)
		return 0;

	if (event->group_leader->pmu == event->pmu)
		return -EINVAL;

	for_each_sibling_event(sibling, event->group_leader) {
		if (sibling->pmu == event->pmu)
			return -EINVAL;
	}
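/*
 * Illustration of that rule (hypothetical snippet, reusing the syscall
 * pattern above; ibs_attr is a perf_event_attr whose .type was read from
 * /sys/bus/event_source/devices/ibs_op/type): the second open fails.
 */
int leader  = syscall(SYS_perf_event_open, &ibs_attr, 0, -1, -1, 0);     /* ok */
int sibling = syscall(SYS_perf_event_open, &ibs_attr, 0, -1, leader, 0); /* EINVAL */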
/* perf_ibs_init() */
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (!perf_ibs)
		return -ENOENT;

	config = event->attr.config;

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	ret = validate_group(event);
	if (ret)
		return ret;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * lower 4 bits can not be set in ibs max cnt,
			 * but allowing it in case we adjust the
			 * sample period to set a frequency.
			 */
			return -EINVAL;
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}

	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;
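/*
 * Worked example of the MaxCnt encoding handled above: the hardware field
 * stores the period right-shifted by 4, so only multiples of 16 are
 * programmable. A sample_period of 0x1013 is rejected with -EINVAL (unless
 * sample_freq is in use); 0x1010 is programmed as MaxCnt = 0x1010 >> 4 = 0x101.
 */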
/* perf_ibs_set_period() */
	/* ignore lower 4 bits in min count: */
	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
	local64_set(&hwc->prev_count, 0);
/*
 * get_ibs_op_count(): if the internal 27-bit counter rolled over, the count
 * is MaxCnt and the lower 7 bits of CurCnt are randomized. Otherwise CurCnt
 * has the full 27-bit current counter value.
 */
/* perf_ibs_event_update() */
	u64 count = perf_ibs->get_count(*config);

	while (!perf_event_try_update(event, count, 64)) {
		rdmsrl(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
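/*
 * Sketch of the rollover handling described above, assuming the IBS mask
 * names from the kernel's msr definitions (IBS_OP_VAL, IBS_OP_MAX_CNT,
 * IBS_OP_CUR_CNT, IBS_CAPS_RDWROPCNT); a simplification of the driver's
 * get_ibs_op_count(), not a verbatim copy.
 */
static u64 ibs_op_count_sketch(u64 config)
{
	u64 count = 0;

	if (config & IBS_OP_VAL)			/* rolled over */
		count = (config & IBS_OP_MAX_CNT) << 4;	/* count = MaxCnt */
	else if (ibs_caps & IBS_CAPS_RDWROPCNT)		/* CurCnt readable */
		count = (config & IBS_OP_CUR_CNT) >> 32;

	return count;
}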
/* perf_ibs_enable_event() */
	u64 tmp = hwc->config | config;

	/* on affected parts, force a 0->1 transition of the enable bit */
	if (perf_ibs->fetch_count_reset_broken)
		wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);

	wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate Interrupt
 * that Cannot Be Cleared: clear the counter mask first, then clear the
 * enable bit (Revision Guide for AMD Family 10h Processors).
 */
/* perf_ibs_disable_event() */
	config &= ~perf_ibs->cnt_mask;
	if (boot_cpu_data.x86 == 0x10)
		wrmsrl(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
/* perf_ibs_start() */
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period, config = 0;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	config |= period >> 4;

	set_bit(IBS_STARTED, pcpu->state);
	clear_bit(IBS_STOPPING, pcpu->state);

	perf_ibs_enable_event(perf_ibs, hwc, config);
/* perf_ibs_stop() */
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
		return;

	stopping = test_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		/* set STOPPED before disabling the hardware, so NMIs see it
		 * the moment the EN bit is cleared and can consume the
		 * resulting !VALID sample */
		set_bit(IBS_STOPPED, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		/* clear STARTED only after disabling, else an NMI racing
		 * with the disable might be mistaken for a spurious one */
		clear_bit(IBS_STARTED, pcpu->state);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/* clear the valid bit so rollovers are not counted on update */
	config &= ~perf_ibs->valid_mask;
	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
/* perf_ibs_add() */
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	/* only one IBS event may be active per cpu at a time */
	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;
/* perf_ibs_del() */
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);
	pcpu->event = NULL;
/* zen4_ibs_extensions_is_visible() */
	return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;

/* cnt_ctl_is_visible() */
	return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
/* perf_ibs_get_mem_op() */
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_op = PERF_MEM_OP_NA;

	if (op_data3->ld_op)
		data_src->mem_op = PERF_MEM_OP_LOAD;
	else if (op_data3->st_op)
		data_src->mem_op = PERF_MEM_OP_STORE;
/*
 * Processors with IBS_CAPS_ZEN4 have more fine-granular DataSrc encodings;
 * older ones only have coarse encodings.
 */
/* perf_ibs_data_src() */
	if (ibs_caps & IBS_CAPS_ZEN4)
		return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;

	return op_data2->data_src_lo;
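/*
 * Worked example of the concatenation above: with DataSrcHi = 0x2 and
 * DataSrcLo = 0x3, Zen4 parts report source (0x2 << 3) | 0x3 = 0x13,
 * while older parts only ever see the 3-bit value 0x3.
 */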
/* perf_ibs_get_mem_lvl() */
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src = perf_ibs_data_src(op_data2);

	data_src->mem_lvl = 0;
	data_src->mem_lvl_num = 0;

	/* DcMiss, L2Miss, DataSrc etc. are all invalid for uncached accesses */
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
		return L(UNC) | LN(UNC);

	if (op_data3->dc_miss == 0)	/* L1 hit */
		return L(L1) | LN(L1);

	if (op_data3->l2_miss == 0) {	/* L2 hit, modulo erratum #1293 */
		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
			return L(L2) | LN(L2);
	}

	/* further DataSrc-based levels apply to loads only */
	if (data_src->mem_op != PERF_MEM_OP_LOAD)
		goto check_mab;
	...
	if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
		...	/* Zen4 remote-node variants of the DataSrc levels */
	...
	if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
		...	/* pre-Zen4 remote-node variants */
	...
check_mab:
	if (op_data3->dc_miss_no_mab_alloc)	/* load hit an in-flight MAB/LFB */
		return L(LFB) | LN(LFB);
/* perf_ibs_cache_hit_st_valid() */
	/* 0: Uninitialized, 1: Valid, -1: Invalid */
	static int cache_hit_st_valid;
	...
		cache_hit_st_valid = -1;	/* erratum-affected models */
/* perf_ibs_get_mem_snoop() */
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src;

	data_src->mem_snoop = PERF_MEM_SNOOP_NA;

	if (!perf_ibs_cache_hit_st_valid() ||
	    data_src->mem_op != PERF_MEM_OP_LOAD ||
	    data_src->mem_lvl & PERF_MEM_LVL_L1 ||
	    data_src->mem_lvl & PERF_MEM_LVL_L2 ||
	    op_data2->cache_hit_st)
		return;

	ibs_data_src = perf_ibs_data_src(op_data2);

	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
		    ...)
			data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	} else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	}
/* perf_ibs_get_tlb_lvl() */
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_dtlb = PERF_MEM_TLB_NA;

	if (!op_data3->dc_lin_addr_valid)
		return;

	if (!op_data3->dc_l1tlb_miss) {
		data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
		return;
	}

	if (!op_data3->dc_l2tlb_miss) {
		data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
		return;
	}

	data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
/* perf_ibs_get_mem_lock() */
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_lock = PERF_MEM_LOCK_NA;

	if (op_data3->dc_locked_op)
		data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
#define ibs_op_msr_idx(msr)	(msr - MSR_AMD64_IBSOPCTL)
/* perf_ibs_get_data_src() */
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
/* perf_ibs_get_op_data2() */
	__u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];

	/* Erratum #1293 */
	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
	    (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
		val = 0;	/* DataSrc = 0 means 'no valid status' */
/* perf_ibs_parse_ld_st_data() */
	union ibs_op_data3 op_data3;
	union ibs_op_data2 op_data2;
	union ibs_op_data op_data;

	data->data_src.val = PERF_MEM_NA;
	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];

	perf_ibs_get_mem_op(&op_data3, data);
	if (data->data_src.mem_op != PERF_MEM_OP_LOAD &&
	    data->data_src.mem_op != PERF_MEM_OP_STORE)
		return;

	op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);

	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss &&
	    data->data_src.mem_op == PERF_MEM_OP_LOAD) {
		op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];

		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
			data->weight.var1_dw = op_data3.dc_miss_lat;
			data->weight.var2_w = op_data.tag_to_ret_ctr;
		} else if (sample_type & PERF_SAMPLE_WEIGHT) {
			data->weight.full = op_data3.dc_miss_lat;
		}
		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) {
		data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) {
		data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
		data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
	}
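/*
 * Usage sketch: the data_src/weight/addr fields filled above are what the
 * perf mem tooling consumes on IBS-capable CPUs, e.g.:
 *
 *   perf mem record -a -- sleep 5
 *   perf mem report
 */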
/* perf_ibs_get_offset_max() */
		return perf_ibs->offset_max;
/* perf_ibs_handle_irq() */
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	...
	if (!test_bit(IBS_STARTED, pcpu->state)) {
	fail:
		/* catch spurious NMIs that arrive after stopping IBS,
		 * possibly with the valid bit already cleared */
		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
			return 1;
		return 0;
	}

	hwc = &event->hw;
	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		goto fail;

	perf_sample_data_init(&data, 0, hwc->last_period);
	...
	offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
	do {
		rdmsrl(msr + offset, *buf++);
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);

	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		...	/* gather the remaining raw registers */

	/* erratum workaround: ignore fetch samples with a zero rip */
	if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
		goto out;

	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		...	/* stash ibs_data.regs[] as the raw sample payload */

	if (perf_ibs == &perf_ibs_op)
		perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);

	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
		...
/* perf_ibs_nmi_handler() */
	perf_sample_event_took(sched_clock() - stamp);
/* perf_ibs_pmu_init() */
	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;
	perf_ibs->pcpu = pcpu;

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;	/* registration failed */
		free_percpu(pcpu);
	}
/*
 * perf_ibs_fetch_init(): some chips fail to reset the fetch count when it
 * is written; they need a 0-1 transition of IbsFetchEn instead (handled in
 * perf_ibs_enable_event() above).
 */
/* IBS - apic initialization, for perf and oprofile */
/* setup_ibs_ctl() */
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID))
			return -EINVAL;	/* IBSCTL readback mismatch */
	...
	if (!nodes)
		return -ENODEV;	/* no CPU node configured for IBS */
/*
 * On success all nodes are initialized with the chosen LVT offset, which
 * updates the offset in the IBS_CTL per-node msr. The per-core APIC setup
 * of IBS requires that each core has its local APIC IBS LVT entry set up.
 */
/* get_ibs_lvt_offset() */
	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		return -EINVAL;
/* amd_ibs_init() */
	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */
	...
	if (!ibs_eilvt_valid())
		return -EINVAL;