Lines Matching +full:cpu +full:- +full:bpmp +full:- +full:tx
1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2020 - 2022, NVIDIA CORPORATION. All rights reserved
6 #include <linux/cpu.h>
8 #include <linux/dma-mapping.h>
18 #include <soc/tegra/bpmp.h>
19 #include <soc/tegra/bpmp-abi.h>
30 #define CORE_OFFSET(cpu) (cpu * 8) argument
32 #define SCRATCH_FREQ_CORE_REG(data, cpu) (data->regs + CMU_CLKS_BASE + CORE_OFFSET(cpu)) argument
36 (data->regs + (MMCRAB_CLUSTER_BASE(cl) + data->soc->actmon_cntr_base))
37 #define CORE_ACTMON_CNTR_REG(data, cl, cpu) (CLUSTER_ACTMON_BASE(data, cl) + CORE_OFFSET(cpu)) argument
49 u32 cpu; member
62 void (*get_cpu_cluster_id)(u32 cpu, u32 *cpuid, u32 *clusterid);
63 int (*get_cpu_ndiv)(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv);
91 dev = get_cpu_device(policy->cpu); in tegra_cpufreq_set_bw()
93 return -ENODEV; in tegra_cpufreq_set_bw()
101 data->icc_dram_bw_scaling = false; in tegra_cpufreq_set_bw()
112 static void tegra234_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) in tegra234_get_cpu_cluster_id() argument
116 smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); in tegra234_get_cpu_cluster_id()
124 static int tegra234_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) in tegra234_get_cpu_ndiv() argument
128 *ndiv = readl(data->cpu_data[cpu].freq_core_reg) & NDIV_MASK; in tegra234_get_cpu_ndiv()
136 u32 cpu; in tegra234_set_cpu_ndiv() local
138 for_each_cpu(cpu, policy->cpus) in tegra234_set_cpu_ndiv()
139 writel(ndiv, data->cpu_data[cpu].freq_core_reg); in tegra234_set_cpu_ndiv()
144 * 64-bit read. The counter values are used to determine the average
157 actmon_reg = CORE_ACTMON_CNTR_REG(data, data->cpu_data[c->cpu].clusterid, in tegra234_read_counters()
158 data->cpu_data[c->cpu].cpuid); in tegra234_read_counters()
161 c->last_refclk_cnt = upper_32_bits(val); in tegra234_read_counters()
162 c->last_coreclk_cnt = lower_32_bits(val); in tegra234_read_counters()
166 * clock cycles which is known to give a stable value of CPU frequency. in tegra234_read_counters()
170 c->refclk_cnt = upper_32_bits(val); in tegra234_read_counters()
171 c->coreclk_cnt = lower_32_bits(val); in tegra234_read_counters()
172 if (c->refclk_cnt < c->last_refclk_cnt) in tegra234_read_counters()
173 delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt); in tegra234_read_counters()
175 delta_refcnt = c->refclk_cnt - c->last_refclk_cnt; in tegra234_read_counters()
177 pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n", in tegra234_read_counters()
178 c->cpu, delta_refcnt, cnt); in tegra234_read_counters()
181 } while (delta_refcnt < data->soc->refclk_delta_min); in tegra234_read_counters()
207 static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) in tegra194_get_cpu_cluster_id() argument
211 smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); in tegra194_get_cpu_cluster_id()
220 * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
240 return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv); in map_ndiv_to_freq()
251 c->last_refclk_cnt = lower_32_bits(val); in tegra194_read_counters()
252 c->last_coreclk_cnt = upper_32_bits(val); in tegra194_read_counters()
256 * clock cycles which is known to give a stable value of CPU frequency. in tegra194_read_counters()
260 c->refclk_cnt = lower_32_bits(val); in tegra194_read_counters()
261 c->coreclk_cnt = upper_32_bits(val); in tegra194_read_counters()
262 if (c->refclk_cnt < c->last_refclk_cnt) in tegra194_read_counters()
263 delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt); in tegra194_read_counters()
265 delta_refcnt = c->refclk_cnt - c->last_refclk_cnt; in tegra194_read_counters()
267 pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n", in tegra194_read_counters()
268 c->cpu, delta_refcnt, cnt); in tegra194_read_counters()
271 } while (delta_refcnt < data->soc->refclk_delta_min); in tegra194_read_counters()
294 c = &read_counters_work->c; in tegra_read_counters()
296 data->soc->ops->read_counters(c); in tegra_read_counters()
300 * Return instantaneous cpu speed
301 * Instantaneous freq is calculated as -
302 * -Takes sample on every query of getting the freq.
303 * - Read core and ref clock counters;
304 * - Delay for X us
305 * - Read above cycle counters again
306 * - Calculates freq by subtracting current and previous counters
308 * - Return Kcycles/second, freq in KHz
317 * @cpu - logical cpu whose freq to be updated
318 * Returns freq in KHz on success, 0 if cpu is offline
320 static unsigned int tegra194_calculate_speed(u32 cpu) in tegra194_calculate_speed() argument
329 * Reconstruct cpu frequency over an observation/sampling window. in tegra194_calculate_speed()
332 read_counters_work.c.cpu = cpu; in tegra194_calculate_speed()
334 queue_work_on(cpu, read_counters_wq, &read_counters_work.work); in tegra194_calculate_speed()
339 delta_ccnt = c.coreclk_cnt + (MAX_CNT - c.last_coreclk_cnt); in tegra194_calculate_speed()
341 delta_ccnt = c.coreclk_cnt - c.last_coreclk_cnt; in tegra194_calculate_speed()
347 delta_refcnt = c.refclk_cnt + (MAX_CNT - c.last_refclk_cnt); in tegra194_calculate_speed()
349 delta_refcnt = c.refclk_cnt - c.last_refclk_cnt; in tegra194_calculate_speed()
351 pr_debug("cpufreq: %d is idle, delta_refcnt: 0\n", cpu); in tegra194_calculate_speed()
368 static int tegra194_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) in tegra194_get_cpu_ndiv() argument
370 return smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, &ndiv, true); in tegra194_get_cpu_ndiv()
382 on_each_cpu_mask(policy->cpus, tegra194_set_cpu_ndiv_sysreg, &ndiv, true); in tegra194_set_cpu_ndiv()
385 static unsigned int tegra194_get_speed(u32 cpu) in tegra194_get_speed() argument
388 u32 clusterid = data->cpu_data[cpu].clusterid; in tegra194_get_speed()
394 /* reconstruct actual cpu freq using counters */ in tegra194_get_speed()
395 rate = tegra194_calculate_speed(cpu); in tegra194_get_speed()
398 ret = data->soc->ops->get_cpu_ndiv(cpu, data->cpu_data[cpu].cpuid, clusterid, &ndiv); in tegra194_get_speed()
408 cpufreq_for_each_valid_entry(pos, data->bpmp_luts[clusterid]) { in tegra194_get_speed()
409 if (pos->driver_data != ndiv) in tegra194_get_speed()
412 if (abs(pos->frequency - rate) > MAX_DELTA_KHZ) { in tegra194_get_speed()
413 pr_warn("cpufreq: cpu%d,cur:%u,set:%u,delta:%d,set ndiv:%llu\n", in tegra194_get_speed()
414 cpu, rate, pos->frequency, abs(rate - pos->frequency), ndiv); in tegra194_get_speed()
416 rate = pos->frequency; in tegra194_get_speed()
436 cpu_dev = get_cpu_device(policy->cpu); in tegra_cpufreq_init_cpufreq_table()
438 pr_err("%s: failed to get cpu%d device\n", __func__, policy->cpu); in tegra_cpufreq_init_cpufreq_table()
439 return -ENODEV; in tegra_cpufreq_init_cpufreq_table()
442 /* Initialize OPP table mentioned in operating-points-v2 property in DT */ in tegra_cpufreq_init_cpufreq_table()
451 /* Disable all opps and cross-validate against LUT later */ in tegra_cpufreq_init_cpufreq_table()
462 data->icc_dram_bw_scaling = false; in tegra_cpufreq_init_cpufreq_table()
468 return -ENOMEM; in tegra_cpufreq_init_cpufreq_table()
471 * Cross check the frequencies from BPMP-FW LUT against the OPP's present in DT. in tegra_cpufreq_init_cpufreq_table()
475 opp = dev_pm_opp_find_freq_exact(cpu_dev, pos->frequency * KHZ, false); in tegra_cpufreq_init_cpufreq_table()
481 ret = dev_pm_opp_enable(cpu_dev, pos->frequency * KHZ); in tegra_cpufreq_init_cpufreq_table()
485 freq_table[j].driver_data = pos->driver_data; in tegra_cpufreq_init_cpufreq_table()
486 freq_table[j].frequency = pos->frequency; in tegra_cpufreq_init_cpufreq_table()
490 freq_table[j].driver_data = pos->driver_data; in tegra_cpufreq_init_cpufreq_table()
495 dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); in tegra_cpufreq_init_cpufreq_table()
503 int maxcpus_per_cluster = data->soc->maxcpus_per_cluster; in tegra194_cpufreq_init()
504 u32 clusterid = data->cpu_data[policy->cpu].clusterid; in tegra194_cpufreq_init()
507 u32 start_cpu, cpu; in tegra194_cpufreq_init() local
510 if (clusterid >= data->soc->num_clusters || !data->bpmp_luts[clusterid]) in tegra194_cpufreq_init()
511 return -EINVAL; in tegra194_cpufreq_init()
513 start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); in tegra194_cpufreq_init()
515 for (cpu = start_cpu; cpu < (start_cpu + maxcpus_per_cluster); cpu++) { in tegra194_cpufreq_init()
516 if (cpu_possible(cpu)) in tegra194_cpufreq_init()
517 cpumask_set_cpu(cpu, policy->cpus); in tegra194_cpufreq_init()
519 policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY; in tegra194_cpufreq_init()
521 bpmp_lut = data->bpmp_luts[clusterid]; in tegra194_cpufreq_init()
523 if (data->icc_dram_bw_scaling) { in tegra194_cpufreq_init()
526 policy->freq_table = freq_table; in tegra194_cpufreq_init()
531 data->icc_dram_bw_scaling = false; in tegra194_cpufreq_init()
532 policy->freq_table = bpmp_lut; in tegra194_cpufreq_init()
540 /* We did light-weight tear down earlier, nothing to do here */ in tegra194_cpufreq_online()
547 * Preserve policy->driver_data and don't free resources on light-weight in tegra194_cpufreq_offline()
556 struct device *cpu_dev = get_cpu_device(policy->cpu); in tegra194_cpufreq_exit()
559 dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); in tegra194_cpufreq_exit()
565 struct cpufreq_frequency_table *tbl = policy->freq_table + index; in tegra194_cpufreq_set_target()
573 data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data); in tegra194_cpufreq_set_target()
575 if (data->icc_dram_bw_scaling) in tegra194_cpufreq_set_target()
576 tegra_cpufreq_set_bw(policy, tbl->frequency); in tegra194_cpufreq_set_target()
615 tegra_cpufreq_bpmp_read_lut(struct platform_device *pdev, struct tegra_bpmp *bpmp, in tegra_cpufreq_bpmp_read_lut() argument
631 msg.tx.data = &req; in tegra_cpufreq_bpmp_read_lut()
632 msg.tx.size = sizeof(req); in tegra_cpufreq_bpmp_read_lut()
636 err = tegra_bpmp_transfer(bpmp, &msg); in tegra_cpufreq_bpmp_read_lut()
639 if (msg.rx.ret == -BPMP_EINVAL) { in tegra_cpufreq_bpmp_read_lut()
644 return ERR_PTR(-EINVAL); in tegra_cpufreq_bpmp_read_lut()
653 dev_dbg(&pdev->dev, "cluster %d: frequency table step size: %d\n", in tegra_cpufreq_bpmp_read_lut()
656 delta_ndiv = resp.ndiv_max - resp.ndiv_min; in tegra_cpufreq_bpmp_read_lut()
667 freq_table = devm_kcalloc(&pdev->dev, num_freqs + 1, in tegra_cpufreq_bpmp_read_lut()
670 return ERR_PTR(-ENOMEM); in tegra_cpufreq_bpmp_read_lut()
686 static int tegra194_cpufreq_store_physids(unsigned int cpu, struct tegra194_cpufreq_data *data) in tegra194_cpufreq_store_physids() argument
688 int num_cpus = data->soc->maxcpus_per_cluster * data->soc->num_clusters; in tegra194_cpufreq_store_physids()
692 if (cpu > (num_cpus - 1)) { in tegra194_cpufreq_store_physids()
694 return -EINVAL; in tegra194_cpufreq_store_physids()
697 data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); in tegra194_cpufreq_store_physids()
699 mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; in tegra194_cpufreq_store_physids()
701 data->cpu_data[cpu].cpuid = cpuid; in tegra194_cpufreq_store_physids()
702 data->cpu_data[cpu].clusterid = clusterid; in tegra194_cpufreq_store_physids()
703 data->cpu_data[cpu].freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id); in tegra194_cpufreq_store_physids()
712 struct tegra_bpmp *bpmp; in tegra194_cpufreq_probe() local
715 u32 cpu; in tegra194_cpufreq_probe() local
717 data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); in tegra194_cpufreq_probe()
719 return -ENOMEM; in tegra194_cpufreq_probe()
721 soc = of_device_get_match_data(&pdev->dev); in tegra194_cpufreq_probe()
723 if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters && soc->refclk_delta_min) { in tegra194_cpufreq_probe()
724 data->soc = soc; in tegra194_cpufreq_probe()
726 dev_err(&pdev->dev, "soc data missing\n"); in tegra194_cpufreq_probe()
727 return -EINVAL; in tegra194_cpufreq_probe()
730 data->bpmp_luts = devm_kcalloc(&pdev->dev, data->soc->num_clusters, in tegra194_cpufreq_probe()
731 sizeof(*data->bpmp_luts), GFP_KERNEL); in tegra194_cpufreq_probe()
732 if (!data->bpmp_luts) in tegra194_cpufreq_probe()
733 return -ENOMEM; in tegra194_cpufreq_probe()
735 if (soc->actmon_cntr_base) { in tegra194_cpufreq_probe()
736 /* mmio registers are used for frequency request and re-construction */ in tegra194_cpufreq_probe()
737 data->regs = devm_platform_ioremap_resource(pdev, 0); in tegra194_cpufreq_probe()
738 if (IS_ERR(data->regs)) in tegra194_cpufreq_probe()
739 return PTR_ERR(data->regs); in tegra194_cpufreq_probe()
742 data->cpu_data = devm_kcalloc(&pdev->dev, data->soc->num_clusters * in tegra194_cpufreq_probe()
743 data->soc->maxcpus_per_cluster, in tegra194_cpufreq_probe()
744 sizeof(*data->cpu_data), GFP_KERNEL); in tegra194_cpufreq_probe()
745 if (!data->cpu_data) in tegra194_cpufreq_probe()
746 return -ENOMEM; in tegra194_cpufreq_probe()
750 bpmp = tegra_bpmp_get(&pdev->dev); in tegra194_cpufreq_probe()
751 if (IS_ERR(bpmp)) in tegra194_cpufreq_probe()
752 return PTR_ERR(bpmp); in tegra194_cpufreq_probe()
756 dev_err(&pdev->dev, "fail to create_workqueue\n"); in tegra194_cpufreq_probe()
757 err = -EINVAL; in tegra194_cpufreq_probe()
761 for (i = 0; i < data->soc->num_clusters; i++) { in tegra194_cpufreq_probe()
762 data->bpmp_luts[i] = tegra_cpufreq_bpmp_read_lut(pdev, bpmp, i); in tegra194_cpufreq_probe()
763 if (IS_ERR(data->bpmp_luts[i])) { in tegra194_cpufreq_probe()
764 err = PTR_ERR(data->bpmp_luts[i]); in tegra194_cpufreq_probe()
769 for_each_possible_cpu(cpu) { in tegra194_cpufreq_probe()
770 err = tegra194_cpufreq_store_physids(cpu, data); in tegra194_cpufreq_probe()
780 err = -EPROBE_DEFER; in tegra194_cpufreq_probe()
787 data->icc_dram_bw_scaling = true; in tegra194_cpufreq_probe()
797 tegra_bpmp_put(bpmp); in tegra194_cpufreq_probe()
808 { .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc },
809 { .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc },
810 { .compatible = "nvidia,tegra239-ccplex-cluster", .data = &tegra239_cpufreq_soc },
817 .name = "tegra194-cpufreq",