Lines Matching +full:cpu +full:- +full:bpmp +full:- +full:rx
1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2020 - 2022, NVIDIA CORPORATION. All rights reserved
6 #include <linux/cpu.h>
9 #include <linux/dma-mapping.h>
19 #include <soc/tegra/bpmp.h>
20 #include <soc/tegra/bpmp-abi.h>
30 #define CORE_OFFSET(cpu) (cpu * 8) argument
32 #define SCRATCH_FREQ_CORE_REG(data, cpu) (data->regs + CMU_CLKS_BASE + CORE_OFFSET(cpu)) argument
36 (data->regs + (MMCRAB_CLUSTER_BASE(cl) + data->soc->actmon_cntr_base))
37 #define CORE_ACTMON_CNTR_REG(data, cl, cpu) (CLUSTER_ACTMON_BASE(data, cl) + CORE_OFFSET(cpu)) argument
43 u32 cpu; member
56 void (*get_cpu_cluster_id)(u32 cpu, u32 *cpuid, u32 *clusterid);
57 int (*get_cpu_ndiv)(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv);
83 dev = get_cpu_device(policy->cpu); in tegra_cpufreq_set_bw()
85 return -ENODEV; in tegra_cpufreq_set_bw()
93 data->icc_dram_bw_scaling = false; in tegra_cpufreq_set_bw()
104 static void tegra234_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) in tegra234_get_cpu_cluster_id() argument
108 smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); in tegra234_get_cpu_cluster_id()
116 static int tegra234_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) in tegra234_get_cpu_ndiv() argument
123 mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; in tegra234_get_cpu_ndiv()
135 u32 cpu, cpuid, clusterid; in tegra234_set_cpu_ndiv() local
138 for_each_cpu_and(cpu, policy->cpus, cpu_online_mask) { in tegra234_set_cpu_ndiv()
139 data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); in tegra234_set_cpu_ndiv()
142 mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; in tegra234_set_cpu_ndiv()
151 * 64-bit read. The counter values are used to determine the average
163 data->soc->ops->get_cpu_cluster_id(c->cpu, &cpuid, &clusterid); in tegra234_read_counters()
167 c->last_refclk_cnt = upper_32_bits(val); in tegra234_read_counters()
168 c->last_coreclk_cnt = lower_32_bits(val); in tegra234_read_counters()
171 c->refclk_cnt = upper_32_bits(val); in tegra234_read_counters()
172 c->coreclk_cnt = lower_32_bits(val); in tegra234_read_counters()
196 static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) in tegra194_get_cpu_cluster_id() argument
200 smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); in tegra194_get_cpu_cluster_id()
209 * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
229 return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv); in map_ndiv_to_freq()
237 c->last_refclk_cnt = lower_32_bits(val); in tegra194_read_counters()
238 c->last_coreclk_cnt = upper_32_bits(val); in tegra194_read_counters()
241 c->refclk_cnt = lower_32_bits(val); in tegra194_read_counters()
242 c->coreclk_cnt = upper_32_bits(val); in tegra194_read_counters()
265 c = &read_counters_work->c; in tegra_read_counters()
267 data->soc->ops->read_counters(c); in tegra_read_counters()
271 * Return instantaneous cpu speed
272 * Instantaneous freq is calculated as -
273 * -Takes sample on every query of getting the freq.
274 * - Read core and ref clock counters;
275 * - Delay for X us
276 * - Read above cycle counters again
277 * - Calculates freq by subtracting current and previous counters
279 * - Return Kcycles/second, freq in KHz
288 * @cpu - logical cpu whose freq to be updated
289 * Returns freq in KHz on success, 0 if cpu is offline
291 static unsigned int tegra194_calculate_speed(u32 cpu) in tegra194_calculate_speed() argument
300 * udelay() is required to reconstruct cpu frequency over an in tegra194_calculate_speed()
304 read_counters_work.c.cpu = cpu; in tegra194_calculate_speed()
306 queue_work_on(cpu, read_counters_wq, &read_counters_work.work); in tegra194_calculate_speed()
311 delta_ccnt = c.coreclk_cnt + (MAX_CNT - c.last_coreclk_cnt); in tegra194_calculate_speed()
313 delta_ccnt = c.coreclk_cnt - c.last_coreclk_cnt; in tegra194_calculate_speed()
319 delta_refcnt = c.refclk_cnt + (MAX_CNT - c.last_refclk_cnt); in tegra194_calculate_speed()
321 delta_refcnt = c.refclk_cnt - c.last_refclk_cnt; in tegra194_calculate_speed()
323 pr_debug("cpufreq: %d is idle, delta_refcnt: 0\n", cpu); in tegra194_calculate_speed()
340 static int tegra194_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) in tegra194_get_cpu_ndiv() argument
342 return smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, &ndiv, true); in tegra194_get_cpu_ndiv()
354 on_each_cpu_mask(policy->cpus, tegra194_set_cpu_ndiv_sysreg, &ndiv, true); in tegra194_set_cpu_ndiv()
357 static unsigned int tegra194_get_speed(u32 cpu) in tegra194_get_speed() argument
366 data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); in tegra194_get_speed()
368 /* reconstruct actual cpu freq using counters */ in tegra194_get_speed()
369 rate = tegra194_calculate_speed(cpu); in tegra194_get_speed()
372 ret = data->soc->ops->get_cpu_ndiv(cpu, cpuid, clusterid, &ndiv); in tegra194_get_speed()
382 cpufreq_for_each_valid_entry(pos, data->bpmp_luts[clusterid]) { in tegra194_get_speed()
383 if (pos->driver_data != ndiv) in tegra194_get_speed()
386 if (abs(pos->frequency - rate) > 115200) { in tegra194_get_speed()
387 pr_warn("cpufreq: cpu%d,cur:%u,set:%u,set ndiv:%llu\n", in tegra194_get_speed()
388 cpu, rate, pos->frequency, ndiv); in tegra194_get_speed()
390 rate = pos->frequency; in tegra194_get_speed()
410 cpu_dev = get_cpu_device(policy->cpu); in tegra_cpufreq_init_cpufreq_table()
412 pr_err("%s: failed to get cpu%d device\n", __func__, policy->cpu); in tegra_cpufreq_init_cpufreq_table()
413 return -ENODEV; in tegra_cpufreq_init_cpufreq_table()
416 /* Initialize OPP table mentioned in operating-points-v2 property in DT */ in tegra_cpufreq_init_cpufreq_table()
425 /* Disable all opps and cross-validate against LUT later */ in tegra_cpufreq_init_cpufreq_table()
436 data->icc_dram_bw_scaling = false; in tegra_cpufreq_init_cpufreq_table()
442 return -ENOMEM; in tegra_cpufreq_init_cpufreq_table()
445 * Cross check the frequencies from BPMP-FW LUT against the OPP's present in DT. in tegra_cpufreq_init_cpufreq_table()
449 opp = dev_pm_opp_find_freq_exact(cpu_dev, pos->frequency * KHZ, false); in tegra_cpufreq_init_cpufreq_table()
455 ret = dev_pm_opp_enable(cpu_dev, pos->frequency * KHZ); in tegra_cpufreq_init_cpufreq_table()
459 freq_table[j].driver_data = pos->driver_data; in tegra_cpufreq_init_cpufreq_table()
460 freq_table[j].frequency = pos->frequency; in tegra_cpufreq_init_cpufreq_table()
464 freq_table[j].driver_data = pos->driver_data; in tegra_cpufreq_init_cpufreq_table()
469 dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); in tegra_cpufreq_init_cpufreq_table()
477 int maxcpus_per_cluster = data->soc->maxcpus_per_cluster; in tegra194_cpufreq_init()
480 u32 start_cpu, cpu; in tegra194_cpufreq_init() local
484 data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid); in tegra194_cpufreq_init()
485 if (clusterid >= data->soc->num_clusters || !data->bpmp_luts[clusterid]) in tegra194_cpufreq_init()
486 return -EINVAL; in tegra194_cpufreq_init()
488 start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); in tegra194_cpufreq_init()
490 for (cpu = start_cpu; cpu < (start_cpu + maxcpus_per_cluster); cpu++) { in tegra194_cpufreq_init()
491 if (cpu_possible(cpu)) in tegra194_cpufreq_init()
492 cpumask_set_cpu(cpu, policy->cpus); in tegra194_cpufreq_init()
494 policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY; in tegra194_cpufreq_init()
496 bpmp_lut = data->bpmp_luts[clusterid]; in tegra194_cpufreq_init()
498 if (data->icc_dram_bw_scaling) { in tegra194_cpufreq_init()
501 policy->freq_table = freq_table; in tegra194_cpufreq_init()
506 data->icc_dram_bw_scaling = false; in tegra194_cpufreq_init()
507 policy->freq_table = bpmp_lut; in tegra194_cpufreq_init()
515 /* We did light-weight tear down earlier, nothing to do here */ in tegra194_cpufreq_online()
522 * Preserve policy->driver_data and don't free resources on light-weight in tegra194_cpufreq_offline()
531 struct device *cpu_dev = get_cpu_device(policy->cpu); in tegra194_cpufreq_exit()
534 dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); in tegra194_cpufreq_exit()
542 struct cpufreq_frequency_table *tbl = policy->freq_table + index; in tegra194_cpufreq_set_target()
550 data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data); in tegra194_cpufreq_set_target()
552 if (data->icc_dram_bw_scaling) in tegra194_cpufreq_set_target()
553 tegra_cpufreq_set_bw(policy, tbl->frequency); in tegra194_cpufreq_set_target()
591 tegra_cpufreq_bpmp_read_lut(struct platform_device *pdev, struct tegra_bpmp *bpmp, in tegra_cpufreq_bpmp_read_lut() argument
609 msg.rx.data = &resp; in tegra_cpufreq_bpmp_read_lut()
610 msg.rx.size = sizeof(resp); in tegra_cpufreq_bpmp_read_lut()
612 err = tegra_bpmp_transfer(bpmp, &msg); in tegra_cpufreq_bpmp_read_lut()
615 if (msg.rx.ret == -BPMP_EINVAL) { in tegra_cpufreq_bpmp_read_lut()
619 if (msg.rx.ret) in tegra_cpufreq_bpmp_read_lut()
620 return ERR_PTR(-EINVAL); in tegra_cpufreq_bpmp_read_lut()
629 dev_dbg(&pdev->dev, "cluster %d: frequency table step size: %d\n", in tegra_cpufreq_bpmp_read_lut()
632 delta_ndiv = resp.ndiv_max - resp.ndiv_min; in tegra_cpufreq_bpmp_read_lut()
643 freq_table = devm_kcalloc(&pdev->dev, num_freqs + 1, in tegra_cpufreq_bpmp_read_lut()
646 return ERR_PTR(-ENOMEM); in tegra_cpufreq_bpmp_read_lut()
666 struct tegra_bpmp *bpmp; in tegra194_cpufreq_probe() local
670 data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); in tegra194_cpufreq_probe()
672 return -ENOMEM; in tegra194_cpufreq_probe()
674 soc = of_device_get_match_data(&pdev->dev); in tegra194_cpufreq_probe()
676 if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters) { in tegra194_cpufreq_probe()
677 data->soc = soc; in tegra194_cpufreq_probe()
679 dev_err(&pdev->dev, "soc data missing\n"); in tegra194_cpufreq_probe()
680 return -EINVAL; in tegra194_cpufreq_probe()
683 data->bpmp_luts = devm_kcalloc(&pdev->dev, data->soc->num_clusters, in tegra194_cpufreq_probe()
684 sizeof(*data->bpmp_luts), GFP_KERNEL); in tegra194_cpufreq_probe()
685 if (!data->bpmp_luts) in tegra194_cpufreq_probe()
686 return -ENOMEM; in tegra194_cpufreq_probe()
688 if (soc->actmon_cntr_base) { in tegra194_cpufreq_probe()
689 /* mmio registers are used for frequency request and re-construction */ in tegra194_cpufreq_probe()
690 data->regs = devm_platform_ioremap_resource(pdev, 0); in tegra194_cpufreq_probe()
691 if (IS_ERR(data->regs)) in tegra194_cpufreq_probe()
692 return PTR_ERR(data->regs); in tegra194_cpufreq_probe()
697 bpmp = tegra_bpmp_get(&pdev->dev); in tegra194_cpufreq_probe()
698 if (IS_ERR(bpmp)) in tegra194_cpufreq_probe()
699 return PTR_ERR(bpmp); in tegra194_cpufreq_probe()
703 dev_err(&pdev->dev, "fail to create_workqueue\n"); in tegra194_cpufreq_probe()
704 err = -EINVAL; in tegra194_cpufreq_probe()
708 for (i = 0; i < data->soc->num_clusters; i++) { in tegra194_cpufreq_probe()
709 data->bpmp_luts[i] = tegra_cpufreq_bpmp_read_lut(pdev, bpmp, i); in tegra194_cpufreq_probe()
710 if (IS_ERR(data->bpmp_luts[i])) { in tegra194_cpufreq_probe()
711 err = PTR_ERR(data->bpmp_luts[i]); in tegra194_cpufreq_probe()
721 err = -EPROBE_DEFER; in tegra194_cpufreq_probe()
728 data->icc_dram_bw_scaling = true; in tegra194_cpufreq_probe()
738 tegra_bpmp_put(bpmp); in tegra194_cpufreq_probe()
749 { .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc },
750 { .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc },
751 { .compatible = "nvidia,tegra239-ccplex-cluster", .data = &tegra239_cpufreq_soc },
758 .name = "tegra194-cpufreq",