More power management updates for 6.14-rc1
- Add missing error handling for syscore_suspend() to the hibernation
  core code (Wentao Liang).

- Revert a commit that added unused macros (Andy Shevchenko).

- Synchronize the runtime PM status of devices that were
  runtime-suspended before a system-wide suspend and need to be resumed
  during the subsequent system-wide resume transition (Rafael Wysocki).

- Clean up the teo cpuidle governor and make the handling of short idle
  intervals in it consistent regardless of the properties of idle
  states supplied by the cpuidle driver (Rafael Wysocki).

- Fix some boost-related issues in cpufreq (Lifeng Zheng).

- Fix build issues in the s3c64xx and airoha cpufreq drivers (Viresh
  Kumar).

- Remove unconditional binding of schedutil governor kthreads to the
  affected CPUs if the cpufreq driver indicates that updates can happen
  from any CPU (Christian Loehle).
-----BEGIN PGP SIGNATURE-----

iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAmeb5xYSHHJqd0Byand5
c29ja2kubmV0AAoJEILEb/54YlRxcFsP/2FIoEI2G6J7pk8zChWT225qkkaieh5P
tHIkcFINlgzyjLnqmyWELUdt+sB7re6/dMmoLor+abudHimvBvUfAj6Oiz1F3p2F
utE9TpfhOkXi1ci5zBl9h6+iDj2Z5op3Qe/qw/W3DTlcManAD+6r60A2tOEy0jhi
GTbp2SEEU28+LU/2J59IfxEuRTTH4pbQGXi+iKv/k9bmtLvQofa1saXyQCBSZrvO
z3MBdqnAxLeZCg/qILmEGsBvbv1wpugvp3yoMLVwGNyul12Augcs8PreQz7e5tFq
spEuCfpBJwyJLAGlOnjOYgsPbJBXWRkIBeLH7JealfZr9TX0y4LZSAHi/xe0Asd3
BBZLLDojxhYMLzmqSkuafHlQd5J7jKl++RS1A9Qm6aqglKjeSOC9Ca9fmrc9Ub9P
Jpf1SVJ3kJsv1Z7wuUcaj6oLxD8wlAgCo5pigNgWTP2HllhP2bmc22M8JWxpAz+m
nMeW8nr8bAu4XViZeb74YKGgUDngO/uKRwBpthSkE2fqM7q0Wr5E7G0u9M91mzG6
nd/XDwta5TeznMQpSy339NgT61i4HHfyc/SDIdpkBxI0C5l6jknNawq79i9gy7/E
In4MyooOlls/iX/JR0uxx2hXEByltF0IHqwsRYeJ6dmIajYgASXR1Hhh5Iy9fPJ/
JTJ7vR5oZPB/
=EgsM
-----END PGP SIGNATURE-----

Merge tag 'pm-6.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
 "These are mostly fixes on top of the previously merged power
  management material with the addition of some teo cpuidle governor
  updates, some of which may also be regarded as fixes:

   - Add missing error handling for syscore_suspend() to the
     hibernation core code (Wentao Liang)

   - Revert a commit that added unused macros (Andy Shevchenko)

   - Synchronize the runtime PM status of devices that were
     runtime-suspended before a system-wide suspend and need to be
     resumed during the subsequent system-wide resume transition
     (Rafael Wysocki)

   - Clean up the teo cpuidle governor and make the handling of short
     idle intervals in it consistent regardless of the properties of
     idle states supplied by the cpuidle driver (Rafael Wysocki)

   - Fix some boost-related issues in cpufreq (Lifeng Zheng)

   - Fix build issues in the s3c64xx and airoha cpufreq drivers
     (Viresh Kumar)

   - Remove unconditional binding of schedutil governor kthreads to
     the affected CPUs if the cpufreq driver indicates that updates
     can happen from any CPU (Christian Loehle)"

* tag 'pm-6.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: sleep: core: Synchronize runtime PM status of parents and children
  cpufreq: airoha: Depends on OF
  PM: Revert "Add EXPORT macros for exporting PM functions"
  PM: hibernate: Add error handling for syscore_suspend()
  cpufreq/schedutil: Only bind threads if needed
  cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()
  cpufreq: CPPC: Fix wrong max_freq in policy initialization
  cpufreq: Introduce a more generic way to set default per-policy boost flag
  cpufreq: Fix re-boost issue after hotplugging a CPU
  cpufreq: s3c64xx: Fix compilation warning
  cpuidle: teo: Skip sleep length computation for low latency constraints
  cpuidle: teo: Replace time_span_ns with a flag
  cpuidle: teo: Simplify handling of total events count
  cpuidle: teo: Skip getting the sleep length if wakeups are very frequent
  cpuidle: teo: Simplify counting events used for tick management
  cpuidle: teo: Clarify two code comments
  cpuidle: teo: Drop local variable prev_intercept_idx
  cpuidle: teo: Combine candidate state index checks against 0
  cpuidle: teo: Reorder candidate state index checks
  cpuidle: teo: Rearrange idle state lookup code
commit f55b0671e3
11 changed files with 162 additions and 132 deletions
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -656,13 +656,15 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy
 	 * so change its status accordingly.
 	 *
 	 * Otherwise, the device is going to be resumed, so set its PM-runtime
-	 * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set
-	 * to avoid confusing drivers that don't use it.
+	 * status to "active" unless its power.set_active flag is clear, in
+	 * which case it is not necessary to update its PM-runtime status.
 	 */
-	if (skip_resume)
+	if (skip_resume) {
 		pm_runtime_set_suspended(dev);
-	else if (dev_pm_skip_suspend(dev))
+	} else if (dev->power.set_active) {
 		pm_runtime_set_active(dev);
+		dev->power.set_active = false;
+	}
 
 	if (dev->pm_domain) {
 		info = "noirq power domain ";
@@ -1189,18 +1191,24 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 	return PMSG_ON;
 }
 
-static void dpm_superior_set_must_resume(struct device *dev)
+static void dpm_superior_set_must_resume(struct device *dev, bool set_active)
 {
 	struct device_link *link;
 	int idx;
 
-	if (dev->parent)
+	if (dev->parent) {
 		dev->parent->power.must_resume = true;
+		if (set_active)
+			dev->parent->power.set_active = true;
+	}
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
 		link->supplier->power.must_resume = true;
+		if (set_active)
+			link->supplier->power.set_active = true;
+	}
 
 	device_links_read_unlock(idx);
 }
@@ -1278,8 +1286,11 @@ Skip:
 			dev->power.may_skip_resume))
 		dev->power.must_resume = true;
 
-	if (dev->power.must_resume)
-		dpm_superior_set_must_resume(dev);
+	if (dev->power.must_resume) {
+		dev->power.set_active = dev->power.set_active ||
+			dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
+		dpm_superior_set_must_resume(dev, dev->power.set_active);
+	}
 
 Complete:
 	complete_all(&dev->power.completion);
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -17,7 +17,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
 
 config ARM_AIROHA_SOC_CPUFREQ
 	tristate "Airoha EN7581 SoC CPUFreq support"
-	depends on ARCH_AIROHA || COMPILE_TEST
+	depends on (ARCH_AIROHA && OF) || COMPILE_TEST
 	select PM_OPP
 	default ARCH_AIROHA
 	help
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -909,11 +909,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
 		pr_warn(FW_WARN "P-state 0 is not max freq\n");
 
-	if (acpi_cpufreq_driver.set_boost) {
-		set_boost(policy, acpi_cpufreq_driver.boost_enabled);
-		policy->boost_enabled = acpi_cpufreq_driver.boost_enabled;
-	}
-
 	return result;
 
 err_unreg:
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -611,7 +611,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
 	 */
 	policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
-	policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
+	policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ?
+					caps->highest_perf : caps->nominal_perf);
 
 	/*
 	 * Set cpuinfo.min_freq to Lowest to make the full range of performance
@@ -619,7 +620,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 * nonlinear perf
 	 */
 	policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf);
-	policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
+	policy->cpuinfo.max_freq = policy->max;
 
 	policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
 	policy->shared_type = cpu_data->shared_type;
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1410,10 +1410,6 @@ static int cpufreq_online(unsigned int cpu)
 			goto out_free_policy;
 		}
 
-		/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
-		if (cpufreq_boost_enabled() && policy_has_boost_freq(policy))
-			policy->boost_enabled = true;
-
 		/*
 		 * The initialization has succeeded and the policy is online.
 		 * If there is a problem with its frequency table, take it
@@ -1476,6 +1472,10 @@ static int cpufreq_online(unsigned int cpu)
 
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				CPUFREQ_CREATE_POLICY, policy);
+	} else {
+		ret = freq_qos_update_request(policy->max_freq_req, policy->max);
+		if (ret < 0)
+			goto out_destroy_policy;
 	}
 
 	if (cpufreq_driver->get && has_target()) {
@@ -1570,6 +1570,18 @@ static int cpufreq_online(unsigned int cpu)
 	if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
 		policy->cdev = of_cpufreq_cooling_register(policy);
 
+	/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
+	if (policy->boost_enabled != cpufreq_boost_enabled()) {
+		policy->boost_enabled = cpufreq_boost_enabled();
+		ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
+		if (ret) {
+			/* If the set_boost fails, the online operation is not affected */
+			pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu,
+				policy->boost_enabled ? "enable" : "disable");
+			policy->boost_enabled = !policy->boost_enabled;
+		}
+	}
+
 	pr_debug("initialization complete\n");
 
 	return 0;
--- a/drivers/cpufreq/s3c64xx-cpufreq.c
+++ b/drivers/cpufreq/s3c64xx-cpufreq.c
@@ -24,6 +24,7 @@ struct s3c64xx_dvfs {
 	unsigned int vddarm_max;
 };
 
+#ifdef CONFIG_REGULATOR
 static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
 	[0] = { 1000000, 1150000 },
 	[1] = { 1050000, 1150000 },
@@ -31,6 +32,7 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
 	[3] = { 1200000, 1350000 },
 	[4] = { 1300000, 1350000 },
 };
+#endif
 
 static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
 	{ 0, 0, 66000 },
@@ -51,15 +53,16 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
 static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
 				      unsigned int index)
 {
-	struct s3c64xx_dvfs *dvfs;
-	unsigned int old_freq, new_freq;
+	unsigned int new_freq = s3c64xx_freq_table[index].frequency;
 	int ret;
 
+#ifdef CONFIG_REGULATOR
+	struct s3c64xx_dvfs *dvfs;
+	unsigned int old_freq;
+
 	old_freq = clk_get_rate(policy->clk) / 1000;
-	new_freq = s3c64xx_freq_table[index].frequency;
 	dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data];
 
-#ifdef CONFIG_REGULATOR
 	if (vddarm && new_freq > old_freq) {
 		ret = regulator_set_voltage(vddarm,
 					    dvfs->vddarm_min,
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -41,11 +41,7 @@
  * idle state 2, the third bin spans from the target residency of idle state 2
  * up to, but not including, the target residency of idle state 3 and so on.
  * The last bin spans from the target residency of the deepest idle state
- * supplied by the driver to the scheduler tick period length or to infinity if
- * the tick period length is less than the target residency of that state. In
- * the latter case, the governor also counts events with the measured idle
- * duration between the tick period length and the target residency of the
- * deepest idle state.
+ * supplied by the driver to infinity.
  *
  * Two metrics called "hits" and "intercepts" are associated with each bin.
  * They are updated every time before selecting an idle state for the given CPU
@@ -60,6 +56,10 @@
  * into by the sleep length (these events are also referred to as "intercepts"
  * below).
  *
+ * The governor also counts "intercepts" with the measured idle duration below
+ * the tick period length and uses this information when deciding whether or not
+ * to stop the scheduler tick.
+ *
  * In order to select an idle state for a CPU, the governor takes the following
  * steps (modulo the possible latency constraint that must be taken into account
  * too):
@@ -105,6 +105,12 @@
 
 #include "gov.h"
 
+/*
+ * Idle state exit latency threshold used for deciding whether or not to check
+ * the time till the closest expected timer event.
+ */
+#define LATENCY_THRESHOLD_NS (RESIDENCY_THRESHOLD_NS / 2)
+
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  * is used for decreasing metrics on a regular basis.
@@ -124,18 +130,20 @@ struct teo_bin {
 
 /**
  * struct teo_cpu - CPU data used by the TEO cpuidle governor.
- * @time_span_ns: Time between idle state selection and post-wakeup update.
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
 * @state_bins: Idle state data bins for this CPU.
 * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
- * @tick_hits: Number of "hits" after TICK_NSEC.
+ * @tick_intercepts: "Intercepts" before TICK_NSEC.
+ * @short_idles: Wakeups after short idle periods.
+ * @artificial_wakeup: Set if the wakeup has been triggered by a safety net.
 */
 struct teo_cpu {
-	s64 time_span_ns;
 	s64 sleep_length_ns;
 	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
 	unsigned int total;
-	unsigned int tick_hits;
+	unsigned int tick_intercepts;
+	unsigned int short_idles;
+	bool artificial_wakeup;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -152,23 +160,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	s64 target_residency_ns;
 	u64 measured_ns;
 
-	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
+	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
+
+	if (cpu_data->artificial_wakeup) {
 		/*
-		 * One of the safety nets has triggered or the wakeup was close
-		 * enough to the closest timer event expected at the idle state
-		 * selection time to be discarded.
+		 * If one of the safety nets has triggered, assume that this
+		 * might have been a long sleep.
 		 */
 		measured_ns = U64_MAX;
 	} else {
 		u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
 
-		/*
-		 * The computations below are to determine whether or not the
-		 * (saved) time till the next timer event and the measured idle
-		 * duration fall into the same "bin", so use last_residency_ns
-		 * for that instead of time_span_ns which includes the cpuidle
-		 * overhead.
-		 */
 		measured_ns = dev->last_residency_ns;
 		/*
 		 * The delay between the wakeup and the first instruction
@@ -176,14 +178,16 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * time, so take 1/2 of the exit latency as a very rough
 		 * approximation of the average of it.
 		 */
-		if (measured_ns >= lat_ns)
+		if (measured_ns >= lat_ns) {
 			measured_ns -= lat_ns / 2;
-		else
+			if (measured_ns < RESIDENCY_THRESHOLD_NS)
+				cpu_data->short_idles += PULSE;
+		} else {
 			measured_ns /= 2;
+			cpu_data->short_idles += PULSE;
+		}
 	}
 
-	cpu_data->total = 0;
-
 	/*
 	 * Decay the "hits" and "intercepts" metrics for all of the bins and
 	 * find the bins that the sleep length and the measured idle duration
@@ -195,8 +199,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		bin->hits -= bin->hits >> DECAY_SHIFT;
 		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
 
-		cpu_data->total += bin->hits + bin->intercepts;
-
 		target_residency_ns = drv->states[i].target_residency_ns;
 
 		if (target_residency_ns <= cpu_data->sleep_length_ns) {
@@ -206,38 +208,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		}
 	}
 
-	/*
-	 * If the deepest state's target residency is below the tick length,
-	 * make a record of it to help teo_select() decide whether or not
-	 * to stop the tick. This effectively adds an extra hits-only bin
-	 * beyond the last state-related one.
-	 */
-	if (target_residency_ns < TICK_NSEC) {
-		cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
-
-		cpu_data->total += cpu_data->tick_hits;
-
-		if (TICK_NSEC <= cpu_data->sleep_length_ns) {
-			idx_timer = drv->state_count;
-			if (TICK_NSEC <= measured_ns) {
-				cpu_data->tick_hits += PULSE;
-				goto end;
-			}
-		}
-	}
+	cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
 
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
 	 * Otherwise, update the "intercepts" metric for the bin fallen into by
 	 * the measured idle duration.
 	 */
-	if (idx_timer == idx_duration)
+	if (idx_timer == idx_duration) {
 		cpu_data->state_bins[idx_timer].hits += PULSE;
-	else
+	} else {
 		cpu_data->state_bins[idx_duration].intercepts += PULSE;
+		if (TICK_NSEC <= measured_ns)
+			cpu_data->tick_intercepts += PULSE;
+	}
 
-end:
+	cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
 	cpu_data->total += PULSE;
 }
 
@@ -285,14 +271,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	ktime_t delta_tick = TICK_NSEC / 2;
-	unsigned int tick_intercept_sum = 0;
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
 	unsigned int idx_hit_sum = 0;
 	unsigned int hit_sum = 0;
 	int constraint_idx = 0;
 	int idx0 = 0, idx = -1;
-	int prev_intercept_idx;
 	s64 duration_ns;
 	int i;
 
@@ -301,10 +285,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		dev->last_state_idx = -1;
 	}
 
-	cpu_data->time_span_ns = local_clock();
 	/*
-	 * Set the expected sleep length to infinity in case of an early
-	 * return.
+	 * Set the sleep length to infinity in case the invocation of
+	 * tick_nohz_get_sleep_length() below is skipped, in which case it won't
+	 * be known whether or not the subsequent wakeup is caused by a timer.
+	 * It is generally fine to count the wakeup as an intercept then, except
+	 * for the cases when the CPU is mostly woken up by timers and there may
+	 * be opportunities to ask for a deeper idle state when no imminent
+	 * timers are scheduled which may be missed.
 	 */
 	cpu_data->sleep_length_ns = KTIME_MAX;
 
@@ -360,17 +348,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		goto end;
 	}
 
-	tick_intercept_sum = intercept_sum +
-			cpu_data->state_bins[drv->state_count-1].intercepts;
-
 	/*
 	 * If the sum of the intercepts metric for all of the idle states
 	 * shallower than the current candidate one (idx) is greater than the
 	 * sum of the intercepts and hits metrics for the candidate state and
-	 * all of the deeper states a shallower idle state is likely to be a
+	 * all of the deeper states, a shallower idle state is likely to be a
 	 * better choice.
 	 */
-	prev_intercept_idx = idx;
 	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
 		int first_suitable_idx = idx;
 
@@ -396,41 +380,38 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				 * first enabled state that is deep enough.
 				 */
 				if (teo_state_ok(i, drv) &&
-				    !dev->states_usage[i].disable)
+				    !dev->states_usage[i].disable) {
 					idx = i;
-				else
-					idx = first_suitable_idx;
-
+					break;
+				}
+				idx = first_suitable_idx;
 				break;
 			}
 
 			if (dev->states_usage[i].disable)
 				continue;
 
-			if (!teo_state_ok(i, drv)) {
+			if (teo_state_ok(i, drv)) {
 				/*
-				 * The current state is too shallow, but if an
-				 * alternative candidate state has been found,
-				 * it may still turn out to be a better choice.
+				 * The current state is deep enough, but still
+				 * there may be a better one.
 				 */
-				if (first_suitable_idx != idx)
-					continue;
-
-				break;
+				first_suitable_idx = i;
+				continue;
 			}
 
-			first_suitable_idx = i;
+			/*
+			 * The current state is too shallow, so if no suitable
+			 * states other than the initial candidate have been
+			 * found, give up (the remaining states to check are
+			 * shallower still), but otherwise the first suitable
+			 * state other than the initial candidate may turn out
+			 * to be preferable.
+			 */
+			if (first_suitable_idx == idx)
+				break;
 		}
 	}
-	if (!idx && prev_intercept_idx) {
-		/*
-		 * We have to query the sleep length here otherwise we don't
-		 * know after wakeup if our guess was correct.
-		 */
-		duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-		cpu_data->sleep_length_ns = duration_ns;
-		goto out_tick;
-	}
 
 	/*
 	 * If there is a latency constraint, it may be necessary to select an
@@ -440,24 +421,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		idx = constraint_idx;
 
 	/*
-	 * Skip the timers check if state 0 is the current candidate one,
-	 * because an immediate non-timer wakeup is expected in that case.
+	 * If either the candidate state is state 0 or its target residency is
+	 * low enough, there is basically nothing more to do, but if the sleep
+	 * length is not updated, the subsequent wakeup will be counted as an
+	 * "intercept" which may be problematic in the cases when timer wakeups
+	 * are dominant. Namely, it may effectively prevent deeper idle states
+	 * from being selected at one point even if no imminent timers are
+	 * scheduled.
+	 *
+	 * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
+	 * CPU are unlikely (user space has a default 50 us slack value for
+	 * hrtimers and there are relatively few timers with a lower deadline
+	 * value in the kernel), and even if they did happen, the potential
+	 * benefit from using a deep idle state in that case would be
+	 * questionable anyway for latency reasons. Thus if the measured idle
+	 * duration falls into that range in the majority of cases, assume
+	 * non-timer wakeups to be dominant and skip updating the sleep length
+	 * to reduce latency.
+	 *
+	 * Also, if the latency constraint is sufficiently low, it will force
+	 * shallow idle states regardless of the wakeup type, so the sleep
+	 * length need not be known in that case.
 	 */
-	if (!idx)
-		goto out_tick;
-
-	/*
-	 * If state 0 is a polling one, check if the target residency of
-	 * the current candidate state is low enough and skip the timers
-	 * check in that case too.
-	 */
-	if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
-	    drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+	if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
+	    (2 * cpu_data->short_idles >= cpu_data->total ||
+	     latency_req < LATENCY_THRESHOLD_NS))
 		goto out_tick;
 
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 
+	if (!idx)
+		goto out_tick;
+
 	/*
 	 * If the closest expected timer is before the target residency of the
 	 * candidate state, a shallower one needs to be found.
@@ -474,7 +470,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * total wakeup events, do not stop the tick.
 	 */
 	if (drv->states[idx].target_residency_ns < TICK_NSEC &&
-	    tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+	    cpu_data->tick_intercepts > cpu_data->total / 2 + cpu_data->total / 8)
 		duration_ns = TICK_NSEC / 2;
 
 end:
@@ -511,17 +507,16 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 
 	dev->last_state_idx = state;
-	/*
-	 * If the wakeup was not "natural", but triggered by one of the safety
-	 * nets, assume that the CPU might have been idle for the entire sleep
-	 * length time.
-	 */
 	if (dev->poll_time_limit ||
 	    (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
+		/*
+		 * The wakeup was not "genuine", but triggered by one of the
+		 * safety nets.
+		 */
 		dev->poll_time_limit = false;
-		cpu_data->time_span_ns = cpu_data->sleep_length_ns;
+		cpu_data->artificial_wakeup = true;
 	} else {
-		cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
+		cpu_data->artificial_wakeup = false;
 	}
 }
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \
 
 #ifdef CONFIG_PM
 #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns)
-#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns")
 #else
 #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns)
-#define EXPORT_PM_FN_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns)
 #endif
 
 #ifdef CONFIG_PM_SLEEP
@@ -684,6 +680,7 @@ struct dev_pm_info {
 	bool no_pm_callbacks:1;		/* Owned by the PM core */
 	bool async_in_progress:1;	/* Owned by the PM core */
 	bool must_resume:1;		/* Owned by the PM core */
+	bool set_active:1;		/* Owned by the PM core */
 	bool may_skip_resume:1;		/* Set by subsystems */
 #else
 	bool should_wakeup:1;
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -608,7 +608,11 @@ int hibernation_platform_enter(void)
 
 	local_irq_disable();
 	system_state = SYSTEM_SUSPEND;
-	syscore_suspend();
+
+	error = syscore_suspend();
+	if (error)
+		goto Enable_irqs;
+
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
 		goto Power_up;
@@ -620,6 +624,7 @@ int hibernation_platform_enter(void)
 
  Power_up:
 	syscore_resume();
+ Enable_irqs:
 	system_state = SYSTEM_RUNNING;
 	local_irq_enable();
 
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -666,7 +666,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
 	}
 
 	sg_policy->thread = thread;
-	kthread_bind_mask(thread, policy->related_cpus);
+	if (policy->dvfs_possible_from_any_cpu)
+		set_cpus_allowed_ptr(thread, policy->related_cpus);
+	else
+		kthread_bind_mask(thread, policy->related_cpus);
+
 	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
 	mutex_init(&sg_policy->work_lock);
 
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1129,6 +1129,13 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
 	if (!task_has_dl_policy(p) || !dl_bandwidth_enabled())
 		return 0;
 
+	/*
+	 * The special/sugov task isn't part of regular bandwidth/admission
+	 * control so let userspace change affinities.
+	 */
+	if (dl_entity_is_special(&p->dl))
+		return 0;
+
 	/*
 	 * Since bandwidth control happens on root_domain basis,
 	 * if admission test is enabled, we only admit -deadline