Initial commit; kernel source import

This commit is contained in:
Nathan
2025-04-06 23:50:55 -05:00
commit 25c6d769f4
45093 changed files with 18199410 additions and 0 deletions

42
drivers/cpuidle/Kconfig Normal file
View File

@@ -0,0 +1,42 @@
# Top-level switch for the cpuidle framework; on by default for ACPI and
# PPC_PSERIES configurations.
config CPU_IDLE
bool "CPU idle PM support"
default y if ACPI || PPC_PSERIES
help
CPU idle is a generic framework for supporting software-controlled
idle processor power management. It includes modular cross-platform
governors that can be swapped during runtime.
If you're using an ACPI-enabled platform, you should say Y here.
# Optional: per-CPU cpuidle drivers. Off unless explicitly enabled.
config CPU_IDLE_MULTIPLE_DRIVERS
bool "Support multiple cpuidle drivers"
depends on CPU_IDLE
default n
help
Allows the cpuidle framework to use different drivers for each CPU.
This is useful if you have a system with different CPU latencies and
states. If unsure say N.
# No prompt: the ladder governor is enabled automatically with CPU_IDLE.
config CPU_IDLE_GOV_LADDER
bool
depends on CPU_IDLE
default y
# No prompt: the menu governor is enabled automatically when both CPU_IDLE
# and NO_HZ are set.
config CPU_IDLE_GOV_MENU
bool
depends on CPU_IDLE && NO_HZ
default y
# No prompt, defaults to n. The cpuidle Makefile builds coupled.o when this
# symbol is set; presumably it is selected by architecture Kconfig files.
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
# Platform-specific cpuidle drivers, only offered when CPU_IDLE is enabled.
if CPU_IDLE
config CPU_IDLE_CALXEDA
bool "CPU Idle Driver for Calxeda processors"
depends on ARCH_HIGHBANK
help
Select this to enable cpuidle on Calxeda processors.
endif

9
drivers/cpuidle/Makefile Normal file
View File

@@ -0,0 +1,9 @@
#
# Makefile for cpuidle.
#
# Core framework objects and the governors/ subdirectory are always built.
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
# Coupled-state helpers, built only when the architecture asks for them.
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
# SoC-specific cpuidle drivers.
obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o
obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o

792
drivers/cpuidle/coupled.c Normal file
View File

@@ -0,0 +1,792 @@
/*
* coupled.c - helper functions to enter the same idle state on multiple cpus
*
* Copyright (c) 2011 Google, Inc.
*
* Author: Colin Cross <ccross@android.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "cpuidle.h"
/**
* DOC: Coupled cpuidle states
*
* On some ARM SMP SoCs (OMAP4460, Tegra 2, and probably more), the
* cpus cannot be independently powered down, either due to
* sequencing restrictions (on Tegra 2, cpu 0 must be the last to
* power down), or due to HW bugs (on OMAP4460, a cpu powering up
* will corrupt the gic state unless the other cpu runs a work
* around). Each cpu has a power state that it can enter without
* coordinating with the other cpu (usually Wait For Interrupt, or
* WFI), and one or more "coupled" power states that affect blocks
* shared between the cpus (L2 cache, interrupt controller, and
* sometimes the whole SoC). Entering a coupled power state must
* be tightly controlled on both cpus.
*
* This file implements a solution, where each cpu will wait in the
* WFI state until all cpus are ready to enter a coupled state, at
* which point the coupled state function will be called on all
* cpus at approximately the same time.
*
* Once all cpus are ready to enter idle, they are woken by an smp
* cross call. At this point, there is a chance that one of the
* cpus will find work to do, and choose not to enter idle. A
* final pass is needed to guarantee that all cpus will call the
* power state enter function at the same time. During this pass,
* each cpu will increment the ready counter, and continue once the
* ready counter matches the number of online coupled cpus. If any
* cpu exits idle, the other cpus will decrement their counter and
* retry.
*
* requested_state stores the deepest coupled idle state each cpu
* is ready for. It is assumed that the states are indexed from
* shallowest (highest power, lowest exit latency) to deepest
* (lowest power, highest exit latency). The requested_state
* variable is not locked. It is only written from the cpu that
* it stores (or by the on/offlining cpu if that cpu is offline),
* and only read after all the cpus are ready for the coupled idle
* state and are no longer updating it.
*
* Three atomic counters are used. alive_count tracks the number
* of cpus in the coupled set that are currently or soon will be
* online. waiting_count tracks the number of cpus that are in
* the waiting loop, in the ready loop, or in the coupled idle state.
* ready_count tracks the number of cpus that are in the ready loop
* or in the coupled idle state.
*
* To use coupled cpuidle states, a cpuidle driver must:
*
* Set struct cpuidle_device.coupled_cpus to the mask of all
* coupled cpus, usually the same as cpu_possible_mask if all cpus
* are part of the same cluster. The coupled_cpus mask must be
* set in the struct cpuidle_device for each cpu.
*
* Set struct cpuidle_device.safe_state to a state that is not a
* coupled state. This is usually WFI.
*
* Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
* state that affects multiple cpus.
*
* Provide a struct cpuidle_state.enter function for each state
* that affects multiple cpus. This function is guaranteed to be
* called on all cpus at approximately the same time. The driver
* should ensure that the cpus all abort together if any cpu tries
* to abort once the function is called. The function should return
* with interrupts still disabled.
*/
/**
* struct cpuidle_coupled - data for set of cpus that share a coupled idle state
* @coupled_cpus: mask of cpus that are part of the coupled set
* @requested_state: array of requested states for cpus in the coupled set,
*                   indexed by cpu number; CPUIDLE_COUPLED_NOT_IDLE when a
*                   cpu has left the waiting loop
* @ready_waiting_counts: combined count of cpus in ready or waiting loops
*                        (waiting count in the low WAITING_BITS bits, ready
*                        count in the bits above)
* @abort_barrier: atomic passed to cpuidle_coupled_parallel_barrier() to
*                 synchronize all cpus when a coupled entry is aborted because
*                 a poke is still pending
* @online_count: count of cpus that are online
* @refcnt: reference count of cpuidle devices that are using this struct
* @prevent: flag to prevent coupled idle while a cpu is hotplugging
*/
struct cpuidle_coupled {
cpumask_t coupled_cpus;
int requested_state[NR_CPUS];
atomic_t ready_waiting_counts;
atomic_t abort_barrier;
int online_count;
int refcnt;
int prevent;
};
/*
 * Layout of cpuidle_coupled.ready_waiting_counts: the number of waiting cpus
 * lives in the low WAITING_BITS bits (WAITING_MASK) and the number of ready
 * cpus in the bits above them (READY_MASK). Adding MAX_WAITING_CPUS therefore
 * increments the ready count by one without touching the waiting count.
 */
#define WAITING_BITS 16
#define MAX_WAITING_CPUS (1 << WAITING_BITS)
#define WAITING_MASK (MAX_WAITING_CPUS - 1)
#define READY_MASK (~WAITING_MASK)
/* Value stored in requested_state[] for a cpu that left the waiting loop. */
#define CPUIDLE_COUPLED_NOT_IDLE (-1)
/* NOTE(review): no user of this mutex is visible in this part of the file. */
static DEFINE_MUTEX(cpuidle_coupled_lock);
/* Per-cpu call_single_data used to send the poke cross call to that cpu. */
static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
/*
* The cpuidle_coupled_poke_pending mask is used to avoid calling
* __smp_call_function_single with the per cpu call_single_data struct already
* in use. This prevents a deadlock where two cpus are waiting for each other's
* call_single_data struct to be available.
*/
static cpumask_t cpuidle_coupled_poke_pending;
/*
* The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked
* once to minimize entering the ready loop with a poke pending, which would
* require aborting and retrying.
*/
static cpumask_t cpuidle_coupled_poked;
/**
* cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus
* @dev: cpuidle_device of the calling cpu
* @a: atomic variable to hold the barrier
*
* No caller to this function will return from this function until all online
* cpus in the same coupled group have called this function. Once any caller
* has returned from this function, the barrier is immediately available for
* reuse.
*
* The atomic variable a must be initialized to 0 before any cpu calls
* this function, and will be reset to 0 before any cpu returns from this
* function.
*
* Must only be called from within a coupled idle state handler
* (state.enter when state.flags has CPUIDLE_FLAG_COUPLED set).
*
* Provides full smp barrier semantics before and after calling.
*/
void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
{
/* Number of cpus that must arrive before anyone may leave. */
int n = dev->coupled->online_count;
smp_mb__before_atomic_inc();
/* Phase 1: announce arrival, then spin until all n cpus have arrived. */
atomic_inc(a);
while (atomic_read(a) < n)
cpu_relax();
/*
 * Phase 2: every cpu increments a second time. The cpu whose increment
 * brings the counter to 2 * n is the last one through; it resets the
 * counter to 0 so the barrier can be reused, and returns.
 */
if (atomic_inc_return(a) == n * 2) {
atomic_set(a, 0);
return;
}
/* Everyone else spins until the last cpu has reset the counter. */
while (atomic_read(a) > n)
cpu_relax();
}
/**
* cpuidle_state_is_coupled - check if a state is part of a coupled set
* @dev: struct cpuidle_device for the current cpu
* @drv: struct cpuidle_driver for the platform
* @state: index of the target state in drv->states
*
* Returns true if the target state is coupled with cpus besides this one
*/
bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int state)
{
return drv->states[state].flags & CPUIDLE_FLAG_COUPLED;
}
/**
* cpuidle_coupled_set_ready - mark a cpu as ready
* @coupled: the struct coupled that contains the current cpu
*/
static inline void cpuidle_coupled_set_ready(struct cpuidle_coupled *coupled)
{
atomic_add(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
}
/**
 * cpuidle_coupled_set_not_ready - try to take a cpu back out of the ready count
 * @coupled: the struct coupled that contains the current cpu
 *
 * Lowers the ready count by one, except when the combined counter shows that
 * every online cpu is both ready and waiting. Refusing the decrement in that
 * case closes a race in which another cpu drops and re-raises its waiting
 * count just before this cpu lowers its ready count, which would let the ready
 * count fall from "all online" without passing through the coupled state.
 *
 * Returns 0 if the ready count was decremented, -EINVAL if the counter already
 * showed all online cpus ready and waiting.
 */
static
inline int cpuidle_coupled_set_not_ready(struct cpuidle_coupled *coupled)
{
	int online = coupled->online_count;
	/* Counter value with both fields equal to the online count. */
	int everyone = online | (online << WAITING_BITS);

	if (atomic_add_unless(&coupled->ready_waiting_counts,
			      -MAX_WAITING_CPUS, everyone))
		return 0;

	return -EINVAL;
}
/**
 * cpuidle_coupled_no_cpus_ready - test whether the ready count is zero
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns non-zero when no cpu in the coupled set is counted as ready.
 */
static inline int cpuidle_coupled_no_cpus_ready(struct cpuidle_coupled *coupled)
{
	int counts = atomic_read(&coupled->ready_waiting_counts);

	return (counts >> WAITING_BITS) == 0;
}
/**
 * cpuidle_coupled_cpus_ready - test whether every online cpu is ready
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns true when the ready count equals the number of online cpus in the
 * coupled set.
 */
static inline bool cpuidle_coupled_cpus_ready(struct cpuidle_coupled *coupled)
{
	int counts = atomic_read(&coupled->ready_waiting_counts);
	int ready = counts >> WAITING_BITS;

	return ready == coupled->online_count;
}
/**
 * cpuidle_coupled_cpus_waiting - test whether every online cpu is waiting
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns true when the waiting count equals the number of online cpus in the
 * coupled set.
 */
static inline bool cpuidle_coupled_cpus_waiting(struct cpuidle_coupled *coupled)
{
	int counts = atomic_read(&coupled->ready_waiting_counts);
	int waiting = counts & WAITING_MASK;

	return waiting == coupled->online_count;
}
/**
 * cpuidle_coupled_no_cpus_waiting - test whether the waiting count is zero
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns non-zero when no cpu in the coupled set is counted as waiting.
 */
static inline int cpuidle_coupled_no_cpus_waiting(struct cpuidle_coupled *coupled)
{
	int counts = atomic_read(&coupled->ready_waiting_counts);

	return (counts & WAITING_MASK) == 0;
}
/**
* cpuidle_coupled_get_state - determine the deepest idle state
* @dev: struct cpuidle_device for this cpu
* @coupled: the struct coupled that contains the current cpu
*
* Returns the deepest idle state that all coupled cpus can enter
*/
static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
struct cpuidle_coupled *coupled)
{
int i;
int state = INT_MAX;
/*
* Read barrier ensures that read of requested_state is ordered after
* reads of ready_count. Matches the write barriers
* cpuidle_set_state_waiting.
*/
smp_rmb();
for_each_cpu_mask(i, coupled->coupled_cpus)
if (cpu_online(i) && coupled->requested_state[i] < state)
state = coupled->requested_state[i];
return state;
}
/*
 * Cross-call handler for a poke. @info carries the cpu number stored in the
 * call_single_data at registration time. Records that the cpu has been poked
 * and then releases its poke_pending bit, in that order.
 */
static void cpuidle_coupled_handle_poke(void *info)
{
	int poked_cpu = (unsigned long)info;

	cpumask_set_cpu(poked_cpu, &cpuidle_coupled_poked);
	cpumask_clear_cpu(poked_cpu, &cpuidle_coupled_poke_pending);
}
/**
 * cpuidle_coupled_poke - wake up a cpu that may be waiting
 * @cpu: target cpu
 *
 * Ensures that the target cpu exits its waiting idle state (if it is in it)
 * and will see updates to waiting_count before it re-enters its waiting idle
 * state.
 *
 * If the target's bit in cpuidle_coupled_poke_pending is already set, no new
 * cross call is issued: that cpu either has or will soon have a pending IPI
 * that will wake it out of idle, or it is currently processing the IPI and is
 * not in idle.
 */
static void cpuidle_coupled_poke(int cpu)
{
	struct call_single_data *csd;

	/* Someone already claimed this cpu's call_single_data. */
	if (cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
		return;

	csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
	__smp_call_function_single(cpu, csd, 0);
}
/**
* cpuidle_coupled_poke_others - wake up all other cpus that may be waiting
* @dev: struct cpuidle_device for this cpu
* @coupled: the struct coupled that contains the current cpu
*
* Calls cpuidle_coupled_poke on all other online cpus.
*/
static void cpuidle_coupled_poke_others(int this_cpu,
struct cpuidle_coupled *coupled)
{
int cpu;
for_each_cpu_mask(cpu, coupled->coupled_cpus)
if (cpu != this_cpu && cpu_online(cpu))
cpuidle_coupled_poke(cpu);
}
/**
 * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop
 * @cpu: cpu number whose requested state is being recorded
 * @coupled: the struct coupled that contains the current cpu
 * @next_state: the index in drv->states of the requested state for this cpu
 *
 * Records the requested idle state for @cpu and bumps the waiting count.
 * Returns the number of waiting cpus after the increment.
 */
static int cpuidle_coupled_set_waiting(int cpu,
		struct cpuidle_coupled *coupled, int next_state)
{
	int counts;

	coupled->requested_state[cpu] = next_state;

	/*
	 * The atomic_inc_return provides a write barrier to order the write
	 * to requested_state with the later write that increments ready_count.
	 */
	counts = atomic_inc_return(&coupled->ready_waiting_counts);

	return counts & WAITING_MASK;
}
/**
 * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop
 * @cpu: cpu number that is leaving the wait loop
 * @coupled: the struct coupled that contains the current cpu
 *
 * Drops the waiting count by one and then resets the cpu's requested state to
 * CPUIDLE_COUPLED_NOT_IDLE.
 */
static void cpuidle_coupled_set_not_waiting(int cpu,
		struct cpuidle_coupled *coupled)
{
	int *requested = &coupled->requested_state[cpu];

	/*
	 * Decrementing waiting count can race with incrementing it in
	 * cpuidle_coupled_set_waiting, but that's OK.  Worst case, some
	 * cpus will increment ready_count and then spin until they
	 * notice that this cpu has cleared its requested_state.
	 */
	atomic_dec(&coupled->ready_waiting_counts);

	*requested = CPUIDLE_COUPLED_NOT_IDLE;
}
/**
* cpuidle_coupled_set_done - mark this cpu as leaving the ready loop
* @cpu: the current cpu
* @coupled: the struct coupled that contains the current cpu
*
* Marks this cpu as no longer in the ready and waiting loops. Decrements
* the waiting count first to prevent another cpu looping back in and seeing
* this cpu as waiting just before it exits idle.
*/
static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
{
cpuidle_coupled_set_not_waiting(cpu, coupled);
atomic_sub(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
}
/**
 * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed
 * @cpu: this cpu
 *
 * If a poke is pending for @cpu, turns on interrupts and spins until the poke
 * handler has cleared the pending bit, then turns interrupts off again.
 *
 * Other interrupts may also be processed while interrupts are enabled, so
 * need_resched() must be tested after this function returns to make sure
 * the interrupt didn't schedule work that should take the cpu out of idle.
 *
 * Returns 0 if no poke was pending, 1 if a poke was cleared.
 */
static int cpuidle_coupled_clear_pokes(int cpu)
{
	int cleared = 0;

	if (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) {
		local_irq_enable();
		while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
			cpu_relax();
		local_irq_disable();
		cleared = 1;
	}

	return cleared;
}
/*
 * Returns true if any online cpu of the coupled set still has its bit set in
 * cpuidle_coupled_poke_pending.
 */
static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled)
{
	cpumask_t online_coupled;

	cpumask_and(&online_coupled, cpu_online_mask, &coupled->coupled_cpus);

	return cpumask_and(&online_coupled, &cpuidle_coupled_poke_pending,
			   &online_coupled);
}
/**
* cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus
* @dev: struct cpuidle_device for the current cpu
* @drv: struct cpuidle_driver for the platform
* @next_state: index of the requested state in drv->states
*
* Coordinate with coupled cpus to enter the target state. This is a two
* stage process. In the first stage, the cpus are operating independently,
* and may call into cpuidle_enter_state_coupled at completely different times.
* To save as much power as possible, the first cpus to call this function will
* go to an intermediate state (the cpuidle_device's safe state), and wait for
* all the other cpus to call this function. Once all coupled cpus are idle,
* the second stage will start. Each coupled cpu will spin until all cpus have
* guaranteed that they will call the target_state.
*
* This function must be called with interrupts disabled. It may enable
* interrupts while preparing for idle, and it will always return with
* interrupts enabled.
*
* Returns the value of the last cpuidle_enter_state() call made on this cpu,
* -1 if no state was entered, or -EINVAL if @dev has no coupled data.
*/
int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state)
{
int entered_state = -1;
struct cpuidle_coupled *coupled = dev->coupled;
int w;
/*
 * NOTE(review): this early return does not call local_irq_enable(),
 * unlike every other exit path - confirm callers never get here with
 * dev->coupled unset.
 */
if (!coupled)
return -EINVAL;
/*
 * While coupled idle is being prevented (hotplug in progress), only the
 * safe state is entered, and the cpu bails out as soon as it has work.
 */
while (coupled->prevent) {
cpuidle_coupled_clear_pokes(dev->cpu);
if (need_resched()) {
local_irq_enable();
return entered_state;
}
entered_state = cpuidle_enter_state(dev, drv,
dev->safe_state_index);
}
/* Read barrier ensures online_count is read after prevent is cleared */
smp_rmb();
reset:
/* Start a fresh attempt: forget earlier pokes and join the waiters. */
cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked);
w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
/*
* If this is the last cpu to enter the waiting state, poke
* all the other cpus out of their waiting state so they can
* enter a deeper state. This can race with one of the cpus
* exiting the waiting state due to an interrupt and
* decrementing waiting_count, see comment below.
*/
if (w == coupled->online_count) {
cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked);
cpuidle_coupled_poke_others(dev->cpu, coupled);
}
retry:
/*
* Wait for all coupled cpus to be idle, using the deepest state
* allowed for a single cpu. If this was not the poking cpu, wait
* for at least one poke before leaving to avoid a race where
* two cpus could arrive at the waiting loop at the same time,
* but the first of the two to arrive could skip the loop without
* processing the pokes from the last to arrive.
*/
while (!cpuidle_coupled_cpus_waiting(coupled) ||
!cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) {
/* A poke was just handled: re-evaluate the loop condition first. */
if (cpuidle_coupled_clear_pokes(dev->cpu))
continue;
if (need_resched()) {
cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
goto out;
}
if (coupled->prevent) {
cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
goto out;
}
entered_state = cpuidle_enter_state(dev, drv,
dev->safe_state_index);
}
/* Drain any remaining poke, then recheck for work it may have queued. */
cpuidle_coupled_clear_pokes(dev->cpu);
if (need_resched()) {
cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
goto out;
}
/*
* Make sure final poke status for this cpu is visible before setting
* cpu as ready.
*/
smp_wmb();
/*
* All coupled cpus are probably idle. There is a small chance that
* one of the other cpus just became active. Increment the ready count,
* and spin until all coupled cpus have incremented the counter. Once a
* cpu has incremented the ready counter, it cannot abort idle and must
* spin until either all cpus have incremented the ready counter, or
* another cpu leaves idle and decrements the waiting counter.
*/
cpuidle_coupled_set_ready(coupled);
while (!cpuidle_coupled_cpus_ready(coupled)) {
/* Check if any other cpus bailed out of idle. */
if (!cpuidle_coupled_cpus_waiting(coupled))
if (!cpuidle_coupled_set_not_ready(coupled))
goto retry;
cpu_relax();
}
/*
* Make sure read of all cpus ready is done before reading pending pokes
*/
smp_rmb();
/*
* There is a small chance that a cpu left and reentered idle after this
* cpu saw that all cpus were waiting. The cpu that reentered idle will
* have sent this cpu a poke, which will still be pending after the
* ready loop. The pending interrupt may be lost by the interrupt
* controller when entering the deep idle state. It's not possible to
* clear a pending interrupt without turning interrupts on and handling
* it, and it's too late to turn on interrupts here, so reset the
* coupled idle state of all cpus and retry.
*/
if (cpuidle_coupled_any_pokes_pending(coupled)) {
cpuidle_coupled_set_done(dev->cpu, coupled);
/* Wait for all cpus to see the pending pokes */
cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier);
goto reset;
}
/* all cpus have acked the coupled state */
next_state = cpuidle_coupled_get_state(dev, coupled);
entered_state = cpuidle_enter_state(dev, drv, next_state);
cpuidle_coupled_set_done(dev->cpu, coupled);
out:
/*
* Normal cpuidle states are expected to return with irqs enabled.
* That leads to an inefficiency where a cpu receiving an interrupt
* that brings it out of idle will process that interrupt before
* exiting the idle enter function and decrementing ready_count. All
* other cpus will need to spin waiting for the cpu that is processing
* the interrupt. If the driver returns with interrupts disabled,
* all other cpus will loop back into the safe idle state instead of
* spinning, saving power.
*
* Calling local_irq_enable here allows coupled states to return with
* interrupts disabled, but won't cause problems for drivers that
* exit with interrupts enabled.
*/
local_irq_enable();
/*
* Wait until all coupled cpus have exited idle. There is no risk that
* a cpu exits and re-enters the ready state because this cpu has
* already decremented its waiting_count.
*/
while (!cpuidle_coupled_no_cpus_ready(coupled))
cpu_relax();
return entered_state;
}
/*
 * Recompute coupled->online_count as the number of cpus that are both online
 * and members of the coupled set.
 */
static void cpuidle_coupled_update_online_cpus(struct cpuidle_coupled *coupled)
{
	cpumask_t online_coupled;

	cpumask_and(&online_coupled, cpu_online_mask, &coupled->coupled_cpus);
	coupled->online_count = cpumask_weight(&online_coupled);
}
/**
* cpuidle_coupled_register_device - register a coupled cpuidle device
* @dev: struct cpuidle_device for the current cpu
*
* Called from cpuidle_register_device to handle coupled idle init. Finds the
* cpuidle_coupled struct for this set of coupled cpus, or creates one if none
* exists yet.
*/
int cpuidle_coupled_register_device(struct cpuidle_device *dev)
{
int cpu;
struct cpuidle_device *other_dev;
struct call_single_data *csd;
struct cpuidle_coupled *coupled;
if (cpumask_empty(&dev->coupled_cpus))
return 0;
for_each_cpu_mask(cpu, dev->coupled_cpus) {
other_dev = per_cpu(cpuidle_devices, cpu);
if (other_dev && other_dev->coupled) {
coupled = other_dev->coupled;
goto have_coupled;
}
}
/* No existing coupled info found, create a new one */
coupled = kzalloc(sizeof(struct cpuidle_coupled), GFP_KERNEL);
if (!coupled)
return -ENOMEM;
coupled->coupled_cpus = dev->coupled_cpus;
have_coupled:
dev->coupled = coupled;
if (WARN_ON(!cpumask_equal(&dev->coupled_cpus, &coupled->coupled_cpus)))
coupled->prevent++;
cpuidle_coupled_update_online_cpus(coupled);
coupled->refcnt++;
csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
csd->func = cpuidle_coupled_handle_poke;
csd->info = (void *)(unsigned long)dev->cpu;
return 0;
}
/**
 * cpuidle_coupled_unregister_device - unregister a coupled cpuidle device
 * @dev: struct cpuidle_device for the current cpu
 *
 * Called from cpuidle_unregister_device to tear down coupled idle.  Drops this
 * device's reference on the shared cpuidle_coupled struct and frees the struct
 * once the last device in the set has been unregistered.
 *
 * Does nothing if @dev has no coupled cpus or never had a coupled struct
 * attached.
 */
void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
{
	struct cpuidle_coupled *coupled = dev->coupled;

	if (cpumask_empty(&dev->coupled_cpus) || !coupled)
		return;

	/*
	 * Free only when the last reference is dropped. The struct is shared
	 * by every device in the coupled set, so freeing it while refcnt is
	 * still non-zero would leave the remaining devices with a dangling
	 * dev->coupled pointer.
	 */
	if (--coupled->refcnt == 0)
		kfree(coupled);

	dev->coupled = NULL;
}
/**
 * cpuidle_coupled_prevent_idle - prevent cpus from entering a coupled state
 * @coupled: the struct coupled that contains the cpu that is changing state
 *
 * Disables coupled cpuidle on a coupled set of cpus.  Used to ensure that
 * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
 * Returns only after the waiting count has dropped to zero.
 */
static void cpuidle_coupled_prevent_idle(struct cpuidle_coupled *coupled)
{
	int this_cpu = get_cpu();

	/* Force all cpus out of the waiting loop. */
	coupled->prevent++;
	cpuidle_coupled_poke_others(this_cpu, coupled);
	put_cpu();

	for (;;) {
		if (cpuidle_coupled_no_cpus_waiting(coupled))
			break;
		cpu_relax();
	}
}
/**
 * cpuidle_coupled_allow_idle - allows cpus to enter a coupled state
 * @coupled: the struct coupled that contains the cpu that is changing state
 *
 * Re-enables coupled cpuidle on a coupled set of cpus by undoing one
 * cpuidle_coupled_prevent_idle() call, then pokes the other cpus so they
 * notice the change.
 */
static void cpuidle_coupled_allow_idle(struct cpuidle_coupled *coupled)
{
	int this_cpu = get_cpu();

	/*
	 * Write barrier ensures readers see the new online_count when they
	 * see prevent == 0.
	 */
	smp_wmb();
	coupled->prevent--;

	/* Force cpus out of the prevent loop. */
	cpuidle_coupled_poke_others(this_cpu, coupled);

	put_cpu();
}
/**
 * cpuidle_coupled_cpu_notify - notifier called during hotplug transitions
 * @nb: notifier block
 * @action: hotplug transition
 * @hcpu: target cpu number
 *
 * Called when a cpu is brought on or offline using hotplug.  Blocks coupled
 * idle while a transition is being prepared, refreshes the online count once
 * it has completed, and unblocks coupled idle when it completes or is
 * abandoned. Always returns NOTIFY_OK.
 */
static int cpuidle_coupled_cpu_notify(struct notifier_block *nb,
		unsigned long action, void *hcpu)
{
	int cpu = (unsigned long)hcpu;
	unsigned long event = action & ~CPU_TASKS_FROZEN;
	struct cpuidle_device *dev;

	/* Ignore every event this notifier does not act on, lock-free. */
	switch (event) {
	case CPU_UP_PREPARE:
	case CPU_DOWN_PREPARE:
	case CPU_ONLINE:
	case CPU_DEAD:
	case CPU_UP_CANCELED:
	case CPU_DOWN_FAILED:
		break;
	default:
		return NOTIFY_OK;
	}

	mutex_lock(&cpuidle_lock);

	dev = per_cpu(cpuidle_devices, cpu);
	if (dev && dev->coupled) {
		switch (event) {
		case CPU_UP_PREPARE:
		case CPU_DOWN_PREPARE:
			cpuidle_coupled_prevent_idle(dev->coupled);
			break;
		case CPU_ONLINE:
		case CPU_DEAD:
			cpuidle_coupled_update_online_cpus(dev->coupled);
			/* Fall through */
		case CPU_UP_CANCELED:
		case CPU_DOWN_FAILED:
			cpuidle_coupled_allow_idle(dev->coupled);
			break;
		}
	}

	mutex_unlock(&cpuidle_lock);
	return NOTIFY_OK;
}
/* Hotplug notifier that routes cpu transitions to cpuidle_coupled_cpu_notify. */
static struct notifier_block cpuidle_coupled_cpu_notifier = {
.notifier_call = cpuidle_coupled_cpu_notify,
};
/*
 * Registers the hotplug notifier at core_initcall time and returns the result
 * of register_cpu_notifier().
 */
static int __init cpuidle_coupled_init(void)
{
return register_cpu_notifier(&cpuidle_coupled_cpu_notifier);
}
core_initcall(cpuidle_coupled_init);

View File

@@ -0,0 +1,112 @@
/*
* Copyright 2012 Calxeda, Inc.
*
* Based on arch/arm/plat-mxc/cpuidle.c: #v3.7
* Copyright 2012 Freescale Semiconductor, Inc.
* Copyright 2012 Linaro Ltd.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* Maintainer: Rob Herring <rob.herring@calxeda.com>
*/
#include <linux/cpuidle.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/suspend.h>
#include <asm/cpuidle.h>
#include <asm/proc-fns.h>
#include <asm/smp_scu.h>
#include <asm/suspend.h>
#include <asm/cacheflush.h>
#include <asm/cp15.h>
extern void highbank_set_cpu_jump(int cpu, void *jump_addr);
extern void *scu_base_addr;
/*
 * Read the auxiliary control register (AUXCR) with a CP15 mrc
 * (p15, 0, c1, c0, 1) and return its value.
 */
static inline unsigned int get_auxcr(void)
{
unsigned int val;
asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc");
return val;
}
/*
 * Write @val to the auxiliary control register (AUXCR) with a CP15 mcr
 * (p15, 0, c1, c0, 1), followed by an isb().
 */
static inline void set_auxcr(unsigned int val)
{
asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR"
: : "r" (val) : "cc");
isb();
}
/*
 * Undo the changes made by calxeda_idle_finish() before it idled: set CR_C in
 * the control register again, set bit 0x40 in AUXCR again, and put the SCU
 * back into SCU_PM_NORMAL.
 * NOTE(review): 0x40 is presumably the AUXCR SMP/coherency bit - confirm
 * against the core's reference manual.
 */
static noinline void calxeda_idle_restore(void)
{
set_cr(get_cr() | CR_C);
set_auxcr(get_auxcr() | 0x40);
scu_power_mode(scu_base_addr, SCU_PM_NORMAL);
}
/*
 * Finisher passed to cpu_suspend() by calxeda_pwrdown_idle(). Flushes the
 * caches, clears AUXCR bit 0x40 and CR_C, requests SCU_PM_DORMANT and then
 * idles the cpu. If execution continues past cpu_do_idle(), the previous
 * settings are restored and 1 is returned.
 * @val: argument forwarded by cpu_suspend(); unused here.
 * NOTE(review): the non-zero return presumably tells cpu_suspend() that the
 * power-down did not happen - confirm against the cpu_suspend() contract.
 */
static int calxeda_idle_finish(unsigned long val)
{
/* Already flushed cache, but do it again as the outer cache functions
* dirty the cache with spinlocks */
flush_cache_all();
set_auxcr(get_auxcr() & ~0x40);
set_cr(get_cr() & ~CR_C);
scu_power_mode(scu_base_addr, SCU_PM_DORMANT);
cpu_do_idle();
/* Restore things if we didn't enter power-gating */
calxeda_idle_restore();
return 1;
}
/*
 * .enter handler for the "PG" (power gate) state. Points this cpu's jump
 * address at cpu_resume, then suspends the cpu through calxeda_idle_finish().
 * The result of cpu_suspend() is not examined; @index is returned unchanged.
 */
static int calxeda_pwrdown_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv,
				int index)
{
	int this_cpu = smp_processor_id();

	highbank_set_cpu_jump(this_cpu, cpu_resume);
	cpu_suspend(0, calxeda_idle_finish);

	return index;
}
/*
 * cpuidle driver description for Calxeda: state 0 is the generic ARM WFI
 * state, state 1 ("PG") power-gates the cpu via calxeda_pwrdown_idle().
 */
static struct cpuidle_driver calxeda_idle_driver = {
.name = "calxeda_idle",
.states = {
ARM_CPUIDLE_WFI_STATE,
{
.name = "PG",
.desc = "Power Gate",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 30,
.power_usage = 50,
.target_residency = 200,
.enter = calxeda_pwrdown_idle,
},
},
/* Must match the number of entries in .states above. */
.state_count = 2,
};
/*
 * Register the Calxeda cpuidle driver, but only on machines compatible with
 * "calxeda,highbank". Returns -ENODEV on any other machine, otherwise the
 * result of cpuidle_register().
 */
static int __init calxeda_cpuidle_init(void)
{
	if (of_machine_is_compatible("calxeda,highbank"))
		return cpuidle_register(&calxeda_idle_driver, NULL);

	return -ENODEV;
}
module_init(calxeda_cpuidle_init);

View File

@@ -0,0 +1,93 @@
/*
* CPU idle Marvell Kirkwood SoCs
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*
* The cpu idle uses wait-for-interrupt and DDR self refresh in order
* to implement two idle states -
* #1 wait-for-interrupt
* #2 wait-for-interrupt and DDR self refresh
*
* Maintainer: Jason Cooper <jason@lakedaemon.net>
* Maintainer: Andrew Lunn <andrew@lunn.ch>
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/cpuidle.h>
#include <linux/io.h>
#include <linux/export.h>
#include <asm/proc-fns.h>
#include <asm/cpuidle.h>
#define KIRKWOOD_MAX_STATES 2
static void __iomem *ddr_operation_base;
/*
 * Actual code that puts the SoC in different idle states.
 *
 * .enter handler for state 1 ("DDR SR"): writes 0x7 to the DDR operation
 * register mapped by the probe routine, then waits for an interrupt.
 * Returns @index unchanged.
 * NOTE(review): 0x7 is presumably the DDR self-refresh command, going by the
 * state description - confirm against the SoC documentation.
 */
static int kirkwood_enter_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
writel(0x7, ddr_operation_base);
cpu_do_idle();
return index;
}
/*
 * cpuidle driver description for Kirkwood: state 0 is the generic ARM WFI
 * state, state 1 adds DDR self refresh via kirkwood_enter_idle().
 */
static struct cpuidle_driver kirkwood_idle_driver = {
.name = "kirkwood_idle",
.owner = THIS_MODULE,
.states[0] = ARM_CPUIDLE_WFI_STATE,
.states[1] = {
.enter = kirkwood_enter_idle,
.exit_latency = 10,
.target_residency = 100000,
.flags = CPUIDLE_FLAG_TIME_VALID,
.name = "DDR SR",
.desc = "WFI and DDR Self Refresh",
},
.state_count = KIRKWOOD_MAX_STATES,
};
/*
 * Platform probe: map the DDR operation register from the device's first
 * memory resource and register the cpuidle driver.
 *
 * Returns -EINVAL if the resource is missing, the error encoded by
 * devm_ioremap_resource() if the mapping fails, otherwise the result of
 * cpuidle_register().
 */
static int kirkwood_cpuidle_probe(struct platform_device *pdev)
{
	struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);

	if (!mem)
		return -EINVAL;

	ddr_operation_base = devm_ioremap_resource(&pdev->dev, mem);
	if (IS_ERR(ddr_operation_base))
		return PTR_ERR(ddr_operation_base);

	return cpuidle_register(&kirkwood_idle_driver, NULL);
}
/*
 * Platform remove: unregister the cpuidle driver. Always returns 0.
 * NOTE(review): this function has external linkage although the only visible
 * user is the platform_driver in this file - consider making it static once
 * it is confirmed that nothing else references it.
 */
int kirkwood_cpuidle_remove(struct platform_device *pdev)
{
cpuidle_unregister(&kirkwood_idle_driver);
return 0;
}
/* Platform driver binding for the "kirkwood_cpuidle" platform device. */
static struct platform_driver kirkwood_cpuidle_driver = {
.probe = kirkwood_cpuidle_probe,
.remove = kirkwood_cpuidle_remove,
.driver = {
.name = "kirkwood_cpuidle",
.owner = THIS_MODULE,
},
};
/* Generates the module init/exit that register and unregister the driver. */
module_platform_driver(kirkwood_cpuidle_driver);
MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>");
MODULE_DESCRIPTION("Kirkwood cpu idle driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:kirkwood-cpuidle");

583
drivers/cpuidle/cpuidle.c Normal file
View File

@@ -0,0 +1,583 @@
/*
* cpuidle.c - core cpuidle infrastructure
*
* (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Shaohua Li <shaohua.li@intel.com>
* Adam Belay <abelay@novell.com>
*
* This code is licenced under the GPL.
*/
#include <linux/clockchips.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <trace/events/power.h>
#include "cpuidle.h"
/* Per-cpu pointer to the device registered for that cpu; NULL when none. */
DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
/* Per-cpu device storage used by cpuidle_register()/cpuidle_unregister(). */
DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev);
/* Taken around device registration and idle-handler install/uninstall. */
DEFINE_MUTEX(cpuidle_lock);
/* All registered devices, linked through cpuidle_device.device_list. */
LIST_HEAD(cpuidle_detected_devices);
/* Number of enabled devices; gates cpuidle_{install,uninstall}_idle_handler(). */
static int enabled_devices;
/* Non-zero disables cpuidle ("off" module parameter or disable_cpuidle()). */
static int off __read_mostly;
/* Non-zero while the cpuidle idle handler is installed. */
static int initialized __read_mostly;
/* Returns non-zero when cpuidle has been switched off (see "off"). */
int cpuidle_disabled(void)
{
return off;
}
/* Switches cpuidle off; cpuidle_idle_call() then returns -ENODEV. */
void disable_cpuidle(void)
{
off = 1;
}
static int __cpuidle_register_device(struct cpuidle_device *dev);
/**
 * cpuidle_play_dead - cpu off-lining
 *
 * Scans the driver's states from the highest index down to
 * CPUIDLE_DRIVER_STATE_START and calls the first ->enter_dead() callback
 * it finds.
 *
 * Returns in case of an error or no driver
 */
int cpuidle_play_dead(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int idx;

	if (!drv)
		return -ENODEV;

	/* Find lowest-power state that supports long-term idle */
	idx = drv->state_count - 1;
	while (idx >= CPUIDLE_DRIVER_STATE_START) {
		if (drv->states[idx].enter_dead)
			return drv->states[idx].enter_dead(dev, idx);
		idx--;
	}

	return -ENODEV;
}
/**
 * cpuidle_enter_state - enter the state and update stats
 * @dev: cpuidle device for this cpu
 * @drv: cpuidle driver for this cpu
 * @index: index into drv->states of the state to enter
 *
 * Times the state's ->enter() callback, stores the elapsed time in
 * microseconds (clamped to INT_MAX) in dev->last_residency and accounts it
 * to the state the callback reports having entered.
 *
 * Returns the value returned by ->enter(). When that value is negative no
 * statistics are updated and dev->last_residency is reset to 0.
 */
int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
int index)
{
int entered_state;
struct cpuidle_state *target_state = &drv->states[index];
ktime_t time_start, time_end;
s64 diff;
time_start = ktime_get();
entered_state = target_state->enter(dev, drv, index);
time_end = ktime_get();
/* Interrupts are re-enabled only after the end timestamp has been taken. */
local_irq_enable();
diff = ktime_to_us(ktime_sub(time_end, time_start));
if (diff > INT_MAX)
diff = INT_MAX;
dev->last_residency = (int) diff;
if (entered_state >= 0) {
/* Update cpuidle counters */
/* This can be moved to within driver enter routine
* but that results in multiple copies of same code.
*/
dev->states_usage[entered_state].time += dev->last_residency;
dev->states_usage[entered_state].usage++;
} else {
dev->last_residency = 0;
}
return entered_state;
}
/**
 * cpuidle_idle_call - the main idle loop
 *
 * Asks the current governor for a state, enters it (through the coupled
 * path when the state is coupled) and lets the governor reflect on the
 * result.
 *
 * NOTE: no locks or semaphores should be used here
 *
 * Returns 0 when a state was entered or a reschedule was pending, -ENODEV
 * when cpuidle is off or not initialized, and -EBUSY when this cpu has no
 * enabled device.
 */
int cpuidle_idle_call(void)
{
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
struct cpuidle_driver *drv;
int next_state, entered_state;
if (off)
return -ENODEV;
if (!initialized)
return -ENODEV;
/* check if the device is ready */
if (!dev || !dev->enabled)
return -EBUSY;
drv = cpuidle_get_cpu_driver(dev);
/* ask the governor for the next state */
next_state = cpuidle_curr_governor->select(drv, dev);
/* Work showed up while selecting: skip idling and report zero residency. */
if (need_resched()) {
dev->last_residency = 0;
/* give the governor an opportunity to reflect on the outcome */
if (cpuidle_curr_governor->reflect)
cpuidle_curr_governor->reflect(dev, next_state);
local_irq_enable();
return 0;
}
trace_cpu_idle_rcuidle(next_state, dev->cpu);
/* States flagged TIMER_STOP are bracketed by broadcast enter/exit notifications. */
if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
&dev->cpu);
if (cpuidle_state_is_coupled(dev, drv, next_state))
entered_state = cpuidle_enter_state_coupled(dev, drv,
next_state);
else
entered_state = cpuidle_enter_state(dev, drv, next_state);
if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
&dev->cpu);
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
/* give the governor an opportunity to reflect on the outcome */
if (cpuidle_curr_governor->reflect)
cpuidle_curr_governor->reflect(dev, entered_state);
return 0;
}
/**
 * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
 *
 * Does nothing while no device is enabled.
 */
void cpuidle_install_idle_handler(void)
{
	if (!enabled_devices)
		return;

	/* Make sure all changes finished before we switch to new idle */
	smp_wmb();
	initialized = 1;
}
/**
 * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
 *
 * Does nothing while no device is enabled. Otherwise clears the
 * "initialized" flag and kicks every cpu synchronously.
 */
void cpuidle_uninstall_idle_handler(void)
{
	if (!enabled_devices)
		return;

	initialized = 0;
	kick_all_cpus_sync();
}
/**
 * cpuidle_pause_and_lock - temporarily disables CPUIDLE
 *
 * Takes cpuidle_lock and uninstalls the idle handler. The lock stays held
 * on return; pair with cpuidle_resume_and_unlock().
 */
void cpuidle_pause_and_lock(void)
{
mutex_lock(&cpuidle_lock);
cpuidle_uninstall_idle_handler();
}
EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
/**
 * cpuidle_resume_and_unlock - resumes CPUIDLE operation
 *
 * Reinstalls the idle handler and releases cpuidle_lock, which the caller
 * must hold (see cpuidle_pause_and_lock()).
 */
void cpuidle_resume_and_unlock(void)
{
cpuidle_install_idle_handler();
mutex_unlock(&cpuidle_lock);
}
EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
/*
 * Currently used in suspend/resume path to suspend cpuidle.
 * Uninstalls the idle handler under cpuidle_lock; the lock is released
 * again before returning.
 */
void cpuidle_pause(void)
{
mutex_lock(&cpuidle_lock);
cpuidle_uninstall_idle_handler();
mutex_unlock(&cpuidle_lock);
}
/*
 * Currently used in suspend/resume path to resume cpuidle.
 * Reinstalls the idle handler under cpuidle_lock.
 */
void cpuidle_resume(void)
{
mutex_lock(&cpuidle_lock);
cpuidle_install_idle_handler();
mutex_unlock(&cpuidle_lock);
}
#ifdef CONFIG_ARCH_HAS_CPU_RELAX
/*
 * Enter callback of the polling state: enables interrupts and spins with
 * cpu_relax() until a reschedule is needed. The time spent spinning is
 * stored in dev->last_residency in microseconds, clamped to INT_MAX.
 *
 * Returns the index it was asked to enter.
 */
static int poll_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
ktime_t t1, t2;
s64 diff;
t1 = ktime_get();
local_irq_enable();
while (!need_resched())
cpu_relax();
t2 = ktime_get();
diff = ktime_to_us(ktime_sub(t2, t1));
if (diff > INT_MAX)
diff = INT_MAX;
dev->last_residency = (int) diff;
return index;
}
static void poll_idle_init(struct cpuidle_driver *drv)
{
struct cpuidle_state *state = &drv->states[0];
snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
state->exit_latency = 0;
state->target_residency = 0;
state->power_usage = -1;
state->flags = 0;
state->enter = poll_idle;
state->disabled = false;
}
#else
/* No polling state without CONFIG_ARCH_HAS_CPU_RELAX: state 0 is left untouched. */
static void poll_idle_init(struct cpuidle_driver *drv) {}
#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
/**
 * cpuidle_enable_device - enables idle PM for a CPU
 * @dev: the CPU
 *
 * Registers the device first if that has not happened yet, creates its
 * sysfs entries, runs the governor's ->enable() hook and resets the usage
 * statistics before marking the device enabled.
 *
 * This function must be called between cpuidle_pause_and_lock and
 * cpuidle_resume_and_unlock when used externally.
 *
 * Returns 0 on success or when already enabled, -EINVAL for a NULL @dev,
 * -EIO without driver or governor, or the error of the failing step.
 */
int cpuidle_enable_device(struct cpuidle_device *dev)
{
	struct cpuidle_driver *drv;
	int err;
	int idx;

	if (!dev)
		return -EINVAL;

	if (dev->enabled)
		return 0;

	drv = cpuidle_get_cpu_driver(dev);
	if (!drv || !cpuidle_curr_governor)
		return -EIO;

	if (!dev->state_count)
		dev->state_count = drv->state_count;

	if (dev->registered == 0) {
		err = __cpuidle_register_device(dev);
		if (err)
			return err;
	}

	poll_idle_init(drv);

	err = cpuidle_add_device_sysfs(dev);
	if (err)
		return err;

	if (cpuidle_curr_governor->enable) {
		err = cpuidle_curr_governor->enable(drv, dev);
		if (err) {
			/* undo the sysfs entries created above */
			cpuidle_remove_device_sysfs(dev);
			return err;
		}
	}

	for (idx = 0; idx < dev->state_count; idx++) {
		dev->states_usage[idx].usage = 0;
		dev->states_usage[idx].time = 0;
	}
	dev->last_residency = 0;

	/* publish the reset state before the device is seen as enabled */
	smp_wmb();

	dev->enabled = 1;
	enabled_devices++;

	return 0;
}
EXPORT_SYMBOL_GPL(cpuidle_enable_device);
/**
 * cpuidle_disable_device - disables idle PM for a CPU
 * @dev: the CPU
 *
 * Silently returns when @dev is NULL or not enabled, or when no driver or
 * governor is present.
 *
 * This function must be called between cpuidle_pause_and_lock and
 * cpuidle_resume_and_unlock when used externally.
 */
void cpuidle_disable_device(struct cpuidle_device *dev)
{
	struct cpuidle_driver *drv;

	if (!dev || !dev->enabled)
		return;

	drv = cpuidle_get_cpu_driver(dev);
	if (!drv || !cpuidle_curr_governor)
		return;

	dev->enabled = 0;

	if (cpuidle_curr_governor->disable)
		cpuidle_curr_governor->disable(drv, dev);

	cpuidle_remove_device_sysfs(dev);
	enabled_devices--;
}
EXPORT_SYMBOL_GPL(cpuidle_disable_device);
/**
 * __cpuidle_register_device - internal register function called before register
 * and enable routines
 * @dev: the cpu
 *
 * Pins the driver module, publishes @dev in the per-cpu pointer and the
 * detected-devices list, then creates the sysfs and coupled-state
 * bookkeeping. Every step is undone in reverse order on failure.
 *
 * cpuidle_lock mutex must be held before this is called
 *
 * Returns 0 on success, -EINVAL when no driver is bound to the cpu or its
 * module cannot be pinned, or the error of the failing registration step.
 */
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
	int ret;
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

	/* Without a driver there is no ->owner to pin and nothing to register. */
	if (!drv)
		return -EINVAL;

	if (!try_module_get(drv->owner))
		return -EINVAL;

	per_cpu(cpuidle_devices, dev->cpu) = dev;
	list_add(&dev->device_list, &cpuidle_detected_devices);

	ret = cpuidle_add_sysfs(dev);
	if (ret)
		goto err_sysfs;

	ret = cpuidle_coupled_register_device(dev);
	if (ret)
		goto err_coupled;

	dev->registered = 1;
	return 0;

err_coupled:
	cpuidle_remove_sysfs(dev);
err_sysfs:
	list_del(&dev->device_list);
	per_cpu(cpuidle_devices, dev->cpu) = NULL;
	module_put(drv->owner);
	return ret;
}
/**
 * cpuidle_register_device - registers a CPU's idle PM feature
 * @dev: the cpu
 *
 * Registers @dev under cpuidle_lock, then tries to enable it and installs
 * the idle handler. The result of the enable attempt is not reported.
 *
 * Returns 0 on success, -EINVAL for a NULL @dev, or the registration error.
 */
int cpuidle_register_device(struct cpuidle_device *dev)
{
	int err;

	if (!dev)
		return -EINVAL;

	mutex_lock(&cpuidle_lock);

	err = __cpuidle_register_device(dev);
	if (!err) {
		cpuidle_enable_device(dev);
		cpuidle_install_idle_handler();
	}

	mutex_unlock(&cpuidle_lock);

	return err;
}
EXPORT_SYMBOL_GPL(cpuidle_register_device);
/**
 * cpuidle_unregister_device - unregisters a CPU's idle PM feature
 * @dev: the cpu
 *
 * Disables the device, removes its sysfs entries, unlinks it from the
 * detected-devices list and the per-cpu pointer, and drops the driver
 * module reference taken at registration.
 *
 * A NULL or unregistered @dev is ignored. The "registered" flag is cleared
 * so that a second call does not unlink the device and drop the module
 * reference again.
 */
void cpuidle_unregister_device(struct cpuidle_device *dev)
{
	struct cpuidle_driver *drv;

	if (!dev || dev->registered == 0)
		return;

	drv = cpuidle_get_cpu_driver(dev);

	cpuidle_pause_and_lock();

	cpuidle_disable_device(dev);

	cpuidle_remove_sysfs(dev);
	list_del(&dev->device_list);
	per_cpu(cpuidle_devices, dev->cpu) = NULL;

	cpuidle_coupled_unregister_device(dev);

	/* Mark as gone while still holding cpuidle_lock. */
	dev->registered = 0;

	cpuidle_resume_and_unlock();

	/* The driver may already be gone; only drop a reference we can reach. */
	if (drv)
		module_put(drv->owner);
}
EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
/**
* cpuidle_unregister: unregister a driver and the devices. This function
* can be used only if the driver has been previously registered through
* the cpuidle_register function.
*
* @drv: a valid pointer to a struct cpuidle_driver
*/
void cpuidle_unregister(struct cpuidle_driver *drv)
{
int cpu;
struct cpuidle_device *device;
for_each_possible_cpu(cpu) {
device = &per_cpu(cpuidle_dev, cpu);
cpuidle_unregister_device(device);
}
cpuidle_unregister_driver(drv);
}
EXPORT_SYMBOL_GPL(cpuidle_unregister);
/**
 * cpuidle_register: registers the driver and the cpu devices with the
 * coupled_cpus passed as parameter. This function implements the
 * initialization pattern common to the arch specific drivers. The
 * devices are globally defined in this file.
 *
 * @drv : a valid pointer to a struct cpuidle_driver
 * @coupled_cpus: a cpumask for the coupled states
 *
 * When registering a device fails, everything registered so far is undone
 * through cpuidle_unregister().
 *
 * Returns 0 on success, < 0 otherwise
 */
int cpuidle_register(struct cpuidle_driver *drv,
		     const struct cpumask *const coupled_cpus)
{
	struct cpuidle_device *device;
	int cpu;
	int ret;

	ret = cpuidle_register_driver(drv);
	if (ret) {
		pr_err("failed to register cpuidle driver\n");
		return ret;
	}

	for_each_possible_cpu(cpu) {
		device = &per_cpu(cpuidle_dev, cpu);
		device->cpu = cpu;

#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
		/*
		 * On multiplatform for ARM, the coupled idle states could be
		 * enabled in the kernel even if the cpuidle driver does not
		 * use it. Note, coupled_cpus is a struct copy.
		 */
		if (coupled_cpus)
			device->coupled_cpus = *coupled_cpus;
#endif
		ret = cpuidle_register_device(device);
		if (ret) {
			pr_err("Failed to register cpuidle device for cpu%d\n", cpu);
			cpuidle_unregister(drv);
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register);
#ifdef CONFIG_SMP
/* Cross-cpu call target: intentionally empty, the interrupt is the wake-up. */
static void smp_callback(void *v)
{
/* we already woke the CPU up, nothing more to do */
}
/*
* This function gets called when a part of the kernel has a new latency
* requirement. This means we need to get all processors out of their C-state,
* and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
* wakes them all right up.
*
* The notifier arguments are unused. Always returns NOTIFY_OK.
*/
static int cpuidle_latency_notify(struct notifier_block *b,
unsigned long l, void *v)
{
smp_call_function(smp_callback, NULL, 1);
return NOTIFY_OK;
}
/* Notifier block hooked up by latency_notifier_init(). */
static struct notifier_block cpuidle_latency_notifier = {
.notifier_call = cpuidle_latency_notify,
};
/* Subscribes @n to PM_QOS_CPU_DMA_LATENCY changes. */
static inline void latency_notifier_init(struct notifier_block *n)
{
pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
}
#else /* CONFIG_SMP */
/* Without SMP no notifier is registered. */
#define latency_notifier_init(x) do { } while (0)
#endif /* CONFIG_SMP */
/**
 * cpuidle_init - core initializer
 *
 * Creates the cpuidle sysfs interface under the cpu subsystem root and, on
 * success, hooks up the latency notifier.
 *
 * Returns 0 on success, -ENODEV when cpuidle is disabled, or the error
 * reported by cpuidle_add_interface().
 */
static int __init cpuidle_init(void)
{
	int err;

	if (cpuidle_disabled())
		return -ENODEV;

	err = cpuidle_add_interface(cpu_subsys.dev_root);
	if (err)
		return err;

	latency_notifier_init(&cpuidle_latency_notifier);

	return 0;
}

module_param(off, int, 0444);
core_initcall(cpuidle_init);

66
drivers/cpuidle/cpuidle.h Normal file
View File

@@ -0,0 +1,66 @@
/*
* cpuidle.h - The internal header file
*/
#ifndef __DRIVER_CPUIDLE_H
#define __DRIVER_CPUIDLE_H
/* For internal use only */
/* Governor currently in use (NULL when none) and the list of registered governors. */
extern struct cpuidle_governor *cpuidle_curr_governor;
extern struct list_head cpuidle_governors;
/* Registered devices and the mutex taken around (un)registration. */
extern struct list_head cpuidle_detected_devices;
extern struct mutex cpuidle_lock;
/* Spinlock taken around driver (un)registration and refcounting. */
extern spinlock_t cpuidle_driver_lock;
extern int cpuidle_disabled(void);
extern int cpuidle_enter_state(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state);
/* idle loop */
extern void cpuidle_install_idle_handler(void);
extern void cpuidle_uninstall_idle_handler(void);
/* governors */
extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
/* sysfs */
struct device;
extern int cpuidle_add_interface(struct device *dev);
extern void cpuidle_remove_interface(struct device *dev);
extern int cpuidle_add_device_sysfs(struct cpuidle_device *device);
extern void cpuidle_remove_device_sysfs(struct cpuidle_device *device);
extern int cpuidle_add_sysfs(struct cpuidle_device *dev);
extern void cpuidle_remove_sysfs(struct cpuidle_device *dev);
/*
 * Coupled idle state support: declarations when
 * CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is set, inline no-op stubs otherwise.
 */
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int state);
int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state);
int cpuidle_coupled_register_device(struct cpuidle_device *dev);
void cpuidle_coupled_unregister_device(struct cpuidle_device *dev);
#else
/* Stub: no state is ever coupled. */
static inline bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int state)
{
return false;
}
/* Stub: reports failure (-1) because no coupled state can be entered. */
static inline int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state)
{
return -1;
}
/* Stub: nothing to register, always succeeds. */
static inline int cpuidle_coupled_register_device(struct cpuidle_device *dev)
{
return 0;
}
/* Stub: nothing to unregister. */
static inline void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
{
}
#endif
#endif /* __DRIVER_CPUIDLE_H */

270
drivers/cpuidle/driver.c Normal file
View File

@@ -0,0 +1,270 @@
/*
* driver.c - driver support
*
* (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Shaohua Li <shaohua.li@intel.com>
* Adam Belay <abelay@novell.com>
*
* This code is licenced under the GPL.
*/
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/clockchips.h>
#include "cpuidle.h"
/* Taken around driver registration, unregistration and refcount updates. */
DEFINE_SPINLOCK(cpuidle_driver_lock);
/* Accessors defined further down; their storage depends on CONFIG_CPU_IDLE_MULTIPLE_DRIVERS. */
static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu);
static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu);
/*
 * Cross-cpu call target: forwards the clockevents notification reason
 * carried in @arg (cast back to an integer) for the cpu it runs on.
 */
static void cpuidle_setup_broadcast_timer(void *arg)
{
int cpu = smp_processor_id();
clockevents_notify((long)(arg), &cpu);
}
/*
 * Reset the driver refcount and, when any state carries
 * CPUIDLE_FLAG_TIMER_STOP, mark the driver as needing the broadcast timer
 * and switch broadcasting on for @cpu.
 */
static void __cpuidle_driver_init(struct cpuidle_driver *drv, int cpu)
{
	int idx = drv->state_count;

	drv->refcnt = 0;

	while (--idx >= 0) {
		if (drv->states[idx].flags & CPUIDLE_FLAG_TIMER_STOP) {
			drv->bctimer = 1;
			on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer,
					 (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
			return;
		}
	}
}
/*
 * Bind @drv to @cpu.
 *
 * Returns 0 on success, -EINVAL for a NULL driver or one without states,
 * -ENODEV when cpuidle is disabled, and -EBUSY when a driver is already
 * bound to @cpu.
 */
static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
{
if (!drv || !drv->state_count)
return -EINVAL;
if (cpuidle_disabled())
return -ENODEV;
if (__cpuidle_get_cpu_driver(cpu))
return -EBUSY;
__cpuidle_driver_init(drv, cpu);
__cpuidle_set_cpu_driver(drv, cpu);
return 0;
}
/*
 * Unbind @drv from @cpu; does nothing when @drv is not the bound driver.
 *
 * NOTE(review): when the refcount is still positive the driver stays bound
 * (with a warning), yet the broadcast timer is switched off below anyway --
 * confirm this is intended.
 */
static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu)
{
if (drv != __cpuidle_get_cpu_driver(cpu))
return;
if (!WARN_ON(drv->refcnt > 0))
__cpuidle_set_cpu_driver(NULL, cpu);
if (drv->bctimer) {
drv->bctimer = 0;
on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer,
(void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1);
}
}
#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
/* Multiple-driver configuration: one driver pointer per cpu. */
static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
/* Store @drv (possibly NULL) as the driver of @cpu. */
static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
{
per_cpu(cpuidle_drivers, cpu) = drv;
}
/* Return the driver bound to @cpu, or NULL. */
static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
{
return per_cpu(cpuidle_drivers, cpu);
}
/* Unbind @drv from every present cpu. */
static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv)
{
int cpu;
for_each_present_cpu(cpu)
__cpuidle_unregister_driver(drv, cpu);
}
/*
 * Bind @drv to every present cpu. When binding fails for one cpu, the cpus
 * bound before it are unbound again.
 *
 * Returns 0 on success or the error of the failing cpu.
 */
static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
{
	int failed_cpu, undo_cpu;
	int err = 0;

	for_each_present_cpu(failed_cpu) {
		err = __cpuidle_register_driver(drv, failed_cpu);
		if (err)
			goto rollback;
	}

	return err;

rollback:
	for_each_present_cpu(undo_cpu) {
		if (undo_cpu == failed_cpu)
			break;
		__cpuidle_unregister_driver(drv, undo_cpu);
	}

	return err;
}
/*
 * Bind @drv to @cpu under cpuidle_driver_lock.
 * Returns the result of __cpuidle_register_driver().
 */
int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu)
{
int ret;
spin_lock(&cpuidle_driver_lock);
ret = __cpuidle_register_driver(drv, cpu);
spin_unlock(&cpuidle_driver_lock);
return ret;
}
/* Unbind @drv from @cpu under cpuidle_driver_lock. */
void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu)
{
spin_lock(&cpuidle_driver_lock);
__cpuidle_unregister_driver(drv, cpu);
spin_unlock(&cpuidle_driver_lock);
}
/**
 * cpuidle_register_driver - registers a driver
 * @drv: the driver
 *
 * Multiple-driver configuration: binds @drv to every present cpu under
 * cpuidle_driver_lock.
 *
 * Returns 0 on success or the error of the first cpu that failed.
 */
int cpuidle_register_driver(struct cpuidle_driver *drv)
{
int ret;
spin_lock(&cpuidle_driver_lock);
ret = __cpuidle_register_all_cpu_driver(drv);
spin_unlock(&cpuidle_driver_lock);
return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register_driver);
/**
 * cpuidle_unregister_driver - unregisters a driver
 * @drv: the driver
 *
 * Multiple-driver configuration: unbinds @drv from every present cpu under
 * cpuidle_driver_lock.
 */
void cpuidle_unregister_driver(struct cpuidle_driver *drv)
{
spin_lock(&cpuidle_driver_lock);
__cpuidle_unregister_all_cpu_driver(drv);
spin_unlock(&cpuidle_driver_lock);
}
EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
#else
/* Single-driver configuration: one driver shared by all cpus. */
static struct cpuidle_driver *cpuidle_curr_driver;
/* Store @drv (possibly NULL) as the global driver; @cpu is ignored. */
static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
{
cpuidle_curr_driver = drv;
}
/* Return the global driver, or NULL; @cpu is ignored. */
static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
{
return cpuidle_curr_driver;
}
/**
 * cpuidle_register_driver - registers a driver
 * @drv: the driver
 *
 * Single-driver configuration: registers @drv as the global driver, using
 * the calling cpu for the per-cpu part of the setup.
 *
 * Returns the result of __cpuidle_register_driver().
 */
int cpuidle_register_driver(struct cpuidle_driver *drv)
{
int ret, cpu;
/* get_cpu()/put_cpu() bracket the registration so "cpu" stays the current cpu. */
cpu = get_cpu();
spin_lock(&cpuidle_driver_lock);
ret = __cpuidle_register_driver(drv, cpu);
spin_unlock(&cpuidle_driver_lock);
put_cpu();
return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register_driver);
/**
 * cpuidle_unregister_driver - unregisters a driver
 * @drv: the driver
 *
 * Single-driver configuration: drops @drv as the global driver, using the
 * calling cpu for the per-cpu part of the teardown.
 */
void cpuidle_unregister_driver(struct cpuidle_driver *drv)
{
int cpu;
cpu = get_cpu();
spin_lock(&cpuidle_driver_lock);
__cpuidle_unregister_driver(drv, cpu);
spin_unlock(&cpuidle_driver_lock);
put_cpu();
}
EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
#endif
/**
 * cpuidle_get_driver - return the current driver
 *
 * Looks up the driver bound to the cpu this runs on. Returns NULL when no
 * driver is registered for it.
 */
struct cpuidle_driver *cpuidle_get_driver(void)
{
struct cpuidle_driver *drv;
int cpu;
cpu = get_cpu();
drv = __cpuidle_get_cpu_driver(cpu);
put_cpu();
return drv;
}
EXPORT_SYMBOL_GPL(cpuidle_get_driver);
/**
 * cpuidle_get_cpu_driver - return the driver tied with a cpu
 * @dev: the device whose cpu is looked up; may be NULL
 *
 * Returns the driver bound to dev->cpu, or NULL when @dev is NULL or no
 * driver is bound.
 */
struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
{
if (!dev)
return NULL;
return __cpuidle_get_cpu_driver(dev->cpu);
}
EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
/*
 * Take a reference on the driver of the current cpu.
 *
 * Returns the driver, or NULL when none is registered; in that case no
 * reference is taken and the caller must not call cpuidle_driver_unref().
 */
struct cpuidle_driver *cpuidle_driver_ref(void)
{
	struct cpuidle_driver *drv;

	spin_lock(&cpuidle_driver_lock);

	drv = cpuidle_get_driver();
	/* cpuidle_get_driver() returns NULL when no driver is registered. */
	if (drv)
		drv->refcnt++;

	spin_unlock(&cpuidle_driver_lock);

	return drv;
}
/*
 * Drop a reference on the driver of the current cpu. Does nothing without
 * a driver, and warns instead of decrementing when the count is already
 * zero or negative.
 */
void cpuidle_driver_unref(void)
{
struct cpuidle_driver *drv = cpuidle_get_driver();
spin_lock(&cpuidle_driver_lock);
if (drv && !WARN_ON(drv->refcnt <= 0))
drv->refcnt--;
spin_unlock(&cpuidle_driver_lock);
}

141
drivers/cpuidle/governor.c Normal file
View File

@@ -0,0 +1,141 @@
/*
* governor.c - governor support
*
* (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Shaohua Li <shaohua.li@intel.com>
* Adam Belay <abelay@novell.com>
*
* This code is licenced under the GPL.
*/
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/cpuidle.h>
#include "cpuidle.h"
/* All registered governors, linked through cpuidle_governor.governor_list. */
LIST_HEAD(cpuidle_governors);
/* Governor currently in use; NULL when none is selected. */
struct cpuidle_governor *cpuidle_curr_governor;
/**
* __cpuidle_find_governor - finds a governor of the specified name
* @str: the name
*
* Must be called with cpuidle_lock acquired.
*/
static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
{
struct cpuidle_governor *gov;
list_for_each_entry(gov, &cpuidle_governors, governor_list)
if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
return gov;
return NULL;
}
/**
 * cpuidle_switch_governor - changes the governor
 * @gov: the new target governor
 *
 * Uninstalls the idle handler, disables all devices under the old
 * governor and drops its module reference, then pins the new governor's
 * module, re-enables the devices and reinstalls the idle handler.
 *
 * NOTE: "gov" can be NULL to specify disabled
 * Must be called with cpuidle_lock acquired.
 *
 * Returns 0 on success (including when @gov is already current) and
 * -EINVAL when the new governor's module cannot be pinned.
 */
int cpuidle_switch_governor(struct cpuidle_governor *gov)
{
struct cpuidle_device *dev;
if (gov == cpuidle_curr_governor)
return 0;
cpuidle_uninstall_idle_handler();
if (cpuidle_curr_governor) {
list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
cpuidle_disable_device(dev);
module_put(cpuidle_curr_governor->owner);
}
cpuidle_curr_governor = gov;
if (gov) {
/*
 * NOTE(review): on failure cpuidle_curr_governor keeps pointing at
 * "gov" without a module reference, and a later switch would
 * module_put() it -- confirm whether it should be reset to NULL here.
 */
if (!try_module_get(cpuidle_curr_governor->owner))
return -EINVAL;
list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
cpuidle_enable_device(dev);
cpuidle_install_idle_handler();
printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
}
return 0;
}
/**
 * cpuidle_register_governor - registers a governor
 * @gov: the governor
 *
 * Appends @gov to the governor list and switches to it when no governor is
 * active or the active one has a lower rating.
 *
 * Returns 0 on success, -EINVAL for a NULL governor or one without a
 * ->select() hook, -ENODEV when cpuidle is disabled, and -EEXIST when a
 * governor of the same name is already registered.
 */
int cpuidle_register_governor(struct cpuidle_governor *gov)
{
	int ret;

	if (!gov || !gov->select)
		return -EINVAL;

	if (cpuidle_disabled())
		return -ENODEV;

	mutex_lock(&cpuidle_lock);

	if (__cpuidle_find_governor(gov->name) != NULL) {
		ret = -EEXIST;
	} else {
		ret = 0;
		list_add_tail(&gov->governor_list, &cpuidle_governors);
		if (!cpuidle_curr_governor ||
		    cpuidle_curr_governor->rating < gov->rating)
			cpuidle_switch_governor(gov);
	}

	mutex_unlock(&cpuidle_lock);

	return ret;
}
/**
 * cpuidle_replace_governor - find a replacement governor
 * @exclude_rating: the rating that will be skipped while looking for
 * new governor.
 *
 * Returns the registered governor with the highest rating among those
 * whose rating differs from @exclude_rating and is greater than zero, or
 * NULL when there is none.
 */
static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating)
{
	struct cpuidle_governor *best = NULL;
	struct cpuidle_governor *gov;
	unsigned int best_rating = 0;

	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
		if (gov->rating != exclude_rating && gov->rating > best_rating) {
			best_rating = gov->rating;
			best = gov;
		}
	}

	return best;
}
/**
 * cpuidle_unregister_governor - unregisters a governor
 * @gov: the governor
 *
 * When @gov is the active governor, a replacement with a different rating
 * is switched in first (possibly none). A NULL @gov is ignored.
 */
void cpuidle_unregister_governor(struct cpuidle_governor *gov)
{
	if (!gov)
		return;

	mutex_lock(&cpuidle_lock);

	if (gov == cpuidle_curr_governor)
		cpuidle_switch_governor(cpuidle_replace_governor(gov->rating));

	list_del(&gov->governor_list);

	mutex_unlock(&cpuidle_lock);
}

View File

@@ -0,0 +1,6 @@
#
# Makefile for cpuidle governors.
#
obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o

View File

@@ -0,0 +1,205 @@
/*
* ladder.c - the residency ladder algorithm
*
* Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
*
* (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Shaohua Li <shaohua.li@intel.com>
* Adam Belay <abelay@novell.com>
*
* This code is licenced under the GPL.
*/
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/pm_qos.h>
#include <linux/module.h>
#include <linux/jiffies.h>
#include <asm/io.h>
#include <asm/uaccess.h>
/* Default thresholds installed by ladder_enable_device() for every state. */
#define PROMOTION_COUNT 4
#define DEMOTION_COUNT 1
/* Per-state bookkeeping of the ladder governor. */
struct ladder_device_state {
/* Limits the counters and the last residency are compared against. */
struct {
u32 promotion_count;
u32 demotion_count;
u32 promotion_time;
u32 demotion_time;
} threshold;
/* Running counters, reset by ladder_do_selection(). */
struct {
int promotion_count;
int demotion_count;
} stats;
};
/* Per-cpu governor data: one entry per idle state plus the last chosen index. */
struct ladder_device {
struct ladder_device_state states[CPUIDLE_STATE_MAX];
int last_state_idx;
};
static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
/**
 * ladder_do_selection - prepares private data for a state change
 * @ldev: the ladder device
 * @old_idx: the current state index
 * @new_idx: the new target state index
 *
 * Clears the promotion/demotion counters of the state being left and
 * records @new_idx as the last selected state.
 */
static inline void ladder_do_selection(struct ladder_device *ldev,
int old_idx, int new_idx)
{
ldev->states[old_idx].stats.promotion_count = 0;
ldev->states[old_idx].stats.demotion_count = 0;
ldev->last_state_idx = new_idx;
}
/**
 * ladder_select_state - selects the next state to enter
 * @drv: cpuidle driver
 * @dev: the CPU
 *
 * Moves at most one step up per call, and steps down either one state at a
 * time (short residency) or as far as needed to meet the PM QoS latency
 * requirement.
 *
 * Returns the index of the state to enter.
 */
static int ladder_select_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
struct ladder_device_state *last_state;
int last_residency, last_idx = ldev->last_state_idx;
int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0)) {
ladder_do_selection(ldev, last_idx, 0);
return 0;
}
last_state = &ldev->states[last_idx];
/* Without a valid measurement, use a residency just above the promotion threshold. */
if (drv->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID) {
last_residency = cpuidle_get_last_residency(dev) - \
drv->states[last_idx].exit_latency;
}
else
last_residency = last_state->threshold.promotion_time + 1;
/* consider promotion */
if (last_idx < drv->state_count - 1 &&
!drv->states[last_idx + 1].disabled &&
!dev->states_usage[last_idx + 1].disable &&
last_residency > last_state->threshold.promotion_time &&
drv->states[last_idx + 1].exit_latency <= latency_req) {
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
ladder_do_selection(ldev, last_idx, last_idx + 1);
return last_idx + 1;
}
}
/* consider demotion */
/* Forced demotion: the current state is disabled or exceeds the latency limit. */
if (last_idx > CPUIDLE_DRIVER_STATE_START &&
(drv->states[last_idx].disabled ||
dev->states_usage[last_idx].disable ||
drv->states[last_idx].exit_latency > latency_req)) {
int i;
for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
if (drv->states[i].exit_latency <= latency_req)
break;
}
ladder_do_selection(ldev, last_idx, i);
return i;
}
/* Residency-based demotion: step down by one state. */
if (last_idx > CPUIDLE_DRIVER_STATE_START &&
last_residency < last_state->threshold.demotion_time) {
last_state->stats.demotion_count++;
last_state->stats.promotion_count = 0;
if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
ladder_do_selection(ldev, last_idx, last_idx - 1);
return last_idx - 1;
}
}
/* otherwise remain at the current state */
return last_idx;
}
/**
 * ladder_enable_device - setup for the governor
 * @drv: cpuidle driver
 * @dev: the CPU
 *
 * Resets the per-cpu ladder data: starts at CPUIDLE_DRIVER_STATE_START,
 * clears the counters and derives the time thresholds of each state from
 * its exit latency.
 *
 * Always returns 0.
 */
static int ladder_enable_device(struct cpuidle_driver *drv,
				struct cpuidle_device *dev)
{
	struct ladder_device *ldev = &per_cpu(ladder_devices, dev->cpu);
	int idx;

	ldev->last_state_idx = CPUIDLE_DRIVER_STATE_START;

	for (idx = 0; idx < drv->state_count; idx++) {
		struct cpuidle_state *cstate = &drv->states[idx];
		struct ladder_device_state *entry = &ldev->states[idx];

		entry->threshold.promotion_count = PROMOTION_COUNT;
		entry->threshold.demotion_count = DEMOTION_COUNT;
		entry->stats.promotion_count = 0;
		entry->stats.demotion_count = 0;

		/* the last state cannot be promoted from */
		if (idx < drv->state_count - 1)
			entry->threshold.promotion_time = cstate->exit_latency;
		/* the first state cannot be demoted from */
		if (idx > 0)
			entry->threshold.demotion_time = cstate->exit_latency;
	}

	return 0;
}
/**
 * ladder_reflect - update the correct last_state_idx
 * @dev: the CPU
 * @index: the index of actual state entered
 *
 * Only indices greater than zero are recorded; zero and negative values
 * leave last_state_idx unchanged.
 */
static void ladder_reflect(struct cpuidle_device *dev, int index)
{
struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
if (index > 0)
ldev->last_state_idx = index;
}
/* Governor descriptor handed to cpuidle_register_governor(). */
static struct cpuidle_governor ladder_governor = {
.name = "ladder",
.rating = 10,
.enable = ladder_enable_device,
.select = ladder_select_state,
.reflect = ladder_reflect,
.owner = THIS_MODULE,
};
/**
 * init_ladder - initializes the governor
 *
 * Returns the result of cpuidle_register_governor().
 */
static int __init init_ladder(void)
{
return cpuidle_register_governor(&ladder_governor);
}
/**
 * exit_ladder - exits the governor
 *
 * Unregisters the ladder governor from the cpuidle core.
 */
static void __exit exit_ladder(void)
{
cpuidle_unregister_governor(&ladder_governor);
}
MODULE_LICENSE("GPL");
module_init(init_ladder);
module_exit(exit_ladder);

View File

@@ -0,0 +1,455 @@
/*
* menu.c - the menu idle governor
*
* Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
* Copyright (C) 2009 Intel Corporation
* Author:
* Arjan van de Ven <arjan@linux.intel.com>
*
* This code is licenced under the GPL version 2 as described
* in the COPYING file that acompanies the Linux Kernel.
*/
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/pm_qos.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/module.h>
/* Number of correction factors kept per cpu (see struct menu_device). */
#define BUCKETS 12
/* Number of recent idle intervals remembered per cpu. */
#define INTERVALS 8
/* NOTE(review): the uses of the four constants below lie outside this excerpt. */
#define RESOLUTION 1024
#define DECAY 8
#define MAX_INTERESTING 50000
#define STDDEV_THRESH 400
/*
* Concepts and ideas behind the menu governor
*
* For the menu governor, there are 3 decision factors for picking a C
* state:
* 1) Energy break even point
* 2) Performance impact
* 3) Latency tolerance (from pmqos infrastructure)
* These three factors are treated independently.
*
* Energy break even point
* -----------------------
* C state entry and exit have an energy cost, and a certain amount of time in
* the C state is required to actually break even on this cost. CPUIDLE
* provides us this duration in the "target_residency" field. So all that we
* need is a good prediction of how long we'll be idle. Like the traditional
* menu governor, we start with the actual known "next timer event" time.
*
* Since there are other source of wakeups (interrupts for example) than
* the next timer event, this estimation is rather optimistic. To get a
* more realistic estimate, a correction factor is applied to the estimate,
* that is based on historic behavior. For example, if in the past the actual
* duration always was 50% of the next timer tick, the correction factor will
* be 0.5.
*
* menu uses a running average for this correction factor, however it uses a
* set of factors, not just a single factor. This stems from the realization
* that the ratio is dependent on the order of magnitude of the expected
* duration; if we expect 500 milliseconds of idle time the likelihood of
* getting an interrupt very early is much higher than if we expect 50 micro
* seconds of idle time. A second independent factor that has big impact on
* the actual factor is if there is (disk) IO outstanding or not.
* (as a special twist, we consider every sleep longer than 50 milliseconds
* as perfect; there are no power gains for sleeping longer than this)
*
* For these two reasons we keep an array of 12 independent factors, that gets
* indexed based on the magnitude of the expected duration as well as the
* "is IO outstanding" property.
*
* Repeatable-interval-detector
* ----------------------------
* There are some cases where "next timer" is a completely unusable predictor:
* Those cases where the interval is fixed, for example due to hardware
* interrupt mitigation, but also due to fixed transfer rate devices such as
* mice.
* For this, we use a different predictor: We track the duration of the last 8
* intervals and if the standard deviation of these 8 intervals is below a
* threshold value, we use the average of these intervals as prediction.
*
* Limiting Performance Impact
* ---------------------------
* C states, especially those with large exit latencies, can have a real
* noticeable impact on workloads, which is not acceptable for most sysadmins,
* and in addition, less performance has a power price of its own.
*
* As a general rule of thumb, menu assumes that the following heuristic
* holds:
* The busier the system, the less impact of C states is acceptable
*
* This rule-of-thumb is implemented using a performance-multiplier:
* If the exit latency times the performance multiplier is longer than
* the predicted duration, the C state is not considered a candidate
* for selection due to a too high performance impact. So the higher
* this multiplier is, the longer we need to be idle to pick a deep C
* state, and thus the less likely a busy CPU will hit such a deep
* C state.
*
* Two factors are used in determining this multiplier:
* a value of 10 is added for each point of "per cpu load average" we have.
* a value of 5 points is added for each process that is waiting for
* IO on this CPU.
* (these values are experimentally determined)
*
* The load average factor gives a longer term (few seconds) input to the
* decision, while the iowait value gives a cpu local instantaneous input.
* The iowait factor may look low, but realize that this is also already
* represented in the system load average.
*
*/
/*
 * Per-CPU bookkeeping of the menu governor (one instance per CPU, see
 * menu_devices).
 */
struct menu_device {
int last_state_idx; /* state picked by menu_select(), later overwritten by menu_reflect() */
int needs_update; /* set by menu_reflect(); menu_select() then runs menu_update() and clears it */
unsigned int expected_us; /* sleep length reported by the tick code, in microseconds */
u64 predicted_us; /* expected_us after correction, possibly replaced by the typical interval */
unsigned int exit_us; /* exit latency of the state picked by menu_select() */
unsigned int bucket; /* index into correction_factor[], computed by which_bucket() */
u64 correction_factor[BUCKETS]; /* per-bucket measured/expected ratio, scaled by RESOLUTION * DECAY */
u32 intervals[INTERVALS]; /* circular buffer of the most recent idle durations */
int interval_ptr; /* next slot of intervals[] to be written */
};
/*
 * Fixed-point helpers for load values.
 * LOAD_INT(x) drops the low FSHIFT bits of x (the integer part).
 * LOAD_FRAC(x) multiplies the bits of x below FIXED_1 by 100 and takes the
 * integer part of that, i.e. the fraction in hundredths
 * (assumes FIXED_1 == 1 << FSHIFT - TODO confirm against the defining header).
 */
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
/*
 * Return the value of this_cpu_load() rescaled to tenths: ten times the
 * integer part plus the first decimal digit of the fractional part.
 */
static int get_loadavg(void)
{
	unsigned long load = this_cpu_load();
	unsigned long whole_tenths = LOAD_INT(load) * 10;
	unsigned long frac_tenths = LOAD_FRAC(load) / 10;

	return whole_tenths + frac_tenths;
}
/*
 * Map an expected idle duration (microseconds) to a statistics bucket.
 *
 * Two groups of buckets are kept: one for when tasks are waiting on IO on
 * this CPU and one for when none are. This allows us to calculate
 * E(duration)|iowait. Within a group the bucket is chosen by order of
 * magnitude: <10, <100, <1000, <10000, <100000 and everything above.
 */
static inline int which_bucket(unsigned int duration)
{
	int base = 0;
	int decade;
	unsigned int limit = 10;

	/* the second half of the buckets is used while IO is outstanding */
	if (nr_iowait_cpu(smp_processor_id()))
		base = BUCKETS/2;

	/* count how many powers of ten (up to 10^5) the duration reaches */
	for (decade = 0; decade < 5; decade++) {
		if (duration < limit)
			break;
		limit *= 10;
	}

	return base + decade;
}
/*
 * Compute the factor by which a state's exit latency is scaled before it is
 * compared with the predicted idle time. The busier the system looks, the
 * larger the factor and the harder it becomes to pick an expensive C state.
 *
 * The result is 1, plus twice get_loadavg(), plus 10 for every task that is
 * waiting for IO on this CPU.
 */
static inline int performance_multiplier(void)
{
	/* for higher loadavg, we are more reluctant */
	int mult = 1 + 2 * get_loadavg();

	/* each IO wait task (per cpu!) adds another 10 */
	return mult + 10 * nr_iowait_cpu(smp_processor_id());
}
/* One menu_device per CPU; accessed for the local CPU in select/reflect/update. */
static DEFINE_PER_CPU(struct menu_device, menu_devices);
/* Forward declaration: menu_select() calls menu_update(), which is defined further down. */
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
/*
 * Divide a 64 bit dividend by a 32 bit divisor, rounding to the closest
 * integer. Same idea as DIV_ROUND_CLOSEST, but the division itself goes
 * through div_u64() instead of a plain 64 bit '/'.
 */
static u64 div_round64(u64 dividend, u32 divisor)
{
	/* adding half the divisor turns truncation into round-to-nearest */
	u64 biased = dividend + (divisor / 2);

	return div_u64(biased, divisor);
}
/*
 * Try detecting repeating patterns by keeping track of the last 8
 * intervals, and checking if the standard deviation of that set
 * of points is below a threshold. If it is... then use the
 * average of these 8 points as the estimated value.
 *
 * @data: per-CPU governor state; data->intervals[] is read and, when a
 *        typical interval is found, data->predicted_us is overwritten with
 *        the average. Otherwise data->predicted_us is left untouched.
 */
static void get_typical_interval(struct menu_device *data)
{
int i = 0, divisor = 0;
uint64_t max = 0, avg = 0, stddev = 0;
int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */
again:
/* first calculate average and standard deviation of the past */
max = avg = divisor = stddev = 0;
/* Sum and count the samples not above thresh, remembering the largest. */
for (i = 0; i < INTERVALS; i++) {
int64_t value = data->intervals[i];
if (value <= thresh) {
avg += value;
divisor++;
if (value > max)
max = value;
}
}
/* avg held the sum so far; turn it into the mean of the accepted samples */
do_div(avg, divisor);
/* Accumulate the squared deviation from the mean over the same samples. */
for (i = 0; i < INTERVALS; i++) {
int64_t value = data->intervals[i];
if (value <= thresh) {
int64_t diff = value - avg;
/*
 * NOTE(review): diff * diff is a signed 64 bit product; samples that
 * are extremely far apart could overflow it - confirm the range of
 * the recorded intervals.
 */
stddev += diff * diff;
}
}
/* variance first, then its integer square root */
do_div(stddev, divisor);
stddev = int_sqrt(stddev);
/*
 * If we have outliers to the upside in our distribution, discard
 * those by setting the threshold to exclude these outliers, then
 * calculate the average and standard deviation again. Once we get
 * down to the bottom 3/4 of our samples, stop excluding samples.
 *
 * This can deal with workloads that have long pauses interspersed
 * with sporadic activity with a bunch of short pauses.
 *
 * The typical interval is obtained when standard deviation is small
 * or standard deviation is small compared to the average interval.
 */
if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
|| stddev <= 20) {
data->predicted_us = avg;
return;
} else if ((divisor * 4) > INTERVALS * 3) {
/* Exclude the max interval */
/* every sample equal to the current maximum is dropped on the next pass */
thresh = max - 1;
goto again;
}
/* too few samples left and still no stable pattern: keep the old prediction */
}
/**
 * menu_select - selects the next idle state to enter
 * @drv: cpuidle driver containing state data
 * @dev: the CPU
 *
 * Returns the index of the chosen state, which is also stored in the
 * per-CPU last_state_idx.
 */
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
int i;
int multiplier;
struct timespec t;
/* fold in the statistics of the previous idle period, deferred by menu_reflect() */
if (data->needs_update) {
menu_update(drv, dev);
data->needs_update = 0;
}
data->last_state_idx = 0;
data->exit_us = 0;
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0))
return 0;
/*
 * determine the expected residency time in microseconds; the
 * sub-microsecond part of the sleep length is truncated by the division
 */
t = ktime_to_timespec(tick_nohz_get_sleep_length());
data->expected_us =
t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC;
data->bucket = which_bucket(data->expected_us);
multiplier = performance_multiplier();
/*
 * if the correction factor is 0 (eg first time init or cpu hotplug
 * etc), we actually want to start out with a unity factor.
 */
if (data->correction_factor[data->bucket] == 0)
data->correction_factor[data->bucket] = RESOLUTION * DECAY;
/* Make sure to round up for half microseconds */
data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
RESOLUTION * DECAY);
/* may replace predicted_us with the average of the recent idle intervals */
get_typical_interval(data);
/*
 * We want to default to C1 (hlt), not to busy polling
 * unless the timer is happening really really soon.
 */
if (data->expected_us > 5 &&
!drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
/*
 * Find the idle state with the lowest power while satisfying
 * our constraints.
 *
 * The loop never breaks, so the qualifying state with the highest index
 * is the one that ends up selected.
 */
for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
/* disabled by the driver or through the per-CPU usage data */
if (s->disabled || su->disable)
continue;
/* we do not expect to stay idle long enough for this state */
if (s->target_residency > data->predicted_us)
continue;
/* waking up would violate the PM QoS latency request */
if (s->exit_latency > latency_req)
continue;
/* exit latency too costly for the current performance multiplier */
if (s->exit_latency * multiplier > data->predicted_us)
continue;
data->last_state_idx = i;
data->exit_us = s->exit_latency;
}
return data->last_state_idx;
}
/**
 * menu_reflect - note which state was entered and request a later update
 * @dev: the CPU
 * @index: the index of actual entered state
 *
 * Only records the index and raises the needs_update flag (for non-negative
 * indexes); the actual statistics work is left to the next menu_select().
 *
 * NOTE: it's important to be fast here because this operation will add to
 * the overall exit latency.
 */
static void menu_reflect(struct cpuidle_device *dev, int index)
{
	struct menu_device *md = &__get_cpu_var(menu_devices);

	md->last_state_idx = index;
	if (index < 0)
		return;

	md->needs_update = 1;
}
/**
 * menu_update - attempts to guess what happened after entry
 * @drv: cpuidle driver containing state data
 * @dev: the CPU
 *
 * Updates the correction factor of the current bucket from the last idle
 * period and appends that period to the interval history.
 */
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
int last_idx = data->last_state_idx;
unsigned int last_idle_us = cpuidle_get_last_residency(dev);
struct cpuidle_state *target = &drv->states[last_idx];
unsigned int measured_us;
u64 new_factor;
/*
 * Ugh, this idle state doesn't support residency measurements, so we
 * are basically lost in the dark. As a compromise, assume we slept
 * for the whole expected time.
 */
if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
last_idle_us = data->expected_us;
measured_us = last_idle_us;
/*
 * We correct for the exit latency; we are assuming here that the
 * exit latency happens after the event that we're interested in.
 */
/* only subtract when the result stays positive (both values are unsigned) */
if (measured_us > data->exit_us)
measured_us -= data->exit_us;
/* update our correction ratio */
/* keep (DECAY - 1) / DECAY of the old factor ... */
new_factor = data->correction_factor[data->bucket]
* (DECAY - 1) / DECAY;
/* ... and add the measured/expected ratio of this period, scaled by RESOLUTION */
if (data->expected_us > 0 && measured_us < MAX_INTERESTING)
new_factor += RESOLUTION * measured_us / data->expected_us;
else
/*
 * we were idle so long that we count it as a perfect
 * prediction
 * (this branch is also taken when expected_us is 0, which avoids
 * dividing by zero above)
 */
new_factor += RESOLUTION;
/*
 * We don't want 0 as factor; we always want at least
 * a tiny bit of estimated time.
 */
if (new_factor == 0)
new_factor = 1;
data->correction_factor[data->bucket] = new_factor;
/* update the repeating-pattern data */
/* note: the raw last_idle_us is stored, not the exit-latency corrected value */
data->intervals[data->interval_ptr++] = last_idle_us;
if (data->interval_ptr >= INTERVALS)
data->interval_ptr = 0;
}
/**
 * menu_enable_device - reset the governor state of a CPU
 * @drv: cpuidle driver
 * @dev: the CPU
 *
 * Clears the whole per-CPU menu_device belonging to @dev->cpu.
 * Always returns 0.
 */
static int menu_enable_device(struct cpuidle_driver *drv,
			      struct cpuidle_device *dev)
{
	struct menu_device *md = &per_cpu(menu_devices, dev->cpu);

	memset(md, 0, sizeof(*md));

	return 0;
}
/* Governor descriptor handed to the cpuidle core by init_menu()/exit_menu(). */
static struct cpuidle_governor menu_governor = {
.name = "menu",
.rating = 20,
.enable = menu_enable_device,
.select = menu_select,
.reflect = menu_reflect,
.owner = THIS_MODULE,
};
/**
 * init_menu - registers the menu governor with the cpuidle core
 *
 * Returns whatever cpuidle_register_governor() returns.
 */
static int __init init_menu(void)
{
	int ret = cpuidle_register_governor(&menu_governor);

	return ret;
}
/**
 * exit_menu - unregisters the menu governor from the cpuidle core
 */
static void __exit exit_menu(void)
{
cpuidle_unregister_governor(&menu_governor);
}
MODULE_LICENSE("GPL");
/* hook the governor registration into module load/unload */
module_init(init_menu);
module_exit(exit_menu);

603
drivers/cpuidle/sysfs.c Normal file
View File

@@ -0,0 +1,603 @@
/*
* sysfs.c - sysfs support
*
* (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
*
* This code is licenced under the GPL.
*/
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/capability.h>
#include <linux/device.h>
#include "cpuidle.h"
/*
 * Non-zero when "cpuidle_sysfs_switch" was given on the kernel command line;
 * cpuidle_add_interface() then exposes the governor switching attributes.
 */
static unsigned int sysfs_switch;
/* Boot parameter handler: the argument is ignored, the flag is simply set. */
static int __init cpuidle_sysfs_setup(char *unused)
{
sysfs_switch = 1;
return 1;
}
__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
/**
 * show_available_governors - print the names of all registered governors
 * @dev: unused
 * @attr: unused
 * @buf: sysfs output buffer (the bound below assumes it is PAGE_SIZE bytes)
 *
 * Writes the governor names separated by single spaces, followed by a
 * newline, while holding cpuidle_lock. Returns the number of bytes written.
 */
static ssize_t show_available_governors(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	ssize_t i = 0;
	struct cpuidle_governor *tmp;

	mutex_lock(&cpuidle_lock);
	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
		/* stop while there is still room for one more name and "\n" */
		if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
			goto out;
		/*
		 * The size passed to scnprintf() includes the trailing NUL, so
		 * CPUIDLE_NAME_LEN + 1 is needed to fit a name of
		 * CPUIDLE_NAME_LEN - 1 characters plus the separating space.
		 * With only CPUIDLE_NAME_LEN the space was cut off and the next
		 * name ran into this one.
		 */
		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN + 1, "%s ", tmp->name);
	}
out:
	i += sprintf(&buf[i], "\n");
	mutex_unlock(&cpuidle_lock);
	return i;
}
/**
 * show_current_driver - print the name of the current cpuidle driver
 * @dev: unused
 * @attr: unused
 * @buf: sysfs output buffer
 *
 * Prints "none" when cpuidle_get_driver() returned NULL. Returns the number
 * of bytes written.
 *
 * NOTE(review): the driver pointer is fetched before cpuidle_driver_lock is
 * taken - confirm that the driver cannot go away in between.
 */
static ssize_t show_current_driver(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct cpuidle_driver *drv = cpuidle_get_driver();
	ssize_t len;

	spin_lock(&cpuidle_driver_lock);
	len = drv ? sprintf(buf, "%s\n", drv->name) : sprintf(buf, "none\n");
	spin_unlock(&cpuidle_driver_lock);

	return len;
}
/**
 * show_current_governor - print the name of the active governor
 * @dev: unused
 * @attr: unused
 * @buf: sysfs output buffer
 *
 * Prints "none" when no governor is set. The governor pointer is read under
 * cpuidle_lock. Returns the number of bytes written.
 */
static ssize_t show_current_governor(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	ssize_t len;

	mutex_lock(&cpuidle_lock);
	if (!cpuidle_curr_governor)
		len = sprintf(buf, "none\n");
	else
		len = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
	mutex_unlock(&cpuidle_lock);

	return len;
}
/**
 * store_current_governor - switch to the governor named in @buf
 * @dev: unused
 * @attr: unused
 * @buf: governor name, optionally terminated by a single newline
 * @count: number of bytes in @buf
 *
 * Returns @count on success, -EINVAL when the input is empty, too long for a
 * governor name or matches no registered governor, or the error reported by
 * cpuidle_switch_governor().
 */
static ssize_t store_current_governor(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	char gov_name[CPUIDLE_NAME_LEN];
	struct cpuidle_governor *gov;
	size_t len = count;
	int ret = -EINVAL;

	/* must leave room for the terminating NUL */
	if (len == 0 || len >= sizeof(gov_name))
		return -EINVAL;

	memcpy(gov_name, buf, len);
	gov_name[len] = '\0';

	/* strip one trailing newline, as written by e.g. echo */
	if (gov_name[len - 1] == '\n') {
		len--;
		gov_name[len] = '\0';
	}

	mutex_lock(&cpuidle_lock);
	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
		if (strlen(gov->name) != len || strcmp(gov->name, gov_name))
			continue;

		ret = cpuidle_switch_governor(gov);
		break;
	}
	mutex_unlock(&cpuidle_lock);

	if (ret)
		return ret;

	return count;
}
/* Default attribute set: driver and governor names, both read-only. */
static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
static struct attribute *cpuidle_default_attrs[] = {
&dev_attr_current_driver.attr,
&dev_attr_current_governor_ro.attr,
NULL
};
/*
 * Alternative attribute set, installed by cpuidle_add_interface() when
 * sysfs_switch is set: adds the governor list and a writable
 * current_governor.
 */
static DEVICE_ATTR(available_governors, 0444, show_available_governors, NULL);
static DEVICE_ATTR(current_governor, 0644, show_current_governor,
store_current_governor);
static struct attribute *cpuidle_switch_attrs[] = {
&dev_attr_available_governors.attr,
&dev_attr_current_driver.attr,
&dev_attr_current_governor.attr,
NULL
};
/* The "cpuidle" attribute group; .attrs may be re-pointed at cpuidle_switch_attrs. */
static struct attribute_group cpuidle_attr_group = {
.attrs = cpuidle_default_attrs,
.name = "cpuidle",
};
/**
 * cpuidle_add_interface - add CPU global sysfs attributes
 * @dev: device under whose kobject the "cpuidle" group is created
 *
 * Uses the governor switching attribute set when sysfs_switch is set, the
 * default set otherwise. Returns the result of sysfs_create_group().
 */
int cpuidle_add_interface(struct device *dev)
{
	int ret;

	if (sysfs_switch)
		cpuidle_attr_group.attrs = cpuidle_switch_attrs;

	ret = sysfs_create_group(&dev->kobj, &cpuidle_attr_group);

	return ret;
}
/**
 * cpuidle_remove_interface - remove CPU global sysfs attributes
 * @dev: device whose "cpuidle" attribute group is removed
 */
void cpuidle_remove_interface(struct device *dev)
{
sysfs_remove_group(&dev->kobj, &cpuidle_attr_group);
}
/*
 * Attribute of a cpuidle device kobject: a plain sysfs attribute plus
 * show/store callbacks that receive the owning cpuidle_device.
 */
struct cpuidle_attr {
struct attribute attr;
ssize_t (*show)(struct cpuidle_device *, char *);
ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
};
/* Declare a read-only (0444) or read-write (0644) cpuidle_attr named attr_<_name>. */
#define define_one_ro(_name, show) \
static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
#define define_one_rw(_name, show, store) \
static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
/* Map an embedded kobject / attribute back to its containing structure. */
#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
/*
 * sysfs show dispatcher for cpuidle device attributes: calls the attribute's
 * show() callback under cpuidle_lock. Returns -EIO when the attribute has no
 * show() callback.
 */
static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr,
			    char *buf)
{
	struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr);
	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
	int ret;

	if (!cattr->show)
		return -EIO;

	mutex_lock(&cpuidle_lock);
	ret = cattr->show(dev, buf);
	mutex_unlock(&cpuidle_lock);

	return ret;
}
/*
 * sysfs store dispatcher for cpuidle device attributes: calls the
 * attribute's store() callback under cpuidle_lock. Returns -EIO when the
 * attribute has no store() callback.
 */
static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr,
			     const char *buf, size_t count)
{
	struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr);
	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
	int ret;

	if (!cattr->store)
		return -EIO;

	mutex_lock(&cpuidle_lock);
	ret = cattr->store(dev, buf, count);
	mutex_unlock(&cpuidle_lock);

	return ret;
}
/* sysfs operations used by ktype_cpuidle (the per-device "cpuidle" kobject). */
static const struct sysfs_ops cpuidle_sysfs_ops = {
.show = cpuidle_show,
.store = cpuidle_store,
};
static void cpuidle_sysfs_release(struct kobject *kobj)
{
struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
complete(&dev->kobj_unregister);
}
/* kobject type of the per-device "cpuidle" directory created by cpuidle_add_sysfs(). */
static struct kobj_type ktype_cpuidle = {
.sysfs_ops = &cpuidle_sysfs_ops,
.release = cpuidle_sysfs_release,
};
/*
 * Attribute of a per-state kobject: show/store callbacks receive both the
 * driver's state description and the per-device usage data of that state.
 */
struct cpuidle_state_attr {
struct attribute attr;
ssize_t (*show)(struct cpuidle_state *, \
struct cpuidle_state_usage *, char *);
ssize_t (*store)(struct cpuidle_state *, \
struct cpuidle_state_usage *, const char *, size_t);
};
#define define_one_state_ro(_name, show) \
static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
#define define_one_state_rw(_name, show, store) \
static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0644, show, store)
#define define_show_state_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, char *buf) \
{ \
return sprintf(buf, "%u\n", state->_name);\
}
#define define_store_state_ull_function(_name) \
static ssize_t store_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
const char *buf, size_t size) \
{ \
unsigned long long value; \
int err; \
if (!capable(CAP_SYS_ADMIN)) \
return -EPERM; \
err = kstrtoull(buf, 0, &value); \
if (err) \
return err; \
if (value) \
state_usage->_name = 1; \
else \
state_usage->_name = 0; \
return size; \
}
#define define_show_state_ull_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, char *buf) \
{ \
return sprintf(buf, "%llu\n", state_usage->_name);\
}
#define define_show_state_str_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, char *buf) \
{ \
if (state->_name[0] == '\0')\
return sprintf(buf, "<null>\n");\
return sprintf(buf, "%s\n", state->_name);\
}
/* Instantiate the show/store helpers for each exported state field. */
define_show_state_function(exit_latency)
define_show_state_function(power_usage)
define_show_state_ull_function(usage)
define_show_state_ull_function(time)
define_show_state_str_function(name)
define_show_state_str_function(desc)
define_show_state_ull_function(disable)
define_store_state_ull_function(disable)
/* Bind the helpers to sysfs file names; only "disable" is writable. */
define_one_state_ro(name, show_state_name);
define_one_state_ro(desc, show_state_desc);
define_one_state_ro(latency, show_state_exit_latency);
define_one_state_ro(power, show_state_power_usage);
define_one_state_ro(usage, show_state_usage);
define_one_state_ro(time, show_state_time);
define_one_state_rw(disable, show_state_disable, store_state_disable);
/* Files present in every stateN directory (default_attrs of ktype_state_cpuidle). */
static struct attribute *cpuidle_state_default_attrs[] = {
&attr_name.attr,
&attr_desc.attr,
&attr_latency.attr,
&attr_power.attr,
&attr_usage.attr,
&attr_time.attr,
&attr_disable.attr,
NULL
};
/*
 * Backing object of one stateN sysfs directory: ties the kobject to the
 * driver's state description and the device's usage data for that state.
 * kobj_unregister is completed from the kobject release callback.
 */
struct cpuidle_state_kobj {
struct cpuidle_state *state;
struct cpuidle_state_usage *state_usage;
struct completion kobj_unregister;
struct kobject kobj;
};
/* Accessors from an embedded kobject / attribute to the containing objects. */
#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
#define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage)
#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
static ssize_t cpuidle_state_show(struct kobject * kobj,
struct attribute * attr ,char * buf)
{
int ret = -EIO;
struct cpuidle_state *state = kobj_to_state(kobj);
struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
if (cattr->show)
ret = cattr->show(state, state_usage, buf);
return ret;
}
/*
 * sysfs store dispatcher for state attributes: forwards to the attribute's
 * store() callback with the state and usage data behind @kobj. Returns -EIO
 * when the attribute has no store() callback.
 */
static ssize_t cpuidle_state_store(struct kobject *kobj,
				   struct attribute *attr,
				   const char *buf, size_t size)
{
	struct cpuidle_state_attr *cattr = attr_to_stateattr(attr);
	struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
	int ret;

	if (!cattr->store)
		return -EIO;

	ret = cattr->store(state_obj->state, state_obj->state_usage, buf, size);

	return ret;
}
/* sysfs operations used by ktype_state_cpuidle (the stateN kobjects). */
static const struct sysfs_ops cpuidle_state_sysfs_ops = {
.show = cpuidle_state_show,
.store = cpuidle_state_store,
};
static void cpuidle_state_sysfs_release(struct kobject *kobj)
{
struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
complete(&state_obj->kobj_unregister);
}
/* kobject type of the stateN directories created by cpuidle_add_state_sysfs(). */
static struct kobj_type ktype_state_cpuidle = {
.sysfs_ops = &cpuidle_state_sysfs_ops,
.default_attrs = cpuidle_state_default_attrs,
.release = cpuidle_state_sysfs_release,
};
static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
{
kobject_put(&device->kobjs[i]->kobj);
wait_for_completion(&device->kobjs[i]->kobj_unregister);
kfree(device->kobjs[i]);
device->kobjs[i] = NULL;
}
/**
 * cpuidle_add_state_sysfs - adds cpuidle states sysfs attributes
 * @device: the target device
 *
 * Creates one "stateN" kobject below @device->kobj for each of the
 * device->state_count states and records it in device->kobjs[].
 *
 * Returns 0 on success. On failure every kobject created so far is removed
 * again and -ENOMEM or the error from kobject_init_and_add() is returned.
 */
static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
{
	int i, ret = -ENOMEM;
	struct cpuidle_state_kobj *kobj;
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);

	/* state statistics */
	for (i = 0; i < device->state_count; i++) {
		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
		if (!kobj) {
			/*
			 * ret holds 0 from the previous iteration's successful
			 * kobject_init_and_add(); set the error explicitly so
			 * an allocation failure is not reported as success.
			 */
			ret = -ENOMEM;
			goto error_state;
		}
		kobj->state = &drv->states[i];
		kobj->state_usage = &device->states_usage[i];
		init_completion(&kobj->kobj_unregister);

		ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
					   &device->kobj, "state%d", i);
		if (ret) {
			/*
			 * The kobject is initialised even when adding it failed,
			 * so it has to be dropped with kobject_put() to release
			 * what the kobject core allocated for it (e.g. its
			 * name). Wait for the release callback before freeing
			 * the containing object.
			 */
			kobject_put(&kobj->kobj);
			wait_for_completion(&kobj->kobj_unregister);
			kfree(kobj);
			goto error_state;
		}
		kobject_uevent(&kobj->kobj, KOBJ_ADD);
		device->kobjs[i] = kobj;
	}

	return 0;

error_state:
	/* undo the states that were fully set up, in reverse order */
	for (i = i - 1; i >= 0; i--)
		cpuidle_free_state_kobj(device, i);
	return ret;
}
/**
 * cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes
 * @device: the target device
 *
 * Frees the state kobject of every index below device->state_count.
 */
static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
{
	int idx = 0;

	while (idx < device->state_count) {
		cpuidle_free_state_kobj(device, idx);
		idx++;
	}
}
#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
/* Map an embedded kobject / attribute back to its containing structure. */
#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj)
#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr)
/*
 * Declare a read-only driver attribute named attr_driver_<_name>.
 * There is no store method, so the file mode is 0444: advertising it as
 * writable (0644) only made writes fail with -EIO.
 */
#define define_one_driver_ro(_name, show) \
	static struct cpuidle_driver_attr attr_driver_##_name = \
	__ATTR(_name, 0444, show, NULL)
/*
 * Backing object of the per-device "driver" sysfs directory.
 * kobj_unregister is completed from the kobject release callback.
 */
struct cpuidle_driver_kobj {
struct cpuidle_driver *drv;
struct completion kobj_unregister;
struct kobject kobj;
};
/* Attribute of the "driver" kobject; callbacks receive the cpuidle driver. */
struct cpuidle_driver_attr {
struct attribute attr;
ssize_t (*show)(struct cpuidle_driver *, char *);
ssize_t (*store)(struct cpuidle_driver *, const char *, size_t);
};
/*
 * Print the name of @drv, or "none" when @drv is NULL, followed by a
 * newline. The name is read under cpuidle_driver_lock. Returns the number
 * of bytes written.
 */
static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf)
{
	const char *name;
	ssize_t len;

	spin_lock(&cpuidle_driver_lock);
	name = drv ? drv->name : "none";
	len = sprintf(buf, "%s\n", name);
	spin_unlock(&cpuidle_driver_lock);

	return len;
}
static void cpuidle_driver_sysfs_release(struct kobject *kobj)
{
struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
complete(&driver_kobj->kobj_unregister);
}
/*
 * sysfs show dispatcher for driver attributes: forwards to the attribute's
 * show() callback with the driver behind @kobj. Returns -EIO when the
 * attribute has no show() callback.
 */
static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr,
				   char *buf)
{
	struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr);
	struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
	int ret;

	if (!dattr->show)
		return -EIO;

	ret = dattr->show(driver_kobj->drv, buf);

	return ret;
}
/*
 * sysfs store dispatcher for driver attributes: forwards to the attribute's
 * store() callback with the driver behind @kobj. Returns -EIO when the
 * attribute has no store() callback.
 */
static ssize_t cpuidle_driver_store(struct kobject *kobj, struct attribute *attr,
				    const char *buf, size_t size)
{
	struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr);
	struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
	int ret;

	if (!dattr->store)
		return -EIO;

	ret = dattr->store(driver_kobj->drv, buf, size);

	return ret;
}
/* The "name" file of the driver directory, backed by show_driver_name(). */
define_one_driver_ro(name, show_driver_name);
/* sysfs operations used by ktype_driver_cpuidle. */
static const struct sysfs_ops cpuidle_driver_sysfs_ops = {
.show = cpuidle_driver_show,
.store = cpuidle_driver_store,
};
/* Files present in the driver directory. */
static struct attribute *cpuidle_driver_default_attrs[] = {
&attr_driver_name.attr,
NULL
};
/* kobject type of the "driver" directory created by cpuidle_add_driver_sysfs(). */
static struct kobj_type ktype_driver_cpuidle = {
.sysfs_ops = &cpuidle_driver_sysfs_ops,
.default_attrs = cpuidle_driver_default_attrs,
.release = cpuidle_driver_sysfs_release,
};
/**
 * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute
 * @dev: the target device
 *
 * Creates the "driver" kobject below @dev->kobj and stores it in
 * dev->kobj_driver.
 *
 * Returns 0 on success, -ENOMEM if the backing object cannot be allocated,
 * or the error from kobject_init_and_add().
 */
static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
{
	struct cpuidle_driver_kobj *kdrv;
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int ret;

	kdrv = kzalloc(sizeof(*kdrv), GFP_KERNEL);
	if (!kdrv)
		return -ENOMEM;

	kdrv->drv = drv;
	init_completion(&kdrv->kobj_unregister);

	ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle,
				   &dev->kobj, "driver");
	if (ret) {
		/*
		 * The kobject is initialised even when adding it failed, so
		 * it has to be dropped with kobject_put() to release what the
		 * kobject core allocated for it (e.g. its name). Wait for the
		 * release callback before freeing the containing object.
		 */
		kobject_put(&kdrv->kobj);
		wait_for_completion(&kdrv->kobj_unregister);
		kfree(kdrv);
		return ret;
	}

	kobject_uevent(&kdrv->kobj, KOBJ_ADD);
	dev->kobj_driver = kdrv;

	return ret;
}
/**
 * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute
 * @dev: the target device
 *
 * Drops the reference on the "driver" kobject, waits for its release
 * callback and frees the backing object.
 */
static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
{
	struct cpuidle_driver_kobj *driver_kobj = dev->kobj_driver;

	kobject_put(&driver_kobj->kobj);
	/* the release callback completes kobj_unregister */
	wait_for_completion(&driver_kobj->kobj_unregister);
	kfree(driver_kobj);
}
#else
/*
 * Without CONFIG_CPU_IDLE_MULTIPLE_DRIVERS there is no per-device "driver"
 * directory: adding it trivially succeeds and removing it does nothing.
 */
static inline int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
{
return 0;
}
static inline void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
{
;
}
#endif
/**
 * cpuidle_add_device_sysfs - adds device specific sysfs attributes
 * @device: the target device
 *
 * Adds the per-state directories first, then the driver directory. If the
 * latter fails, the state directories are removed again. Returns 0 on
 * success or the error of the step that failed.
 */
int cpuidle_add_device_sysfs(struct cpuidle_device *device)
{
	int ret = cpuidle_add_state_sysfs(device);

	if (ret)
		return ret;

	ret = cpuidle_add_driver_sysfs(device);
	if (!ret)
		return 0;

	/* roll back the state entries created above */
	cpuidle_remove_state_sysfs(device);
	return ret;
}
/**
 * cpuidle_remove_device_sysfs : removes device specific sysfs attributes
 * @device : the target device
 *
 * Removes the driver directory first, then the per-state directories, i.e.
 * the reverse of the order used by cpuidle_add_device_sysfs().
 */
void cpuidle_remove_device_sysfs(struct cpuidle_device *device)
{
cpuidle_remove_driver_sysfs(device);
cpuidle_remove_state_sysfs(device);
}
/**
 * cpuidle_add_sysfs - creates a sysfs instance for the target device
 * @dev: the target device
 *
 * Registers @dev->kobj as "cpuidle" below the kobject of the CPU device
 * looked up for @dev->cpu and emits a KOBJ_ADD uevent on success.
 * Returns 0 on success or the error from kobject_init_and_add().
 */
int cpuidle_add_sysfs(struct cpuidle_device *dev)
{
	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
	int error;

	/* completed by the release callback, awaited in cpuidle_remove_sysfs() */
	init_completion(&dev->kobj_unregister);

	error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
				     "cpuidle");
	if (error)
		return error;

	kobject_uevent(&dev->kobj, KOBJ_ADD);
	return 0;
}
/**
 * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
 * @dev: the target device
 *
 * Drops the reference on @dev->kobj and blocks until the release callback
 * (cpuidle_sysfs_release) has completed dev->kobj_unregister.
 */
void cpuidle_remove_sysfs(struct cpuidle_device *dev)
{
kobject_put(&dev->kobj);
wait_for_completion(&dev->kobj_unregister);
}