Initial commit; kernel source import

This commit is contained in:
Nathan
2025-04-06 23:50:55 -05:00
commit 25c6d769f4
45093 changed files with 18199410 additions and 0 deletions

167
kernel/time/Kconfig Normal file
View File

@@ -0,0 +1,167 @@
#
# Timer subsystem related configuration options
#
# Options selectable by arch Kconfig
# Watchdog function for clocksources to detect instabilities
config CLOCKSOURCE_WATCHDOG
bool
# Architecture has extra clocksource data
config ARCH_CLOCKSOURCE_DATA
bool
# Clocksources require validation of the clocksource against the last
# cycle update - x86/TSC misfeature
config CLOCKSOURCE_VALIDATE_LAST_CYCLE
bool
# Timekeeping vsyscall support
config GENERIC_TIME_VSYSCALL
bool
# Timekeeping vsyscall support
config GENERIC_TIME_VSYSCALL_OLD
bool
# ktime_t scalar 64bit nsec representation
config KTIME_SCALAR
bool
# Old style timekeeping
config ARCH_USES_GETTIMEOFFSET
bool
# The generic clock events infrastructure
config GENERIC_CLOCKEVENTS
bool
# Migration helper. Builds, but does not invoke
config GENERIC_CLOCKEVENTS_BUILD
bool
default y
depends on GENERIC_CLOCKEVENTS
# Architecture can handle broadcast in a driver-agnostic way
config ARCH_HAS_TICK_BROADCAST
bool
# Clockevents broadcasting infrastructure
config GENERIC_CLOCKEVENTS_BROADCAST
bool
depends on GENERIC_CLOCKEVENTS
# Automatically adjust the min. reprogramming time for
# clock event device
config GENERIC_CLOCKEVENTS_MIN_ADJUST
bool
# Generic update of CMOS clock
config GENERIC_CMOS_UPDATE
bool
if GENERIC_CLOCKEVENTS
menu "Timers subsystem"
# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
# only related to the tick functionality. Oneshot clockevent devices
# are supported independ of this.
config TICK_ONESHOT
bool
config NO_HZ_COMMON
bool
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
choice
prompt "Timer tick handling"
default NO_HZ_IDLE if NO_HZ
config HZ_PERIODIC
bool "Periodic timer ticks (constant rate, no dynticks)"
help
This option keeps the tick running periodically at a constant
rate, even when the CPU doesn't need it.
config NO_HZ_IDLE
bool "Idle dynticks system (tickless idle)"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
select NO_HZ_COMMON
help
This option enables a tickless idle system: timer interrupts
will only trigger on an as-needed basis when the system is idle.
This is usually interesting for energy saving.
Most of the time you want to say Y here.
config NO_HZ_FULL
bool "Full dynticks system (tickless)"
# NO_HZ_COMMON dependency
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
# We need at least one periodic CPU for timekeeping
depends on SMP
# RCU_USER_QS dependency
depends on HAVE_CONTEXT_TRACKING
# VIRT_CPU_ACCOUNTING_GEN dependency
depends on 64BIT
select NO_HZ_COMMON
select RCU_USER_QS
select RCU_NOCB_CPU
select VIRT_CPU_ACCOUNTING_GEN
select CONTEXT_TRACKING_FORCE
select IRQ_WORK
help
Adaptively try to shutdown the tick whenever possible, even when
the CPU is running tasks. Typically this requires running a single
task on the CPU. Chances for running tickless are maximized when
the task mostly runs in userspace and has few kernel activity.
You need to fill up the nohz_full boot parameter with the
desired range of dynticks CPUs.
This is implemented at the expense of some overhead in user <-> kernel
transitions: syscalls, exceptions and interrupts. Even when it's
dynamically off.
Say N.
endchoice
config NO_HZ_FULL_ALL
bool "Full dynticks system on all CPUs by default"
depends on NO_HZ_FULL
help
If the user doesn't pass the nohz_full boot option to
define the range of full dynticks CPUs, consider that all
CPUs in the system are full dynticks by default.
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
help
This is the old config entry that enables dynticks idle.
We keep it around for a little while to enforce backward
compatibility with older config files.
config HIGH_RES_TIMERS
bool "High Resolution Timer Support"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
help
This option enables high resolution timer support. If your
hardware is not capable then this option only increases
the size of the kernel image.
endmenu
endif
config UDELAY_TEST
tristate "udelay test driver"
default n
help
This builds the "udelay_test" module that helps to make sure
that udelay() is working properly.

10
kernel/time/Makefile Normal file
View File

@@ -0,0 +1,10 @@
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
obj-y += timeconv.o posix-clock.o alarmtimer.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_UDELAY_TEST) += udelay_test.o

800
kernel/time/alarmtimer.c Normal file
View File

@@ -0,0 +1,800 @@
/*
* Alarmtimer interface
*
* This interface provides a timer which is similarto hrtimers,
* but triggers a RTC alarm if the box is suspend.
*
* This interface is influenced by the Android RTC Alarm timer
* interface.
*
* Copyright (C) 2010 IBM Corperation
*
* Author: John Stultz <john.stultz@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/timerqueue.h>
#include <linux/rtc.h>
#include <linux/alarmtimer.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/posix-timers.h>
#include <linux/workqueue.h>
#include <linux/freezer.h>
/**
* struct alarm_base - Alarm timer bases
* @lock: Lock for syncrhonized access to the base
* @timerqueue: Timerqueue head managing the list of events
* @timer: hrtimer used to schedule events while running
* @gettime: Function to read the time correlating to the base
* @base_clockid: clockid for the base
*/
static struct alarm_base {
spinlock_t lock;
struct timerqueue_head timerqueue;
ktime_t (*gettime)(void);
clockid_t base_clockid;
} alarm_bases[ALARM_NUMTYPE];
/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */
static ktime_t freezer_delta;
static DEFINE_SPINLOCK(freezer_delta_lock);
static struct wakeup_source *ws;
#ifdef CONFIG_RTC_CLASS
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
* This function returns the rtc device to use for wakealarms.
* If one has not already been chosen, it checks to see if a
* functional rtc device is available.
*/
struct rtc_device *alarmtimer_get_rtcdev(void)
{
unsigned long flags;
struct rtc_device *ret;
spin_lock_irqsave(&rtcdev_lock, flags);
ret = rtcdev;
spin_unlock_irqrestore(&rtcdev_lock, flags);
return ret;
}
static int alarmtimer_rtc_add_device(struct device *dev,
struct class_interface *class_intf)
{
unsigned long flags;
struct rtc_device *rtc = to_rtc_device(dev);
if (rtcdev)
return -EBUSY;
if (!rtc->ops->set_alarm)
return -1;
if (!device_may_wakeup(rtc->dev.parent))
return -1;
spin_lock_irqsave(&rtcdev_lock, flags);
if (!rtcdev) {
rtcdev = rtc;
/* hold a reference so it doesn't go away */
get_device(dev);
}
spin_unlock_irqrestore(&rtcdev_lock, flags);
return 0;
}
static inline void alarmtimer_rtc_timer_init(void)
{
rtc_timer_init(&rtctimer, NULL, NULL);
}
static struct class_interface alarmtimer_rtc_interface = {
.add_dev = &alarmtimer_rtc_add_device,
};
static int alarmtimer_rtc_interface_setup(void)
{
alarmtimer_rtc_interface.class = rtc_class;
return class_interface_register(&alarmtimer_rtc_interface);
}
static void alarmtimer_rtc_interface_remove(void)
{
class_interface_unregister(&alarmtimer_rtc_interface);
}
#else
struct rtc_device *alarmtimer_get_rtcdev(void)
{
return NULL;
}
#define rtcdev (NULL)
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
static inline void alarmtimer_rtc_timer_init(void) { }
#endif
/**
* alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue
* @base: pointer to the base where the timer is being run
* @alarm: pointer to alarm being enqueued.
*
* Adds alarm to a alarm_base timerqueue
*
* Must hold base->lock when calling.
*/
static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
{
if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
timerqueue_del(&base->timerqueue, &alarm->node);
timerqueue_add(&base->timerqueue, &alarm->node);
alarm->state |= ALARMTIMER_STATE_ENQUEUED;
}
/**
* alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue
* @base: pointer to the base where the timer is running
* @alarm: pointer to alarm being removed
*
* Removes alarm to a alarm_base timerqueue
*
* Must hold base->lock when calling.
*/
static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
{
if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED))
return;
timerqueue_del(&base->timerqueue, &alarm->node);
alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
}
/**
* alarmtimer_fired - Handles alarm hrtimer being fired.
* @timer: pointer to hrtimer being run
*
* When a alarm timer fires, this runs through the timerqueue to
* see which alarms expired, and runs those. If there are more alarm
* timers queued for the future, we set the hrtimer to fire when
* when the next future alarm timer expires.
*/
static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
{
struct alarm *alarm = container_of(timer, struct alarm, timer);
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
int ret = HRTIMER_NORESTART;
int restart = ALARMTIMER_NORESTART;
spin_lock_irqsave(&base->lock, flags);
alarmtimer_dequeue(base, alarm);
spin_unlock_irqrestore(&base->lock, flags);
if (alarm->function)
restart = alarm->function(alarm, base->gettime());
spin_lock_irqsave(&base->lock, flags);
if (restart != ALARMTIMER_NORESTART) {
hrtimer_set_expires(&alarm->timer, alarm->node.expires);
alarmtimer_enqueue(base, alarm);
ret = HRTIMER_RESTART;
}
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
#ifdef CONFIG_RTC_CLASS
/**
* alarmtimer_suspend - Suspend time callback
* @dev: unused
* @state: unused
*
* When we are going into suspend, we look through the bases
* to see which is the soonest timer to expire. We then
* set an rtc timer to fire that far into the future, which
* will wake us from suspend.
*/
static int alarmtimer_suspend(struct device *dev)
{
struct rtc_time tm;
ktime_t min, now;
unsigned long flags;
struct rtc_device *rtc;
int i;
int ret;
spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
freezer_delta = ktime_set(0, 0);
spin_unlock_irqrestore(&freezer_delta_lock, flags);
rtc = alarmtimer_get_rtcdev();
/* If we have no rtcdev, just return */
if (!rtc)
return 0;
/* Find the soonest timer to expire*/
for (i = 0; i < ALARM_NUMTYPE; i++) {
struct alarm_base *base = &alarm_bases[i];
struct timerqueue_node *next;
ktime_t delta;
spin_lock_irqsave(&base->lock, flags);
next = timerqueue_getnext(&base->timerqueue);
spin_unlock_irqrestore(&base->lock, flags);
if (!next)
continue;
delta = ktime_sub(next->expires, base->gettime());
if (!min.tv64 || (delta.tv64 < min.tv64))
min = delta;
}
if (min.tv64 == 0)
return 0;
if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
__pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
return -EBUSY;
}
/* Setup an rtc timer to fire that far in the future */
rtc_timer_cancel(rtc, &rtctimer);
rtc_read_time(rtc, &tm);
now = rtc_tm_to_ktime(tm);
now = ktime_add(now, min);
/* Set alarm, if in the past reject suspend briefly to handle */
ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
if (ret < 0)
__pm_wakeup_event(ws, MSEC_PER_SEC);
return ret;
}
#else
static int alarmtimer_suspend(struct device *dev)
{
return 0;
}
#endif
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
{
ktime_t delta;
unsigned long flags;
struct alarm_base *base = &alarm_bases[type];
delta = ktime_sub(absexp, base->gettime());
spin_lock_irqsave(&freezer_delta_lock, flags);
if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64))
freezer_delta = delta;
spin_unlock_irqrestore(&freezer_delta_lock, flags);
}
/**
* alarm_init - Initialize an alarm structure
* @alarm: ptr to alarm to be initialized
* @type: the type of the alarm
* @function: callback that is run when the alarm fires
*/
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
{
timerqueue_init(&alarm->node);
hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
HRTIMER_MODE_ABS);
alarm->timer.function = alarmtimer_fired;
alarm->function = function;
alarm->type = type;
alarm->state = ALARMTIMER_STATE_INACTIVE;
}
/**
* alarm_start - Sets an alarm to fire
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
int alarm_start(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
int ret;
spin_lock_irqsave(&base->lock, flags);
alarm->node.expires = start;
alarmtimer_enqueue(base, alarm);
ret = hrtimer_start(&alarm->timer, alarm->node.expires,
HRTIMER_MODE_ABS);
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
/**
* alarm_try_to_cancel - Tries to cancel an alarm timer
* @alarm: ptr to alarm to be canceled
*
* Returns 1 if the timer was canceled, 0 if it was not running,
* and -1 if the callback was running
*/
int alarm_try_to_cancel(struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
int ret;
spin_lock_irqsave(&base->lock, flags);
ret = hrtimer_try_to_cancel(&alarm->timer);
if (ret >= 0)
alarmtimer_dequeue(base, alarm);
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
/**
* alarm_cancel - Spins trying to cancel an alarm timer until it is done
* @alarm: ptr to alarm to be canceled
*
* Returns 1 if the timer was canceled, 0 if it was not active.
*/
int alarm_cancel(struct alarm *alarm)
{
for (;;) {
int ret = alarm_try_to_cancel(alarm);
if (ret >= 0)
return ret;
cpu_relax();
}
}
u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
{
u64 overrun = 1;
ktime_t delta;
delta = ktime_sub(now, alarm->node.expires);
if (delta.tv64 < 0)
return 0;
if (unlikely(delta.tv64 >= interval.tv64)) {
s64 incr = ktime_to_ns(interval);
overrun = ktime_divns(delta, incr);
alarm->node.expires = ktime_add_ns(alarm->node.expires,
incr*overrun);
if (alarm->node.expires.tv64 > now.tv64)
return overrun;
/*
* This (and the ktime_add() below) is the
* correction for exact:
*/
overrun++;
}
alarm->node.expires = ktime_add(alarm->node.expires, interval);
return overrun;
}
/**
* clock2alarm - helper that converts from clockid to alarmtypes
* @clockid: clockid.
*/
static enum alarmtimer_type clock2alarm(clockid_t clockid)
{
if (clockid == CLOCK_REALTIME_ALARM)
return ALARM_REALTIME;
if (clockid == CLOCK_BOOTTIME_ALARM)
return ALARM_BOOTTIME;
return -1;
}
/**
* alarm_handle_timer - Callback for posix timers
* @alarm: alarm that fired
*
* Posix timer callback for expired alarm timers.
*/
static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
ktime_t now)
{
struct k_itimer *ptr = container_of(alarm, struct k_itimer,
it.alarm.alarmtimer);
if (posix_timer_event(ptr, 0) != 0)
ptr->it_overrun++;
/* Re-add periodic timers */
if (ptr->it.alarm.interval.tv64) {
ptr->it_overrun += alarm_forward(alarm, now,
ptr->it.alarm.interval);
return ALARMTIMER_RESTART;
}
return ALARMTIMER_NORESTART;
}
/**
* alarm_clock_getres - posix getres interface
* @which_clock: clockid
* @tp: timespec to fill
*
* Returns the granularity of underlying alarm base clock
*/
static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;
if (!alarmtimer_get_rtcdev())
return -ENOTSUPP;
return hrtimer_get_res(baseid, tp);
}
/**
* alarm_clock_get - posix clock_get interface
* @which_clock: clockid
* @tp: timespec to fill.
*
* Provides the underlying alarm base time.
*/
static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
{
struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)];
if (!alarmtimer_get_rtcdev())
return -ENOTSUPP;
*tp = ktime_to_timespec(base->gettime());
return 0;
}
/**
* alarm_timer_create - posix timer_create interface
* @new_timer: k_itimer pointer to manage
*
* Initializes the k_itimer structure.
*/
static int alarm_timer_create(struct k_itimer *new_timer)
{
enum alarmtimer_type type;
struct alarm_base *base;
if (!alarmtimer_get_rtcdev())
return -ENOTSUPP;
if (!capable(CAP_WAKE_ALARM))
return -EPERM;
type = clock2alarm(new_timer->it_clock);
base = &alarm_bases[type];
alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
return 0;
}
/**
* alarm_timer_get - posix timer_get interface
* @new_timer: k_itimer pointer
* @cur_setting: itimerspec data to fill
*
* Copies the itimerspec data out from the k_itimer
*/
static void alarm_timer_get(struct k_itimer *timr,
struct itimerspec *cur_setting)
{
memset(cur_setting, 0, sizeof(struct itimerspec));
cur_setting->it_interval =
ktime_to_timespec(timr->it.alarm.interval);
cur_setting->it_value =
ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires);
return;
}
/**
* alarm_timer_del - posix timer_del interface
* @timr: k_itimer pointer to be deleted
*
* Cancels any programmed alarms for the given timer.
*/
static int alarm_timer_del(struct k_itimer *timr)
{
if (!rtcdev)
return -ENOTSUPP;
if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0)
return TIMER_RETRY;
return 0;
}
/**
* alarm_timer_set - posix timer_set interface
* @timr: k_itimer pointer to be deleted
* @flags: timer flags
* @new_setting: itimerspec to be used
* @old_setting: itimerspec being replaced
*
* Sets the timer to new_setting, and starts the timer.
*/
static int alarm_timer_set(struct k_itimer *timr, int flags,
struct itimerspec *new_setting,
struct itimerspec *old_setting)
{
if (!rtcdev)
return -ENOTSUPP;
if (old_setting)
alarm_timer_get(timr, old_setting);
/* If the timer was already set, cancel it */
if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0)
return TIMER_RETRY;
/* start the timer */
timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
alarm_start(&timr->it.alarm.alarmtimer,
timespec_to_ktime(new_setting->it_value));
return 0;
}
/**
* alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep
* @alarm: ptr to alarm that fired
*
* Wakes up the task that set the alarmtimer
*/
static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm,
ktime_t now)
{
struct task_struct *task = (struct task_struct *)alarm->data;
alarm->data = NULL;
if (task)
wake_up_process(task);
return ALARMTIMER_NORESTART;
}
/**
* alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation
* @alarm: ptr to alarmtimer
* @absexp: absolute expiration time
*
* Sets the alarm timer and sleeps until it is fired or interrupted.
*/
static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp)
{
alarm->data = (void *)current;
do {
set_current_state(TASK_INTERRUPTIBLE);
alarm_start(alarm, absexp);
if (likely(alarm->data))
schedule();
alarm_cancel(alarm);
} while (alarm->data && !signal_pending(current));
__set_current_state(TASK_RUNNING);
return (alarm->data == NULL);
}
/**
* update_rmtp - Update remaining timespec value
* @exp: expiration time
* @type: timer type
* @rmtp: user pointer to remaining timepsec value
*
* Helper function that fills in rmtp value with time between
* now and the exp value
*/
static int update_rmtp(ktime_t exp, enum alarmtimer_type type,
struct timespec __user *rmtp)
{
struct timespec rmt;
ktime_t rem;
rem = ktime_sub(exp, alarm_bases[type].gettime());
if (rem.tv64 <= 0)
return 0;
rmt = ktime_to_timespec(rem);
if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
return -EFAULT;
return 1;
}
/**
* alarm_timer_nsleep_restart - restartblock alarmtimer nsleep
* @restart: ptr to restart block
*
* Handles restarted clock_nanosleep calls
*/
static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
{
enum alarmtimer_type type = restart->nanosleep.clockid;
ktime_t exp;
struct timespec __user *rmtp;
struct alarm alarm;
int ret = 0;
exp.tv64 = restart->nanosleep.expires;
alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
if (alarmtimer_do_nsleep(&alarm, exp))
goto out;
if (freezing(current))
alarmtimer_freezerset(exp, type);
rmtp = restart->nanosleep.rmtp;
if (rmtp) {
ret = update_rmtp(exp, type, rmtp);
if (ret <= 0)
goto out;
}
/* The other values in restart are already filled in */
ret = -ERESTART_RESTARTBLOCK;
out:
return ret;
}
/**
* alarm_timer_nsleep - alarmtimer nanosleep
* @which_clock: clockid
* @flags: determins abstime or relative
* @tsreq: requested sleep time (abs or rel)
* @rmtp: remaining sleep time saved
*
* Handles clock_nanosleep calls against _ALARM clockids
*/
static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
struct timespec *tsreq, struct timespec __user *rmtp)
{
enum alarmtimer_type type = clock2alarm(which_clock);
struct alarm alarm;
ktime_t exp;
int ret = 0;
struct restart_block *restart;
if (!alarmtimer_get_rtcdev())
return -ENOTSUPP;
if (!capable(CAP_WAKE_ALARM))
return -EPERM;
alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
exp = timespec_to_ktime(*tsreq);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now = alarm_bases[type].gettime();
exp = ktime_add(now, exp);
}
if (alarmtimer_do_nsleep(&alarm, exp))
goto out;
if (freezing(current))
alarmtimer_freezerset(exp, type);
/* abs timers don't set remaining time or restart */
if (flags == TIMER_ABSTIME) {
ret = -ERESTARTNOHAND;
goto out;
}
if (rmtp) {
ret = update_rmtp(exp, type, rmtp);
if (ret <= 0)
goto out;
}
restart = &current_thread_info()->restart_block;
restart->fn = alarm_timer_nsleep_restart;
restart->nanosleep.clockid = type;
restart->nanosleep.expires = exp.tv64;
restart->nanosleep.rmtp = rmtp;
ret = -ERESTART_RESTARTBLOCK;
out:
return ret;
}
/* Suspend hook structures */
static const struct dev_pm_ops alarmtimer_pm_ops = {
.suspend = alarmtimer_suspend,
};
static struct platform_driver alarmtimer_driver = {
.driver = {
.name = "alarmtimer",
.pm = &alarmtimer_pm_ops,
}
};
/**
* alarmtimer_init - Initialize alarm timer code
*
* This function initializes the alarm bases and registers
* the posix clock ids.
*/
static int __init alarmtimer_init(void)
{
struct platform_device *pdev;
int error = 0;
int i;
struct k_clock alarm_clock = {
.clock_getres = alarm_clock_getres,
.clock_get = alarm_clock_get,
.timer_create = alarm_timer_create,
.timer_set = alarm_timer_set,
.timer_del = alarm_timer_del,
.timer_get = alarm_timer_get,
.nsleep = alarm_timer_nsleep,
};
alarmtimer_rtc_timer_init();
posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
/* Initialize alarm bases */
alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real;
alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime;
for (i = 0; i < ALARM_NUMTYPE; i++) {
timerqueue_init_head(&alarm_bases[i].timerqueue);
spin_lock_init(&alarm_bases[i].lock);
}
error = alarmtimer_rtc_interface_setup();
if (error)
return error;
error = platform_driver_register(&alarmtimer_driver);
if (error)
goto out_if;
pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0);
if (IS_ERR(pdev)) {
error = PTR_ERR(pdev);
goto out_drv;
}
ws = wakeup_source_register("alarmtimer");
return 0;
out_drv:
platform_driver_unregister(&alarmtimer_driver);
out_if:
alarmtimer_rtc_interface_remove();
return error;
}
device_initcall(alarmtimer_init);

465
kernel/time/clockevents.c Normal file
View File

@@ -0,0 +1,465 @@
/*
* linux/kernel/time/clockevents.c
*
* This file contains functions which manage clock event devices.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/clockchips.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include "tick-internal.h"
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
/* Notification for clock events */
static RAW_NOTIFIER_HEAD(clockevents_chain);
/* Protection for the above */
static DEFINE_RAW_SPINLOCK(clockevents_lock);
/**
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
* @latch: value to convert
* @evt: pointer to clock event device descriptor
*
* Math helper, returns latch value converted to nanoseconds (bound checked)
*/
u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
{
u64 clc = (u64) latch << evt->shift;
if (unlikely(!evt->mult)) {
evt->mult = 1;
WARN_ON(1);
}
do_div(clc, evt->mult);
if (clc < 1000)
clc = 1000;
if (clc > KTIME_MAX)
clc = KTIME_MAX;
return clc;
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
/**
* clockevents_set_mode - set the operating mode of a clock event device
* @dev: device to modify
* @mode: new mode
*
* Must be called with interrupts disabled !
*/
void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
if (dev->mode != mode) {
dev->set_mode(mode, dev);
dev->mode = mode;
/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:
*/
if (mode == CLOCK_EVT_MODE_ONESHOT) {
if (unlikely(!dev->mult)) {
dev->mult = 1;
WARN_ON(1);
}
}
}
}
/**
* clockevents_shutdown - shutdown the device and clear next_event
* @dev: device to shutdown
*/
void clockevents_shutdown(struct clock_event_device *dev)
{
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
dev->next_event.tv64 = KTIME_MAX;
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
/* Limit min_delta to a jiffie */
#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
/**
* clockevents_increase_min_delta - raise minimum delta of a clock event device
* @dev: device to increase the minimum delta
*
* Returns 0 on success, -ETIME when the minimum delta reached the limit.
*/
static int clockevents_increase_min_delta(struct clock_event_device *dev)
{
/* Nothing to do if we already reached the limit */
if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n");
dev->next_event.tv64 = KTIME_MAX;
return -ETIME;
}
if (dev->min_delta_ns < 5000)
dev->min_delta_ns = 5000;
else
dev->min_delta_ns += dev->min_delta_ns >> 1;
if (dev->min_delta_ns > MIN_DELTA_LIMIT)
dev->min_delta_ns = MIN_DELTA_LIMIT;
printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
dev->name ? dev->name : "?",
(unsigned long long) dev->min_delta_ns);
return 0;
}
/**
* clockevents_program_min_delta - Set clock event device to the minimum delay.
* @dev: device to program
*
* Returns 0 on success, -ETIME when the retry loop failed.
*/
static int clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
int64_t delta;
int i;
for (i = 0;;) {
delta = dev->min_delta_ns;
dev->next_event = ktime_add_ns(ktime_get(), delta);
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
dev->retries++;
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
if (dev->set_next_event((unsigned long) clc, dev) == 0)
return 0;
if (++i > 2) {
/*
* We tried 3 times to program the device with the
* given min_delta_ns. Try to increase the minimum
* delta, if that fails as well get out of here.
*/
if (clockevents_increase_min_delta(dev))
return -ETIME;
i = 0;
}
}
}
#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
/**
* clockevents_program_min_delta - Set clock event device to the minimum delay.
* @dev: device to program
*
* Returns 0 on success, -ETIME when the retry loop failed.
*/
static int clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
int64_t delta;
delta = dev->min_delta_ns;
dev->next_event = ktime_add_ns(ktime_get(), delta);
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
dev->retries++;
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
return dev->set_next_event((unsigned long) clc, dev);
}
#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
/**
* clockevents_program_event - Reprogram the clock event device.
* @dev: device to program
* @expires: absolute expiry time (monotonic clock)
* @force: program minimum delay if expires can not be set
*
* Returns 0 on success, -ETIME when the event is in the past.
*/
int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
bool force)
{
unsigned long long clc;
int64_t delta;
int rc;
if (unlikely(expires.tv64 < 0)) {
WARN_ON_ONCE(1);
return -ETIME;
}
dev->next_event = expires;
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
/* Shortcut for clockevent devices that can deal with ktime. */
if (dev->features & CLOCK_EVT_FEAT_KTIME)
return dev->set_next_ktime(expires, dev);
delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
if (delta <= 0)
return force ? clockevents_program_min_delta(dev) : -ETIME;
delta = min(delta, (int64_t) dev->max_delta_ns);
delta = max(delta, (int64_t) dev->min_delta_ns);
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
rc = dev->set_next_event((unsigned long) clc, dev);
return (rc && force) ? clockevents_program_min_delta(dev) : rc;
}
/**
* clockevents_register_notifier - register a clock events change listener
*/
int clockevents_register_notifier(struct notifier_block *nb)
{
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&clockevents_lock, flags);
ret = raw_notifier_chain_register(&clockevents_chain, nb);
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
/*
* Notify about a clock event change. Called with clockevents_lock
* held.
*/
static void clockevents_do_notify(unsigned long reason, void *dev)
{
raw_notifier_call_chain(&clockevents_chain, reason, dev);
}
/*
* Called after a notify add to make devices available which were
* released from the notifier call.
*/
static void clockevents_notify_released(void)
{
struct clock_event_device *dev;
while (!list_empty(&clockevents_released)) {
dev = list_entry(clockevents_released.next,
struct clock_event_device, list);
list_del(&dev->list);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
}
}
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
*/
void clockevents_register_device(struct clock_event_device *dev)
{
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
}
raw_spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
clockevents_notify_released();
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);
void clockevents_config(struct clock_event_device *dev, u32 freq)
{
u64 sec;
if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return;
/*
* Calculate the maximum number of seconds we can sleep. Limit
* to 10 minutes for hardware which can program more than
* 32bit ticks so we still get reasonable conversion values.
*/
sec = dev->max_delta_ticks;
do_div(sec, freq);
if (!sec)
sec = 1;
else if (sec > 600 && dev->max_delta_ticks > UINT_MAX)
sec = 600;
clockevents_calc_mult_shift(dev, freq, sec);
dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev);
dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
}
/**
* clockevents_config_and_register - Configure and register a clock event device
* @dev: device to register
* @freq: The clock frequency
* @min_delta: The minimum clock ticks to program in oneshot mode
* @max_delta: The maximum clock ticks to program in oneshot mode
*
* min/max_delta can be 0 for devices which do not support oneshot mode.
*/
void clockevents_config_and_register(struct clock_event_device *dev,
u32 freq, unsigned long min_delta,
unsigned long max_delta)
{
dev->min_delta_ticks = min_delta;
dev->max_delta_ticks = max_delta;
clockevents_config(dev, freq);
clockevents_register_device(dev);
}
EXPORT_SYMBOL_GPL(clockevents_config_and_register);
/**
* clockevents_update_freq - Update frequency and reprogram a clock event device.
* @dev: device to modify
* @freq: new device frequency
*
* Reconfigure and reprogram a clock event device in oneshot
* mode. Must be called on the cpu for which the device delivers per
* cpu timer events with interrupts disabled! Returns 0 on success,
* -ETIME when the event is in the past.
*/
int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
clockevents_config(dev, freq);
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return 0;
return clockevents_program_event(dev, dev->next_event, false);
}
/*
* Noop handler when we shut down an event device
*/
void clockevents_handle_noop(struct clock_event_device *dev)
{
}
/**
* clockevents_exchange_device - release and request clock devices
* @old: device to release (can be NULL)
* @new: device to request (can be NULL)
*
* Called from the notifier chain. clockevents_lock is held already
*/
void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new)
{
unsigned long flags;
local_irq_save(flags);
/*
* Caller releases a clock event device. We queue it into the
* released list and do a notify add later.
*/
if (old) {
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);
}
if (new) {
BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
clockevents_shutdown(new);
}
local_irq_restore(flags);
}
/**
* clockevents_suspend - suspend clock devices
*/
void clockevents_suspend(void)
{
struct clock_event_device *dev;
list_for_each_entry_reverse(dev, &clockevent_devices, list)
if (dev->suspend)
dev->suspend(dev);
}
/**
* clockevents_resume - resume clock devices
*/
void clockevents_resume(void)
{
struct clock_event_device *dev;
list_for_each_entry(dev, &clockevent_devices, list)
if (dev->resume)
dev->resume(dev);
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS
/**
* clockevents_notify - notification about relevant events
*/
void clockevents_notify(unsigned long reason, void *arg)
{
struct clock_event_device *dev, *tmp;
unsigned long flags;
int cpu;
raw_spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
switch (reason) {
case CLOCK_EVT_NOTIFY_CPU_DEAD:
/*
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
list_del(&dev->list);
/*
* Now check whether the CPU has left unused per cpu devices
*/
cpu = *((int *)arg);
list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
if (cpumask_test_cpu(cpu, dev->cpumask) &&
cpumask_weight(dev->cpumask) == 1 &&
!tick_is_broadcast_device(dev)) {
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
list_del(&dev->list);
}
}
break;
default:
break;
}
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
#endif

962
kernel/time/clocksource.c Normal file
View File

@@ -0,0 +1,962 @@
/*
* linux/kernel/time/clocksource.c
*
* This file contains the functions which manage clocksource drivers.
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* TODO WishList:
* o Allow clocksource drivers to be unregistered
*/
#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
#include "timekeeping_internal.h"
void timecounter_init(struct timecounter *tc,
const struct cyclecounter *cc,
u64 start_tstamp)
{
tc->cc = cc;
tc->cycle_last = cc->read(cc);
tc->nsec = start_tstamp;
}
EXPORT_SYMBOL_GPL(timecounter_init);
/**
* timecounter_read_delta - get nanoseconds since last call of this function
* @tc: Pointer to time counter
*
* When the underlying cycle counter runs over, this will be handled
* correctly as long as it does not run over more than once between
* calls.
*
* The first call to this function for a new time counter initializes
* the time tracking and returns an undefined result.
*/
static u64 timecounter_read_delta(struct timecounter *tc)
{
cycle_t cycle_now, cycle_delta;
u64 ns_offset;
/* read cycle counter: */
cycle_now = tc->cc->read(tc->cc);
/* calculate the delta since the last timecounter_read_delta(): */
cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
/* convert to nanoseconds: */
ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
/* update time stamp of timecounter_read_delta() call: */
tc->cycle_last = cycle_now;
return ns_offset;
}
u64 timecounter_read(struct timecounter *tc)
{
u64 nsec;
/* increment time by nanoseconds since last call */
nsec = timecounter_read_delta(tc);
nsec += tc->nsec;
tc->nsec = nsec;
return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_read);
u64 timecounter_cyc2time(struct timecounter *tc,
cycle_t cycle_tstamp)
{
u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
u64 nsec;
/*
* Instead of always treating cycle_tstamp as more recent
* than tc->cycle_last, detect when it is too far in the
* future and treat it as old time stamp instead.
*/
if (cycle_delta > tc->cc->mask / 2) {
cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
} else {
nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
}
return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_cyc2time);
/**
* clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
* @mult: pointer to mult variable
* @shift: pointer to shift variable
* @from: frequency to convert from
* @to: frequency to convert to
* @maxsec: guaranteed runtime conversion range in seconds
*
* The function evaluates the shift/mult pair for the scaled math
* operations of clocksources and clockevents.
*
* @to and @from are frequency values in HZ. For clock sources @to is
* NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
* event @to is the counter frequency and @from is NSEC_PER_SEC.
*
* The @maxsec conversion range argument controls the time frame in
* seconds which must be covered by the runtime conversion with the
* calculated mult and shift factors. This guarantees that no 64bit
* overflow happens when the input value of the conversion is
* multiplied with the calculated mult factor. Larger ranges may
* reduce the conversion accuracy by chosing smaller mult and shift
* factors.
*/
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
u64 tmp;
u32 sft, sftacc= 32;
/*
* Calculate the shift factor which is limiting the conversion
* range:
*/
tmp = ((u64)maxsec * from) >> 32;
while (tmp) {
tmp >>=1;
sftacc--;
}
/*
* Find the conversion shift/mult pair which has the best
* accuracy and fits the maxsec conversion range:
*/
for (sft = 32; sft > 0; sft--) {
tmp = (u64) to << sft;
tmp += from / 2;
do_div(tmp, from);
if ((tmp >> sftacc) == 0)
break;
}
*mult = tmp;
*shift = sft;
}
/*[Clocksource internal variables]---------
* curr_clocksource:
* currently selected clocksource.
* clocksource_list:
* linked list with the registered clocksources
* clocksource_mutex:
* protects manipulations to curr_clocksource and the clocksource_list
* override_name:
* Name of the user-specified clocksource.
*/
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[32];
static int finished_booting;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);
/*
* Interval: 0.5sec Threshold: 0.0625s
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
static void clocksource_watchdog_work(struct work_struct *work)
{
/*
* If kthread_run fails the next watchdog scan over the
* watchdog_list will find the unstable clock again.
*/
kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}
static void __clocksource_unstable(struct clocksource *cs)
{
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
cs->flags |= CLOCK_SOURCE_UNSTABLE;
if (finished_booting)
schedule_work(&watchdog_work);
}
static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta);
__clocksource_unstable(cs);
}
/**
* clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable
*
* This function is called instead of clocksource_change_rating from
* cpu hotplug code to avoid a deadlock between the clocksource mutex
* and the cpu hotplug mutex. It defers the update of the clocksource
* to the watchdog thread.
*/
void clocksource_mark_unstable(struct clocksource *cs)
{
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
if (list_empty(&cs->wd_list))
list_add(&cs->wd_list, &watchdog_list);
__clocksource_unstable(cs);
}
spin_unlock_irqrestore(&watchdog_lock, flags);
}
static void clocksource_watchdog(unsigned long data)
{
struct clocksource *cs;
cycle_t csnow, wdnow, delta;
int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
spin_lock(&watchdog_lock);
if (!watchdog_running)
goto out;
reset_pending = atomic_read(&watchdog_reset_pending);
list_for_each_entry(cs, &watchdog_list, wd_list) {
/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
if (finished_booting)
schedule_work(&watchdog_work);
continue;
}
local_irq_disable();
csnow = cs->read(cs);
wdnow = watchdog->read(watchdog);
local_irq_enable();
/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
atomic_read(&watchdog_reset_pending)) {
cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = wdnow;
cs->cs_last = csnow;
continue;
}
delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
watchdog->shift);
delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
cs->cs_last = csnow;
cs->wd_last = wdnow;
if (atomic_read(&watchdog_reset_pending))
continue;
/* Check the deviation from the watchdog clocksource. */
if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
clocksource_unstable(cs, cs_nsec - wd_nsec);
continue;
}
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
(cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/*
* We just marked the clocksource as highres-capable,
* notify the rest of the system as well so that we
* transition into high-res mode:
*/
tick_clock_notify();
}
}
/*
* We only clear the watchdog_reset_pending, when we did a
* full cycle through all clocksources.
*/
if (reset_pending)
atomic_dec(&watchdog_reset_pending);
/*
* Cycle through CPUs to check if the CPUs stay synchronized
* to each other.
*/
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
watchdog_timer.expires += WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, next_cpu);
out:
spin_unlock(&watchdog_lock);
}
static inline void clocksource_start_watchdog(void)
{
if (watchdog_running || !watchdog || list_empty(&watchdog_list))
return;
init_timer(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog;
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
watchdog_running = 1;
}
static inline void clocksource_stop_watchdog(void)
{
if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
return;
del_timer(&watchdog_timer);
watchdog_running = 0;
}
static inline void clocksource_reset_watchdog(void)
{
struct clocksource *cs;
list_for_each_entry(cs, &watchdog_list, wd_list)
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}
static void clocksource_resume_watchdog(void)
{
atomic_inc(&watchdog_reset_pending);
}
static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
/* cs is a clocksource to be watched. */
list_add(&cs->wd_list, &watchdog_list);
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
} else {
/* cs is a watchdog. */
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/* Pick the best watchdog. */
if (!watchdog || cs->rating > watchdog->rating) {
watchdog = cs;
/* Reset watchdog cycles */
clocksource_reset_watchdog();
}
}
/* Check if the watchdog timer needs to be started. */
clocksource_start_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
}
static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
struct clocksource *tmp;
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
/* cs is a watched clocksource. */
list_del_init(&cs->wd_list);
} else if (cs == watchdog) {
/* Reset watchdog cycles */
clocksource_reset_watchdog();
/* Current watchdog is removed. Find an alternative. */
watchdog = NULL;
list_for_each_entry(tmp, &clocksource_list, list) {
if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
continue;
if (!watchdog || tmp->rating > watchdog->rating)
watchdog = tmp;
}
}
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
/* Check if the watchdog timer needs to be stopped. */
clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
}
static int clocksource_watchdog_kthread(void *data)
{
struct clocksource *cs, *tmp;
unsigned long flags;
LIST_HEAD(unstable);
mutex_lock(&clocksource_mutex);
spin_lock_irqsave(&watchdog_lock, flags);
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list);
list_add(&cs->wd_list, &unstable);
}
/* Check if the watchdog timer needs to be stopped. */
clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
/* Needs to be done outside of watchdog lock */
list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
list_del_init(&cs->wd_list);
__clocksource_change_rating(cs, 0);
}
mutex_unlock(&clocksource_mutex);
return 0;
}
#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int clocksource_watchdog_kthread(void *data) { return 0; }
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
/**
* clocksource_suspend - suspend the clocksource(s)
*/
void clocksource_suspend(void)
{
struct clocksource *cs;
list_for_each_entry_reverse(cs, &clocksource_list, list)
if (cs->suspend)
cs->suspend(cs);
}
/**
* clocksource_resume - resume the clocksource(s)
*/
void clocksource_resume(void)
{
struct clocksource *cs;
list_for_each_entry(cs, &clocksource_list, list)
if (cs->resume)
cs->resume(cs);
clocksource_resume_watchdog();
}
/**
* clocksource_touch_watchdog - Update watchdog
*
* Update the watchdog after exception contexts such as kgdb so as not
* to incorrectly trip the watchdog. This might fail when the kernel
* was stopped in code which holds watchdog_lock.
*/
void clocksource_touch_watchdog(void)
{
clocksource_resume_watchdog();
}
/**
* clocksource_max_adjustment- Returns max adjustment amount
* @cs: Pointer to clocksource
*
*/
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
u64 ret;
/*
* We won't try to correct for more than 11% adjustments (110,000 ppm),
*/
ret = (u64)cs->mult * 11;
do_div(ret,100);
return (u32)ret;
}
/**
* clocksource_max_deferment - Returns max time the clocksource can be deferred
* @cs: Pointer to clocksource
*
*/
static u64 clocksource_max_deferment(struct clocksource *cs)
{
u64 max_nsecs, max_cycles;
/*
* Calculate the maximum number of cycles that we can pass to the
* cyc2ns function without overflowing a 64-bit signed result. The
* maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
* which is equivalent to the below.
* max_cycles < (2^63)/(cs->mult + cs->maxadj)
* max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
* max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
* max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
* max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
* Please note that we add 1 to the result of the log2 to account for
* any rounding errors, ensure the above inequality is satisfied and
* no overflow will occur.
*/
max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
/*
* The actual maximum number of cycles we can defer the clocksource is
* determined by the minimum of max_cycles and cs->mask.
* Note: Here we subtract the maxadj to make sure we don't sleep for
* too long if there's a large negative adjustment.
*/
max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
cs->shift);
/*
* To ensure that the clocksource does not wrap whilst we are idle,
* limit the time the clocksource can be deferred by 12.5%. Please
* note a margin of 12.5% is used because this can be computed with
* a shift, versus say 10% which would require division.
*/
return max_nsecs - (max_nsecs >> 3);
}
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
/**
* clocksource_select - Select the best clocksource available
*
* Private function. Must hold clocksource_mutex when called.
*
* Select the clocksource with the best rating, or the clocksource,
* which is selected by userspace override.
*/
static void clocksource_select(void)
{
struct clocksource *best, *cs;
if (!finished_booting || list_empty(&clocksource_list))
return;
/* First clocksource on the list has the best rating. */
best = list_first_entry(&clocksource_list, struct clocksource, list);
/* Check for the override clocksource. */
list_for_each_entry(cs, &clocksource_list, list) {
if (strcmp(cs->name, override_name) != 0)
continue;
/*
* Check to make sure we don't switch to a non-highres
* capable clocksource if the tick code is in oneshot
* mode (highres or nohz)
*/
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
tick_oneshot_mode_active()) {
/* Override clocksource cannot be used. */
printk(KERN_WARNING "Override clocksource %s is not "
"HRT compatible. Cannot switch while in "
"HRT/NOHZ mode\n", cs->name);
override_name[0] = 0;
} else
/* Override clocksource can be used. */
best = cs;
break;
}
if (curr_clocksource != best && !timekeeping_notify(best)) {
pr_info("Switched to clocksource %s\n", best->name);
curr_clocksource = best;
}
}
#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
static inline void clocksource_select(void) { }
#endif
/*
* clocksource_done_booting - Called near the end of core bootup
*
* Hack to avoid lots of clocksource churn at boot time.
* We use arch_initcall because we want this to start before
* device_initcall but after SMP init.
*/
static int __init clocksource_done_booting(void)
{
mutex_lock(&clocksource_mutex);
curr_clocksource = clocksource_default_clock();
mutex_unlock(&clocksource_mutex);
finished_booting = 1;
/*
* Run the watchdog first to eliminate unstable clock sources
*/
clocksource_watchdog_kthread(NULL);
mutex_lock(&clocksource_mutex);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
arch_initcall(clocksource_done_booting);
/*
* Enqueue the clocksource sorted by rating
*/
static void clocksource_enqueue(struct clocksource *cs)
{
struct list_head *entry = &clocksource_list;
struct clocksource *tmp;
list_for_each_entry(tmp, &clocksource_list, list)
/* Keep track of the place, where to insert */
if (tmp->rating >= cs->rating)
entry = &tmp->list;
list_add(&cs->list, entry);
}
/**
* __clocksource_updatefreq_scale - Used update clocksource with new freq
* @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
*
* This should only be called from the clocksource->enable() method.
*
* This *SHOULD NOT* be called directly! Please use the
* clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
*/
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
u64 sec;
/*
* Calc the maximum number of seconds which we can run before
* wrapping around. For clocksources which have a mask > 32bit
* we need to limit the max sleep time to have a good
* conversion precision. 10 minutes is still a reasonable
* amount. That results in a shift value of 24 for a
* clocksource with mask >= 40bit and f >= 4GHz. That maps to
* ~ 0.06ppm granularity for NTP. We apply the same 12.5%
* margin as we do in clocksource_max_deferment()
*/
sec = (cs->mask - (cs->mask >> 3));
do_div(sec, freq);
do_div(sec, scale);
if (!sec)
sec = 1;
else if (sec > 600 && cs->mask > UINT_MAX)
sec = 600;
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale);
/*
* for clocksources that have large mults, to avoid overflow.
* Since mult may be adjusted by ntp, add an safety extra margin
*
*/
cs->maxadj = clocksource_max_adjustment(cs);
while ((cs->mult + cs->maxadj < cs->mult)
|| (cs->mult - cs->maxadj > cs->mult)) {
cs->mult >>= 1;
cs->shift--;
cs->maxadj = clocksource_max_adjustment(cs);
}
cs->max_idle_ns = clocksource_max_deferment(cs);
}
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
/**
* __clocksource_register_scale - Used to install new clocksources
* @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
*
* Returns -EBUSY if registration fails, zero otherwise.
*
* This *SHOULD NOT* be called directly! Please use the
* clocksource_register_hz() or clocksource_register_khz helper functions.
*/
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
/* Initialize mult/shift and max_idle_ns */
__clocksource_updatefreq_scale(cs, scale, freq);
/* Add clocksource to the clcoksource list */
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
/**
* clocksource_register - Used to install new clocksources
* @cs: clocksource to be registered
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
int clocksource_register(struct clocksource *cs)
{
/* calculate max adjustment for given mult/shift */
cs->maxadj = clocksource_max_adjustment(cs);
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
/* calculate max idle time permitted for this clocksource */
cs->max_idle_ns = clocksource_max_deferment(cs);
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL(clocksource_register);
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
list_del(&cs->list);
cs->rating = rating;
clocksource_enqueue(cs);
clocksource_select();
}
/**
* clocksource_change_rating - Change the rating of a registered clocksource
* @cs: clocksource to be changed
* @rating: new rating
*/
void clocksource_change_rating(struct clocksource *cs, int rating)
{
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);
/**
* clocksource_unregister - remove a registered clocksource
* @cs: clocksource to be unregistered
*/
void clocksource_unregister(struct clocksource *cs)
{
mutex_lock(&clocksource_mutex);
clocksource_dequeue_watchdog(cs);
list_del(&cs->list);
clocksource_select();
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_unregister);
#ifdef CONFIG_SYSFS
/**
* sysfs_show_current_clocksources - sysfs interface for current clocksource
* @dev: unused
* @attr: unused
* @buf: char buffer to be filled with clocksource list
*
* Provides sysfs interface for listing current clocksource.
*/
static ssize_t
sysfs_show_current_clocksources(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t count = 0;
mutex_lock(&clocksource_mutex);
count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
mutex_unlock(&clocksource_mutex);
return count;
}
/**
* sysfs_override_clocksource - interface for manually overriding clocksource
* @dev: unused
* @attr: unused
* @buf: name of override clocksource
* @count: length of buffer
*
* Takes input from sysfs interface for manually overriding the default
* clocksource selection.
*/
static ssize_t sysfs_override_clocksource(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
size_t ret = count;
/* strings from sysfs write are not 0 terminated! */
if (count >= sizeof(override_name))
return -EINVAL;
/* strip of \n: */
if (buf[count-1] == '\n')
count--;
mutex_lock(&clocksource_mutex);
if (count > 0)
memcpy(override_name, buf, count);
override_name[count] = 0;
clocksource_select();
mutex_unlock(&clocksource_mutex);
return ret;
}
/**
* sysfs_show_available_clocksources - sysfs interface for listing clocksource
* @dev: unused
* @attr: unused
* @buf: char buffer to be filled with clocksource list
*
* Provides sysfs interface for listing registered clocksources
*/
static ssize_t
sysfs_show_available_clocksources(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct clocksource *src;
ssize_t count = 0;
mutex_lock(&clocksource_mutex);
list_for_each_entry(src, &clocksource_list, list) {
/*
* Don't show non-HRES clocksource if the tick code is
* in one shot mode (highres=on or nohz=on)
*/
if (!tick_oneshot_mode_active() ||
(src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
count += snprintf(buf + count,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
"%s ", src->name);
}
mutex_unlock(&clocksource_mutex);
count += snprintf(buf + count,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
return count;
}
/*
* Sysfs setup bits:
*/
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
sysfs_override_clocksource);
static DEVICE_ATTR(available_clocksource, 0444,
sysfs_show_available_clocksources, NULL);
static struct bus_type clocksource_subsys = {
.name = "clocksource",
.dev_name = "clocksource",
};
static struct device device_clocksource = {
.id = 0,
.bus = &clocksource_subsys,
};
static int __init init_clocksource_sysfs(void)
{
int error = subsys_system_register(&clocksource_subsys, NULL);
if (!error)
error = device_register(&device_clocksource);
if (!error)
error = device_create_file(
&device_clocksource,
&dev_attr_current_clocksource);
if (!error)
error = device_create_file(
&device_clocksource,
&dev_attr_available_clocksource);
return error;
}
device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */
/**
* boot_override_clocksource - boot clock override
* @str: override name
*
* Takes a clocksource= boot argument and uses it
* as the clocksource override name.
*/
static int __init boot_override_clocksource(char* str)
{
mutex_lock(&clocksource_mutex);
if (str)
strlcpy(override_name, str, sizeof(override_name));
mutex_unlock(&clocksource_mutex);
return 1;
}
__setup("clocksource=", boot_override_clocksource);
/**
* boot_override_clock - Compatibility layer for deprecated boot option
* @str: override name
*
* DEPRECATED! Takes a clock= boot argument and uses it
* as the clocksource override name
*/
static int __init boot_override_clock(char* str)
{
if (!strcmp(str, "pmtmr")) {
printk("Warning: clock=pmtmr is deprecated. "
"Use clocksource=acpi_pm.\n");
return boot_override_clocksource("acpi_pm");
}
printk("Warning! clock= boot option is deprecated. "
"Use clocksource=xyz\n");
return boot_override_clocksource(str);
}
__setup("clock=", boot_override_clock);

129
kernel/time/jiffies.c Normal file
View File

@@ -0,0 +1,129 @@
/***********************************************************************
* linux/kernel/time/jiffies.c
*
* This file contains the jiffies based clocksource.
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
************************************************************************/
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/init.h>
#include "tick-internal.h"
/* The Jiffies based clocksource is the lowest common
* denominator clock source which should function on
* all systems. It has the same coarse resolution as
* the timer interrupt frequency HZ and it suffers
* inaccuracies caused by missed or lost timer
* interrupts and the inability for the timer
* interrupt hardware to accuratly tick at the
* requested HZ value. It is also not recommended
* for "tick-less" systems.
*/
#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)
/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
* conversion, the .shift value could be zero. However
* this would make NTP adjustments impossible as they are
* in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
* shift both the nominator and denominator the same
* amount, and give ntp adjustments in units of 1/2^8
*
* The value 8 is somewhat carefully chosen, as anything
* larger can result in overflows. NSEC_PER_JIFFY grows as
* HZ shrinks, so values greater than 8 overflow 32bits when
* HZ=100.
*/
#define JIFFIES_SHIFT 8
static cycle_t jiffies_read(struct clocksource *cs)
{
return (cycle_t) jiffies;
}
static struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
};
__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
unsigned long seq;
u64 ret;
do {
seq = read_seqbegin(&jiffies_lock);
ret = jiffies_64;
} while (read_seqretry(&jiffies_lock, seq));
return ret;
}
EXPORT_SYMBOL(get_jiffies_64);
#endif
EXPORT_SYMBOL(jiffies);
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
}
core_initcall(init_jiffies_clocksource);
struct clocksource * __init __weak clocksource_default_clock(void)
{
return &clocksource_jiffies;
}
struct clocksource refined_jiffies;
int register_refined_jiffies(long cycles_per_second)
{
u64 nsec_per_tick, shift_hz;
long cycles_per_tick;
refined_jiffies = clocksource_jiffies;
refined_jiffies.name = "refined-jiffies";
refined_jiffies.rating++;
/* Calc cycles per tick */
cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
/* shift_hz stores hz<<8 for extra accuracy */
shift_hz = (u64)cycles_per_second << 8;
shift_hz += cycles_per_tick/2;
do_div(shift_hz, cycles_per_tick);
/* Calculate nsec_per_tick using shift_hz */
nsec_per_tick = (u64)NSEC_PER_SEC << 8;
nsec_per_tick += (u32)shift_hz/2;
do_div(nsec_per_tick, (u32)shift_hz);
refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
clocksource_register(&refined_jiffies);
return 0;
}

935
kernel/time/ntp.c Normal file
View File

@@ -0,0 +1,935 @@
/*
* NTP state machine interfaces and logic.
*
* This code was mainly moved from kernel/timer.c and kernel/time.c
* Please see those files for relevant copyright info and historical
* changelogs.
*/
#include <linux/capability.h>
#include <linux/clocksource.h>
#include <linux/workqueue.h>
#include <linux/hrtimer.h>
#include <linux/jiffies.h>
#include <linux/math64.h>
#include <linux/timex.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/rtc.h>
#include "tick-internal.h"
#include "ntp_internal.h"
/*
* NTP timekeeping variables:
*
* Note: All of the NTP state is protected by the timekeeping locks.
*/
/* USER_HZ period (usecs): */
unsigned long tick_usec = TICK_USEC;
/* SHIFTED_HZ period (nsecs): */
unsigned long tick_nsec;
static u64 tick_length;
static u64 tick_length_base;
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
/*
* phase-lock loop variables
*/
/*
* clock synchronization status
*
* (TIME_ERROR prevents overwriting the CMOS clock)
*/
static int time_state = TIME_OK;
/* clock status bits: */
static int time_status = STA_UNSYNC;
/* time adjustment (nsecs): */
static s64 time_offset;
/* pll time constant: */
static long time_constant = 2;
/* maximum error (usecs): */
static long time_maxerror = NTP_PHASE_LIMIT;
/* estimated error (usecs): */
static long time_esterror = NTP_PHASE_LIMIT;
/* frequency offset (scaled nsecs/secs): */
static s64 time_freq;
/* time at last adjustment (secs): */
static long time_reftime;
static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
#ifdef CONFIG_NTP_PPS
/*
* The following variables are used when a pulse-per-second (PPS) signal
* is available. They establish the engineering parameters of the clock
* discipline loop when controlled by the PPS signal.
*/
#define PPS_VALID 10 /* PPS signal watchdog max (s) */
#define PPS_POPCORN 4 /* popcorn spike threshold (shift) */
#define PPS_INTMIN 2 /* min freq interval (s) (shift) */
#define PPS_INTMAX 8 /* max freq interval (s) (shift) */
#define PPS_INTCOUNT 4 /* number of consecutive good intervals to
increase pps_shift or consecutive bad
intervals to decrease it */
#define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */
static int pps_valid; /* signal watchdog counter */
static long pps_tf[3]; /* phase median filter */
static long pps_jitter; /* current jitter (ns) */
static struct timespec pps_fbase; /* beginning of the last freq interval */
static int pps_shift; /* current interval duration (s) (shift) */
static int pps_intcnt; /* interval counter */
static s64 pps_freq; /* frequency offset (scaled ns/s) */
static long pps_stabil; /* current stability (scaled ns/s) */
/*
* PPS signal quality monitors
*/
static long pps_calcnt; /* calibration intervals */
static long pps_jitcnt; /* jitter limit exceeded */
static long pps_stbcnt; /* stability limit exceeded */
static long pps_errcnt; /* calibration errors */
/* PPS kernel consumer compensates the whole phase error immediately.
* Otherwise, reduce the offset by a fixed factor times the time constant.
*/
static inline s64 ntp_offset_chunk(s64 offset)
{
if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
return offset;
else
return shift_right(offset, SHIFT_PLL + time_constant);
}
static inline void pps_reset_freq_interval(void)
{
/* the PPS calibration interval may end
surprisingly early */
pps_shift = PPS_INTMIN;
pps_intcnt = 0;
}
/**
* pps_clear - Clears the PPS state variables
*/
static inline void pps_clear(void)
{
pps_reset_freq_interval();
pps_tf[0] = 0;
pps_tf[1] = 0;
pps_tf[2] = 0;
pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
pps_freq = 0;
}
/* Decrease pps_valid to indicate that another second has passed since
* the last PPS signal. When it reaches 0, indicate that PPS signal is
* missing.
*/
static inline void pps_dec_valid(void)
{
if (pps_valid > 0)
pps_valid--;
else {
time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
STA_PPSWANDER | STA_PPSERROR);
pps_clear();
}
}
static inline void pps_set_freq(s64 freq)
{
pps_freq = freq;
}
static inline int is_error_status(int status)
{
return (time_status & (STA_UNSYNC|STA_CLOCKERR))
/* PPS signal lost when either PPS time or
* PPS frequency synchronization requested
*/
|| ((time_status & (STA_PPSFREQ|STA_PPSTIME))
&& !(time_status & STA_PPSSIGNAL))
/* PPS jitter exceeded when
* PPS time synchronization requested */
|| ((time_status & (STA_PPSTIME|STA_PPSJITTER))
== (STA_PPSTIME|STA_PPSJITTER))
/* PPS wander exceeded or calibration error when
* PPS frequency synchronization requested
*/
|| ((time_status & STA_PPSFREQ)
&& (time_status & (STA_PPSWANDER|STA_PPSERROR)));
}
static inline void pps_fill_timex(struct timex *txc)
{
txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->jitter = pps_jitter;
if (!(time_status & STA_NANO))
txc->jitter /= NSEC_PER_USEC;
txc->shift = pps_shift;
txc->stabil = pps_stabil;
txc->jitcnt = pps_jitcnt;
txc->calcnt = pps_calcnt;
txc->errcnt = pps_errcnt;
txc->stbcnt = pps_stbcnt;
}
#else /* !CONFIG_NTP_PPS */
static inline s64 ntp_offset_chunk(s64 offset)
{
return shift_right(offset, SHIFT_PLL + time_constant);
}
static inline void pps_reset_freq_interval(void) {}
static inline void pps_clear(void) {}
static inline void pps_dec_valid(void) {}
static inline void pps_set_freq(s64 freq) {}
static inline int is_error_status(int status)
{
return status & (STA_UNSYNC|STA_CLOCKERR);
}
static inline void pps_fill_timex(struct timex *txc)
{
/* PPS is not implemented, so these are zero */
txc->ppsfreq = 0;
txc->jitter = 0;
txc->shift = 0;
txc->stabil = 0;
txc->jitcnt = 0;
txc->calcnt = 0;
txc->errcnt = 0;
txc->stbcnt = 0;
}
#endif /* CONFIG_NTP_PPS */
/**
* ntp_synced - Returns 1 if the NTP status is not UNSYNC
*
*/
static inline int ntp_synced(void)
{
return !(time_status & STA_UNSYNC);
}
/*
* NTP methods:
*/
/*
* Update (tick_length, tick_length_base, tick_nsec), based
* on (tick_usec, ntp_tick_adj, time_freq):
*/
static void ntp_update_frequency(void)
{
u64 second_length;
u64 new_base;
second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
<< NTP_SCALE_SHIFT;
second_length += ntp_tick_adj;
second_length += time_freq;
tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
new_base = div_u64(second_length, NTP_INTERVAL_FREQ);
/*
* Don't wait for the next second_overflow, apply
* the change to the tick length immediately:
*/
tick_length += new_base - tick_length_base;
tick_length_base = new_base;
}
static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
{
time_status &= ~STA_MODE;
if (secs < MINSEC)
return 0;
if (!(time_status & STA_FLL) && (secs <= MAXSEC))
return 0;
time_status |= STA_MODE;
return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
}
static void ntp_update_offset(long offset)
{
s64 freq_adj;
s64 offset64;
long secs;
if (!(time_status & STA_PLL))
return;
if (!(time_status & STA_NANO))
offset *= NSEC_PER_USEC;
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
offset = min(offset, MAXPHASE);
offset = max(offset, -MAXPHASE);
/*
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
secs = get_seconds() - time_reftime;
if (unlikely(time_status & STA_FREQHOLD))
secs = 0;
time_reftime = get_seconds();
offset64 = offset;
freq_adj = ntp_update_offset_fll(offset64, secs);
/*
* Clamp update interval to reduce PLL gain with low
* sampling rate (e.g. intermittent network connection)
* to avoid instability.
*/
if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant)))
secs = 1 << (SHIFT_PLL + 1 + time_constant);
freq_adj += (offset64 * secs) <<
(NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant));
freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED);
time_freq = max(freq_adj, -MAXFREQ_SCALED);
time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
}
/**
* ntp_clear - Clears the NTP state variables
*/
void ntp_clear(void)
{
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
ntp_update_frequency();
tick_length = tick_length_base;
time_offset = 0;
/* Clear PPS state variables */
pps_clear();
}
u64 ntp_tick_length(void)
{
return tick_length;
}
/*
* this routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*
* Also handles leap second processing, and returns leap offset
*/
int second_overflow(unsigned long secs)
{
s64 delta;
int leap = 0;
/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second.
*/
switch (time_state) {
case TIME_OK:
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
if (!(time_status & STA_INS))
time_state = TIME_OK;
else if (secs % 86400 == 0) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
}
break;
case TIME_DEL:
if (!(time_status & STA_DEL))
time_state = TIME_OK;
else if ((secs + 1) % 86400 == 0) {
leap = 1;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
}
break;
case TIME_OOP:
time_state = TIME_WAIT;
break;
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
if (time_maxerror > NTP_PHASE_LIMIT) {
time_maxerror = NTP_PHASE_LIMIT;
time_status |= STA_UNSYNC;
}
/* Compute the phase adjustment for the next second */
tick_length = tick_length_base;
delta = ntp_offset_chunk(time_offset);
time_offset -= delta;
tick_length += delta;
/* Check PPS signal */
pps_dec_valid();
if (!time_adjust)
goto out;
if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
goto out;
}
if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
goto out;
}
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
out:
return leap;
}
#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
static void sync_cmos_clock(struct work_struct *work);
static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
static void sync_cmos_clock(struct work_struct *work)
{
struct timespec now, next;
int fail = 1;
/*
* If we have an externally synchronized Linux clock, then update
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
* This code is run on a timer. If the clock is set, that timer
* may not expire at the correct time. Thus, we adjust...
*/
if (!ntp_synced()) {
/*
* Not synced, exit, do not restart a timer (if one is
* running, let it run out).
*/
return;
}
getnstimeofday(&now);
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
struct timespec adjust = now;
fail = -ENODEV;
if (persistent_clock_is_local)
adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
#ifdef CONFIG_GENERIC_CMOS_UPDATE
fail = update_persistent_clock(adjust);
#endif
#ifdef CONFIG_RTC_SYSTOHC
if (fail == -ENODEV)
fail = rtc_set_ntp_time(adjust);
#endif
}
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
if (next.tv_nsec <= 0)
next.tv_nsec += NSEC_PER_SEC;
if (!fail || fail == -ENODEV)
next.tv_sec = 659;
else
next.tv_sec = 0;
if (next.tv_nsec >= NSEC_PER_SEC) {
next.tv_sec++;
next.tv_nsec -= NSEC_PER_SEC;
}
schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
}
void ntp_notify_cmos_timer(void)
{
schedule_delayed_work(&sync_cmos_work, 0);
}
#else
void ntp_notify_cmos_timer(void) { }
#endif
/*
* Propagate a new txc->status value into the NTP state:
*/
static inline void process_adj_status(struct timex *txc, struct timespec *ts)
{
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
/* restart PPS frequency calibration */
pps_reset_freq_interval();
}
/*
* If we turn on PLL adjustments then reset the
* reference time to current time.
*/
if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
time_reftime = get_seconds();
/* only set allowed bits */
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
}
static inline void process_adjtimex_modes(struct timex *txc,
struct timespec *ts,
s32 *time_tai)
{
if (txc->modes & ADJ_STATUS)
process_adj_status(txc, ts);
if (txc->modes & ADJ_NANO)
time_status |= STA_NANO;
if (txc->modes & ADJ_MICRO)
time_status &= ~STA_NANO;
if (txc->modes & ADJ_FREQUENCY) {
time_freq = txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
/* update pps_freq */
pps_set_freq(time_freq);
}
if (txc->modes & ADJ_MAXERROR)
time_maxerror = txc->maxerror;
if (txc->modes & ADJ_ESTERROR)
time_esterror = txc->esterror;
if (txc->modes & ADJ_TIMECONST) {
time_constant = txc->constant;
if (!(time_status & STA_NANO))
time_constant += 4;
time_constant = min(time_constant, (long)MAXTC);
time_constant = max(time_constant, 0l);
}
if (txc->modes & ADJ_TAI && txc->constant > 0)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
ntp_update_offset(txc->offset);
if (txc->modes & ADJ_TICK)
tick_usec = txc->tick;
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
ntp_update_frequency();
}
/**
* ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
*/
int ntp_validate_timex(struct timex *txc)
{
if (txc->modes & ADJ_ADJTIME) {
/* singleshot must not be used with any other mode bits */
if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
return -EINVAL;
if (!(txc->modes & ADJ_OFFSET_READONLY) &&
!capable(CAP_SYS_TIME))
return -EPERM;
} else {
/* In order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
/*
* if the quartz is off by more than 10% then
* something is VERY wrong!
*/
if (txc->modes & ADJ_TICK &&
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;
}
if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
return -EPERM;
return 0;
}
/*
* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
{
int result;
if (txc->modes & ADJ_ADJTIME) {
long save_adjust = time_adjust;
if (!(txc->modes & ADJ_OFFSET_READONLY)) {
/* adjtime() is independent from ntp_adjtime() */
time_adjust = txc->offset;
ntp_update_frequency();
}
txc->offset = save_adjust;
} else {
/* If there are input parameters, then process them: */
if (txc->modes)
process_adjtimex_modes(txc, ts, time_tai);
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
NTP_SCALE_SHIFT);
if (!(time_status & STA_NANO))
txc->offset /= NSEC_PER_USEC;
}
result = time_state; /* mostly `TIME_OK' */
/* check for errors */
if (is_error_status(time_status))
result = TIME_ERROR;
txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
txc->constant = time_constant;
txc->precision = 1;
txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
txc->tick = tick_usec;
txc->tai = *time_tai;
/* fill PPS status fields */
pps_fill_timex(txc);
txc->time.tv_sec = ts->tv_sec;
txc->time.tv_usec = ts->tv_nsec;
if (!(time_status & STA_NANO))
txc->time.tv_usec /= NSEC_PER_USEC;
return result;
}
#ifdef CONFIG_NTP_PPS
/* actually struct pps_normtime is good old struct timespec, but it is
* semantically different (and it is the reason why it was invented):
* pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
* while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
struct pps_normtime {
__kernel_time_t sec; /* seconds */
long nsec; /* nanoseconds */
};
/* normalize the timestamp so that nsec is in the
( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
{
struct pps_normtime norm = {
.sec = ts.tv_sec,
.nsec = ts.tv_nsec
};
if (norm.nsec > (NSEC_PER_SEC >> 1)) {
norm.nsec -= NSEC_PER_SEC;
norm.sec++;
}
return norm;
}
/* get current phase correction and jitter */
static inline long pps_phase_filter_get(long *jitter)
{
*jitter = pps_tf[0] - pps_tf[1];
if (*jitter < 0)
*jitter = -*jitter;
/* TODO: test various filters */
return pps_tf[0];
}
/* add the sample to the phase filter */
static inline void pps_phase_filter_add(long err)
{
pps_tf[2] = pps_tf[1];
pps_tf[1] = pps_tf[0];
pps_tf[0] = err;
}
/* decrease frequency calibration interval length.
* It is halved after four consecutive unstable intervals.
*/
static inline void pps_dec_freq_interval(void)
{
if (--pps_intcnt <= -PPS_INTCOUNT) {
pps_intcnt = -PPS_INTCOUNT;
if (pps_shift > PPS_INTMIN) {
pps_shift--;
pps_intcnt = 0;
}
}
}
/* increase frequency calibration interval length.
* It is doubled after four consecutive stable intervals.
*/
static inline void pps_inc_freq_interval(void)
{
if (++pps_intcnt >= PPS_INTCOUNT) {
pps_intcnt = PPS_INTCOUNT;
if (pps_shift < PPS_INTMAX) {
pps_shift++;
pps_intcnt = 0;
}
}
}
/* update clock frequency based on MONOTONIC_RAW clock PPS signal
* timestamps
*
* At the end of the calibration interval the difference between the
* first and last MONOTONIC_RAW clock timestamps divided by the length
* of the interval becomes the frequency update. If the interval was
* too long, the data are discarded.
* Returns the difference between old and new frequency values.
*/
static long hardpps_update_freq(struct pps_normtime freq_norm)
{
long delta, delta_mod;
s64 ftemp;
/* check if the frequency interval was too long */
if (freq_norm.sec > (2 << pps_shift)) {
time_status |= STA_PPSERROR;
pps_errcnt++;
pps_dec_freq_interval();
pr_err("hardpps: PPSERROR: interval too long - %ld s\n",
freq_norm.sec);
return 0;
}
/* here the raw frequency offset and wander (stability) is
* calculated. If the wander is less than the wander threshold
* the interval is increased; otherwise it is decreased.
*/
ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
freq_norm.sec);
delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
pps_freq = ftemp;
if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
pr_warning("hardpps: PPSWANDER: change=%ld\n", delta);
time_status |= STA_PPSWANDER;
pps_stbcnt++;
pps_dec_freq_interval();
} else { /* good sample */
pps_inc_freq_interval();
}
/* the stability metric is calculated as the average of recent
* frequency changes, but is used only for performance
* monitoring
*/
delta_mod = delta;
if (delta_mod < 0)
delta_mod = -delta_mod;
pps_stabil += (div_s64(((s64)delta_mod) <<
(NTP_SCALE_SHIFT - SHIFT_USEC),
NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;
/* if enabled, the system clock frequency is updated */
if ((time_status & STA_PPSFREQ) != 0 &&
(time_status & STA_FREQHOLD) == 0) {
time_freq = pps_freq;
ntp_update_frequency();
}
return delta;
}
/* correct REALTIME clock phase error against PPS signal */
static void hardpps_update_phase(long error)
{
long correction = -error;
long jitter;
/* add the sample to the median filter */
pps_phase_filter_add(correction);
correction = pps_phase_filter_get(&jitter);
/* Nominal jitter is due to PPS signal noise. If it exceeds the
* threshold, the sample is discarded; otherwise, if so enabled,
* the time offset is updated.
*/
if (jitter > (pps_jitter << PPS_POPCORN)) {
pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
jitter, (pps_jitter << PPS_POPCORN));
time_status |= STA_PPSJITTER;
pps_jitcnt++;
} else if (time_status & STA_PPSTIME) {
/* correct the time using the phase offset */
time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
NTP_INTERVAL_FREQ);
/* cancel running adjtime() */
time_adjust = 0;
}
/* update jitter */
pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
}
/*
* __hardpps() - discipline CPU clock oscillator to external PPS signal
*
* This routine is called at each PPS signal arrival in order to
* discipline the CPU clock oscillator to the PPS signal. It takes two
* parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former
* is used to correct clock phase error and the latter is used to
* correct the frequency.
*
* This code is based on David Mills's reference nanokernel
* implementation. It was mostly rewritten but keeps the same idea.
*/
void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
{
struct pps_normtime pts_norm, freq_norm;
pts_norm = pps_normalize_ts(*phase_ts);
/* clear the error bits, they will be set again if needed */
time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
/* indicate signal presence */
time_status |= STA_PPSSIGNAL;
pps_valid = PPS_VALID;
/* when called for the first time,
* just start the frequency interval */
if (unlikely(pps_fbase.tv_sec == 0)) {
pps_fbase = *raw_ts;
return;
}
/* ok, now we have a base for frequency calculation */
freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
/* check that the signal is in the range
* [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
if ((freq_norm.sec == 0) ||
(freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
(freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
pr_err("hardpps: PPSJITTER: bad pulse\n");
return;
}
/* signal is ok */
/* check if the current frequency interval is finished */
if (freq_norm.sec >= (1 << pps_shift)) {
pps_calcnt++;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
hardpps_update_freq(freq_norm);
}
hardpps_update_phase(pts_norm.nsec);
}
#endif /* CONFIG_NTP_PPS */
static int __init ntp_tick_adj_setup(char *str)
{
ntp_tick_adj = simple_strtol(str, NULL, 0);
ntp_tick_adj <<= NTP_SCALE_SHIFT;
return 1;
}
__setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
}

View File

@@ -0,0 +1,12 @@
#ifndef _LINUX_NTP_INTERNAL_H
#define _LINUX_NTP_INTERNAL_H
extern void ntp_init(void);
extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);
extern int second_overflow(unsigned long secs);
extern int ntp_validate_timex(struct timex *);
extern int __do_adjtimex(struct timex *, struct timespec *, s32 *);
extern void __hardpps(const struct timespec *, const struct timespec *);
#endif /* _LINUX_NTP_INTERNAL_H */

446
kernel/time/posix-clock.c Normal file
View File

@@ -0,0 +1,446 @@
/*
* posix-clock.c - support for dynamic clock devices
*
* Copyright (C) 2010 OMICRON electronics GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/device.h>
#include <linux/export.h>
#include <linux/file.h>
#include <linux/posix-clock.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
static void delete_clock(struct kref *kref);
/*
* Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
*/
static struct posix_clock *get_posix_clock(struct file *fp)
{
struct posix_clock *clk = fp->private_data;
down_read(&clk->rwsem);
if (!clk->zombie)
return clk;
up_read(&clk->rwsem);
return NULL;
}
static void put_posix_clock(struct posix_clock *clk)
{
up_read(&clk->rwsem);
}
static ssize_t posix_clock_read(struct file *fp, char __user *buf,
size_t count, loff_t *ppos)
{
struct posix_clock *clk = get_posix_clock(fp);
int err = -EINVAL;
if (!clk)
return -ENODEV;
if (clk->ops.read)
err = clk->ops.read(clk, fp->f_flags, buf, count);
put_posix_clock(clk);
return err;
}
static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
{
struct posix_clock *clk = get_posix_clock(fp);
int result = 0;
if (!clk)
return -ENODEV;
if (clk->ops.poll)
result = clk->ops.poll(clk, fp, wait);
put_posix_clock(clk);
return result;
}
static int posix_clock_fasync(int fd, struct file *fp, int on)
{
struct posix_clock *clk = get_posix_clock(fp);
int err = 0;
if (!clk)
return -ENODEV;
if (clk->ops.fasync)
err = clk->ops.fasync(clk, fd, fp, on);
put_posix_clock(clk);
return err;
}
static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
{
struct posix_clock *clk = get_posix_clock(fp);
int err = -ENODEV;
if (!clk)
return -ENODEV;
if (clk->ops.mmap)
err = clk->ops.mmap(clk, vma);
put_posix_clock(clk);
return err;
}
static long posix_clock_ioctl(struct file *fp,
unsigned int cmd, unsigned long arg)
{
struct posix_clock *clk = get_posix_clock(fp);
int err = -ENOTTY;
if (!clk)
return -ENODEV;
if (clk->ops.ioctl)
err = clk->ops.ioctl(clk, cmd, arg);
put_posix_clock(clk);
return err;
}
#ifdef CONFIG_COMPAT
static long posix_clock_compat_ioctl(struct file *fp,
unsigned int cmd, unsigned long arg)
{
struct posix_clock *clk = get_posix_clock(fp);
int err = -ENOTTY;
if (!clk)
return -ENODEV;
if (clk->ops.ioctl)
err = clk->ops.ioctl(clk, cmd, arg);
put_posix_clock(clk);
return err;
}
#endif
static int posix_clock_open(struct inode *inode, struct file *fp)
{
int err;
struct posix_clock *clk =
container_of(inode->i_cdev, struct posix_clock, cdev);
down_read(&clk->rwsem);
if (clk->zombie) {
err = -ENODEV;
goto out;
}
if (clk->ops.open)
err = clk->ops.open(clk, fp->f_mode);
else
err = 0;
if (!err) {
kref_get(&clk->kref);
fp->private_data = clk;
}
out:
up_read(&clk->rwsem);
return err;
}
static int posix_clock_release(struct inode *inode, struct file *fp)
{
struct posix_clock *clk = fp->private_data;
int err = 0;
if (clk->ops.release)
err = clk->ops.release(clk);
kref_put(&clk->kref, delete_clock);
fp->private_data = NULL;
return err;
}
static const struct file_operations posix_clock_file_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.read = posix_clock_read,
.poll = posix_clock_poll,
.unlocked_ioctl = posix_clock_ioctl,
.open = posix_clock_open,
.release = posix_clock_release,
.fasync = posix_clock_fasync,
.mmap = posix_clock_mmap,
#ifdef CONFIG_COMPAT
.compat_ioctl = posix_clock_compat_ioctl,
#endif
};
int posix_clock_register(struct posix_clock *clk, dev_t devid)
{
int err;
kref_init(&clk->kref);
init_rwsem(&clk->rwsem);
cdev_init(&clk->cdev, &posix_clock_file_operations);
clk->cdev.owner = clk->ops.owner;
err = cdev_add(&clk->cdev, devid, 1);
return err;
}
EXPORT_SYMBOL_GPL(posix_clock_register);
static void delete_clock(struct kref *kref)
{
struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
if (clk->release)
clk->release(clk);
}
void posix_clock_unregister(struct posix_clock *clk)
{
cdev_del(&clk->cdev);
down_write(&clk->rwsem);
clk->zombie = true;
up_write(&clk->rwsem);
kref_put(&clk->kref, delete_clock);
}
EXPORT_SYMBOL_GPL(posix_clock_unregister);
struct posix_clock_desc {
struct file *fp;
struct posix_clock *clk;
};
static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
{
struct file *fp = fget(CLOCKID_TO_FD(id));
int err = -EINVAL;
if (!fp)
return err;
if (fp->f_op->open != posix_clock_open || !fp->private_data)
goto out;
cd->fp = fp;
cd->clk = get_posix_clock(fp);
err = cd->clk ? 0 : -ENODEV;
out:
if (err)
fput(fp);
return err;
}
static void put_clock_desc(struct posix_clock_desc *cd)
{
put_posix_clock(cd->clk);
fput(cd->fp);
}
static int pc_clock_adjtime(clockid_t id, struct timex *tx)
{
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
err = -EACCES;
goto out;
}
if (cd.clk->ops.clock_adjtime)
err = cd.clk->ops.clock_adjtime(cd.clk, tx);
else
err = -EOPNOTSUPP;
out:
put_clock_desc(&cd);
return err;
}
static int pc_clock_gettime(clockid_t id, struct timespec *ts)
{
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if (cd.clk->ops.clock_gettime)
err = cd.clk->ops.clock_gettime(cd.clk, ts);
else
err = -EOPNOTSUPP;
put_clock_desc(&cd);
return err;
}
static int pc_clock_getres(clockid_t id, struct timespec *ts)
{
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if (cd.clk->ops.clock_getres)
err = cd.clk->ops.clock_getres(cd.clk, ts);
else
err = -EOPNOTSUPP;
put_clock_desc(&cd);
return err;
}
static int pc_clock_settime(clockid_t id, const struct timespec *ts)
{
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
err = -EACCES;
goto out;
}
if (cd.clk->ops.clock_settime)
err = cd.clk->ops.clock_settime(cd.clk, ts);
else
err = -EOPNOTSUPP;
out:
put_clock_desc(&cd);
return err;
}
static int pc_timer_create(struct k_itimer *kit)
{
clockid_t id = kit->it_clock;
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if (cd.clk->ops.timer_create)
err = cd.clk->ops.timer_create(cd.clk, kit);
else
err = -EOPNOTSUPP;
put_clock_desc(&cd);
return err;
}
static int pc_timer_delete(struct k_itimer *kit)
{
clockid_t id = kit->it_clock;
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if (cd.clk->ops.timer_delete)
err = cd.clk->ops.timer_delete(cd.clk, kit);
else
err = -EOPNOTSUPP;
put_clock_desc(&cd);
return err;
}
static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
{
clockid_t id = kit->it_clock;
struct posix_clock_desc cd;
if (get_clock_desc(id, &cd))
return;
if (cd.clk->ops.timer_gettime)
cd.clk->ops.timer_gettime(cd.clk, kit, ts);
put_clock_desc(&cd);
}
static int pc_timer_settime(struct k_itimer *kit, int flags,
struct itimerspec *ts, struct itimerspec *old)
{
clockid_t id = kit->it_clock;
struct posix_clock_desc cd;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
if (cd.clk->ops.timer_settime)
err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
else
err = -EOPNOTSUPP;
put_clock_desc(&cd);
return err;
}
struct k_clock clock_posix_dynamic = {
.clock_getres = pc_clock_getres,
.clock_set = pc_clock_settime,
.clock_get = pc_clock_gettime,
.clock_adj = pc_clock_adjtime,
.timer_create = pc_timer_create,
.timer_set = pc_timer_settime,
.timer_del = pc_timer_delete,
.timer_get = pc_timer_gettime,
};

View File

@@ -0,0 +1,857 @@
/*
* linux/kernel/time/tick-broadcast.c
*
* This file contains functions which emulate a local clock-event
* device via a broadcast event source.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include "tick-internal.h"
/*
* Broadcast support for broken x86 hardware, where the local apic
* timer stops in C3 state.
*/
static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
static cpumask_var_t tick_broadcast_on;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;
#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif
/*
* Debugging: see timer_list.c
*/
struct tick_device *tick_get_broadcast_device(void)
{
return &tick_broadcast_device;
}
struct cpumask *tick_get_broadcast_mask(void)
{
return tick_broadcast_mask;
}
/*
* Start the device in periodic mode
*/
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
if (bc)
tick_setup_periodic(bc, 1);
}
/*
* Check, if the device can be utilized as broadcast device:
*/
int tick_check_broadcast_device(struct clock_event_device *dev)
{
struct clock_event_device *cur = tick_broadcast_device.evtdev;
if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
(tick_broadcast_device.evtdev &&
tick_broadcast_device.evtdev->rating >= dev->rating) ||
(dev->features & CLOCK_EVT_FEAT_C3STOP))
return 0;
clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
if (cur)
cur->event_handler = clockevents_handle_noop;
tick_broadcast_device.evtdev = dev;
if (!cpumask_empty(tick_broadcast_mask))
tick_broadcast_start_periodic(dev);
/*
* Inform all cpus about this. We might be in a situation
* where we did not switch to oneshot mode because the per cpu
* devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
* of a oneshot capable broadcast device. Without that
* notification the systems stays stuck in periodic mode
* forever.
*/
if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_clock_notify();
return 1;
}
/*
* Check, if the device is the broadcast device
*/
int tick_is_broadcast_device(struct clock_event_device *dev)
{
return (dev && tick_broadcast_device.evtdev == dev);
}
static void err_broadcast(const struct cpumask *mask)
{
pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
if (!dev->broadcast)
dev->broadcast = tick_broadcast;
if (!dev->broadcast) {
pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
dev->name);
dev->broadcast = err_broadcast;
}
}
/*
* Check, if the device is disfunctional and a place holder, which
* needs to be handled by the broadcast device.
*/
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Devices might be registered with both periodic and oneshot
* mode disabled. This signals, that the device needs to be
* operated from the broadcast device and is a placeholder for
* the cpu local device.
*/
if (!tick_device_is_functional(dev)) {
dev->event_handler = tick_handle_periodic;
tick_device_setup_broadcast_func(dev);
cpumask_set_cpu(cpu, tick_broadcast_mask);
tick_broadcast_start_periodic(bc);
ret = 1;
} else {
/*
* Clear the broadcast bit for this cpu if the
* device is not power state affected.
*/
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
cpumask_clear_cpu(cpu, tick_broadcast_mask);
else
tick_device_setup_broadcast_func(dev);
/*
* Clear the broadcast bit if the CPU is not in
* periodic broadcast on state.
*/
if (!cpumask_test_cpu(cpu, tick_broadcast_on))
cpumask_clear_cpu(cpu, tick_broadcast_mask);
switch (tick_broadcast_device.mode) {
case TICKDEV_MODE_ONESHOT:
/*
* If the system is in oneshot mode we can
* unconditionally clear the oneshot mask bit,
* because the CPU is running and therefore
* not in an idle state which causes the power
* state affected device to stop. Let the
* caller initialize the device.
*/
tick_broadcast_clear_oneshot(cpu);
ret = 0;
break;
case TICKDEV_MODE_PERIODIC:
/*
* If the system is in periodic mode, check
* whether the broadcast device can be
* switched off now.
*/
if (cpumask_empty(tick_broadcast_mask) && bc)
clockevents_shutdown(bc);
/*
* If we kept the cpu in the broadcast mask,
* tell the caller to leave the per cpu device
* in shutdown state. The periodic interrupt
* is delivered by the broadcast device.
*/
ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
break;
default:
/* Nothing to do */
ret = 0;
break;
}
}
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
int tick_receive_broadcast(void)
{
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
struct clock_event_device *evt = td->evtdev;
if (!evt)
return -ENODEV;
if (!evt->event_handler)
return -EINVAL;
evt->event_handler(evt);
return 0;
}
#endif
/*
* Broadcast the event to the cpus, which are set in the mask (mangled).
*/
static void tick_do_broadcast(struct cpumask *mask)
{
int cpu = smp_processor_id();
struct tick_device *td;
/*
* Check, if the current cpu is in the mask
*/
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->event_handler(td->evtdev);
}
if (!cpumask_empty(mask)) {
/*
* It might be necessary to actually check whether the devices
* have different broadcast functions. For now, just use the
* one of the first device. This works as long as we have this
* misfeature only on x86 (lapic)
*/
td = &per_cpu(tick_cpu_device, cpumask_first(mask));
td->evtdev->broadcast(mask);
}
}
/*
* Periodic broadcast:
* - invoke the broadcast handlers
*/
static void tick_do_periodic_broadcast(void)
{
raw_spin_lock(&tick_broadcast_lock);
cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
tick_do_broadcast(tmpmask);
raw_spin_unlock(&tick_broadcast_lock);
}
/*
* Event handler for periodic broadcast ticks
*/
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
ktime_t next;
tick_do_periodic_broadcast();
/*
* The device is in periodic mode. No reprogramming necessary:
*/
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
return;
/*
* Setup the next period for devices, which do not have
* periodic mode. We read dev->next_event first and add to it
* when the event already expired. clockevents_program_event()
* sets dev->next_event only when the event is really
* programmed to the device.
*/
for (next = dev->next_event; ;) {
next = ktime_add(next, tick_period);
if (!clockevents_program_event(dev, next, false))
return;
tick_do_periodic_broadcast();
}
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
static void tick_do_broadcast_on_off(unsigned long *reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
int cpu, bc_stopped;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
bc = tick_broadcast_device.evtdev;
/*
* Is the device not affected by the powerstate ?
*/
if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
goto out;
if (!tick_device_is_functional(dev))
goto out;
bc_stopped = cpumask_empty(tick_broadcast_mask);
switch (*reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
cpumask_set_cpu(cpu, tick_broadcast_on);
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
clockevents_shutdown(dev);
}
if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
tick_broadcast_force = 1;
break;
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
if (tick_broadcast_force)
break;
cpumask_clear_cpu(cpu, tick_broadcast_on);
if (!tick_device_is_functional(dev))
break;
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
}
break;
}
if (cpumask_empty(tick_broadcast_mask)) {
if (!bc_stopped)
clockevents_shutdown(bc);
} else if (bc_stopped) {
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
tick_broadcast_setup_oneshot(bc);
}
out:
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop.
*/
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
"offline CPU #%d\n", *oncpu);
else
tick_do_broadcast_on_off(&reason);
}
/*
* Set the periodic handler depending on broadcast on/off
*/
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
if (!broadcast)
dev->event_handler = tick_handle_periodic;
else
dev->event_handler = tick_handle_periodic_broadcast;
}
/*
* Remove a CPU from broadcasting
*/
void tick_shutdown_broadcast(unsigned int *cpup)
{
struct clock_event_device *bc;
unsigned long flags;
unsigned int cpu = *cpup;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
cpumask_clear_cpu(cpu, tick_broadcast_mask);
cpumask_clear_cpu(cpu, tick_broadcast_on);
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
if (bc && cpumask_empty(tick_broadcast_mask))
clockevents_shutdown(bc);
}
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
void tick_suspend_broadcast(void)
{
struct clock_event_device *bc;
unsigned long flags;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc)
clockevents_shutdown(bc);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
int tick_resume_broadcast(void)
{
struct clock_event_device *bc;
unsigned long flags;
int broadcast = 0;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc) {
clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
switch (tick_broadcast_device.mode) {
case TICKDEV_MODE_PERIODIC:
if (!cpumask_empty(tick_broadcast_mask))
tick_broadcast_start_periodic(bc);
broadcast = cpumask_test_cpu(smp_processor_id(),
tick_broadcast_mask);
break;
case TICKDEV_MODE_ONESHOT:
if (!cpumask_empty(tick_broadcast_mask))
broadcast = tick_resume_broadcast_oneshot(bc);
break;
}
}
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return broadcast;
}
#ifdef CONFIG_TICK_ONESHOT
static cpumask_var_t tick_broadcast_oneshot_mask;
static cpumask_var_t tick_broadcast_pending_mask;
static cpumask_var_t tick_broadcast_force_mask;
/*
* Exposed for debugging: see timer_list.c
*/
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
return tick_broadcast_oneshot_mask;
}
/*
* Called before going idle with interrupts disabled. Checks whether a
* broadcast event from the other core is about to happen. We detected
* that in tick_broadcast_oneshot_control(). The callsite can use this
* to avoid a deep idle transition as we are about to get the
* broadcast IPI right away.
*/
int tick_check_broadcast_expired(void)
{
return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}
/*
* Set broadcast interrupt affinity
*/
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
const struct cpumask *cpumask)
{
if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
return;
if (cpumask_equal(bc->cpumask, cpumask))
return;
bc->cpumask = cpumask;
irq_set_affinity(bc->irq, bc->cpumask);
}
static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
ktime_t expires, int force)
{
int ret;
if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
ret = clockevents_program_event(bc, expires, force);
if (!ret)
tick_broadcast_set_affinity(bc, cpumask_of(cpu));
return ret;
}
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
return 0;
}
/*
* Called from irq_enter() when idle was interrupted to reenable the
* per cpu device.
*/
void tick_check_oneshot_broadcast(int cpu)
{
if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
/*
* We might be in the middle of switching over from
* periodic to oneshot. If the CPU has not yet
* switched over, leave the device alone.
*/
if (td->mode == TICKDEV_MODE_ONESHOT) {
clockevents_set_mode(td->evtdev,
CLOCK_EVT_MODE_ONESHOT);
}
}
}
/*
* Handle oneshot mode broadcasting
*/
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
struct tick_device *td;
ktime_t now, next_event;
int cpu, next_cpu = 0;
raw_spin_lock(&tick_broadcast_lock);
again:
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
cpumask_clear(tmpmask);
now = ktime_get();
/* Find all expired events */
for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 <= now.tv64) {
cpumask_set_cpu(cpu, tmpmask);
/*
* Mark the remote cpu in the pending mask, so
* it can avoid reprogramming the cpu local
* timer in tick_broadcast_oneshot_control().
*/
cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
next_event.tv64 = td->evtdev->next_event.tv64;
next_cpu = cpu;
}
}
/*
* Remove the current cpu from the pending mask. The event is
* delivered immediately in tick_do_broadcast() !
*/
cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
/* Take care of enforced broadcast requests */
cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
cpumask_clear(tick_broadcast_force_mask);
/*
* Wakeup the cpus which have an expired event.
*/
tick_do_broadcast(tmpmask);
/*
* Two reasons for reprogram:
*
* - The global event did not expire any CPU local
* events. This happens in dyntick mode, as the maximum PIT
* delta is quite small.
*
* - There are pending events on sleeping CPUs which were not
* in the event mask
*/
if (next_event.tv64 != KTIME_MAX) {
/*
* Rearm the broadcast device. If event expired,
* repeat the above
*/
if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
goto again;
}
raw_spin_unlock(&tick_broadcast_lock);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
void tick_broadcast_oneshot_control(unsigned long reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
ktime_t now;
int cpu;
/*
* Periodic mode does not care about the enter/exit of power
* states
*/
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
return;
/*
* We are called with preemtion disabled from the depth of the
* idle code, so we can't be moved away.
*/
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
return;
bc = tick_broadcast_device.evtdev;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
/*
* We only reprogram the broadcast timer if we
* did not mark ourself in the force mask and
* if the cpu local event is earlier than the
* broadcast event. If the current CPU is in
* the force mask, then we are going to be
* woken by the IPI right away.
*/
if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
}
} else {
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
/*
* The cpu which was handling the broadcast
* timer marked this cpu in the broadcast
* pending mask and fired the broadcast
* IPI. So we are going to handle the expired
* event anyway via the broadcast IPI
* handler. No need to reprogram the timer
* with an already expired event.
*/
if (cpumask_test_and_clear_cpu(cpu,
tick_broadcast_pending_mask))
goto out;
/*
* Bail out if there is no next event.
*/
if (dev->next_event.tv64 == KTIME_MAX)
goto out;
/*
* If the pending bit is not set, then we are
* either the CPU handling the broadcast
* interrupt or we got woken by something else.
*
* We are not longer in the broadcast mask, so
* if the cpu local expiry time is already
* reached, we would reprogram the cpu local
* timer with an already expired event.
*
* This can lead to a ping-pong when we return
* to idle and therefor rearm the broadcast
* timer before the cpu local timer was able
* to fire. This happens because the forced
* reprogramming makes sure that the event
* will happen in the future and depending on
* the min_delta setting this might be far
* enough out that the ping-pong starts.
*
* If the cpu local next_event has expired
* then we know that the broadcast timer
* next_event has expired as well and
* broadcast is about to be handled. So we
* avoid reprogramming and enforce that the
* broadcast handler, which did not run yet,
* will invoke the cpu local handler.
*
* We cannot call the handler directly from
* here, because we might be in a NOHZ phase
* and we did not go through the irq_enter()
* nohz fixups.
*/
now = ktime_get();
if (dev->next_event.tv64 <= now.tv64) {
cpumask_set_cpu(cpu, tick_broadcast_force_mask);
goto out;
}
/*
* We got woken by something else. Reprogram
* the cpu local timer device.
*/
tick_program_event(dev->next_event, 1);
}
}
out:
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
* Reset the one shot broadcast for a cpu
*
* Called with tick_broadcast_lock held
*/
static void tick_broadcast_clear_oneshot(int cpu)
{
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
}
static void tick_broadcast_init_next_event(struct cpumask *mask,
ktime_t expires)
{
struct tick_device *td;
int cpu;
for_each_cpu(cpu, mask) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev)
td->evtdev->next_event = expires;
}
}
/**
* tick_broadcast_setup_oneshot - setup the broadcast device
*/
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
int cpu = smp_processor_id();
/* Set it up only once ! */
if (bc->event_handler != tick_handle_oneshot_broadcast) {
int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
bc->event_handler = tick_handle_oneshot_broadcast;
/*
* We must be careful here. There might be other CPUs
* waiting for periodic broadcast. We need to set the
* oneshot_mask bits for those and program the
* broadcast device to fire.
*/
cpumask_copy(tmpmask, tick_broadcast_mask);
cpumask_clear_cpu(cpu, tmpmask);
cpumask_or(tick_broadcast_oneshot_mask,
tick_broadcast_oneshot_mask, tmpmask);
if (was_periodic && !cpumask_empty(tmpmask)) {
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
tick_broadcast_init_next_event(tmpmask,
tick_next_period);
tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
} else
bc->next_event.tv64 = KTIME_MAX;
} else {
/*
* The first cpu which switches to oneshot mode sets
* the bit for all other cpus which are in the general
* (periodic) broadcast mask. So the bit is set and
* would prevent the first broadcast enter after this
* to program the bc device.
*/
tick_broadcast_clear_oneshot(cpu);
}
}
/*
* Select oneshot operating mode for the broadcast device
*/
void tick_broadcast_switch_to_oneshot(void)
{
struct clock_event_device *bc;
unsigned long flags;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
tick_broadcast_setup_oneshot(bc);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
* Remove a dead CPU from broadcasting
*/
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
unsigned long flags;
unsigned int cpu = *cpup;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Clear the broadcast mask flag for the dead cpu, but do not
* stop the broadcast device!
*/
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
* Check, whether the broadcast device is in one shot mode
*/
int tick_broadcast_oneshot_active(void)
{
return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}
/*
* Check whether the broadcast device supports oneshot.
*/
bool tick_broadcast_oneshot_available(void)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}
#endif
void __init tick_broadcast_init(void)
{
zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}

425
kernel/time/tick-common.c Normal file
View File

@@ -0,0 +1,425 @@
/*
* linux/kernel/time/tick-common.c
*
* This file contains the base functions to manage periodic tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <asm/irq_regs.h>
#include "tick-internal.h"
/*
* Tick devices
*/
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
* Tick next event: keeps track of the tick time
*/
ktime_t tick_next_period;
ktime_t tick_period;
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
static DEFINE_RAW_SPINLOCK(tick_device_lock);
/*
* Debugging: see timer_list.c
*/
struct tick_device *tick_get_device(int cpu)
{
return &per_cpu(tick_cpu_device, cpu);
}
/**
* tick_is_oneshot_available - check for a oneshot capable event device
*/
int tick_is_oneshot_available(void)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return 0;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
return 1;
return tick_broadcast_oneshot_available();
}
/*
* Periodic tick
*/
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
write_seqlock(&jiffies_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1);
write_sequnlock(&jiffies_lock);
}
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
}
/*
* Event handler for periodic ticks
*/
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
ktime_t next;
tick_periodic(cpu);
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return;
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
next = ktime_add(dev->next_event, tick_period);
for (;;) {
if (!clockevents_program_event(dev, next, false))
return;
/*
* Have to be careful here. If we're in oneshot mode,
* before we call tick_periodic() in a loop, we need
* to be sure we're using a real hardware clocksource.
* Otherwise we could get trapped in an infinite
* loop, as the tick_periodic() increments jiffies,
* when then will increment time, posibly causing
* the loop to trigger again and again.
*/
if (timekeeping_valid_for_hres())
tick_periodic(cpu);
next = ktime_add(next, tick_period);
}
}
/*
* Setup the device for a periodic tick
*/
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
tick_set_periodic_handler(dev, broadcast);
/* Broadcast setup ? */
if (!tick_device_is_functional(dev))
return;
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active()) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
} else {
unsigned long seq;
ktime_t next;
do {
seq = read_seqbegin(&jiffies_lock);
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq));
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
for (;;) {
if (!clockevents_program_event(dev, next, false))
return;
next = ktime_add(next, tick_period);
}
}
}
/*
* Setup the tick device
*/
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const struct cpumask *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
/*
* First device setup ?
*/
if (!td->evtdev) {
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
if (!tick_nohz_full_cpu(cpu))
tick_do_timer_cpu = cpu;
else
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
}
/*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC;
} else {
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
}
td->evtdev = newdev;
/*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way. This function also returns !=0 when we keep the
* current active broadcast state for this CPU.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);
else
tick_setup_oneshot(newdev, handler, next_event);
}
/*
* Check, if the new registered device should be used.
*/
static int tick_check_new_device(struct clock_event_device *newdev)
{
struct clock_event_device *curdev;
struct tick_device *td;
int cpu, ret = NOTIFY_OK;
unsigned long flags;
raw_spin_lock_irqsave(&tick_device_lock, flags);
cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, newdev->cpumask))
goto out_bc;
td = &per_cpu(tick_cpu_device, cpu);
curdev = td->evtdev;
/* cpu local device ? */
if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
/*
* If the cpu affinity of the device interrupt can not
* be set, ignore it.
*/
if (!irq_can_set_affinity(newdev->irq))
goto out_bc;
/*
* If we have a cpu local device already, do not replace it
* by a non cpu local device
*/
if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
goto out_bc;
}
/*
* If we have an active device, then check the rating and the oneshot
* feature.
*/
if (curdev) {
/*
* Prefer one shot capable devices !
*/
if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
!(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
goto out_bc;
/*
* Check the rating
*/
if (curdev->rating >= newdev->rating)
goto out_bc;
}
/*
* Replace the eventually existing device by the new
* device. If the current device is the broadcast device, do
* not give it back to the clockevents layer !
*/
if (tick_is_broadcast_device(curdev)) {
clockevents_shutdown(curdev);
curdev = NULL;
}
clockevents_exchange_device(curdev, newdev);
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
return NOTIFY_STOP;
out_bc:
/*
* Can the new device be used as a broadcast device ?
*/
if (tick_check_broadcast_device(newdev))
ret = NOTIFY_STOP;
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
return ret;
}
/*
* Transfer the do_timer job away from a dying cpu.
*
* Called with interrupts disabled.
*/
static void tick_handover_do_timer(int *cpup)
{
if (*cpup == tick_do_timer_cpu) {
int cpu = cpumask_first(cpu_online_mask);
tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
TICK_DO_TIMER_NONE;
}
}
/*
* Shutdown an event device on a given cpu:
*
* This is called on a life CPU, when a CPU is dead. So we cannot
* access the hardware device itself.
* We just set the mode and remove it from the lists.
*/
static void tick_shutdown(unsigned int *cpup)
{
struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
struct clock_event_device *dev = td->evtdev;
unsigned long flags;
raw_spin_lock_irqsave(&tick_device_lock, flags);
td->mode = TICKDEV_MODE_PERIODIC;
if (dev) {
/*
* Prevent that the clock events layer tries to call
* the set mode function!
*/
dev->mode = CLOCK_EVT_MODE_UNUSED;
clockevents_exchange_device(dev, NULL);
dev->event_handler = clockevents_handle_noop;
td->evtdev = NULL;
}
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_suspend(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
unsigned long flags;
raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_shutdown(td->evtdev);
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_resume(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
unsigned long flags;
int broadcast = tick_resume_broadcast();
raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
if (!broadcast) {
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(td->evtdev, 0);
else
tick_resume_oneshot();
}
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
/*
* Notification about clock event devices
*/
static int tick_notify(struct notifier_block *nb, unsigned long reason,
void *dev)
{
switch (reason) {
case CLOCK_EVT_NOTIFY_ADD:
return tick_check_new_device(dev);
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
tick_broadcast_on_off(reason, dev);
break;
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
tick_broadcast_oneshot_control(reason);
break;
case CLOCK_EVT_NOTIFY_CPU_DYING:
tick_handover_do_timer(dev);
break;
case CLOCK_EVT_NOTIFY_CPU_DEAD:
tick_shutdown_broadcast_oneshot(dev);
tick_shutdown_broadcast(dev);
tick_shutdown(dev);
break;
case CLOCK_EVT_NOTIFY_SUSPEND:
tick_suspend();
tick_suspend_broadcast();
break;
case CLOCK_EVT_NOTIFY_RESUME:
tick_resume();
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block tick_notifier = {
.notifier_call = tick_notify,
};
/**
* tick_init - initialize the tick control
*
* Register the notifier with the clockevents framework
*/
void __init tick_init(void)
{
clockevents_register_notifier(&tick_notifier);
tick_broadcast_init();
}

146
kernel/time/tick-internal.h Normal file
View File

@@ -0,0 +1,146 @@
/*
* tick internal variable and functions used by low/high res code
*/
#include <linux/hrtimer.h>
#include <linux/tick.h>
extern seqlock_t jiffies_lock;
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#define TICK_DO_TIMER_NONE -1
#define TICK_DO_TIMER_BOOT -2
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
extern ktime_t tick_next_period;
extern ktime_t tick_period;
extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
extern void clockevents_shutdown(struct clock_event_device *dev);
/*
* NO_HZ / high resolution timer shared code
*/
#ifdef CONFIG_TICK_ONESHOT
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
extern void tick_resume_oneshot(void);
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
extern int tick_broadcast_oneshot_active(void);
extern void tick_check_oneshot_broadcast(int cpu);
bool tick_broadcast_oneshot_available(void);
# else /* BROADCAST */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
static inline void tick_check_oneshot_broadcast(int cpu) { }
static inline bool tick_broadcast_oneshot_available(void) { return true; }
# endif /* !BROADCAST */
#else /* !ONESHOT */
static inline
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt)
{
BUG();
}
static inline void tick_resume_oneshot(void)
{
BUG();
}
static inline int tick_program_event(ktime_t expires, int force)
{
return 0;
}
static inline void tick_oneshot_notify(void) { }
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
return 0;
}
static inline int tick_broadcast_oneshot_active(void) { return 0; }
static inline bool tick_broadcast_oneshot_available(void) { return false; }
#endif /* !TICK_ONESHOT */
/*
* Broadcasting support
*/
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern int tick_check_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
extern void tick_shutdown_broadcast(unsigned int *cpup);
extern void tick_suspend_broadcast(void);
extern int tick_resume_broadcast(void);
extern void tick_broadcast_init(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
#else /* !BROADCAST */
static inline int tick_check_broadcast_device(struct clock_event_device *dev)
{
return 0;
}
static inline int tick_is_broadcast_device(struct clock_event_device *dev)
{
return 0;
}
static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
int cpu)
{
return 0;
}
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
static inline void tick_suspend_broadcast(void) { }
static inline int tick_resume_broadcast(void) { return 0; }
static inline void tick_broadcast_init(void) { }
/*
* Set the periodic handler in non broadcast mode
*/
static inline void tick_set_periodic_handler(struct clock_event_device *dev,
int broadcast)
{
dev->event_handler = tick_handle_periodic;
}
#endif /* !BROADCAST */
/*
* Check, if the device is functional or a dummy for broadcast
*/
static inline int tick_device_is_functional(struct clock_event_device *dev)
{
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
#endif
extern void do_timer(unsigned long ticks);

116
kernel/time/tick-oneshot.c Normal file
View File

@@ -0,0 +1,116 @@
/*
* linux/kernel/time/tick-oneshot.c
*
* This file contains functions which manage high resolution tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include "tick-internal.h"
/**
* tick_program_event
*/
int tick_program_event(ktime_t expires, int force)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
return clockevents_program_event(dev, expires, force);
}
/**
* tick_resume_onshot - resume oneshot mode
*/
void tick_resume_oneshot(void)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
clockevents_program_event(dev, ktime_get(), true);
}
/**
* tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
*/
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t next_event)
{
newdev->event_handler = handler;
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
clockevents_program_event(newdev, next_event, true);
}
/**
* tick_switch_to_oneshot - switch to oneshot mode
*/
int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
struct clock_event_device *dev = td->evtdev;
if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
!tick_device_is_functional(dev)) {
printk(KERN_INFO "Clockevents: "
"could not switch to one-shot mode:");
if (!dev) {
printk(" no tick device\n");
} else {
if (!tick_device_is_functional(dev))
printk(" %s is not functional.\n", dev->name);
else
printk(" %s does not support one-shot mode.\n",
dev->name);
}
return -EINVAL;
}
td->mode = TICKDEV_MODE_ONESHOT;
dev->event_handler = handler;
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
tick_broadcast_switch_to_oneshot();
return 0;
}
/**
* tick_check_oneshot_mode - check whether the system is in oneshot mode
*
* returns 1 when either nohz or highres are enabled. otherwise 0.
*/
int tick_oneshot_mode_active(void)
{
unsigned long flags;
int ret;
local_irq_save(flags);
ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT;
local_irq_restore(flags);
return ret;
}
#ifdef CONFIG_HIGH_RES_TIMERS
/**
* tick_init_highres - switch to high resolution mode
*
* Called with interrupts disabled.
*/
int tick_init_highres(void)
{
return tick_switch_to_oneshot(hrtimer_interrupt);
}
#endif

1229
kernel/time/tick-sched.c Normal file

File diff suppressed because it is too large Load Diff

127
kernel/time/timeconv.c Normal file
View File

@@ -0,0 +1,127 @@
/*
* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
* This file is part of the GNU C Library.
* Contributed by Paul Eggert (eggert@twinsun.com).
*
* The GNU C Library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* The GNU C Library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with the GNU C Library; see the file COPYING.LIB. If not,
* write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Converts the calendar time to broken-down time representation
* Based on code from glibc-2.6
*
* 2009-7-14:
* Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
*/
#include <linux/time.h>
#include <linux/module.h>
/*
* Nonzero if YEAR is a leap year (every 4 years,
* except every 100th isn't, and every 400th is).
*/
static int __isleap(long year)
{
return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
}
/* do a mathdiv for long type */
static long math_div(long a, long b)
{
return a / b - (a % b < 0);
}
/* How many leap years between y1 and y2, y1 must less or equal to y2 */
static long leaps_between(long y1, long y2)
{
long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
+ math_div(y1 - 1, 400);
long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
+ math_div(y2 - 1, 400);
return leaps2 - leaps1;
}
/* How many days come before each month (0-12). */
static const unsigned short __mon_yday[2][13] = {
/* Normal years. */
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
/* Leap years. */
{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
};
#define SECS_PER_HOUR (60 * 60)
#define SECS_PER_DAY (SECS_PER_HOUR * 24)
/**
* time_to_tm - converts the calendar time to local broken-down time
*
* @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970,
* Coordinated Universal Time (UTC).
* @offset offset seconds adding to totalsecs.
* @result pointer to struct tm variable to receive broken-down time
*/
void time_to_tm(time_t totalsecs, int offset, struct tm *result)
{
long days, rem, y;
const unsigned short *ip;
days = totalsecs / SECS_PER_DAY;
rem = totalsecs % SECS_PER_DAY;
rem += offset;
while (rem < 0) {
rem += SECS_PER_DAY;
--days;
}
while (rem >= SECS_PER_DAY) {
rem -= SECS_PER_DAY;
++days;
}
result->tm_hour = rem / SECS_PER_HOUR;
rem %= SECS_PER_HOUR;
result->tm_min = rem / 60;
result->tm_sec = rem % 60;
/* January 1, 1970 was a Thursday. */
result->tm_wday = (4 + days) % 7;
if (result->tm_wday < 0)
result->tm_wday += 7;
y = 1970;
while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
/* Guess a corrected year, assuming 365 days per year. */
long yg = y + math_div(days, 365);
/* Adjust DAYS and Y to match the guessed year. */
days -= (yg - y) * 365 + leaps_between(y, yg);
y = yg;
}
result->tm_year = y - 1900;
result->tm_yday = days;
ip = __mon_yday[__isleap(y)];
for (y = 11; days < ip[y]; y--)
continue;
days -= ip[y];
result->tm_mon = y;
result->tm_mday = days + 1;
}
EXPORT_SYMBOL(time_to_tm);

1846
kernel/time/timekeeping.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,23 @@
#ifndef _TIMEKEEPING_INTERNAL_H
#define _TIMEKEEPING_INTERNAL_H
/*
* timekeeping debug functions
*/
#include <linux/clocksource.h>
#include <linux/time.h>
#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
cycle_t ret = (now - last) & mask;
return (s64) ret > 0 ? ret : 0;
}
#else
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
return (now - last) & mask;
}
#endif
#endif /* _TIMEKEEPING_INTERNAL_H */

370
kernel/time/timer_list.c Normal file
View File

@@ -0,0 +1,370 @@
/*
* kernel/time/timer_list.c
*
* List pending timers
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/tick.h>
#include <asm/uaccess.h>
struct timer_list_iter {
int cpu;
bool second_pass;
u64 now;
};
typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
/*
* This allows printing both to /proc/timer_list and
* to the console (on SysRq-Q):
*/
#define SEQ_printf(m, x...) \
do { \
if (m) \
seq_printf(m, x); \
else \
printk(x); \
} while (0)
static void print_name_offset(struct seq_file *m, void *sym)
{
char symname[KSYM_NAME_LEN];
if (lookup_symbol_name((unsigned long)sym, symname) < 0)
SEQ_printf(m, "<%pK>", sym);
else
SEQ_printf(m, "%s", symname);
}
static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
{
#ifdef CONFIG_TIMER_STATS
char tmp[TASK_COMM_LEN + 1];
#endif
SEQ_printf(m, " #%d: ", idx);
print_name_offset(m, taddr);
SEQ_printf(m, ", ");
print_name_offset(m, timer->function);
SEQ_printf(m, ", S:%02lx", timer->state);
#ifdef CONFIG_TIMER_STATS
SEQ_printf(m, ", ");
print_name_offset(m, timer->start_site);
memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
tmp[TASK_COMM_LEN] = 0;
SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
#endif
SEQ_printf(m, "\n");
SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
(unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)),
(long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now),
(long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
}
static void
print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
u64 now)
{
struct hrtimer *timer, tmp;
unsigned long next = 0, i;
struct timerqueue_node *curr;
unsigned long flags;
next_one:
i = 0;
raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
curr = timerqueue_getnext(&base->active);
/*
* Crude but we have to do this O(N*N) thing, because
* we have to unlock the base when printing:
*/
while (curr && i < next) {
curr = timerqueue_iterate_next(curr);
i++;
}
if (curr) {
timer = container_of(curr, struct hrtimer, node);
tmp = *timer;
raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
print_timer(m, timer, &tmp, i, now);
next++;
goto next_one;
}
raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
}
static void
print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
{
SEQ_printf(m, " .base: %pK\n", base);
SEQ_printf(m, " .index: %d\n",
base->index);
SEQ_printf(m, " .resolution: %Lu nsecs\n",
(unsigned long long)ktime_to_ns(base->resolution));
SEQ_printf(m, " .get_time: ");
print_name_offset(m, base->get_time);
SEQ_printf(m, "\n");
#ifdef CONFIG_HIGH_RES_TIMERS
SEQ_printf(m, " .offset: %Lu nsecs\n",
(unsigned long long) ktime_to_ns(base->offset));
#endif
SEQ_printf(m, "active timers:\n");
print_active_timers(m, base, now);
}
static void print_cpu(struct seq_file *m, int cpu, u64 now)
{
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
SEQ_printf(m, "cpu: %d\n", cpu);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
SEQ_printf(m, " clock %d:\n", i);
print_base(m, cpu_base->clock_base + i, now);
}
#define P(x) \
SEQ_printf(m, " .%-15s: %Lu\n", #x, \
(unsigned long long)(cpu_base->x))
#define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(cpu_base->x)))
#ifdef CONFIG_HIGH_RES_TIMERS
P_ns(expires_next);
P(hres_active);
P(nr_events);
P(nr_retries);
P(nr_hangs);
P_ns(max_hang_time);
#endif
#undef P
#undef P_ns
#ifdef CONFIG_TICK_ONESHOT
# define P(x) \
SEQ_printf(m, " .%-15s: %Lu\n", #x, \
(unsigned long long)(ts->x))
# define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(ts->x)))
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
P(nohz_mode);
P_ns(last_tick);
P(tick_stopped);
P(idle_jiffies);
P(idle_calls);
P(idle_sleeps);
P_ns(idle_entrytime);
P_ns(idle_waketime);
P_ns(idle_exittime);
P_ns(idle_sleeptime);
P_ns(iowait_sleeptime);
P(last_jiffies);
P(next_jiffies);
P_ns(idle_expires);
SEQ_printf(m, "jiffies: %Lu\n",
(unsigned long long)jiffies);
}
#endif
#undef P
#undef P_ns
SEQ_printf(m, "\n");
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS
static void
print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
{
struct clock_event_device *dev = td->evtdev;
SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
if (cpu < 0)
SEQ_printf(m, "Broadcast device\n");
else
SEQ_printf(m, "Per CPU device: %d\n", cpu);
SEQ_printf(m, "Clock Event Device: ");
if (!dev) {
SEQ_printf(m, "<NULL>\n");
return;
}
SEQ_printf(m, "%s\n", dev->name);
SEQ_printf(m, " max_delta_ns: %llu\n",
(unsigned long long) dev->max_delta_ns);
SEQ_printf(m, " min_delta_ns: %llu\n",
(unsigned long long) dev->min_delta_ns);
SEQ_printf(m, " mult: %u\n", dev->mult);
SEQ_printf(m, " shift: %u\n", dev->shift);
SEQ_printf(m, " mode: %d\n", dev->mode);
SEQ_printf(m, " next_event: %Ld nsecs\n",
(unsigned long long) ktime_to_ns(dev->next_event));
SEQ_printf(m, " set_next_event: ");
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
SEQ_printf(m, " retries: %lu\n", dev->retries);
SEQ_printf(m, "\n");
}
static void timer_list_show_tickdevices_header(struct seq_file *m)
{
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
cpumask_bits(tick_get_broadcast_mask())[0]);
#ifdef CONFIG_TICK_ONESHOT
SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
#endif
SEQ_printf(m, "\n");
#endif
}
#endif
static inline void timer_list_header(struct seq_file *m, u64 now)
{
SEQ_printf(m, "Timer List Version: v0.7\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");
}
static int timer_list_show(struct seq_file *m, void *v)
{
struct timer_list_iter *iter = v;
if (iter->cpu == -1 && !iter->second_pass)
timer_list_header(m, iter->now);
else if (!iter->second_pass)
print_cpu(m, iter->cpu, iter->now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
else if (iter->cpu == -1 && iter->second_pass)
timer_list_show_tickdevices_header(m);
else
print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
#endif
return 0;
}
void sysrq_timer_list_show(void)
{
u64 now = ktime_to_ns(ktime_get());
int cpu;
timer_list_header(NULL, now);
for_each_online_cpu(cpu)
print_cpu(NULL, cpu, now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
timer_list_show_tickdevices_header(NULL);
for_each_online_cpu(cpu)
print_tickdevice(NULL, tick_get_device(cpu), cpu);
#endif
return;
}
static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
for (; offset; offset--) {
iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
if (iter->cpu >= nr_cpu_ids) {
#ifdef CONFIG_GENERIC_CLOCKEVENTS
if (!iter->second_pass) {
iter->cpu = -1;
iter->second_pass = true;
} else
return NULL;
#else
return NULL;
#endif
}
}
return iter;
}
static void *timer_list_start(struct seq_file *file, loff_t *offset)
{
struct timer_list_iter *iter = file->private;
if (!*offset)
iter->now = ktime_to_ns(ktime_get());
iter->cpu = -1;
iter->second_pass = false;
return move_iter(iter, *offset);
}
static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
{
struct timer_list_iter *iter = file->private;
++*offset;
return move_iter(iter, 1);
}
static void timer_list_stop(struct seq_file *seq, void *v)
{
}
static const struct seq_operations timer_list_sops = {
.start = timer_list_start,
.next = timer_list_next,
.stop = timer_list_stop,
.show = timer_list_show,
};
static int timer_list_open(struct inode *inode, struct file *filp)
{
return seq_open_private(filp, &timer_list_sops,
sizeof(struct timer_list_iter));
}
static const struct file_operations timer_list_fops = {
.open = timer_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
static int __init init_timer_list_procfs(void)
{
struct proc_dir_entry *pe;
pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
if (!pe)
return -ENOMEM;
return 0;
}
__initcall(init_timer_list_procfs);

425
kernel/time/timer_stats.c Normal file
View File

@@ -0,0 +1,425 @@
/*
* kernel/time/timer_stats.c
*
* Collect timer usage statistics.
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* timer_stats is based on timer_top, a similar functionality which was part of
* Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
* Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
* on dynamic allocation of the statistics entries and linear search based
* lookup combined with a global lock, rather than the static array, hash
* and per-CPU locking which is used by timer_stats. It was written for the
* pre hrtimer kernel code and therefore did not take hrtimers into account.
* Nevertheless it provided the base for the timer_stats implementation and
* was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
* for this effort.
*
* timer_top.c is
* Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
* Written by Daniel Petrini <d.pensator@gmail.com>
* timer_top.c was released under the GNU General Public License version 2
*
* We export the addresses and counting of timer functions being called,
* the pid and cmdline from the owner process if applicable.
*
* Start/stop data collection:
* # echo [1|0] >/proc/timer_stats
*
* Display the information collected so far:
* # cat /proc/timer_stats
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <asm/uaccess.h>
/*
* This is our basic unit of interest: a timer expiry event identified
* by the timer, its start/expire functions and the PID of the task that
* started the timer. We count the number of times an event happens:
*/
struct entry {
/*
* Hash list:
*/
struct entry *next;
/*
* Hash keys:
*/
void *timer;
void *start_func;
void *expire_func;
pid_t pid;
/*
* Number of timeout events:
*/
unsigned long count;
unsigned int timer_flag;
/*
* We save the command-line string to preserve
* this information past task exit:
*/
char comm[TASK_COMM_LEN + 1];
} ____cacheline_aligned_in_smp;
/*
* Spinlock protecting the tables - not taken during lookup:
*/
static DEFINE_RAW_SPINLOCK(table_lock);
/*
* Per-CPU lookup locks for fast hash lookup:
*/
static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
/*
* Mutex to serialize state changes with show-stats activities:
*/
static DEFINE_MUTEX(show_mutex);
/*
* Collection status, active/inactive:
*/
int __read_mostly timer_stats_active;
/*
* Beginning/end timestamps of measurement:
*/
static ktime_t time_start, time_stop;
/*
* tstat entry structs only get allocated while collection is
* active and never freed during that time - this simplifies
* things quite a bit.
*
* They get freed when a new collection period is started.
*/
#define MAX_ENTRIES_BITS 10
#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
static unsigned long nr_entries;
static struct entry entries[MAX_ENTRIES];
static atomic_t overflow_count;
/*
* The entries are in a hash-table, for fast lookup:
*/
#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
#define __tstat_hashfn(entry) \
(((unsigned long)(entry)->timer ^ \
(unsigned long)(entry)->start_func ^ \
(unsigned long)(entry)->expire_func ^ \
(unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
static void reset_entries(void)
{
nr_entries = 0;
memset(entries, 0, sizeof(entries));
memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
atomic_set(&overflow_count, 0);
}
static struct entry *alloc_entry(void)
{
if (nr_entries >= MAX_ENTRIES)
return NULL;
return entries + nr_entries++;
}
static int match_entries(struct entry *entry1, struct entry *entry2)
{
return entry1->timer == entry2->timer &&
entry1->start_func == entry2->start_func &&
entry1->expire_func == entry2->expire_func &&
entry1->pid == entry2->pid;
}
/*
* Look up whether an entry matching this item is present
* in the hash already. Must be called with irqs off and the
* lookup lock held:
*/
static struct entry *tstat_lookup(struct entry *entry, char *comm)
{
struct entry **head, *curr, *prev;
head = tstat_hashentry(entry);
curr = *head;
/*
* The fastpath is when the entry is already hashed,
* we do this with the lookup lock held, but with the
* table lock not held:
*/
while (curr) {
if (match_entries(curr, entry))
return curr;
curr = curr->next;
}
/*
* Slowpath: allocate, set up and link a new hash entry:
*/
prev = NULL;
curr = *head;
raw_spin_lock(&table_lock);
/*
* Make sure we have not raced with another CPU:
*/
while (curr) {
if (match_entries(curr, entry))
goto out_unlock;
prev = curr;
curr = curr->next;
}
curr = alloc_entry();
if (curr) {
*curr = *entry;
curr->count = 0;
curr->next = NULL;
memcpy(curr->comm, comm, TASK_COMM_LEN);
smp_mb(); /* Ensure that curr is initialized before insert */
if (prev)
prev->next = curr;
else
*head = curr;
}
out_unlock:
raw_spin_unlock(&table_lock);
return curr;
}
/**
* timer_stats_update_stats - Update the statistics for a timer.
* @timer: pointer to either a timer_list or a hrtimer
* @pid: the pid of the task which set up the timer
* @startf: pointer to the function which did the timer setup
* @timerf: pointer to the timer callback function of the timer
* @comm: name of the process which set up the timer
*
* When the timer is already registered, then the event counter is
* incremented. Otherwise the timer is registered in a free slot.
*/
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
void *timerf, char *comm,
unsigned int timer_flag)
{
/*
* It doesn't matter which lock we take:
*/
raw_spinlock_t *lock;
struct entry *entry, input;
unsigned long flags;
if (likely(!timer_stats_active))
return;
lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
input.timer = timer;
input.start_func = startf;
input.expire_func = timerf;
input.pid = pid;
input.timer_flag = timer_flag;
raw_spin_lock_irqsave(lock, flags);
if (!timer_stats_active)
goto out_unlock;
entry = tstat_lookup(&input, comm);
if (likely(entry))
entry->count++;
else
atomic_inc(&overflow_count);
out_unlock:
raw_spin_unlock_irqrestore(lock, flags);
}
static void print_name_offset(struct seq_file *m, unsigned long addr)
{
char symname[KSYM_NAME_LEN];
if (lookup_symbol_name(addr, symname) < 0)
seq_printf(m, "<%p>", (void *)addr);
else
seq_printf(m, "%s", symname);
}
static int tstats_show(struct seq_file *m, void *v)
{
struct timespec period;
struct entry *entry;
unsigned long ms;
long events = 0;
ktime_t time;
int i;
mutex_lock(&show_mutex);
/*
* If still active then calculate up to now:
*/
if (timer_stats_active)
time_stop = ktime_get();
time = ktime_sub(time_stop, time_start);
period = ktime_to_timespec(time);
ms = period.tv_nsec / 1000000;
seq_puts(m, "Timer Stats Version: v0.2\n");
seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
if (atomic_read(&overflow_count))
seq_printf(m, "Overflow: %d entries\n",
atomic_read(&overflow_count));
for (i = 0; i < nr_entries; i++) {
entry = entries + i;
if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
seq_printf(m, "%4luD, %5d %-16s ",
entry->count, entry->pid, entry->comm);
} else {
seq_printf(m, " %4lu, %5d %-16s ",
entry->count, entry->pid, entry->comm);
}
print_name_offset(m, (unsigned long)entry->start_func);
seq_puts(m, " (");
print_name_offset(m, (unsigned long)entry->expire_func);
seq_puts(m, ")\n");
events += entry->count;
}
ms += period.tv_sec * 1000;
if (!ms)
ms = 1;
if (events && period.tv_sec)
seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
events, events * 1000 / ms,
(events * 1000000 / ms) % 1000);
else
seq_printf(m, "%ld total events\n", events);
mutex_unlock(&show_mutex);
return 0;
}
/*
* After a state change, make sure all concurrent lookup/update
* activities have stopped:
*/
static void sync_access(void)
{
unsigned long flags;
int cpu;
for_each_online_cpu(cpu) {
raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
raw_spin_lock_irqsave(lock, flags);
/* nothing */
raw_spin_unlock_irqrestore(lock, flags);
}
}
static ssize_t tstats_write(struct file *file, const char __user *buf,
size_t count, loff_t *offs)
{
char ctl[2];
if (count != 2 || *offs)
return -EINVAL;
if (copy_from_user(ctl, buf, count))
return -EFAULT;
mutex_lock(&show_mutex);
switch (ctl[0]) {
case '0':
if (timer_stats_active) {
timer_stats_active = 0;
time_stop = ktime_get();
sync_access();
}
break;
case '1':
if (!timer_stats_active) {
reset_entries();
time_start = ktime_get();
smp_mb();
timer_stats_active = 1;
}
break;
default:
count = -EINVAL;
}
mutex_unlock(&show_mutex);
return count;
}
static int tstats_open(struct inode *inode, struct file *filp)
{
return single_open(filp, tstats_show, NULL);
}
static const struct file_operations tstats_fops = {
.open = tstats_open,
.read = seq_read,
.write = tstats_write,
.llseek = seq_lseek,
.release = single_release,
};
void __init init_timer_stats(void)
{
int cpu;
for_each_possible_cpu(cpu)
raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
}
static int __init init_tstats_procfs(void)
{
struct proc_dir_entry *pe;
pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
if (!pe)
return -ENOMEM;
return 0;
}
__initcall(init_tstats_procfs);

169
kernel/time/udelay_test.c Normal file
View File

@@ -0,0 +1,169 @@
/*
* udelay() test kernel module
*
* Test is executed by writing and reading to /sys/kernel/debug/udelay_test
* Tests are configured by writing: USECS ITERATIONS
* Tests are executed by reading from the same file.
* Specifying usecs of 0 or negative values will run multiples tests.
*
* Copyright (C) 2014 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#define DEFAULT_ITERATIONS 100
#define DEBUGFS_FILENAME "udelay_test"
static DEFINE_MUTEX(udelay_test_lock);
static struct dentry *udelay_test_debugfs_file;
static int udelay_test_usecs;
static int udelay_test_iterations = DEFAULT_ITERATIONS;
static int udelay_test_single(struct seq_file *s, int usecs, int iters)
{
int min, max, fail_count = 0;
long long sum = 0;
long long avg;
int i;
/* Allow udelay to be up to 0.5% fast */
int allowed_error_ns = usecs * 5;
if (iters <= 0)
return 0;
for (i = 0; i < iters; ++i) {
struct timespec ts1, ts2;
int time_passed;
ktime_get_ts(&ts1);
udelay(usecs);
ktime_get_ts(&ts2);
time_passed = timespec_to_ns(&ts2) - timespec_to_ns(&ts1);
if (i == 0 || time_passed < min)
min = time_passed;
if (i == 0 || time_passed > max)
max = time_passed;
if ((time_passed + allowed_error_ns) / 1000 < usecs)
++fail_count;
sum += time_passed;
}
avg = sum;
do_div(avg, iters);
seq_printf(s, "%d usecs x %d: exp=%d allowed=%d min=%d avg=%lld max=%d",
usecs, iters, usecs * 1000,
(usecs * 1000) - allowed_error_ns, min, avg, max);
if (fail_count)
seq_printf(s, " FAIL=%d", fail_count);
seq_puts(s, "\n");
return 0;
}
static int udelay_test_show(struct seq_file *s, void *v)
{
int usecs;
int iters;
int ret = 0;
mutex_lock(&udelay_test_lock);
usecs = udelay_test_usecs;
iters = udelay_test_iterations;
mutex_unlock(&udelay_test_lock);
if (usecs > 0) {
return udelay_test_single(s, usecs, iters);
} else if (usecs == 0) {
struct timespec ts;
ktime_get_ts(&ts);
seq_printf(s, "udelay() test (lpj=%ld kt=%ld.%09ld)\n",
loops_per_jiffy, ts.tv_sec, ts.tv_nsec);
seq_puts(s, "usage:\n");
seq_puts(s, "echo USECS [ITERS] > " DEBUGFS_FILENAME "\n");
seq_puts(s, "cat " DEBUGFS_FILENAME "\n");
}
return ret;
}
static int udelay_test_open(struct inode *inode, struct file *file)
{
return single_open(file, udelay_test_show, inode->i_private);
}
static ssize_t udelay_test_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
char lbuf[32];
int ret;
int usecs;
int iters;
if (count >= sizeof(lbuf))
return -EINVAL;
if (copy_from_user(lbuf, buf, count))
return -EFAULT;
lbuf[count] = '\0';
ret = sscanf(lbuf, "%d %d", &usecs, &iters);
if (ret < 1)
return -EINVAL;
else if (ret < 2)
iters = DEFAULT_ITERATIONS;
mutex_lock(&udelay_test_lock);
udelay_test_usecs = usecs;
udelay_test_iterations = iters;
mutex_unlock(&udelay_test_lock);
return count;
}
static const struct file_operations udelay_test_debugfs_ops = {
.owner = THIS_MODULE,
.open = udelay_test_open,
.read = seq_read,
.write = udelay_test_write,
.llseek = seq_lseek,
.release = single_release,
};
static int __init udelay_test_init(void)
{
mutex_lock(&udelay_test_lock);
udelay_test_debugfs_file = debugfs_create_file(DEBUGFS_FILENAME,
S_IRUSR, NULL, NULL, &udelay_test_debugfs_ops);
mutex_unlock(&udelay_test_lock);
return 0;
}
module_init(udelay_test_init);
static void __exit udelay_test_exit(void)
{
mutex_lock(&udelay_test_lock);
debugfs_remove(udelay_test_debugfs_file);
mutex_unlock(&udelay_test_lock);
}
module_exit(udelay_test_exit);
MODULE_AUTHOR("David Riley <davidriley@chromium.org>");
MODULE_LICENSE("GPL");