Initial commit; kernel source import

This commit is contained in:
Nathan
2025-04-06 23:50:55 -05:00
commit 25c6d769f4
45093 changed files with 18199410 additions and 0 deletions

View File

@@ -0,0 +1,335 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*
* PCI initialization based on example code from:
* Andreas Herrmann <andreas.herrmann3@amd.com>
*/
#if defined(__i386__) || defined(__x86_64__)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
#include <string.h>
#include <pci/pci.h>
#include "idle_monitor/cpupower-monitor.h"
#include "helpers/helpers.h"
/* PCI config space offsets of the three residency counters */
#define PCI_NON_PC0_OFFSET 0xb0
#define PCI_PC1_OFFSET 0xb4
#define PCI_PC6_OFFSET 0xb8
/* Register holding one enable bit per residency counter */
#define PCI_MONITOR_ENABLE_REG 0xe0
#define PCI_NON_PC0_ENABLE_BIT 0
#define PCI_PC1_ENABLE_BIT 1
#define PCI_PC6_ENABLE_BIT 2
/* NBP1 status register and the two status bits evaluated on stop */
#define PCI_NBP1_STAT_OFFSET 0x98
#define PCI_NBP1_ACTIVE_BIT 2
#define PCI_NBP1_ENTERED_BIT 1
/* NBP1 capability register and its capability bit */
#define PCI_NBP1_CAP_OFFSET 0x90
#define PCI_NBP1_CAPABLE_BIT 31
#define OVERFLOW_MS 343597 /* 32 bit register filled at 12.5 MHz
(1 tick per 80ns) */
/*
 * Ids of the states exposed by this monitor. They double as index into
 * amd_fam14h_cstates[], previous_count[] and current_count[].
 */
enum amd_fam14h_states {NON_PC0 = 0, PC1, PC6, NBP1,
AMD_FAM14H_STATE_NUM};
/* Forward declarations, needed by the state table below */
static int fam14h_get_count_percent(unsigned int self_id, double *percent,
unsigned int cpu);
static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
unsigned int cpu);
/*
 * State table of this monitor, indexed by enum amd_fam14h_states.
 * The three package residency states report a percentage, NBP1 only
 * reports a 0/1 count through .get_count.
 */
static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = {
{
.name = "!PC0",
.desc = N_("Package in sleep state (PC1 or deeper)"),
.id = NON_PC0,
.range = RANGE_PACKAGE,
.get_count_percent = fam14h_get_count_percent,
},
{
.name = "PC1",
.desc = N_("Processor Package C1"),
.id = PC1,
.range = RANGE_PACKAGE,
.get_count_percent = fam14h_get_count_percent,
},
{
.name = "PC6",
.desc = N_("Processor Package C6"),
.id = PC6,
.range = RANGE_PACKAGE,
.get_count_percent = fam14h_get_count_percent,
},
{
.name = "NBP1",
.desc = N_("North Bridge P1 boolean counter (returns 0 or 1)"),
.id = NBP1,
.range = RANGE_PACKAGE,
.get_count = fam14h_nbp1_count,
},
};
/* libpci access handle and the device the counters are read from */
static struct pci_access *pci_acc;
static struct pci_dev *amd_fam14h_pci_dev;
/* Non-zero if one of the NBP1 status bits was set when measuring stopped */
static int nbp1_entered;
/*
 * Measurement bookkeeping. These are private to this monitor and therefore
 * file-local: the sysfs idle monitor defines a variable called start_time
 * as well, and two external definitions of the same object only link when
 * the toolchain merges common symbols (not the case with -fno-common).
 */
static struct timespec start_time;
/* Length of the last measurement in microseconds */
static unsigned long long timediff;
#ifdef DEBUG
static struct timespec dbg_time;
static long dbg_timediff;
#endif
/*
 * Per state, per CPU counter values at start/stop of a measurement.
 * The NBP1 slot is never allocated, it stays NULL.
 */
static unsigned long long *previous_count[AMD_FAM14H_STATE_NUM];
static unsigned long long *current_count[AMD_FAM14H_STATE_NUM];
/*
 * Look up where the counter of @state lives in PCI config space.
 *
 * @state:      state whose id selects the register
 * @pci_offset: out, config space offset of the counter/status register
 * @enable_bit: out, bit number belonging to that counter
 * @cpu:        not used
 *
 * Returns 0 on success and -1 if state->id is not a known state id.
 */
static int amd_fam14h_get_pci_info(struct cstate *state,
				   unsigned int *pci_offset,
				   unsigned int *enable_bit,
				   unsigned int cpu)
{
	if (state->id == NON_PC0) {
		*enable_bit = PCI_NON_PC0_ENABLE_BIT;
		*pci_offset = PCI_NON_PC0_OFFSET;
	} else if (state->id == PC1) {
		*enable_bit = PCI_PC1_ENABLE_BIT;
		*pci_offset = PCI_PC1_OFFSET;
	} else if (state->id == PC6) {
		*enable_bit = PCI_PC6_ENABLE_BIT;
		*pci_offset = PCI_PC6_OFFSET;
	} else if (state->id == NBP1) {
		*enable_bit = PCI_NBP1_ENTERED_BIT;
		*pci_offset = PCI_NBP1_STAT_OFFSET;
	} else {
		return -1;
	}

	return 0;
}
/*
 * Arm the counter belonging to @state.
 *
 * For NBP1 bit PCI_NBP1_ENTERED_BIT is written as 1 to the status register.
 * For the residency counters the enable bit is set in the monitor enable
 * register, the counter register is written with zero and the remembered
 * start value for @cpu is reset.
 *
 * Returns 0 on success or the error from amd_fam14h_get_pci_info().
 */
static int amd_fam14h_init(cstate_t *state, unsigned int cpu)
{
	/* unsigned: must match the out parameters of the lookup helper */
	unsigned int enable_bit, pci_offset;
	uint32_t val;
	int ret;

	ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
	if (ret)
		return ret;

	/* NBP1 needs extra treating -> write 1 to D18F6x98 bit 1 for init */
	if (state->id == NBP1) {
		val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
		val |= 1U << enable_bit;
		pci_write_long(amd_fam14h_pci_dev, pci_offset, val);
		return ret;
	}

	/* Enable monitor */
	val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
	dprint("Init %s: read at offset: 0x%x val: %u\n", state->name,
	       PCI_MONITOR_ENABLE_REG, (unsigned int) val);
	val |= 1U << enable_bit;
	pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);

	dprint("Init %s: offset: 0x%x enable_bit: %u - val: %u (%u)\n",
	       state->name, PCI_MONITOR_ENABLE_REG, enable_bit,
	       (unsigned int) val, cpu);

	/* Set counter to zero */
	pci_write_long(amd_fam14h_pci_dev, pci_offset, 0);
	previous_count[state->id][cpu] = 0;

	return 0;
}
/*
 * Read out the counter belonging to @state and switch it off again.
 *
 * For NBP1 only the status bits are evaluated and latched in nbp1_entered.
 * For the residency counters the register value is stored as current count
 * for @cpu and the enable bit is cleared in the monitor enable register.
 *
 * Returns 0 on success or the error from amd_fam14h_get_pci_info().
 */
static int amd_fam14h_disable(cstate_t *state, unsigned int cpu)
{
	/* unsigned: must match the out parameters of the lookup helper */
	unsigned int enable_bit, pci_offset;
	uint32_t val;
	int ret;

	ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
	if (ret)
		return ret;

	val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
	dprint("%s: offset: 0x%x %u\n", state->name, pci_offset, val);
	if (state->id == NBP1) {
		/* was the bit whether NBP1 got entered set? */
		nbp1_entered = (val & (1 << PCI_NBP1_ACTIVE_BIT)) |
			(val & (1 << PCI_NBP1_ENTERED_BIT));

		dprint("NBP1 was %sentered - 0x%x - enable_bit: "
		       "%u - pci_offset: 0x%x\n",
		       nbp1_entered ? "" : "not ",
		       val, enable_bit, pci_offset);
		return ret;
	}
	current_count[state->id][cpu] = val;

	dprint("%s: Current - %llu (%u)\n", state->name,
	       current_count[state->id][cpu], cpu);
	dprint("%s: Previous - %llu (%u)\n", state->name,
	       previous_count[state->id][cpu], cpu);

	/* Stop the counter again */
	val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
	val &= ~(1U << enable_bit);
	pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);

	return 0;
}
/*
 * .get_count callback of the NBP1 state.
 *
 * Stores 1 in *count if nbp1_entered is non-zero, 0 otherwise.
 * @cpu is not used. Returns 0 on success, -1 if @id is not NBP1
 * (*count is left untouched in that case).
 */
static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
			     unsigned int cpu)
{
	if (id != NBP1)
		return -1;

	*count = nbp1_entered ? 1 : 0;
	return 0;
}
/*
 * .get_count_percent callback of the package residency states.
 *
 * Converts the counter delta of state @id on @cpu into a percentage of the
 * measured interval (timediff, in microseconds) and stores it in *percent;
 * 0.0 is stored if no time has passed.
 *
 * Returns 0 on success, -1 if @id is out of range.
 */
static int fam14h_get_count_percent(unsigned int id, double *percent,
				    unsigned int cpu)
{
	unsigned long ticks;

	if (id >= AMD_FAM14H_STATE_NUM)
		return -1;

	/* residency count in 80ns -> divide through 12.5 to get us residency */
	ticks = current_count[id][cpu] - previous_count[id][cpu];

	*percent = (timediff != 0) ? 100.0 * ticks / timediff / 12.5 : 0.0;

	dprint("Timediff: %llu - res~: %lu us - percent: %.2f %%\n",
	       timediff, ticks * 10 / 125, *percent);

	return 0;
}
static int amd_fam14h_start(void)
{
int num, cpu;
clock_gettime(CLOCK_REALTIME, &start_time);
for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
for (cpu = 0; cpu < cpu_count; cpu++)
amd_fam14h_init(&amd_fam14h_cstates[num], cpu);
}
#ifdef DEBUG
clock_gettime(CLOCK_REALTIME, &dbg_time);
dbg_timediff = timespec_diff_us(start_time, dbg_time);
dprint("Enabling counters took: %lu us\n",
dbg_timediff);
#endif
return 0;
}
/*
 * .stop callback: take the end time, read out and disable every state's
 * counter once per CPU and compute the length of the measurement.
 *
 * A warning is printed if the measurement took longer than the counters
 * can run without overflowing (OVERFLOW_MS). Always returns 0.
 */
static int amd_fam14h_stop(void)
{
	int num, cpu;
	struct timespec end_time;

	clock_gettime(CLOCK_REALTIME, &end_time);

	for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
		for (cpu = 0; cpu < cpu_count; cpu++)
			amd_fam14h_disable(&amd_fam14h_cstates[num], cpu);
	}
#ifdef DEBUG
	clock_gettime(CLOCK_REALTIME, &dbg_time);
	dbg_timediff = timespec_diff_us(end_time, dbg_time);
	/* timespec_diff_us() returns microseconds */
	dprint("Disabling counters took: %lu us\n", dbg_timediff);
#endif
	timediff = timespec_diff_us(start_time, end_time);
	if (timediff / 1000 > OVERFLOW_MS)
		/*
		 * Divide first, then narrow: a cast binds tighter than the
		 * division and would cut the microsecond value to 32 bit.
		 */
		print_overflow_err((unsigned int)(timediff / 1000000),
				   OVERFLOW_MS / 1000);

	return 0;
}
static int is_nbp1_capable(void)
{
uint32_t val;
val = pci_read_long(amd_fam14h_pci_dev, PCI_NBP1_CAP_OFFSET);
return val & (1 << 31);
}
/* Release the per CPU counter arrays (the NBP1 slot is never allocated) */
static void amd_fam14h_free_counts(void)
{
	int num;

	for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
		free(previous_count[num]);
		previous_count[num] = NULL;
		free(current_count[num]);
		current_count[num] = NULL;
	}
}

/*
 * .do_register callback: probe for an AMD family 12h/14h CPU.
 *
 * Sets the monitor name according to the family, allocates the counter
 * arrays and looks up the PCI device the counters are read from. The NBP1
 * state is dropped from the state list if the hardware is not capable.
 *
 * Returns the monitor on success. Returns NULL on other CPUs, if memory
 * is short or if the PCI device cannot be set up; nothing stays allocated
 * in the latter two cases.
 */
struct cpuidle_monitor *amd_fam14h_register(void)
{
	const char *name;
	int num;

	if (cpupower_cpu_info.vendor != X86_VENDOR_AMD)
		return NULL;

	if (cpupower_cpu_info.family == 0x14)
		name = "Fam_14h";
	else if (cpupower_cpu_info.family == 0x12)
		name = "Fam_12h";
	else
		return NULL;
	/* snprintf always terminates the destination */
	snprintf(amd_fam14h_monitor.name, MONITOR_NAME_LEN, "%s", name);

	/* We do not alloc for nbp1 machine wide counter */
	for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
		previous_count[num] = calloc(cpu_count,
					     sizeof(unsigned long long));
		current_count[num] = calloc(cpu_count,
					    sizeof(unsigned long long));
		if (previous_count[num] == NULL ||
		    current_count[num] == NULL) {
			amd_fam14h_free_counts();
			return NULL;
		}
	}

	/* We need PCI device: Slot 18, Func 6, compare with BKDG
	   for fam 12h/14h */
	amd_fam14h_pci_dev = pci_slot_func_init(&pci_acc, 0x18, 6);
	if (amd_fam14h_pci_dev == NULL || pci_acc == NULL) {
		/* unregister is never called for a monitor that failed */
		amd_fam14h_free_counts();
		return NULL;
	}

	if (!is_nbp1_capable())
		amd_fam14h_monitor.hw_states_num = AMD_FAM14H_STATE_NUM - 1;

	amd_fam14h_monitor.name_len = strlen(amd_fam14h_monitor.name);
	return &amd_fam14h_monitor;
}
/*
 * .unregister callback: free the counter arrays of all states but NBP1
 * (which has none) and shut down the PCI access handle.
 */
static void amd_fam14h_unregister(void)
{
	int idx = 0;

	while (idx < AMD_FAM14H_STATE_NUM - 1) {
		free(previous_count[idx]);
		free(current_count[idx]);
		idx++;
	}

	pci_cleanup(pci_acc);
}
/*
 * Monitor description handed to the monitor core.
 * .name starts out empty and is filled in by amd_fam14h_register(),
 * which may also lower .hw_states_num if NBP1 is not supported.
 */
struct cpuidle_monitor amd_fam14h_monitor = {
.name = "",
.hw_states = amd_fam14h_cstates,
.hw_states_num = AMD_FAM14H_STATE_NUM,
.start = amd_fam14h_start,
.stop = amd_fam14h_stop,
.do_register = amd_fam14h_register,
.unregister = amd_fam14h_unregister,
.needs_root = 1,
.overflow_s = OVERFLOW_MS / 1000,
};
#endif /* #if defined(__i386__) || defined(__x86_64__) */

View File

@@ -0,0 +1,196 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc
*
* Licensed under the terms of the GNU GPL License version 2.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#include "helpers/sysfs.h"
#include "helpers/helpers.h"
#include "idle_monitor/cpupower-monitor.h"
/* Capacity of the state table below */
#define CPUIDLE_STATES_MAX 10
static cstate_t cpuidle_cstates[CPUIDLE_STATES_MAX];
/* Tentative definition, the initialized one is at the end of the file */
struct cpuidle_monitor cpuidle_sysfs_monitor;
/* Idle state times read at start/stop, indexed as [cpu][state] */
static unsigned long long **previous_count;
static unsigned long long **current_count;
/*
 * Private to this monitor and therefore file-local: another monitor
 * defines a variable called start_time as well, and two external
 * definitions of the same object only link when the toolchain merges
 * common symbols (not the case with -fno-common).
 */
static struct timespec start_time;
/* Length of the last measurement in microseconds */
static unsigned long long timediff;
/*
 * .get_count_percent callback of the sysfs idle states.
 *
 * Stores in *percent how much of the measured interval (timediff) @cpu
 * spent in state @id according to the values read at start and stop;
 * 0.0 is stored if no time has passed. Always returns 0.
 */
static int cpuidle_get_count_percent(unsigned int id, double *percent,
				     unsigned int cpu)
{
	unsigned long long statediff = current_count[cpu][id]
		- previous_count[cpu][id];

	/*
	 * *percent is a pure output parameter: callers pass an unset
	 * variable, so it must not be read (not even for debug output)
	 * before it got assigned here.
	 */
	if (timediff == 0)
		*percent = 0.0;
	else
		*percent = ((100.0 * statediff) / timediff);

	dprint("%s: - timediff: %llu - statediff: %llu - percent: %f (%u)\n",
	       cpuidle_cstates[id].name, timediff, statediff, *percent, cpu);

	return 0;
}
/*
 * .start callback: remember the start time and read the idle time of
 * every state of every CPU as the baseline. Always returns 0.
 */
static int cpuidle_start(void)
{
	int cpu_idx, state_idx;
	unsigned long long *row;

	clock_gettime(CLOCK_REALTIME, &start_time);

	for (cpu_idx = 0; cpu_idx < cpu_count; cpu_idx++) {
		row = previous_count[cpu_idx];
		state_idx = 0;
		while (state_idx < cpuidle_sysfs_monitor.hw_states_num) {
			row[state_idx] =
				sysfs_get_idlestate_time(cpu_idx, state_idx);
			dprint("CPU %d - State: %d - Val: %llu\n",
			       cpu_idx, state_idx, row[state_idx]);
			state_idx++;
		}
	}
	return 0;
}
/*
 * .stop callback: take the end time, compute the length of the
 * measurement and read the idle time of every state of every CPU again.
 * Always returns 0.
 */
static int cpuidle_stop(void)
{
	int cpu, state;
	struct timespec end_time;

	clock_gettime(CLOCK_REALTIME, &end_time);
	timediff = timespec_diff_us(start_time, end_time);

	for (cpu = 0; cpu < cpu_count; cpu++) {
		for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
		     state++) {
			current_count[cpu][state] =
				sysfs_get_idlestate_time(cpu, state);
			/* show the value just read, not the start value */
			dprint("CPU %d - State: %d - Val: %llu\n",
			       cpu, state, current_count[cpu][state]);
		}
	}
	return 0;
}
/*
 * Replace the state names of the intel idle driver with short C-state
 * names.
 *
 * @tmp: state name, rewritten in place if it starts with "NHM-", "SNB-"
 *       or "ATM-" and @num has a replacement for that prefix
 * @num: number of the idle state
 *
 * Names with another prefix and state numbers without a replacement
 * are left untouched.
 */
void fix_up_intel_idle_driver_name(char *tmp, int num)
{
	/* replacement per prefix, indexed by state number; NULL = keep */
	static const struct {
		const char *prefix;
		const char *cname[5];
	} fixups[] = {
		{ "NHM-", { NULL, "C1", "C3", "C6", NULL } },
		{ "SNB-", { NULL, "C1", "C3", "C6", "C7" } },
		{ "ATM-", { NULL, "C1", "C2", "C4", "C6" } },
	};
	size_t i;

	for (i = 0; i < sizeof(fixups) / sizeof(fixups[0]); i++) {
		if (strncmp(tmp, fixups[i].prefix, 4) != 0)
			continue;

		if (num >= 1 && num <= 4 && fixups[i].cname[num] != NULL)
			strcpy(tmp, fixups[i].cname[num]);
		/* the prefixes exclude each other, no need to look further */
		return;
	}
}
static struct cpuidle_monitor *cpuidle_register(void)
{
int num;
char *tmp;
/* Assume idle state count is the same for all CPUs */
cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
if (cpuidle_sysfs_monitor.hw_states_num <= 0)
return NULL;
for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
tmp = sysfs_get_idlestate_name(0, num);
if (tmp == NULL)
continue;
fix_up_intel_idle_driver_name(tmp, num);
strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
free(tmp);
tmp = sysfs_get_idlestate_desc(0, num);
if (tmp == NULL)
continue;
strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1);
free(tmp);
cpuidle_cstates[num].range = RANGE_THREAD;
cpuidle_cstates[num].id = num;
cpuidle_cstates[num].get_count_percent =
cpuidle_get_count_percent;
};
/* Free this at program termination */
previous_count = malloc(sizeof(long long *) * cpu_count);
current_count = malloc(sizeof(long long *) * cpu_count);
for (num = 0; num < cpu_count; num++) {
previous_count[num] = malloc(sizeof(long long) *
cpuidle_sysfs_monitor.hw_states_num);
current_count[num] = malloc(sizeof(long long) *
cpuidle_sysfs_monitor.hw_states_num);
}
cpuidle_sysfs_monitor.name_len = strlen(cpuidle_sysfs_monitor.name);
return &cpuidle_sysfs_monitor;
}
/*
 * .unregister callback: free the per CPU rows of both value arrays and
 * then the arrays themselves.
 */
void cpuidle_unregister(void)
{
	int cpu_idx = 0;

	while (cpu_idx < cpu_count) {
		free(previous_count[cpu_idx]);
		free(current_count[cpu_idx]);
		cpu_idx++;
	}

	free(previous_count);
	free(current_count);
}
/*
 * Monitor description handed to the monitor core.
 * .hw_states_num is not set here, cpuidle_register() fills it in.
 */
struct cpuidle_monitor cpuidle_sysfs_monitor = {
.name = "Idle_Stats",
.hw_states = cpuidle_cstates,
.start = cpuidle_start,
.stop = cpuidle_stop,
.do_register = cpuidle_register,
.unregister = cpuidle_unregister,
.needs_root = 0,
.overflow_s = UINT_MAX,
};

View File

@@ -0,0 +1,455 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*
* Output format inspired by Len Brown's <lenb@kernel.org> turbostat tool.
*
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <libgen.h>
#include "idle_monitor/cpupower-monitor.h"
#include "idle_monitor/idle_monitors.h"
#include "helpers/helpers.h"
/*
 * Define pointers to all monitors.
 * Every DEF(x) line of idle_monitors.def expands to "&x_monitor," here;
 * the trailing 0 terminates the list.
 */
#define DEF(x) & x ## _monitor ,
struct cpuidle_monitor *all_monitors[] = {
#include "idle_monitors.def"
0
};
/* Monitors that registered successfully and how many of them there are */
static struct cpuidle_monitor *monitors[MONITORS_MAX];
static unsigned int avail_monitors;
static char *progname;
/* 0 (no enumerator) means: no mode chosen on the command line yet */
enum operation_mode_e { list = 1, show, show_all };
static int mode;
/* Measurement interval passed to sleep(), set with -i */
static int interval = 1;
/* Argument of -m: comma separated list of monitor names */
static char *show_monitors_param;
static struct cpupower_topology cpu_top;
/* Set by -c: bind to every CPU once before starting/stopping monitors */
static unsigned int wake_cpus;
/* ToDo: Document this in the manpage */
/* One letter per enum power_range_e value, printed by list_monitors() */
static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
/* Complain about the command line on stdout and exit with EXIT_FAILURE */
static void print_wrong_arg_exit(void)
{
	const char *msg = _("invalid or unknown argument\n");

	printf("%s", msg);
	exit(EXIT_FAILURE);
}
/*
 * Return the time from @start to @end in microseconds.
 *
 * The calculation is done in long long throughout: multiplying tv_sec in
 * its own type overflows after about 35 minutes where time_t is 32 bit.
 * Fractions of a microsecond are cut off.
 */
long long timespec_diff_us(struct timespec start, struct timespec end)
{
	long long sec = (long long)end.tv_sec - (long long)start.tv_sec;
	long long nsec = (long long)end.tv_nsec - (long long)start.tv_nsec;

	/* borrow one second if the nanosecond part went negative */
	if (nsec < 0) {
		sec -= 1;
		nsec += 1000000000LL;
	}
	return sec * 1000000LL + nsec / 1000;
}
/* Print @n blanks to stdout; nothing is printed for n <= 0 */
void print_n_spaces(int n)
{
	int left;

	for (left = n; left > 0; left--)
		printf(" ");
}
/*
 * Pad the string @s with blanks on the right up to a length of @n.
 * size of s must be at least n + 1
 *
 * Returns 0 on success, -1 (with @s unchanged) if @s is already longer
 * than @n.
 */
int fill_string_with_spaces(char *s, int n)
{
	int used = strlen(s);

	if (used > n)
		return -1;

	memset(s + used, ' ', n - used);
	s[n] = '\0';
	return 0;
}
/*
 * Print the two header lines of the result table.
 *
 * Line one names the monitors, each padded to the width of its state
 * columns. Line two names the topology columns (PKG/CORE/CPU, depending
 * on @topology_depth) and every state of every monitor.
 *
 * Padding is done by printf itself ("%-*s") rather than in a fixed size
 * scratch buffer, so the number of states a monitor has cannot overflow
 * anything. A name longer than its column is printed in full.
 */
void print_header(int topology_depth)
{
	unsigned int mon;
	int state, need_len;
	const int percent_width = 4;

	/* blank space above the topology columns */
	need_len = topology_depth * 5 - 1;
	printf("%-*s|", need_len > 0 ? need_len : 0, "");

	for (mon = 0; mon < avail_monitors; mon++) {
		need_len = monitors[mon]->hw_states_num * (percent_width + 3)
			- 1;
		if (mon != 0) {
			printf("|| ");
			need_len--;
		}
		/* a negative width would mean "left justify" to printf */
		printf("%-*s", need_len > 0 ? need_len : 0,
		       monitors[mon]->name);
	}
	printf("\n");

	if (topology_depth > 2)
		printf("PKG |");
	if (topology_depth > 1)
		printf("CORE|");
	if (topology_depth > 0)
		printf("CPU |");

	for (mon = 0; mon < avail_monitors; mon++) {
		if (mon != 0)
			printf("|| ");
		else
			printf(" ");
		for (state = 0; state < monitors[mon]->hw_states_num; state++) {
			if (state != 0)
				printf(" | ");
			printf("%-*s", percent_width,
			       monitors[mon]->hw_states[state].name);
		}
		printf(" ");
	}
	printf("\n");
}
/*
 * Print one line of the result table.
 *
 * @topology_depth: how many topology columns (pkg/core/cpu) to print
 * @cpu:            index into cpu_top.core_info[], not a CPU number
 *
 * Nothing is printed if the CPU of that entry is not in cpus_chosen.
 * A state whose callback fails is shown as "******". The program is
 * terminated if a state has neither a percent nor a count callback.
 */
void print_results(int topology_depth, int cpu)
{
	unsigned int mon;
	int state, ret;
	double percent;
	unsigned long long result;
	const cstate_t *st;

	/* Be careful CPUs may got resorted for pkg value do not just use cpu */
	if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
		return;

	if (topology_depth > 2)
		printf("%4d|", cpu_top.core_info[cpu].pkg);
	if (topology_depth > 1)
		printf("%4d|", cpu_top.core_info[cpu].core);
	if (topology_depth > 0)
		printf("%4d|", cpu_top.core_info[cpu].cpu);

	for (mon = 0; mon < avail_monitors; mon++) {
		if (mon != 0)
			printf("||");

		for (state = 0; state < monitors[mon]->hw_states_num; state++) {
			if (state != 0)
				printf("|");

			st = &monitors[mon]->hw_states[state];

			if (!st->get_count_percent && !st->get_count) {
				printf(_("Monitor %s, Counter %s has no count "
					 "function. Implementation error\n"),
				       monitors[mon]->name, st->name);
				exit(EXIT_FAILURE);
			}

			/* a percentage is preferred if both are provided */
			if (st->get_count_percent) {
				ret = st->get_count_percent(st->id, &percent,
						cpu_top.core_info[cpu].cpu);
				if (ret)
					printf("******");
				else if (percent >= 100.0)
					printf("%6.1f", percent);
				else
					printf("%6.2f", percent);
				continue;
			}

			ret = st->get_count(st->id, &result,
					    cpu_top.core_info[cpu].cpu);
			if (ret)
				printf("******");
			else
				printf("%6llu", result);
		}
	}
	/*
	 * The monitor could still provide useful data, for example
	 * AMD HW counters partly sit in PCI config space.
	 * It's up to the monitor plug-in to check .is_online, this one
	 * is just for additional info.
	 */
	if (cpu_top.core_info[cpu].is_online) {
		printf("\n");
		return;
	}
	printf(_(" *is offline\n"));
}
/* param: string passed by -m param (The list of monitors to show)
 *
 * Monitors must have been registered already, matching monitors
 * are picked out and available monitors array is overridden
 * with matching ones
 *
 * Monitors get sorted in the same order the user passes them
 *
 * param is split up in place. The program is terminated if none of the
 * requested monitors is available.
 */
static void parse_monitor_param(char *param)
{
	unsigned int num;
	int mon, hits = 0;
	char *tmp = param, *token;
	char *param_copy;
	size_t param_size = strlen(param) + 1;
	/* zeroed, all of it gets copied over monitors[] below */
	struct cpuidle_monitor *tmp_mons[MONITORS_MAX] = { NULL };

	/*
	 * strtok() cuts param off at the first comma, keep an intact copy
	 * for the error message (best effort, param is used if this fails).
	 */
	param_copy = malloc(param_size);
	if (param_copy != NULL)
		memcpy(param_copy, param, param_size);

	for (mon = 0; mon < MONITORS_MAX; mon++, tmp = NULL) {
		token = strtok(tmp, ",");
		if (token == NULL)
			break;
		if (strlen(token) >= MONITOR_NAME_LEN) {
			printf(_("%s: max monitor name length"
				 " (%d) exceeded\n"), token, MONITOR_NAME_LEN);
			continue;
		}

		/* never collect more than tmp_mons[] can hold */
		for (num = 0; num < avail_monitors && hits < MONITORS_MAX;
		     num++) {
			if (!strcmp(monitors[num]->name, token)) {
				dprint("Found requested monitor: %s\n", token);
				tmp_mons[hits] = monitors[num];
				hits++;
			}
		}
	}
	if (hits == 0) {
		printf(_("No matching monitor found in %s, "
			 "try -l option\n"),
		       param_copy != NULL ? param_copy : param);
		exit(EXIT_FAILURE);
	}
	free(param_copy);

	/* Override detected/registered monitors array with requested one */
	memcpy(monitors, tmp_mons,
	       sizeof(struct cpuidle_monitor *) * MONITORS_MAX);
	avail_monitors = hits;
}
void list_monitors(void)
{
unsigned int mon;
int state;
cstate_t s;
for (mon = 0; mon < avail_monitors; mon++) {
printf(_("Monitor \"%s\" (%d states) - Might overflow after %u "
"s\n"),
monitors[mon]->name, monitors[mon]->hw_states_num,
monitors[mon]->overflow_s);
for (state = 0; state < monitors[mon]->hw_states_num; state++) {
s = monitors[mon]->hw_states[state];
/*
* ToDo show more state capabilities:
* percent, time (granlarity)
*/
printf("%s\t[%c] -> %s\n", s.name, range_abbr[s.range],
gettext(s.desc));
}
}
}
/*
 * Run the command in @argv as a child process and measure while it runs.
 *
 * All monitors are started, the command is executed and waited for, then
 * all monitors are stopped. If the command exited normally its run time
 * and exit status are printed.
 *
 * The monitors are started before forking so that only this process ever
 * touches them. A child that cannot execute the command reports that and
 * leaves with _exit(); it must not return into the caller, which would
 * print the results a second time.
 *
 * Returns 0. The program is terminated if fork() or waitpid() fails.
 */
int fork_it(char **argv)
{
	int status = 0;
	unsigned int num;
	unsigned long long timediff;
	pid_t child_pid;
	struct timespec start, end;

	clock_gettime(CLOCK_REALTIME, &start);

	for (num = 0; num < avail_monitors; num++)
		monitors[num]->start();

	child_pid = fork();
	if (!child_pid) {
		/* child */
		execvp(argv[0], argv);
		/* only reached if the command could not be executed */
		perror(argv[0]);
		_exit(127);
	}

	/* parent */
	if (child_pid == -1) {
		perror("fork");
		exit(1);
	}

	signal(SIGINT, SIG_IGN);
	signal(SIGQUIT, SIG_IGN);
	if (waitpid(child_pid, &status, 0) == -1) {
		perror("wait");
		exit(1);
	}

	clock_gettime(CLOCK_REALTIME, &end);
	for (num = 0; num < avail_monitors; num++)
		monitors[num]->stop();

	timediff = timespec_diff_us(start, end);
	if (WIFEXITED(status))
		printf(_("%s took %.5f seconds and exited with status %d\n"),
		       argv[0], timediff / (1000.0 * 1000),
		       WEXITSTATUS(status));
	return 0;
}
/*
 * Measure for @i seconds: start all monitors, sleep, stop all monitors.
 *
 * If wake_cpus is set, this process is bound to every CPU in turn right
 * before the monitors are started and again before they are stopped.
 * Always returns 0.
 */
int do_interval_measure(int i)
{
	unsigned int idx;
	int cpu;

	if (wake_cpus) {
		for (cpu = 0; cpu < cpu_count; cpu++)
			bind_cpu(cpu);
	}

	idx = 0;
	while (idx < avail_monitors) {
		dprint("HW C-state residency monitor: %s - States: %d\n",
		       monitors[idx]->name, monitors[idx]->hw_states_num);
		monitors[idx]->start();
		idx++;
	}

	sleep(i);

	if (wake_cpus) {
		for (cpu = 0; cpu < cpu_count; cpu++)
			bind_cpu(cpu);
	}

	idx = 0;
	while (idx < avail_monitors) {
		monitors[idx]->stop();
		idx++;
	}

	return 0;
}
/*
 * Parse the options of the monitor sub command.
 *
 *   -l          list the available monitors
 *   -i seconds  measurement interval (only with -m or no other mode)
 *   -m list     comma separated list of monitors to show
 *   -c          set wake_cpus
 *
 * Option parsing stops at the first non-option argument ("+" in the
 * option string). Mode defaults to show_all. Conflicting or unknown
 * options and a malformed interval terminate the program.
 */
static void cmdline(int argc, char *argv[])
{
	int opt;
	long val;
	char *end;

	progname = basename(argv[0]);

	while ((opt = getopt(argc, argv, "+lci:m:")) != -1) {
		switch (opt) {
		case 'l':
			if (mode)
				print_wrong_arg_exit();
			mode = list;
			break;
		case 'i':
			/* only allow -i with -m or no option */
			if (mode && mode != show)
				print_wrong_arg_exit();
			/*
			 * The interval is passed to sleep(), which takes an
			 * unsigned value: a negative number would turn into
			 * a huge one. Also refuse anything that is not a
			 * plain number or does not fit into an int
			 * (~0U >> 1 is the largest int value).
			 */
			val = strtol(optarg, &end, 10);
			if (end == optarg || *end != '\0' ||
			    val < 0 || val > (long)(~0U >> 1))
				print_wrong_arg_exit();
			interval = (int)val;
			break;
		case 'm':
			if (mode)
				print_wrong_arg_exit();
			mode = show;
			show_monitors_param = optarg;
			break;
		case 'c':
			wake_cpus = 1;
			break;
		default:
			print_wrong_arg_exit();
		}
	}
	if (!mode)
		mode = show_all;
}
/*
 * Entry point of the monitor sub command.
 *
 * Parses the command line, reads the CPU topology, registers all monitors
 * that work on this machine, measures (either while a given command runs
 * or for the chosen interval) and prints the result table.
 *
 * Returns 0 on success, EXIT_FAILURE if the topology cannot be read and 1
 * if no monitor is available. List mode exits the program directly.
 */
int cmd_monitor(int argc, char **argv)
{
unsigned int num;
struct cpuidle_monitor *test_mon;
int cpu;
cmdline(argc, argv);
cpu_count = get_cpu_topology(&cpu_top);
if (cpu_count < 0) {
printf(_("Cannot read number of available processors\n"));
return EXIT_FAILURE;
}
/* Default is: monitor all CPUs */
if (bitmask_isallclear(cpus_chosen))
bitmask_setall(cpus_chosen);
dprint("System has up to %d CPU cores\n", cpu_count);
/* Keep every monitor whose do_register() succeeds, in list order */
for (num = 0; all_monitors[num]; num++) {
dprint("Try to register: %s\n", all_monitors[num]->name);
test_mon = all_monitors[num]->do_register();
if (test_mon) {
/* Skipped here, its unregister callback is not called */
if (test_mon->needs_root && !run_as_root) {
fprintf(stderr, _("Available monitor %s needs "
"root access\n"), test_mon->name);
continue;
}
monitors[avail_monitors] = test_mon;
dprint("%s registered\n", all_monitors[num]->name);
avail_monitors++;
}
}
if (avail_monitors == 0) {
printf(_("No HW Cstate monitors found\n"));
return 1;
}
if (mode == list) {
list_monitors();
exit(EXIT_SUCCESS);
}
/* -m given: narrow monitors[] down to the requested ones */
if (mode == show)
parse_monitor_param(show_monitors_param);
dprint("Packages: %d - Cores: %d - CPUs: %d\n",
cpu_top.pkgs, cpu_top.cores, cpu_count);
/*
* if any params left, it must be a command to fork
*/
if (argc - optind)
fork_it(argv + optind);
else
do_interval_measure(interval);
/* ToDo: Topology parsing needs fixing first to do
this more generically */
if (cpu_top.pkgs > 1)
print_header(3);
else
print_header(1);
/* cpu is an index into the topology info here, see print_results() */
for (cpu = 0; cpu < cpu_count; cpu++) {
if (cpu_top.pkgs > 1)
print_results(3, cpu);
else
print_results(1, cpu);
}
for (num = 0; num < avail_monitors; num++)
monitors[num]->unregister();
cpu_topology_release(cpu_top);
return 0;
}

View File

@@ -0,0 +1,85 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*
*/
#ifndef __CPUIDLE_INFO_HW__
#define __CPUIDLE_INFO_HW__
#include <stdarg.h>
#include <time.h>
#include "idle_monitor/idle_monitors.h"
/* Capacity of the array of registered monitors */
#define MONITORS_MAX 20
/* Sizes of the name/description buffers, terminating NUL included */
#define MONITOR_NAME_LEN 20
#define CSTATE_NAME_LEN 5
#define CSTATE_DESC_LEN 60
/*
 * Number of CPUs, set by cmd_monitor() from the topology.
 * NOTE(review): this is a definition without initializer, not an extern
 * declaration, so every file including this header defines cpu_count.
 * That presumably only links while the toolchain merges such definitions
 * (it would not with -fno-common) - consider "extern" here plus a single
 * definition in one .c file.
 */
int cpu_count;
/* Hard to define the right names ...: */
/* Part of the topology a state's value applies to */
enum power_range_e {
RANGE_THREAD, /* Lowest in topology hierarchy, AMD: core, Intel: thread
kernel sysfs: cpu */
RANGE_CORE, /* AMD: unit, Intel: core, kernel_sysfs: core_id */
RANGE_PACKAGE, /* Package, processor socket */
RANGE_MACHINE, /* Machine, platform wide */
RANGE_MAX };
/*
 * One value ("state") a monitor can report per CPU.
 *
 * id is passed back as self_id to the callbacks. Both callbacks store
 * their result through the second parameter and return 0 on success;
 * any other return value makes the result show up as "******".
 */
typedef struct cstate {
int id;
enum power_range_e range;
char name[CSTATE_NAME_LEN];
char desc[CSTATE_DESC_LEN];
/* either provide a percentage or a general count */
int (*get_count_percent)(unsigned int self_id, double *percent,
unsigned int cpu);
int (*get_count)(unsigned int self_id, unsigned long long *count,
unsigned int cpu);
} cstate_t;
/*
 * Description of one monitor.
 *
 * do_register() returns the monitor if it can be used on this machine
 * and NULL otherwise. start()/stop() are called right before and after
 * the measurement, unregister() once the results have been printed.
 */
struct cpuidle_monitor {
/* Name must not contain whitespaces */
char name[MONITOR_NAME_LEN];
/* strlen(name), set by the do_register() implementations */
int name_len;
/* Number of valid entries in hw_states */
int hw_states_num;
cstate_t *hw_states;
int (*start) (void);
int (*stop) (void);
struct cpuidle_monitor* (*do_register) (void);
void (*unregister)(void);
/* Seconds after which the counters might overflow (shown by -l) */
unsigned int overflow_s;
/* Non-zero: the monitor is skipped unless running as root */
int needs_root;
};
/* Time from start to end in microseconds */
extern long long timespec_diff_us(struct timespec start, struct timespec end);

/*
 * Warn on stderr that a measurement of @mes seconds was longer than the
 * @ov seconds the counters can run without overflowing (both unsigned
 * int).
 *
 * Wrapped in do { } while (0) so that the macro followed by a semicolon
 * is exactly one statement and can be used in an unbraced if/else.
 */
#define print_overflow_err(mes, ov)						\
do {										\
	fprintf(stderr, gettext("Measure took %u seconds, but registers could "	\
				"overflow at %u seconds, results "		\
				"could be inaccurate\n"), (mes), (ov));		\
} while (0)
/* Taken over from x86info project sources -> return 0 on success */
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Restrict the calling process to run on @cpu only.
 *
 * Returns 1 if the current affinity mask cannot be read, otherwise the
 * return value of sched_setaffinity() (0 on success).
 */
static inline int bind_cpu(int cpu)
{
	cpu_set_t set;

	if (sched_getaffinity(getpid(), sizeof(set), &set) != 0)
		return 1;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	return sched_setaffinity(getpid(), sizeof(set), &set);
}
#endif /* __CPUIDLE_INFO_HW__ */

View File

@@ -0,0 +1,7 @@
/*
 * List of all idle monitors, one DEF(name) per monitor "name_monitor".
 * This file is included with different definitions of DEF(): once to
 * declare the monitor structs and once to fill the array of pointers to
 * them. The first group is only built on x86.
 */
#if defined(__i386__) || defined(__x86_64__)
DEF(amd_fam14h)
DEF(intel_nhm)
DEF(intel_snb)
DEF(mperf)
#endif
DEF(cpuidle_sysfs)

View File

@@ -0,0 +1,18 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*
* Based on the idea from Michael Matz <matz@suse.de>
*
*/
#ifndef _CPUIDLE_IDLE_MONITORS_H_
#define _CPUIDLE_IDLE_MONITORS_H_
/*
 * Declare "struct cpuidle_monitor x_monitor" for every entry of
 * idle_monitors.def. DEF is undefined again afterwards so that it can be
 * redefined by whoever includes the list next.
 */
#define DEF(x) extern struct cpuidle_monitor x ##_monitor;
#include "idle_monitors.def"
#undef DEF
/* 0 terminated list of pointers to all monitors */
extern struct cpuidle_monitor *all_monitors[];
#endif /* _CPUIDLE_IDLE_MONITORS_H_ */

View File

@@ -0,0 +1,338 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*/
#if defined(__i386__) || defined(__x86_64__)
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <cpufreq.h>
#include "helpers/helpers.h"
#include "idle_monitor/cpupower-monitor.h"
/* Numbers of the MSRs read by this monitor */
#define MSR_APERF 0xE8
#define MSR_MPERF 0xE7
#define MSR_TSC 0x10
#define MSR_AMD_HWCR 0xc0010015
/* Ids of the states of this monitor, also index into mperf_cstates[] */
enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
/* Forward declarations, needed by the state table below */
static int mperf_get_count_percent(unsigned int self_id, double *percent,
unsigned int cpu);
static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
unsigned int cpu);
/* Wall clock time at start and end of the measurement */
static struct timespec time_start, time_end;
/*
 * State table of this monitor, indexed by enum mperf_id.
 * C0 and Cx report a percentage, Freq reports a count (MHz).
 */
static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
{
.name = "C0",
.desc = N_("Processor Core not idle"),
.id = C0,
.range = RANGE_THREAD,
.get_count_percent = mperf_get_count_percent,
},
{
.name = "Cx",
.desc = N_("Processor Core in an idle state"),
.id = Cx,
.range = RANGE_THREAD,
.get_count_percent = mperf_get_count_percent,
},
{
.name = "Freq",
.desc = N_("Average Frequency (including boost) in MHz"),
.id = AVG_FREQ,
.range = RANGE_THREAD,
.get_count = mperf_get_count_freq,
},
};
/* Where the frequency mperf ticks at comes from, see max_frequency */
enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
/* Chosen by init_maxfreq_mode() */
static int max_freq_mode;
/*
* The max frequency mperf is ticking at (in C0), either retrieved via:
* 1) calculated after measurements if we know TSC ticks at mperf/P0 frequency
* 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time
* 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen)
*/
static unsigned long max_frequency;
/* TSC value (read on CPU 0) at start and end of the measurement */
static unsigned long long tsc_at_measure_start;
static unsigned long long tsc_at_measure_end;
/* Per CPU MSR values at start (previous) and end (current) */
static unsigned long long *mperf_previous_count;
static unsigned long long *aperf_previous_count;
static unsigned long long *mperf_current_count;
static unsigned long long *aperf_current_count;
/* valid flag for all CPUs. If a MSR read failed it will be zero */
static int *is_valid;
/*
 * Read the TSC MSR of CPU 0 into *tsc.
 * Returns the result of read_msr(), i.e. 0 on success.
 */
static int mperf_get_tsc(unsigned long long *tsc)
{
	int err = read_msr(0, MSR_TSC, tsc);

	if (err != 0)
		dprint("Reading TSC MSR failed, returning %llu\n", *tsc);

	return err;
}
/*
 * Read APERF and MPERF of @cpu as start values of a measurement.
 *
 * is_valid[cpu] is set to 1 if both reads worked, else to 0.
 * Always returns 0.
 */
static int mperf_init_stats(unsigned int cpu)
{
	unsigned long long val;
	int failed;

	failed = read_msr(cpu, MSR_APERF, &val);
	aperf_previous_count[cpu] = val;

	failed |= read_msr(cpu, MSR_MPERF, &val);
	mperf_previous_count[cpu] = val;

	is_valid[cpu] = failed ? 0 : 1;

	return 0;
}
/*
 * Read APERF and MPERF of @cpu as end values of a measurement.
 *
 * is_valid[cpu] is set to 1 if both reads worked, else to 0.
 * Always returns 0.
 */
static int mperf_measure_stats(unsigned int cpu)
{
	unsigned long long val;
	int failed;

	failed = read_msr(cpu, MSR_APERF, &val);
	aperf_current_count[cpu] = val;

	failed |= read_msr(cpu, MSR_MPERF, &val);
	mperf_current_count[cpu] = val;

	is_valid[cpu] = failed ? 0 : 1;

	return 0;
}
/*
 * .get_count_percent callback of the C0 and Cx states.
 *
 * The share of C0 is the number of mperf ticks relative to the TSC ticks
 * (MAX_FREQ_TSC_REF) or to the measured time in microseconds
 * (MAX_FREQ_SYSFS); Cx is what is left to 100 percent.
 *
 * Returns 0 on success, -1 if the values of @cpu are not valid, if @id is
 * neither C0 nor Cx or if max_freq_mode has an unknown value.
 */
static int mperf_get_count_percent(unsigned int id, double *percent,
				   unsigned int cpu)
{
	unsigned long long aperf_diff, mperf_diff, tsc_diff;
	unsigned long long timediff;

	if (!is_valid[cpu])
		return -1;

	if (id != C0 && id != Cx)
		return -1;

	mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
	aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];

	switch (max_freq_mode) {
	case MAX_FREQ_TSC_REF:
		tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
		*percent = 100.0 * mperf_diff / tsc_diff;
		dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
		       mperf_cstates[id].name, mperf_diff, tsc_diff);
		break;
	case MAX_FREQ_SYSFS:
		timediff = timespec_diff_us(time_start, time_end);
		*percent = 100.0 * mperf_diff / timediff;
		dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
		       mperf_cstates[id].name, mperf_diff, timediff);
		break;
	default:
		return -1;
	}

	if (id == Cx)
		*percent = 100.0 - *percent;

	dprint("%s: previous: %llu - current: %llu - (%u)\n",
	       mperf_cstates[id].name, mperf_diff, aperf_diff, cpu);
	dprint("%s: %f\n", mperf_cstates[id].name, *percent);
	return 0;
}
/*
 * .get_count callback of the Freq state.
 *
 * Stores the average frequency of @cpu in *count: the maximum frequency
 * scaled by the ratio of aperf to mperf ticks. In MAX_FREQ_TSC_REF mode
 * the maximum frequency is first calculated from the TSC ticks per
 * microsecond of the measurement.
 *
 * Returns 0 on success, 1 if @id is not AVG_FREQ and -1 if no value can
 * be given: the values of @cpu are not valid, no time has passed or mperf
 * did not tick (both would end up in a division by zero).
 */
static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
				unsigned int cpu)
{
	unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;

	if (id != AVG_FREQ)
		return 1;

	if (!is_valid[cpu])
		return -1;

	mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
	aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];

	/* the ratio below is undefined without mperf ticks */
	if (mperf_diff == 0)
		return -1;

	if (max_freq_mode == MAX_FREQ_TSC_REF) {
		/* Calculate max_freq from TSC count */
		tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
		time_diff = timespec_diff_us(time_start, time_end);
		if (time_diff == 0)
			return -1;
		max_frequency = tsc_diff / time_diff;
	}

	*count = max_frequency * ((double)aperf_diff / mperf_diff);
	dprint("%s: Average freq based on %s maximum frequency:\n",
	       mperf_cstates[id].name,
	       (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
	/* was "%max_frequency", which printf reads as a %m conversion */
	dprint("max_frequency: %lu\n", max_frequency);
	dprint("aperf_diff: %llu\n", aperf_diff);
	dprint("mperf_diff: %llu\n", mperf_diff);
	dprint("avg freq: %llu\n", *count);
	return 0;
}
/*
 * .start callback: take the start time and TSC value, then read the
 * start values of all CPUs. Always returns 0.
 */
static int mperf_start(void)
{
	unsigned long long tsc_after;
	int cpu_idx = 0;

	clock_gettime(CLOCK_REALTIME, &time_start);
	mperf_get_tsc(&tsc_at_measure_start);

	while (cpu_idx < cpu_count) {
		mperf_init_stats(cpu_idx);
		cpu_idx++;
	}

	/* second TSC read only serves the debug output */
	mperf_get_tsc(&tsc_after);
	dprint("TSC diff: %llu\n", tsc_after - tsc_at_measure_start);
	return 0;
}
/*
 * .stop callback: read the end values of all CPUs, then take the end TSC
 * value and the end time. Always returns 0.
 */
static int mperf_stop(void)
{
	unsigned long long tsc_after;
	int cpu_idx = 0;

	while (cpu_idx < cpu_count) {
		mperf_measure_stats(cpu_idx);
		cpu_idx++;
	}

	mperf_get_tsc(&tsc_at_measure_end);
	clock_gettime(CLOCK_REALTIME, &time_end);

	/* second TSC read only serves the debug output */
	mperf_get_tsc(&tsc_after);
	dprint("TSC diff: %llu\n", tsc_after - tsc_at_measure_end);

	return 0;
}
/*
* Mperf register is defined to tick at P0 (maximum) frequency
*
* Instead of reading out P0 which can be tricky to read out from HW,
* we use TSC counter if it reliably ticks at P0/mperf frequency.
*
* Still try to fall back to:
* /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
* on older Intel HW without invariant TSC feature.
* Or on AMD machines where TSC does not tick at P0 (do not exist yet, but
* it's still double checked (MSR_AMD_HWCR)).
*
* On these machines the user would still get useful mperf
* stats when acpi-cpufreq driver is loaded.
*/
/*
 * Decide where the maximum frequency comes from and set max_freq_mode.
 *
 * The TSC is used as reference on CPUs with invariant TSC: always on
 * Intel, on AMD if MSR_AMD_HWCR says so or cannot be read. Everything
 * else falls back to the maximum frequency of the cpufreq subsystem.
 *
 * Returns 0 on success, -1 if the fallback is needed but cpufreq cannot
 * provide the maximum frequency.
 */
static int init_maxfreq_mode(void)
{
	int ret;
	unsigned long long hwcr;
	unsigned long min;

	/* parentheses needed: "!caps & FLAG" negates caps before masking */
	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
		goto use_sysfs;

	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
		/* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
		 * freq.
		 * A test whether hwcr is accessible/available would be:
		 * (cpupower_cpu_info.family > 0x10 ||
		 *   cpupower_cpu_info.family == 0x10 &&
		 *   cpupower_cpu_info.model >= 0x2))
		 * This should be the case for all aperf/mperf
		 * capable AMD machines and is therefore safe to test here.
		 * Compare with Linus kernel git commit: acf01734b1747b1ec4
		 */
		ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
		/*
		 * If the MSR read failed, assume a Xen system that did
		 * not explicitly provide access to it and assume TSC works
		 */
		if (ret != 0) {
			dprint("TSC read 0x%x failed - assume TSC working\n",
			       MSR_AMD_HWCR);
			/*
			 * "TSC works" has to select the TSC mode as well,
			 * otherwise the sysfs mode stays in effect without
			 * max_frequency ever being read.
			 */
			max_freq_mode = MAX_FREQ_TSC_REF;
			return 0;
		} else if (1 & (hwcr >> 24)) {
			max_freq_mode = MAX_FREQ_TSC_REF;
			return 0;
		} else { /* Use sysfs max frequency if available */ }
	} else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
		/*
		 * On Intel we assume mperf (in C0) is ticking at same
		 * rate than TSC
		 */
		max_freq_mode = MAX_FREQ_TSC_REF;
		return 0;
	}
use_sysfs:
	if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
		dprint("Cannot retrieve max freq from cpufreq kernel "
		       "subsystem\n");
		return -1;
	}
	max_freq_mode = MAX_FREQ_SYSFS;
	return 0;
}
/*
* This monitor provides:
*
* 1) Average frequency a CPU resided in
* This always works if the CPU has aperf/mperf capabilities
*
* 2) C0 and Cx (any sleep state) time a CPU resided in
* Works if mperf timer stops ticking in sleep states which
* seem to be the case on all current HW.
* Both is directly retrieved from HW registers and is independent
* from kernel statistics.
*/
struct cpuidle_monitor mperf_monitor;

/*
 * Probe for aperf/mperf support and set up the per-CPU bookkeeping.
 *
 * Returns a pointer to mperf_monitor on success. Returns NULL when the
 * CPU lacks CPUPOWER_CAP_APERF, when init_maxfreq_mode() fails, or when
 * one of the per-CPU arrays cannot be allocated (in which case nothing
 * stays allocated).
 */
struct cpuidle_monitor *mperf_register(void)
{
	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
		return NULL;

	if (init_maxfreq_mode())
		return NULL;

	/* Free this at program termination */
	is_valid = calloc(cpu_count, sizeof(*is_valid));
	mperf_previous_count = calloc(cpu_count,
				      sizeof(*mperf_previous_count));
	aperf_previous_count = calloc(cpu_count,
				      sizeof(*aperf_previous_count));
	mperf_current_count = calloc(cpu_count,
				     sizeof(*mperf_current_count));
	aperf_current_count = calloc(cpu_count,
				     sizeof(*aperf_current_count));

	/*
	 * All or nothing: a partially allocated set is useless, so drop
	 * whatever was obtained. free(NULL) is a no-op.
	 */
	if (!is_valid || !mperf_previous_count || !aperf_previous_count ||
	    !mperf_current_count || !aperf_current_count) {
		free(aperf_current_count);
		aperf_current_count = NULL;
		free(mperf_current_count);
		mperf_current_count = NULL;
		free(aperf_previous_count);
		aperf_previous_count = NULL;
		free(mperf_previous_count);
		mperf_previous_count = NULL;
		free(is_valid);
		is_valid = NULL;
		return NULL;
	}

	mperf_monitor.name_len = strlen(mperf_monitor.name);
	return &mperf_monitor;
}
/*
 * Release the per-CPU arrays allocated by mperf_register().
 *
 * The pointers are reset to NULL afterwards so that a repeated call (or
 * a stale access) cannot touch freed memory.
 */
void mperf_unregister(void)
{
	free(mperf_previous_count);
	mperf_previous_count = NULL;
	free(aperf_previous_count);
	aperf_previous_count = NULL;
	free(mperf_current_count);
	mperf_current_count = NULL;
	free(aperf_current_count);
	aperf_current_count = NULL;
	free(is_valid);
	is_valid = NULL;
}
/* Descriptor of the "Mperf" monitor: state table plus its callbacks */
struct cpuidle_monitor mperf_monitor = {
	.name			= "Mperf",
	.needs_root		= 1,
	/* 922337203 seconds TSC overflow at 20GHz */
	.overflow_s		= 922000000,
	.hw_states		= mperf_cstates,
	.hw_states_num		= MPERF_CSTATE_COUNT,
	.do_register		= mperf_register,
	.unregister		= mperf_unregister,
	.start			= mperf_start,
	.stop			= mperf_stop,
};
#endif /* #if defined(__i386__) || defined(__x86_64__) */

View File

@@ -0,0 +1,216 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*
* Based on Len Brown's <lenb@kernel.org> turbostat tool.
*/
#if defined(__i386__) || defined(__x86_64__)
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "helpers/helpers.h"
#include "idle_monitor/cpupower-monitor.h"
#define MSR_PKG_C3_RESIDENCY 0x3F8
#define MSR_PKG_C6_RESIDENCY 0x3F9
#define MSR_CORE_C3_RESIDENCY 0x3FC
#define MSR_CORE_C6_RESIDENCY 0x3FD
#define MSR_TSC 0x10
#define NHM_CSTATE_COUNT 4
/*
 * Counter ids understood by nhm_get_count(). C3..PC6 double as indices
 * into nhm_cstates[] and the per-state count arrays; TSC is a separate
 * id outside that index range.
 */
enum intel_nhm_id {
	C3 = 0,
	C6,
	PC3,
	PC6,
	TSC = 0xFFFF
};
static int nhm_get_count_percent(unsigned int self_id, double *percent,
				 unsigned int cpu);

/*
 * State table of the Nehalem monitor, ordered by enum intel_nhm_id:
 * two core-range states followed by two package-range states. All
 * entries share the same percentage callback.
 */
static cstate_t nhm_cstates[NHM_CSTATE_COUNT] = {
	{
		.id			= C3,
		.range			= RANGE_CORE,
		.name			= "C3",
		.desc			= N_("Processor Core C3"),
		.get_count_percent	= nhm_get_count_percent,
	},
	{
		.id			= C6,
		.range			= RANGE_CORE,
		.name			= "C6",
		.desc			= N_("Processor Core C6"),
		.get_count_percent	= nhm_get_count_percent,
	},
	{
		.id			= PC3,
		.range			= RANGE_PACKAGE,
		.name			= "PC3",
		.desc			= N_("Processor Package C3"),
		.get_count_percent	= nhm_get_count_percent,
	},
	{
		.id			= PC6,
		.range			= RANGE_PACKAGE,
		.name			= "PC6",
		.desc			= N_("Processor Package C6"),
		.get_count_percent	= nhm_get_count_percent,
	},
};
/* Per-CPU valid flag; zero if an MSR read failed for that CPU */
static int *is_valid;
/* Per-state arrays of per-CPU counter values taken at start and stop */
static unsigned long long *previous_count[NHM_CSTATE_COUNT],
			  *current_count[NHM_CSTATE_COUNT];
/* TSC values read when the measurement starts and when it stops */
static unsigned long long tsc_at_measure_start, tsc_at_measure_end;
/*
 * Read the MSR that belongs to counter @id on @cpu into @val.
 *
 * Returns 0 on success, -1 for an unknown @id or when read_msr()
 * reports a failure.
 */
static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val,
			 unsigned int cpu)
{
	int msr;

	if (id == C3)
		msr = MSR_CORE_C3_RESIDENCY;
	else if (id == C6)
		msr = MSR_CORE_C6_RESIDENCY;
	else if (id == PC3)
		msr = MSR_PKG_C3_RESIDENCY;
	else if (id == PC6)
		msr = MSR_PKG_C6_RESIDENCY;
	else if (id == TSC)
		msr = MSR_TSC;
	else
		return -1;

	return read_msr(cpu, msr, val) ? -1 : 0;
}
/*
 * Compute which share of the measured interval @cpu spent in state @id.
 *
 * @id:      index into nhm_cstates[] and the per-state count arrays
 * @percent: receives the residency in percent; set to 0.0 on failure
 * @cpu:     CPU whose counters are evaluated
 *
 * Returns 0 on success, -1 if the CPU is flagged invalid or the TSC did
 * not advance between the start and end snapshots.
 */
static int nhm_get_count_percent(unsigned int id, double *percent,
				 unsigned int cpu)
{
	unsigned long long count_diff, tsc_diff;

	*percent = 0.0;

	if (!is_valid[cpu])
		return -1;

	tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
	/* An empty interval would turn the division below into x / 0 */
	if (!tsc_diff)
		return -1;

	count_diff = current_count[id][cpu] - previous_count[id][cpu];
	*percent = (100.0 * count_diff) / tsc_diff;

	dprint("%s: previous: %llu - current: %llu - (%u)\n",
	       nhm_cstates[id].name, previous_count[id][cpu],
	       current_count[id][cpu], cpu);
	dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
	       nhm_cstates[id].name, tsc_diff, count_diff, *percent, cpu);

	return 0;
}
/*
 * Take the "before" snapshot: the TSC plus every state counter of every
 * CPU.
 *
 * A CPU is flagged valid only if all of its counter reads succeeded.
 * Always returns 0.
 */
static int nhm_start(void)
{
	int num, cpu;
	unsigned long long dbg = 0, val;

	nhm_get_count(TSC, &tsc_at_measure_start, 0);

	/* Every CPU starts out valid; a failing read below revokes that */
	for (cpu = 0; cpu < cpu_count; cpu++)
		is_valid[cpu] = 1;

	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
		for (cpu = 0; cpu < cpu_count; cpu++) {
			if (nhm_get_count(num, &val, cpu)) {
				/*
				 * Only clear the flag, so that a later
				 * successful read of another state cannot
				 * hide this failure. val may not have been
				 * written, store a defined value instead.
				 */
				is_valid[cpu] = 0;
				val = 0;
			}
			previous_count[num][cpu] = val;
		}
	}

	/* Debug only: TSC ticks spent while taking the snapshot */
	nhm_get_count(TSC, &dbg, 0);
	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
	return 0;
}
/*
 * Take the "after" snapshot: the TSC plus every state counter of every
 * CPU.
 *
 * The valid flag of a CPU is only ever cleared here, never set: a CPU
 * that nhm_start() left invalid stays invalid, and a single failing
 * read is not hidden by a later successful one. Always returns 0.
 */
static int nhm_stop(void)
{
	unsigned long long val;
	unsigned long long dbg = 0;
	int num, cpu;

	nhm_get_count(TSC, &tsc_at_measure_end, 0);

	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
		for (cpu = 0; cpu < cpu_count; cpu++) {
			if (nhm_get_count(num, &val, cpu)) {
				is_valid[cpu] = 0;
				/* val may not have been written */
				val = 0;
			}
			current_count[num][cpu] = val;
		}
	}

	/* Debug only: TSC ticks spent while taking the snapshot */
	nhm_get_count(TSC, &dbg, 0);
	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
	return 0;
}
struct cpuidle_monitor intel_nhm_monitor;

/*
 * Check whether this monitor applies to the running CPU and allocate
 * the per-CPU bookkeeping.
 *
 * Returns a pointer to intel_nhm_monitor on success. Returns NULL if
 * the CPU is not an Intel one, lacks CPUPOWER_CAP_INV_TSC or
 * CPUPOWER_CAP_APERF, or if an allocation fails (in which case nothing
 * stays allocated).
 */
struct cpuidle_monitor *intel_nhm_register(void)
{
	int num;

	if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
		return NULL;

	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
		return NULL;

	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
		return NULL;

	/* Free this at program termination */
	is_valid = calloc(cpu_count, sizeof(*is_valid));
	if (!is_valid)
		return NULL;

	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
		previous_count[num] = calloc(cpu_count,
					     sizeof(*previous_count[num]));
		current_count[num] = calloc(cpu_count,
					    sizeof(*current_count[num]));
		if (!previous_count[num] || !current_count[num])
			goto err_free;
	}

	intel_nhm_monitor.name_len = strlen(intel_nhm_monitor.name);
	return &intel_nhm_monitor;

err_free:
	/* Unwind from the slot that failed down to slot 0 */
	for (; num >= 0; num--) {
		free(previous_count[num]);
		previous_count[num] = NULL;
		free(current_count[num]);
		current_count[num] = NULL;
	}
	free(is_valid);
	is_valid = NULL;
	return NULL;
}
/*
 * Release everything allocated by intel_nhm_register().
 *
 * The pointers are reset to NULL afterwards so that a repeated call (or
 * a stale access) cannot touch freed memory.
 */
void intel_nhm_unregister(void)
{
	int num;

	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
		free(previous_count[num]);
		previous_count[num] = NULL;
		free(current_count[num]);
		current_count[num] = NULL;
	}
	free(is_valid);
	is_valid = NULL;
}
/* Descriptor of the "Nehalem" monitor: state table plus its callbacks */
struct cpuidle_monitor intel_nhm_monitor = {
	.name			= "Nehalem",
	.needs_root		= 1,
	/* 922337203 seconds TSC overflow at 20GHz */
	.overflow_s		= 922000000,
	.hw_states		= nhm_cstates,
	.hw_states_num		= NHM_CSTATE_COUNT,
	.do_register		= intel_nhm_register,
	.unregister		= intel_nhm_unregister,
	.start			= nhm_start,
	.stop			= nhm_stop,
};
#endif

View File

@@ -0,0 +1,196 @@
/*
* (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
*
* Licensed under the terms of the GNU GPL License version 2.
*
* Based on Len Brown's <lenb@kernel.org> turbostat tool.
*/
#if defined(__i386__) || defined(__x86_64__)
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "helpers/helpers.h"
#include "idle_monitor/cpupower-monitor.h"
#define MSR_PKG_C2_RESIDENCY 0x60D
#define MSR_PKG_C7_RESIDENCY 0x3FA
#define MSR_CORE_C7_RESIDENCY 0x3FE
#define MSR_TSC 0x10
/*
 * Counter ids understood by snb_get_count(). C7..PC7 double as indices
 * into snb_cstates[] and the per-state count arrays, SNB_CSTATE_COUNT
 * is the number of such states, and TSC is a separate id outside that
 * index range.
 */
enum intel_snb_id {
	C7 = 0,
	PC2,
	PC7,
	SNB_CSTATE_COUNT,
	TSC = 0xFFFF
};
static int snb_get_count_percent(unsigned int self_id, double *percent,
				 unsigned int cpu);

/*
 * State table of the SandyBridge monitor, ordered by enum intel_snb_id:
 * one core-range state followed by two package-range states. All
 * entries share the same percentage callback.
 */
static cstate_t snb_cstates[SNB_CSTATE_COUNT] = {
	{
		.id			= C7,
		.range			= RANGE_CORE,
		.name			= "C7",
		.desc			= N_("Processor Core C7"),
		.get_count_percent	= snb_get_count_percent,
	},
	{
		.id			= PC2,
		.range			= RANGE_PACKAGE,
		.name			= "PC2",
		.desc			= N_("Processor Package C2"),
		.get_count_percent	= snb_get_count_percent,
	},
	{
		.id			= PC7,
		.range			= RANGE_PACKAGE,
		.name			= "PC7",
		.desc			= N_("Processor Package C7"),
		.get_count_percent	= snb_get_count_percent,
	},
};
/* Per-CPU valid flag; zero if an MSR read failed for that CPU */
static int *is_valid;
/* Per-state arrays of per-CPU counter values taken at start and stop */
static unsigned long long *previous_count[SNB_CSTATE_COUNT],
			  *current_count[SNB_CSTATE_COUNT];
/* TSC values read when the measurement starts and when it stops */
static unsigned long long tsc_at_measure_start, tsc_at_measure_end;
/*
 * Read the MSR that belongs to counter @id on @cpu into @val.
 *
 * Returns 0 on success, -1 for an unknown @id or when read_msr()
 * reports a failure.
 */
static int snb_get_count(enum intel_snb_id id, unsigned long long *val,
			 unsigned int cpu)
{
	int msr;

	if (id == C7)
		msr = MSR_CORE_C7_RESIDENCY;
	else if (id == PC2)
		msr = MSR_PKG_C2_RESIDENCY;
	else if (id == PC7)
		msr = MSR_PKG_C7_RESIDENCY;
	else if (id == TSC)
		msr = MSR_TSC;
	else
		return -1;

	return read_msr(cpu, msr, val) ? -1 : 0;
}
/*
 * Compute which share of the measured interval @cpu spent in state @id.
 *
 * @id:      index into snb_cstates[] and the per-state count arrays
 * @percent: receives the residency in percent; set to 0.0 on failure
 * @cpu:     CPU whose counters are evaluated
 *
 * Returns 0 on success, -1 if the CPU is flagged invalid or the TSC did
 * not advance between the start and end snapshots.
 */
static int snb_get_count_percent(unsigned int id, double *percent,
				 unsigned int cpu)
{
	unsigned long long count_diff, tsc_diff;

	*percent = 0.0;

	if (!is_valid[cpu])
		return -1;

	tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
	/* An empty interval would turn the division below into x / 0 */
	if (!tsc_diff)
		return -1;

	count_diff = current_count[id][cpu] - previous_count[id][cpu];
	*percent = (100.0 * count_diff) / tsc_diff;

	dprint("%s: previous: %llu - current: %llu - (%u)\n",
	       snb_cstates[id].name, previous_count[id][cpu],
	       current_count[id][cpu], cpu);
	dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
	       snb_cstates[id].name, tsc_diff, count_diff, *percent, cpu);

	return 0;
}
/*
 * Take the "before" snapshot: every state counter of every CPU,
 * followed by the TSC.
 *
 * The per-CPU valid flags are not touched here; snb_stop() assigns
 * them. Always returns 0.
 */
static int snb_start(void)
{
	int num, cpu;
	unsigned long long val;

	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
		for (cpu = 0; cpu < cpu_count; cpu++) {
			/*
			 * val may not have been written when the read
			 * fails; store a defined value instead of an
			 * indeterminate one.
			 */
			if (snb_get_count(num, &val, cpu))
				val = 0;
			previous_count[num][cpu] = val;
		}
	}
	snb_get_count(TSC, &tsc_at_measure_start, 0);
	return 0;
}
/*
 * Take the "after" snapshot: the TSC followed by every state counter of
 * every CPU.
 *
 * A CPU is flagged valid only if all of its counter reads succeeded.
 * Always returns 0.
 */
static int snb_stop(void)
{
	unsigned long long val;
	int num, cpu;

	snb_get_count(TSC, &tsc_at_measure_end, 0);

	/* Every CPU starts out valid; a failing read below revokes that */
	for (cpu = 0; cpu < cpu_count; cpu++)
		is_valid[cpu] = 1;

	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
		for (cpu = 0; cpu < cpu_count; cpu++) {
			if (snb_get_count(num, &val, cpu)) {
				/*
				 * Only clear the flag, so that a later
				 * successful read of another state cannot
				 * hide this failure. val may not have been
				 * written, store a defined value instead.
				 */
				is_valid[cpu] = 0;
				val = 0;
			}
			current_count[num][cpu] = val;
		}
	}
	return 0;
}
struct cpuidle_monitor intel_snb_monitor;

/*
 * Check whether this monitor applies to the running CPU and allocate
 * the per-CPU bookkeeping.
 *
 * Only Intel family 6 CPUs with one of the model numbers listed below
 * are accepted. Returns a pointer to intel_snb_monitor on success, NULL
 * for any other CPU or if an allocation fails (in which case nothing
 * stays allocated).
 */
static struct cpuidle_monitor *snb_register(void)
{
	int num;

	if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL
	    || cpupower_cpu_info.family != 6)
		return NULL;

	switch (cpupower_cpu_info.model) {
	case 0x2A: /* SNB */
	case 0x2D: /* SNB Xeon */
	case 0x3A: /* IVB */
	case 0x3E: /* IVB Xeon */
		break;
	default:
		return NULL;
	}

	is_valid = calloc(cpu_count, sizeof(*is_valid));
	if (!is_valid)
		return NULL;

	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
		previous_count[num] = calloc(cpu_count,
					     sizeof(*previous_count[num]));
		current_count[num] = calloc(cpu_count,
					    sizeof(*current_count[num]));
		if (!previous_count[num] || !current_count[num])
			goto err_free;
	}

	intel_snb_monitor.name_len = strlen(intel_snb_monitor.name);
	return &intel_snb_monitor;

err_free:
	/* Unwind from the slot that failed down to slot 0 */
	for (; num >= 0; num--) {
		free(previous_count[num]);
		previous_count[num] = NULL;
		free(current_count[num]);
		current_count[num] = NULL;
	}
	free(is_valid);
	is_valid = NULL;
	return NULL;
}
/*
 * Release everything allocated by snb_register().
 *
 * The pointers are reset to NULL afterwards so that a repeated call (or
 * a stale access) cannot touch freed memory.
 */
void snb_unregister(void)
{
	int num;

	free(is_valid);
	is_valid = NULL;

	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
		free(previous_count[num]);
		previous_count[num] = NULL;
		free(current_count[num]);
		current_count[num] = NULL;
	}
}
/* Descriptor of the "SandyBridge" monitor: state table plus callbacks */
struct cpuidle_monitor intel_snb_monitor = {
	.name			= "SandyBridge",
	.needs_root		= 1,
	/* 922337203 seconds TSC overflow at 20GHz */
	.overflow_s		= 922000000,
	.hw_states_num		= SNB_CSTATE_COUNT,
	.hw_states		= snb_cstates,
	.do_register		= snb_register,
	.unregister		= snb_unregister,
	.start			= snb_start,
	.stop			= snb_stop,
};
#endif /* defined(__i386__) || defined(__x86_64__) */