tracing: make the function profiler per cpu
Impact: speed enhancement

By making the function profiler record in per cpu data, we not only
get better readings and avoid races, we also do not have to take any
locks.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
parent 0706f1c48c
commit cafb168a1c
1 changed file with 130 additions and 69 deletions
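The core of the change is visible in the first hunk below: the global hash table, page list, and raw spinlock are folded into a per-CPU struct ftrace_profile_stat, so each CPU records into memory it owns exclusively. As a rough userspace analogy of that pattern (not the kernel code itself; the profile_stat struct, profiled_work, NSLOTS, and the thread-per-CPU model are invented for illustration), giving each execution context a private slot removes both the data race and the lock on the hot path:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NSLOTS 4			/* stands in for NR_CPUS */

/* Userspace stand-in for DEFINE_PER_CPU(struct ftrace_profile_stat, ...):
 * one independent slot per execution context. */
struct profile_stat {
	unsigned long hits;
	char pad[64 - sizeof(unsigned long)];	/* keep slots on separate cache lines */
};

static struct profile_stat stats[NSLOTS];

/* Each thread updates only its own slot, so the hot path needs no
 * lock and no atomic read-modify-write. */
static void *profiled_work(void *arg)
{
	struct profile_stat *stat = &stats[(intptr_t)arg];

	for (int i = 0; i < 1000000; i++)
		stat->hits++;
	return NULL;
}

int main(void)
{
	pthread_t tid[NSLOTS];
	unsigned long total = 0;

	for (intptr_t c = 0; c < NSLOTS; c++)
		pthread_create(&tid[c], NULL, profiled_work, (void *)c);
	for (int c = 0; c < NSLOTS; c++)
		pthread_join(tid[c], NULL);

	/* Aggregation is deferred to read time, the way the commit
	 * registers a separate "function%d" stat file per cpu. */
	for (int c = 0; c < NSLOTS; c++)
		total += stats[c].hits;
	printf("total: %lu\n", total);	/* always 4000000: no lost updates */
	return 0;
}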
kernel/trace/ftrace.c

@@ -257,28 +257,28 @@ struct ftrace_profile_page {
 	struct ftrace_profile		records[];
 };
 
+struct ftrace_profile_stat {
+	atomic_t			disabled;
+	struct hlist_head		*hash;
+	struct ftrace_profile_page	*pages;
+	struct ftrace_profile_page	*start;
+	struct tracer_stat		stat;
+};
+
 #define PROFILE_RECORDS_SIZE						\
 	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
 
 #define PROFILES_PER_PAGE					\
 	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
 
-/* TODO: make these percpu, to prevent cache line bouncing */
-static struct ftrace_profile_page *profile_pages_start;
-static struct ftrace_profile_page *profile_pages;
-
-static struct hlist_head *ftrace_profile_hash;
 static int ftrace_profile_bits;
 static int ftrace_profile_enabled;
 static DEFINE_MUTEX(ftrace_profile_lock);
 
-static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable);
+static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
 
 #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
 
-static raw_spinlock_t ftrace_profile_rec_lock =
-	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-
 static void *
 function_stat_next(void *v, int idx)
 {
@@ -303,7 +303,13 @@ function_stat_next(void *v, int idx)
 
 static void *function_stat_start(struct tracer_stat *trace)
 {
-	return function_stat_next(&profile_pages_start->records[0], 0);
+	struct ftrace_profile_stat *stat =
+		container_of(trace, struct ftrace_profile_stat, stat);
+
+	if (!stat || !stat->start)
+		return NULL;
+
+	return function_stat_next(&stat->start->records[0], 0);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -374,20 +380,11 @@ static int function_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct tracer_stat function_stats = {
-	.name = "functions",
-	.stat_start = function_stat_start,
-	.stat_next = function_stat_next,
-	.stat_cmp = function_stat_cmp,
-	.stat_headers = function_stat_headers,
-	.stat_show = function_stat_show
-};
-
-static void ftrace_profile_reset(void)
+static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
 {
 	struct ftrace_profile_page *pg;
 
-	pg = profile_pages = profile_pages_start;
+	pg = stat->pages = stat->start;
 
 	while (pg) {
 		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
@@ -395,24 +392,24 @@ static void ftrace_profile_reset(void)
 		pg = pg->next;
 	}
 
-	memset(ftrace_profile_hash, 0,
+	memset(stat->hash, 0,
 	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
 }
 
-int ftrace_profile_pages_init(void)
+int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 {
 	struct ftrace_profile_page *pg;
 	int i;
 
 	/* If we already allocated, do nothing */
-	if (profile_pages)
+	if (stat->pages)
 		return 0;
 
-	profile_pages = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!profile_pages)
+	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!stat->pages)
 		return -ENOMEM;
 
-	pg = profile_pages_start = profile_pages;
+	pg = stat->start = stat->pages;
 
 	/* allocate 10 more pages to start */
 	for (i = 0; i < 10; i++) {
@@ -430,13 +427,16 @@ int ftrace_profile_pages_init(void)
 	return 0;
 }
 
-static int ftrace_profile_init(void)
+static int ftrace_profile_init_cpu(int cpu)
 {
+	struct ftrace_profile_stat *stat;
 	int size;
 
-	if (ftrace_profile_hash) {
+	stat = &per_cpu(ftrace_profile_stats, cpu);
+
+	if (stat->hash) {
 		/* If the profile is already created, simply reset it */
-		ftrace_profile_reset();
+		ftrace_profile_reset(stat);
 		return 0;
 	}
 
@@ -446,29 +446,45 @@ static int ftrace_profile_init(void)
 	 */
 	size = FTRACE_PROFILE_HASH_SIZE;
 
-	ftrace_profile_hash =
-		kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
+	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
 
-	if (!ftrace_profile_hash)
+	if (!stat->hash)
 		return -ENOMEM;
 
-	size--;
+	if (!ftrace_profile_bits) {
+		size--;
 
-	for (; size; size >>= 1)
-		ftrace_profile_bits++;
+		for (; size; size >>= 1)
+			ftrace_profile_bits++;
+	}
 
 	/* Preallocate a few pages */
-	if (ftrace_profile_pages_init() < 0) {
-		kfree(ftrace_profile_hash);
-		ftrace_profile_hash = NULL;
+	if (ftrace_profile_pages_init(stat) < 0) {
+		kfree(stat->hash);
+		stat->hash = NULL;
		return -ENOMEM;
 	}
 
 	return 0;
 }
 
+static int ftrace_profile_init(void)
+{
+	int cpu;
+	int ret = 0;
+
+	for_each_online_cpu(cpu) {
+		ret = ftrace_profile_init_cpu(cpu);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 /* interrupts must be disabled */
-static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
+static struct ftrace_profile *
+ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
 {
 	struct ftrace_profile *rec;
 	struct hlist_head *hhd;
@@ -476,7 +492,7 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
 	unsigned long key;
 
 	key = hash_long(ip, ftrace_profile_bits);
-	hhd = &ftrace_profile_hash[key];
+	hhd = &stat->hash[key];
 
 	if (hlist_empty(hhd))
 		return NULL;
@@ -489,52 +505,50 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
 	return NULL;
 }
 
-static void ftrace_add_profile(struct ftrace_profile *rec)
+static void ftrace_add_profile(struct ftrace_profile_stat *stat,
+			       struct ftrace_profile *rec)
 {
 	unsigned long key;
 
 	key = hash_long(rec->ip, ftrace_profile_bits);
-	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
+	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
 }
 
 /* Interrupts must be disabled calling this */
 static struct ftrace_profile *
-ftrace_profile_alloc(unsigned long ip, bool alloc_safe)
+ftrace_profile_alloc(struct ftrace_profile_stat *stat,
+		     unsigned long ip, bool alloc_safe)
 {
 	struct ftrace_profile *rec = NULL;
 
 	/* prevent recursion */
-	if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1)
+	if (atomic_inc_return(&stat->disabled) != 1)
 		goto out;
 
-	__raw_spin_lock(&ftrace_profile_rec_lock);
-
 	/* Try to always keep another page available */
-	if (!profile_pages->next && alloc_safe)
-		profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
+	if (!stat->pages->next && alloc_safe)
+		stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
 
 	/*
 	 * Try to find the function again since another
 	 * task on another CPU could have added it
 	 */
-	rec = ftrace_find_profiled_func(ip);
+	rec = ftrace_find_profiled_func(stat, ip);
 	if (rec)
-		goto out_unlock;
+		goto out;
 
-	if (profile_pages->index == PROFILES_PER_PAGE) {
-		if (!profile_pages->next)
-			goto out_unlock;
-		profile_pages = profile_pages->next;
+	if (stat->pages->index == PROFILES_PER_PAGE) {
+		if (!stat->pages->next)
+			goto out;
+		stat->pages = stat->pages->next;
 	}
 
-	rec = &profile_pages->records[profile_pages->index++];
+	rec = &stat->pages->records[stat->pages->index++];
 	rec->ip = ip;
-	ftrace_add_profile(rec);
+	ftrace_add_profile(stat, rec);
 
- out_unlock:
-	__raw_spin_unlock(&ftrace_profile_rec_lock);
 out:
-	atomic_dec(&__get_cpu_var(ftrace_profile_disable));
+	atomic_dec(&stat->disabled);
 
 	return rec;
 }
@@ -552,6 +566,7 @@ static bool ftrace_safe_to_allocate(void)
 static void
 function_profile_call(unsigned long ip, unsigned long parent_ip)
 {
+	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
 	unsigned long flags;
 	bool alloc_safe;
@@ -562,9 +577,14 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 	alloc_safe = ftrace_safe_to_allocate();
 
 	local_irq_save(flags);
-	rec = ftrace_find_profiled_func(ip);
+
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash)
+		goto out;
+
+	rec = ftrace_find_profiled_func(stat, ip);
 	if (!rec) {
-		rec = ftrace_profile_alloc(ip, alloc_safe);
+		rec = ftrace_profile_alloc(stat, ip, alloc_safe);
 		if (!rec)
 			goto out;
 	}
@@ -583,13 +603,19 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
 
 static void profile_graph_return(struct ftrace_graph_ret *trace)
 {
-	unsigned long flags;
+	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
+	unsigned long flags;
 
 	local_irq_save(flags);
-	rec = ftrace_find_profiled_func(trace->func);
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash)
+		goto out;
+
+	rec = ftrace_find_profiled_func(stat, trace->func);
 	if (rec)
 		rec->time += trace->rettime - trace->calltime;
+
+ out:
 	local_irq_restore(flags);
 }
@@ -687,16 +713,51 @@ static const struct file_operations ftrace_profile_fops = {
 	.write		= ftrace_profile_write,
 };
 
+/* used to initialize the real stat files */
+static struct tracer_stat function_stats __initdata = {
+	.name = "functions",
+	.stat_start = function_stat_start,
+	.stat_next = function_stat_next,
+	.stat_cmp = function_stat_cmp,
+	.stat_headers = function_stat_headers,
+	.stat_show = function_stat_show
+};
+
 static void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
+	struct ftrace_profile_stat *stat;
 	struct dentry *entry;
+	char *name;
 	int ret;
+	int cpu;
 
-	ret = register_stat_tracer(&function_stats);
-	if (ret) {
-		pr_warning("Warning: could not register "
-			   "function stats\n");
-		return;
+	for_each_possible_cpu(cpu) {
+		stat = &per_cpu(ftrace_profile_stats, cpu);
+
+		/* allocate enough for function name + cpu number */
+		name = kmalloc(32, GFP_KERNEL);
+		if (!name) {
+			/*
+			 * The files created are permanent, if something happens
+			 * we still do not free memory.
+			 */
+			kfree(stat);
+			WARN(1,
+			     "Could not allocate stat file for cpu %d\n",
+			     cpu);
+			return;
+		}
+		stat->stat = function_stats;
+		snprintf(name, 32, "function%d", cpu);
+		stat->stat.name = name;
+		ret = register_stat_tracer(&stat->stat);
+		if (ret) {
+			WARN(1,
+			     "Could not register function stat for cpu %d\n",
+			     cpu);
+			kfree(name);
+			return;
+		}
 	}
 
 	entry = debugfs_create_file("function_profile_enabled", 0644,