666 lines
14 KiB
C
666 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* trace context switch
|
|
*
|
|
* Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
|
|
*
|
|
*/
|
|
#include <linux/module.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/kmemleak.h>
|
|
#include <linux/ftrace.h>
|
|
#include <trace/events/sched.h>
|
|
|
|
#include "trace.h"
|
|
|
|
/*
 * Selector values passed to tracing_{start,stop}_sched_switch() and OR-ed
 * together by the probes to build the flags handed to
 * tracing_record_taskinfo_sched_switch().
 */
#define RECORD_CMDLINE	1
#define RECORD_TGID	2

/* Number of active users of cmdline recording. */
static int sched_cmdline_ref;
/* Number of active users of tgid recording. */
static int sched_tgid_ref;
/* Held while the reference counts above are updated in start/stop. */
static DEFINE_MUTEX(sched_register_mutex);
|
|
|
|
static void
|
|
probe_sched_switch(void *ignore, bool preempt,
|
|
struct task_struct *prev, struct task_struct *next,
|
|
unsigned int prev_state)
|
|
{
|
|
int flags;
|
|
|
|
flags = (RECORD_TGID * !!sched_tgid_ref) +
|
|
(RECORD_CMDLINE * !!sched_cmdline_ref);
|
|
|
|
if (!flags)
|
|
return;
|
|
tracing_record_taskinfo_sched_switch(prev, next, flags);
|
|
}
|
|
|
|
static void
|
|
probe_sched_wakeup(void *ignore, struct task_struct *wakee)
|
|
{
|
|
int flags;
|
|
|
|
flags = (RECORD_TGID * !!sched_tgid_ref) +
|
|
(RECORD_CMDLINE * !!sched_cmdline_ref);
|
|
|
|
if (!flags)
|
|
return;
|
|
tracing_record_taskinfo_sched_switch(current, wakee, flags);
|
|
}
|
|
|
|
static int tracing_sched_register(void)
|
|
{
|
|
int ret;
|
|
|
|
ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
|
|
if (ret) {
|
|
pr_info("wakeup trace: Couldn't activate tracepoint"
|
|
" probe to kernel_sched_wakeup\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
|
|
if (ret) {
|
|
pr_info("wakeup trace: Couldn't activate tracepoint"
|
|
" probe to kernel_sched_wakeup_new\n");
|
|
goto fail_deprobe;
|
|
}
|
|
|
|
ret = register_trace_sched_switch(probe_sched_switch, NULL);
|
|
if (ret) {
|
|
pr_info("sched trace: Couldn't activate tracepoint"
|
|
" probe to kernel_sched_switch\n");
|
|
goto fail_deprobe_wake_new;
|
|
}
|
|
|
|
return ret;
|
|
fail_deprobe_wake_new:
|
|
unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
|
|
fail_deprobe:
|
|
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
|
|
return ret;
|
|
}
|
|
|
|
static void tracing_sched_unregister(void)
|
|
{
|
|
unregister_trace_sched_switch(probe_sched_switch, NULL);
|
|
unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
|
|
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
|
|
}
|
|
|
|
static void tracing_start_sched_switch(int ops)
|
|
{
|
|
bool sched_register;
|
|
|
|
mutex_lock(&sched_register_mutex);
|
|
sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
|
|
|
|
switch (ops) {
|
|
case RECORD_CMDLINE:
|
|
sched_cmdline_ref++;
|
|
break;
|
|
|
|
case RECORD_TGID:
|
|
sched_tgid_ref++;
|
|
break;
|
|
}
|
|
|
|
if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
|
|
tracing_sched_register();
|
|
mutex_unlock(&sched_register_mutex);
|
|
}
|
|
|
|
static void tracing_stop_sched_switch(int ops)
|
|
{
|
|
mutex_lock(&sched_register_mutex);
|
|
|
|
switch (ops) {
|
|
case RECORD_CMDLINE:
|
|
sched_cmdline_ref--;
|
|
break;
|
|
|
|
case RECORD_TGID:
|
|
sched_tgid_ref--;
|
|
break;
|
|
}
|
|
|
|
if (!sched_cmdline_ref && !sched_tgid_ref)
|
|
tracing_sched_unregister();
|
|
mutex_unlock(&sched_register_mutex);
|
|
}
|
|
|
|
void tracing_start_cmdline_record(void)
|
|
{
|
|
tracing_start_sched_switch(RECORD_CMDLINE);
|
|
}
|
|
|
|
void tracing_stop_cmdline_record(void)
|
|
{
|
|
tracing_stop_sched_switch(RECORD_CMDLINE);
|
|
}
|
|
|
|
void tracing_start_tgid_record(void)
|
|
{
|
|
tracing_start_sched_switch(RECORD_TGID);
|
|
}
|
|
|
|
void tracing_stop_tgid_record(void)
|
|
{
|
|
tracing_stop_sched_switch(RECORD_TGID);
|
|
}
|
|
|
|
/*
|
|
* The tgid_map array maps from pid to tgid; i.e. the value stored at index i
|
|
* is the tgid last observed corresponding to pid=i.
|
|
*/
|
|
static int *tgid_map;
|
|
|
|
/* The maximum valid index into tgid_map. */
|
|
static size_t tgid_map_max;
|
|
|
|
#define SAVED_CMDLINES_DEFAULT 128
|
|
#define NO_CMDLINE_MAP UINT_MAX
|
|
/*
|
|
* Preemption must be disabled before acquiring trace_cmdline_lock.
|
|
* The various trace_arrays' max_lock must be acquired in a context
|
|
* where interrupt is disabled.
|
|
*/
|
|
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
|
struct saved_cmdlines_buffer {
|
|
unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
|
|
unsigned *map_cmdline_to_pid;
|
|
unsigned cmdline_num;
|
|
int cmdline_idx;
|
|
char saved_cmdlines[];
|
|
};
|
|
static struct saved_cmdlines_buffer *savedcmd;
|
|
|
|
/*
 * Bytes consumed per saved entry: one TASK_COMM_LEN name plus one
 * map_cmdline_to_pid element. @s is only used inside sizeof and is
 * never dereferenced.
 */
#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)				\
	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
|
|
|
|
static inline char *get_saved_cmdlines(int idx)
|
|
{
|
|
return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
|
|
}
|
|
|
|
static inline void set_cmdline(int idx, const char *cmdline)
|
|
{
|
|
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
|
|
}
|
|
|
|
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
|
|
{
|
|
int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
|
|
|
|
kmemleak_free(s);
|
|
free_pages((unsigned long)s, order);
|
|
}
|
|
|
|
static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
|
|
{
|
|
struct saved_cmdlines_buffer *s;
|
|
struct page *page;
|
|
int orig_size, size;
|
|
int order;
|
|
|
|
/* Figure out how much is needed to hold the given number of cmdlines */
|
|
orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
|
|
order = get_order(orig_size);
|
|
size = 1 << (order + PAGE_SHIFT);
|
|
page = alloc_pages(GFP_KERNEL, order);
|
|
if (!page)
|
|
return NULL;
|
|
|
|
s = page_address(page);
|
|
kmemleak_alloc(s, size, 1, GFP_KERNEL);
|
|
memset(s, 0, sizeof(*s));
|
|
|
|
/* Round up to actual allocation */
|
|
val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
|
|
s->cmdline_num = val;
|
|
|
|
/* Place map_cmdline_to_pid array right after saved_cmdlines */
|
|
s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
|
|
|
|
s->cmdline_idx = 0;
|
|
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
|
|
sizeof(s->map_pid_to_cmdline));
|
|
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
|
|
val * sizeof(*s->map_cmdline_to_pid));
|
|
|
|
return s;
|
|
}
|
|
|
|
int trace_create_savedcmd(void)
|
|
{
|
|
savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
|
|
|
|
return savedcmd ? 0 : -ENOMEM;
|
|
}
|
|
|
|
int trace_save_cmdline(struct task_struct *tsk)
|
|
{
|
|
unsigned tpid, idx;
|
|
|
|
/* treat recording of idle task as a success */
|
|
if (!tsk->pid)
|
|
return 1;
|
|
|
|
tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
|
|
|
|
/*
|
|
* It's not the end of the world if we don't get
|
|
* the lock, but we also don't want to spin
|
|
* nor do we want to disable interrupts,
|
|
* so if we miss here, then better luck next time.
|
|
*
|
|
* This is called within the scheduler and wake up, so interrupts
|
|
* had better been disabled and run queue lock been held.
|
|
*/
|
|
lockdep_assert_preemption_disabled();
|
|
if (!arch_spin_trylock(&trace_cmdline_lock))
|
|
return 0;
|
|
|
|
idx = savedcmd->map_pid_to_cmdline[tpid];
|
|
if (idx == NO_CMDLINE_MAP) {
|
|
idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
|
|
|
|
savedcmd->map_pid_to_cmdline[tpid] = idx;
|
|
savedcmd->cmdline_idx = idx;
|
|
}
|
|
|
|
savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
|
|
set_cmdline(idx, tsk->comm);
|
|
|
|
arch_spin_unlock(&trace_cmdline_lock);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void __trace_find_cmdline(int pid, char comm[])
|
|
{
|
|
unsigned map;
|
|
int tpid;
|
|
|
|
if (!pid) {
|
|
strcpy(comm, "<idle>");
|
|
return;
|
|
}
|
|
|
|
if (WARN_ON_ONCE(pid < 0)) {
|
|
strcpy(comm, "<XXX>");
|
|
return;
|
|
}
|
|
|
|
tpid = pid & (PID_MAX_DEFAULT - 1);
|
|
map = savedcmd->map_pid_to_cmdline[tpid];
|
|
if (map != NO_CMDLINE_MAP) {
|
|
tpid = savedcmd->map_cmdline_to_pid[map];
|
|
if (tpid == pid) {
|
|
strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
|
|
return;
|
|
}
|
|
}
|
|
strcpy(comm, "<...>");
|
|
}
|
|
|
|
void trace_find_cmdline(int pid, char comm[])
|
|
{
|
|
preempt_disable();
|
|
arch_spin_lock(&trace_cmdline_lock);
|
|
|
|
__trace_find_cmdline(pid, comm);
|
|
|
|
arch_spin_unlock(&trace_cmdline_lock);
|
|
preempt_enable();
|
|
}
|
|
|
|
static int *trace_find_tgid_ptr(int pid)
|
|
{
|
|
/*
|
|
* Pairs with the smp_store_release in set_tracer_flag() to ensure that
|
|
* if we observe a non-NULL tgid_map then we also observe the correct
|
|
* tgid_map_max.
|
|
*/
|
|
int *map = smp_load_acquire(&tgid_map);
|
|
|
|
if (unlikely(!map || pid > tgid_map_max))
|
|
return NULL;
|
|
|
|
return &map[pid];
|
|
}
|
|
|
|
/*
 * Return the tgid stored for @pid, or 0 when trace_find_tgid_ptr() has no
 * entry for it.
 */
int trace_find_tgid(int pid)
{
	int *slot = trace_find_tgid_ptr(pid);

	if (!slot)
		return 0;

	return *slot;
}
|
|
|
|
static int trace_save_tgid(struct task_struct *tsk)
|
|
{
|
|
int *ptr;
|
|
|
|
/* treat recording of idle task as a success */
|
|
if (!tsk->pid)
|
|
return 1;
|
|
|
|
ptr = trace_find_tgid_ptr(tsk->pid);
|
|
if (!ptr)
|
|
return 0;
|
|
|
|
*ptr = tsk->tgid;
|
|
return 1;
|
|
}
|
|
|
|
static bool tracing_record_taskinfo_skip(int flags)
|
|
{
|
|
if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
|
|
return true;
|
|
if (!__this_cpu_read(trace_taskinfo_save))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* tracing_record_taskinfo - record the task info of a task
|
|
*
|
|
* @task: task to record
|
|
* @flags: TRACE_RECORD_CMDLINE for recording comm
|
|
* TRACE_RECORD_TGID for recording tgid
|
|
*/
|
|
void tracing_record_taskinfo(struct task_struct *task, int flags)
|
|
{
|
|
bool done;
|
|
|
|
if (tracing_record_taskinfo_skip(flags))
|
|
return;
|
|
|
|
/*
|
|
* Record as much task information as possible. If some fail, continue
|
|
* to try to record the others.
|
|
*/
|
|
done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
|
|
done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
|
|
|
|
/* If recording any information failed, retry again soon. */
|
|
if (!done)
|
|
return;
|
|
|
|
__this_cpu_write(trace_taskinfo_save, false);
|
|
}
|
|
|
|
/**
|
|
* tracing_record_taskinfo_sched_switch - record task info for sched_switch
|
|
*
|
|
* @prev: previous task during sched_switch
|
|
* @next: next task during sched_switch
|
|
* @flags: TRACE_RECORD_CMDLINE for recording comm
|
|
* TRACE_RECORD_TGID for recording tgid
|
|
*/
|
|
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
|
|
struct task_struct *next, int flags)
|
|
{
|
|
bool done;
|
|
|
|
if (tracing_record_taskinfo_skip(flags))
|
|
return;
|
|
|
|
/*
|
|
* Record as much task information as possible. If some fail, continue
|
|
* to try to record the others.
|
|
*/
|
|
done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
|
|
done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
|
|
done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
|
|
done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
|
|
|
|
/* If recording any information failed, retry again soon. */
|
|
if (!done)
|
|
return;
|
|
|
|
__this_cpu_write(trace_taskinfo_save, false);
|
|
}
|
|
|
|
/* Helpers to record a specific task information */
|
|
void tracing_record_cmdline(struct task_struct *task)
|
|
{
|
|
tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
|
|
}
|
|
|
|
void tracing_record_tgid(struct task_struct *task)
|
|
{
|
|
tracing_record_taskinfo(task, TRACE_RECORD_TGID);
|
|
}
|
|
|
|
int trace_alloc_tgid_map(void)
|
|
{
|
|
int *map;
|
|
|
|
if (tgid_map)
|
|
return 0;
|
|
|
|
tgid_map_max = pid_max;
|
|
map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
|
|
GFP_KERNEL);
|
|
if (!map)
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
* Pairs with smp_load_acquire() in
|
|
* trace_find_tgid_ptr() to ensure that if it observes
|
|
* the tgid_map we just allocated then it also observes
|
|
* the corresponding tgid_map_max value.
|
|
*/
|
|
smp_store_release(&tgid_map, map);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * seq_file ->next: advance *@pos by one and return the tgid_map entry for
 * that pid, or NULL when trace_find_tgid_ptr() has none.
 */
static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	int pid;

	(*pos)++;
	pid = *pos;

	return trace_find_tgid_ptr(pid);
}
|
|
|
|
/*
 * seq_file ->start: return the tgid_map entry for pid *@pos, or NULL when
 * trace_find_tgid_ptr() has none.
 */
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	int first_pid = *pos;

	return trace_find_tgid_ptr(first_pid);
}
|
|
|
|
/* seq_file ->stop: nothing to release, ->start takes no lock. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
|
|
|
|
static int saved_tgids_show(struct seq_file *m, void *v)
|
|
{
|
|
int *entry = (int *)v;
|
|
int pid = entry - tgid_map;
|
|
int tgid = *entry;
|
|
|
|
if (tgid == 0)
|
|
return SEQ_SKIP;
|
|
|
|
seq_printf(m, "%d %d\n", pid, tgid);
|
|
return 0;
|
|
}
|
|
|
|
static const struct seq_operations tracing_saved_tgids_seq_ops = {
|
|
.start = saved_tgids_start,
|
|
.stop = saved_tgids_stop,
|
|
.next = saved_tgids_next,
|
|
.show = saved_tgids_show,
|
|
};
|
|
|
|
static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
|
|
{
|
|
int ret;
|
|
|
|
ret = tracing_check_open_get_tr(NULL);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return seq_open(filp, &tracing_saved_tgids_seq_ops);
|
|
}
|
|
|
|
|
|
const struct file_operations tracing_saved_tgids_fops = {
|
|
.open = tracing_saved_tgids_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = seq_release,
|
|
};
|
|
|
|
/*
 * seq_file ->next: return the next used element of map_cmdline_to_pid at
 * or after @v, or NULL once the end of the array is reached.
 *
 * @v is stepped past first unless this is the very first call (both *@pos
 * and m->count are zero). *@pos is always incremented.
 */
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *cur = v;
	unsigned int *end;

	if (*pos || m->count)
		cur++;

	(*pos)++;

	end = savedcmd->map_cmdline_to_pid + savedcmd->cmdline_num;
	while (cur < end) {
		/* Return the first slot that actually holds a pid. */
		if (*cur != -1 && *cur != NO_CMDLINE_MAP)
			return cur;
		cur++;
	}

	return NULL;
}
|
|
|
|
/*
 * seq_file ->start: disable preemption, take trace_cmdline_lock and step
 * from the beginning of map_cmdline_to_pid to the element for *@pos.
 *
 * Returns that element, or NULL if the array runs out first. The lock is
 * still held on return in both cases; saved_cmdlines_stop() drops it.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	loff_t walked = 0;
	void *v;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	for (v = &savedcmd->map_cmdline_to_pid[0]; walked <= *pos; ) {
		v = saved_cmdlines_next(m, v, &walked);
		if (!v)
			break;
	}

	return v;
}
|
|
|
|
static void saved_cmdlines_stop(struct seq_file *m, void *v)
|
|
{
|
|
arch_spin_unlock(&trace_cmdline_lock);
|
|
preempt_enable();
|
|
}
|
|
|
|
static int saved_cmdlines_show(struct seq_file *m, void *v)
|
|
{
|
|
char buf[TASK_COMM_LEN];
|
|
unsigned int *pid = v;
|
|
|
|
__trace_find_cmdline(*pid, buf);
|
|
seq_printf(m, "%d %s\n", *pid, buf);
|
|
return 0;
|
|
}
|
|
|
|
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
|
|
.start = saved_cmdlines_start,
|
|
.next = saved_cmdlines_next,
|
|
.stop = saved_cmdlines_stop,
|
|
.show = saved_cmdlines_show,
|
|
};
|
|
|
|
static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
|
|
{
|
|
int ret;
|
|
|
|
ret = tracing_check_open_get_tr(NULL);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
|
|
}
|
|
|
|
const struct file_operations tracing_saved_cmdlines_fops = {
|
|
.open = tracing_saved_cmdlines_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = seq_release,
|
|
};
|
|
|
|
static ssize_t
|
|
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
|
|
size_t cnt, loff_t *ppos)
|
|
{
|
|
char buf[64];
|
|
int r;
|
|
|
|
preempt_disable();
|
|
arch_spin_lock(&trace_cmdline_lock);
|
|
r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
|
|
arch_spin_unlock(&trace_cmdline_lock);
|
|
preempt_enable();
|
|
|
|
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
|
|
}
|
|
|
|
void trace_free_saved_cmdlines_buffer(void)
|
|
{
|
|
free_saved_cmdlines_buffer(savedcmd);
|
|
}
|
|
|
|
static int tracing_resize_saved_cmdlines(unsigned int val)
|
|
{
|
|
struct saved_cmdlines_buffer *s, *savedcmd_temp;
|
|
|
|
s = allocate_cmdlines_buffer(val);
|
|
if (!s)
|
|
return -ENOMEM;
|
|
|
|
preempt_disable();
|
|
arch_spin_lock(&trace_cmdline_lock);
|
|
savedcmd_temp = savedcmd;
|
|
savedcmd = s;
|
|
arch_spin_unlock(&trace_cmdline_lock);
|
|
preempt_enable();
|
|
free_saved_cmdlines_buffer(savedcmd_temp);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static ssize_t
|
|
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
|
|
size_t cnt, loff_t *ppos)
|
|
{
|
|
unsigned long val;
|
|
int ret;
|
|
|
|
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/* must have at least 1 entry or less than PID_MAX_DEFAULT */
|
|
if (!val || val > PID_MAX_DEFAULT)
|
|
return -EINVAL;
|
|
|
|
ret = tracing_resize_saved_cmdlines((unsigned int)val);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
*ppos += cnt;
|
|
|
|
return cnt;
|
|
}
|
|
|
|
const struct file_operations tracing_saved_cmdlines_size_fops = {
|
|
.open = tracing_open_generic,
|
|
.read = tracing_saved_cmdlines_size_read,
|
|
.write = tracing_saved_cmdlines_size_write,
|
|
};
|