测试环境
root@:curtis# uname -a
Linux curtis-Aspire-E5-471G 5.15.0-52-generic #58~20.04.1-Ubuntu SMP Thu Oct 13 13:09:46 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
root@:curtis# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 20.04.3 LTS
Release: 20.04
Codename: focal
Linux 通过 proc 文件系统将进程的内核栈信息暴露给用户态,调用链如下所示。
root@:curtis# cat /proc/self/stack
[<0>] proc_pid_stack+0x9a/0xf0
[<0>] proc_single_show+0x52/0xc0
[<0>] seq_read_iter+0x124/0x450
[<0>] seq_read+0xfd/0x150
[<0>] vfs_read+0xa0/0x1a0
[<0>] ksys_read+0x67/0xf0
[<0>] __x64_sys_read+0x1a/0x20
[<0>] do_syscall_64+0x5c/0xc0
[<0>] entry_SYSCALL_64_after_hwframe+0x61/0xcb
从调用栈上来看,最终调用的是函数 proc_pid_stack。
#ifdef CONFIG_STACKTRACE

#define MAX_STACK_TRACE_DEPTH 64

/*
 * proc_pid_stack - emit the kernel stack trace of @task into seq file @m.
 *
 * Writes one "[<0>] symbol+offset/size" line per saved frame, at most
 * MAX_STACK_TRACE_DEPTH frames.
 *
 * Returns 0 on success, -EACCES if the opener lacks CAP_SYS_ADMIN in the
 * initial user namespace, -ENOMEM if the frame buffer cannot be allocated,
 * or the error reported by lock_trace().
 */
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long *frames;
	unsigned int depth, idx;
	int ret;

	/*
	 * Unwinding the stack of a task that may be running is inherently
	 * racy. Handy as that is for debugging, the output can leak kernel
	 * stack contents to an attacker. Making it race-free would mean
	 * guaranteeing the remote task cannot be scheduled, and even then the
	 * unwinder would remain local attack surface. Hence: root only.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	frames = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*frames),
			       GFP_KERNEL);
	if (!frames)
		return -ENOMEM;

	ret = lock_trace(task);
	if (ret)
		goto out_free;

	depth = stack_trace_save_tsk(task, frames, MAX_STACK_TRACE_DEPTH, 0);
	for (idx = 0; idx < depth; idx++)
		seq_printf(m, "[<0>] %pB\n", (void *)frames[idx]);

	unlock_trace(task);

out_free:
	kfree(frames);
	return ret;
}
#endif
从函数的定义来看,需要打开内核调试选项 CONFIG_STACKTRACE;核心逻辑调用的是 stack_trace_save_tsk 函数。该函数为非导出函数,如何使用未导出函数,之前的文章已有介绍。
/**
 * stack_trace_save_tsk - Capture the stack trace of a task into an array
 * @task:	Task whose stack is walked
 * @store:	Array that receives the trace entries
 * @size:	Capacity of @store, in entries
 * @skipnr:	How many leading entries of the trace to drop
 *
 * Return: The number of entries written to @store
 */
unsigned int stack_trace_save_tsk(struct task_struct *task,
				  unsigned long *store, unsigned int size,
				  unsigned int skipnr)
{
	/* A task tracing itself additionally skips this function's frame. */
	const unsigned int self_frame = (task == current) ? 1 : 0;
	struct stack_trace trace = {
		.skip		= skipnr + self_frame,
		.max_entries	= size,
		.entries	= store,
	};

	save_stack_trace_tsk(task, &trace);

	return trace.nr_entries;
}
主要代码逻辑
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/rcupdate.h>

#include "trace.h"

#define MAX_STACK_TRACE_DEPTH 64

/*
 * Address of the kernel's stack_trace_save_tsk(), looked up by name in
 * stack_trace_init() via find_func(). NULL until that lookup has succeeded.
 */
unsigned int (*stack_trace_save_tsk_ptr)(struct task_struct *task,
					 unsigned long *store,
					 unsigned int size,
					 unsigned int skipnr);

/*
 * Log @task's PID, command name and stack frames.
 *
 * @entries is caller-provided scratch space with room for
 * MAX_STACK_TRACE_DEPTH frames. This helper allocates nothing itself, so a
 * caller can reuse one buffer across many tasks and can call it inside an
 * RCU read-side critical section.
 * NOTE(review): that relies on the resolved stack_trace_save_tsk() not
 * sleeping - confirm for the target kernel.
 */
static void dump_task_stack(struct task_struct *task, unsigned long *entries)
{
	unsigned int i, nr_entries;

	nr_entries = stack_trace_save_tsk_ptr(task, entries,
					      MAX_STACK_TRACE_DEPTH, 0);

	pr_info("PID = %d, COMM = %s\n", task->pid, task->comm);
	for (i = 0; i < nr_entries; i++)
		pr_info(" [<0>] %pB\n", (void *)entries[i]);
}

/*
 * print_stack - log the stack trace of a single task.
 * @task: task to dump; the caller must keep it alive for the duration of
 *        the call.
 *
 * Returns 0 on success or -ENOMEM if the frame buffer cannot be allocated.
 * Allocates with GFP_KERNEL, so it must be called from a context that is
 * allowed to sleep.
 */
int print_stack(struct task_struct *task)
{
	unsigned long *entries;

	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	dump_task_stack(task, entries);

	kfree(entries);
	return 0;
}

/*
 * query_stack - log the stack trace of every thread in the system.
 *
 * Returns 0 on success or -ENOMEM if the frame buffer cannot be allocated.
 */
int query_stack(void)
{
	struct task_struct *g, *t;
	unsigned long *entries;

	/*
	 * Allocate once, before entering the RCU section: GFP_KERNEL may
	 * sleep, which is not allowed under rcu_read_lock().
	 */
	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	/*
	 * The task list may only be walked under rcu_read_lock() (or
	 * tasklist_lock); otherwise a task can be freed while we look at it.
	 */
	rcu_read_lock();
	for_each_process_thread(g, t)
		dump_task_stack(t, entries);
	rcu_read_unlock();

	kfree(entries);
	return 0;
}

/*
 * Module entry point: resolve stack_trace_save_tsk() by name, then dump the
 * stack of every thread once.
 *
 * Returns 0 on success, the negative value reported by
 * init_kallsyms_lookup_func() or query_stack() on their failure, or -ENOENT
 * if stack_trace_save_tsk cannot be found.
 */
static int __init stack_trace_init(void)
{
	int ret;

	ret = init_kallsyms_lookup_func();
	if (ret < 0) {
		pr_err("get kallsyms_lookup_name addr failed\n");
		return ret;
	}

	stack_trace_save_tsk_ptr = find_func("stack_trace_save_tsk");
	if (!stack_trace_save_tsk_ptr) {
		pr_err("get stack_trace_save_tsk addr failed\n");
		return -ENOENT;
	}

	ret = query_stack();
	if (ret < 0) {
		pr_err("query stack failed\n");
		return ret;
	}

	pr_info("stack trace init\n");
	return 0;
}

/* Module exit point: nothing to release, only logs the unload. */
static void __exit stack_trace_exit(void)
{
	pr_info("stack trace exit\n");
}

module_init(stack_trace_init);
module_exit(stack_trace_exit);
MODULE_LICENSE("GPL");
调用栈打印示例。
[781162.407668] PID = 107085, COMM = sudo
[781162.407670] [<0>] do_sys_poll+0x486/0x610
[781162.407675] [<0>] __x64_sys_ppoll+0xac/0xe0
[781162.407679] [<0>] do_syscall_64+0x5c/0xc0
[781162.407684] [<0>] entry_SYSCALL_64_after_hwframe+0x61/0xcb
[781162.407696] PID = 107086, COMM = insmod
[781162.407698] [<0>] print_stack+0x58/0x90 [trace]
[781162.407705] [<0>] query_stack+0x2d/0x70 [trace]
[781162.407712] [<0>] stack_trace_init+0x55/0x1000 [trace]
[781162.407719] [<0>] do_one_initcall+0x48/0x1e0
[781162.407726] [<0>] do_init_module+0x52/0x230
[781162.407733] [<0>] load_module+0x138d/0x1610
[781162.407739] [<0>] __do_sys_finit_module+0xbf/0x120
[781162.407746] [<0>] __x64_sys_finit_module+0x1a/0x20
[781162.407752] [<0>] do_syscall_64+0x5c/0xc0
[781162.407757] [<0>] entry_SYSCALL_64_after_hwframe+0x61/0xcb