If an unused weak function was traced, its call to fentry will still exist, and that call site gets added to the __mcount_loc table. Ftrace uses kallsyms to retrieve a name for each location in __mcount_loc, displays those names in available_filter_functions, and uses them to enable functions via the name matching in set_ftrace_filter/notrace. Enabling these functions does nothing but enable an unused call to ftrace_caller. If a traced weak function is overridden, the symbol of the overriding function is used for it, which will either create duplicate names, or, if the previous function was not traced, incorrectly list it in available_filter_functions as a function that can be traced.

This became an issue with BPF [1], as there is tooling that enables the direct callers via ftrace but then checks to see if the functions were actually enabled. One case involved a function that was marked notrace but was followed by an unused weak function that was traced. The unused function's call to fentry was added to the __mcount_loc section, and kallsyms resolved that location to the untraced function's symbol, as the weak function was overridden. Since the untraced function would never get traced, the BPF check detected this and failed.

The real fix would be to make kallsyms stop reporting addresses inside weak functions as belonging to the function before them. But that would require adding code in the build to record function sizes in kallsyms, so that it knows where a function ends instead of just using the start of the next known symbol. In the meantime, this is a workaround.

Add a FTRACE_MCOUNT_MAX_OFFSET macro. If it is defined, ftrace will ignore any function whose call to fentry/mcount is at an offset from the symbol greater than FTRACE_MCOUNT_MAX_OFFSET. If CONFIG_HAVE_FENTRY is defined for x86, define FTRACE_MCOUNT_MAX_OFFSET to zero (unless IBT is enabled, in which case it is the size of the ENDBR instruction), which will have ftrace ignore all locations that are not at the start of the function (or one instruction after the ENDBR instruction).

A worker thread is added at boot up to scan all the ftrace record entries, and it will mark any that fail the FTRACE_MCOUNT_MAX_OFFSET test as disabled. They will still appear in the available_filter_functions file as:

  __ftrace_invalid_address___<invalid-offset>

(showing the offset that caused the entry to be invalid). This is required for tools that use libtracefs (as trace-cmd does), which scan available_filter_functions and populate set_ftrace_filter and set_ftrace_notrace using indexes of the functions listed in the file. (This is a speedup: enabling thousands of functions by name is an O(n^2) operation and can take minutes to complete, whereas the indexing takes less than a second.) The invalid functions cannot be removed from available_filter_functions, as the names there correspond to the ftrace records in the array that manages them (and the indexing depends on this).

[1] https://lore.kernel.org/all/20220412094923.0abe90955e5db486b7bca279@kernel.org/

Link: https://lkml.kernel.org/r/20220526141912.794c2786@gandalf.local.home

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
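To make the mechanism concrete, here is a minimal sketch of the per-record check the boot-time scan performs. This is an illustration only, not the actual kernel/trace/ftrace.c implementation: the helper name is made up, while kallsyms_lookup_size_offset(), struct dyn_ftrace and FTRACE_FL_DISABLED are real kernel-internal interfaces.

	#ifdef FTRACE_MCOUNT_MAX_OFFSET
	/* Hedged sketch; the real logic lives in kernel/trace/ftrace.c */
	static int example_check_rec(struct dyn_ftrace *rec)
	{
		unsigned long offset;

		/* Ask kallsyms how far the fentry call sits from the symbol start */
		if (!kallsyms_lookup_size_offset(rec->ip, NULL, &offset))
			return -ENOENT;

		/* A large offset means an overridden weak function: disable it */
		if (offset > FTRACE_MCOUNT_MAX_OFFSET) {
			rec->flags |= FTRACE_FL_DISABLED;
			return -EINVAL;
		}

		return 0;
	}
	#endif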
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H

#ifdef CONFIG_FUNCTION_TRACER
#ifndef CC_USING_FENTRY
# error Compiler does not support fentry?
#endif
# define MCOUNT_ADDR		((unsigned long)(__fentry__))
#define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */

/* Ignore unused weak functions which will have non zero offsets */
#ifdef CONFIG_HAVE_FENTRY
# include <asm/ibt.h>
/* Add offset for endbr64 if IBT enabled */
# define FTRACE_MCOUNT_MAX_OFFSET	ENDBR_INSN_SIZE
#endif
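/*
 * Illustration (added, not in the original header): the only fentry
 * offsets this accepts on x86 are therefore:
 *
 *   without IBT (ENDBR_INSN_SIZE == 0):
 *	<func>:	call __fentry__		// offset 0
 *
 *   with CONFIG_X86_KERNEL_IBT (ENDBR_INSN_SIZE == 4):
 *	<func>:	endbr64
 *		call __fentry__		// offset 4
 *
 * Any fentry call at a larger offset can only belong to an overridden
 * weak function that kallsyms folded into the preceding symbol.
 */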

#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif

#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR

#ifndef __ASSEMBLY__
extern atomic_t modifying_ftrace_code;
extern void __fentry__(void);

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
	/*
	 * addr is the address of the mcount call instruction.
	 * recordmcount does the necessary offset calculation.
	 */
	return addr;
}

/*
 * When a ftrace registered caller is tracing a function that is
 * also set by a register_ftrace_direct() call, it needs to be
 * differentiated in the ftrace_caller trampoline. To do this, we
 * place the direct caller in the ORIG_AX part of pt_regs. This
 * tells the ftrace_caller that there's a direct caller.
 */
static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
{
	/* Emulate a call */
	regs->orig_ax = addr;
}
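/*
 * Example (illustrative sketch, not part of the original header): a
 * generic helper along these lines is what ends up calling the function
 * above.  ftrace_find_rec_direct() is a real ftrace lookup; the helper
 * name itself is made up.
 */
static inline void example_call_direct(unsigned long ip, struct pt_regs *regs)
{
	/* Returns 0 if no direct trampoline is attached to this call site */
	unsigned long addr = ftrace_find_rec_direct(ip);

	if (addr)
		arch_ftrace_set_direct_caller(regs, addr);
}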

#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
struct ftrace_regs {
	struct pt_regs		regs;
};

static __always_inline struct pt_regs *
arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
	/* Only when FL_SAVE_REGS is set, cs will be non zero */
	if (!fregs->regs.cs)
		return NULL;
	return &fregs->regs;
}

#define ftrace_instruction_pointer_set(fregs, _ip)	\
	do { (fregs)->regs.ip = (_ip); } while (0)
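/*
 * Example (illustrative sketch, not part of the original header): an
 * ftrace_ops callback can combine the two helpers above to inspect the
 * saved registers and redirect execution, much as live patching does.
 * The callback and replacement function names are made up.
 */
extern void my_replacement_func(void);

static void example_callback(unsigned long ip, unsigned long parent_ip,
			     struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	/* NULL unless the ops was registered with FTRACE_OPS_FL_SAVE_REGS */
	struct pt_regs *regs = arch_ftrace_get_regs(fregs);

	if (!regs)
		return;

	/* Resume in the replacement instead of the traced function */
	ftrace_instruction_pointer_set(fregs, (unsigned long)my_replacement_func);
}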

struct ftrace_ops;
#define ftrace_graph_func ftrace_graph_func
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
		       struct ftrace_ops *op, struct ftrace_regs *fregs);
#else
#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
#endif

#ifdef CONFIG_DYNAMIC_FTRACE

struct dyn_arch_ftrace {
	/* No extra data needed for x86 */
};

#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */


#ifndef __ASSEMBLY__

#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
extern void set_ftrace_ops_ro(void);
#else
static inline void set_ftrace_ops_ro(void) { }
#endif

#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	/*
	 * Compare the symbol name with the system call name. Skip the
	 * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
	 */
	return !strcmp(sym + 3, name + 3) ||
		(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
		(!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
		(!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}
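/*
 * Worked examples for the matcher above (added for illustration), all
 * with name == "sys_read":
 *
 *	sym == "sys_read"		sym + 3  == "_read"	-> match
 *	sym == "__x64_sys_read"		sym + 9  == "_read"	-> match
 *	sym == "__ia32_sys_read"	sym + 10 == "_read"	-> match
 *	sym == "__do_sys_read"		sym + 8  == "_read"	-> match
 */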

#ifndef COMPILE_OFFSETS

#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
#include <linux/compat.h>

/*
 * Because ia32 syscalls do not map to x86_64 syscall numbers,
 * this screws up the trace output when tracing an ia32 task.
 * Instead of reporting bogus syscalls, just do not trace them.
 *
 * If the user really wants these, then they should use the
 * raw syscall tracepoints with filtering.
 */
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
{
	return in_32bit_syscall();
}
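/*
 * Illustrative sketch (not part of the original header): the generic
 * syscall trace code consults the hook above before resolving a syscall
 * number, roughly along these lines.  The helper below is a simplified,
 * made-up stand-in for the logic in kernel/trace/trace_syscalls.c.
 */
static inline int example_get_syscall_nr(struct task_struct *task,
					 struct pt_regs *regs)
{
	/* Bail out rather than report a bogus x86_64 syscall number */
	if (arch_trace_is_compat_syscall(regs))
		return -1;

	return syscall_get_nr(task, regs);
}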
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
#endif /* !COMPILE_OFFSETS */
#endif /* !__ASSEMBLY__ */

#endif /* _ASM_X86_FTRACE_H */