Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-11-02

The following pull-request contains BPF updates for your *net-next* tree.

We've added 30 non-merge commits during the last 7 day(s) which contain
a total of 41 files changed, 1864 insertions(+), 474 deletions(-).

The main changes are:

1) Fix long standing user vs kernel access issue by introducing
   bpf_probe_read_user() and bpf_probe_read_kernel() helpers, from Daniel.

2) Accelerated xskmap lookup, from Björn and Maciej.

3) Support for automatic map pinning in libbpf, from Toke.

4) Cleanup of BTF-enabled raw tracepoints, from Alexei.

5) Various fixes to libbpf and selftests.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit ae8a76fb8b
41 changed files with 1864 additions and 477 deletions
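As a quick illustration of change 1), a kprobe program can now state explicitly which address space it reads from. The following is a minimal sketch, not taken from this series: the helper is declared by its BPF_FUNC_* id in the old selftests style, and the pt_regs field assumes the x86-64 calling convention:

// SPDX-License-Identifier: GPL-2.0
/* Hedged sketch: read a user-space buffer with the new helper instead of
 * the ambiguous bpf_probe_read(). */
#include <linux/bpf.h>
#include <linux/ptrace.h>

#define SEC(name) __attribute__((section(name), used))

/* declare the new helper by its BPF_FUNC_* id (classic selftests style) */
static long (*bpf_probe_read_user)(void *dst, __u32 size,
				   const void *unsafe_ptr) =
	(void *) BPF_FUNC_probe_read_user;

SEC("kprobe/ksys_write")
int trace_write(struct pt_regs *ctx)
{
	char buf[64] = {};

	/* the second argument of ksys_write() is a user pointer, so the
	 * _user variant is the correct choice here (x86-64: %rsi) */
	bpf_probe_read_user(buf, sizeof(buf), (const void *)ctx->si);
	/* ... consume buf ... */
	return 0;
}

char _license[] SEC("license") = "GPL";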
Documentation/bpf/index.rst
@@ -47,6 +47,15 @@ Program types
    prog_flow_dissector
 
 
+Testing BPF
+===========
+
+.. toctree::
+   :maxdepth: 1
+
+   s390
+
+
 .. Links:
 .. _Documentation/networking/filter.txt: ../networking/filter.txt
 .. _man-pages: https://www.kernel.org/doc/man-pages/
Documentation/bpf/s390.rst (new file, 205 lines)
@@ -0,0 +1,205 @@

===================
Testing BPF on s390
===================

1. Introduction
***************

IBM Z mainframes are descendants of the IBM System/360, introduced in 1964.
They are supported by the Linux kernel under the name "s390". This document
describes how to test BPF in an s390 QEMU guest.

2. One-time setup
*****************

The following is required to build and run the test suite:

  * s390 GCC
  * s390 development headers and libraries
  * Clang with BPF support
  * QEMU with s390 support
  * Disk image with s390 rootfs

Debian supports installing compiler and libraries for s390 out of the box.
Users of other distros may use debootstrap in order to set up a Debian chroot::

  sudo debootstrap \
    --variant=minbase \
    --include=sudo \
    testing \
    ./s390-toolchain
  sudo mount --rbind /dev ./s390-toolchain/dev
  sudo mount --rbind /proc ./s390-toolchain/proc
  sudo mount --rbind /sys ./s390-toolchain/sys
  sudo chroot ./s390-toolchain

Once on Debian, the build prerequisites can be installed as follows::

  sudo dpkg --add-architecture s390x
  sudo apt-get update
  sudo apt-get install \
    bc \
    bison \
    cmake \
    debootstrap \
    dwarves \
    flex \
    g++ \
    gcc \
    g++-s390x-linux-gnu \
    gcc-s390x-linux-gnu \
    gdb-multiarch \
    git \
    make \
    python3 \
    qemu-system-misc \
    qemu-utils \
    rsync \
    libcap-dev:s390x \
    libelf-dev:s390x \
    libncurses-dev

The latest Clang targeting BPF can be installed as follows::

  git clone https://github.com/llvm/llvm-project.git
  ln -s ../../clang llvm-project/llvm/tools/
  mkdir llvm-project-build
  cd llvm-project-build
  cmake \
    -DLLVM_TARGETS_TO_BUILD=BPF \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_INSTALL_PREFIX=/opt/clang-bpf \
    ../llvm-project/llvm
  make
  sudo make install
  export PATH=/opt/clang-bpf/bin:$PATH

The disk image can be prepared using a loopback mount and debootstrap::

  qemu-img create -f raw ./s390.img 1G
  sudo losetup -f ./s390.img
  sudo mkfs.ext4 /dev/loopX
  mkdir ./s390.rootfs
  sudo mount /dev/loopX ./s390.rootfs
  sudo debootstrap \
    --foreign \
    --arch=s390x \
    --variant=minbase \
    --include=" \
      iproute2, \
      iputils-ping, \
      isc-dhcp-client, \
      kmod, \
      libcap2, \
      libelf1, \
      netcat, \
      procps" \
    testing \
    ./s390.rootfs
  sudo umount ./s390.rootfs
  sudo losetup -d /dev/loopX

3. Compilation
**************

In addition to the usual Kconfig options required to run the BPF test suite, it
is also helpful to select::

  CONFIG_NET_9P=y
  CONFIG_9P_FS=y
  CONFIG_NET_9P_VIRTIO=y
  CONFIG_VIRTIO_PCI=y

as these options enable a very easy way to share files with the s390 virtual
machine.

Compiling the kernel, modules and test suite, as well as preparing gdb scripts
to simplify debugging, can be done using the following commands::

  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- menuconfig
  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- bzImage modules scripts_gdb
  make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- \
    -C tools/testing/selftests \
    TARGETS=bpf \
    INSTALL_PATH=$PWD/tools/testing/selftests/kselftest_install \
    install

4. Running the test suite
*************************

The virtual machine can be started as follows::

  qemu-system-s390x \
    -cpu max,zpci=on \
    -smp 2 \
    -m 4G \
    -kernel linux/arch/s390/boot/compressed/vmlinux \
    -drive file=./s390.img,if=virtio,format=raw \
    -nographic \
    -append 'root=/dev/vda rw console=ttyS1' \
    -virtfs local,path=./linux,security_model=none,mount_tag=linux \
    -object rng-random,filename=/dev/urandom,id=rng0 \
    -device virtio-rng-ccw,rng=rng0 \
    -netdev user,id=net0 \
    -device virtio-net-ccw,netdev=net0

When using this on a real IBM Z, ``-enable-kvm`` may be added for better
performance. When starting the virtual machine for the first time, disk image
setup must be finalized using the following command::

  /debootstrap/debootstrap --second-stage

The directory with the code built on the host, as well as ``/proc`` and
``/sys``, needs to be mounted as follows::

  mkdir -p /linux
  mount -t 9p linux /linux
  mount -t proc proc /proc
  mount -t sysfs sys /sys

After that, the test suite can be run using the following commands::

  cd /linux/tools/testing/selftests/kselftest_install
  ./run_kselftest.sh

As usual, tests can also be run individually::

  cd /linux/tools/testing/selftests/bpf
  ./test_verifier

5. Debugging
************

It is possible to debug the s390 kernel using the QEMU GDB stub, which is
activated by passing ``-s`` to QEMU.

It is preferable to turn KASLR off, so that gdb knows where to find the
kernel image in memory, by building the kernel with::

  RANDOMIZE_BASE=n

GDB can then be attached using the following command::

  gdb-multiarch -ex 'target remote localhost:1234' ./vmlinux

6. Network
**********

If the network is needed in the virtual machine, e.g. in order to install
additional packages, it can be configured using::

  dhclient eth0

7. Links
********

This document is a compilation of techniques; more comprehensive descriptions
can be found by following these links:

- `Debootstrap <https://wiki.debian.org/EmDebian/CrossDebootstrap>`_
- `Multiarch <https://wiki.debian.org/Multiarch/HOWTO>`_
- `Building LLVM <https://llvm.org/docs/CMake.html>`_
- `Cross-compiling the kernel <https://wiki.gentoo.org/wiki/Embedded_Handbook/General/Cross-compiling_the_kernel>`_
- `QEMU s390x Guest Support <https://wiki.qemu.org/Documentation/Platforms/S390X>`_
- `Plan 9 folder sharing over Virtio <https://wiki.qemu.org/Documentation/9psetup>`_
- `Using GDB with QEMU <https://wiki.osdev.org/Kernel_Debugging#Use_GDB_with_QEMU>`_
arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o	= -pg
 endif
 
 obj-y	:= init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
arch/x86/mm/maccess.c (new file, 43 lines)
@@ -0,0 +1,43 @@

// SPDX-License-Identifier: GPL-2.0-only

#include <linux/uaccess.h>
#include <linux/kernel.h>

#ifdef CONFIG_X86_64
static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
{
	return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}

static __always_inline bool invalid_probe_range(u64 vaddr)
{
	/*
	 * Range covering the highest possible canonical userspace address
	 * as well as non-canonical address range. For the canonical range
	 * we also need to include the userspace guard page.
	 */
	return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
	       canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
}
#else
static __always_inline bool invalid_probe_range(u64 vaddr)
{
	return vaddr < TASK_SIZE_MAX;
}
#endif

long probe_kernel_read_strict(void *dst, const void *src, size_t size)
{
	if (unlikely(invalid_probe_range((unsigned long)src)))
		return -EFAULT;

	return __probe_kernel_read(dst, src, size);
}

long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
{
	if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
		return -EFAULT;

	return __strncpy_from_unsafe(dst, unsafe_addr, count);
}
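The sign-extension trick in canonical_address() is compact enough to deserve a worked example. A small userspace sketch (assuming a 48-bit virtual address width, i.e. 4-level paging) shows why only canonical kernel addresses survive the round-trip:

#include <stdint.h>
#include <stdio.h>

/* Mirrors canonical_address() above; the left shift is done on the
 * unsigned value and only the arithmetic right shift is signed. */
static uint64_t canonical(uint64_t vaddr, uint8_t bits)
{
	return (uint64_t)((int64_t)(vaddr << (64 - bits)) >> (64 - bits));
}

int main(void)
{
	/* canonical user and kernel addresses come back unchanged */
	printf("%d\n", canonical(0x00007fffffffffffULL, 48)
		       == 0x00007fffffffffffULL);	/* prints 1 */
	printf("%d\n", canonical(0xffff800000000000ULL, 48)
		       == 0xffff800000000000ULL);	/* prints 1 */
	/* a non-canonical address changes under sign extension, so
	 * invalid_probe_range() rejects it: canonical(vaddr) != vaddr */
	printf("%d\n", canonical(0x0000800000000000ULL, 48)
		       == 0x0000800000000000ULL);	/* prints 0 */
	return 0;
}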
include/linux/bpf.h
@@ -373,6 +373,11 @@ enum bpf_cgroup_storage_type {
 
 #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
 
+/* The longest tracepoint has 12 args.
+ * See include/trace/bpf_probe.h
+ */
+#define MAX_BPF_FUNC_ARGS 12
+
 struct bpf_prog_stats {
 	u64 cnt;
 	u64 nsecs;
@@ -1004,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
 }
 #endif
 
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-		       struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-				     struct xdp_sock *xs)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
 int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
include/linux/bpf_types.h
@@ -26,6 +26,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
include/linux/uaccess.h
@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
  * happens, handle that and return -EFAULT.
  */
 extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
 extern long __probe_kernel_read(void *dst, const void *src, size_t size);
 
 /*
@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
 extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
 extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
 
+/*
+ * probe_user_write(): safely attempt to write to a location in user space
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
+extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
+
 extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+				       long count);
+extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
 				     long count);
 extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
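For orientation, a hedged sketch of the contract these accessors share, as a hypothetical kernel-side caller would see it (illustration only, not part of this diff):

/* every accessor returns 0 (or a length) on success and -EFAULT on a
 * handled fault; the _strict and _user variants additionally refuse
 * addresses from the wrong half of the user/kernel split */
static void probe_api_demo(const char __user *uptr, const void *kptr)
{
	char ubuf[16], kbuf[16];

	/* user memory: a fault is caught, never an oops */
	if (probe_user_read(ubuf, uptr, sizeof(ubuf)))
		return;

	/* kernel memory: the _strict variant also returns -EFAULT for
	 * user-space or non-canonical addresses (see maccess.c above) */
	if (probe_kernel_read_strict(kbuf, kptr, sizeof(kbuf)))
		return;
}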
include/net/xdp_sock.h
@@ -69,7 +69,14 @@ struct xdp_umem {
 /* Nodes are linked in the struct xdp_sock map_list field, and used to
  * track which maps a certain socket reside in.
  */
-struct xsk_map;
+struct xsk_map {
+	struct bpf_map map;
+	struct list_head __percpu *flush_list;
+	spinlock_t lock; /* Synchronize map updates */
+	struct xdp_sock *xsk_map[];
+};
+
 struct xsk_map_node {
 	struct list_head node;
 	struct xsk_map *map;
@@ -109,8 +116,6 @@ struct xdp_sock {
 struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 /* Used from netdev driver */
 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
@@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 			     struct xdp_sock **map_entry);
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *xs;
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	xs = READ_ONCE(m->xsk_map[key]);
+	return xs;
+}
 
 static inline u64 xsk_umem_extract_addr(u64 addr)
 {
@@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return -ENOTSUPP;
 }
 
-static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
-	return -ENOTSUPP;
-}
-
-static inline void xsk_flush(struct xdp_sock *xs)
-{
-}
-
 static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
 	return false;
@@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
 	return 0;
 }
 
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+				     struct xdp_sock *xs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
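The point of moving struct xsk_map and the element lookup inline into this header is the AF_XDP fast path: the redirect helper no longer pays for an out-of-line call per packet. A sketch of a typical consumer (not from this series; classic bpf_map_def map declaration and manual helper declaration assumed):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

static long (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) =
	(void *) BPF_FUNC_redirect_map;

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

/* one AF_XDP socket per receive queue */
struct bpf_map_def SEC("maps") xsks_map = {
	.type		= BPF_MAP_TYPE_XSKMAP,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u32),
	.max_entries	= 64,
};

SEC("xdp")
int xdp_redirect_xsk(struct xdp_md *ctx)
{
	/* redirect to the socket bound to this queue; the map lookup
	 * behind this helper is the inlined __xsk_map_lookup_elem()
	 * defined above, XDP_PASS is the fallback on a miss */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
}

char _license[] SEC("license") = "GPL";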
include/uapi/linux/bpf.h
@@ -173,6 +173,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+	BPF_PROG_TYPE_TRACING,
 };
 
 enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
+	BPF_TRACE_RAW_TP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -561,10 +563,13 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
  *	Description
  *		For tracing programs, safely attempt to read *size* bytes from
- *		address *src* and store the data in *dst*.
+ *		kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ *		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ *		instead.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
@@ -1426,45 +1431,14 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  *	Description
- *		Copy a NUL terminated string from an unsafe address
- *		*unsafe_ptr* to *dst*. The *size* should include the
- *		terminating NUL byte. In case the string length is smaller than
- *		*size*, the target is not padded with further NUL bytes. If the
- *		string length is larger than *size*, just *size*-1 bytes are
- *		copied and the last byte is set to NUL.
+ *		Copy a NUL terminated string from an unsafe kernel address
+ *		*unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ *		more details.
  *
- *		On success, the length of the copied string is returned. This
- *		makes this helper useful in tracing programs for reading
- *		strings, and more importantly to get its length at runtime. See
- *		the following snippet:
- *
- *		::
- *
- *			SEC("kprobe/sys_open")
- *			void bpf_sys_open(struct pt_regs *ctx)
- *			{
- *			        char buf[PATHLEN]; // PATHLEN is defined to 256
- *			        int res = bpf_probe_read_str(buf, sizeof(buf),
- *				                             ctx->di);
- *
- *				// Consume buf, for example push it to
- *				// userspace via bpf_perf_event_output(); we
- *				// can use res (the string length) as event
- *				// size, after checking its boundaries.
- *			}
- *
- *		In comparison, using **bpf_probe_read()** helper here instead
- *		to read the string would require to estimate the length at
- *		compile time, and would often result in copying more memory
- *		than necessary.
- *
- *		Another useful use case is when parsing individual process
- *		arguments or individual environment variables navigating
- *		*current*\ **->mm->arg_start** and *current*\
- *		**->mm->env_start**: using this helper and the return value,
- *		one can quickly iterate at the right offset of the memory area.
+ *		Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ *		instead.
  *	Return
  *		On success, the strictly positive length of the string,
  *		including the trailing NUL character. On error, a negative
@@ -2775,6 +2749,72 @@ union bpf_attr {
  *		restricted to raw_tracepoint bpf programs.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ *	Description
+ *		Safely attempt to read *size* bytes from user space address
+ *		*unsafe_ptr* and store the data in *dst*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ *	Description
+ *		Safely attempt to read *size* bytes from kernel space address
+ *		*unsafe_ptr* and store the data in *dst*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ *	Description
+ *		Copy a NUL terminated string from an unsafe user address
+ *		*unsafe_ptr* to *dst*. The *size* should include the
+ *		terminating NUL byte. In case the string length is smaller than
+ *		*size*, the target is not padded with further NUL bytes. If the
+ *		string length is larger than *size*, just *size*-1 bytes are
+ *		copied and the last byte is set to NUL.
+ *
+ *		On success, the length of the copied string is returned. This
+ *		makes this helper useful in tracing programs for reading
+ *		strings, and more importantly to get its length at runtime. See
+ *		the following snippet:
+ *
+ *		::
+ *
+ *			SEC("kprobe/sys_open")
+ *			void bpf_sys_open(struct pt_regs *ctx)
+ *			{
+ *			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ *			        int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ *				                                  ctx->di);
+ *
+ *				// Consume buf, for example push it to
+ *				// userspace via bpf_perf_event_output(); we
+ *				// can use res (the string length) as event
+ *				// size, after checking its boundaries.
+ *			}
+ *
+ *		In comparison, using **bpf_probe_read_user()** helper here
+ *		instead to read the string would require to estimate the length
+ *		at compile time, and would often result in copying more memory
+ *		than necessary.
+ *
+ *		Another useful use case is when parsing individual process
+ *		arguments or individual environment variables navigating
+ *		*current*\ **->mm->arg_start** and *current*\
+ *		**->mm->env_start**: using this helper and the return value,
+ *		one can quickly iterate at the right offset of the memory area.
+ *	Return
+ *		On success, the strictly positive length of the string,
+ *		including the trailing NUL character. On error, a negative
+ *		value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ *	Description
+ *		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ *		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ *	Return
+ *		On success, the strictly positive length of the string, including
+ *		the trailing NUL character. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2888,7 +2928,11 @@ union bpf_attr {
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
 	FN(tcp_gen_syncookie),		\
-	FN(skb_output),
+	FN(skb_output),			\
+	FN(probe_read_user),		\
+	FN(probe_read_kernel),		\
+	FN(probe_read_user_str),	\
+	FN(probe_read_kernel_str),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
kernel/bpf/core.c
@@ -668,9 +668,6 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 {
 	struct latch_tree_node *n;
 
-	if (!bpf_jit_kallsyms_enabled())
-		return NULL;
-
 	n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
 	return n ?
 	       container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
@@ -1309,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
 }
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
+u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
 {
 	memset(dst, 0, size);
 	return -EFAULT;
 }
+
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1569,9 +1567,9 @@ out:
 	LDST(W,  u32)
 	LDST(DW, u64)
 #undef LDST
 #define LDX_PROBE(SIZEOP, SIZE)						\
 	LDX_PROBE_MEM_##SIZEOP:						\
-		bpf_probe_read(&DST, SIZE, (const void *)(long) SRC);	\
+		bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC);	\
 		CONT;
 	LDX_PROBE(B,  1)
 	LDX_PROBE(H,  2)
kernel/bpf/syscall.c
@@ -1579,7 +1579,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 				 u32 btf_id)
 {
 	switch (prog_type) {
-	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_TRACING:
 		if (btf_id > BTF_MAX_TYPE)
 			return -EINVAL;
 		break;
@@ -1842,13 +1842,13 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 		return PTR_ERR(prog);
 
 	if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+	    prog->type != BPF_PROG_TYPE_TRACING &&
 	    prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
 		err = -EINVAL;
 		goto out_put_prog;
 	}
 
-	if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT &&
-	    prog->aux->attach_btf_id) {
+	if (prog->type == BPF_PROG_TYPE_TRACING) {
 		if (attr->raw_tracepoint.name) {
 			/* raw_tp name should not be specified in raw_tp
 			 * programs that were verified via in-kernel BTF info
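For context, attaching the new program type still goes through BPF_RAW_TRACEPOINT_OPEN; only the name is dropped, since the target was fixed by attach_btf_id at load time. A hypothetical userspace fragment (sketch, error handling elided):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* attach an already-loaded BPF_PROG_TYPE_TRACING program; per the check
 * above, raw_tracepoint.name must stay zero for this program type */
static int attach_tracing_prog(int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.raw_tracepoint.name = 0;
	attr.raw_tracepoint.prog_fd = prog_fd;

	return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}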
kernel/bpf/verifier.c
@@ -6279,6 +6279,11 @@ static int check_return_code(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		break;
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		if (!env->prog->aux->attach_btf_id)
+			return 0;
+		range = tnum_const(0);
+		break;
 	default:
 		return 0;
 	}
@@ -9376,24 +9381,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
 	u32 btf_id = prog->aux->attach_btf_id;
+	const char prefix[] = "btf_trace_";
 	const struct btf_type *t;
 	const char *tname;
 
-	if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT && btf_id) {
-		const char prefix[] = "btf_trace_";
+	if (prog->type != BPF_PROG_TYPE_TRACING)
+		return 0;
 
-		t = btf_type_by_id(btf_vmlinux, btf_id);
-		if (!t) {
-			verbose(env, "attach_btf_id %u is invalid\n", btf_id);
-			return -EINVAL;
-		}
+	if (!btf_id) {
+		verbose(env, "Tracing programs must provide btf_id\n");
+		return -EINVAL;
+	}
+	t = btf_type_by_id(btf_vmlinux, btf_id);
+	if (!t) {
+		verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+		return -EINVAL;
+	}
+	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+	if (!tname) {
+		verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+		return -EINVAL;
+	}
+
+	switch (prog->expected_attach_type) {
+	case BPF_TRACE_RAW_TP:
 		if (!btf_type_is_typedef(t)) {
 			verbose(env, "attach_btf_id %u is not a typedef\n",
 				btf_id);
 			return -EINVAL;
 		}
-		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
-		if (!tname || strncmp(prefix, tname, sizeof(prefix) - 1)) {
+		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
 			verbose(env, "attach_btf_id %u points to wrong type name %s\n",
 				btf_id, tname);
 			return -EINVAL;
@@ -9414,8 +9431,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_name = tname;
 		prog->aux->attach_func_proto = t;
 		prog->aux->attach_btf_trace = true;
+		return 0;
+	default:
+		return -EINVAL;
 	}
-	return 0;
 }
 
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
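Taken together with the syscall.c change above, loading such a program at the raw syscall level looks roughly like this. A sketch only: resolving the BTF id of the btf_trace_* typedef in the vmlinux BTF is left out, and error handling is elided:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int load_tracing_prog(const struct bpf_insn *insns, int insn_cnt,
			     __u32 btf_trace_typedef_id)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type		  = BPF_PROG_TYPE_TRACING;
	attr.expected_attach_type = BPF_TRACE_RAW_TP;
	/* id of a "btf_trace_<name>" typedef, e.g. btf_trace_kfree_skb;
	 * the verifier rejects anything else, as shown above */
	attr.attach_btf_id	  = btf_trace_typedef_id;
	attr.insns		  = (__u64)(unsigned long)insns;
	attr.insn_cnt		  = insn_cnt;
	attr.license		  = (__u64)(unsigned long)"GPL";

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}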
net/xdp/xskmap.c
@@ -9,13 +9,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-struct xsk_map {
-	struct bpf_map map;
-	struct xdp_sock **xsk_map;
-	struct list_head __percpu *flush_list;
-	spinlock_t lock; /* Synchronize map updates */
-};
-
 int xsk_map_inc(struct xsk_map *map)
 {
 	struct bpf_map *m = &map->map;
@@ -80,9 +73,10 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
 
 static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 {
+	struct bpf_map_memory mem;
+	int cpu, err, numa_node;
 	struct xsk_map *m;
-	int cpu, err;
-	u64 cost;
+	u64 cost, size;
 
 	if (!capable(CAP_NET_ADMIN))
 		return ERR_PTR(-EPERM);
@@ -92,44 +86,35 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
 		return ERR_PTR(-EINVAL);
 
-	m = kzalloc(sizeof(*m), GFP_USER);
-	if (!m)
+	numa_node = bpf_map_attr_numa_node(attr);
+	size = struct_size(m, xsk_map, attr->max_entries);
+	cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
+
+	err = bpf_map_charge_init(&mem, cost);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	m = bpf_map_area_alloc(size, numa_node);
+	if (!m) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	bpf_map_init_from_attr(&m->map, attr);
+	bpf_map_charge_move(&m->map.memory, &mem);
 	spin_lock_init(&m->lock);
 
-	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
-	cost += sizeof(struct list_head) * num_possible_cpus();
-
-	/* Notice returns -EPERM on if map size is larger than memlock limit */
-	err = bpf_map_charge_init(&m->map.memory, cost);
-	if (err)
-		goto free_m;
-
-	err = -ENOMEM;
-
 	m->flush_list = alloc_percpu(struct list_head);
-	if (!m->flush_list)
-		goto free_charge;
+	if (!m->flush_list) {
+		bpf_map_charge_finish(&m->map.memory);
+		bpf_map_area_free(m);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	for_each_possible_cpu(cpu)
 		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
 
-	m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
-					sizeof(struct xdp_sock *),
-					m->map.numa_node);
-	if (!m->xsk_map)
-		goto free_percpu;
 	return &m->map;
-
-free_percpu:
-	free_percpu(m->flush_list);
-free_charge:
-	bpf_map_charge_finish(&m->map.memory);
-free_m:
-	kfree(m);
-	return ERR_PTR(err);
 }
 
 static void xsk_map_free(struct bpf_map *map)
@@ -139,8 +124,7 @@ static void xsk_map_free(struct bpf_map *map)
 	bpf_clear_redirect_map(map);
 	synchronize_net();
 	free_percpu(m->flush_list);
-	bpf_map_area_free(m->xsk_map);
-	kfree(m);
+	bpf_map_area_free(m);
 }
 
 static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -160,45 +144,20 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *xs;
+	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+	struct bpf_insn *insn = insn_buf;
 
-	if (key >= map->max_entries)
-		return NULL;
-
-	xs = READ_ONCE(m->xsk_map[key]);
-	return xs;
-}
-
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-		       struct xdp_sock *xs)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
-	int err;
-
-	err = xsk_rcv(xs, xdp);
-	if (err)
-		return err;
-
-	if (!xs->flush_node.prev)
-		list_add(&xs->flush_node, flush_list);
-
-	return 0;
-}
-
-void __xsk_map_flush(struct bpf_map *map)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
-	struct xdp_sock *xs, *tmp;
-
-	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
-		xsk_flush(xs);
-		__list_del_clearprev(&xs->flush_node);
-	}
+	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+	*insn++ = BPF_MOV64_IMM(ret, 0);
+	return insn - insn_buf;
 }
 
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
@@ -312,6 +271,7 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_free = xsk_map_free,
 	.map_get_next_key = xsk_map_get_next_key,
 	.map_lookup_elem = xsk_map_lookup_elem,
+	.map_gen_lookup = xsk_map_gen_lookup,
 	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
 	.map_update_elem = xsk_map_update_elem,
 	.map_delete_elem = xsk_map_delete_elem,
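The eight instructions emitted by xsk_map_gen_lookup() are patched inline in place of a bpf_map_lookup_elem() call, removing the per-lookup function call. Their rough C equivalent, for readability only (on entry r1 holds the map pointer and r2 the key pointer; the names here are a sketch, not kernel code):

/* what the patched-in instruction sequence computes */
static struct xdp_sock *inlined_lookup(struct xsk_map *m, __u32 *key)
{
	__u32 index = *key;			/* BPF_LDX_MEM(BPF_W, ...)   */

	if (index >= m->map.max_entries)	/* BPF_JMP_IMM(BPF_JGE, ...) */
		return NULL;			/* BPF_MOV64_IMM(ret, 0)     */

	/* shift-and-add forms &m->xsk_map[index], then a single load */
	return m->xsk_map[index];		/* BPF_LDX_MEM(BPF_SIZEOF..) */
}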
@ -138,24 +138,19 @@ static const struct bpf_func_proto bpf_override_return_proto = {
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
|
BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
|
||||||
|
const void __user *, unsafe_ptr)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret = probe_user_read(dst, unsafe_ptr, size);
|
||||||
|
|
||||||
ret = security_locked_down(LOCKDOWN_BPF_READ);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
ret = probe_kernel_read(dst, unsafe_ptr, size);
|
|
||||||
if (unlikely(ret < 0))
|
if (unlikely(ret < 0))
|
||||||
out:
|
|
||||||
memset(dst, 0, size);
|
memset(dst, 0, size);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct bpf_func_proto bpf_probe_read_proto = {
|
static const struct bpf_func_proto bpf_probe_read_user_proto = {
|
||||||
.func = bpf_probe_read,
|
.func = bpf_probe_read_user,
|
||||||
.gpl_only = true,
|
.gpl_only = true,
|
||||||
.ret_type = RET_INTEGER,
|
.ret_type = RET_INTEGER,
|
||||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||||
|
@ -163,7 +158,128 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
|
||||||
.arg3_type = ARG_ANYTHING,
|
.arg3_type = ARG_ANYTHING,
|
||||||
};
|
};
|
||||||
|
|
||||||
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
|
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
|
||||||
|
const void __user *, unsafe_ptr)
|
||||||
|
{
|
||||||
|
int ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
|
||||||
|
|
||||||
|
if (unlikely(ret < 0))
|
||||||
|
memset(dst, 0, size);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
|
||||||
|
.func = bpf_probe_read_user_str,
|
||||||
|
.gpl_only = true,
|
||||||
|
.ret_type = RET_INTEGER,
|
||||||
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||||
|
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
|
.arg3_type = ARG_ANYTHING,
|
||||||
|
};
|
||||||
|
|
||||||
|
static __always_inline int
|
||||||
|
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
|
||||||
|
const bool compat)
|
||||||
|
{
|
||||||
|
int ret = security_locked_down(LOCKDOWN_BPF_READ);
|
||||||
|
|
||||||
|
if (unlikely(ret < 0))
|
||||||
|
goto out;
|
||||||
|
ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
|
||||||
|
probe_kernel_read_strict(dst, unsafe_ptr, size);
|
||||||
|
if (unlikely(ret < 0))
|
||||||
|
out:
|
||||||
|
memset(dst, 0, size);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
|
||||||
|
const void *, unsafe_ptr)
|
||||||
|
{
|
||||||
|
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
|
||||||
|
.func = bpf_probe_read_kernel,
|
||||||
|
.gpl_only = true,
|
||||||
|
.ret_type = RET_INTEGER,
|
||||||
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||||
|
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
|
.arg3_type = ARG_ANYTHING,
|
||||||
|
};
|
||||||
|
|
||||||
|
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
|
||||||
|
const void *, unsafe_ptr)
|
||||||
|
{
|
||||||
|
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_func_proto bpf_probe_read_compat_proto = {
|
||||||
|
.func = bpf_probe_read_compat,
|
||||||
|
.gpl_only = true,
|
||||||
|
.ret_type = RET_INTEGER,
|
||||||
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||||
|
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
|
.arg3_type = ARG_ANYTHING,
|
||||||
|
};
|
||||||
|
|
||||||
|
static __always_inline int
|
||||||
|
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
|
||||||
|
const bool compat)
|
||||||
|
{
|
||||||
|
int ret = security_locked_down(LOCKDOWN_BPF_READ);
|
||||||
|
|
||||||
|
if (unlikely(ret < 0))
|
||||||
|
goto out;
|
||||||
|
/*
|
||||||
|
* The strncpy_from_unsafe_*() call will likely not fill the entire
|
||||||
|
* buffer, but that's okay in this circumstance as we're probing
|
||||||
|
* arbitrary memory anyway similar to bpf_probe_read_*() and might
|
||||||
|
* as well probe the stack. Thus, memory is explicitly cleared
|
||||||
|
* only in error case, so that improper users ignoring return
|
||||||
|
* code altogether don't copy garbage; otherwise length of string
|
||||||
|
* is returned that can be used for bpf_perf_event_output() et al.
|
||||||
|
*/
|
||||||
|
ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
|
||||||
|
strncpy_from_unsafe_strict(dst, unsafe_ptr, size);
|
||||||
|
if (unlikely(ret < 0))
|
||||||
|
out:
|
||||||
|
memset(dst, 0, size);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
|
||||||
|
const void *, unsafe_ptr)
|
||||||
|
{
|
||||||
|
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
|
||||||
|
.func = bpf_probe_read_kernel_str,
|
||||||
|
.gpl_only = true,
|
||||||
|
.ret_type = RET_INTEGER,
|
||||||
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||||
|
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
|
.arg3_type = ARG_ANYTHING,
|
||||||
|
};
|
||||||
|
|
||||||
|
BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
|
||||||
|
const void *, unsafe_ptr)
|
||||||
|
{
|
||||||
|
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
|
||||||
|
.func = bpf_probe_read_compat_str,
|
||||||
|
.gpl_only = true,
|
||||||
|
.ret_type = RET_INTEGER,
|
||||||
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||||
|
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
|
.arg3_type = ARG_ANYTHING,
|
||||||
|
};
|
||||||
|
|
||||||
|
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
|
||||||
u32, size)
|
u32, size)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -186,10 +302,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
if (unlikely(!nmi_uaccess_okay()))
|
if (unlikely(!nmi_uaccess_okay()))
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
if (!access_ok(unsafe_ptr, size))
|
|
||||||
return -EPERM;
|
|
||||||
|
|
||||||
return probe_kernel_write(unsafe_ptr, src, size);
|
return probe_user_write(unsafe_ptr, src, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct bpf_func_proto bpf_probe_write_user_proto = {
|
static const struct bpf_func_proto bpf_probe_write_user_proto = {
|
||||||
|
@ -585,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
|
||||||
.arg2_type = ARG_ANYTHING,
|
.arg2_type = ARG_ANYTHING,
|
||||||
};
|
};
|
||||||
|
|
||||||
BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
|
|
||||||
const void *, unsafe_ptr)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = security_locked_down(LOCKDOWN_BPF_READ);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The strncpy_from_unsafe() call will likely not fill the entire
|
|
||||||
* buffer, but that's okay in this circumstance as we're probing
|
|
||||||
* arbitrary memory anyway similar to bpf_probe_read() and might
|
|
||||||
* as well probe the stack. Thus, memory is explicitly cleared
|
|
||||||
* only in error case, so that improper users ignoring return
|
|
||||||
* code altogether don't copy garbage; otherwise length of string
|
|
||||||
* is returned that can be used for bpf_perf_event_output() et al.
|
|
||||||
*/
|
|
||||||
ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
|
|
||||||
if (unlikely(ret < 0))
|
|
||||||
out:
|
|
||||||
memset(dst, 0, size);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const struct bpf_func_proto bpf_probe_read_str_proto = {
|
|
||||||
.func = bpf_probe_read_str,
|
|
||||||
.gpl_only = true,
|
|
||||||
.ret_type = RET_INTEGER,
|
|
||||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
|
||||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
|
||||||
.arg3_type = ARG_ANYTHING,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct send_signal_irq_work {
|
struct send_signal_irq_work {
|
||||||
struct irq_work irq_work;
|
struct irq_work irq_work;
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
@ -699,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||||
return &bpf_map_pop_elem_proto;
|
return &bpf_map_pop_elem_proto;
|
||||||
case BPF_FUNC_map_peek_elem:
|
case BPF_FUNC_map_peek_elem:
|
||||||
return &bpf_map_peek_elem_proto;
|
return &bpf_map_peek_elem_proto;
|
||||||
case BPF_FUNC_probe_read:
|
|
||||||
return &bpf_probe_read_proto;
|
|
||||||
case BPF_FUNC_ktime_get_ns:
|
case BPF_FUNC_ktime_get_ns:
|
||||||
return &bpf_ktime_get_ns_proto;
|
return &bpf_ktime_get_ns_proto;
|
||||||
case BPF_FUNC_tail_call:
|
case BPF_FUNC_tail_call:
|
||||||
|
@ -727,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||||
return &bpf_current_task_under_cgroup_proto;
|
return &bpf_current_task_under_cgroup_proto;
|
||||||
case BPF_FUNC_get_prandom_u32:
|
case BPF_FUNC_get_prandom_u32:
|
||||||
return &bpf_get_prandom_u32_proto;
|
return &bpf_get_prandom_u32_proto;
|
||||||
|
case BPF_FUNC_probe_read_user:
|
||||||
|
return &bpf_probe_read_user_proto;
|
||||||
|
case BPF_FUNC_probe_read_kernel:
|
||||||
|
return &bpf_probe_read_kernel_proto;
|
||||||
|
case BPF_FUNC_probe_read:
|
||||||
|
return &bpf_probe_read_compat_proto;
|
||||||
|
case BPF_FUNC_probe_read_user_str:
|
||||||
|
return &bpf_probe_read_user_str_proto;
|
||||||
|
case BPF_FUNC_probe_read_kernel_str:
|
||||||
|
return &bpf_probe_read_kernel_str_proto;
|
||||||
case BPF_FUNC_probe_read_str:
|
case BPF_FUNC_probe_read_str:
|
||||||
return &bpf_probe_read_str_proto;
|
return &bpf_probe_read_compat_str_proto;
|
||||||
#ifdef CONFIG_CGROUPS
|
#ifdef CONFIG_CGROUPS
|
||||||
case BPF_FUNC_get_current_cgroup_id:
|
kernel/trace/bpf_trace.c

 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;

@@ -1055,10 +1142,6 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	switch (func_id) {
 	case BPF_FUNC_perf_event_output:
 		return &bpf_perf_event_output_proto_raw_tp;
-#ifdef CONFIG_NET
-	case BPF_FUNC_skb_output:
-		return &bpf_skb_output_proto;
-#endif
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_raw_tp;
 	case BPF_FUNC_get_stack:
@@ -1068,20 +1151,44 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+#ifdef CONFIG_NET
+	case BPF_FUNC_skb_output:
+		return &bpf_skb_output_proto;
+#endif
+	default:
+		return raw_tp_prog_func_proto(func_id, prog);
+	}
+}
+
 static bool raw_tp_prog_is_valid_access(int off, int size,
 					enum bpf_access_type type,
 					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
-	/* largest tracepoint in the kernel has 12 args */
-	if (off < 0 || off >= sizeof(__u64) * 12)
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
 		return false;
 	if (type != BPF_READ)
 		return false;
 	if (off % size != 0)
 		return false;
-	if (!prog->aux->attach_btf_id)
-		return true;
-	return btf_ctx_access(off, size, type, prog, info);
+	return true;
+}
+
+static bool tracing_prog_is_valid_access(int off, int size,
+					 enum bpf_access_type type,
+					 const struct bpf_prog *prog,
+					 struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	return btf_ctx_access(off, size, type, prog, info);
 }
 
@@ -1093,6 +1200,14 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 };
 
+const struct bpf_verifier_ops tracing_verifier_ops = {
+	.get_func_proto  = tracing_prog_func_proto,
+	.is_valid_access = tracing_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops tracing_prog_ops = {
+};
+
 static bool raw_tp_writable_prog_is_valid_access(int off, int size,
 						 enum bpf_access_type type,
 						 const struct bpf_prog *prog,
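The net effect above: bpf_skb_output moves out of the raw_tp helper set into the new tracing program type, whose is_valid_access defers verified pointer walks to btf_ctx_access(). A minimal sketch of a program in the "tp_btf" section that libbpf now maps to BPF_PROG_TYPE_TRACING, modeled loosely on the kfree_skb selftest in this tree (struct layout abbreviated, names illustrative, not part of this diff):

    /* tp_btf programs receive BTF-typed tracepoint arguments: members of
     * ctx are checked against kernel BTF by the verifier, so they can be
     * read directly instead of through bpf_probe_read().
     */
    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct sk_buff;

    /* mirrors the raw tracepoint's argument list */
    struct trace_kfree_skb {
    	struct sk_buff *skb;
    	void *location;
    };

    SEC("tp_btf/kfree_skb")
    int trace_kfree_skb(struct trace_kfree_skb *ctx)
    {
    	/* direct, verifier-checked context access */
    	void *loc = ctx->location;

    	return loc ? 1 : 0;
    }

    char _license[] SEC("license") = "GPL";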
112 lib/test_bpf.c

@@ -6859,34 +6859,128 @@ err_page0:
 	return NULL;
 }
 
-static __init int test_skb_segment(void)
+static __init struct sk_buff *build_test_skb_linear_no_head_frag(void)
 {
+	unsigned int alloc_size = 2000;
+	unsigned int headroom = 102, doffset = 72, data_size = 1308;
+	struct sk_buff *skb[2];
+	int i;
+
+	/* skbs linked in a frag_list, both with linear data, with head_frag=0
+	 * (data allocated by kmalloc), both have tcp data of 1308 bytes
+	 * (total payload is 2616 bytes).
+	 * Data offset is 72 bytes (40 ipv6 hdr, 32 tcp hdr). Some headroom.
+	 */
+	for (i = 0; i < 2; i++) {
+		skb[i] = alloc_skb(alloc_size, GFP_KERNEL);
+		if (!skb[i]) {
+			if (i == 0)
+				goto err_skb0;
+			else
+				goto err_skb1;
+		}
+
+		skb[i]->protocol = htons(ETH_P_IPV6);
+		skb_reserve(skb[i], headroom);
+		skb_put(skb[i], doffset + data_size);
+		skb_reset_network_header(skb[i]);
+		if (i == 0)
+			skb_reset_mac_header(skb[i]);
+		else
+			skb_set_mac_header(skb[i], -ETH_HLEN);
+		__skb_pull(skb[i], doffset);
+	}
+
+	/* setup shinfo.
+	 * mimic bpf_skb_proto_4_to_6, which resets gso_segs and assigns a
+	 * reduced gso_size.
+	 */
+	skb_shinfo(skb[0])->gso_size = 1288;
+	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV6 | SKB_GSO_DODGY;
+	skb_shinfo(skb[0])->gso_segs = 0;
+	skb_shinfo(skb[0])->frag_list = skb[1];
+
+	/* adjust skb[0]'s len */
+	skb[0]->len += skb[1]->len;
+	skb[0]->data_len += skb[1]->len;
+	skb[0]->truesize += skb[1]->truesize;
+
+	return skb[0];
+
+err_skb1:
+	kfree_skb(skb[0]);
+err_skb0:
+	return NULL;
+}
+
+struct skb_segment_test {
+	const char *descr;
+	struct sk_buff *(*build_skb)(void);
 	netdev_features_t features;
+};
+
+static struct skb_segment_test skb_segment_tests[] __initconst = {
+	{
+		.descr = "gso_with_rx_frags",
+		.build_skb = build_test_skb,
+		.features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+			    NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM
+	},
+	{
+		.descr = "gso_linear_no_head_frag",
+		.build_skb = build_test_skb_linear_no_head_frag,
+		.features = NETIF_F_SG | NETIF_F_FRAGLIST |
+			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
+			    NETIF_F_LLTX_BIT | NETIF_F_GRO |
+			    NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
+			    NETIF_F_HW_VLAN_STAG_TX_BIT
+	}
+};
+
+static __init int test_skb_segment_single(const struct skb_segment_test *test)
+{
 	struct sk_buff *skb, *segs;
 	int ret = -1;
 
-	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
-		   NETIF_F_IPV6_CSUM;
-	features |= NETIF_F_RXCSUM;
-	skb = build_test_skb();
+	skb = test->build_skb();
 	if (!skb) {
 		pr_info("%s: failed to build_test_skb", __func__);
 		goto done;
 	}
 
-	segs = skb_segment(skb, features);
+	segs = skb_segment(skb, test->features);
 	if (!IS_ERR(segs)) {
 		kfree_skb_list(segs);
 		ret = 0;
-		pr_info("%s: success in skb_segment!", __func__);
-	} else {
-		pr_info("%s: failed in skb_segment!", __func__);
 	}
 	kfree_skb(skb);
 done:
 	return ret;
 }
 
+static __init int test_skb_segment(void)
+{
+	int i, err_cnt = 0, pass_cnt = 0;
+
+	for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) {
+		const struct skb_segment_test *test = &skb_segment_tests[i];
+
+		pr_info("#%d %s ", i, test->descr);
+
+		if (test_skb_segment_single(test)) {
+			pr_cont("FAIL\n");
+			err_cnt++;
+		} else {
+			pr_cont("PASS\n");
+			pass_cnt++;
+		}
+	}
+
+	pr_info("%s: Summary: %d PASSED, %d FAILED\n", __func__,
+		pass_cnt, err_cnt);
+	return err_cnt ? -EINVAL : 0;
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
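With the table-driven rework, adding another segmentation case needs only a builder function and one array entry. A hypothetical example (builder body, name, and feature set invented purely for illustration):

    static __init struct sk_buff *build_test_skb_my_case(void)
    {
    	/* construct and return the skb under test, NULL on failure;
    	 * reusing an existing builder here as a placeholder
    	 */
    	return build_test_skb();
    }

    /* ...then appended to skb_segment_tests[]: */
    {
    	.descr = "my_new_gso_case",
    	.build_skb = build_test_skb_my_case,
    	.features = NETIF_F_SG | NETIF_F_GSO_PARTIAL
    },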
70 mm/maccess.c

@@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size)
 	return ret ? -EFAULT : 0;
 }
 
+static __always_inline long
+probe_write_common(void __user *dst, const void *src, size_t size)
+{
+	long ret;
+
+	pagefault_disable();
+	ret = __copy_to_user_inatomic(dst, src, size);
+	pagefault_enable();
+
+	return ret ? -EFAULT : 0;
+}
+
 /**
  * probe_kernel_read(): safely attempt to read from a kernel-space location
  * @dst: pointer to the buffer that shall take the data
@@ -31,11 +43,20 @@ probe_read_common(void *dst, const void __user *src, size_t size)
  * do_page_fault() doesn't attempt to take mmap_sem.  This makes
  * probe_kernel_read() suitable for use within regions where the caller
  * already holds mmap_sem, or other locks which nest inside mmap_sem.
+ *
+ * probe_kernel_read_strict() is the same as probe_kernel_read() except for
+ * the case where architectures have non-overlapping user and kernel address
+ * ranges: probe_kernel_read_strict() will additionally return -EFAULT for
+ * probing memory on a user address range where probe_user_read() is supposed
+ * to be used instead.
  */
 
 long __weak probe_kernel_read(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_read")));
 
+long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
+    __attribute__((alias("__probe_kernel_read")));
+
 long __probe_kernel_read(void *dst, const void *src, size_t size)
 {
 	long ret;
@@ -85,6 +106,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
+
 long __weak probe_kernel_write(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_write")));
 
@@ -94,15 +116,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 	mm_segment_t old_fs = get_fs();
 
 	set_fs(KERNEL_DS);
-	pagefault_disable();
-	ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
-	pagefault_enable();
+	ret = probe_write_common((__force void __user *)dst, src, size);
 	set_fs(old_fs);
 
-	return ret ? -EFAULT : 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(probe_kernel_write);
 
+/**
+ * probe_user_write(): safely attempt to write to a user-space location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+
+long __weak probe_user_write(void __user *dst, const void *src, size_t size)
+    __attribute__((alias("__probe_user_write")));
+
+long __probe_user_write(void __user *dst, const void *src, size_t size)
+{
+	long ret = -EFAULT;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(USER_DS);
+	if (access_ok(dst, size))
+		ret = probe_write_common(dst, src, size);
+	set_fs(old_fs);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(probe_user_write);
+
 /**
  * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
@@ -120,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_kernel_write);
  *
  * If @count is smaller than the length of the string, copies @count-1 bytes,
  * sets the last byte of @dst buffer to NUL and returns @count.
+ *
+ * strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
+ * for the case where architectures have non-overlapping user and kernel address
+ * ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
+ * probing memory on a user address range where strncpy_from_unsafe_user() is
+ * supposed to be used instead.
  */
-long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+
+long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+    __attribute__((alias("__strncpy_from_unsafe")));
+
+long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+				       long count)
+    __attribute__((alias("__strncpy_from_unsafe")));
+
+long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 {
 	mm_segment_t old_fs = get_fs();
 	const void *src = unsafe_addr;
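probe_user_write() mirrors probe_kernel_write() but switches to USER_DS and insists on access_ok(), so a fault in either direction surfaces as -EFAULT instead of a sleeping page fault. A hypothetical kernel-side caller, not from this diff, just to show the contract:

    /* Sketch: poke one word of the current task's user memory from a
     * context where sleeping on a page fault is not acceptable.
     * Returns 0 on success or -EFAULT.
     */
    static long poke_user_u64(u64 __user *uaddr, u64 val)
    {
    	return probe_user_write(uaddr, &val, sizeof(val));
    }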
net/xdp/xsk.c

@@ -196,7 +196,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
 	return false;
 }
 
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 len;
 
@@ -212,7 +212,7 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
 }
 
-void xsk_flush(struct xdp_sock *xs)
+static void xsk_flush(struct xdp_sock *xs)
 {
 	xskq_produce_flush_desc(xs->rx);
 	xs->sk.sk_data_ready(&xs->sk);
@@ -264,6 +264,35 @@ out_unlock:
 	return err;
 }
 
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	int err;
+
+	err = xsk_rcv(xs, xdp);
+	if (err)
+		return err;
+
+	if (!xs->flush_node.prev)
+		list_add(&xs->flush_node, flush_list);
+
+	return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	struct xdp_sock *xs, *tmp;
+
+	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+		xsk_flush(xs);
+		__list_del_clearprev(&xs->flush_node);
+	}
+}
+
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
 	xskq_produce_flush_addr_n(umem->cq, nb_entries);
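__xsk_map_redirect() and __xsk_map_flush() carry the accelerated xskmap lookup: each redirected xdp_buff parks its socket on a per-CPU flush list, which the driver drains once per NAPI cycle instead of re-resolving the map entry per packet. The XDP program side is unchanged; for reference, a minimal redirect program (map and section names illustrative, using the legacy map-definition style of this era):

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") xsks_map = {
    	.type = BPF_MAP_TYPE_XSKMAP,
    	.key_size = sizeof(int),
    	.value_size = sizeof(int),
    	.max_entries = 64,
    };

    SEC("xdp")
    int xdp_sock_prog(struct xdp_md *ctx)
    {
    	/* hand the frame to the AF_XDP socket bound to this RX queue,
    	 * falling back to XDP_PASS if no socket is attached
    	 */
    	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
    }

    char _license[] SEC("license") = "GPL";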
samples/bpf/map_perf_test_kern.c

@@ -181,8 +181,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
 	if (addrlen != sizeof(*in6))
 		return 0;
 
-	ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
+	ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
 			     &in6->sin6_addr);
 	if (ret)
 		goto done;

samples/bpf/test_map_in_map_kern.c

@@ -118,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx)
 	if (addrlen != sizeof(*in6))
 		return 0;
 
-	ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+	ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
 	if (ret) {
 		inline_ret = ret;
 		goto done;
@@ -129,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx)
 
 	test_case = dst6[7];
 
-	ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+	ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
 	if (ret) {
 		inline_ret = ret;
 		goto done;

samples/bpf/test_probe_write_user_kern.c

@@ -37,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx)
 	if (sockaddr_len > sizeof(orig_addr))
 		return 0;
 
-	if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
+	if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
 		return 0;
 
 	mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
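All three sample conversions apply the same rule from the helper split: a pointer that arrived from user space (here, syscall arguments fished out of pt_regs) must be read with the _user variant, while pointers into in-kernel structures take the _kernel variant. An illustrative kprobe, not part of this series (assumes the PT_REGS_PARM macros from bpf_helpers.h):

    #include <linux/ptrace.h>
    #include <linux/socket.h>
    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("kprobe/__sys_connect")
    int trace_connect_entry(struct pt_regs *ctx)
    {
    	struct sockaddr sa = {};
    	void *uaddr = (void *)PT_REGS_PARM2(ctx);

    	/* user-supplied sockaddr: must use the _user variant */
    	bpf_probe_read_user(&sa, sizeof(sa), uaddr);
    	return 0;
    }

    char _license[] SEC("license") = "GPL";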
include/uapi/linux/bpf.h

@@ -173,6 +173,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+	BPF_PROG_TYPE_TRACING,
 };
 
 enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
+	BPF_TRACE_RAW_TP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -561,10 +563,13 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
- * 		address *src* and store the data in *dst*.
+ * 		kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * 		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * 		instead.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -1426,45 +1431,14 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
- * 		Copy a NUL terminated string from an unsafe address
- * 		*unsafe_ptr* to *dst*. The *size* should include the
- * 		terminating NUL byte. In case the string length is smaller than
- * 		*size*, the target is not padded with further NUL bytes. If the
- * 		string length is larger than *size*, just *size*-1 bytes are
- * 		copied and the last byte is set to NUL.
- *
- * 		On success, the length of the copied string is returned. This
- * 		makes this helper useful in tracing programs for reading
- * 		strings, and more importantly to get its length at runtime. See
- * 		the following snippet:
- *
- * 		::
- *
- * 			SEC("kprobe/sys_open")
- * 			void bpf_sys_open(struct pt_regs *ctx)
- * 			{
- * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
- * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
- * 				                             ctx->di);
- *
- * 				// Consume buf, for example push it to
- * 				// userspace via bpf_perf_event_output(); we
- * 				// can use res (the string length) as event
- * 				// size, after checking its boundaries.
- * 			}
- *
- * 		In comparison, using **bpf_probe_read()** helper here instead
- * 		to read the string would require to estimate the length at
- * 		compile time, and would often result in copying more memory
- * 		than necessary.
- *
- * 		Another useful use case is when parsing individual process
- * 		arguments or individual environment variables navigating
- * 		*current*\ **->mm->arg_start** and *current*\
- * 		**->mm->env_start**: using this helper and the return value,
- * 		one can quickly iterate at the right offset of the memory area.
+ * 		Copy a NUL terminated string from an unsafe kernel address
+ * 		*unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * 		more details.
+ *
+ * 		Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * 		instead.
  * 	Return
  * 		On success, the strictly positive length of the string,
  * 		including the trailing NUL character. On error, a negative
@@ -2775,6 +2749,72 @@ union bpf_attr {
  * 		restricted to raw_tracepoint bpf programs.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from user space address
+ * 		*unsafe_ptr* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from kernel space address
+ * 		*unsafe_ptr* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe user address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * 				                                  ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		instead to read the string would require to estimate the length
+ * 		at compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * 		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * 	Return
+ * 		On success, the strictly positive length of the string, including
+ * 		the trailing NUL character. On error, a negative value.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2888,7 +2928,11 @@ union bpf_attr {
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
 	FN(tcp_gen_syncookie),		\
-	FN(skb_output),
+	FN(skb_output),			\
+	FN(probe_read_user),		\
+	FN(probe_read_kernel),		\
+	FN(probe_read_user_str),	\
+	FN(probe_read_kernel_str),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
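For reference, the mapper macro above is consumed in the same header to generate the helper IDs, so the four new helpers take the next consecutive values after BPF_FUNC_skb_output:

    /* as in the real uapi header */
    #define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
    enum bpf_func_id {
    	__BPF_FUNC_MAPPER(__BPF_ENUM_FN)
    	__BPF_FUNC_MAX_ID,
    };
    #undef __BPF_ENUM_FN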
tools/lib/bpf/bpf.c

@@ -228,9 +228,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
-	if (attr.prog_type == BPF_PROG_TYPE_RAW_TRACEPOINT)
-		/* expected_attach_type is ignored for tracing progs */
-		attr.attach_btf_id = attr.expected_attach_type;
+	if (attr.prog_type == BPF_PROG_TYPE_TRACING)
+		attr.attach_btf_id = load_attr->attach_btf_id;
+	else
+		attr.prog_ifindex = load_attr->prog_ifindex;
 	attr.insn_cnt = (__u32)load_attr->insns_cnt;
 	attr.insns = ptr_to_u64(load_attr->insns);
 	attr.license = ptr_to_u64(load_attr->license);
@@ -245,7 +246,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	}
 
 	attr.kern_version = load_attr->kern_version;
-	attr.prog_ifindex = load_attr->prog_ifindex;
 	attr.prog_btf_fd = load_attr->prog_btf_fd;
 	attr.func_info_rec_size = load_attr->func_info_rec_size;
 	attr.func_info_cnt = load_attr->func_info_cnt;
tools/lib/bpf/bpf.h

@@ -78,7 +78,10 @@ struct bpf_load_program_attr {
 	size_t insns_cnt;
 	const char *license;
 	__u32 kern_version;
-	__u32 prog_ifindex;
+	union {
+		__u32 prog_ifindex;
+		__u32 attach_btf_id;
+	};
 	__u32 prog_btf_fd;
 	__u32 func_info_rec_size;
 	const void *func_info;
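Putting the two tools-side changes together: for BPF_PROG_TYPE_TRACING the caller now fills the union's attach_btf_id member, and prog_ifindex is ignored since the two share storage. A hedged sketch of a load (insns, insns_cnt, btf_id and log_buf assumed to exist in the surrounding code):

    struct bpf_load_program_attr load_attr = {};
    int prog_fd;

    load_attr.prog_type = BPF_PROG_TYPE_TRACING;
    load_attr.expected_attach_type = BPF_TRACE_RAW_TP;
    load_attr.name = "kfree_skb_btf";        /* illustrative */
    load_attr.insns = insns;
    load_attr.insns_cnt = insns_cnt;
    load_attr.license = "GPL";
    load_attr.attach_btf_id = btf_id;        /* e.g. from a vmlinux BTF lookup */

    prog_fd = bpf_load_program_xattr(&load_attr, log_buf, sizeof(log_buf));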
tools/lib/bpf/bpf_helpers.h

@@ -38,4 +38,10 @@ struct bpf_map_def {
 	unsigned int map_flags;
 };
 
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
 #endif
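A BTF-defined map opts into auto-pinning by setting this attribute in its definition; on load, libbpf then pins the map under the configured root directory, or reuses a compatible map already pinned there. A sketch of such a definition (assuming the __uint/__type convenience macros; map name illustrative):

    struct {
    	__uint(type, BPF_MAP_TYPE_HASH);
    	__uint(max_entries, 128);
    	__type(key, __u32);
    	__type(value, __u64);
    	__uint(pinning, LIBBPF_PIN_BY_NAME);
    } my_counters SEC(".maps");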
@ -188,6 +188,7 @@ struct bpf_program {
|
||||||
bpf_program_clear_priv_t clear_priv;
|
bpf_program_clear_priv_t clear_priv;
|
||||||
|
|
||||||
enum bpf_attach_type expected_attach_type;
|
enum bpf_attach_type expected_attach_type;
|
||||||
|
__u32 attach_btf_id;
|
||||||
void *func_info;
|
void *func_info;
|
||||||
__u32 func_info_rec_size;
|
__u32 func_info_rec_size;
|
||||||
__u32 func_info_cnt;
|
__u32 func_info_cnt;
|
||||||
|
@ -226,6 +227,8 @@ struct bpf_map {
|
||||||
void *priv;
|
void *priv;
|
||||||
bpf_map_clear_priv_t clear_priv;
|
bpf_map_clear_priv_t clear_priv;
|
||||||
enum libbpf_map_type libbpf_type;
|
enum libbpf_map_type libbpf_type;
|
||||||
|
char *pin_path;
|
||||||
|
bool pinned;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bpf_secdata {
|
struct bpf_secdata {
|
||||||
|
@ -1090,10 +1093,32 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int build_map_pin_path(struct bpf_map *map, const char *path)
|
||||||
|
{
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
int err, len;
|
||||||
|
|
||||||
|
if (!path)
|
||||||
|
path = "/sys/fs/bpf";
|
||||||
|
|
||||||
|
len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
|
||||||
|
if (len < 0)
|
||||||
|
return -EINVAL;
|
||||||
|
else if (len >= PATH_MAX)
|
||||||
|
return -ENAMETOOLONG;
|
||||||
|
|
||||||
|
err = bpf_map__set_pin_path(map, buf);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
||||||
const struct btf_type *sec,
|
const struct btf_type *sec,
|
||||||
int var_idx, int sec_idx,
|
int var_idx, int sec_idx,
|
||||||
const Elf_Data *data, bool strict)
|
const Elf_Data *data, bool strict,
|
||||||
|
const char *pin_root_path)
|
||||||
{
|
{
|
||||||
const struct btf_type *var, *def, *t;
|
const struct btf_type *var, *def, *t;
|
||||||
const struct btf_var_secinfo *vi;
|
const struct btf_var_secinfo *vi;
|
||||||
|
@ -1268,6 +1293,30 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
||||||
}
|
}
|
||||||
map->def.value_size = sz;
|
map->def.value_size = sz;
|
||||||
map->btf_value_type_id = t->type;
|
map->btf_value_type_id = t->type;
|
||||||
|
} else if (strcmp(name, "pinning") == 0) {
|
||||||
|
__u32 val;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (!get_map_field_int(map_name, obj->btf, def, m,
|
||||||
|
&val))
|
||||||
|
return -EINVAL;
|
||||||
|
pr_debug("map '%s': found pinning = %u.\n",
|
||||||
|
map_name, val);
|
||||||
|
|
||||||
|
if (val != LIBBPF_PIN_NONE &&
|
||||||
|
val != LIBBPF_PIN_BY_NAME) {
|
||||||
|
pr_warn("map '%s': invalid pinning value %u.\n",
|
||||||
|
map_name, val);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
if (val == LIBBPF_PIN_BY_NAME) {
|
||||||
|
err = build_map_pin_path(map, pin_root_path);
|
||||||
|
if (err) {
|
||||||
|
pr_warn("map '%s': couldn't build pin path.\n",
|
||||||
|
map_name);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (strict) {
|
if (strict) {
|
||||||
pr_warn("map '%s': unknown field '%s'.\n",
|
pr_warn("map '%s': unknown field '%s'.\n",
|
||||||
|
@ -1287,7 +1336,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
|
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
|
||||||
|
const char *pin_root_path)
|
||||||
{
|
{
|
||||||
const struct btf_type *sec = NULL;
|
const struct btf_type *sec = NULL;
|
||||||
int nr_types, i, vlen, err;
|
int nr_types, i, vlen, err;
|
||||||
|
@ -1329,7 +1379,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
|
||||||
for (i = 0; i < vlen; i++) {
|
for (i = 0; i < vlen; i++) {
|
||||||
err = bpf_object__init_user_btf_map(obj, sec, i,
|
err = bpf_object__init_user_btf_map(obj, sec, i,
|
||||||
obj->efile.btf_maps_shndx,
|
obj->efile.btf_maps_shndx,
|
||||||
data, strict);
|
data, strict, pin_root_path);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -1337,7 +1387,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps)
|
static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
|
||||||
|
const char *pin_root_path)
|
||||||
{
|
{
|
||||||
bool strict = !relaxed_maps;
|
bool strict = !relaxed_maps;
|
||||||
int err;
|
int err;
|
||||||
|
@ -1346,7 +1397,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps)
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
err = bpf_object__init_user_btf_maps(obj, strict);
|
err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
|
@ -1535,7 +1586,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps)
|
static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
|
||||||
|
const char *pin_root_path)
|
||||||
{
|
{
|
||||||
Elf *elf = obj->efile.elf;
|
Elf *elf = obj->efile.elf;
|
||||||
GElf_Ehdr *ep = &obj->efile.ehdr;
|
GElf_Ehdr *ep = &obj->efile.ehdr;
|
||||||
|
@ -1664,13 +1716,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
|
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
|
||||||
pr_warn("Corrupted ELF file: index of strtab invalid\n");
|
pr_warn("Corrupted ELF file: index of strtab invalid\n");
|
||||||
return -LIBBPF_ERRNO__FORMAT;
|
return -LIBBPF_ERRNO__FORMAT;
|
||||||
}
|
}
|
||||||
err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
|
err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
|
||||||
if (!err)
|
if (!err)
|
||||||
err = bpf_object__init_maps(obj, relaxed_maps);
|
err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
|
||||||
if (!err)
|
if (!err)
|
||||||
err = bpf_object__sanitize_and_load_btf(obj);
|
err = bpf_object__sanitize_and_load_btf(obj);
|
||||||
if (!err)
|
if (!err)
|
||||||
|
@ -1916,16 +1968,22 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
|
||||||
return -errno;
|
return -errno;
|
||||||
|
|
||||||
new_fd = open("/", O_RDONLY | O_CLOEXEC);
|
new_fd = open("/", O_RDONLY | O_CLOEXEC);
|
||||||
if (new_fd < 0)
|
if (new_fd < 0) {
|
||||||
|
err = -errno;
|
||||||
goto err_free_new_name;
|
goto err_free_new_name;
|
||||||
|
}
|
||||||
|
|
||||||
new_fd = dup3(fd, new_fd, O_CLOEXEC);
|
new_fd = dup3(fd, new_fd, O_CLOEXEC);
|
||||||
if (new_fd < 0)
|
if (new_fd < 0) {
|
||||||
|
err = -errno;
|
||||||
goto err_close_new_fd;
|
goto err_close_new_fd;
|
||||||
|
}
|
||||||
|
|
||||||
err = zclose(map->fd);
|
err = zclose(map->fd);
|
||||||
if (err)
|
if (err) {
|
||||||
|
err = -errno;
|
||||||
goto err_close_new_fd;
|
goto err_close_new_fd;
|
||||||
|
}
|
||||||
free(map->name);
|
free(map->name);
|
||||||
|
|
||||||
map->fd = new_fd;
|
map->fd = new_fd;
|
||||||
|
@ -1944,7 +2002,7 @@ err_close_new_fd:
|
||||||
close(new_fd);
|
close(new_fd);
|
||||||
err_free_new_name:
|
err_free_new_name:
|
||||||
free(new_name);
|
free(new_name);
|
||||||
return -errno;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
|
int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
|
||||||
|
@ -2120,6 +2178,66 @@ bpf_object__probe_caps(struct bpf_object *obj)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
|
||||||
|
{
|
||||||
|
struct bpf_map_info map_info = {};
|
||||||
|
char msg[STRERR_BUFSIZE];
|
||||||
|
__u32 map_info_len;
|
||||||
|
|
||||||
|
map_info_len = sizeof(map_info);
|
||||||
|
|
||||||
|
if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
|
||||||
|
pr_warn("failed to get map info for map FD %d: %s\n",
|
||||||
|
map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (map_info.type == map->def.type &&
|
||||||
|
map_info.key_size == map->def.key_size &&
|
||||||
|
map_info.value_size == map->def.value_size &&
|
||||||
|
map_info.max_entries == map->def.max_entries &&
|
||||||
|
map_info.map_flags == map->def.map_flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
bpf_object__reuse_map(struct bpf_map *map)
|
||||||
|
{
|
||||||
|
char *cp, errmsg[STRERR_BUFSIZE];
|
||||||
|
int err, pin_fd;
|
||||||
|
|
||||||
|
pin_fd = bpf_obj_get(map->pin_path);
|
||||||
|
if (pin_fd < 0) {
|
||||||
|
err = -errno;
|
||||||
|
if (err == -ENOENT) {
|
||||||
|
pr_debug("found no pinned map to reuse at '%s'\n",
|
||||||
|
map->pin_path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||||
|
pr_warn("couldn't retrieve pinned map '%s': %s\n",
|
||||||
|
map->pin_path, cp);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!map_is_reuse_compat(map, pin_fd)) {
|
||||||
|
pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
|
||||||
|
map->pin_path);
|
||||||
|
close(pin_fd);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = bpf_map__reuse_fd(map, pin_fd);
|
||||||
|
if (err) {
|
||||||
|
close(pin_fd);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
map->pinned = true;
|
||||||
|
pr_debug("reused pinned map at '%s'\n", map->pin_path);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
|
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
|
||||||
{
|
{
|
||||||
|
@ -2162,6 +2280,15 @@ bpf_object__create_maps(struct bpf_object *obj)
|
||||||
char *cp, errmsg[STRERR_BUFSIZE];
|
char *cp, errmsg[STRERR_BUFSIZE];
|
||||||
int *pfd = &map->fd;
|
int *pfd = &map->fd;
|
||||||
|
|
||||||
|
if (map->pin_path) {
|
||||||
|
err = bpf_object__reuse_map(map);
|
||||||
|
if (err) {
|
||||||
|
pr_warn("error reusing pinned map %s\n",
|
||||||
|
map->name);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (map->fd >= 0) {
|
if (map->fd >= 0) {
|
||||||
pr_debug("skip map create (preset) %s: fd=%d\n",
|
pr_debug("skip map create (preset) %s: fd=%d\n",
|
||||||
map->name, map->fd);
|
map->name, map->fd);
|
||||||
|
@ -2240,6 +2367,15 @@ err_out:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (map->pin_path && !map->pinned) {
|
||||||
|
err = bpf_map__pin(map, NULL);
|
||||||
|
if (err) {
|
||||||
|
pr_warn("failed to auto-pin map name '%s' at '%s'\n",
|
||||||
|
map->name, map->pin_path);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pr_debug("created map %s: fd=%d\n", map->name, *pfd);
|
pr_debug("created map %s: fd=%d\n", map->name, *pfd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3446,6 +3582,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
|
||||||
load_attr.line_info_cnt = prog->line_info_cnt;
|
load_attr.line_info_cnt = prog->line_info_cnt;
|
||||||
load_attr.log_level = prog->log_level;
|
load_attr.log_level = prog->log_level;
|
||||||
load_attr.prog_flags = prog->prog_flags;
|
load_attr.prog_flags = prog->prog_flags;
|
||||||
|
load_attr.attach_btf_id = prog->attach_btf_id;
|
||||||
|
|
||||||
retry_load:
|
retry_load:
|
||||||
log_buf = malloc(log_buf_size);
|
log_buf = malloc(log_buf_size);
|
||||||
|
@ -3607,10 +3744,13 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id);
|
||||||
|
|
||||||
static struct bpf_object *
|
static struct bpf_object *
|
||||||
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
||||||
struct bpf_object_open_opts *opts)
|
struct bpf_object_open_opts *opts)
|
||||||
{
|
{
|
||||||
|
const char *pin_root_path;
|
||||||
struct bpf_program *prog;
|
struct bpf_program *prog;
|
||||||
struct bpf_object *obj;
|
struct bpf_object *obj;
|
||||||
const char *obj_name;
|
const char *obj_name;
|
||||||
|
@ -3645,17 +3785,20 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
||||||
|
|
||||||
obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
|
obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
|
||||||
relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
|
relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
|
||||||
|
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
|
||||||
|
|
||||||
CHECK_ERR(bpf_object__elf_init(obj), err, out);
|
CHECK_ERR(bpf_object__elf_init(obj), err, out);
|
||||||
CHECK_ERR(bpf_object__check_endianness(obj), err, out);
|
CHECK_ERR(bpf_object__check_endianness(obj), err, out);
|
||||||
CHECK_ERR(bpf_object__probe_caps(obj), err, out);
|
CHECK_ERR(bpf_object__probe_caps(obj), err, out);
|
||||||
CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps), err, out);
|
CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
|
||||||
|
err, out);
|
||||||
CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
|
CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
|
||||||
bpf_object__elf_finish(obj);
|
bpf_object__elf_finish(obj);
|
||||||
|
|
||||||
bpf_object__for_each_program(prog, obj) {
|
bpf_object__for_each_program(prog, obj) {
|
||||||
enum bpf_prog_type prog_type;
|
enum bpf_prog_type prog_type;
|
||||||
enum bpf_attach_type attach_type;
|
enum bpf_attach_type attach_type;
|
||||||
|
__u32 btf_id;
|
||||||
|
|
||||||
err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
|
err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
|
||||||
&attach_type);
|
&attach_type);
|
||||||
|
@ -3667,6 +3810,12 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
||||||
|
|
||||||
bpf_program__set_type(prog, prog_type);
|
bpf_program__set_type(prog, prog_type);
|
||||||
bpf_program__set_expected_attach_type(prog, attach_type);
|
bpf_program__set_expected_attach_type(prog, attach_type);
|
||||||
|
if (prog_type == BPF_PROG_TYPE_TRACING) {
|
||||||
|
err = libbpf_attach_btf_id_by_name(prog->section_name, &btf_id);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
prog->attach_btf_id = btf_id;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return obj;
|
return obj;
|
||||||
|
@ -3797,6 +3946,28 @@ int bpf_object__load(struct bpf_object *obj)
|
||||||
return bpf_object__load_xattr(&attr);
|
return bpf_object__load_xattr(&attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int make_parent_dir(const char *path)
|
||||||
|
{
|
||||||
|
char *cp, errmsg[STRERR_BUFSIZE];
|
||||||
|
char *dname, *dir;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
dname = strdup(path);
|
||||||
|
if (dname == NULL)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
dir = dirname(dname);
|
||||||
|
if (mkdir(dir, 0700) && errno != EEXIST)
|
||||||
|
err = -errno;
|
||||||
|
|
||||||
|
free(dname);
|
||||||
|
if (err) {
|
||||||
|
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||||
|
pr_warn("failed to mkdir %s: %s\n", path, cp);
|
||||||
|
}
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
static int check_path(const char *path)
|
static int check_path(const char *path)
|
||||||
{
|
{
|
||||||
char *cp, errmsg[STRERR_BUFSIZE];
|
char *cp, errmsg[STRERR_BUFSIZE];
|
||||||
|
@ -3833,6 +4004,10 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
|
||||||
char *cp, errmsg[STRERR_BUFSIZE];
|
char *cp, errmsg[STRERR_BUFSIZE];
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
err = make_parent_dir(path);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
err = check_path(path);
|
err = check_path(path);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
@ -3886,25 +4061,14 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int make_dir(const char *path)
|
|
||||||
{
|
|
||||||
char *cp, errmsg[STRERR_BUFSIZE];
|
|
||||||
int err = 0;
|
|
||||||
|
|
||||||
if (mkdir(path, 0700) && errno != EEXIST)
|
|
||||||
err = -errno;
|
|
||||||
|
|
||||||
if (err) {
|
|
||||||
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
|
||||||
pr_warn("failed to mkdir %s: %s\n", path, cp);
|
|
||||||
}
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bpf_program__pin(struct bpf_program *prog, const char *path)
|
int bpf_program__pin(struct bpf_program *prog, const char *path)
|
||||||
{
|
{
|
||||||
int i, err;
|
int i, err;
|
||||||
|
|
||||||
|
err = make_parent_dir(path);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
err = check_path(path);
|
err = check_path(path);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
@ -3925,10 +4089,6 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
|
||||||
return bpf_program__pin_instance(prog, path, 0);
|
return bpf_program__pin_instance(prog, path, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = make_dir(path);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
for (i = 0; i < prog->instances.nr; i++) {
|
for (i = 0; i < prog->instances.nr; i++) {
|
||||||
char buf[PATH_MAX];
|
char buf[PATH_MAX];
|
||||||
int len;
|
int len;
|
||||||
|
@ -4019,47 +4179,123 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
|
||||||
char *cp, errmsg[STRERR_BUFSIZE];
|
char *cp, errmsg[STRERR_BUFSIZE];
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
err = check_path(path);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
if (map == NULL) {
|
if (map == NULL) {
|
||||||
pr_warn("invalid map pointer\n");
|
pr_warn("invalid map pointer\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bpf_obj_pin(map->fd, path)) {
|
if (map->pin_path) {
|
||||||
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
|
if (path && strcmp(path, map->pin_path)) {
|
||||||
pr_warn("failed to pin map: %s\n", cp);
|
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
|
||||||
return -errno;
|
bpf_map__name(map), map->pin_path, path);
|
||||||
|
return -EINVAL;
|
||||||
|
} else if (map->pinned) {
|
||||||
|
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
|
||||||
|
bpf_map__name(map), map->pin_path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!path) {
|
||||||
|
pr_warn("missing a path to pin map '%s' at\n",
|
||||||
|
bpf_map__name(map));
|
||||||
|
return -EINVAL;
|
||||||
|
} else if (map->pinned) {
|
||||||
|
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
|
||||||
|
return -EEXIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
map->pin_path = strdup(path);
|
||||||
|
if (!map->pin_path) {
|
||||||
|
err = -errno;
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_debug("pinned map '%s'\n", path);
|
err = make_parent_dir(map->pin_path);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
err = check_path(map->pin_path);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
if (bpf_obj_pin(map->fd, map->pin_path)) {
|
||||||
|
err = -errno;
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
|
||||||
|
map->pinned = true;
|
||||||
|
pr_debug("pinned map '%s'\n", map->pin_path);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
out_err:
|
||||||
|
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||||
|
pr_warn("failed to pin map: %s\n", cp);
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bpf_map__unpin(struct bpf_map *map, const char *path)
|
int bpf_map__unpin(struct bpf_map *map, const char *path)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
err = check_path(path);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
if (map == NULL) {
|
if (map == NULL) {
|
||||||
pr_warn("invalid map pointer\n");
|
pr_warn("invalid map pointer\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (map->pin_path) {
|
||||||
|
if (path && strcmp(path, map->pin_path)) {
|
||||||
|
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
|
||||||
|
bpf_map__name(map), map->pin_path, path);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
path = map->pin_path;
|
||||||
|
} else if (!path) {
|
||||||
|
pr_warn("no path to unpin map '%s' from\n",
|
||||||
|
bpf_map__name(map));
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = check_path(path);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
err = unlink(path);
|
err = unlink(path);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
return -errno;
|
return -errno;
|
||||||
pr_debug("unpinned map '%s'\n", path);
|
|
||||||
|
map->pinned = false;
|
||||||
|
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
|
||||||
|
{
|
||||||
|
char *new = NULL;
|
||||||
|
|
||||||
|
if (path) {
|
||||||
|
new = strdup(path);
|
||||||
|
if (!new)
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(map->pin_path);
|
||||||
|
map->pin_path = new;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *bpf_map__get_pin_path(const struct bpf_map *map)
|
||||||
|
{
|
||||||
|
return map->pin_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool bpf_map__is_pinned(const struct bpf_map *map)
|
||||||
|
{
|
||||||
|
return map->pinned;
|
||||||
|
}
|
||||||
|
|
||||||
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||||
{
|
{
|
||||||
struct bpf_map *map;
|
struct bpf_map *map;
|
||||||
|
@ -4073,25 +4309,28 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = make_dir(path);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
bpf_object__for_each_map(map, obj) {
|
bpf_object__for_each_map(map, obj) {
|
||||||
|
char *pin_path = NULL;
|
||||||
char buf[PATH_MAX];
|
char buf[PATH_MAX];
|
||||||
int len;
|
|
||||||
|
|
||||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
if (path) {
|
||||||
bpf_map__name(map));
|
int len;
|
||||||
if (len < 0) {
|
|
||||||
err = -EINVAL;
|
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||||
goto err_unpin_maps;
|
bpf_map__name(map));
|
||||||
} else if (len >= PATH_MAX) {
|
if (len < 0) {
|
||||||
err = -ENAMETOOLONG;
|
err = -EINVAL;
|
||||||
goto err_unpin_maps;
|
goto err_unpin_maps;
|
||||||
|
} else if (len >= PATH_MAX) {
|
||||||
|
err = -ENAMETOOLONG;
|
||||||
|
goto err_unpin_maps;
|
||||||
|
}
|
||||||
|
pin_path = buf;
|
||||||
|
} else if (!map->pin_path) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = bpf_map__pin(map, buf);
|
err = bpf_map__pin(map, pin_path);
|
||||||
if (err)
|
if (err)
|
||||||
goto err_unpin_maps;
|
goto err_unpin_maps;
|
||||||
}
|
}
|
||||||
|
@ -4100,17 +4339,10 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||||
|
|
||||||
err_unpin_maps:
|
err_unpin_maps:
|
||||||
while ((map = bpf_map__prev(map, obj))) {
|
while ((map = bpf_map__prev(map, obj))) {
|
||||||
char buf[PATH_MAX];
|
if (!map->pin_path)
|
||||||
int len;
|
|
||||||
|
|
||||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
|
||||||
bpf_map__name(map));
|
|
||||||
if (len < 0)
|
|
||||||
continue;
|
|
||||||
else if (len >= PATH_MAX)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bpf_map__unpin(map, buf);
|
bpf_map__unpin(map, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
|
@ -4125,17 +4357,24 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
|
|
||||||
bpf_object__for_each_map(map, obj) {
|
bpf_object__for_each_map(map, obj) {
|
||||||
|
char *pin_path = NULL;
|
||||||
char buf[PATH_MAX];
|
char buf[PATH_MAX];
|
||||||
int len;
|
|
||||||
|
|
||||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
if (path) {
|
||||||
bpf_map__name(map));
|
int len;
|
||||||
if (len < 0)
|
|
||||||
return -EINVAL;
|
|
||||||
else if (len >= PATH_MAX)
|
|
||||||
return -ENAMETOOLONG;
|
|
||||||
|
|
||||||
err = bpf_map__unpin(map, buf);
|
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||||
|
bpf_map__name(map));
|
||||||
|
if (len < 0)
|
||||||
|
return -EINVAL;
|
||||||
|
else if (len >= PATH_MAX)
|
||||||
|
return -ENAMETOOLONG;
|
||||||
|
pin_path = buf;
|
||||||
|
} else if (!map->pin_path) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = bpf_map__unpin(map, pin_path);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -4156,10 +4395,6 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = make_dir(path);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
bpf_object__for_each_program(prog, obj) {
|
bpf_object__for_each_program(prog, obj) {
|
||||||
char buf[PATH_MAX];
|
char buf[PATH_MAX];
|
||||||
int len;
|
int len;
|
||||||
|
@ -4260,6 +4495,7 @@ void bpf_object__close(struct bpf_object *obj)
|
||||||
|
|
||||||
for (i = 0; i < obj->nr_maps; i++) {
|
for (i = 0; i < obj->nr_maps; i++) {
|
||||||
zfree(&obj->maps[i].name);
|
zfree(&obj->maps[i].name);
|
||||||
|
zfree(&obj->maps[i].pin_path);
|
||||||
if (obj->maps[i].clear_priv)
|
if (obj->maps[i].clear_priv)
|
||||||
obj->maps[i].clear_priv(&obj->maps[i],
|
obj->maps[i].clear_priv(&obj->maps[i],
|
||||||
obj->maps[i].priv);
|
obj->maps[i].priv);
|
||||||
|
@ -4518,6 +4754,7 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
|
||||||
BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
|
BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
|
||||||
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
|
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
|
||||||
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
|
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
|
||||||
|
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
|
||||||
|
|
||||||
enum bpf_attach_type
|
enum bpf_attach_type
|
||||||
bpf_program__get_expected_attach_type(struct bpf_program *prog)
|
bpf_program__get_expected_attach_type(struct bpf_program *prog)
|
||||||
|
@ -4546,7 +4783,8 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
|
||||||
BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
|
BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
|
||||||
|
|
||||||
/* Programs that use BTF to identify attach point */
|
/* Programs that use BTF to identify attach point */
|
||||||
#define BPF_PROG_BTF(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 1, 0)
|
#define BPF_PROG_BTF(string, ptype, eatype) \
|
||||||
|
BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
|
||||||
|
|
||||||
/* Programs that can be attached but attach type can't be identified by section
|
/* Programs that can be attached but attach type can't be identified by section
|
||||||
* name. Kept for backward compatibility.
|
* name. Kept for backward compatibility.
|
||||||
|
@ -4573,7 +4811,8 @@ static const struct {
|
||||||
BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
|
BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
|
||||||
BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
||||||
BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
||||||
BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
|
||||||
|
BPF_TRACE_RAW_TP),
|
||||||
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
|
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
|
||||||
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
|
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
|
||||||
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
|
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
|
||||||
|
@@ -4678,27 +4917,6 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 			continue;
 		*prog_type = section_names[i].prog_type;
 		*expected_attach_type = section_names[i].expected_attach_type;
-		if (section_names[i].is_attach_btf) {
-			struct btf *btf = bpf_core_find_kernel_btf();
-			char raw_tp_btf_name[128] = "btf_trace_";
-			char *dst = raw_tp_btf_name + sizeof("btf_trace_") - 1;
-			int ret;
-
-			if (IS_ERR(btf)) {
-				pr_warn("vmlinux BTF is not found\n");
-				return -EINVAL;
-			}
-			/* prepend "btf_trace_" prefix per kernel convention */
-			strncat(dst, name + section_names[i].len,
-				sizeof(raw_tp_btf_name) - sizeof("btf_trace_"));
-			ret = btf__find_by_name(btf, raw_tp_btf_name);
-			btf__free(btf);
-			if (ret <= 0) {
-				pr_warn("%s is not found in vmlinux BTF\n", dst);
-				return -EINVAL;
-			}
-			*expected_attach_type = ret;
-		}
 		return 0;
 	}
 	pr_warn("failed to guess program type based on ELF section name '%s'\n", name);
@@ -4711,6 +4929,46 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 	return -ESRCH;
 }
 
+#define BTF_PREFIX "btf_trace_"
+static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id)
+{
+	struct btf *btf = bpf_core_find_kernel_btf();
+	char raw_tp_btf_name[128] = BTF_PREFIX;
+	char *dst = raw_tp_btf_name + sizeof(BTF_PREFIX) - 1;
+	int ret, i, err = -EINVAL;
+
+	if (IS_ERR(btf)) {
+		pr_warn("vmlinux BTF is not found\n");
+		return -EINVAL;
+	}
+
+	if (!name)
+		goto out;
+
+	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+		if (!section_names[i].is_attach_btf)
+			continue;
+		if (strncmp(name, section_names[i].sec, section_names[i].len))
+			continue;
+		/* prepend "btf_trace_" prefix per kernel convention */
+		strncat(dst, name + section_names[i].len,
+			sizeof(raw_tp_btf_name) - sizeof(BTF_PREFIX));
+		ret = btf__find_by_name(btf, raw_tp_btf_name);
+		if (ret <= 0) {
+			pr_warn("%s is not found in vmlinux BTF\n", dst);
+			goto out;
+		}
+		*btf_id = ret;
+		err = 0;
+		goto out;
+	}
+	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
+	err = -ESRCH;
+out:
+	btf__free(btf);
+	return err;
+}
+
 int libbpf_attach_type_by_name(const char *name,
 			       enum bpf_attach_type *attach_type)
 {
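
Put together, the hunks above change what a "tp_btf/" ELF section means: it now selects BPF_PROG_TYPE_TRACING with expected attach type BPF_TRACE_RAW_TP, and the attach BTF id is resolved by prepending "btf_trace_" to the rest of the section name and searching vmlinux BTF. A minimal sketch of a program using the prefix; the tracepoint name here is an illustrative assumption, not taken from this series:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    /* libbpf derives the attach point by looking up "btf_trace_sched_switch"
     * in vmlinux BTF; the context is the raw tracepoint's arguments, not
     * a pt_regs */
    SEC("tp_btf/sched_switch")
    int handle_sched_switch(void *ctx)
    {
            return 0;
    }

    char _license[] SEC("license") = "GPL";
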
@@ -103,8 +103,13 @@ struct bpf_object_open_opts {
 	bool relaxed_maps;
 	/* process CO-RE relocations non-strictly, allowing them to fail */
 	bool relaxed_core_relocs;
+	/* maps that set the 'pinning' attribute in their definition will have
+	 * their pin_path attribute set to a file in this directory, and be
+	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
+	 */
+	const char *pin_root_path;
 };
-#define bpf_object_open_opts__last_field relaxed_core_relocs
+#define bpf_object_open_opts__last_field pin_root_path
 
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
@@ -124,6 +129,17 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 			     __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 				__u32 *off);
+
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
+/* pin_maps and unpin_maps can both be called with a NULL path, in which case
+ * they will use the pin_path attribute of each map (and ignore all maps that
+ * don't have a pin_path set).
+ */
 LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
 LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
 				      const char *path);
@@ -307,6 +323,7 @@ LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
 
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -326,6 +343,7 @@ LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -385,6 +403,9 @@ LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
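
These declarations are the whole user-facing surface of automatic map pinning. A usage sketch under stated assumptions (the object path and pin root are hypothetical, and a bpffs mount is assumed):

    #include <bpf/libbpf.h>

    static struct bpf_object *open_and_load_pinned(void)
    {
            /* maps declaring __uint(pinning, LIBBPF_PIN_BY_NAME) pin
             * themselves under this root on load, or reuse an existing pin */
            DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
                    .pin_root_path = "/sys/fs/bpf/myapp",   /* hypothetical */
            );
            struct bpf_object *obj = bpf_object__open_file("prog.o", &opts);

            if (libbpf_get_error(obj))
                    return NULL;
            if (bpf_object__load(obj)) {
                    bpf_object__close(obj);
                    return NULL;
            }
            return obj;
    }

The pinning selftest further down exercises exactly this path, including reuse of an existing pin across loads.
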
@@ -193,8 +193,13 @@ LIBBPF_0.0.5 {
 
 LIBBPF_0.0.6 {
 	global:
+		bpf_map__get_pin_path;
+		bpf_map__is_pinned;
+		bpf_map__set_pin_path;
 		bpf_object__open_file;
 		bpf_object__open_mem;
 		bpf_program__get_expected_attach_type;
 		bpf_program__get_type;
+		bpf_program__is_tracing;
+		bpf_program__set_tracing;
 } LIBBPF_0.0.5;
@@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+	case BPF_PROG_TYPE_TRACING:
 	default:
 		break;
 	}
@@ -73,6 +73,21 @@ struct xsk_nl_info {
 	int fd;
 };
 
+/* Up until and including Linux 5.3 */
+struct xdp_ring_offset_v1 {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_mmap_offsets_v1 {
+	struct xdp_ring_offset_v1 rx;
+	struct xdp_ring_offset_v1 tx;
+	struct xdp_ring_offset_v1 fr;
+	struct xdp_ring_offset_v1 cr;
+};
+
 int xsk_umem__fd(const struct xsk_umem *umem)
 {
 	return umem ? umem->fd : -EINVAL;
@@ -133,6 +148,58 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
 	return 0;
 }
 
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+	struct xdp_mmap_offsets_v1 off_v1;
+
+	/* getsockopt on a kernel <= 5.3 has no flags fields.
+	 * Copy over the offsets to the correct places in the >=5.4 format
+	 * and put the flags where they would have been on that kernel.
+	 */
+	memcpy(&off_v1, off, sizeof(off_v1));
+
+	off->rx.producer = off_v1.rx.producer;
+	off->rx.consumer = off_v1.rx.consumer;
+	off->rx.desc = off_v1.rx.desc;
+	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
+
+	off->tx.producer = off_v1.tx.producer;
+	off->tx.consumer = off_v1.tx.consumer;
+	off->tx.desc = off_v1.tx.desc;
+	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
+
+	off->fr.producer = off_v1.fr.producer;
+	off->fr.consumer = off_v1.fr.consumer;
+	off->fr.desc = off_v1.fr.desc;
+	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
+
+	off->cr.producer = off_v1.cr.producer;
+	off->cr.consumer = off_v1.cr.consumer;
+	off->cr.desc = off_v1.cr.desc;
+	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+	socklen_t optlen;
+	int err;
+
+	optlen = sizeof(*off);
+	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+	if (err)
+		return err;
+
+	if (optlen == sizeof(*off))
+		return 0;
+
+	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
+		xsk_mmap_offsets_v1(off);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 			    __u64 size, struct xsk_ring_prod *fill,
 			    struct xsk_ring_cons *comp,
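
The compatibility shim hinges on two observations: the v1 struct is the new struct minus the per-ring flags member, and getsockopt() reports via optlen how much the kernel actually filled in. The synthesized flags offset, consumer + sizeof(__u32), is where the newer ring header places its flags word, per the comment above. A standalone sketch of the same probe, assuming <linux/if_xdp.h> with the newer layout and an already-created AF_XDP socket fd:

    #include <errno.h>
    #include <sys/socket.h>
    #include <linux/if_xdp.h>

    /* returns 1 if the kernel replied with the pre-5.4 (v1) layout,
     * 0 for the current layout, negative errno on failure */
    static int ring_offsets_are_v1(int xsk_fd)
    {
            struct xdp_mmap_offsets off;
            socklen_t optlen = sizeof(off);

            if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen))
                    return -errno;
            return optlen < sizeof(off);
    }
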
@@ -141,7 +208,6 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 	struct xdp_mmap_offsets off;
 	struct xdp_umem_reg mr;
 	struct xsk_umem *umem;
-	socklen_t optlen;
 	void *map;
 	int err;
 
@@ -190,8 +256,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 		goto out_socket;
 	}
 
-	optlen = sizeof(off);
-	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	err = xsk_get_mmap_offsets(umem->fd, &off);
 	if (err) {
 		err = -errno;
 		goto out_socket;
@@ -514,7 +579,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	struct sockaddr_xdp sxdp = {};
 	struct xdp_mmap_offsets off;
 	struct xsk_socket *xsk;
-	socklen_t optlen;
 	int err;
 
 	if (!umem || !xsk_ptr || !rx || !tx)
@@ -573,8 +637,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		}
 	}
 
-	optlen = sizeof(off);
-	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	err = xsk_get_mmap_offsets(xsk->fd, &off);
 	if (err) {
 		err = -errno;
 		goto out_socket;
@@ -660,7 +723,6 @@ out_xsk_alloc:
 int xsk_umem__delete(struct xsk_umem *umem)
 {
 	struct xdp_mmap_offsets off;
-	socklen_t optlen;
 	int err;
 
 	if (!umem)
@@ -669,8 +731,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
 	if (umem->refcount)
 		return -EBUSY;
 
-	optlen = sizeof(off);
-	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	err = xsk_get_mmap_offsets(umem->fd, &off);
 	if (!err) {
 		munmap(umem->fill->ring - off.fr.desc,
 		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
@@ -688,7 +749,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 {
 	size_t desc_sz = sizeof(struct xdp_desc);
 	struct xdp_mmap_offsets off;
-	socklen_t optlen;
 	int err;
 
 	if (!xsk)
@@ -699,8 +759,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 		close(xsk->prog_fd);
 	}
 
-	optlen = sizeof(off);
-	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	err = xsk_get_mmap_offsets(xsk->fd, &off);
 	if (!err) {
 		if (xsk->rx) {
 			munmap(xsk->rx->ring - off.rx.desc,
@@ -89,6 +89,9 @@ $(notdir $(TEST_GEN_PROGS) \
 $(OUTPUT)/urandom_read: urandom_read.c
 	$(CC) -o $@ $< -Wl,--build-id
 
+$(OUTPUT)/test_stub.o: test_stub.c
+	$(CC) -c $(CFLAGS) -o $@ $<
+
 BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)

@@ -131,8 +134,13 @@ $(shell $(1) -v -E - </dev/null 2>&1 \
 	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
 endef
 
+# Determine target endianness.
+IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+			grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
+MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
+BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
	     -I. -I./include/uapi -I$(APIDIR) \
	     -I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include)

@@ -271,12 +279,8 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
 # Define test_progs BPF-GCC-flavored test runner.
 ifneq ($(BPF_GCC),)
-IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
-			grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
-MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
-
 TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) $(MENDIAN)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
 TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
 endif
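
The endianness probe moves from the BPF-GCC-only branch into the common flags, so clang-built BPF objects are also compiled for the right target byte order on big-endian machines. The Makefile only greps the compiler's predefined macros; the same check expressed in C, as a runnable sketch:

    #include <stdio.h>

    /* __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ are predefined by GCC and
     * clang alike, which is what `$(CC) -dM -E -` exposes to the Makefile */
    int main(void)
    {
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
            puts("-mlittle-endian");
    #else
            puts("-mbig-endian");
    #endif
            return 0;
    }
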
tools/testing/selftests/bpf/prog_tests/pinning.c (new file, 210 lines)

// SPDX-License-Identifier: GPL-2.0

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <test_progs.h>

__u32 get_map_id(struct bpf_object *obj, const char *name)
{
	struct bpf_map_info map_info = {};
	__u32 map_info_len, duration = 0;
	struct bpf_map *map;
	int err;

	map_info_len = sizeof(map_info);

	map = bpf_object__find_map_by_name(obj, name);
	if (CHECK(!map, "find map", "NULL map"))
		return 0;

	err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
				     &map_info, &map_info_len);
	CHECK(err, "get map info", "err %d errno %d", err, errno);
	return map_info.id;
}

void test_pinning(void)
{
	const char *file_invalid = "./test_pinning_invalid.o";
	const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
	const char *nopinpath = "/sys/fs/bpf/nopinmap";
	const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
	const char *custpath = "/sys/fs/bpf/custom";
	const char *pinpath = "/sys/fs/bpf/pinmap";
	const char *file = "./test_pinning.o";
	__u32 map_id, map_id2, duration = 0;
	struct stat statbuf = {};
	struct bpf_object *obj;
	struct bpf_map *map;
	int err;
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.pin_root_path = custpath,
	);

	/* check that opening fails with invalid pinning value in map def */
	obj = bpf_object__open_file(file_invalid, NULL);
	err = libbpf_get_error(obj);
	if (CHECK(err != -EINVAL, "invalid open", "err %d errno %d\n", err, errno)) {
		obj = NULL;
		goto out;
	}

	/* open the valid object file */
	obj = bpf_object__open_file(file, NULL);
	err = libbpf_get_error(obj);
	if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
		obj = NULL;
		goto out;
	}

	err = bpf_object__load(obj);
	if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
		goto out;

	/* check that pinmap was pinned */
	err = stat(pinpath, &statbuf);
	if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno))
		goto out;

	/* check that nopinmap was *not* pinned */
	err = stat(nopinpath, &statbuf);
	if (CHECK(!err || errno != ENOENT, "stat nopinpath",
		  "err %d errno %d\n", err, errno))
		goto out;

	/* check that nopinmap2 was *not* pinned */
	err = stat(nopinpath2, &statbuf);
	if (CHECK(!err || errno != ENOENT, "stat nopinpath2",
		  "err %d errno %d\n", err, errno))
		goto out;

	map_id = get_map_id(obj, "pinmap");
	if (!map_id)
		goto out;

	bpf_object__close(obj);

	obj = bpf_object__open_file(file, NULL);
	if (CHECK_FAIL(libbpf_get_error(obj))) {
		obj = NULL;
		goto out;
	}

	err = bpf_object__load(obj);
	if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
		goto out;

	/* check that same map ID was reused for second load */
	map_id2 = get_map_id(obj, "pinmap");
	if (CHECK(map_id != map_id2, "check reuse",
		  "err %d errno %d id %d id2 %d\n", err, errno, map_id, map_id2))
		goto out;

	/* should be no-op to re-pin same map */
	map = bpf_object__find_map_by_name(obj, "pinmap");
	if (CHECK(!map, "find map", "NULL map"))
		goto out;

	err = bpf_map__pin(map, NULL);
	if (CHECK(err, "re-pin map", "err %d errno %d\n", err, errno))
		goto out;

	/* but error to pin at different location */
	err = bpf_map__pin(map, "/sys/fs/bpf/other");
	if (CHECK(!err, "pin map different", "err %d errno %d\n", err, errno))
		goto out;

	/* unpin maps with a pin_path set */
	err = bpf_object__unpin_maps(obj, NULL);
	if (CHECK(err, "unpin maps", "err %d errno %d\n", err, errno))
		goto out;

	/* and re-pin them... */
	err = bpf_object__pin_maps(obj, NULL);
	if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
		goto out;

	/* set pinning path of other map and re-pin all */
	map = bpf_object__find_map_by_name(obj, "nopinmap");
	if (CHECK(!map, "find map", "NULL map"))
		goto out;

	err = bpf_map__set_pin_path(map, custpinpath);
	if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
		goto out;

	/* should only pin the one unpinned map */
	err = bpf_object__pin_maps(obj, NULL);
	if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
		goto out;

	/* check that nopinmap was pinned at the custom path */
	err = stat(custpinpath, &statbuf);
	if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
		goto out;

	/* remove the custom pin path to re-test it with auto-pinning below */
	err = unlink(custpinpath);
	if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
		goto out;

	err = rmdir(custpath);
	if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
		goto out;

	bpf_object__close(obj);

	/* open the valid object file again */
	obj = bpf_object__open_file(file, NULL);
	err = libbpf_get_error(obj);
	if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
		obj = NULL;
		goto out;
	}

	/* swap pin paths of the two maps */
	bpf_object__for_each_map(map, obj) {
		if (!strcmp(bpf_map__name(map), "nopinmap"))
			err = bpf_map__set_pin_path(map, pinpath);
		else if (!strcmp(bpf_map__name(map), "pinmap"))
			err = bpf_map__set_pin_path(map, NULL);
		else
			continue;

		if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
			goto out;
	}

	/* should fail because of map parameter mismatch */
	err = bpf_object__load(obj);
	if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno))
		goto out;

	bpf_object__close(obj);

	/* test auto-pinning at custom path with open opt */
	obj = bpf_object__open_file(file, &opts);
	if (CHECK_FAIL(libbpf_get_error(obj))) {
		obj = NULL;
		goto out;
	}

	err = bpf_object__load(obj);
	if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
		goto out;

	/* check that pinmap was pinned at the custom path */
	err = stat(custpinpath, &statbuf);
	if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
		goto out;

out:
	unlink(pinpath);
	unlink(nopinpath);
	unlink(nopinpath2);
	unlink(custpinpath);
	rmdir(custpath);
	if (obj)
		bpf_object__close(obj);
}
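
All pin paths in this test live under /sys/fs/bpf, so it presumes a mounted bpffs, which the selftest environment provides. For reference, a sketch of the equivalent of `mount -t bpf bpf /sys/fs/bpf` from C (assumes root privileges):

    #include <errno.h>
    #include <sys/mount.h>

    static int ensure_bpffs(void)
    {
            /* EBUSY means a filesystem is already mounted there; that's fine */
            if (mount("bpf", "/sys/fs/bpf", "bpf", 0, NULL) && errno != EBUSY)
                    return -errno;
            return 0;
    }
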
tools/testing/selftests/bpf/prog_tests/probe_user.c (new file, 78 lines)

// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>

void test_probe_user(void)
{
#define kprobe_name "__sys_connect"
	const char *prog_name = "kprobe/" kprobe_name;
	const char *obj_file = "./test_probe_user.o";
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
	int err, results_map_fd, sock_fd, duration = 0;
	struct sockaddr curr, orig, tmp;
	struct sockaddr_in *in = (struct sockaddr_in *)&curr;
	struct bpf_link *kprobe_link = NULL;
	struct bpf_program *kprobe_prog;
	struct bpf_object *obj;
	static const int zero = 0;

	obj = bpf_object__open_file(obj_file, &opts);
	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
		return;

	kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
	if (CHECK(!kprobe_prog, "find_probe",
		  "prog '%s' not found\n", prog_name))
		goto cleanup;

	err = bpf_object__load(obj);
	if (CHECK(err, "obj_load", "err %d\n", err))
		goto cleanup;

	results_map_fd = bpf_find_map(__func__, obj, "test_pro.bss");
	if (CHECK(results_map_fd < 0, "find_bss_map",
		  "err %d\n", results_map_fd))
		goto cleanup;

	kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
						 kprobe_name);
	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
		  "err %ld\n", PTR_ERR(kprobe_link))) {
		kprobe_link = NULL;
		goto cleanup;
	}

	memset(&curr, 0, sizeof(curr));
	in->sin_family = AF_INET;
	in->sin_port = htons(5555);
	in->sin_addr.s_addr = inet_addr("255.255.255.255");
	memcpy(&orig, &curr, sizeof(curr));

	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd))
		goto cleanup;

	connect(sock_fd, &curr, sizeof(curr));
	close(sock_fd);

	err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
	if (CHECK(err, "get_kprobe_res",
		  "failed to get kprobe res: %d\n", err))
		goto cleanup;

	in = (struct sockaddr_in *)&tmp;
	if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
		  "wrong kprobe res from probe read: %s:%u\n",
		  inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
		goto cleanup;

	memset(&tmp, 0xab, sizeof(tmp));

	in = (struct sockaddr_in *)&curr;
	if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res",
		  "wrong kprobe res from probe write: %s:%u\n",
		  inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
		goto cleanup;
cleanup:
	bpf_link__destroy(kprobe_link);
	bpf_object__close(obj);
}
@@ -79,11 +79,11 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx)
 		func = ptr->func;
 	}));
 
-	bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
+	bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
 	pkt_type &= 7;
 
 	/* read eth proto */
-	bpf_probe_read(&pkt_data, sizeof(pkt_data), data + 12);
+	bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12);
 
 	bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
 	bpf_printk("skb->len %d users %d pkt_type %x\n",
@@ -72,9 +72,9 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
 	void* thread_state;
 	int key;
 
-	bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
-	bpf_probe_read(&thread_state, sizeof(thread_state),
-		       tls_base + 0x310 + key * 0x10 + 0x08);
+	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+	bpf_probe_read_user(&thread_state, sizeof(thread_state),
+			    tls_base + 0x310 + key * 0x10 + 0x08);
 	return thread_state;
 }
 
@@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
 					   FrameData *frame, Symbol *symbol)
 {
 	// read data from PyFrameObject
-	bpf_probe_read(&frame->f_back,
-		       sizeof(frame->f_back),
-		       frame_ptr + pidData->offsets.PyFrameObject_back);
-	bpf_probe_read(&frame->f_code,
-		       sizeof(frame->f_code),
-		       frame_ptr + pidData->offsets.PyFrameObject_code);
+	bpf_probe_read_user(&frame->f_back,
+			    sizeof(frame->f_back),
+			    frame_ptr + pidData->offsets.PyFrameObject_back);
+	bpf_probe_read_user(&frame->f_code,
+			    sizeof(frame->f_code),
+			    frame_ptr + pidData->offsets.PyFrameObject_code);
 
 	// read data from PyCodeObject
 	if (!frame->f_code)
 		return false;
-	bpf_probe_read(&frame->co_filename,
-		       sizeof(frame->co_filename),
-		       frame->f_code + pidData->offsets.PyCodeObject_filename);
-	bpf_probe_read(&frame->co_name,
-		       sizeof(frame->co_name),
-		       frame->f_code + pidData->offsets.PyCodeObject_name);
+	bpf_probe_read_user(&frame->co_filename,
+			    sizeof(frame->co_filename),
+			    frame->f_code + pidData->offsets.PyCodeObject_filename);
+	bpf_probe_read_user(&frame->co_name,
+			    sizeof(frame->co_name),
+			    frame->f_code + pidData->offsets.PyCodeObject_name);
 	// read actual names into symbol
 	if (frame->co_filename)
-		bpf_probe_read_str(&symbol->file,
-				   sizeof(symbol->file),
-				   frame->co_filename + pidData->offsets.String_data);
+		bpf_probe_read_user_str(&symbol->file,
+					sizeof(symbol->file),
+					frame->co_filename +
+					pidData->offsets.String_data);
 	if (frame->co_name)
-		bpf_probe_read_str(&symbol->name,
-				   sizeof(symbol->name),
-				   frame->co_name + pidData->offsets.String_data);
+		bpf_probe_read_user_str(&symbol->name,
+					sizeof(symbol->name),
+					frame->co_name +
+					pidData->offsets.String_data);
 	return true;
 }
 
@@ -174,9 +176,9 @@ static __always_inline int __on_event(struct pt_regs *ctx)
 	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
 
 	void* thread_state_current = (void*)0;
-	bpf_probe_read(&thread_state_current,
-		       sizeof(thread_state_current),
-		       (void*)(long)pidData->current_state_addr);
+	bpf_probe_read_user(&thread_state_current,
+			    sizeof(thread_state_current),
+			    (void*)(long)pidData->current_state_addr);
 
 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 	void* tls_base = (void*)task;
@@ -188,11 +190,13 @@ static __always_inline int __on_event(struct pt_regs *ctx)
 	if (pidData->use_tls) {
 		uint64_t pthread_created;
 		uint64_t pthread_self;
-		bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
+				    tls_base + 0x10);
 
-		bpf_probe_read(&pthread_created,
-			       sizeof(pthread_created),
-			       thread_state + pidData->offsets.PyThreadState_thread);
+		bpf_probe_read_user(&pthread_created,
+				    sizeof(pthread_created),
+				    thread_state +
+				    pidData->offsets.PyThreadState_thread);
 		event->pthread_match = pthread_created == pthread_self;
 	} else {
 		event->pthread_match = 1;
@@ -204,9 +208,10 @@ static __always_inline int __on_event(struct pt_regs *ctx)
 		Symbol sym = {};
 		int cur_cpu = bpf_get_smp_processor_id();
 
-		bpf_probe_read(&frame_ptr,
-			       sizeof(frame_ptr),
-			       thread_state + pidData->offsets.PyThreadState_frame);
+		bpf_probe_read_user(&frame_ptr,
+				    sizeof(frame_ptr),
+				    thread_state +
+				    pidData->offsets.PyThreadState_frame);
 
 		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
 		if (symbol_counter == NULL)
@@ -98,7 +98,7 @@ struct strobe_map_raw {
 	/*
 	 * having volatile doesn't change anything on BPF side, but clang
 	 * emits warnings for passing `volatile const char *` into
-	 * bpf_probe_read_str that expects just `const char *`
+	 * bpf_probe_read_user_str that expects just `const char *`
 	 */
 	const char* tag;
 	/*
@@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
 	dtv_t *dtv;
 	void *tls_ptr;
 
-	bpf_probe_read(&tls_index, sizeof(struct tls_index),
-		       (void *)loc->offset);
+	bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
+			    (void *)loc->offset);
 	/* valid module index is always positive */
 	if (tls_index.module > 0) {
 		/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
-		bpf_probe_read(&dtv, sizeof(dtv),
-			       &((struct tcbhead *)tls_base)->dtv);
+		bpf_probe_read_user(&dtv, sizeof(dtv),
+				    &((struct tcbhead *)tls_base)->dtv);
 		dtv += tls_index.module;
 	} else {
 		dtv = NULL;
 	}
-	bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
+	bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
 	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
 	return tls_ptr && tls_ptr != (void *)-1
 		? tls_ptr + tls_index.offset
@@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
 	if (!location)
 		return;
 
-	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
 	data->int_vals[idx] = value->val;
 	if (value->header.len)
 		data->int_vals_set_mask |= (1 << idx);
@@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
 	if (!location)
 		return 0;
 
-	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
-	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
 	/*
-	 * if bpf_probe_read_str returns error (<0), due to casting to
+	 * if bpf_probe_read_user_str returns error (<0), due to casting to
 	 * unsinged int, it will become big number, so next check is
 	 * sufficient to check for errors AND prove to BPF verifier, that
-	 * bpf_probe_read_str won't return anything bigger than
+	 * bpf_probe_read_user_str won't return anything bigger than
 	 * STROBE_MAX_STR_LEN
 	 */
 	if (len > STROBE_MAX_STR_LEN)
@@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 	if (!location)
 		return payload;
 
-	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
-	if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
+	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+	if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
 		return payload;
 
 	descr->id = map.id;
@@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 		data->req_meta_valid = 1;
 	}
 
-	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
+	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
 	if (len <= STROBE_MAX_STR_LEN) {
 		descr->tag_len = len;
 		payload += len;
@@ -418,15 +418,15 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 			break;
 
 		descr->key_lens[i] = 0;
-		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
-					 map.entries[i].key);
+		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+					      map.entries[i].key);
 		if (len <= STROBE_MAX_STR_LEN) {
 			descr->key_lens[i] = len;
 			payload += len;
 		}
 		descr->val_lens[i] = 0;
-		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
-					 map.entries[i].val);
+		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+					      map.entries[i].val);
 		if (len <= STROBE_MAX_STR_LEN) {
 			descr->val_lens[i] = len;
 			payload += len;
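
The pyperf and strobemeta conversions above all follow one rule: these programs walk user-space memory (Python interpreter state, TLS blocks), so every bpf_probe_read{,_str}() becomes the _user variant, making the address space explicit instead of guessed from the address. A self-contained sketch of the user-string case; the traced function and argument position are illustrative assumptions:

    #include <linux/ptrace.h>
    #include <linux/bpf.h>
    #include "bpf_helpers.h"
    #include "bpf_tracing.h"

    char path[256];

    /* do_sys_open()'s second argument is a user-space filename pointer,
     * so it is read with the explicitly-user string helper */
    SEC("kprobe/do_sys_open")
    int trace_open(struct pt_regs *ctx)
    {
            const char *filename = (const char *)PT_REGS_PARM2(ctx);

            bpf_probe_read_user_str(path, sizeof(path), filename);
            return 0;
    }

    char _license[] SEC("license") = "GPL";
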
tools/testing/selftests/bpf/progs/test_pinning.c (new file, 31 lines)

// SPDX-License-Identifier: GPL-2.0

#include <linux/bpf.h>
#include "bpf_helpers.h"

int _version SEC("version") = 1;

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
} pinmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} nopinmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
	__uint(pinning, LIBBPF_PIN_NONE);
} nopinmap2 SEC(".maps");

char _license[] SEC("license") = "GPL";
tools/testing/selftests/bpf/progs/test_pinning_invalid.c (new file, 16 lines)

// SPDX-License-Identifier: GPL-2.0

#include <linux/bpf.h>
#include "bpf_helpers.h"

int _version SEC("version") = 1;

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
	__uint(pinning, 2); /* invalid */
} nopinmap3 SEC(".maps");

char _license[] SEC("license") = "GPL";
tools/testing/selftests/bpf/progs/test_probe_user.c (new file, 26 lines)

// SPDX-License-Identifier: GPL-2.0

#include <linux/ptrace.h>
#include <linux/bpf.h>

#include <netinet/in.h>

#include "bpf_helpers.h"
#include "bpf_tracing.h"

static struct sockaddr_in old;

SEC("kprobe/__sys_connect")
int handle_sys_connect(struct pt_regs *ctx)
{
	void *ptr = (void *)PT_REGS_PARM2(ctx);
	struct sockaddr_in new;

	bpf_probe_read_user(&old, sizeof(old), ptr);
	__builtin_memset(&new, 0xab, sizeof(new));
	bpf_probe_write_user(ptr, &new, sizeof(new));

	return 0;
}

char _license[] SEC("license") = "GPL";
@@ -38,7 +38,7 @@
 #include <sys/socket.h>
 #include "bpf_helpers.h"
 
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
 #define TCP_ESTATS_MAGIC 0xBAADBEEF
 
 /* This test case needs "sock" and "pt_regs" data structure.
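
The _() macro is the selftests' shorthand for a single-field probe read; the pointers it chases here are kernel addresses, so it now names the kernel helper explicitly. A sketch of the pattern in isolation (the struct and field are hypothetical, for illustration):

    #include "bpf_helpers.h"

    #define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})

    struct demo_state { int state; };  /* hypothetical kernel object */

    /* the verifier never sees a direct dereference of sk, only the helper */
    static __always_inline int read_state(struct demo_state *sk)
    {
            return _(sk->state);
    }
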
@@ -314,9 +314,6 @@ class DebugfsDir:
                 continue
 
             p = os.path.join(path, f)
-            if not os.stat(p).st_mode & stat.S_IRUSR:
-                continue
-
             if os.path.isfile(p) and os.access(p, os.R_OK):
                 _, out = cmd('cat %s/%s' % (path, f))
                 dfs[f] = out.strip()
@@ -120,6 +120,29 @@ static struct sysctl_test tests[] = {
 		.newval = "(none)", /* same as default, should fail anyway */
 		.result = OP_EPERM,
 	},
+	{
+		.descr = "ctx:write sysctl:write read ok narrow",
+		.insns = {
+			/* u64 w = (u16)write & 1; */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sysctl, write)),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sysctl, write) + 2),
+#endif
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1),
+			/* return 1 - w; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/domainname",
+		.open_flags = O_WRONLY,
+		.newval = "(none)", /* same as default, should fail anyway */
+		.result = OP_EPERM,
+	},
 	{
 		.descr = "ctx:write sysctl:read write reject",
 		.insns = {
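
The new sysctl test loads only the low 16 bits of the 4-byte write field, which is why it needs the byte-order switch: the least-significant half-word sits at byte offset 0 on little-endian targets but at offset 2 on big-endian ones (the same property the Makefile's new MENDIAN flag keeps consistent for BPF objects). A runnable plain-C restatement of the offset rule:

    #include <endian.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned int word = 1;   /* only the low half-word is set */
            unsigned short half;

    #if __BYTE_ORDER == __LITTLE_ENDIAN
            memcpy(&half, (char *)&word + 0, sizeof(half));
    #else
            memcpy(&half, (char *)&word + 2, sizeof(half));
    #endif
            printf("%u\n", half);    /* prints 1 on either byte order */
            return 0;
    }
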