[v5,1/3] net/tap: support infrastructure to build the BPF filter

Message ID 20231031224429.150002-2-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series net/tap: build and fix for BPF program |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Stephen Hemminger Oct. 31, 2023, 10:42 p.m. UTC
  Move the BPF program related code into a subdirectory.
And add a Makefile for building it.

The code depends on include files from iproute2.
But these are not public headers which iproute2 exports
as a package API. Therefore make a local copy here.

The standalone build was also broken because by
commit ef5baf3486e0 ("replace packed attributes")
which introduced __rte_packed into this code.

Add a python program to extract the resulting BPF into
a format that can be consumed by the TAP driver.

Update the documentation.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 doc/guides/nics/tap.rst                     |  11 +-
 drivers/net/tap/bpf/.gitignore              |   1 +
 drivers/net/tap/bpf/Makefile                |  18 ++
 drivers/net/tap/bpf/bpf_api.h               | 275 ++++++++++++++++++++
 drivers/net/tap/bpf/bpf_elf.h               |  53 ++++
 drivers/net/tap/bpf/bpf_extract.py          |  86 ++++++
 drivers/net/tap/{ => bpf}/tap_bpf_program.c |  10 +-
 drivers/net/tap/tap_rss.h                   |   2 +-
 8 files changed, 444 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/tap/bpf/.gitignore
 create mode 100644 drivers/net/tap/bpf/Makefile
 create mode 100644 drivers/net/tap/bpf/bpf_api.h
 create mode 100644 drivers/net/tap/bpf/bpf_elf.h
 create mode 100644 drivers/net/tap/bpf/bpf_extract.py
 rename drivers/net/tap/{ => bpf}/tap_bpf_program.c (96%)
  

Patch

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index 07df0d35a2..449e747994 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -256,15 +256,12 @@  C functions under different ELF sections.
 
 2. Install ``LLVM`` library and ``clang`` compiler versions 3.7 and above
 
-3. Compile ``tap_bpf_program.c`` via ``LLVM`` into an object file::
+3. Use make to compile  `tap_bpf_program.c`` via ``LLVM`` into an object file
+   and extract the resulting instructions into ``tap_bpf_insn.h``.
 
-    clang -O2 -emit-llvm -c tap_bpf_program.c -o - | llc -march=bpf \
-    -filetype=obj -o <tap_bpf_program.o>
+    cd bpf; make
 
-
-4. Use a tool that receives two parameters: an eBPF object file and a section
-name, and prints out the section as a C array of eBPF instructions.
-Embed the C array in your TAP PMD tree.
+4. Recompile the TAP PMD.
 
 The C arrays are uploaded to the kernel using BPF system calls.
 
diff --git a/drivers/net/tap/bpf/.gitignore b/drivers/net/tap/bpf/.gitignore
new file mode 100644
index 0000000000..30a258f1af
--- /dev/null
+++ b/drivers/net/tap/bpf/.gitignore
@@ -0,0 +1 @@ 
+tap_bpf_program.o
diff --git a/drivers/net/tap/bpf/Makefile b/drivers/net/tap/bpf/Makefile
new file mode 100644
index 0000000000..e5ae4e1f5a
--- /dev/null
+++ b/drivers/net/tap/bpf/Makefile
@@ -0,0 +1,18 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# This file is not built as part of normal DPDK build.
+# It is used to generate the eBPF code for TAP RSS.
+CLANG=clang
+CLANG_OPTS=-O2
+TARGET=../tap_bpf_insns.h
+
+all: $(TARGET)
+
+clean:
+	rm tap_bpf_program.o $(TARGET)
+
+tap_bpf_program.o: tap_bpf_program.c
+	$(CLANG) $(CLANG_OPTS) -emit-llvm -c $< -o - | \
+	llc -march=bpf -filetype=obj -o $@
+
+$(TARGET): bpf_extract.py tap_bpf_program.o
+	python3 bpf_extract.py tap_bpf_program.o $@
diff --git a/drivers/net/tap/bpf/bpf_api.h b/drivers/net/tap/bpf/bpf_api.h
new file mode 100644
index 0000000000..5887d3a851
--- /dev/null
+++ b/drivers/net/tap/bpf/bpf_api.h
@@ -0,0 +1,275 @@ 
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+#ifndef __BPF_API__
+#define __BPF_API__
+
+/* Note:
+ *
+ * This file can be included into eBPF kernel programs. It contains
+ * a couple of useful helper functions, map/section ABI (bpf_elf.h),
+ * misc macros and some eBPF specific LLVM built-ins.
+ */
+
+#include <stdint.h>
+
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+#include <asm/byteorder.h>
+
+#include "bpf_elf.h"
+
+/** libbpf pin type. */
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
+/** Type helper macros. */
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
+
+/** Misc macros. */
+
+#ifndef __stringify
+# define __stringify(X)		#X
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused		__attribute__((__unused__))
+#endif
+
+#ifndef offsetof
+# define offsetof(TYPE, MEMBER)	__builtin_offsetof(TYPE, MEMBER)
+#endif
+
+#ifndef likely
+# define likely(X)		__builtin_expect(!!(X), 1)
+#endif
+
+#ifndef unlikely
+# define unlikely(X)		__builtin_expect(!!(X), 0)
+#endif
+
+#ifndef htons
+# define htons(X)		__constant_htons((X))
+#endif
+
+#ifndef ntohs
+# define ntohs(X)		__constant_ntohs((X))
+#endif
+
+#ifndef htonl
+# define htonl(X)		__constant_htonl((X))
+#endif
+
+#ifndef ntohl
+# define ntohl(X)		__constant_ntohl((X))
+#endif
+
+#ifndef __inline__
+# define __inline__		__attribute__((always_inline))
+#endif
+
+/** Section helper macros. */
+
+#ifndef __section
+# define __section(NAME)						\
+	__attribute__((section(NAME), used))
+#endif
+
+#ifndef __section_tail
+# define __section_tail(ID, KEY)					\
+	__section(__stringify(ID) "/" __stringify(KEY))
+#endif
+
+#ifndef __section_xdp_entry
+# define __section_xdp_entry						\
+	__section(ELF_SECTION_PROG)
+#endif
+
+#ifndef __section_cls_entry
+# define __section_cls_entry						\
+	__section(ELF_SECTION_CLASSIFIER)
+#endif
+
+#ifndef __section_act_entry
+# define __section_act_entry						\
+	__section(ELF_SECTION_ACTION)
+#endif
+
+#ifndef __section_lwt_entry
+# define __section_lwt_entry						\
+	__section(ELF_SECTION_PROG)
+#endif
+
+#ifndef __section_license
+# define __section_license						\
+	__section(ELF_SECTION_LICENSE)
+#endif
+
+#ifndef __section_maps
+# define __section_maps							\
+	__section(ELF_SECTION_MAPS)
+#endif
+
+/** Declaration helper macros. */
+
+#ifndef BPF_LICENSE
+# define BPF_LICENSE(NAME)						\
+	char ____license[] __section_license = NAME
+#endif
+
+/** Classifier helper */
+
+#ifndef BPF_H_DEFAULT
+# define BPF_H_DEFAULT	-1
+#endif
+
+/** BPF helper functions for tc. Individual flags are in linux/bpf.h */
+
+#ifndef __BPF_FUNC
+# define __BPF_FUNC(NAME, ...)						\
+	(* NAME)(__VA_ARGS__) __maybe_unused
+#endif
+
+#ifndef BPF_FUNC
+# define BPF_FUNC(NAME, ...)						\
+	__BPF_FUNC(NAME, __VA_ARGS__) = (void *) BPF_FUNC_##NAME
+#endif
+
+/* Map access/manipulation */
+static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
+static int BPF_FUNC(map_update_elem, void *map, const void *key,
+		    const void *value, uint32_t flags);
+static int BPF_FUNC(map_delete_elem, void *map, const void *key);
+
+/* Time access */
+static uint64_t BPF_FUNC(ktime_get_ns);
+
+/* Debugging */
+
+/* FIXME: __attribute__ ((format(printf, 1, 3))) not possible unless
+ * llvm bug https://llvm.org/bugs/show_bug.cgi?id=26243 gets resolved.
+ * It would require ____fmt to be made const, which generates a reloc
+ * entry (non-map).
+ */
+static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
+
+#ifndef printt
+# define printt(fmt, ...)						\
+	({								\
+		char ____fmt[] = fmt;					\
+		trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__);	\
+	})
+#endif
+
+/* Random numbers */
+static uint32_t BPF_FUNC(get_prandom_u32);
+
+/* Tail calls */
+static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
+		     uint32_t index);
+
+/* System helpers */
+static uint32_t BPF_FUNC(get_smp_processor_id);
+static uint32_t BPF_FUNC(get_numa_node_id);
+
+/* Packet misc meta data */
+static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb);
+static int BPF_FUNC(skb_under_cgroup, void *map, uint32_t index);
+
+static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(set_hash_invalid, struct __sk_buff *skb);
+
+/* Packet redirection */
+static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
+static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex,
+		    uint32_t flags);
+
+/* Packet manipulation */
+static int BPF_FUNC(skb_load_bytes, struct __sk_buff *skb, uint32_t off,
+		    void *to, uint32_t len);
+static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off,
+		    const void *from, uint32_t len, uint32_t flags);
+
+static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off,
+		    uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
+		    uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(csum_diff, const void *from, uint32_t from_size,
+		    const void *to, uint32_t to_size, uint32_t seed);
+static int BPF_FUNC(csum_update, struct __sk_buff *skb, uint32_t wsum);
+
+static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type);
+static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto,
+		    uint32_t flags);
+static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen,
+		    uint32_t flags);
+
+static int BPF_FUNC(skb_pull_data, struct __sk_buff *skb, uint32_t len);
+
+/* Event notification */
+static int __BPF_FUNC(skb_event_output, struct __sk_buff *skb, void *map,
+		      uint64_t index, const void *data, uint32_t size) =
+		      (void *) BPF_FUNC_perf_event_output;
+
+/* Packet vlan encap/decap */
+static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
+		    uint16_t vlan_tci);
+static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb);
+
+/* Packet tunnel encap/decap */
+static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb,
+		    struct bpf_tunnel_key *to, uint32_t size, uint32_t flags);
+static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb,
+		    const struct bpf_tunnel_key *from, uint32_t size,
+		    uint32_t flags);
+
+static int BPF_FUNC(skb_get_tunnel_opt, struct __sk_buff *skb,
+		    void *to, uint32_t size);
+static int BPF_FUNC(skb_set_tunnel_opt, struct __sk_buff *skb,
+		    const void *from, uint32_t size);
+
+/** LLVM built-ins, mem*() routines work for constant size */
+
+#ifndef lock_xadd
+# define lock_xadd(ptr, val)	((void) __sync_fetch_and_add(ptr, val))
+#endif
+
+#ifndef memset
+# define memset(s, c, n)	__builtin_memset((s), (c), (n))
+#endif
+
+#ifndef memcpy
+# define memcpy(d, s, n)	__builtin_memcpy((d), (s), (n))
+#endif
+
+#ifndef memmove
+# define memmove(d, s, n)	__builtin_memmove((d), (s), (n))
+#endif
+
+/* FIXME: __builtin_memcmp() is not yet fully useable unless llvm bug
+ * https://llvm.org/bugs/show_bug.cgi?id=26218 gets resolved. Also
+ * this one would generate a reloc entry (non-map), otherwise.
+ */
+#if 0
+#ifndef memcmp
+# define memcmp(a, b, n)	__builtin_memcmp((a), (b), (n))
+#endif
+#endif
+
+unsigned long long load_byte(void *skb, unsigned long long off)
+	asm ("llvm.bpf.load.byte");
+
+unsigned long long load_half(void *skb, unsigned long long off)
+	asm ("llvm.bpf.load.half");
+
+unsigned long long load_word(void *skb, unsigned long long off)
+	asm ("llvm.bpf.load.word");
+
+#endif /* __BPF_API__ */
diff --git a/drivers/net/tap/bpf/bpf_elf.h b/drivers/net/tap/bpf/bpf_elf.h
new file mode 100644
index 0000000000..ea8a11c95c
--- /dev/null
+++ b/drivers/net/tap/bpf/bpf_elf.h
@@ -0,0 +1,53 @@ 
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+#ifndef __BPF_ELF__
+#define __BPF_ELF__
+
+#include <asm/types.h>
+
+/* Note:
+ *
+ * Below ELF section names and bpf_elf_map structure definition
+ * are not (!) kernel ABI. It's rather a "contract" between the
+ * application and the BPF loader in tc. For compatibility, the
+ * section names should stay as-is. Introduction of aliases, if
+ * needed, are a possibility, though.
+ */
+
+/* ELF section names, etc */
+#define ELF_SECTION_LICENSE	"license"
+#define ELF_SECTION_MAPS	"maps"
+#define ELF_SECTION_PROG	"prog"
+#define ELF_SECTION_CLASSIFIER	"classifier"
+#define ELF_SECTION_ACTION	"action"
+
+#define ELF_MAX_MAPS		64
+#define ELF_MAX_LICENSE_LEN	128
+
+/* Object pinning settings */
+#define PIN_NONE		0
+#define PIN_OBJECT_NS		1
+#define PIN_GLOBAL_NS		2
+
+/* ELF map definition */
+struct bpf_elf_map {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+	__u32 id;
+	__u32 pinning;
+	__u32 inner_id;
+	__u32 inner_idx;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
+	struct ____btf_map_##name {				\
+		type_key key;					\
+		type_val value;					\
+	};							\
+	struct ____btf_map_##name				\
+	    __attribute__ ((section(".maps." #name), used))	\
+	    ____btf_map_##name = { }
+
+#endif /* __BPF_ELF__ */
diff --git a/drivers/net/tap/bpf/bpf_extract.py b/drivers/net/tap/bpf/bpf_extract.py
new file mode 100644
index 0000000000..5e1aee38c8
--- /dev/null
+++ b/drivers/net/tap/bpf/bpf_extract.py
@@ -0,0 +1,86 @@ 
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Stephen Hemminger <stephen@networkplumber.org>
+
+import argparse
+import sys
+import struct
+from tempfile import TemporaryFile
+from elftools.elf.elffile import ELFFile
+
+
+def load_sections(elffile):
+    """Get sections of interest from ELF"""
+    result = []
+    DATA = [("cls_q", "cls_q_insns"), ("l3_l4", "l3_l4_hash_insns")]
+    for name, tag in DATA:
+        section = elffile.get_section_by_name(name)
+        if section:
+            insns = struct.iter_unpack('<BBhL', section.data())
+            result.append([tag, insns])
+    return result
+
+
+def dump_section(name, insns, out):
+    """Dump the array of BPF instructructions"""
+    print(f'\nstatic struct bpf_insn {name}[] = {{', file=out)
+    for bpf in insns:
+        code = bpf[0]
+        src = bpf[1] >> 4
+        dst = bpf[1] & 0xf
+        off = bpf[2]
+        imm = bpf[3]
+        print(f'\t{{{code:#02x}, {dst:4d}, {src:4d}, {off:8d}, {imm:#010x}}},',
+              file=out)
+    print('};', file=out)
+
+
+def parse_args():
+    """Parse command line arguments"""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input",
+                        nargs='+',
+                        help="input object file path or '-' for stdin")
+    parser.add_argument("output", help="output C file path or '-' for stdout")
+    return parser.parse_args()
+
+
+def open_input(path):
+    """Open the input file or stdin"""
+    if path == "-":
+        temp = TemporaryFile()
+        temp.write(sys.stdin.buffer.read())
+        return temp
+    return open(path, "rb")
+
+
+def open_output(path):
+    """Open the output file or stdout"""
+    if path == "-":
+        return sys.stdout
+    return open(path, "w")
+
+
+def write_header(output):
+    """Write file intro header"""
+    print("/* SPDX-License-Identifier: BSD-3-Clause", file=output)
+    print(" * Compiled BPF instructions do not edit", file=output)
+    print(" */\n", file=output)
+    print("#include <tap_bpf.h>", file=output)
+
+
+def main():
+    '''program main function'''
+    args = parse_args()
+
+    output = open_output(args.output)
+    write_header(output)
+    for path in args.input:
+        elffile = ELFFile(open_input(path))
+        sections = load_sections(elffile)
+        for name, insns in sections:
+            dump_section(name, insns, output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/drivers/net/tap/tap_bpf_program.c b/drivers/net/tap/bpf/tap_bpf_program.c
similarity index 96%
rename from drivers/net/tap/tap_bpf_program.c
rename to drivers/net/tap/bpf/tap_bpf_program.c
index 20c310e5e7..d65021d8a1 100644
--- a/drivers/net/tap/tap_bpf_program.c
+++ b/drivers/net/tap/bpf/tap_bpf_program.c
@@ -14,9 +14,10 @@ 
 #include <linux/ipv6.h>
 #include <linux/if_tunnel.h>
 #include <linux/filter.h>
-#include <linux/bpf.h>
 
-#include "tap_rss.h"
+#include "bpf_api.h"
+#include "bpf_elf.h"
+#include "../tap_rss.h"
 
 /** Create IPv4 address */
 #define IPv4(a, b, c, d) ((__u32)(((a) & 0xff) << 24) | \
@@ -56,6 +57,7 @@  __section("cls_q") int
 match_q(struct __sk_buff *skb)
 {
 	__u32 queue = skb->cb[1];
+	/* queue is set by tap_flow_bpf_cls_q() before load */
 	volatile __u32 q = 0xdeadbeef;
 	__u32 match_queue = QUEUE_OFFSET + q;
 
@@ -75,14 +77,14 @@  struct ipv4_l3_l4_tuple {
 	__u32    dst_addr;
 	__u16    dport;
 	__u16    sport;
-} __rte_packed;
+} __attribute__((packed));
 
 struct ipv6_l3_l4_tuple {
 	__u8        src_addr[16];
 	__u8        dst_addr[16];
 	__u16       dport;
 	__u16       sport;
-} __rte_packed;
+} __attribute__((packed));
 
 static const __u8 def_rss_key[TAP_RSS_HASH_KEY_SIZE] = {
 	0xd1, 0x81, 0xc6, 0x2c,
diff --git a/drivers/net/tap/tap_rss.h b/drivers/net/tap/tap_rss.h
index 48c151cf6b..dff46a012f 100644
--- a/drivers/net/tap/tap_rss.h
+++ b/drivers/net/tap/tap_rss.h
@@ -35,6 +35,6 @@  struct rss_key {
 	__u32 key_size;
 	__u32 queues[TAP_MAX_QUEUES];
 	__u32 nb_queues;
-} __rte_packed;
+} __attribute__((packed));
 
 #endif /* _TAP_RSS_H_ */