summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- net/Makefile 1
-rw-r--r-- net/bird2/Makefile 3
-rw-r--r-- net/bird3/Makefile 62
-rw-r--r-- net/bird3/distinfo 3
-rw-r--r-- net/bird3/files/bird.in 30
-rw-r--r-- net/bird3/files/patch-00-kernel-Fix-crash-for-merge-paths 38
-rw-r--r-- net/bird3/files/patch-01-Table-not-feeding-twice 39
-rw-r--r-- net/bird3/files/patch-02-kernel-trace-the-final-result 53
-rw-r--r-- net/bird3/files/patch-03-BGP-fix-locking-order 176
-rw-r--r-- net/bird3/files/patch-04-BFD-Fix-session-locking-order 400
-rw-r--r-- net/bird3/files/patch-05-mainloop-dropped-old-socket 86
-rw-r--r-- net/bird3/files/patch-06-cli-allocate-tx-buffers 134
-rw-r--r-- net/bird3/files/patch-07-cli-flushing-tmp-linpool 29
-rw-r--r-- net/bird3/files/patch-08-kernel-feed-only-once 274
-rw-r--r-- net/bird3/files/patch-09-graceful-recovery 311
-rw-r--r-- net/bird3/files/patch-10-stonehenge 116
-rw-r--r-- net/bird3/files/patch-11-route-attribute-storage 80
-rw-r--r-- net/bird3/files/patch-12-BGP-tx-bucket-storage 84
-rw-r--r-- net/bird3/files/patch-13-allocate-normalization-buckets 100
-rw-r--r-- net/bird3/files/patch-14-BGP-fix-dislpay-name 25
-rw-r--r-- net/bird3/files/patch-15-BGP-fixed-deterministic-med-crashes 65
-rw-r--r-- net/bird3/files/patch-16-Table-old-best-route-refeed-fix 87
-rw-r--r-- net/bird3/files/patch-Makefile.in 11
-rw-r--r-- net/bird3/files/pkg-message.in 11
-rw-r--r-- net/bird3/pkg-descr 14
-rw-r--r-- net/bird3/pkg-plist 4
26 files changed, 2234 insertions, 2 deletions
diff --git a/net/Makefile b/net/Makefile
index 151a98fa8881..9093da9bdb0b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -57,6 +57,7 @@
SUBDIR += bindtest
SUBDIR += binkd
SUBDIR += bird2
+ SUBDIR += bird3
SUBDIR += bittwist
SUBDIR += bmon
SUBDIR += boinc-client
diff --git a/net/bird2/Makefile b/net/bird2/Makefile
index 7a6d87747d6f..3a5dc86df54a 100644
--- a/net/bird2/Makefile
+++ b/net/bird2/Makefile
@@ -15,8 +15,7 @@ rtsock_PKGNAMESUFFIX= 2-rtsock
USES= bison cpe gmake ncurses readline
-CONFLICTS= bird
-CONFLICTS+= bird6
+CONFLICTS= bird3
CPE_VENDOR= nic
diff --git a/net/bird3/Makefile b/net/bird3/Makefile
new file mode 100644
index 000000000000..8c27bcafdc41
--- /dev/null
+++ b/net/bird3/Makefile
@@ -0,0 +1,62 @@
+PORTNAME= bird
+DISTVERSION= 3.0.0
+CATEGORIES= net
+MASTER_SITES= https://bird.network.cz/download/
+PKGNAMESUFFIX= 3
+
+MAINTAINER= olivier@FreeBSD.org
+COMMENT= Dynamic multithreaded IP routing daemon
+WWW= https://bird.network.cz/
+
+LICENSE= GPLv2
+
+USES= bison cpe gmake ncurses readline
+
+CONFLICTS= bird2
+
+CPE_VENDOR= nic
+
+USE_CSTD= gnu99
+GNU_CONFIGURE= yes
+CONFIGURE_ARGS= --localstatedir=/var
+USE_RC_SUBR= bird
+SUB_FILES= pkg-message
+
+GROUPS= birdvty
+
+MAKE_JOBS_UNSAFE= yes
+
+OPTIONS_MULTI= RP
+RP_DESC= Routing Protocols
+OPTIONS_MULTI_RP= BFD BABEL BMP BGP MRT OSPF PIPE RADV RIP RPKI STATIC
+OPTIONS_DEFAULT= BFD BABEL BGP MRT OSPF PIPE RADV RIP RPKI STATIC
+
+BFD_DESC= Bidirectional Forwarding Detection
+BABEL_DESC= Babel routing protocol
+BGP_DESC= Border Gateway Protocol
+BMP_DESC= BGP Monitoring Protocol
+MRT_DESC= Dumping Routing Information in MRT Format
+OSPF_DESC= Open Shortest Path First
+PIPE_DESC= PIPE routing
+RADV_DESC= Router Advertisement
+RIP_DESC= Routing Information Protocol
+RPKI_DESC= Resource Public Key Infrastructure
+STATIC_DESC= Static routing
+
+BFD_VARS= rt_prot+=bfd
+BABEL_VARS= rt_prot+=babel
+BGP_VARS= rt_prot+=bgp
+BMP_VARS= rt_prot+=bmp
+MRT_VARS= rt_prot+=mrt
+OSPF_VARS= rt_prot+=ospf
+PIPE_VARS= rt_prot+=pipe
+RADV_VARS= rt_prot+=radv
+RIP_VARS= rt_prot+=rip
+RPKI_VARS= rt_prot+=rpki
+STATIC_VARS= rt_prot+=static
+
+CONFIGURE_ARGS+=--with-protocols="${RT_PROT}"
+CONFIGURE_ARGS+=--with-sysconfig=bsd-netlink
+RPKI_LIB_DEPENDS= libssh.so:security/libssh
+
+.include <bsd.port.mk>
diff --git a/net/bird3/distinfo b/net/bird3/distinfo
new file mode 100644
index 000000000000..66fda3f7d35c
--- /dev/null
+++ b/net/bird3/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1734554961
+SHA256 (bird-3.0.0.tar.gz) = 8130440a2e273ba6456df2fb3acb43da7cb4d566f94a294a3a52a1b118f2512a
+SIZE (bird-3.0.0.tar.gz) = 2641569
diff --git a/net/bird3/files/bird.in b/net/bird3/files/bird.in
new file mode 100644
index 000000000000..de800bd69b81
--- /dev/null
+++ b/net/bird3/files/bird.in
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# PROVIDE: bird dynamicrouting
+# REQUIRE: LOGIN
+# KEYWORD: shutdown
+#
+# Add the following lines to /etc/rc.conf.local or /etc/rc.conf
+# to enable this service:
+#
+# bird_enable (bool): Set to NO by default.
+# Set it to YES to enable bird.
+# bird_config (path): Set to %%PREFIX%%/etc/bird.conf
+# by default.
+#
+
+. /etc/rc.subr
+
+name="bird"
+rcvar=bird_enable
+
+load_rc_config $name
+
+: ${bird_enable="NO"}
+: ${bird_config="%%PREFIX%%/etc/bird.conf"}
+: ${bird_group="birdvty"}
+
+command=%%PREFIX%%/sbin/${name}
+command_args="-c $bird_config -g $bird_group"
+
+run_rc_command "$1"
diff --git a/net/bird3/files/patch-00-kernel-Fix-crash-for-merge-paths b/net/bird3/files/patch-00-kernel-Fix-crash-for-merge-paths
new file mode 100644
index 000000000000..d008d4cf070c
--- /dev/null
+++ b/net/bird3/files/patch-00-kernel-Fix-crash-for-merge-paths
@@ -0,0 +1,38 @@
+From b6caccfd45fb639b6dd3a8d140d3c5ba4cc79311 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Thu, 19 Dec 2024 11:00:15 +0100
+Subject: [PATCH] Kernel: Fix crash for merge paths on if no route is in BIRD
+
+There was a missing check for a NULL return value.
+Also fixed an indenting error.
+
+Thanks to Radu Anghel for reporting it:
+https://bird.network.cz/pipermail/bird-users/2024-December/017977.html
+---
+ nest/rt-table.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/nest/rt-table.c b/nest/rt-table.c
+index fd8bb50dd..05191d743 100644
+--- nest/rt-table.c
++++ nest/rt-table.c
+@@ -5265,14 +5265,14 @@ krt_export_net(struct channel *c, const net_addr *a, linpool *lp)
+ if (c->ra_mode == RA_MERGED)
+ {
+ struct rt_export_feed *feed = rt_net_feed(c->table, a, NULL);
+- if (!feed->count_routes)
++ if (!feed || !feed->count_routes)
+ return NULL;
+
+ if (!bmap_test(&c->export_accepted_map, feed->block[0].id))
+ return NULL;
+
+ return rt_export_merged(c, feed, lp, 1);
+- }
++ }
+
+ static _Thread_local rte best;
+ best = rt_net_best(c->table, a);
+--
+GitLab
+
diff --git a/net/bird3/files/patch-01-Table-not-feeding-twice b/net/bird3/files/patch-01-Table-not-feeding-twice
new file mode 100644
index 000000000000..4fb40a644fb2
--- /dev/null
+++ b/net/bird3/files/patch-01-Table-not-feeding-twice
@@ -0,0 +1,39 @@
+From 0a2f92ad205d96d0be0945ecf2bb740b68d5a3c1 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Thu, 19 Dec 2024 11:54:05 +0100
+Subject: [PATCH] Table: not feeding twice, once is enough
+
+If there is no feed pending, the requested one should be
+activated immediately, otherwise it is activated only after
+the full run, effectively running first a full feed and
+then the requested one.
+---
+ nest/rt-export.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/nest/rt-export.c b/nest/rt-export.c
+index 7368447de..7d51e54cf 100644
+--- nest/rt-export.c
++++ nest/rt-export.c
+@@ -357,8 +357,16 @@ rt_export_refeed_feeder(struct rt_export_feeder *f, struct rt_feeding_request *r
+ if (!rfr)
+ return;
+
+- rfr->next = f->feed_pending;
+- f->feed_pending = rfr;
++ if (f->feeding)
++ {
++ rfr->next = f->feed_pending;
++ f->feed_pending = rfr;
++ }
++ else
++ {
++ rfr->next = NULL;
++ f->feeding = rfr;
++ }
+ }
+
+ void rt_export_refeed_request(struct rt_export_request *rer, struct rt_feeding_request *rfr)
+--
+GitLab
+
diff --git a/net/bird3/files/patch-02-kernel-trace-the-final-result b/net/bird3/files/patch-02-kernel-trace-the-final-result
new file mode 100644
index 000000000000..a3c97320f30e
--- /dev/null
+++ b/net/bird3/files/patch-02-kernel-trace-the-final-result
@@ -0,0 +1,53 @@
+From ab74652f96c301dd2d2d2a831dd1a159ae1d5e02 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Thu, 19 Dec 2024 12:28:27 +0100
+Subject: [PATCH] Kernel: when channel traces, we have to trace the final
+ result
+
+Otherwise it looks like we are sending too much traffic to netlink
+every other while, which is not true. Now we can disambiguate between
+in-kernel updates and ignored routes.
+---
+ sysdep/unix/krt.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
+index 2770b8be2..34882b88f 100644
+--- sysdep/unix/krt.c
++++ sysdep/unix/krt.c
+@@ -672,7 +672,7 @@ krt_preexport(struct channel *C, rte *e)
+ }
+
+ static void
+-krt_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net,
++krt_rt_notify(struct proto *P, struct channel *ch, const net_addr *net,
+ rte *new, const rte *old)
+ {
+ struct krt_proto *p = (struct krt_proto *) P;
+@@ -688,13 +688,21 @@ krt_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net,
+ case KPS_IDLE:
+ case KPS_PRUNING:
+ if (new && bmap_test(&p->seen_map, new->id))
+- /* Already installed and seen in the kernel dump */
+- return;
++ if (ch->debug & D_ROUTES)
++ {
++ /* Already installed and seen in the kernel dump */
++ log(L_TRACE "%s.%s: %N already in kernel",
++ P->name, ch->name, net);
++ return;
++ }
+
+ /* fall through */
+ case KPS_SCANNING:
+ /* Actually replace the route */
+ krt_replace_rte(p, net, new, old);
++ if (ch->debug & D_ROUTES)
++ log(L_TRACE "%s.%s: %N %s kernel",
++ P->name, ch->name, net, old ? "replaced in" : "added to");
+ break;
+
+ }
+--
+GitLab
+
diff --git a/net/bird3/files/patch-03-BGP-fix-locking-order b/net/bird3/files/patch-03-BGP-fix-locking-order
new file mode 100644
index 000000000000..51b73c26f8f8
--- /dev/null
+++ b/net/bird3/files/patch-03-BGP-fix-locking-order
@@ -0,0 +1,176 @@
+From 6779e5da698feb9b9e02411859ad81885ba46c01 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Fri, 20 Dec 2024 11:28:00 +0100
+Subject: [PATCH] BGP: fix locking order error on dynamic protocol spawn
+
+We missed that the protocol spawner violates the prescribed
+locking order. When the rtable level is locked, no new protocol can be
+started, thus we need to:
+
+* create the protocol from a clean mainloop context
+* in protocol start hook, take the socket
+
+Testsuite: cf-bgp-autopeer
+Fixes: #136
+
+Thanks to Job Snijders <job@fastly.com> for reporting:
+https://trubka.network.cz/pipermail/bird-users/2024-December/017980.html
+---
+ nest/proto.c | 19 +++++++++++++++++++
+ nest/protocol.h | 2 ++
+ proto/bgp/bgp.c | 46 +++++++++++++++++++++++++++++++++++-----------
+ 3 files changed, 56 insertions(+), 11 deletions(-)
+
+diff --git a/nest/proto.c b/nest/proto.c
+index dded84f51..678697d69 100644
+--- nest/proto.c
++++ nest/proto.c
+@@ -1867,6 +1867,25 @@ proto_spawn(struct proto_config *cf, uint disabled)
+ return p;
+ }
+
++bool
++proto_disable(struct proto *p)
++{
++ ASSERT_DIE(birdloop_inside(&main_birdloop));
++ bool changed = !p->disabled;
++ p->disabled = 1;
++ proto_rethink_goal(p);
++ return changed;
++}
++
++bool
++proto_enable(struct proto *p)
++{
++ ASSERT_DIE(birdloop_inside(&main_birdloop));
++ bool changed = p->disabled;
++ p->disabled = 0;
++ proto_rethink_goal(p);
++ return changed;
++}
+
+ /**
+ * DOC: Graceful restart recovery
+diff --git a/nest/protocol.h b/nest/protocol.h
+index 25ed6f553..cf7ecb898 100644
+--- nest/protocol.h
++++ nest/protocol.h
+@@ -78,6 +78,8 @@ void proto_build(struct protocol *); /* Called from protocol to register itself
+ void protos_preconfig(struct config *);
+ void protos_commit(struct config *new, struct config *old, int type);
+ struct proto * proto_spawn(struct proto_config *cf, uint disabled);
++bool proto_disable(struct proto *p);
++bool proto_enable(struct proto *p);
+ void protos_dump_all(struct dump_request *);
+
+ #define GA_UNKNOWN 0 /* Attribute not recognized */
+diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
+index 5fc2b5fff..3170e3a42 100644
+--- proto/bgp/bgp.c
++++ proto/bgp/bgp.c
+@@ -378,8 +378,6 @@ bgp_startup(struct bgp_proto *p)
+ if (p->postponed_sk)
+ {
+ /* Apply postponed incoming connection */
+- sk_reloop(p->postponed_sk, p->p.loop);
+-
+ bgp_setup_conn(p, &p->incoming_conn);
+ bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
+ bgp_send_open(&p->incoming_conn);
+@@ -583,6 +581,9 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len
+ static void
+ bgp_down(struct bgp_proto *p)
+ {
++ /* Check that the dynamic BGP socket has been picked up */
++ ASSERT_DIE(p->postponed_sk == NULL);
++
+ if (bgp_start_state(p) > BSS_PREPARE)
+ {
+ bgp_setup_auth(p, 0);
+@@ -617,8 +618,8 @@ bgp_decision(void *vp)
+ bgp_down(p);
+ }
+
+-static struct bgp_proto *
+-bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
++static void
++bgp_spawn(struct bgp_proto *pp, struct birdsock *sk)
+ {
+ struct symbol *sym;
+ char fmt[SYM_MAX_LEN];
+@@ -635,9 +636,16 @@ bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
+ cfg_mem = NULL;
+
+ /* Just pass remote_ip to bgp_init() */
+- ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
++ ((struct bgp_config *) sym->proto)->remote_ip = sk->daddr;
++
++ /* Create the protocol disabled initially */
++ SKIP_BACK_DECLARE(struct bgp_proto, p, p, proto_spawn(sym->proto, 1));
+
+- return (void *) proto_spawn(sym->proto, 0);
++ /* Pass the socket */
++ p->postponed_sk = sk;
++
++ /* And enable the protocol */
++ proto_enable(&p->p);
+ }
+
+ void
+@@ -1489,10 +1497,15 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
+ /* For dynamic BGP, spawn new instance and postpone the socket */
+ if (bgp_is_dynamic(p))
+ {
+- p = bgp_spawn(p, sk->daddr);
+- p->postponed_sk = sk;
+- rmove(sk, p->p.pool);
+- goto leave;
++ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
++
++ /* The dynamic protocol must be in the START state */
++ ASSERT_DIE(p->p.proto_state == PS_START);
++ birdloop_leave(p->p.loop);
++
++ /* Now we have a clean mainloop */
++ bgp_spawn(p, sk);
++ return 0;
+ }
+
+ rmove(sk, p->p.pool);
+@@ -1806,7 +1819,6 @@ bgp_start(struct proto *P)
+ p->incoming_conn.state = BS_IDLE;
+ p->neigh = NULL;
+ p->bfd_req = NULL;
+- p->postponed_sk = NULL;
+ p->gr_ready = 0;
+ p->gr_active_num = 0;
+
+@@ -1848,6 +1860,16 @@ bgp_start(struct proto *P)
+ channel_graceful_restart_lock(&c->c);
+ }
+
++ /* Now it's the last chance to move the postponed socket to this BGP,
++ * as bgp_start is the only hook running from main loop. */
++ if (p->postponed_sk)
++ {
++ LOCK_DOMAIN(rtable, bgp_listen_domain);
++ rmove(p->postponed_sk, p->p.pool);
++ sk_reloop(p->postponed_sk, p->p.loop);
++ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
++ }
++
+ /*
+ * Before attempting to create the connection, we need to lock the port,
+ * so that we are the only instance attempting to talk with that neighbor.
+@@ -1999,6 +2021,8 @@ bgp_init(struct proto_config *CF)
+ p->remote_ip = cf->remote_ip;
+ p->remote_as = cf->remote_as;
+
++ p->postponed_sk = NULL;
++
+ /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
+ if (cf->c.parent)
+ cf->remote_ip = IPA_NONE;
+--
+GitLab
+
diff --git a/net/bird3/files/patch-04-BFD-Fix-session-locking-order b/net/bird3/files/patch-04-BFD-Fix-session-locking-order
new file mode 100644
index 000000000000..3f5500500691
--- /dev/null
+++ b/net/bird3/files/patch-04-BFD-Fix-session-locking-order
@@ -0,0 +1,400 @@
+From 83495362789d961914c4bfaa590e31cb17370ed0 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Sat, 21 Dec 2024 19:02:22 +0100
+Subject: [PATCH] BFD: Fix session reconfiguration locking order
+
+The sessions have to be updated asynchronously to avoid
+cross-locking between protocols.
+
+Testsuite: cf-ibgp-bfd-switch, cf-ibgp-multi-bfd-auth
+Fixes: #139
+
+Thanks to Daniel Suchy <danny@danysek.cz> for reporting:
+https://trubka.network.cz/pipermail/bird-users/2024-December/017984.html
+---
+ nest/bfd.h | 7 ++-
+ proto/bfd/bfd.c | 144 +++++++++++++++++++++++---------------------
+ proto/bfd/bfd.h | 21 +------
+ proto/bfd/config.Y | 42 +++++--------
+ proto/bfd/packets.c | 4 +-
+ 5 files changed, 98 insertions(+), 120 deletions(-)
+
+diff --git a/nest/bfd.h b/nest/bfd.h
+index 5dacff5d7..c046152f8 100644
+--- nest/bfd.h
++++ nest/bfd.h
+@@ -18,8 +18,11 @@ struct bfd_options {
+ u32 min_tx_int;
+ u32 idle_tx_int;
+ u8 multiplier;
+- u8 passive;
+- u8 passive_set;
++ PACKED enum bfd_opt_passive {
++ BFD_OPT_PASSIVE_UNKNOWN = 0,
++ BFD_OPT_PASSIVE,
++ BFD_OPT_NOT_PASSIVE,
++ } passive;
+ u8 mode;
+ u8 auth_type; /* Authentication type (BFD_AUTH_*) */
+ list *passwords; /* Passwords for authentication */
+diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
+index 34f992b93..4997f803a 100644
+--- proto/bfd/bfd.c
++++ proto/bfd/bfd.c
+@@ -172,17 +172,17 @@ static void bfd_free_iface(struct bfd_iface *ifa);
+ * BFD sessions
+ */
+
+-static inline struct bfd_session_config
+-bfd_merge_options(const struct bfd_iface_config *cf, const struct bfd_options *opts)
++static inline struct bfd_options
++bfd_merge_options(const struct bfd_options *bottom, const struct bfd_options *top)
+ {
+- return (struct bfd_session_config) {
+- .min_rx_int = opts->min_rx_int ?: cf->min_rx_int,
+- .min_tx_int = opts->min_tx_int ?: cf->min_tx_int,
+- .idle_tx_int = opts->idle_tx_int ?: cf->idle_tx_int,
+- .multiplier = opts->multiplier ?: cf->multiplier,
+- .passive = opts->passive_set ? opts->passive : cf->passive,
+- .auth_type = opts->auth_type ?: cf->auth_type,
+- .passwords = opts->passwords ?: cf->passwords,
++ return (struct bfd_options) {
++ .min_rx_int = top->min_rx_int ?: bottom->min_rx_int,
++ .min_tx_int = top->min_tx_int ?: bottom->min_tx_int,
++ .idle_tx_int = top->idle_tx_int ?: bottom->idle_tx_int,
++ .multiplier = top->multiplier ?: bottom->multiplier,
++ .passive = top->passive ?: bottom->passive,
++ .auth_type = top->auth_type ?: bottom->auth_type,
++ .passwords = top->passwords ?: bottom->passwords,
+ };
+ }
+
+@@ -478,7 +478,7 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
+ HASH_INSERT(p->session_hash_id, HASH_ID, s);
+ HASH_INSERT(p->session_hash_ip, HASH_IP, s);
+
+- s->cf = bfd_merge_options(ifa->cf, opts);
++ s->cf = bfd_merge_options(&ifa->cf->opts, opts);
+
+ /* Initialization of state variables - see RFC 5880 6.8.1 */
+ s->loc_state = BFD_STATE_DOWN;
+@@ -561,26 +561,58 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+ birdloop_leave(p->p.loop);
+ }
+
++struct bfd_reconfigure_sessions_deferred_call {
++ struct deferred_call dc;
++ struct bfd_proto *p;
++ config_ref old_config;
++};
++
+ static void
+-bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
++bfd_reconfigure_sessions(struct deferred_call *dc)
+ {
+- if (EMPTY_LIST(s->request_list))
+- return;
++ SKIP_BACK_DECLARE(struct bfd_reconfigure_sessions_deferred_call,
++ brsdc, dc, dc);
+
+- ASSERT_DIE(birdloop_inside(p->p.loop));
++ struct bfd_proto *p = brsdc->p;
++ birdloop_enter(p->p.loop);
+
+- SKIP_BACK_DECLARE(struct bfd_request, req, n, HEAD(s->request_list));
+- s->cf = bfd_merge_options(s->ifa->cf, &req->opts);
++ HASH_WALK(p->session_hash_id, next_id, s)
++ {
++ if (!EMPTY_LIST(s->request_list))
++ {
++ SKIP_BACK_DECLARE(struct bfd_request, req, n, HEAD(s->request_list));
++ struct bfd_options opts = bfd_merge_options(&s->ifa->cf->opts, &req->opts);
+
+- u32 tx = (s->loc_state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int;
+- bfd_session_set_min_tx(s, tx);
+- bfd_session_set_min_rx(s, s->cf.min_rx_int);
+- s->detect_mult = s->cf.multiplier;
+- s->passive = s->cf.passive;
++#define CHK(x) (opts.x != s->cf.x) ||
++ bool reload = MACRO_FOREACH(CHK,
++ min_rx_int,
++ min_tx_int,
++ idle_tx_int,
++ multiplier,
++ passive) false; /* terminating the || chain */
++#undef CHK
+
+- bfd_session_control_tx_timer(s, 0);
++ s->cf = opts;
++
++ if (reload)
++ {
++ u32 tx = (s->loc_state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int;
++ bfd_session_set_min_tx(s, tx);
++ bfd_session_set_min_rx(s, s->cf.min_rx_int);
++ s->detect_mult = s->cf.multiplier;
++ s->passive = s->cf.passive;
++
++ bfd_session_control_tx_timer(s, 0);
++
++ TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
++ }
++ }
++ }
++ HASH_WALK_END;
++ birdloop_leave(p->p.loop);
+
+- TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
++ /* Now the config is clean */
++ OBSREF_CLEAR(brsdc->old_config);
+ }
+
+
+@@ -589,10 +621,12 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
+ */
+
+ static struct bfd_iface_config bfd_default_iface = {
+- .min_rx_int = BFD_DEFAULT_MIN_RX_INT,
+- .min_tx_int = BFD_DEFAULT_MIN_TX_INT,
+- .idle_tx_int = BFD_DEFAULT_IDLE_TX_INT,
+- .multiplier = BFD_DEFAULT_MULTIPLIER,
++ .opts = {
++ .min_rx_int = BFD_DEFAULT_MIN_RX_INT,
++ .min_tx_int = BFD_DEFAULT_MIN_TX_INT,
++ .idle_tx_int = BFD_DEFAULT_IDLE_TX_INT,
++ .multiplier = BFD_DEFAULT_MULTIPLIER,
++ },
+ };
+
+ static inline struct bfd_iface_config *
+@@ -650,24 +684,6 @@ bfd_free_iface(struct bfd_iface *ifa)
+ mb_free(ifa);
+ }
+
+-static void
+-bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct bfd_config *nc)
+-{
+- struct bfd_iface_config *new = bfd_find_iface_config(nc, ifa->iface);
+- struct bfd_iface_config *old = ifa->cf;
+-
+- /* Check options that are handled in bfd_reconfigure_session() */
+- ifa->changed =
+- (new->min_rx_int != old->min_rx_int) ||
+- (new->min_tx_int != old->min_tx_int) ||
+- (new->idle_tx_int != old->idle_tx_int) ||
+- (new->multiplier != old->multiplier) ||
+- (new->passive != old->passive);
+-
+- /* This should be probably changed to not access ifa->cf from the BFD thread */
+- ifa->cf = new;
+-}
+-
+
+ /*
+ * BFD requests
+@@ -900,20 +916,7 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local,
+ void
+ bfd_update_request(struct bfd_request *req, const struct bfd_options *opts)
+ {
+- struct bfd_session *s = req->session;
+-
+- if (!memcmp(opts, &req->opts, sizeof(const struct bfd_options)))
+- return;
+-
+ req->opts = *opts;
+-
+- if (s)
+- {
+- struct bfd_proto *p = s->ifa->bfd;
+- birdloop_enter(p->p.loop);
+- bfd_reconfigure_session(p, s);
+- birdloop_leave(p->p.loop);
+- }
+ }
+
+ static void
+@@ -1193,21 +1196,22 @@ bfd_reconfigure(struct proto *P, struct proto_config *c)
+ (new->zero_udp6_checksum_rx != old->zero_udp6_checksum_rx))
+ return 0;
+
+- birdloop_mask_wakeups(p->p.loop);
+-
+ WALK_LIST(ifa, p->iface_list)
+- bfd_reconfigure_iface(p, ifa, new);
+-
+- HASH_WALK(p->session_hash_id, next_id, s)
+- {
+- if (s->ifa->changed)
+- bfd_reconfigure_session(p, s);
+- }
+- HASH_WALK_END;
++ ifa->cf = bfd_find_iface_config(new, ifa->iface);
+
+ bfd_reconfigure_neighbors(p, new);
+
+- birdloop_unmask_wakeups(p->p.loop);
++ /* Sessions get reconfigured after all the config is applied */
++ struct bfd_reconfigure_sessions_deferred_call brsdc = {
++ .dc.hook = bfd_reconfigure_sessions,
++ .p = p,
++ };
++ SKIP_BACK_DECLARE(struct bfd_reconfigure_sessions_deferred_call,
++ brsdcp, dc, defer_call(&brsdc.dc, sizeof brsdc));
++
++ /* We need to keep the old config alive until all the sessions get
++ * reconfigured */
++ OBSREF_SET(brsdcp->old_config, P->cf->global);
+
+ return 1;
+ }
+diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h
+index 578ce8755..107829b72 100644
+--- proto/bfd/bfd.h
++++ proto/bfd/bfd.h
+@@ -54,24 +54,7 @@ struct bfd_config
+ struct bfd_iface_config
+ {
+ struct iface_patt i;
+- u32 min_rx_int;
+- u32 min_tx_int;
+- u32 idle_tx_int;
+- u8 multiplier;
+- u8 passive;
+- u8 auth_type; /* Authentication type (BFD_AUTH_*) */
+- list *passwords; /* Passwords for authentication */
+-};
+-
+-struct bfd_session_config
+-{
+- u32 min_rx_int;
+- u32 min_tx_int;
+- u32 idle_tx_int;
+- u8 multiplier;
+- u8 passive;
+- u8 auth_type; /* Authentication type (BFD_AUTH_*) */
+- list *passwords; /* Passwords for authentication */
++ struct bfd_options opts;
+ };
+
+ struct bfd_neighbor
+@@ -146,7 +129,7 @@ struct bfd_session
+ u32 loc_id; /* Local session ID (local discriminator) */
+ u32 rem_id; /* Remote session ID (remote discriminator) */
+
+- struct bfd_session_config cf; /* Static configuration parameters */
++ struct bfd_options cf; /* Static configuration parameters */
+
+ u32 des_min_tx_int; /* Desired min rx interval, local option */
+ u32 des_min_tx_new; /* Used for des_min_tx_int change */
+diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y
+index 9e9919c4e..56d1ffac4 100644
+--- proto/bfd/config.Y
++++ proto/bfd/config.Y
+@@ -86,44 +86,37 @@ bfd_iface_start:
+ add_tail(&BFD_CFG->patt_list, NODE this_ipatt);
+ init_list(&this_ipatt->ipn_list);
+
+- BFD_IFACE->min_rx_int = BFD_DEFAULT_MIN_RX_INT;
+- BFD_IFACE->min_tx_int = BFD_DEFAULT_MIN_TX_INT;
+- BFD_IFACE->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT;
+- BFD_IFACE->multiplier = BFD_DEFAULT_MULTIPLIER;
++ this_bfd_opts = &BFD_IFACE->opts;
++
++ this_bfd_opts->min_rx_int = BFD_DEFAULT_MIN_RX_INT;
++ this_bfd_opts->min_tx_int = BFD_DEFAULT_MIN_TX_INT;
++ this_bfd_opts->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT;
++ this_bfd_opts->multiplier = BFD_DEFAULT_MULTIPLIER;
+
+ reset_passwords();
+ };
+
+ bfd_iface_finish:
+ {
+- BFD_IFACE->passwords = get_passwords();
++ this_bfd_opts->passwords = get_passwords();
+
+- if (!BFD_IFACE->auth_type != !BFD_IFACE->passwords)
++ if (!this_bfd_opts->auth_type != !this_bfd_opts->passwords)
+ cf_warn("Authentication and password options should be used together");
+
+- if (BFD_IFACE->passwords)
++ if (this_bfd_opts->passwords)
+ {
+ struct password_item *pass;
+- WALK_LIST(pass, *BFD_IFACE->passwords)
++ WALK_LIST(pass, *this_bfd_opts->passwords)
+ {
+ if (pass->alg)
+ cf_error("Password algorithm option not available in BFD protocol");
+
+- pass->alg = bfd_auth_type_to_hash_alg[BFD_IFACE->auth_type];
++ pass->alg = bfd_auth_type_to_hash_alg[this_bfd_opts->auth_type];
+ }
+ }
+-};
+
+-bfd_iface_item:
+- INTERVAL expr_us { BFD_IFACE->min_rx_int = BFD_IFACE->min_tx_int = $2; }
+- | MIN RX INTERVAL expr_us { BFD_IFACE->min_rx_int = $4; }
+- | MIN TX INTERVAL expr_us { BFD_IFACE->min_tx_int = $4; }
+- | IDLE TX INTERVAL expr_us { BFD_IFACE->idle_tx_int = $4; }
+- | MULTIPLIER expr { BFD_IFACE->multiplier = $2; }
+- | PASSIVE bool { BFD_IFACE->passive = $2; }
+- | AUTHENTICATION bfd_auth_type { BFD_IFACE->auth_type = $2; }
+- | password_list {}
+- ;
++ this_bfd_opts = NULL;
++};
+
+ bfd_auth_type:
+ NONE { $$ = BFD_AUTH_NONE; }
+@@ -134,14 +127,9 @@ bfd_auth_type:
+ | METICULOUS KEYED SHA1 { $$ = BFD_AUTH_METICULOUS_KEYED_SHA1; }
+ ;
+
+-bfd_iface_opts:
+- /* empty */
+- | bfd_iface_opts bfd_iface_item ';'
+- ;
+-
+ bfd_iface_opt_list:
+ /* empty */
+- | '{' bfd_iface_opts '}'
++ | '{' bfd_items '}'
+ ;
+
+ bfd_iface:
+@@ -194,7 +182,7 @@ bfd_item:
+ | MIN TX INTERVAL expr_us { this_bfd_opts->min_tx_int = $4; }
+ | IDLE TX INTERVAL expr_us { this_bfd_opts->idle_tx_int = $4; }
+ | MULTIPLIER expr { this_bfd_opts->multiplier = $2; }
+- | PASSIVE bool { this_bfd_opts->passive = $2; this_bfd_opts->passive_set = 1; }
++ | PASSIVE bool { this_bfd_opts->passive = $2 ? BFD_OPT_PASSIVE : BFD_OPT_NOT_PASSIVE; }
+ | GRACEFUL { this_bfd_opts->mode = BGP_BFD_GRACEFUL; }
+ | AUTHENTICATION bfd_auth_type { this_bfd_opts->auth_type = $2; }
+ | password_list {}
+diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c
+index 1ceb470c1..f8bd63d73 100644
+--- proto/bfd/packets.c
++++ proto/bfd/packets.c
+@@ -109,7 +109,7 @@ const u8 bfd_auth_type_to_hash_alg[] = {
+ static void
+ bfd_fill_authentication(struct bfd_proto *p, struct bfd_session *s, struct bfd_ctl_packet *pkt)
+ {
+- struct bfd_session_config *cf = &s->cf;
++ struct bfd_options *cf = &s->cf;
+ struct password_item *pass = password_find(cf->passwords, 0);
+ uint meticulous = 0;
+
+@@ -179,7 +179,7 @@ bfd_fill_authentication(struct bfd_proto *p, struct bfd_session *s, struct bfd_c
+ static int
+ bfd_check_authentication(struct bfd_proto *p, struct bfd_session *s, struct bfd_ctl_packet *pkt)
+ {
+- struct bfd_session_config *cf = &s->cf;
++ struct bfd_options *cf = &s->cf;
+ const char *err_dsc = NULL;
+ uint err_val = 0;
+ uint auth_type = 0;
+--
+GitLab
+
diff --git a/net/bird3/files/patch-05-mainloop-dropped-old-socket b/net/bird3/files/patch-05-mainloop-dropped-old-socket
new file mode 100644
index 000000000000..eea4d1d26af2
--- /dev/null
+++ b/net/bird3/files/patch-05-mainloop-dropped-old-socket
@@ -0,0 +1,86 @@
+From 3d1f19e335f55c8cfa3cb7ca9d7b88ca03173d8e Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Sun, 22 Dec 2024 21:32:28 +0100
+Subject: [PATCH] Mainloop: Dropped old socket prioritization magic
+
+This is now done in worker threads and the mainloop needs to do other things,
+most notably kernel and CLI, with less overhead of repeatedly checking poll.
+---
+ sysdep/unix/io-loop.c | 2 +-
+ sysdep/unix/io.c | 21 +++++++--------------
+ 2 files changed, 8 insertions(+), 15 deletions(-)
+
+diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c
+index f69189e06..a72c69a03 100644
+--- sysdep/unix/io-loop.c
++++ sysdep/unix/io-loop.c
+@@ -1403,7 +1403,7 @@ bool task_still_in_limit(void)
+ {
+ static u64 main_counter = 0;
+ if (this_birdloop == &main_birdloop)
+- return (++main_counter % 2048); /* This is a hack because of no accounting in mainloop */
++ return (++main_counter % 512); /* This is a hack because of no accounting in mainloop */
+ else
+ return ns_now() < account_last + this_thread->max_loop_time_ns;
+ }
+diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
+index f9785c074..51395e1e9 100644
+--- sysdep/unix/io.c
++++ sysdep/unix/io.c
+@@ -53,14 +53,15 @@
+
+ /* Maximum number of calls of tx handler for one socket in one
+ * poll iteration. Should be small enough to not monopolize CPU by
+- * one protocol instance.
++ * one protocol instance. But as most of the problems are now offloaded
++ * to worker threads, too low values may actually bring problems with
++ * latency.
+ */
+-#define MAX_STEPS 4
++#define MAX_STEPS 2048
+
+ /* Maximum number of calls of rx handler for all sockets in one poll
+- iteration. RX callbacks are often much more costly so we limit
+- this to gen small latencies */
+-#define MAX_RX_STEPS 4
++ iteration. RX callbacks are often a little bit more costly. */
++#define MAX_RX_STEPS 512
+
+
+ /*
+@@ -2581,8 +2582,6 @@ io_init(void)
+ srandom((uint) (now ^ (now >> 32)));
+ }
+
+-static int short_loops = 0;
+-#define SHORT_LOOP_MAX 10
+ #define WORK_EVENTS_MAX 10
+
+ sock *stored_sock;
+@@ -2670,10 +2669,9 @@ io_loop(void)
+ {
+ if (pfd.pfd.data[0].revents & POLLIN)
+ {
+- /* IO loop reload requested */
++ /* Somebody sent an event to mainloop */
+ pipe_drain(&main_birdloop.thread->wakeup);
+ atomic_fetch_and_explicit(&main_birdloop.thread_transition, ~LTT_PING, memory_order_acq_rel);
+- continue;
+ }
+
+ times_update();
+@@ -2719,11 +2717,6 @@ io_loop(void)
+ main_birdloop.sock_active = sk_next(s);
+ }
+
+- short_loops++;
+- if (events && (short_loops < SHORT_LOOP_MAX))
+- continue;
+- short_loops = 0;
+-
+ int count = 0;
+ main_birdloop.sock_active = stored_sock;
+ if (main_birdloop.sock_active == NULL)
+--
+GitLab
+
diff --git a/net/bird3/files/patch-06-cli-allocate-tx-buffers b/net/bird3/files/patch-06-cli-allocate-tx-buffers
new file mode 100644
index 000000000000..0e9af5de5d63
--- /dev/null
+++ b/net/bird3/files/patch-06-cli-allocate-tx-buffers
@@ -0,0 +1,134 @@
+From de9dbee796876f5b621e40e0082612aad746cac1 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Sun, 22 Dec 2024 22:10:38 +0100
+Subject: [PATCH] CLI: allocate TX buffers as pages, not by malloc
+
+Every malloc risks heap bloating and these blocks are already
+the same size as pages.
+---
+ nest/cli.c | 59 ++++++++++++++++++++++++++++++++++++++++++------------
+ nest/cli.h | 2 +-
+ 2 files changed, 47 insertions(+), 14 deletions(-)
+
+diff --git a/nest/cli.c b/nest/cli.c
+index 3b8e6f468..b33ffd437 100644
+--- nest/cli.c
++++ nest/cli.c
+@@ -81,13 +81,14 @@ cli_alloc_out(cli *c, int size)
+ o = c->tx_buf;
+ else
+ {
+- o = mb_alloc(c->pool, sizeof(struct cli_out) + CLI_TX_BUF_SIZE);
++ o = alloc_page();
++ c->tx_pending_count++;
+ if (c->tx_write)
+ c->tx_write->next = o;
+ else
+ c->tx_buf = o;
+ o->wpos = o->outpos = o->buf;
+- o->end = o->buf + CLI_TX_BUF_SIZE;
++ o->end = (void *) o + page_size;
+ }
+ c->tx_write = o;
+ if (!c->tx_pos)
+@@ -167,19 +168,18 @@ cli_hello(cli *c)
+ static void
+ cli_free_out(cli *c)
+ {
+- struct cli_out *o, *p;
++ for (struct cli_out *o = c->tx_buf, *n; o; o = n)
++ {
++ n = o->next;
++ free_page(o);
++ c->tx_pending_count--;
++ }
+
+- if (o = c->tx_buf)
+- {
+- o->wpos = o->outpos = o->buf;
+- while (p = o->next)
+- {
+- o->next = p->next;
+- mb_free(p);
+- }
+- }
++ c->tx_buf = NULL;
+ c->tx_write = c->tx_pos = NULL;
+ c->async_msg_size = 0;
++
++ ASSERT_DIE(c->tx_pending_count == 0);
+ }
+
+ void
+@@ -189,6 +189,38 @@ cli_written(cli *c)
+ ev_schedule(c->event);
+ }
+
++/* A dummy resource to show and free memory pages allocated for pending TX */
++struct cli_tx_resource {
++ resource r;
++ struct cli *c;
++};
++
++static void
++cli_tx_resource_free(resource *r)
++{
++ cli_free_out(SKIP_BACK(struct cli_tx_resource, r, r)->c);
++}
++
++static void
++cli_tx_resource_dump(struct dump_request *dreq UNUSED, resource *r UNUSED) {}
++
++static struct resmem
++cli_tx_resource_memsize(resource *r)
++{
++ return (struct resmem) {
++ .effective = SKIP_BACK(struct cli_tx_resource, r, r)->c->tx_pending_count * page_size,
++ .overhead = sizeof(struct cli_tx_resource),
++ };
++}
++
++static struct resclass cli_tx_resource_class = {
++ .name = "CLI TX buffers",
++ .size = sizeof (struct cli_tx_resource),
++ .free = cli_tx_resource_free,
++ .dump = cli_tx_resource_dump,
++ .memsize = cli_tx_resource_memsize,
++};
++
+
+ static byte *cli_rh_pos;
+ static uint cli_rh_len;
+@@ -272,7 +304,8 @@ cli *
+ cli_new(struct birdsock *sock, struct cli_config *cf)
+ {
+ pool *p = rp_new(cli_pool, the_bird_domain.the_bird, "CLI");
+- cli *c = mb_alloc(p, sizeof(cli));
++ struct cli_tx_resource *ctr = ralloc(p, &cli_tx_resource_class);
++ cli *c = ctr->c = mb_alloc(p, sizeof(cli));
+
+ bzero(c, sizeof(cli));
+ c->pool = p;
+diff --git a/nest/cli.h b/nest/cli.h
+index d86ec3801..671be04d8 100644
+--- nest/cli.h
++++ nest/cli.h
+@@ -17,7 +17,6 @@
+ #include "conf/conf.h"
+
+ #define CLI_RX_BUF_SIZE 4096
+-#define CLI_TX_BUF_SIZE 4096
+ #define CLI_MAX_ASYNC_QUEUE 4096
+
+ #define CLI_MSG_SIZE 500
+@@ -49,6 +48,7 @@ typedef struct cli {
+ uint log_mask; /* Mask of allowed message levels */
+ uint log_threshold; /* When free < log_threshold, store only important messages */
+ uint async_msg_size; /* Total size of async messages queued in tx_buf */
++ uint tx_pending_count; /* How many blocks are pending */
+ } cli;
+
+ struct cli_config {
+--
+GitLab
+
diff --git a/net/bird3/files/patch-07-cli-flushing-tmp-linpool b/net/bird3/files/patch-07-cli-flushing-tmp-linpool
new file mode 100644
index 000000000000..c05321812dac
--- /dev/null
+++ b/net/bird3/files/patch-07-cli-flushing-tmp-linpool
@@ -0,0 +1,29 @@
+From 5fd0fd77e293328f354e7f6ed22632ba6ff96593 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Sun, 22 Dec 2024 22:26:44 +0100
+Subject: [PATCH] CLI: Flushing tmp_linpool after every shown net.
+
+There is no reason to keep the allocated objects through multiple nets.
+---
+ nest/rt-show.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/nest/rt-show.c b/nest/rt-show.c
+index 3986da83d..aa9209ca5 100644
+--- nest/rt-show.c
++++ nest/rt-show.c
+@@ -282,8 +282,9 @@ rt_show_cont(struct cli *c)
+ rt_show_table(d);
+
+ RT_FEED_WALK(&d->tab->req, f)
+- if (f->count_routes)
+- rt_show_net(d, f);
++ TMP_SAVED
++ if (f->count_routes)
++ rt_show_net(d, f);
+
+ if (rt_export_feed_active(&d->tab->req))
+ rt_feeder_unsubscribe(&d->tab->req);
+--
+GitLab
+
diff --git a/net/bird3/files/patch-08-kernel-feed-only-once b/net/bird3/files/patch-08-kernel-feed-only-once
new file mode 100644
index 000000000000..33a98cbc4795
--- /dev/null
+++ b/net/bird3/files/patch-08-kernel-feed-only-once
@@ -0,0 +1,274 @@
+From 0fa80d7c79428e5370740a2eba5605b65131ebd6 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Mon, 23 Dec 2024 11:58:05 +0100
+Subject: [PATCH] Kernel: feed only once during startup
+
+There was an inefficiency in the initial scan state machine,
+causing routes to be fed several times instead of just once.
+Now the export startup is postponed until first krt_scan()
+finishes and we actually can do the pruning with full information.
+---
+ nest/proto.c | 4 ++-
+ nest/protocol.h | 2 ++
+ sysdep/unix/krt.c | 69 ++++++++++++++++++++++++++++-------------------
+ sysdep/unix/krt.h | 5 ++--
+ 4 files changed, 48 insertions(+), 32 deletions(-)
+
+diff --git a/nest/proto.c b/nest/proto.c
+index 678697d69..6fa74e9f1 100644
+--- nest/proto.c
++++ nest/proto.c
+@@ -676,9 +676,11 @@ void channel_notify_basic(void *);
+ void channel_notify_accepted(void *);
+ void channel_notify_merged(void *);
+
+-static void
++void
+ channel_start_export(struct channel *c)
+ {
++ ASSERT_DIE(birdloop_inside(c->proto->loop));
++
+ if (rt_export_get_state(&c->out_req) != TES_DOWN)
+ bug("%s.%s: Attempted to start channel's already started export", c->proto->name, c->name);
+
+diff --git a/nest/protocol.h b/nest/protocol.h
+index cf7ecb898..2bfa1628a 100644
+--- nest/protocol.h
++++ nest/protocol.h
+@@ -747,6 +747,8 @@ int proto_configure_channel(struct proto *p, struct channel **c, struct channel_
+
+ void channel_set_state(struct channel *c, uint state);
+
++void channel_start_export(struct channel *c);
++
+ void channel_add_obstacle(struct channel *c);
+ void channel_del_obstacle(struct channel *c);
+
+diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
+index 34882b88f..1658dd6fe 100644
+--- sysdep/unix/krt.c
++++ sysdep/unix/krt.c
+@@ -342,6 +342,8 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
+ /* Hook defined in nest/rt-table.c ... to be refactored away later */
+ rte *krt_export_net(struct channel *c, const net_addr *a, linpool *lp);
+
++static void krt_rt_notify(struct proto *P, struct channel *ch, const net_addr *net, rte *new, const rte *old);
++
+ static int
+ krt_same_dest(rte *k, rte *e)
+ {
+@@ -361,6 +363,11 @@ krt_same_dest(rte *k, rte *e)
+ void
+ krt_got_route(struct krt_proto *p, rte *e, s8 src)
+ {
++ /* If we happen to get an asynchronous route notification
++ * before initialization, we wait for the scan. */
++ if (p->sync_state == KPS_INIT)
++ return;
++
+ rte *new = NULL;
+ e->pflags = 0;
+
+@@ -391,10 +398,6 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
+
+ /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
+
+- /* We wait for the initial feed to have correct installed state */
+- if (!p->ready)
+- goto ignore;
+-
+ /* Get the exported version */
+ new = krt_export_net(p->p.main_channel, e->net, krt_filter_lp);
+
+@@ -423,10 +426,6 @@ aseen:
+ krt_trace_in(p, e, "already seen");
+ goto done;
+
+-ignore:
+- krt_trace_in(p, e, "ignored");
+- goto done;
+-
+ update:
+ krt_trace_in(p, new, "updating");
+ krt_replace_rte(p, e->net, new, e);
+@@ -447,12 +446,21 @@ krt_init_scan(struct krt_proto *p)
+ {
+ switch (p->sync_state)
+ {
++ case KPS_INIT:
++ /* Allow exports now */
++ p->p.rt_notify = krt_rt_notify;
++ channel_start_export(p->p.main_channel);
++ rt_refresh_begin(&p->p.main_channel->in_req);
++ p->sync_state = KPS_FIRST_SCAN;
++ return 1;
++
+ case KPS_IDLE:
+ rt_refresh_begin(&p->p.main_channel->in_req);
+ bmap_reset(&p->seen_map, 1024);
+ p->sync_state = KPS_SCANNING;
+ return 1;
+
++ case KPS_FIRST_SCAN:
+ case KPS_SCANNING:
+ bug("Kernel scan double-init");
+
+@@ -470,14 +478,17 @@ krt_prune(struct krt_proto *p)
+ {
+ switch (p->sync_state)
+ {
++ case KPS_INIT:
+ case KPS_IDLE:
+ bug("Kernel scan prune without scan");
+
+ case KPS_SCANNING:
++ channel_request_full_refeed(p->p.main_channel);
++ /* fall through */
++ case KPS_FIRST_SCAN:
+ p->sync_state = KPS_PRUNING;
+ KRT_TRACE(p, D_EVENTS, "Pruning table %s", p->p.main_channel->table->name);
+ rt_refresh_end(&p->p.main_channel->in_req);
+- channel_request_full_refeed(p->p.main_channel);
+ break;
+
+ case KPS_PRUNING:
+@@ -549,7 +560,7 @@ krt_scan_all(timer *t UNUSED)
+ krt_do_scan(NULL);
+
+ WALK_LIST2(p, n, krt_proto_list, krt_node)
+- if (p->sync_state == KPS_SCANNING)
++ if ((p->sync_state == KPS_SCANNING) || (p->sync_state == KPS_FIRST_SCAN))
+ krt_prune(p);
+ }
+
+@@ -644,6 +655,9 @@ krt_scan_timer_kick(struct krt_proto *p)
+ static int
+ krt_preexport(struct channel *C, rte *e)
+ {
++ /* The export should not start before proper sync */
++ ASSERT_DIE(SKIP_BACK(struct krt_proto, p, C->proto)->sync_state != KPS_INIT);
++
+ if (e->src->owner == &C->proto->sources)
+ #ifdef CONFIG_SINGLE_ROUTE
+ return 1;
+@@ -659,15 +673,6 @@ krt_preexport(struct channel *C, rte *e)
+ return -1;
+ }
+
+- /* Before first scan we don't touch the routes */
+- if (!SKIP_BACK(struct krt_proto, p, C->proto)->ready)
+- {
+- if (C->debug & D_ROUTES)
+- log(L_TRACE "%s.%s not ready yet to accept route for %N",
+- C->proto->name, C->name, e->net);
+- return -1;
+- }
+-
+ return 0;
+ }
+
+@@ -685,18 +690,24 @@ krt_rt_notify(struct proto *P, struct channel *ch, const net_addr *net,
+
+ switch (p->sync_state)
+ {
++ case KPS_INIT:
++ bug("Routes in init state should have been rejected by preexport.");
++
+ case KPS_IDLE:
+ case KPS_PRUNING:
+ if (new && bmap_test(&p->seen_map, new->id))
++ {
+ if (ch->debug & D_ROUTES)
+ {
+ /* Already installed and seen in the kernel dump */
+ log(L_TRACE "%s.%s: %N already in kernel",
+ P->name, ch->name, net);
+- return;
+ }
++ return;
++ }
+
+ /* fall through */
++ case KPS_FIRST_SCAN:
+ case KPS_SCANNING:
+ /* Actually replace the route */
+ krt_replace_rte(p, net, new, old);
+@@ -732,7 +743,6 @@ krt_reload_routes(struct channel *C, struct rt_feeding_request *rfr)
+
+ if (KRT_CF->learn)
+ {
+- p->reload = 1;
+ krt_scan_timer_kick(p);
+ }
+
+@@ -749,15 +759,18 @@ krt_export_fed(struct channel *C)
+ {
+ struct krt_proto *p = (void *) C->proto;
+
+- p->ready = 1;
+- p->initialized = 1;
+-
+ switch (p->sync_state)
+ {
++ case KPS_INIT:
++ bug("KRT export started before scan");
++
+ case KPS_IDLE:
+ krt_scan_timer_kick(p);
+ break;
+
++ case KPS_FIRST_SCAN:
++ bug("KRT export done before first scan");
++
+ case KPS_SCANNING:
+ break;
+
+@@ -831,7 +844,8 @@ krt_init(struct proto_config *CF)
+ p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF));
+
+ p->p.preexport = krt_preexport;
+- p->p.rt_notify = krt_rt_notify;
++ /* Not setting rt_notify here to not start exports, must wait for the first scan
++ * and then we can start exports manually */
+ p->p.iface_sub.if_notify = krt_if_notify;
+ p->p.reload_routes = krt_reload_routes;
+ p->p.export_fed = krt_export_fed;
+@@ -887,7 +901,7 @@ krt_shutdown(struct proto *P)
+ return PS_FLUSH;
+
+ /* FIXME we should flush routes even when persist during reconfiguration */
+- if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN))
++ if ((p->sync_state != KPS_INIT) && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN))
+ {
+ struct rt_export_feeder req = (struct rt_export_feeder)
+ {
+@@ -922,8 +936,7 @@ krt_shutdown(struct proto *P)
+ static void
+ krt_cleanup(struct krt_proto *p)
+ {
+- p->ready = 0;
+- p->initialized = 0;
++ p->sync_state = KPS_INIT;
+
+ krt_sys_shutdown(p);
+ rem_node(&p->krt_node);
+diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
+index 394e74010..14be715f8 100644
+--- sysdep/unix/krt.h
++++ sysdep/unix/krt.h
+@@ -59,10 +59,9 @@ struct krt_proto {
+ struct bmap seen_map; /* Routes seen during last periodic scan */
+ node krt_node; /* Node in krt_proto_list */
+ byte af; /* Kernel address family (AF_*) */
+- byte ready; /* Initial feed has been finished */
+- byte initialized; /* First scan has been finished */
+- byte reload; /* Next scan is doing reload */
+ PACKED enum krt_prune_state {
++ KPS_INIT,
++ KPS_FIRST_SCAN,
+ KPS_IDLE,
+ KPS_SCANNING,
+ KPS_PRUNING,
+--
+GitLab
+
diff --git a/net/bird3/files/patch-09-graceful-recovery b/net/bird3/files/patch-09-graceful-recovery
new file mode 100644
index 000000000000..d576f80ebc42
--- /dev/null
+++ b/net/bird3/files/patch-09-graceful-recovery
@@ -0,0 +1,311 @@
+From f7639a9fafa7411ebd1f2af56c270b970ac09f3d Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Mon, 23 Dec 2024 21:06:26 +0100
+Subject: [PATCH] Graceful recovery: converted to obstacles
+
+Yet another refcounting mechanism had a locking collision.
+---
+ nest/proto.c | 178 ++++++++++++++++++++++++++----------------------
+ nest/protocol.h | 14 +++-
+ 2 files changed, 110 insertions(+), 82 deletions(-)
+
+diff --git a/nest/proto.c b/nest/proto.c
+index 6fa74e9f1..caf99829b 100644
+--- nest/proto.c
++++ nest/proto.c
+@@ -31,15 +31,8 @@ static list STATIC_LIST_INIT(protocol_list);
+ #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); })
+ #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); })
+
+-static timer *gr_wait_timer;
+-
+-#define GRS_NONE 0
+-#define GRS_INIT 1
+-#define GRS_ACTIVE 2
+-#define GRS_DONE 3
+-
+-static int graceful_restart_state;
+-static u32 graceful_restart_locks;
++static struct graceful_recovery_context _graceful_recovery_context;
++OBSREF(struct graceful_recovery_context) graceful_recovery_context;
+
+ static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
+ static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" };
+@@ -912,7 +905,7 @@ channel_do_stop(struct channel *c)
+ ev_postpone(&c->reimport_event);
+
+ c->gr_wait = 0;
+- if (c->gr_lock)
++ if (OBSREF_GET(c->gr_lock))
+ channel_graceful_restart_unlock(c);
+
+ CALL(c->class->shutdown, c);
+@@ -1407,7 +1400,7 @@ proto_start(struct proto *p)
+ DBG("Kicking %s up\n", p->name);
+ PD(p, "Starting");
+
+- if (graceful_restart_state == GRS_INIT)
++ if (OBSREF_GET(graceful_recovery_context))
+ p->gr_recovery = 1;
+
+ if (p->cf->loop_order != DOMAIN_ORDER(the_bird))
+@@ -1921,7 +1914,45 @@ proto_enable(struct proto *p)
+ *
+ */
+
+-static void graceful_restart_done(timer *t);
++/**
++ * graceful_recovery_done - finalize graceful restart
++ * @_: unused
++ *
++ * When there are no locks on graceful restart, the function finalizes the
++ * graceful restart recovery. Protocols postponing route export until the end of
++ * the recovery are awakened and the export to them is enabled.
++ */
++static void
++graceful_recovery_done(struct callback *_ UNUSED)
++{
++ ASSERT_DIE(birdloop_inside(&main_birdloop));
++ ASSERT_DIE(_graceful_recovery_context.grc_state == GRS_ACTIVE);
++
++ tm_stop(&_graceful_recovery_context.wait_timer);
++ log(L_INFO "Graceful recovery done");
++
++ WALK_TLIST(proto, p, &global_proto_list)
++ PROTO_LOCKED_FROM_MAIN(p)
++ {
++ p->gr_recovery = 0;
++
++ struct channel *c;
++ WALK_LIST(c, p->channels)
++ {
++ ASSERT_DIE(!OBSREF_GET(c->gr_lock));
++
++ /* Resume postponed export of routes */
++ if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify)
++ channel_start_export(c);
++
++ /* Cleanup */
++ c->gr_wait = 0;
++ }
++ }
++
++ _graceful_recovery_context.grc_state = GRS_DONE;
++}
++
+
+ /**
+ * graceful_restart_recovery - request initial graceful restart recovery
+@@ -1933,7 +1964,30 @@ static void graceful_restart_done(timer *t);
+ void
+ graceful_restart_recovery(void)
+ {
+- graceful_restart_state = GRS_INIT;
++ obstacle_target_init(
++ &_graceful_recovery_context.obstacles,
++ &_graceful_recovery_context.obstacles_cleared,
++ &root_pool, "Graceful recovery");
++
++ OBSREF_SET(graceful_recovery_context, &_graceful_recovery_context);
++ _graceful_recovery_context.grc_state = GRS_INIT;
++}
++
++static void
++graceful_recovery_timeout(timer *t UNUSED)
++{
++ log(L_INFO "Graceful recovery timeout");
++ WALK_TLIST(proto, p, &global_proto_list)
++ PROTO_LOCKED_FROM_MAIN(p)
++ {
++ struct channel *c;
++ WALK_LIST(c, p->channels)
++ if (OBSREF_GET(c->gr_lock))
++ {
++ log(L_INFO "Graceful recovery: Not waiting for %s.%s", p->name, c->name);
++ OBSREF_CLEAR(c->gr_lock);
++ }
++ }
+ }
+
+ /**
+@@ -1946,73 +2000,35 @@ graceful_restart_recovery(void)
+ void
+ graceful_restart_init(void)
+ {
+- if (!graceful_restart_state)
++ if (!OBSREF_GET(graceful_recovery_context))
+ return;
+
+- log(L_INFO "Graceful restart started");
++ log(L_INFO "Graceful recovery started");
+
+- if (!graceful_restart_locks)
+- {
+- graceful_restart_done(NULL);
+- return;
+- }
++ _graceful_recovery_context.grc_state = GRS_ACTIVE;
+
+- graceful_restart_state = GRS_ACTIVE;
+- gr_wait_timer = tm_new_init(proto_pool, graceful_restart_done, NULL, 0, 0);
++ _graceful_recovery_context.wait_timer = (timer) { .hook = graceful_recovery_timeout };
+ u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait;
+- tm_start(gr_wait_timer, gr_wait S);
+-}
+-
+-/**
+- * graceful_restart_done - finalize graceful restart
+- * @t: unused
+- *
+- * When there are no locks on graceful restart, the functions finalizes the
+- * graceful restart recovery. Protocols postponing route export until the end of
+- * the recovery are awakened and the export to them is enabled. All other
+- * related state is cleared. The function is also called when the graceful
+- * restart wait timer fires (but there are still some locks).
+- */
+-static void
+-graceful_restart_done(timer *t)
+-{
+- log(L_INFO "Graceful restart done");
+- graceful_restart_state = GRS_DONE;
+-
+- WALK_TLIST(proto, p, &global_proto_list)
+- {
+- if (!p->gr_recovery)
+- continue;
+-
+- struct channel *c;
+- WALK_LIST(c, p->channels)
+- {
+- /* Resume postponed export of routes */
+- if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify)
+- channel_start_export(c);
++ tm_start(&_graceful_recovery_context.wait_timer, gr_wait S);
+
+- /* Cleanup */
+- c->gr_wait = 0;
+- c->gr_lock = 0;
+- }
+-
+- p->gr_recovery = 0;
+- }
++ callback_init(&_graceful_recovery_context.obstacles_cleared, graceful_recovery_done, &main_birdloop);
+
+- graceful_restart_locks = 0;
+-
+- rfree(t);
++ /* The last clearing of obstacle reference will cause
++ * the graceful recovery to finish immediately. */
++ OBSREF_CLEAR(graceful_recovery_context);
+ }
+
+ void
+ graceful_restart_show_status(void)
+ {
+- if (graceful_restart_state != GRS_ACTIVE)
++ if (_graceful_recovery_context.grc_state != GRS_ACTIVE)
+ return;
+
+ cli_msg(-24, "Graceful restart recovery in progress");
+- cli_msg(-24, " Waiting for %d channels to recover", graceful_restart_locks);
+- cli_msg(-24, " Wait timer is %t/%u", tm_remains(gr_wait_timer),
++ cli_msg(-24, " Waiting for %u channels to recover",
++ obstacle_target_count(&_graceful_recovery_context.obstacles));
++ cli_msg(-24, " Wait timer is %t/%u",
++ tm_remains(&_graceful_recovery_context.wait_timer),
+ atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait);
+ }
+
+@@ -2032,14 +2048,22 @@ graceful_restart_show_status(void)
+ void
+ channel_graceful_restart_lock(struct channel *c)
+ {
+- ASSERT(graceful_restart_state == GRS_INIT);
+- ASSERT(c->proto->gr_recovery);
++ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
+- if (c->gr_lock)
++ if (OBSREF_GET(c->gr_lock))
+ return;
+
+- c->gr_lock = 1;
+- graceful_restart_locks++;
++ switch (_graceful_recovery_context.grc_state)
++ {
++ case GRS_INIT:
++ case GRS_ACTIVE:
++ OBSREF_SET(c->gr_lock, &_graceful_recovery_context);
++ break;
++
++ case GRS_NONE:
++ case GRS_DONE:
++ break;
++ }
+ }
+
+ /**
+@@ -2052,18 +2076,10 @@ channel_graceful_restart_lock(struct channel *c)
+ void
+ channel_graceful_restart_unlock(struct channel *c)
+ {
+- if (!c->gr_lock)
+- return;
+-
+- c->gr_lock = 0;
+- graceful_restart_locks--;
+-
+- if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks)
+- tm_start(gr_wait_timer, 0);
++ OBSREF_CLEAR(c->gr_lock);
+ }
+
+
+-
+ /**
+ * protos_dump_all - dump status of all protocols
+ *
+@@ -2615,9 +2631,9 @@ channel_show_info(struct channel *c)
+ cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter));
+ cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter));
+
+- if (graceful_restart_state == GRS_ACTIVE)
++ if (_graceful_recovery_context.grc_state == GRS_ACTIVE)
+ cli_msg(-1006, " GR recovery: %s%s",
+- c->gr_lock ? " pending" : "",
++ OBSREF_GET(c->gr_lock) ? " pending" : "",
+ c->gr_wait ? " waiting" : "");
+
+ channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]);
+diff --git a/nest/protocol.h b/nest/protocol.h
+index 2bfa1628a..ec561b263 100644
+--- nest/protocol.h
++++ nest/protocol.h
+@@ -659,7 +659,7 @@ struct channel {
+
+ u8 channel_state;
+ u8 reloadable; /* Hook reload_routes() is allowed on the channel */
+- u8 gr_lock; /* Graceful restart mechanism should wait for this channel */
++ OBSREF(struct graceful_recovery_context) gr_lock; /* Graceful restart mechanism should wait for this channel */
+ u8 gr_wait; /* Route export to channel is postponed until graceful restart */
+
+ u32 obstacles; /* External obstacles remaining before cleanup */
+@@ -763,4 +763,16 @@ void *channel_config_new(const struct channel_class *cc, const char *name, uint
+ void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto);
+ int channel_reconfigure(struct channel *c, struct channel_config *cf);
+
++struct graceful_recovery_context {
++ struct obstacle_target obstacles;
++ struct callback obstacles_cleared;
++ enum {
++ GRS_NONE,
++ GRS_INIT,
++ GRS_ACTIVE,
++ GRS_DONE,
++ } grc_state;
++ timer wait_timer;
++};
++
+ #endif
+--
+GitLab
+
diff --git a/net/bird3/files/patch-10-stonehenge b/net/bird3/files/patch-10-stonehenge
new file mode 100644
index 000000000000..a640442dae8d
--- /dev/null
+++ b/net/bird3/files/patch-10-stonehenge
@@ -0,0 +1,116 @@
+From f6ef8b5b58c674dd270b40aa57d20d2d638c48e9 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Tue, 24 Dec 2024 12:18:39 +0100
+Subject: [PATCH] Stonehenge: multi-slab allocator
+
+To mid-term allocate and free lots of small blocks in a fast pace,
+mb_alloc is too slow and causes heap bloating. We can already allocate
+blocks from slabs, and if we allow for a little bit of inefficiency,
+we can just use multiple slabs with stepped sizes.
+
+This technique is already used in ea_list allocation which is gonna be
+converted to Stonehenge.
+---
+ lib/resource.h | 14 ++++++++++++
+ lib/slab.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 76 insertions(+)
+
+diff --git a/lib/resource.h b/lib/resource.h
+index 48bf1f9ba..12b788510 100644
+--- lib/resource.h
++++ lib/resource.h
+@@ -139,6 +139,20 @@ void *sl_allocz(slab *);
+ void sl_free(void *);
+ void sl_delete(slab *);
+
++/* A whole stonehenge of slabs */
++
++typedef struct stonehenge stonehenge;
++typedef struct sth_block {
++ void *block;
++ bool large;
++} sth_block;
++
++stonehenge *sth_new(pool *);
++sth_block sth_alloc(stonehenge *, uint size);
++sth_block sth_allocz(stonehenge *, uint size);
++void sth_free(sth_block);
++void sth_delete(stonehenge *);
++
+ /*
+ * Low-level memory allocation functions, please don't use
+ * outside resource manager and possibly sysdep code.
+diff --git a/lib/slab.c b/lib/slab.c
+index ca971f9fb..d68bfef1e 100644
+--- lib/slab.c
++++ lib/slab.c
+@@ -469,4 +469,66 @@ slab_lookup(resource *r, unsigned long a)
+ return NULL;
+ }
+
++static const uint stonehenge_sizes[] = { 56, 112, 168, 288, 448, 800, 1344 };
++
++struct stonehenge {
++ pool *p;
++ slab *s[ARRAY_SIZE(stonehenge_sizes)];
++};
++
++sth_block
++sth_alloc(stonehenge *sth, uint size)
++{
++ for (uint i=0; i<ARRAY_SIZE(stonehenge_sizes); i++)
++ if (size <= stonehenge_sizes[i])
++ {
++ if (!sth->s[i])
++ sth->s[i] = sl_new(sth->p, stonehenge_sizes[i]);
++
++ return (sth_block) { .block = sl_alloc(sth->s[i]), };
++ }
++
++ return (sth_block) {
++ .block = mb_alloc(sth->p, size),
++ .large = 1,
++ };
++}
++
++sth_block
++sth_allocz(stonehenge *sth, uint size)
++{
++ sth_block b = sth_alloc(sth, size);
++ bzero(b.block, size);
++ return b;
++}
++
++void
++sth_free(sth_block b)
++{
++ if (b.large)
++ mb_free(b.block);
++ else
++ sl_free(b.block);
++}
++
++stonehenge *
++sth_new(pool *pp)
++{
++ stonehenge tmps = {
++ .p = rp_new(pp, pp->domain, "Stonehenge"),
++ };
++
++ stonehenge *s = sth_alloc(&tmps, sizeof(stonehenge)).block;
++ *s = tmps;
++ return s;
++}
++
++void sth_delete(stonehenge *s)
++{
++ pool *p = s->p;
++ sth_free((sth_block) { s });
++ rp_free(p);
++}
++
++
+ #endif
+--
+GitLab
+
diff --git a/net/bird3/files/patch-11-route-attribute-storage b/net/bird3/files/patch-11-route-attribute-storage
new file mode 100644
index 000000000000..5097846203eb
--- /dev/null
+++ b/net/bird3/files/patch-11-route-attribute-storage
@@ -0,0 +1,80 @@
+From 8b389a503ef56aa69aa456fabebd562abe247119 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Tue, 24 Dec 2024 13:12:58 +0100
+Subject: [PATCH] Route attribute storage moved to Stonehenge
+
+---
+ nest/rt-attr.c | 29 ++++++++---------------------
+ 1 file changed, 8 insertions(+), 21 deletions(-)
+
+diff --git a/nest/rt-attr.c b/nest/rt-attr.c
+index a0f7d5718..8d651efb2 100644
+--- nest/rt-attr.c
++++ nest/rt-attr.c
+@@ -204,9 +204,7 @@ DOMAIN(attrs) attrs_domain;
+
+ pool *rta_pool;
+
+-/* Assuming page size of 4096, these are magic values for slab allocation */
+-static const uint ea_slab_sizes[] = { 56, 112, 168, 288, 448, 800, 1344 };
+-static slab *ea_slab[ARRAY_SIZE(ea_slab_sizes)];
++static stonehenge *ea_sth;
+
+ static slab *rte_src_slab;
+
+@@ -1583,24 +1581,18 @@ ea_lookup_slow(ea_list *o, u32 squash_upto, enum ea_stored oid)
+ return rr;
+ }
+
+- struct ea_storage *r = NULL;
+ uint elen = ea_list_size(o);
+ uint sz = elen + sizeof(struct ea_storage);
+- for (uint i=0; i<ARRAY_SIZE(ea_slab_sizes); i++)
+- if (sz <= ea_slab_sizes[i])
+- {
+- r = sl_alloc(ea_slab[i]);
+- break;
+- }
++ sth_block b = sth_alloc(ea_sth, sz);
+
+- int huge = r ? 0 : EALF_HUGE;;
+- if (huge)
+- r = mb_alloc(rta_pool, sz);
++ struct ea_storage *r = b.block;
+
+ ea_list_copy(r->l, o, elen);
+ ea_list_ref(r->l);
+
+- r->l->flags |= huge;
++ if (b.large)
++ r->l->flags |= EALF_HUGE;
++
+ r->l->stored = oid;
+ r->hash_key = h;
+ atomic_store_explicit(&r->uc, 1, memory_order_release);
+@@ -1668,10 +1660,7 @@ ea_free_deferred(struct deferred_call *dc)
+
+ /* And now we can free the object, finally */
+ ea_list_unref(r->l);
+- if (r->l->flags & EALF_HUGE)
+- mb_free(r);
+- else
+- sl_free(r);
++ sth_free((sth_block) { r, !!(r->l->flags & EALF_HUGE) });
+
+ RTA_UNLOCK;
+ }
+@@ -1722,9 +1711,7 @@ rta_init(void)
+ RTA_LOCK;
+ rta_pool = rp_new(&root_pool, attrs_domain.attrs, "Attributes");
+
+- for (uint i=0; i<ARRAY_SIZE(ea_slab_sizes); i++)
+- ea_slab[i] = sl_new(rta_pool, ea_slab_sizes[i]);
+-
++ ea_sth = sth_new(rta_pool);
+ SPINHASH_INIT(rta_hash_table, RTAH, rta_pool, &global_work_list);
+
+ rte_src_init();
+--
+GitLab
+
diff --git a/net/bird3/files/patch-12-BGP-tx-bucket-storage b/net/bird3/files/patch-12-BGP-tx-bucket-storage
new file mode 100644
index 000000000000..513824f86769
--- /dev/null
+++ b/net/bird3/files/patch-12-BGP-tx-bucket-storage
@@ -0,0 +1,84 @@
+From fdb5c4920b45139fb3c37e1144643c0f756364b6 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Tue, 24 Dec 2024 13:22:56 +0100
+Subject: [PATCH] BGP: TX bucket storage moved to Stonehenge
+
+---
+ proto/bgp/attrs.c | 11 +++++++----
+ proto/bgp/bgp.h | 4 ++--
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
+index a2feaef53..725c469ff 100644
+--- proto/bgp/attrs.c
++++ proto/bgp/attrs.c
+@@ -1734,13 +1734,16 @@ bgp_get_bucket(struct bgp_ptx_private *c, ea_list *new)
+ uint size = sizeof(struct bgp_bucket) + ea_size;
+
+ /* Allocate the bucket */
+- b = mb_alloc(c->pool, size);
++ sth_block blk = sth_alloc(c->sth, size);
++ b = blk.block;
+ *b = (struct bgp_bucket) { };
+ init_list(&b->prefixes);
+ b->hash = hash;
+
+ /* Copy the ea_list */
+ ea_list_copy(b->eattrs, new, ea_size);
++ if (blk.large)
++ b->eattrs->flags |= EALF_HUGE;
+
+ /* Insert the bucket to bucket hash */
+ HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
+@@ -1764,7 +1767,7 @@ static void
+ bgp_free_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b)
+ {
+ HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
+- mb_free(b);
++ sth_free((sth_block) { b, !!(b->eattrs->flags & EALF_HUGE) });
+ }
+
+ int
+@@ -2086,6 +2089,7 @@ bgp_init_pending_tx(struct bgp_channel *c)
+
+ bpp->lock = dom;
+ bpp->pool = p;
++ bpp->sth = sth_new(p);
+ bpp->c = c;
+
+ bgp_init_bucket_table(bpp);
+@@ -2160,8 +2164,7 @@ bgp_free_pending_tx(struct bgp_channel *bc)
+ HASH_WALK_END;
+
+ HASH_FREE(c->bucket_hash);
+- sl_delete(c->bucket_slab);
+- c->bucket_slab = NULL;
++ sth_delete(c->sth);
+
+ rp_free(c->pool);
+
+diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
+index 202e78ba3..dac6e84ea 100644
+--- proto/bgp/bgp.h
++++ proto/bgp/bgp.h
+@@ -452,7 +452,8 @@ struct bgp_ptx_private {
+ struct { BGP_PTX_PUBLIC; };
+ struct bgp_ptx_private **locked_at;
+
+- pool *pool; /* Resource pool for TX related allocations */
++ pool *pool; /* Pool for infrequent long-term blocks */
++ stonehenge *sth; /* Bucket allocator */
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+@@ -461,7 +462,6 @@ struct bgp_ptx_private {
+ HASH(struct bgp_prefix) prefix_hash; /* Hash table of pending prefices */
+
+ slab *prefix_slab; /* Slab holding prefix nodes */
+- slab *bucket_slab; /* Slab holding buckets to send */
+
+ char bmp; /* This is a fake ptx for BMP encoding */
+ };
+--
+GitLab
+
diff --git a/net/bird3/files/patch-13-allocate-normalization-buckets b/net/bird3/files/patch-13-allocate-normalization-buckets
new file mode 100644
index 000000000000..60ff582d71c5
--- /dev/null
+++ b/net/bird3/files/patch-13-allocate-normalization-buckets
@@ -0,0 +1,100 @@
+From c3c12e1b4ff908211b156a182a5027f2b11b0709 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Tue, 24 Dec 2024 16:16:55 +0100
+Subject: [PATCH] Allocate the normalization buckets on stack
+
+Even though allocating from tmp_linpool is quite cheap,
+it isn't cheap when the block is larger than a page, which is the case here.
+Instead, we now allocate just the result which typically fits in a page,
+avoiding a necessity of a malloc().
+---
+ nest/rt-attr.c | 37 ++++++++++++++++++++++++-------------
+ 1 file changed, 24 insertions(+), 13 deletions(-)
+
+diff --git a/nest/rt-attr.c b/nest/rt-attr.c
+index 8d651efb2..9d5e10980 100644
+--- nest/rt-attr.c
++++ nest/rt-attr.c
+@@ -967,8 +967,8 @@ ea_list_size(ea_list *o)
+ * and creates the final structure useful for storage or fast searching.
+ * The method is a bucket sort.
+ *
+- * Returns the final ea_list with some excess memory at the end,
+- * allocated from the tmp_linpool. The adata is linked from the original places.
++ * Returns the final ea_list allocated from the tmp_linpool.
++ * The adata is linked from the original places.
+ */
+ ea_list *
+ ea_normalize(ea_list *e, u32 upto)
+@@ -976,21 +976,17 @@ ea_normalize(ea_list *e, u32 upto)
+ /* We expect some work to be actually needed. */
+ ASSERT_DIE(!BIT32_TEST(&upto, e->stored));
+
+- /* Allocate the output */
+- ea_list *out = tmp_allocz(ea_class_max * sizeof(eattr) + sizeof(ea_list));
+- *out = (ea_list) {
+- .flags = EALF_SORTED,
+- };
+-
++ /* Allocate the buckets locally */
++ eattr *buckets = allocz(ea_class_max * sizeof(eattr));
+ uint min_id = ~0, max_id = 0;
+
+- eattr *buckets = out->attrs;
++ ea_list *next = NULL;
+
+ /* Walk the attribute lists, one after another. */
+ for (; e; e = e->next)
+ {
+- if (!out->next && BIT32_TEST(&upto, e->stored))
+- out->next = e;
++ if (!next && BIT32_TEST(&upto, e->stored))
++ next = e;
+
+ for (int i = 0; i < e->count; i++)
+ {
+@@ -1000,7 +996,7 @@ ea_normalize(ea_list *e, u32 upto)
+ if (id < min_id)
+ min_id = id;
+
+- if (out->next)
++ if (next)
+ {
+ /* Underlay: check whether the value is duplicate */
+ if (buckets[id].id && buckets[id].fresh)
+@@ -1026,6 +1022,18 @@ ea_normalize(ea_list *e, u32 upto)
+ }
+ }
+
++ /* Find out how big the output actually is. */
++ uint len = 0;
++ for (uint id = min_id; id <= max_id; id++)
++ if (buckets[id].id && !(buckets[id].undef && buckets[id].fresh))
++ len++;
++
++ ea_list *out = tmp_alloc(sizeof(ea_list) + len * sizeof(eattr));
++ *out = (ea_list) {
++ .flags = EALF_SORTED,
++ .next = next,
++ };
++
+ /* And now we just walk the list from beginning to end and collect
+ * everything to the beginning of the list.
+ * Walking just that part which is inhabited for sure. */
+@@ -1044,9 +1052,12 @@ ea_normalize(ea_list *e, u32 upto)
+
+ /* Move the attribute to the beginning */
+ ASSERT_DIE(out->count < id);
+- buckets[out->count++] = buckets[id];
++ ASSERT_DIE(out->count < len);
++ out->attrs[out->count++] = buckets[id];
+ }
+
++ ASSERT_DIE(out->count == len);
++
+ /* We want to bisect only if the list is long enough */
+ if (out->count > 5)
+ out->flags |= EALF_BISECT;
+--
+GitLab
+
diff --git a/net/bird3/files/patch-14-BGP-fix-dislpay-name b/net/bird3/files/patch-14-BGP-fix-dislpay-name
new file mode 100644
index 000000000000..faf53ec128ef
--- /dev/null
+++ b/net/bird3/files/patch-14-BGP-fix-dislpay-name
@@ -0,0 +1,25 @@
+From b58bfcad683f46da9470ad87e8c78e423e04ff97 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Fri, 27 Dec 2024 16:22:59 +0100
+Subject: [PATCH] BGP: fix display name of bgp_otc attribute
+
+---
+ proto/bgp/attrs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
+index 725c469ff..5dc06be51 100644
+--- proto/bgp/attrs.c
++++ proto/bgp/attrs.c
+@@ -1192,7 +1192,7 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
+ .decode = bgp_decode_large_community,
+ },
+ [BA_ONLY_TO_CUSTOMER] = {
+- .name = "otc",
++ .name = "bgp_otc",
+ .type = T_INT,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_u32,
+--
+GitLab
+
diff --git a/net/bird3/files/patch-15-BGP-fixed-deterministic-med-crashes b/net/bird3/files/patch-15-BGP-fixed-deterministic-med-crashes
new file mode 100644
index 000000000000..15f3fac00287
--- /dev/null
+++ b/net/bird3/files/patch-15-BGP-fixed-deterministic-med-crashes
@@ -0,0 +1,65 @@
+From c5b07695ce810e4345ed1811eadfce935c83b324 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Tue, 7 Jan 2025 11:08:04 +0100
+Subject: [PATCH] BGP: fixed deterministic med crashes
+
+There were several places of forgotten NULL checks.
+
+Thanks to Alarig Le Lay <alarig@swordarmor.fr> for reporting:
+https://trubka.network.cz/pipermail/bird-users/2024-December/017990.html
+---
+ nest/rt-table.c | 14 ++++++++++++--
+ proto/bgp/attrs.c | 8 ++++----
+ 2 files changed, 16 insertions(+), 6 deletions(-)
+
+diff --git a/nest/rt-table.c b/nest/rt-table.c
+index 05191d743..fc6d0d4e0 100644
+--- nest/rt-table.c
++++ nest/rt-table.c
+@@ -2024,12 +2024,22 @@ rte_recalculate(struct rtable_private *table, struct rt_import_hook *c, struct n
+ do_recalculate:
+ /* Add the new route to the list right behind the old one */
+ if (new_stored)
++ {
++ /* There is the same piece of code several lines farther. Needs refactoring.
++ * The old_stored check is needed because of the possible jump from deterministic med */
++ if (old_stored)
+ {
+ atomic_store_explicit(&new_stored->next, atomic_load_explicit(&old_stored->next, memory_order_relaxed), memory_order_release);
+ atomic_store_explicit(&old_stored->next, new_stored, memory_order_release);
+-
+- table->rt_count++;
+ }
++ else
++ {
++ atomic_store_explicit(&new_stored->next, NULL, memory_order_release);
++ atomic_store_explicit(last_ptr, new_stored, memory_order_release);
++ }
++
++ table->rt_count++;
++ }
+
+ /* Find a new optimal route (if there is any) */
+ struct rte_storage * _Atomic *bp = &local_sentinel.next;
+diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
+index 5dc06be51..db6542343 100644
+--- proto/bgp/attrs.c
++++ proto/bgp/attrs.c
+@@ -2689,10 +2689,10 @@ bgp_rte_recalculate(struct rtable_private *table, net *net,
+ struct rte_storage *new_stored, struct rte_storage *old_stored, struct rte_storage *old_best_stored)
+ {
+ struct rte_storage *key_stored = new_stored ? new_stored : old_stored;
+- const struct rte *new = &new_stored->rte,
+- *old = &old_stored->rte,
+- *old_best = &old_best_stored->rte,
+- *key = &key_stored->rte;
++ const struct rte *new = RTE_OR_NULL(new_stored),
++ *old = RTE_OR_NULL(old_stored),
++ *old_best = RTE_OR_NULL(old_best_stored),
++ *key = RTE_OR_NULL(key_stored);
+
+ u32 lpref = rt_get_preference(key);
+ u32 lasn = bgp_get_neighbor(key);
+--
+GitLab
+
diff --git a/net/bird3/files/patch-16-Table-old-best-route-refeed-fix b/net/bird3/files/patch-16-Table-old-best-route-refeed-fix
new file mode 100644
index 000000000000..60dc7cece2a2
--- /dev/null
+++ b/net/bird3/files/patch-16-Table-old-best-route-refeed-fix
@@ -0,0 +1,87 @@
+From 2e14832d36c83b2ab5b7fb28b701de554fa5fdd9 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Tue, 7 Jan 2025 12:13:57 +0100
+Subject: [PATCH] Table: old best route refeed fix
+
+When refeeding with RA_OPTIMAL, the old best routes weren't announced,
+leading to weird behavior of protocols, mostly kernel. Fixed.
+---
+ nest/rt-table.c | 30 ++++++++++++++++++++++++++----
+ 1 file changed, 26 insertions(+), 4 deletions(-)
+
+diff --git a/nest/rt-table.c b/nest/rt-table.c
+index fc6d0d4e0..18a445a62 100644
+--- nest/rt-table.c
++++ nest/rt-table.c
+@@ -1485,11 +1485,18 @@ channel_notify_basic(void *_channel)
+ rte *new = &u->feed->block[i];
+ rte *old = NULL;
+ for (uint o = oldpos; o < u->feed->count_routes; o++)
+- if (new->src == u->feed->block[o].src)
++ if ((c->ra_mode == RA_ANY) && (new->src == u->feed->block[o].src))
+ {
+ old = &u->feed->block[o];
+ break;
+ }
++ else if ((c->ra_mode == RA_OPTIMAL) && (
++ bmap_test(&c->export_accepted_map, u->feed->block[o].id) ||
++ bmap_test(&c->export_rejected_map, u->feed->block[o].id)))
++ {
++ ASSERT_DIE(!old);
++ old = &u->feed->block[o];
++ }
+
+ rt_notify_basic(c, new, old);
+
+@@ -2542,10 +2549,14 @@ rt_feed_net_best(struct rt_exporter *e, struct rcu_unwinder *u, u32 index, bool
+ last_in_net = atomic_load_explicit(&n->best.last, memory_order_acquire);
+ first = rt_net_feed_validate_first(tr, first_in_net, last_in_net, first);
+
+- uint ecnt = 0;
++ uint ecnt = 0, ocnt = 0;
+ for (const struct rt_pending_export *rpe = first; rpe;
+ rpe = atomic_load_explicit(&rpe->next, memory_order_acquire))
++ {
+ ecnt++;
++ if (rpe->it.old)
++ ocnt++;
++ }
+
+ if (ecnt) {
+ const net_addr *a = (first->it.new ?: first->it.old)->net;
+@@ -2558,10 +2569,11 @@ rt_feed_net_best(struct rt_exporter *e, struct rcu_unwinder *u, u32 index, bool
+ if (!ecnt && (!best || prefilter && !prefilter(f, best->rte.net)))
+ return NULL;
+
+- struct rt_export_feed *feed = rt_alloc_feed(!!best, ecnt);
++ struct rt_export_feed *feed = rt_alloc_feed(!!best + ocnt, ecnt);
++ uint bpos = 0;
+ if (best)
+ {
+- feed->block[0] = best->rte;
++ feed->block[bpos++] = best->rte;
+ feed->ni = NET_TO_INDEX(best->rte.net);
+ }
+ else
+@@ -2575,8 +2587,18 @@ rt_feed_net_best(struct rt_exporter *e, struct rcu_unwinder *u, u32 index, bool
+ if (e >= ecnt)
+ RT_READ_RETRY(tr);
+ else
++ {
+ feed->exports[e++] = rpe->it.seq;
++ if (rpe->it.old)
++ {
++ ASSERT_DIE(bpos < !!best + ocnt);
++ feed->block[bpos] = *rpe->it.old;
++ feed->block[bpos].flags |= REF_OBSOLETE;
++ bpos++;
++ }
++ }
+
++ ASSERT_DIE(bpos == !!best + ocnt);
+ ASSERT_DIE(e == ecnt);
+ }
+
+--
+GitLab
+
diff --git a/net/bird3/files/patch-Makefile.in b/net/bird3/files/patch-Makefile.in
new file mode 100644
index 000000000000..2f8db6f96b66
--- /dev/null
+++ b/net/bird3/files/patch-Makefile.in
@@ -0,0 +1,11 @@
+--- Makefile.in.orig 2022-02-21 11:12:46 UTC
++++ Makefile.in
+@@ -207,7 +207,7 @@ install: all
+ $(INSTALL_PROGRAM) $(exedir)/$$BIN $(DESTDIR)/$(sbindir)/$$BIN ; \
+ done
+ if ! test -f $(DESTDIR)/@CONFIG_FILE@ ; then \
+- $(INSTALL_DATA) $(srcdir)/doc/bird.conf.example $(DESTDIR)/@CONFIG_FILE@ ; \
++ $(INSTALL_DATA) $(srcdir)/doc/bird.conf.example $(DESTDIR)/@CONFIG_FILE@.sample ; \
+ else \
+ echo "Not overwriting old bird.conf" ; \
+ fi
diff --git a/net/bird3/files/pkg-message.in b/net/bird3/files/pkg-message.in
new file mode 100644
index 000000000000..f1f38d120f01
--- /dev/null
+++ b/net/bird3/files/pkg-message.in
@@ -0,0 +1,11 @@
+[
+{ type: install
+ message: <<EOM
+%%LOCALBASE%%/sbin/birdc was added to /etc/shells.
+To assign it to a user as a login shell, the user needs to be in group wheel.
+
+Alternatively, you can use security/sudo to grant a user access to it; a rule
+limited to '%%LOCALBASE%%/sbin/birdc -r' restricts that user to read-only.
+EOM
+}
+]
diff --git a/net/bird3/pkg-descr b/net/bird3/pkg-descr
new file mode 100644
index 000000000000..64967d0317d2
--- /dev/null
+++ b/net/bird3/pkg-descr
@@ -0,0 +1,14 @@
+The BIRD project aims to develop a fully functional dynamic IP routing daemon.
+- Both IPv4 and IPv6
+- Multiple routing tables
+- BGP
+- RIP
+- OSPF
+- LDP
+- L3VPN
+- Static routes
+- Inter-table protocol
+- Command-line interface
+- Soft reconfiguration
+- Powerful language for route filtering
+Starting with release 3.x, BIRD is multi-threaded.
diff --git a/net/bird3/pkg-plist b/net/bird3/pkg-plist
new file mode 100644
index 000000000000..d34280eb152d
--- /dev/null
+++ b/net/bird3/pkg-plist
@@ -0,0 +1,4 @@
+@sample etc/bird.conf.sample
+@shell sbin/birdc
+sbin/bird
+sbin/birdcl