summaryrefslogtreecommitdiff
path: root/net/bird3/files/patch-03-BGP-fix-locking-order
diff options
context:
space:
mode:
Diffstat (limited to 'net/bird3/files/patch-03-BGP-fix-locking-order')
-rw-r--r--net/bird3/files/patch-03-BGP-fix-locking-order176
1 files changed, 176 insertions, 0 deletions
diff --git a/net/bird3/files/patch-03-BGP-fix-locking-order b/net/bird3/files/patch-03-BGP-fix-locking-order
new file mode 100644
index 000000000000..51b73c26f8f8
--- /dev/null
+++ b/net/bird3/files/patch-03-BGP-fix-locking-order
@@ -0,0 +1,176 @@
+From 6779e5da698feb9b9e02411859ad81885ba46c01 Mon Sep 17 00:00:00 2001
+From: Maria Matejka <mq@ucw.cz>
+Date: Fri, 20 Dec 2024 11:28:00 +0100
+Subject: [PATCH] BGP: fix locking order error on dynamic protocol spawn
+
+We missed that the protocol spawner violates the prescribed
+locking order. When the rtable level is locked, no new protocol can be
+started, thus we need to:
+
+* create the protocol from a clean mainloop context
+* in protocol start hook, take the socket
+
+Testsuite: cf-bgp-autopeer
+Fixes: #136
+
+Thanks to Job Snijders <job@fastly.com> for reporting:
+https://trubka.network.cz/pipermail/bird-users/2024-December/017980.html
+---
+ nest/proto.c | 19 +++++++++++++++++++
+ nest/protocol.h | 2 ++
+ proto/bgp/bgp.c | 46 +++++++++++++++++++++++++++++++++++-----------
+ 3 files changed, 56 insertions(+), 11 deletions(-)
+
+diff --git a/nest/proto.c b/nest/proto.c
+index dded84f51..678697d69 100644
+--- nest/proto.c
++++ nest/proto.c
+@@ -1867,6 +1867,25 @@ proto_spawn(struct proto_config *cf, uint disabled)
+ return p;
+ }
+
++bool
++proto_disable(struct proto *p)
++{
++ ASSERT_DIE(birdloop_inside(&main_birdloop));
++ bool changed = !p->disabled;
++ p->disabled = 1;
++ proto_rethink_goal(p);
++ return changed;
++}
++
++bool
++proto_enable(struct proto *p)
++{
++ ASSERT_DIE(birdloop_inside(&main_birdloop));
++ bool changed = p->disabled;
++ p->disabled = 0;
++ proto_rethink_goal(p);
++ return changed;
++}
+
+ /**
+ * DOC: Graceful restart recovery
+diff --git a/nest/protocol.h b/nest/protocol.h
+index 25ed6f553..cf7ecb898 100644
+--- nest/protocol.h
++++ nest/protocol.h
+@@ -78,6 +78,8 @@ void proto_build(struct protocol *); /* Called from protocol to register itself
+ void protos_preconfig(struct config *);
+ void protos_commit(struct config *new, struct config *old, int type);
+ struct proto * proto_spawn(struct proto_config *cf, uint disabled);
++bool proto_disable(struct proto *p);
++bool proto_enable(struct proto *p);
+ void protos_dump_all(struct dump_request *);
+
+ #define GA_UNKNOWN 0 /* Attribute not recognized */
+diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
+index 5fc2b5fff..3170e3a42 100644
+--- proto/bgp/bgp.c
++++ proto/bgp/bgp.c
+@@ -378,8 +378,6 @@ bgp_startup(struct bgp_proto *p)
+ if (p->postponed_sk)
+ {
+ /* Apply postponed incoming connection */
+- sk_reloop(p->postponed_sk, p->p.loop);
+-
+ bgp_setup_conn(p, &p->incoming_conn);
+ bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
+ bgp_send_open(&p->incoming_conn);
+@@ -583,6 +581,9 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len
+ static void
+ bgp_down(struct bgp_proto *p)
+ {
++ /* Check that the dynamic BGP socket has been picked up */
++ ASSERT_DIE(p->postponed_sk == NULL);
++
+ if (bgp_start_state(p) > BSS_PREPARE)
+ {
+ bgp_setup_auth(p, 0);
+@@ -617,8 +618,8 @@ bgp_decision(void *vp)
+ bgp_down(p);
+ }
+
+-static struct bgp_proto *
+-bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
++static void
++bgp_spawn(struct bgp_proto *pp, struct birdsock *sk)
+ {
+ struct symbol *sym;
+ char fmt[SYM_MAX_LEN];
+@@ -635,9 +636,16 @@ bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
+ cfg_mem = NULL;
+
+ /* Just pass remote_ip to bgp_init() */
+- ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
++ ((struct bgp_config *) sym->proto)->remote_ip = sk->daddr;
++
++ /* Create the protocol disabled initially */
++ SKIP_BACK_DECLARE(struct bgp_proto, p, p, proto_spawn(sym->proto, 1));
+
+- return (void *) proto_spawn(sym->proto, 0);
++ /* Pass the socket */
++ p->postponed_sk = sk;
++
++ /* And enable the protocol */
++ proto_enable(&p->p);
+ }
+
+ void
+@@ -1489,10 +1497,15 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
+ /* For dynamic BGP, spawn new instance and postpone the socket */
+ if (bgp_is_dynamic(p))
+ {
+- p = bgp_spawn(p, sk->daddr);
+- p->postponed_sk = sk;
+- rmove(sk, p->p.pool);
+- goto leave;
++ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
++
++ /* The dynamic protocol must be in the START state */
++ ASSERT_DIE(p->p.proto_state == PS_START);
++ birdloop_leave(p->p.loop);
++
++ /* Now we have a clean mainloop */
++ bgp_spawn(p, sk);
++ return 0;
+ }
+
+ rmove(sk, p->p.pool);
+@@ -1806,7 +1819,6 @@ bgp_start(struct proto *P)
+ p->incoming_conn.state = BS_IDLE;
+ p->neigh = NULL;
+ p->bfd_req = NULL;
+- p->postponed_sk = NULL;
+ p->gr_ready = 0;
+ p->gr_active_num = 0;
+
+@@ -1848,6 +1860,16 @@ bgp_start(struct proto *P)
+ channel_graceful_restart_lock(&c->c);
+ }
+
++ /* Now it's the last chance to move the postponed socket to this BGP,
++ * as bgp_start is the only hook running from main loop. */
++ if (p->postponed_sk)
++ {
++ LOCK_DOMAIN(rtable, bgp_listen_domain);
++ rmove(p->postponed_sk, p->p.pool);
++ sk_reloop(p->postponed_sk, p->p.loop);
++ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
++ }
++
+ /*
+ * Before attempting to create the connection, we need to lock the port,
+ * so that we are the only instance attempting to talk with that neighbor.
+@@ -1999,6 +2021,8 @@ bgp_init(struct proto_config *CF)
+ p->remote_ip = cf->remote_ip;
+ p->remote_as = cf->remote_as;
+
++ p->postponed_sk = NULL;
++
+ /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
+ if (cf->c.parent)
+ cf->remote_ip = IPA_NONE;
+--
+GitLab
+