summaryrefslogtreecommitdiff
path: root/net/bird3/files/patch-09-graceful-recovery
diff options
context:
space:
mode:
Diffstat (limited to 'net/bird3/files/patch-09-graceful-recovery')
-rw-r--r--net/bird3/files/patch-09-graceful-recovery311
1 files changed, 0 insertions, 311 deletions
diff --git a/net/bird3/files/patch-09-graceful-recovery b/net/bird3/files/patch-09-graceful-recovery
deleted file mode 100644
index d576f80ebc42..000000000000
--- a/net/bird3/files/patch-09-graceful-recovery
+++ /dev/null
@@ -1,311 +0,0 @@
-From f7639a9fafa7411ebd1f2af56c270b970ac09f3d Mon Sep 17 00:00:00 2001
-From: Maria Matejka <mq@ucw.cz>
-Date: Mon, 23 Dec 2024 21:06:26 +0100
-Subject: [PATCH] Graceful recovery: converted to obstacles
-
-Yet another refcounting mechanism had a locking collision.
----
- nest/proto.c | 178 ++++++++++++++++++++++++++----------------------
- nest/protocol.h | 14 +++-
- 2 files changed, 110 insertions(+), 82 deletions(-)
-
-diff --git a/nest/proto.c b/nest/proto.c
-index 6fa74e9f1..caf99829b 100644
---- nest/proto.c
-+++ nest/proto.c
-@@ -31,15 +31,8 @@ static list STATIC_LIST_INIT(protocol_list);
- #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); })
- #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); })
-
--static timer *gr_wait_timer;
--
--#define GRS_NONE 0
--#define GRS_INIT 1
--#define GRS_ACTIVE 2
--#define GRS_DONE 3
--
--static int graceful_restart_state;
--static u32 graceful_restart_locks;
-+static struct graceful_recovery_context _graceful_recovery_context;
-+OBSREF(struct graceful_recovery_context) graceful_recovery_context;
-
- static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
- static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" };
-@@ -912,7 +905,7 @@ channel_do_stop(struct channel *c)
- ev_postpone(&c->reimport_event);
-
- c->gr_wait = 0;
-- if (c->gr_lock)
-+ if (OBSREF_GET(c->gr_lock))
- channel_graceful_restart_unlock(c);
-
- CALL(c->class->shutdown, c);
-@@ -1407,7 +1400,7 @@ proto_start(struct proto *p)
- DBG("Kicking %s up\n", p->name);
- PD(p, "Starting");
-
-- if (graceful_restart_state == GRS_INIT)
-+ if (OBSREF_GET(graceful_recovery_context))
- p->gr_recovery = 1;
-
- if (p->cf->loop_order != DOMAIN_ORDER(the_bird))
-@@ -1921,7 +1914,45 @@ proto_enable(struct proto *p)
- *
- */
-
--static void graceful_restart_done(timer *t);
-+/**
-+ * graceful_restart_done - finalize graceful restart
-+ * @t: unused
-+ *
-+ * When there are no locks on graceful restart, the functions finalizes the
-+ * graceful restart recovery. Protocols postponing route export until the end of
-+ * the recovery are awakened and the export to them is enabled.
-+ */
-+static void
-+graceful_recovery_done(struct callback *_ UNUSED)
-+{
-+ ASSERT_DIE(birdloop_inside(&main_birdloop));
-+ ASSERT_DIE(_graceful_recovery_context.grc_state == GRS_ACTIVE);
-+
-+ tm_stop(&_graceful_recovery_context.wait_timer);
-+ log(L_INFO "Graceful recovery done");
-+
-+ WALK_TLIST(proto, p, &global_proto_list)
-+ PROTO_LOCKED_FROM_MAIN(p)
-+ {
-+ p->gr_recovery = 0;
-+
-+ struct channel *c;
-+ WALK_LIST(c, p->channels)
-+ {
-+ ASSERT_DIE(!OBSREF_GET(c->gr_lock));
-+
-+ /* Resume postponed export of routes */
-+ if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify)
-+ channel_start_export(c);
-+
-+ /* Cleanup */
-+ c->gr_wait = 0;
-+ }
-+ }
-+
-+ _graceful_recovery_context.grc_state = GRS_DONE;
-+}
-+
-
- /**
- * graceful_restart_recovery - request initial graceful restart recovery
-@@ -1933,7 +1964,30 @@ static void graceful_restart_done(timer *t);
- void
- graceful_restart_recovery(void)
- {
-- graceful_restart_state = GRS_INIT;
-+ obstacle_target_init(
-+ &_graceful_recovery_context.obstacles,
-+ &_graceful_recovery_context.obstacles_cleared,
-+ &root_pool, "Graceful recovery");
-+
-+ OBSREF_SET(graceful_recovery_context, &_graceful_recovery_context);
-+ _graceful_recovery_context.grc_state = GRS_INIT;
-+}
-+
-+static void
-+graceful_recovery_timeout(timer *t UNUSED)
-+{
-+ log(L_INFO "Graceful recovery timeout");
-+ WALK_TLIST(proto, p, &global_proto_list)
-+ PROTO_LOCKED_FROM_MAIN(p)
-+ {
-+ struct channel *c;
-+ WALK_LIST(c, p->channels)
-+ if (OBSREF_GET(c->gr_lock))
-+ {
-+ log(L_INFO "Graceful recovery: Not waiting for %s.%s", p->name, c->name);
-+ OBSREF_CLEAR(c->gr_lock);
-+ }
-+ }
- }
-
- /**
-@@ -1946,73 +2000,35 @@ graceful_restart_recovery(void)
- void
- graceful_restart_init(void)
- {
-- if (!graceful_restart_state)
-+ if (!OBSREF_GET(graceful_recovery_context))
- return;
-
-- log(L_INFO "Graceful restart started");
-+ log(L_INFO "Graceful recovery started");
-
-- if (!graceful_restart_locks)
-- {
-- graceful_restart_done(NULL);
-- return;
-- }
-+ _graceful_recovery_context.grc_state = GRS_ACTIVE;
-
-- graceful_restart_state = GRS_ACTIVE;
-- gr_wait_timer = tm_new_init(proto_pool, graceful_restart_done, NULL, 0, 0);
-+ _graceful_recovery_context.wait_timer = (timer) { .hook = graceful_recovery_timeout };
- u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait;
-- tm_start(gr_wait_timer, gr_wait S);
--}
--
--/**
-- * graceful_restart_done - finalize graceful restart
-- * @t: unused
-- *
-- * When there are no locks on graceful restart, the functions finalizes the
-- * graceful restart recovery. Protocols postponing route export until the end of
-- * the recovery are awakened and the export to them is enabled. All other
-- * related state is cleared. The function is also called when the graceful
-- * restart wait timer fires (but there are still some locks).
-- */
--static void
--graceful_restart_done(timer *t)
--{
-- log(L_INFO "Graceful restart done");
-- graceful_restart_state = GRS_DONE;
--
-- WALK_TLIST(proto, p, &global_proto_list)
-- {
-- if (!p->gr_recovery)
-- continue;
--
-- struct channel *c;
-- WALK_LIST(c, p->channels)
-- {
-- /* Resume postponed export of routes */
-- if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify)
-- channel_start_export(c);
-+ tm_start(&_graceful_recovery_context.wait_timer, gr_wait S);
-
-- /* Cleanup */
-- c->gr_wait = 0;
-- c->gr_lock = 0;
-- }
--
-- p->gr_recovery = 0;
-- }
-+ callback_init(&_graceful_recovery_context.obstacles_cleared, graceful_recovery_done, &main_birdloop);
-
-- graceful_restart_locks = 0;
--
-- rfree(t);
-+ /* The last clearing of obstacle reference will cause
-+ * the graceful recovery finish immediately. */
-+ OBSREF_CLEAR(graceful_recovery_context);
- }
-
- void
- graceful_restart_show_status(void)
- {
-- if (graceful_restart_state != GRS_ACTIVE)
-+ if (_graceful_recovery_context.grc_state != GRS_ACTIVE)
- return;
-
- cli_msg(-24, "Graceful restart recovery in progress");
-- cli_msg(-24, " Waiting for %d channels to recover", graceful_restart_locks);
-- cli_msg(-24, " Wait timer is %t/%u", tm_remains(gr_wait_timer),
-+ cli_msg(-24, " Waiting for %u channels to recover",
-+ obstacle_target_count(&_graceful_recovery_context.obstacles));
-+ cli_msg(-24, " Wait timer is %t/%u",
-+ tm_remains(&_graceful_recovery_context.wait_timer),
- atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait);
- }
-
-@@ -2032,14 +2048,22 @@ graceful_restart_show_status(void)
- void
- channel_graceful_restart_lock(struct channel *c)
- {
-- ASSERT(graceful_restart_state == GRS_INIT);
-- ASSERT(c->proto->gr_recovery);
-+ ASSERT_DIE(birdloop_inside(&main_birdloop));
-
-- if (c->gr_lock)
-+ if (OBSREF_GET(c->gr_lock))
- return;
-
-- c->gr_lock = 1;
-- graceful_restart_locks++;
-+ switch (_graceful_recovery_context.grc_state)
-+ {
-+ case GRS_INIT:
-+ case GRS_ACTIVE:
-+ OBSREF_SET(c->gr_lock, &_graceful_recovery_context);
-+ break;
-+
-+ case GRS_NONE:
-+ case GRS_DONE:
-+ break;
-+ }
- }
-
- /**
-@@ -2052,18 +2076,10 @@ channel_graceful_restart_lock(struct channel *c)
- void
- channel_graceful_restart_unlock(struct channel *c)
- {
-- if (!c->gr_lock)
-- return;
--
-- c->gr_lock = 0;
-- graceful_restart_locks--;
--
-- if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks)
-- tm_start(gr_wait_timer, 0);
-+ OBSREF_CLEAR(c->gr_lock);
- }
-
-
--
- /**
- * protos_dump_all - dump status of all protocols
- *
-@@ -2615,9 +2631,9 @@ channel_show_info(struct channel *c)
- cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter));
- cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter));
-
-- if (graceful_restart_state == GRS_ACTIVE)
-+ if (_graceful_recovery_context.grc_state == GRS_ACTIVE)
- cli_msg(-1006, " GR recovery: %s%s",
-- c->gr_lock ? " pending" : "",
-+ OBSREF_GET(c->gr_lock) ? " pending" : "",
- c->gr_wait ? " waiting" : "");
-
- channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]);
-diff --git a/nest/protocol.h b/nest/protocol.h
-index 2bfa1628a..ec561b263 100644
---- nest/protocol.h
-+++ nest/protocol.h
-@@ -659,7 +659,7 @@ struct channel {
-
- u8 channel_state;
- u8 reloadable; /* Hook reload_routes() is allowed on the channel */
-- u8 gr_lock; /* Graceful restart mechanism should wait for this channel */
-+ OBSREF(struct graceful_recovery_context) gr_lock; /* Graceful restart mechanism should wait for this channel */
- u8 gr_wait; /* Route export to channel is postponed until graceful restart */
-
- u32 obstacles; /* External obstacles remaining before cleanup */
-@@ -763,4 +763,16 @@ void *channel_config_new(const struct channel_class *cc, const char *name, uint
- void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto);
- int channel_reconfigure(struct channel *c, struct channel_config *cf);
-
-+struct graceful_recovery_context {
-+ struct obstacle_target obstacles;
-+ struct callback obstacles_cleared;
-+ enum {
-+ GRS_NONE,
-+ GRS_INIT,
-+ GRS_ACTIVE,
-+ GRS_DONE,
-+ } grc_state;
-+ timer wait_timer;
-+};
-+
- #endif
---
-GitLab
-