diff options
Diffstat (limited to 'net/bird3/files/patch-09-graceful-recovery')
-rw-r--r-- | net/bird3/files/patch-09-graceful-recovery | 311 |
1 files changed, 0 insertions, 311 deletions
diff --git a/net/bird3/files/patch-09-graceful-recovery b/net/bird3/files/patch-09-graceful-recovery deleted file mode 100644 index d576f80ebc42..000000000000 --- a/net/bird3/files/patch-09-graceful-recovery +++ /dev/null @@ -1,311 +0,0 @@ -From f7639a9fafa7411ebd1f2af56c270b970ac09f3d Mon Sep 17 00:00:00 2001 -From: Maria Matejka <mq@ucw.cz> -Date: Mon, 23 Dec 2024 21:06:26 +0100 -Subject: [PATCH] Graceful recovery: converted to obstacles - -Yet another refcounting mechanism had a locking collision. ---- - nest/proto.c | 178 ++++++++++++++++++++++++++---------------------- - nest/protocol.h | 14 +++- - 2 files changed, 110 insertions(+), 82 deletions(-) - -diff --git a/nest/proto.c b/nest/proto.c -index 6fa74e9f1..caf99829b 100644 ---- nest/proto.c -+++ nest/proto.c -@@ -31,15 +31,8 @@ static list STATIC_LIST_INIT(protocol_list); - #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); }) - #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); }) - --static timer *gr_wait_timer; -- --#define GRS_NONE 0 --#define GRS_INIT 1 --#define GRS_ACTIVE 2 --#define GRS_DONE 3 -- --static int graceful_restart_state; --static u32 graceful_restart_locks; -+static struct graceful_recovery_context _graceful_recovery_context; -+OBSREF(struct graceful_recovery_context) graceful_recovery_context; - - static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; - static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; -@@ -912,7 +905,7 @@ channel_do_stop(struct channel *c) - ev_postpone(&c->reimport_event); - - c->gr_wait = 0; -- if (c->gr_lock) -+ if (OBSREF_GET(c->gr_lock)) - channel_graceful_restart_unlock(c); - - CALL(c->class->shutdown, c); -@@ -1407,7 +1400,7 @@ proto_start(struct proto *p) - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - -- if (graceful_restart_state == GRS_INIT) -+ if (OBSREF_GET(graceful_recovery_context)) - p->gr_recovery = 1; - - if (p->cf->loop_order != DOMAIN_ORDER(the_bird)) -@@ -1921,7 +1914,45 @@ proto_enable(struct proto *p) - * - */ - --static void graceful_restart_done(timer *t); -+/** -+ * graceful_restart_done - finalize graceful restart -+ * @t: unused -+ * -+ * When there are no locks on graceful restart, the functions finalizes the -+ * graceful restart recovery. Protocols postponing route export until the end of -+ * the recovery are awakened and the export to them is enabled. -+ */ -+static void -+graceful_recovery_done(struct callback *_ UNUSED) -+{ -+ ASSERT_DIE(birdloop_inside(&main_birdloop)); -+ ASSERT_DIE(_graceful_recovery_context.grc_state == GRS_ACTIVE); -+ -+ tm_stop(&_graceful_recovery_context.wait_timer); -+ log(L_INFO "Graceful recovery done"); -+ -+ WALK_TLIST(proto, p, &global_proto_list) -+ PROTO_LOCKED_FROM_MAIN(p) -+ { -+ p->gr_recovery = 0; -+ -+ struct channel *c; -+ WALK_LIST(c, p->channels) -+ { -+ ASSERT_DIE(!OBSREF_GET(c->gr_lock)); -+ -+ /* Resume postponed export of routes */ -+ if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) -+ channel_start_export(c); -+ -+ /* Cleanup */ -+ c->gr_wait = 0; -+ } -+ } -+ -+ _graceful_recovery_context.grc_state = GRS_DONE; -+} -+ - - /** - * graceful_restart_recovery - request initial graceful restart recovery -@@ -1933,7 +1964,30 @@ static void graceful_restart_done(timer *t); - void - graceful_restart_recovery(void) - { -- graceful_restart_state = GRS_INIT; -+ obstacle_target_init( -+ &_graceful_recovery_context.obstacles, -+ &_graceful_recovery_context.obstacles_cleared, -+ &root_pool, "Graceful recovery"); -+ -+ OBSREF_SET(graceful_recovery_context, &_graceful_recovery_context); -+ _graceful_recovery_context.grc_state = GRS_INIT; -+} -+ -+static void -+graceful_recovery_timeout(timer *t UNUSED) -+{ -+ log(L_INFO "Graceful recovery timeout"); -+ WALK_TLIST(proto, p, &global_proto_list) -+ PROTO_LOCKED_FROM_MAIN(p) -+ { -+ struct channel *c; -+ WALK_LIST(c, p->channels) -+ if (OBSREF_GET(c->gr_lock)) -+ { -+ log(L_INFO "Graceful recovery: Not waiting for %s.%s", p->name, c->name); -+ OBSREF_CLEAR(c->gr_lock); -+ } -+ } - } - - /** -@@ -1946,73 +2000,35 @@ graceful_restart_recovery(void) - void - graceful_restart_init(void) - { -- if (!graceful_restart_state) -+ if (!OBSREF_GET(graceful_recovery_context)) - return; - -- log(L_INFO "Graceful restart started"); -+ log(L_INFO "Graceful recovery started"); - -- if (!graceful_restart_locks) -- { -- graceful_restart_done(NULL); -- return; -- } -+ _graceful_recovery_context.grc_state = GRS_ACTIVE; - -- graceful_restart_state = GRS_ACTIVE; -- gr_wait_timer = tm_new_init(proto_pool, graceful_restart_done, NULL, 0, 0); -+ _graceful_recovery_context.wait_timer = (timer) { .hook = graceful_recovery_timeout }; - u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait; -- tm_start(gr_wait_timer, gr_wait S); --} -- --/** -- * graceful_restart_done - finalize graceful restart -- * @t: unused -- * -- * When there are no locks on graceful restart, the functions finalizes the -- * graceful restart recovery. Protocols postponing route export until the end of -- * the recovery are awakened and the export to them is enabled. All other -- * related state is cleared. The function is also called when the graceful -- * restart wait timer fires (but there are still some locks). -- */ --static void --graceful_restart_done(timer *t) --{ -- log(L_INFO "Graceful restart done"); -- graceful_restart_state = GRS_DONE; -- -- WALK_TLIST(proto, p, &global_proto_list) -- { -- if (!p->gr_recovery) -- continue; -- -- struct channel *c; -- WALK_LIST(c, p->channels) -- { -- /* Resume postponed export of routes */ -- if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) -- channel_start_export(c); -+ tm_start(&_graceful_recovery_context.wait_timer, gr_wait S); - -- /* Cleanup */ -- c->gr_wait = 0; -- c->gr_lock = 0; -- } -- -- p->gr_recovery = 0; -- } -+ callback_init(&_graceful_recovery_context.obstacles_cleared, graceful_recovery_done, &main_birdloop); - -- graceful_restart_locks = 0; -- -- rfree(t); -+ /* The last clearing of obstacle reference will cause -+ * the graceful recovery finish immediately. */ -+ OBSREF_CLEAR(graceful_recovery_context); - } - - void - graceful_restart_show_status(void) - { -- if (graceful_restart_state != GRS_ACTIVE) -+ if (_graceful_recovery_context.grc_state != GRS_ACTIVE) - return; - - cli_msg(-24, "Graceful restart recovery in progress"); -- cli_msg(-24, " Waiting for %d channels to recover", graceful_restart_locks); -- cli_msg(-24, " Wait timer is %t/%u", tm_remains(gr_wait_timer), -+ cli_msg(-24, " Waiting for %u channels to recover", -+ obstacle_target_count(&_graceful_recovery_context.obstacles)); -+ cli_msg(-24, " Wait timer is %t/%u", -+ tm_remains(&_graceful_recovery_context.wait_timer), - atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait); - } - -@@ -2032,14 +2048,22 @@ graceful_restart_show_status(void) - void - channel_graceful_restart_lock(struct channel *c) - { -- ASSERT(graceful_restart_state == GRS_INIT); -- ASSERT(c->proto->gr_recovery); -+ ASSERT_DIE(birdloop_inside(&main_birdloop)); - -- if (c->gr_lock) -+ if (OBSREF_GET(c->gr_lock)) - return; - -- c->gr_lock = 1; -- graceful_restart_locks++; -+ switch (_graceful_recovery_context.grc_state) -+ { -+ case GRS_INIT: -+ case GRS_ACTIVE: -+ OBSREF_SET(c->gr_lock, &_graceful_recovery_context); -+ break; -+ -+ case GRS_NONE: -+ case GRS_DONE: -+ break; -+ } - } - - /** -@@ -2052,18 +2076,10 @@ channel_graceful_restart_lock(struct channel *c) - void - channel_graceful_restart_unlock(struct channel *c) - { -- if (!c->gr_lock) -- return; -- -- c->gr_lock = 0; -- graceful_restart_locks--; -- -- if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks) -- tm_start(gr_wait_timer, 0); -+ OBSREF_CLEAR(c->gr_lock); - } - - -- - /** - * protos_dump_all - dump status of all protocols - * -@@ -2615,9 +2631,9 @@ channel_show_info(struct channel *c) - cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); - cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter)); - -- if (graceful_restart_state == GRS_ACTIVE) -+ if (_graceful_recovery_context.grc_state == GRS_ACTIVE) - cli_msg(-1006, " GR recovery: %s%s", -- c->gr_lock ? " pending" : "", -+ OBSREF_GET(c->gr_lock) ? " pending" : "", - c->gr_wait ? " waiting" : ""); - - channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); -diff --git a/nest/protocol.h b/nest/protocol.h -index 2bfa1628a..ec561b263 100644 ---- nest/protocol.h -+++ nest/protocol.h -@@ -659,7 +659,7 @@ struct channel { - - u8 channel_state; - u8 reloadable; /* Hook reload_routes() is allowed on the channel */ -- u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ -+ OBSREF(struct graceful_recovery_context) gr_lock; /* Graceful restart mechanism should wait for this channel */ - u8 gr_wait; /* Route export to channel is postponed until graceful restart */ - - u32 obstacles; /* External obstacles remaining before cleanup */ -@@ -763,4 +763,16 @@ void *channel_config_new(const struct channel_class *cc, const char *name, uint - void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); - int channel_reconfigure(struct channel *c, struct channel_config *cf); - -+struct graceful_recovery_context { -+ struct obstacle_target obstacles; -+ struct callback obstacles_cleared; -+ enum { -+ GRS_NONE, -+ GRS_INIT, -+ GRS_ACTIVE, -+ GRS_DONE, -+ } grc_state; -+ timer wait_timer; -+}; -+ - #endif --- -GitLab - |