diff options
Diffstat (limited to 'net/openbgpd/files/patch-bgpd_session.c')
-rw-r--r-- | net/openbgpd/files/patch-bgpd_session.c | 2075 |
1 file changed, 2075 insertions, 0 deletions
diff --git a/net/openbgpd/files/patch-bgpd_session.c b/net/openbgpd/files/patch-bgpd_session.c new file mode 100644 index 000000000000..66c05a92aec0 --- /dev/null +++ b/net/openbgpd/files/patch-bgpd_session.c @@ -0,0 +1,2075 @@ +Index: bgpd/session.c +=================================================================== +RCS file: /home/cvs/private/hrs/openbgpd/bgpd/session.c,v +retrieving revision 1.1.1.8 +retrieving revision 1.13 +diff -u -p -r1.1.1.8 -r1.13 +--- bgpd/session.c 14 Feb 2010 20:19:57 -0000 1.1.1.8 ++++ bgpd/session.c 8 Dec 2012 20:17:59 -0000 1.13 +@@ -1,4 +1,4 @@ +-/* $OpenBSD: session.c,v 1.293 2009/06/07 05:56:24 eric Exp $ */ ++/* $OpenBSD: session.c,v 1.325 2012/09/18 09:45:50 claudio Exp $ */ + + /* + * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> +@@ -21,18 +21,21 @@ + + #include <sys/mman.h> + #include <sys/socket.h> ++#include <sys/time.h> ++#include <sys/resource.h> + #include <sys/un.h> ++#include <sys/queue.h> + #include <net/if_types.h> + #include <netinet/in.h> + #include <netinet/in_systm.h> + #include <netinet/ip.h> + #include <netinet/tcp.h> ++#include <netinet/tcp_var.h> + #include <arpa/inet.h> + + #include <err.h> + #include <errno.h> + #include <fcntl.h> +-#include <limits.h> + #include <poll.h> + #include <pwd.h> + #include <signal.h> +@@ -50,7 +53,12 @@ + #define PFD_PIPE_ROUTE_CTL 2 + #define PFD_SOCK_CTL 3 + #define PFD_SOCK_RCTL 4 +-#define PFD_LISTENERS_START 5 ++#define PFD_SOCK_PFKEY 5 ++#define PFD_LISTENERS_START 6 ++ ++#if defined(__FreeBSD__) /* FreeBSD has no LINK_STATE_IS_UP macro. 
*/ ++#define LINK_STATE_IS_UP(_s) ((_s) >= LINK_STATE_UP) ++#endif /* defined(__FreeBSD__) */ + + void session_sighdlr(int); + int setup_listeners(u_int *); +@@ -65,9 +73,9 @@ void session_accept(int); + int session_connect(struct peer *); + void session_tcp_established(struct peer *); + void session_capa_ann_none(struct peer *); +-int session_capa_add(struct peer *, struct buf *, u_int8_t, u_int8_t, +- u_int8_t *); +-int session_capa_add_mp(struct buf *, u_int16_t, u_int8_t); ++int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); ++int session_capa_add_mp(struct ibuf *, u_int8_t); ++int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); + struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); + int session_sendmsg(struct bgp_msg *, struct peer *); + void session_open(struct peer *); +@@ -75,30 +83,34 @@ void session_keepalive(struct peer *); + void session_update(u_int32_t, void *, size_t); + void session_notification(struct peer *, u_int8_t, u_int8_t, void *, + ssize_t); +-void session_rrefresh(struct peer *, u_int16_t, u_int8_t); ++void session_rrefresh(struct peer *, u_int8_t); ++int session_graceful_restart(struct peer *); ++int session_graceful_is_restarting(struct peer *); ++int session_graceful_stop(struct peer *); + int session_dispatch_msg(struct pollfd *, struct peer *); ++int session_process_msg(struct peer *); + int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); + int parse_open(struct peer *); + int parse_update(struct peer *); + int parse_refresh(struct peer *); + int parse_notification(struct peer *); + int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); ++int capa_neg_calc(struct peer *); + void session_dispatch_imsg(struct imsgbuf *, int, u_int *); + void session_up(struct peer *); + void session_down(struct peer *); + void session_demote(struct peer *, int); + +-int la_cmp(struct listen_addr *, struct listen_addr *); +-struct peer *getpeerbyip(struct sockaddr *); +-int 
session_match_mask(struct peer *, struct sockaddr *); +-struct peer *getpeerbyid(u_int32_t); +-static struct sockaddr *addr2sa(struct bgpd_addr *, u_int16_t); ++int la_cmp(struct listen_addr *, struct listen_addr *); ++struct peer *getpeerbyip(struct sockaddr *); ++int session_match_mask(struct peer *, struct bgpd_addr *); ++struct peer *getpeerbyid(u_int32_t); + +-struct bgpd_config *conf, *nconf = NULL; ++struct bgpd_config *conf, *nconf; + struct bgpd_sysdep sysdep; +-struct peer *npeers; +-volatile sig_atomic_t session_quit = 0; +-int pending_reconf = 0; ++struct peer *peers, *npeers; ++volatile sig_atomic_t session_quit; ++int pending_reconf; + int csock = -1, rcsock = -1; + u_int peer_cnt; + struct imsgbuf *ibuf_rde; +@@ -106,6 +118,7 @@ struct imsgbuf *ibuf_rde_ctl; + struct imsgbuf *ibuf_main; + + struct mrt_head mrthead; ++time_t pauseaccept; + + void + session_sighdlr(int sig) +@@ -125,6 +138,22 @@ setup_listeners(u_int *la_cnt) + int opt; + struct listen_addr *la; + u_int cnt = 0; ++#if defined(__FreeBSD__) ++ int s; ++ ++ /* Check if TCP_MD5SIG is supported. 
*/ ++ s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); ++ if (s < 0) ++ fatal("socket open for TCP_MD5SIG check"); ++ opt = TF_SIGNATURE; ++ if (setsockopt(s, IPPROTO_TCP, TCP_MD5SIG, &opt, sizeof(opt)) == -1) { ++ if (errno == ENOPROTOOPT || errno == EINVAL) ++ sysdep.no_md5sig = 1; ++ else ++ fatal("setsockopt TCP_MD5SIG"); ++ } ++ close(s); ++#endif /* defined(__FreeBSD__) */ + + TAILQ_FOREACH(la, conf->listen_addrs, entry) { + la->reconf = RECONF_NONE; +@@ -140,6 +169,7 @@ setup_listeners(u_int *la_cnt) + } + + opt = 1; ++#if !defined(__FreeBSD__) + if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG, + &opt, sizeof(opt)) == -1) { + if (errno == ENOPROTOOPT) { /* system w/o md5sig */ +@@ -148,6 +178,7 @@ setup_listeners(u_int *la_cnt) + } else + fatal("setsockopt TCP_MD5SIG"); + } ++#endif /* !defined(__FreeBSD__) */ + + /* set ttl to 255 so that ttl-security works */ + if (la->sa.ss_family == AF_INET && setsockopt(la->fd, +@@ -175,12 +206,10 @@ setup_listeners(u_int *la_cnt) + } + + pid_t +-session_main(struct bgpd_config *config, struct peer *cpeers, +- struct network_head *net_l, struct filter_head *rules, +- struct mrt_head *m_l, struct rib_names *rib_l, int pipe_m2s[2], +- int pipe_s2r[2], int pipe_m2r[2], int pipe_s2rctl[2]) ++session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2], ++ int pipe_s2rctl[2]) + { +- int nfds, timeout; ++ int nfds, timeout, pfkeysock; + unsigned int i, j, idx_peers, idx_listeners, idx_mrts; + pid_t pid; + u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; +@@ -189,19 +218,13 @@ session_main(struct bgpd_config *config, + u_int32_t ctl_queued; + struct passwd *pw; + struct peer *p, **peer_l = NULL, *last, *next; +- struct network *net; +- struct mrt *m, **mrt_l = NULL; +- struct filter_rule *r; ++ struct mrt *m, *xm, **mrt_l = NULL; + struct pollfd *pfd = NULL; + struct ctl_conn *ctl_conn; + struct listen_addr *la; +- struct rde_rib *rr; + void *newp; + short events; + +- conf = config; +- peers = cpeers; +- + switch (pid = 
fork()) { + case -1: + fatal("cannot fork"); +@@ -211,13 +234,6 @@ session_main(struct bgpd_config *config, + return (pid); + } + +- /* control socket is outside chroot */ +- if ((csock = control_init(0, conf->csock)) == -1) +- fatalx("control socket setup failed"); +- if (conf->rcsock != NULL && +- (rcsock = control_init(1, conf->rcsock)) == -1) +- fatalx("control socket setup failed"); +- + if ((pw = getpwnam(BGPD_USER)) == NULL) + fatal(NULL); + +@@ -228,29 +244,25 @@ session_main(struct bgpd_config *config, + + setproctitle("session engine"); + bgpd_process = PROC_SE; +- +- if (pfkey_init(&sysdep) == -1) +- fatalx("pfkey setup failed"); ++ pfkeysock = pfkey_init(&sysdep); + + if (setgroups(1, &pw->pw_gid) || + setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || + setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) + fatal("can't drop privileges"); + +- listener_cnt = 0; +- setup_listeners(&listener_cnt); +- + signal(SIGTERM, session_sighdlr); + signal(SIGINT, session_sighdlr); + signal(SIGPIPE, SIG_IGN); + signal(SIGHUP, SIG_IGN); +- log_info("session engine ready"); ++ signal(SIGALRM, SIG_IGN); ++ signal(SIGUSR1, SIG_IGN); ++ + close(pipe_m2s[0]); + close(pipe_s2r[1]); + close(pipe_s2rctl[1]); + close(pipe_m2r[0]); + close(pipe_m2r[1]); +- init_conf(conf); + if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL || + (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL || + (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) +@@ -258,37 +270,21 @@ session_main(struct bgpd_config *config, + imsg_init(ibuf_rde, pipe_s2r[0]); + imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]); + imsg_init(ibuf_main, pipe_m2s[1]); ++ + TAILQ_INIT(&ctl_conns); +- control_listen(csock); +- control_listen(rcsock); + LIST_INIT(&mrthead); ++ listener_cnt = 0; + peer_cnt = 0; + ctl_cnt = 0; + +- /* filter rules are not used in the SE */ +- while ((r = TAILQ_FIRST(rules)) != NULL) { +- TAILQ_REMOVE(rules, r, entry); +- free(r); +- } +- free(rules); +- +- /* network list is not used in the SE */ +- 
while ((net = TAILQ_FIRST(net_l)) != NULL) { +- TAILQ_REMOVE(net_l, net, entry); +- filterset_free(&net->net.attrset); +- free(net); +- } ++ if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL) ++ fatal(NULL); ++ if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) == ++ NULL) ++ fatal(NULL); ++ TAILQ_INIT(conf->listen_addrs); + +- /* main mrt list is not used in the SE */ +- while ((m = LIST_FIRST(m_l)) != NULL) { +- LIST_REMOVE(m, entry); +- free(m); +- } +- /* rib names not used in the SE */ +- while ((rr = SIMPLEQ_FIRST(&ribnames))) { +- SIMPLEQ_REMOVE_HEAD(&ribnames, entry); +- free(rr); +- } ++ log_info("session engine ready"); + + while (session_quit == 0) { + /* check for peers to be initialized or deleted */ +@@ -308,8 +304,9 @@ session_main(struct bgpd_config *config, + + /* reinit due? */ + if (p->conf.reconf_action == RECONF_REINIT) { +- bgp_fsm(p, EVNT_STOP); +- timer_set(p, Timer_IdleHold, 0); ++ session_stop(p, ERR_CEASE_ADMIN_RESET); ++ if (!p->conf.down) ++ timer_set(p, Timer_IdleHold, 0); + } + + /* deletion due? */ +@@ -317,7 +314,7 @@ session_main(struct bgpd_config *config, + if (p->demoted) + session_demote(p, -1); + p->conf.demote_group[0] = 0; +- bgp_fsm(p, EVNT_STOP); ++ session_stop(p, ERR_CEASE_PEER_UNCONF); + log_peer_warnx(&p->conf, "removed"); + if (last != NULL) + last->next = next; +@@ -346,9 +343,17 @@ session_main(struct bgpd_config *config, + } + + mrt_cnt = 0; +- LIST_FOREACH(m, &mrthead, entry) ++ for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { ++ xm = LIST_NEXT(m, entry); ++ if (m->state == MRT_STATE_REMOVE) { ++ mrt_clean(m); ++ LIST_REMOVE(m, entry); ++ free(m); ++ continue; ++ } + if (m->wbuf.queued) + mrt_cnt++; ++ } + + if (mrt_cnt > mrt_l_elms) { + if ((newp = realloc(mrt_l, sizeof(struct mrt *) * +@@ -394,18 +399,31 @@ session_main(struct bgpd_config *config, + if (ctl_queued < SESSION_CTL_QUEUE_MAX) + /* + * Do not act as unlimited buffer. 
Don't read in more +- * messages if the ctl sockets are getting full. ++ * messages if the ctl sockets are getting full. + */ + pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN; +- pfd[PFD_SOCK_CTL].fd = csock; +- pfd[PFD_SOCK_CTL].events = POLLIN; +- pfd[PFD_SOCK_RCTL].fd = rcsock; +- pfd[PFD_SOCK_RCTL].events = POLLIN; +- ++ if (pauseaccept == 0) { ++ pfd[PFD_SOCK_CTL].fd = csock; ++ pfd[PFD_SOCK_CTL].events = POLLIN; ++ pfd[PFD_SOCK_RCTL].fd = rcsock; ++ pfd[PFD_SOCK_RCTL].events = POLLIN; ++ } else { ++ pfd[PFD_SOCK_CTL].fd = -1; ++ pfd[PFD_SOCK_RCTL].fd = -1; ++ } ++ pfd[PFD_SOCK_PFKEY].fd = pfkeysock; ++#if !defined(__FreeBSD__) ++ pfd[PFD_SOCK_PFKEY].events = POLLIN; ++#else ++ pfd[PFD_SOCK_PFKEY].events = 0; ++#endif + i = PFD_LISTENERS_START; + TAILQ_FOREACH(la, conf->listen_addrs, entry) { +- pfd[i].fd = la->fd; +- pfd[i].events = POLLIN; ++ if (pauseaccept == 0) { ++ pfd[i].fd = la->fd; ++ pfd[i].events = POLLIN; ++ } else ++ pfd[i].fd = -1; + i++; + } + idx_listeners = i; +@@ -450,6 +468,10 @@ session_main(struct bgpd_config *config, + p->state == STATE_ESTABLISHED) + session_demote(p, -1); + break; ++ case Timer_RestartTimeout: ++ timer_stop(p, Timer_RestartTimeout); ++ session_graceful_stop(p); ++ break; + default: + fatalx("King Bula lost in time"); + } +@@ -462,6 +484,9 @@ session_main(struct bgpd_config *config, + events = POLLIN; + if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) + events |= POLLOUT; ++ /* is there still work to do? */ ++ if (p->rbuf && p->rbuf->wpos) ++ timeout = 0; + + /* poll events */ + if (p->fd != -1 && events != 0) { +@@ -492,12 +517,21 @@ session_main(struct bgpd_config *config, + i++; + } + ++ if (pauseaccept && timeout > 1) ++ timeout = 1; + if (timeout < 0) + timeout = 0; + if ((nfds = poll(pfd, i, timeout * 1000)) == -1) + if (errno != EINTR) + fatal("poll error"); + ++ /* ++ * If we previously saw fd exhaustion, we stop accept() ++ * for 1 second to throttle the accept() loop. 
++ */ ++ if (pauseaccept && getmonotime() > pauseaccept + 1) ++ pauseaccept = 0; ++ + if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT) + if (msgbuf_write(&ibuf_main->w) < 0) + fatal("pipe write error"); +@@ -534,6 +568,14 @@ session_main(struct bgpd_config *config, + ctl_cnt += control_accept(rcsock, 1); + } + ++ if (nfds > 0 && pfd[PFD_SOCK_PFKEY].revents & POLLIN) { ++ nfds--; ++ if (pfkey_read(pfkeysock, NULL) == -1) { ++ log_warnx("pfkey_read failed, exiting..."); ++ session_quit = 1; ++ } ++ } ++ + for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners; + j++) + if (pfd[j].revents & POLLIN) { +@@ -545,6 +587,10 @@ session_main(struct bgpd_config *config, + nfds -= session_dispatch_msg(&pfd[j], + peer_l[j - idx_listeners]); + ++ for (p = peers; p != NULL; p = p->next) ++ if (p->rbuf && p->rbuf->wpos) ++ session_process_msg(p); ++ + for (; nfds > 0 && j < idx_mrts; j++) + if (pfd[j].revents & POLLOUT) { + nfds--; +@@ -557,7 +603,7 @@ session_main(struct bgpd_config *config, + + while ((p = peers) != NULL) { + peers = p->next; +- bgp_fsm(p, EVNT_STOP); ++ session_stop(p, ERR_CEASE_ADMIN_DOWN); + pfkey_remove(p); + free(p); + } +@@ -643,10 +689,9 @@ bgp_fsm(struct peer *peer, enum session_ + timer_stop(peer, Timer_IdleHold); + + /* allocate read buffer */ +- peer->rbuf = calloc(1, sizeof(struct buf_read)); ++ peer->rbuf = calloc(1, sizeof(struct ibuf_read)); + if (peer->rbuf == NULL) + fatal(NULL); +- peer->rbuf->wpos = 0; + + /* init write buffer */ + msgbuf_init(&peer->wbuf); +@@ -746,7 +791,6 @@ bgp_fsm(struct peer *peer, enum session_ + /* ignore */ + break; + case EVNT_STOP: +- session_notification(peer, ERR_CEASE, 0, NULL, 0); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_CON_CLOSED: +@@ -780,7 +824,8 @@ bgp_fsm(struct peer *peer, enum session_ + change_state(peer, STATE_IDLE, event); + break; + default: +- session_notification(peer, ERR_FSM, 0, NULL, 0); ++ session_notification(peer, ++ ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); + 
change_state(peer, STATE_IDLE, event); + break; + } +@@ -791,7 +836,6 @@ bgp_fsm(struct peer *peer, enum session_ + /* ignore */ + break; + case EVNT_STOP: +- session_notification(peer, ERR_CEASE, 0, NULL, 0); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_CON_CLOSED: +@@ -815,7 +859,8 @@ bgp_fsm(struct peer *peer, enum session_ + change_state(peer, STATE_IDLE, event); + break; + default: +- session_notification(peer, ERR_FSM, 0, NULL, 0); ++ session_notification(peer, ++ ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); + change_state(peer, STATE_IDLE, event); + break; + } +@@ -826,7 +871,6 @@ bgp_fsm(struct peer *peer, enum session_ + /* ignore */ + break; + case EVNT_STOP: +- session_notification(peer, ERR_CEASE, 0, NULL, 0); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_CON_CLOSED: +@@ -856,7 +900,8 @@ bgp_fsm(struct peer *peer, enum session_ + change_state(peer, STATE_IDLE, event); + break; + default: +- session_notification(peer, ERR_FSM, 0, NULL, 0); ++ session_notification(peer, ++ ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); + change_state(peer, STATE_IDLE, event); + break; + } +@@ -885,9 +930,10 @@ start_timer_keepalive(struct peer *peer) + void + session_close_connection(struct peer *peer) + { +- if (peer->fd != -1) ++ if (peer->fd != -1) { + close(peer->fd); +- ++ pauseaccept = 0; ++ } + peer->fd = peer->wbuf.fd = -1; + } + +@@ -923,20 +969,31 @@ change_state(struct peer *peer, enum ses + timer_stop(peer, Timer_ConnectRetry); + timer_stop(peer, Timer_Keepalive); + timer_stop(peer, Timer_Hold); ++ timer_stop(peer, Timer_IdleHold); + timer_stop(peer, Timer_IdleHoldReset); + session_close_connection(peer); + msgbuf_clear(&peer->wbuf); + free(peer->rbuf); + peer->rbuf = NULL; + bzero(&peer->capa.peer, sizeof(peer->capa.peer)); +- if (peer->state == STATE_ESTABLISHED) +- session_down(peer); ++ + if (event != EVNT_STOP) { + timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); + if (event != EVNT_NONE && + peer->IdleHoldTime < 
MAX_IDLE_HOLD/2) + peer->IdleHoldTime *= 2; + } ++ if (peer->state == STATE_ESTABLISHED) { ++ if (peer->capa.neg.grestart.restart == 2 && ++ (event == EVNT_CON_CLOSED || ++ event == EVNT_CON_FATAL)) { ++ /* don't punish graceful restart */ ++ timer_set(peer, Timer_IdleHold, 0); ++ peer->IdleHoldTime /= 2; ++ session_graceful_restart(peer); ++ } else ++ session_down(peer); ++ } + if (peer->state == STATE_NONE || + peer->state == STATE_ESTABLISHED) { + /* initialize capability negotiation structures */ +@@ -947,6 +1004,20 @@ change_state(struct peer *peer, enum ses + } + break; + case STATE_CONNECT: ++ if (peer->state == STATE_ESTABLISHED && ++ peer->capa.neg.grestart.restart == 2) { ++ /* do the graceful restart dance */ ++ session_graceful_restart(peer); ++ peer->holdtime = INTERVAL_HOLD_INITIAL; ++ timer_stop(peer, Timer_ConnectRetry); ++ timer_stop(peer, Timer_Keepalive); ++ timer_stop(peer, Timer_Hold); ++ timer_stop(peer, Timer_IdleHold); ++ timer_stop(peer, Timer_IdleHoldReset); ++ session_close_connection(peer); ++ msgbuf_clear(&peer->wbuf); ++ bzero(&peer->capa.peer, sizeof(peer->capa.peer)); ++ } + break; + case STATE_ACTIVE: + break; +@@ -990,7 +1061,10 @@ session_accept(int listenfd) + len = sizeof(cliaddr); + if ((connfd = accept(listenfd, + (struct sockaddr *)&cliaddr, &len)) == -1) { +- if (errno == EWOULDBLOCK || errno == EINTR) ++ if (errno == ENFILE || errno == EMFILE) { ++ pauseaccept = getmonotime(); ++ return; ++ } else if (errno == EWOULDBLOCK || errno == EINTR) + return; + else + log_warn("accept"); +@@ -1017,6 +1091,7 @@ session_accept(int listenfd) + } + } + ++open: + if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { + log_peer_warnx(&p->conf, + "ipsec or md5sig configured but not available"); +@@ -1049,6 +1124,13 @@ session_accept(int listenfd) + } + session_socket_blockmode(connfd, BM_NONBLOCK); + bgp_fsm(p, EVNT_CON_OPEN); ++ return; ++ } else if (p != NULL && p->state == STATE_ESTABLISHED && ++ p->capa.neg.grestart.restart == 2) 
{ ++ /* first do the graceful restart dance */ ++ change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); ++ /* then do part of the open dance */ ++ goto open; + } else { + log_conn_attempt(p, (struct sockaddr *)&cliaddr); + close(connfd); +@@ -1069,7 +1151,7 @@ session_connect(struct peer *peer) + if (peer->fd != -1) + return (-1); + +- if ((peer->fd = socket(peer->conf.remote_addr.af, SOCK_STREAM, ++ if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM, + IPPROTO_TCP)) == -1) { + log_peer_warn(&peer->conf, "session_connect socket"); + bgp_fsm(peer, EVNT_CON_OPENFAIL); +@@ -1100,8 +1182,7 @@ session_connect(struct peer *peer) + peer->wbuf.fd = peer->fd; + + /* if update source is set we need to bind() */ +- if (peer->conf.local_addr.af) { +- sa = addr2sa(&peer->conf.local_addr, 0); ++ if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) { + if (bind(peer->fd, sa, sa->sa_len) == -1) { + log_peer_warn(&peer->conf, "session_connect bind"); + bgp_fsm(peer, EVNT_CON_OPENFAIL); +@@ -1139,42 +1220,50 @@ session_setup_socket(struct peer *p) + int nodelay = 1; + int bsize; + +- if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET) { +- /* set TTL to foreign router's distance - 1=direct n=multihop +- with ttlsec, we always use 255 */ +- if (p->conf.ttlsec) { +- ttl = 256 - p->conf.distance; +- if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl, ++ switch (p->conf.remote_addr.aid) { ++ case AID_INET: ++ /* set precedence, see RFC 1771 appendix 5 */ ++ if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == ++ -1) { ++ log_peer_warn(&p->conf, ++ "session_setup_socket setsockopt TOS"); ++ return (-1); ++ } ++ ++ if (p->conf.ebgp) { ++ /* set TTL to foreign router's distance ++ 1=direct n=multihop with ttlsec, we always use 255 */ ++ if (p->conf.ttlsec) { ++ ttl = 256 - p->conf.distance; ++ if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, ++ &ttl, sizeof(ttl)) == -1) { ++ log_peer_warn(&p->conf, ++ "session_setup_socket: " ++ "setsockopt MINTTL"); ++ return 
(-1); ++ } ++ ttl = 255; ++ } ++ ++ if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, + sizeof(ttl)) == -1) { + log_peer_warn(&p->conf, +- "session_setup_socket setsockopt MINTTL"); ++ "session_setup_socket setsockopt TTL"); + return (-1); + } +- ttl = 255; +- } +- +- if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, +- sizeof(ttl)) == -1) { +- log_peer_warn(&p->conf, +- "session_setup_socket setsockopt TTL"); +- return (-1); + } +- } +- +- if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET6) +- /* set hoplimit to foreign router's distance */ +- if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, +- sizeof(ttl)) == -1) { +- log_peer_warn(&p->conf, +- "session_setup_socket setsockopt hoplimit"); +- return (-1); ++ break; ++ case AID_INET6: ++ if (p->conf.ebgp) { ++ /* set hoplimit to foreign router's distance */ ++ if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, ++ &ttl, sizeof(ttl)) == -1) { ++ log_peer_warn(&p->conf, ++ "session_setup_socket setsockopt hoplimit"); ++ return (-1); ++ } + } +- +- /* if ttlsec is in use, set minttl */ +- if (p->conf.ttlsec) { +- ttl = 256 - p->conf.distance; +- setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl)); +- ++ break; + } + + /* set TCP_NODELAY */ +@@ -1185,24 +1274,18 @@ session_setup_socket(struct peer *p) + return (-1); + } + +- /* set precedence, see RFC 1771 appendix 5 */ +- if (p->conf.remote_addr.af == AF_INET && +- setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == -1) { +- log_peer_warn(&p->conf, +- "session_setup_socket setsockopt TOS"); +- return (-1); +- } +- + /* only increase bufsize (and thus window) if md5 or ipsec is in use */ + if (p->conf.auth.method != AUTH_NONE) { + /* try to increase bufsize. 
no biggie if it fails */ + bsize = 65535; +- while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, +- sizeof(bsize)) == -1) ++ while (bsize > 8192 && ++ setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, ++ sizeof(bsize)) == -1 && errno != EINVAL) + bsize /= 2; + bsize = 65535; +- while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, +- sizeof(bsize)) == -1) ++ while (bsize > 8192 && ++ setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, ++ sizeof(bsize)) == -1 && errno != EINVAL) + bsize /= 2; + } + +@@ -1244,40 +1327,56 @@ session_tcp_established(struct peer *pee + void + session_capa_ann_none(struct peer *peer) + { +- peer->capa.ann.mp_v4 = SAFI_NONE; +- peer->capa.ann.mp_v4 = SAFI_NONE; +- peer->capa.ann.refresh = 0; +- peer->capa.ann.restart = 0; +- peer->capa.ann.as4byte = 0; ++ bzero(&peer->capa.ann, sizeof(peer->capa.ann)); + } + + int +-session_capa_add(struct peer *p, struct buf *opb, u_int8_t capa_code, +- u_int8_t capa_len, u_int8_t *optparamlen) +-{ +- u_int8_t op_type, op_len, tot_len, errs = 0; +- +- op_type = OPT_PARAM_CAPABILITIES; +- op_len = sizeof(capa_code) + sizeof(capa_len) + capa_len; +- tot_len = sizeof(op_type) + sizeof(op_len) + op_len; +- errs += buf_add(opb, &op_type, sizeof(op_type)); +- errs += buf_add(opb, &op_len, sizeof(op_len)); +- errs += buf_add(opb, &capa_code, sizeof(capa_code)); +- errs += buf_add(opb, &capa_len, sizeof(capa_len)); +- *optparamlen += tot_len; ++session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len) ++{ ++ int errs = 0; ++ ++ errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); ++ errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); + return (errs); + } + + int +-session_capa_add_mp(struct buf *buf, u_int16_t afi, u_int8_t safi) ++session_capa_add_mp(struct ibuf *buf, u_int8_t aid) + { +- u_int8_t pad = 0; ++ u_int8_t safi, pad = 0; ++ u_int16_t afi; + int errs = 0; + ++ if (aid2afi(aid, &afi, &safi) == -1) ++ fatalx("session_capa_add_mp: bad afi/safi pair"); ++ afi = htons(afi); ++ errs += 
ibuf_add(buf, &afi, sizeof(afi)); ++ errs += ibuf_add(buf, &pad, sizeof(pad)); ++ errs += ibuf_add(buf, &safi, sizeof(safi)); ++ ++ return (errs); ++} ++ ++int ++session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) ++{ ++ u_int errs = 0; ++ u_int16_t afi; ++ u_int8_t flags, safi; ++ ++ if (aid2afi(aid, &afi, &safi)) { ++ log_warn("session_capa_add_gr: bad AID"); ++ return (1); ++ } ++ if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) ++ flags = CAPA_GR_F_FLAG; ++ else ++ flags = 0; ++ + afi = htons(afi); +- errs += buf_add(buf, &afi, sizeof(afi)); +- errs += buf_add(buf, &pad, sizeof(pad)); +- errs += buf_add(buf, &safi, sizeof(safi)); ++ errs += ibuf_add(b, &afi, sizeof(afi)); ++ errs += ibuf_add(b, &safi, sizeof(safi)); ++ errs += ibuf_add(b, &flags, sizeof(flags)); + + return (errs); + } +@@ -1287,23 +1386,22 @@ session_newmsg(enum msg_type msgtype, u_ + { + struct bgp_msg *msg; + struct msg_header hdr; +- struct buf *buf; ++ struct ibuf *buf; + int errs = 0; + + memset(&hdr.marker, 0xff, sizeof(hdr.marker)); + hdr.len = htons(len); + hdr.type = msgtype; + +- if ((buf = buf_open(len)) == NULL) ++ if ((buf = ibuf_open(len)) == NULL) + return (NULL); + +- errs += buf_add(buf, &hdr.marker, sizeof(hdr.marker)); +- errs += buf_add(buf, &hdr.len, sizeof(hdr.len)); +- errs += buf_add(buf, &hdr.type, sizeof(hdr.type)); +- +- if (errs > 0 || +- (msg = calloc(1, sizeof(*msg))) == NULL) { +- buf_free(buf); ++ errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); ++ errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); ++ errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); ++ ++ if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { ++ ibuf_free(buf); + return (NULL); + } + +@@ -1329,7 +1427,7 @@ session_sendmsg(struct bgp_msg *msg, str + mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p); + } + +- buf_close(&p->wbuf, msg->buf); ++ ibuf_close(&p->wbuf, msg->buf); + free(msg); + return (0); + } +@@ -1338,40 +1436,70 @@ void + session_open(struct peer *p) + 
{ + struct bgp_msg *buf; +- struct buf *opb; ++ struct ibuf *opb; + struct msg_open msg; + u_int16_t len; +- u_int8_t optparamlen = 0; +- u_int errs = 0; ++ u_int8_t i, op_type, optparamlen = 0; ++ int errs = 0; ++ int mpcapa = 0; + + +- if ((opb = buf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN)) == NULL) { ++ if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - ++ sizeof(optparamlen))) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL); + return; + } + + /* multiprotocol extensions, RFC 4760 */ +- if (p->capa.ann.mp_v4) { /* 4 bytes data */ +- errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen); +- errs += session_capa_add_mp(opb, AFI_IPv4, p->capa.ann.mp_v4); +- } +- if (p->capa.ann.mp_v6) { /* 4 bytes data */ +- errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen); +- errs += session_capa_add_mp(opb, AFI_IPv6, p->capa.ann.mp_v6); +- } ++ for (i = 0; i < AID_MAX; i++) ++ if (p->capa.ann.mp[i]) { /* 4 bytes data */ ++ errs += session_capa_add(opb, CAPA_MP, 4); ++ errs += session_capa_add_mp(opb, i); ++ mpcapa++; ++ } + + /* route refresh, RFC 2918 */ + if (p->capa.ann.refresh) /* no data */ +- errs += session_capa_add(p, opb, CAPA_REFRESH, 0, &optparamlen); ++ errs += session_capa_add(opb, CAPA_REFRESH, 0); + +- /* End-of-RIB marker, RFC 4724 */ +- if (p->capa.ann.restart) { /* 2 bytes data */ +- u_char c[2]; +- +- bzero(&c, 2); +- c[0] = 0x80; /* we're always restarting */ +- errs += session_capa_add(p, opb, CAPA_RESTART, 2, &optparamlen); +- errs += buf_add(opb, &c, 2); ++ /* graceful restart and End-of-RIB marker, RFC 4724 */ ++ if (p->capa.ann.grestart.restart) { ++ int rst = 0; ++ u_int16_t hdr; ++ u_int8_t grlen; ++ ++ if (mpcapa) { ++ grlen = 2 + 4 * mpcapa; ++ for (i = 0; i < AID_MAX; i++) { ++ if (p->capa.neg.grestart.flags[i] & ++ CAPA_GR_RESTARTING) ++ rst++; ++ } ++ } else { /* AID_INET */ ++ grlen = 2 + 4; ++ if (p->capa.neg.grestart.flags[AID_INET] & ++ CAPA_GR_RESTARTING) ++ rst++; ++ } ++ ++ hdr = conf->holdtime; /* default timeout */ ++ /* if 
client does graceful restart don't set R flag */ ++ if (!rst) ++ hdr |= CAPA_GR_R_FLAG; ++ hdr = htons(hdr); ++ ++ errs += session_capa_add(opb, CAPA_RESTART, grlen); ++ errs += ibuf_add(opb, &hdr, sizeof(hdr)); ++ ++ if (mpcapa) { ++ for (i = 0; i < AID_MAX; i++) { ++ if (p->capa.ann.mp[i]) { ++ errs += session_capa_add_gr(p, opb, i); ++ } ++ } ++ } else { /* AID_INET */ ++ errs += session_capa_add_gr(p, opb, AID_INET); ++ } + } + + /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ +@@ -1379,13 +1507,17 @@ session_open(struct peer *p) + u_int32_t nas; + + nas = htonl(conf->as); +- errs += session_capa_add(p, opb, CAPA_AS4BYTE, 4, &optparamlen); +- errs += buf_add(opb, &nas, 4); ++ errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); ++ errs += ibuf_add(opb, &nas, sizeof(nas)); + } + ++ if (ibuf_size(opb)) ++ optparamlen = ibuf_size(opb) + sizeof(op_type) + ++ sizeof(optparamlen); ++ + len = MSGSIZE_OPEN_MIN + optparamlen; + if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { +- buf_free(opb); ++ ibuf_free(opb); + bgp_fsm(p, EVNT_CON_FATAL); + return; + } +@@ -1399,19 +1531,24 @@ session_open(struct peer *p) + msg.bgpid = conf->bgpid; /* is already in network byte order */ + msg.optparamlen = optparamlen; + +- errs += buf_add(buf->buf, &msg.version, sizeof(msg.version)); +- errs += buf_add(buf->buf, &msg.myas, sizeof(msg.myas)); +- errs += buf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); +- errs += buf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); +- errs += buf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen)); +- +- if (optparamlen) +- errs += buf_add(buf->buf, opb->buf, optparamlen); ++ errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); ++ errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); ++ errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); ++ errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); ++ errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen)); ++ ++ if (optparamlen) { ++ 
op_type = OPT_PARAM_CAPABILITIES; ++ optparamlen = ibuf_size(opb); ++ errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); ++ errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen)); ++ errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); ++ } + +- buf_free(opb); ++ ibuf_free(opb); + +- if (errs > 0) { +- buf_free(buf->buf); ++ if (errs) { ++ ibuf_free(buf->buf); + free(buf); + bgp_fsm(p, EVNT_CON_FATAL); + return; +@@ -1459,8 +1596,8 @@ session_update(u_int32_t peerid, void *d + return; + } + +- if (buf_add(buf->buf, data, datalen)) { +- buf_free(buf->buf); ++ if (ibuf_add(buf->buf, data, datalen)) { ++ ibuf_free(buf->buf); + free(buf); + bgp_fsm(p, EVNT_CON_FATAL); + return; +@@ -1480,29 +1617,27 @@ session_notification(struct peer *p, u_i + void *data, ssize_t datalen) + { + struct bgp_msg *buf; +- u_int errs = 0; +- u_int8_t null8 = 0; ++ int errs = 0; + + if (p->stats.last_sent_errcode) /* some notification already sent */ + return; + ++ log_notification(p, errcode, subcode, data, datalen, "sending"); ++ + if ((buf = session_newmsg(NOTIFICATION, + MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL); + return; + } + +- errs += buf_add(buf->buf, &errcode, sizeof(errcode)); +- if (errcode == ERR_CEASE) +- errs += buf_add(buf->buf, &null8, sizeof(null8)); +- else +- errs += buf_add(buf->buf, &subcode, sizeof(subcode)); ++ errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); ++ errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); + + if (datalen > 0) +- errs += buf_add(buf->buf, data, datalen); ++ errs += ibuf_add(buf->buf, data, datalen); + +- if (errs > 0) { +- buf_free(buf->buf); ++ if (errs) { ++ ibuf_free(buf->buf); + free(buf); + bgp_fsm(p, EVNT_CON_FATAL); + return; +@@ -1521,23 +1656,29 @@ session_notification(struct peer *p, u_i + int + session_neighbor_rrefresh(struct peer *p) + { ++ u_int8_t i; ++ + if (!p->capa.peer.refresh) + return (-1); + +- if (p->capa.peer.mp_v4 != SAFI_NONE) +- session_rrefresh(p, AFI_IPv4, 
p->capa.peer.mp_v4); +- if (p->capa.peer.mp_v6 != SAFI_NONE) +- session_rrefresh(p, AFI_IPv6, p->capa.peer.mp_v6); ++ for (i = 0; i < AID_MAX; i++) { ++ if (p->capa.peer.mp[i] != 0) ++ session_rrefresh(p, i); ++ } + + return (0); + } + + void +-session_rrefresh(struct peer *p, u_int16_t afi, u_int8_t safi) ++session_rrefresh(struct peer *p, u_int8_t aid) + { + struct bgp_msg *buf; + int errs = 0; +- u_int8_t null8 = 0; ++ u_int16_t afi; ++ u_int8_t safi, null8 = 0; ++ ++ if (aid2afi(aid, &afi, &safi) == -1) ++ fatalx("session_rrefresh: bad afi/safi pair"); + + if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL); +@@ -1545,12 +1686,12 @@ session_rrefresh(struct peer *p, u_int16 + } + + afi = htons(afi); +- errs += buf_add(buf->buf, &afi, sizeof(afi)); +- errs += buf_add(buf->buf, &null8, sizeof(null8)); +- errs += buf_add(buf->buf, &safi, sizeof(safi)); ++ errs += ibuf_add(buf->buf, &afi, sizeof(afi)); ++ errs += ibuf_add(buf->buf, &null8, sizeof(null8)); ++ errs += ibuf_add(buf->buf, &safi, sizeof(safi)); + +- if (errs > 0) { +- buf_free(buf->buf); ++ if (errs) { ++ ibuf_free(buf->buf); + free(buf); + bgp_fsm(p, EVNT_CON_FATAL); + return; +@@ -1565,13 +1706,74 @@ session_rrefresh(struct peer *p, u_int16 + } + + int ++session_graceful_restart(struct peer *p) ++{ ++ u_int8_t i; ++ ++ timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); ++ ++ for (i = 0; i < AID_MAX; i++) { ++ if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { ++ if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, ++ p->conf.id, 0, -1, &i, sizeof(i)) == -1) ++ return (-1); ++ log_peer_warnx(&p->conf, ++ "graceful restart of %s, keeping routes", ++ aid2str(i)); ++ p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; ++ } else if (p->capa.neg.mp[i]) { ++ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, ++ p->conf.id, 0, -1, &i, sizeof(i)) == -1) ++ return (-1); ++ log_peer_warnx(&p->conf, ++ "graceful restart of %s, flushing routes", ++ aid2str(i)); 
++ } ++ } ++ return (0); ++} ++ ++int ++session_graceful_is_restarting(struct peer *p) ++{ ++ u_int8_t i; ++ ++ for (i = 0; i < AID_MAX; i++) ++ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) ++ return (1); ++ return (0); ++} ++ ++int ++session_graceful_stop(struct peer *p) ++{ ++ u_int8_t i; ++ ++ for (i = 0; i < AID_MAX; i++) { ++ /* ++ * Only flush if the peer is restarting and the peer indicated ++ * it hold the forwarding state. In all other cases the ++ * session was already flushed when the session came up. ++ */ ++ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING && ++ p->capa.neg.grestart.flags[i] & CAPA_GR_FORWARD) { ++ log_peer_warnx(&p->conf, "graceful restart of %s, " ++ "time-out, flushing", aid2str(i)); ++ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, ++ p->conf.id, 0, -1, &i, sizeof(i)) == -1) ++ return (-1); ++ } ++ p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; ++ } ++ return (0); ++} ++ ++int + session_dispatch_msg(struct pollfd *pfd, struct peer *p) + { +- ssize_t n, rpos, av, left; ++ ssize_t n; + socklen_t len; +- int error, processed = 0; +- u_int16_t msglen; +- u_int8_t msgtype; ++ int error; + + if (p->state == STATE_CONNECT) { + if (pfd->revents & POLLOUT) { +@@ -1641,71 +1843,83 @@ session_dispatch_msg(struct pollfd *pfd, + return (1); + } + +- rpos = 0; +- av = p->rbuf->wpos + n; ++ p->rbuf->wpos += n; + p->stats.last_read = time(NULL); ++ return (1); ++ } ++ return (0); ++} + +- /* +- * session might drop to IDLE -> buffers deallocated +- * we MUST check rbuf != NULL before use +- */ +- for (;;) { +- if (rpos + MSGSIZE_HEADER > av) +- break; +- if (p->rbuf == NULL) +- break; +- if (parse_header(p, p->rbuf->buf + rpos, &msglen, +- &msgtype) == -1) +- return (0); +- if (rpos + msglen > av) +- break; +- p->rbuf->rptr = p->rbuf->buf + rpos; +- +- switch (msgtype) { +- case OPEN: +- bgp_fsm(p, EVNT_RCVD_OPEN); +- p->stats.msg_rcvd_open++; +- break; +- case UPDATE: +- bgp_fsm(p, EVNT_RCVD_UPDATE); +- 
p->stats.msg_rcvd_update++; +- break; +- case NOTIFICATION: +- bgp_fsm(p, EVNT_RCVD_NOTIFICATION); +- p->stats.msg_rcvd_notification++; +- break; +- case KEEPALIVE: +- bgp_fsm(p, EVNT_RCVD_KEEPALIVE); +- p->stats.msg_rcvd_keepalive++; +- break; +- case RREFRESH: +- parse_refresh(p); +- p->stats.msg_rcvd_rrefresh++; +- break; +- default: /* cannot happen */ +- session_notification(p, ERR_HEADER, +- ERR_HDR_TYPE, &msgtype, 1); +- log_warnx("received message with " +- "unknown type %u", msgtype); +- bgp_fsm(p, EVNT_CON_FATAL); +- } +- rpos += msglen; +- if (++processed > MSG_PROCESS_LIMIT) +- break; +- } +- if (p->rbuf == NULL) +- return (1); ++int ++session_process_msg(struct peer *p) ++{ ++ ssize_t rpos, av, left; ++ int processed = 0; ++ u_int16_t msglen; ++ u_int8_t msgtype; + +- if (rpos < av) { +- left = av - rpos; +- memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left); +- p->rbuf->wpos = left; +- } else +- p->rbuf->wpos = 0; ++ rpos = 0; ++ av = p->rbuf->wpos; + +- return (1); ++ /* ++ * session might drop to IDLE -> buffers deallocated ++ * we MUST check rbuf != NULL before use ++ */ ++ for (;;) { ++ if (rpos + MSGSIZE_HEADER > av) ++ break; ++ if (p->rbuf == NULL) ++ break; ++ if (parse_header(p, p->rbuf->buf + rpos, &msglen, ++ &msgtype) == -1) ++ return (0); ++ if (rpos + msglen > av) ++ break; ++ p->rbuf->rptr = p->rbuf->buf + rpos; ++ ++ switch (msgtype) { ++ case OPEN: ++ bgp_fsm(p, EVNT_RCVD_OPEN); ++ p->stats.msg_rcvd_open++; ++ break; ++ case UPDATE: ++ bgp_fsm(p, EVNT_RCVD_UPDATE); ++ p->stats.msg_rcvd_update++; ++ break; ++ case NOTIFICATION: ++ bgp_fsm(p, EVNT_RCVD_NOTIFICATION); ++ p->stats.msg_rcvd_notification++; ++ break; ++ case KEEPALIVE: ++ bgp_fsm(p, EVNT_RCVD_KEEPALIVE); ++ p->stats.msg_rcvd_keepalive++; ++ break; ++ case RREFRESH: ++ parse_refresh(p); ++ p->stats.msg_rcvd_rrefresh++; ++ break; ++ default: /* cannot happen */ ++ session_notification(p, ERR_HEADER, ERR_HDR_TYPE, ++ &msgtype, 1); ++ log_warnx("received message with unknown type 
%u", ++ msgtype); ++ bgp_fsm(p, EVNT_CON_FATAL); ++ } ++ rpos += msglen; ++ if (++processed > MSG_PROCESS_LIMIT) ++ break; + } +- return (0); ++ if (p->rbuf == NULL) ++ return (1); ++ ++ if (rpos < av) { ++ left = av - rpos; ++ memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); ++ p->rbuf->wpos = left; ++ } else ++ p->rbuf->wpos = 0; ++ ++ return (1); + } + + int +@@ -1853,12 +2067,6 @@ parse_open(struct peer *peer) + p += sizeof(short_as); + as = peer->short_as = ntohs(short_as); + +- /* if remote-as is zero and it's a cloned neighbor, accept any */ +- if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) { +- peer->conf.remote_as = as; +- peer->conf.ebgp = (peer->conf.remote_as != conf->as); +- } +- + memcpy(&oholdtime, p, sizeof(oholdtime)); + p += sizeof(oholdtime); + +@@ -1966,6 +2174,15 @@ parse_open(struct peer *peer) + } + } + ++ /* if remote-as is zero and it's a cloned neighbor, accept any */ ++ if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) { ++ peer->conf.remote_as = as; ++ peer->conf.ebgp = (peer->conf.remote_as != conf->as); ++ if (!peer->conf.ebgp) ++ /* force enforce_as off for iBGP sessions */ ++ peer->conf.enforce_as = ENFORCE_AS_OFF; ++ } ++ + if (peer->conf.remote_as != as) { + log_peer_warnx(&peer->conf, "peer sent wrong AS %s", + log_as(as)); +@@ -1974,6 +2191,14 @@ parse_open(struct peer *peer) + return (-1); + } + ++ if (capa_neg_calc(peer) == -1) { ++ log_peer_warnx(&peer->conf, ++ "capability negotiation calculation failed"); ++ session_notification(peer, ERR_OPEN, 0, NULL, 0); ++ change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); ++ return (-1); ++ } ++ + return (0); + } + +@@ -2008,24 +2233,35 @@ int + parse_refresh(struct peer *peer) + { + u_char *p; +- struct rrefresh r; ++ u_int16_t afi; ++ u_int8_t aid, safi; + + p = peer->rbuf->rptr; + p += MSGSIZE_HEADER; /* header is already checked */ + ++ /* ++ * We could check if we actually announced the capability but ++ * as long as the message is correctly encoded 
we don't care. ++ */ ++ + /* afi, 2 byte */ +- memcpy(&r.afi, p, sizeof(r.afi)); +- r.afi = ntohs(r.afi); ++ memcpy(&afi, p, sizeof(afi)); ++ afi = ntohs(afi); + p += 2; + /* reserved, 1 byte */ + p += 1; + /* safi, 1 byte */ +- memcpy(&r.safi, p, sizeof(r.safi)); ++ memcpy(&safi, p, sizeof(safi)); + + /* afi/safi unchecked - unrecognized values will be ignored anyway */ ++ if (afi2aid(afi, safi, &aid) == -1) { ++ log_peer_warnx(&peer->conf, "peer sent bad refresh, " ++ "invalid afi/safi pair"); ++ return (0); ++ } + +- if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &r, +- sizeof(r)) == -1) ++ if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid, ++ sizeof(aid)) == -1) + return (-1); + + return (0); +@@ -2035,11 +2271,12 @@ int + parse_notification(struct peer *peer) + { + u_char *p; ++ u_int16_t datalen; + u_int8_t errcode; + u_int8_t subcode; +- u_int16_t datalen; + u_int8_t capa_code; + u_int8_t capa_len; ++ u_int8_t i; + + /* just log */ + p = peer->rbuf->rptr; +@@ -2059,7 +2296,7 @@ parse_notification(struct peer *peer) + p += sizeof(subcode); + datalen -= sizeof(subcode); + +- log_notification(peer, errcode, subcode, p, datalen); ++ log_notification(peer, errcode, subcode, p, datalen, "received"); + peer->errcnt++; + + if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { +@@ -2094,8 +2331,8 @@ parse_notification(struct peer *peer) + datalen -= capa_len; + switch (capa_code) { + case CAPA_MP: +- peer->capa.ann.mp_v4 = SAFI_NONE; +- peer->capa.ann.mp_v6 = SAFI_NONE; ++ for (i = 0; i < AID_MAX; i++) ++ peer->capa.ann.mp[i] = 0; + log_peer_warnx(&peer->conf, + "disabling multiprotocol capability"); + break; +@@ -2105,7 +2342,7 @@ parse_notification(struct peer *peer) + "disabling route refresh capability"); + break; + case CAPA_RESTART: +- peer->capa.ann.restart = 0; ++ peer->capa.ann.grestart.restart = 0; + log_peer_warnx(&peer->conf, + "disabling restart capability"); + break; +@@ -2139,19 +2376,23 @@ parse_notification(struct 
peer *peer) + int + parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) + { ++ u_char *capa_val; ++ u_int32_t remote_as; + u_int16_t len; ++ u_int16_t afi; ++ u_int16_t gr_header; ++ u_int8_t safi; ++ u_int8_t aid; ++ u_int8_t gr_flags; + u_int8_t capa_code; + u_int8_t capa_len; +- u_char *capa_val; +- u_int16_t mp_afi; +- u_int8_t mp_safi; +- u_int32_t remote_as; ++ u_int8_t i; + + len = dlen; + while (len > 0) { + if (len < 2) { +- log_peer_warnx(&peer->conf, "parse_capabilities: " +- "expect len >= 2, len is %u", len); ++ log_peer_warnx(&peer->conf, "Bad capabilities attr " ++ "length: %u, too short", len); + return (-1); + } + memcpy(&capa_code, d, sizeof(capa_code)); +@@ -2163,7 +2404,7 @@ parse_capabilities(struct peer *peer, u_ + if (capa_len > 0) { + if (len < capa_len) { + log_peer_warnx(&peer->conf, +- "parse_capabilities: " ++ "Bad capabilities attr length: " + "len %u smaller than capa_len %u", + len, capa_len); + return (-1); +@@ -2178,47 +2419,82 @@ parse_capabilities(struct peer *peer, u_ + case CAPA_MP: /* RFC 4760 */ + if (capa_len != 4) { + log_peer_warnx(&peer->conf, +- "parse_capabilities: " +- "expect len 4, len is %u", capa_len); +- return (-1); +- } +- memcpy(&mp_afi, capa_val, sizeof(mp_afi)); +- mp_afi = ntohs(mp_afi); +- memcpy(&mp_safi, capa_val + 3, sizeof(mp_safi)); +- switch (mp_afi) { +- case AFI_IPv4: +- if (mp_safi < 1 || mp_safi > 3) +- log_peer_warnx(&peer->conf, +- "parse_capabilities: AFI IPv4, " +- "mp_safi %u unknown", mp_safi); +- else +- peer->capa.peer.mp_v4 = mp_safi; ++ "Bad multi protocol capability length: " ++ "%u", capa_len); + break; +- case AFI_IPv6: +- if (mp_safi < 1 || mp_safi > 3) +- log_peer_warnx(&peer->conf, +- "parse_capabilities: AFI IPv6, " +- "mp_safi %u unknown", mp_safi); +- else +- peer->capa.peer.mp_v6 = mp_safi; +- break; +- default: /* ignore */ ++ } ++ memcpy(&afi, capa_val, sizeof(afi)); ++ afi = ntohs(afi); ++ memcpy(&safi, capa_val + 3, sizeof(safi)); ++ if 
(afi2aid(afi, safi, &aid) == -1) { ++ log_peer_warnx(&peer->conf, ++ "Received multi protocol capability: " ++ " unknown AFI %u, safi %u pair", ++ afi, safi); + break; + } ++ peer->capa.peer.mp[aid] = 1; + break; + case CAPA_REFRESH: + peer->capa.peer.refresh = 1; + break; + case CAPA_RESTART: +- peer->capa.peer.restart = 1; +- /* we don't care about the further restart capas yet */ ++ if (capa_len == 2) { ++ /* peer only supports EoR marker */ ++ peer->capa.peer.grestart.restart = 1; ++ peer->capa.peer.grestart.timeout = 0; ++ break; ++ } else if (capa_len % 4 != 2) { ++ log_peer_warnx(&peer->conf, ++ "Bad graceful restart capability length: " ++ "%u", capa_len); ++ peer->capa.peer.grestart.restart = 0; ++ peer->capa.peer.grestart.timeout = 0; ++ break; ++ } ++ ++ memcpy(&gr_header, capa_val, sizeof(gr_header)); ++ gr_header = ntohs(gr_header); ++ peer->capa.peer.grestart.timeout = ++ gr_header & CAPA_GR_TIMEMASK; ++ if (peer->capa.peer.grestart.timeout == 0) { ++ log_peer_warnx(&peer->conf, "Received " ++ "graceful restart timeout is zero"); ++ peer->capa.peer.grestart.restart = 0; ++ break; ++ } ++ ++ for (i = 2; i <= capa_len - 4; i += 4) { ++ memcpy(&afi, capa_val + i, sizeof(afi)); ++ afi = ntohs(afi); ++ memcpy(&safi, capa_val + i + 2, sizeof(safi)); ++ if (afi2aid(afi, safi, &aid) == -1) { ++ log_peer_warnx(&peer->conf, ++ "Received graceful restart capa: " ++ " unknown AFI %u, safi %u pair", ++ afi, safi); ++ continue; ++ } ++ memcpy(&gr_flags, capa_val + i + 3, ++ sizeof(gr_flags)); ++ peer->capa.peer.grestart.flags[aid] |= ++ CAPA_GR_PRESENT; ++ if (gr_flags & CAPA_GR_F_FLAG) ++ peer->capa.peer.grestart.flags[aid] |= ++ CAPA_GR_FORWARD; ++ if (gr_header & CAPA_GR_R_FLAG) ++ peer->capa.peer.grestart.flags[aid] |= ++ CAPA_GR_RESTART; ++ peer->capa.peer.grestart.restart = 2; ++ } + break; + case CAPA_AS4BYTE: + if (capa_len != 4) { + log_peer_warnx(&peer->conf, +- "parse_capabilities: " +- "expect len 4, len is %u", capa_len); +- return (-1); ++ "Bad 
AS4BYTE capability length: " ++ "%u", capa_len); ++ peer->capa.peer.as4byte = 0; ++ break; + } + memcpy(&remote_as, capa_val, sizeof(remote_as)); + *as = ntohl(remote_as); +@@ -2232,6 +2508,66 @@ parse_capabilities(struct peer *peer, u_ + return (0); + } + ++int ++capa_neg_calc(struct peer *p) ++{ ++ u_int8_t i, hasmp = 0; ++ ++ /* refresh: does not really matter here, use peer setting */ ++ p->capa.neg.refresh = p->capa.peer.refresh; ++ ++ /* as4byte: both sides must announce capability */ ++ if (p->capa.ann.as4byte && p->capa.peer.as4byte) ++ p->capa.neg.as4byte = 1; ++ else ++ p->capa.neg.as4byte = 0; ++ ++ /* MP: both sides must announce capability */ ++ for (i = 0; i < AID_MAX; i++) { ++ if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) { ++ p->capa.neg.mp[i] = 1; ++ hasmp = 1; ++ } else ++ p->capa.neg.mp[i] = 0; ++ } ++ /* if no MP capability present default to IPv4 unicast mode */ ++ if (!hasmp) ++ p->capa.neg.mp[AID_INET] = 1; ++ ++ /* ++ * graceful restart: only the peer capabilities are of interest here. ++ * It is necessary to compare the new values with the previous ones ++ * and act accordingly. AFI/SAFI that are not part of the MP capability ++ * are treated as not being present. 
++ */ ++ ++ for (i = 0; i < AID_MAX; i++) { ++ /* disable GR if the AFI/SAFI is not present */ ++ if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && ++ p->capa.neg.mp[i] == 0) ++ p->capa.peer.grestart.flags[i] = 0; /* disable */ ++ /* look at current GR state and decide what to do */ ++ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { ++ if (!(p->capa.peer.grestart.flags[i] & ++ CAPA_GR_FORWARD)) { ++ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, ++ p->conf.id, 0, -1, &i, sizeof(i)) == -1) ++ return (-1); ++ log_peer_warnx(&p->conf, "graceful restart of " ++ "%s, not restarted, flushing", aid2str(i)); ++ } ++ p->capa.neg.grestart.flags[i] = ++ p->capa.peer.grestart.flags[i] | CAPA_GR_RESTARTING; ++ } else ++ p->capa.neg.grestart.flags[i] = ++ p->capa.peer.grestart.flags[i]; ++ } ++ p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; ++ p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; ++ ++ return (0); ++} ++ + void + session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) + { +@@ -2244,8 +2580,8 @@ session_dispatch_imsg(struct imsgbuf *ib + struct kif *kif; + u_char *data; + enum reconf_action reconf; +- int n, depend_ok; +- u_int8_t errcode, subcode; ++ int n, depend_ok, restricted; ++ u_int8_t aid, errcode, subcode; + + if ((n = imsg_read(ibuf)) == -1) + fatal("session_dispatch_imsg: imsg_read error"); +@@ -2332,15 +2668,42 @@ session_dispatch_imsg(struct imsgbuf *ib + } + + break; ++ case IMSG_RECONF_CTRL: ++ if (idx != PFD_PIPE_MAIN) ++ fatalx("reconf request not from parent"); ++ if (imsg.hdr.len != IMSG_HEADER_SIZE + ++ sizeof(restricted)) ++ fatalx("RECONF_CTRL imsg with wrong len"); ++ memcpy(&restricted, imsg.data, sizeof(restricted)); ++ if (imsg.fd == -1) { ++ log_warnx("expected to receive fd for control " ++ "socket but didn't receive any"); ++ break; ++ } ++ if (restricted) { ++ control_shutdown(rcsock); ++ rcsock = imsg.fd; ++ control_listen(rcsock); ++ } else { ++ control_shutdown(csock); ++ csock = 
imsg.fd; ++ control_listen(csock); ++ } ++ break; + case IMSG_RECONF_DONE: + if (idx != PFD_PIPE_MAIN) + fatalx("reconf request not from parent"); + if (nconf == NULL) + fatalx("got IMSG_RECONF_DONE but no config"); ++ conf->flags = nconf->flags; ++ conf->log = nconf->log; ++ conf->bgpid = nconf->bgpid; ++ conf->clusterid = nconf->clusterid; + conf->as = nconf->as; ++ conf->short_as = nconf->short_as; + conf->holdtime = nconf->holdtime; +- conf->bgpid = nconf->bgpid; + conf->min_holdtime = nconf->min_holdtime; ++ conf->connectretry = nconf->connectretry; + + /* add new peers */ + for (p = npeers; p != NULL; p = next) { +@@ -2388,6 +2751,8 @@ session_dispatch_imsg(struct imsgbuf *ib + nconf = NULL; + pending_reconf = 0; + log_info("SE reconfigured"); ++ imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, ++ -1, NULL, 0); + break; + case IMSG_IFINFO: + if (idx != PFD_PIPE_MAIN) +@@ -2397,9 +2762,7 @@ session_dispatch_imsg(struct imsgbuf *ib + fatalx("IFINFO imsg with wrong len"); + kif = imsg.data; + depend_ok = (kif->flags & IFF_UP) && +- (LINK_STATE_IS_UP(kif->link_state) || +- (kif->link_state == LINK_STATE_UNKNOWN && +- kif->media_type != IFT_CARP)); ++ LINK_STATE_IS_UP(kif->link_state); + + for (p = peers; p != NULL; p = p->next) + if (!strcmp(p->conf.if_depend, kif->ifname)) { +@@ -2408,7 +2771,8 @@ session_dispatch_imsg(struct imsgbuf *ib + bgp_fsm(p, EVNT_START); + } else if (!depend_ok && p->depend_ok) { + p->depend_ok = depend_ok; +- bgp_fsm(p, EVNT_STOP); ++ session_stop(p, ++ ERR_CEASE_OTHER_CHANGE); + } + } + break; +@@ -2456,10 +2820,10 @@ session_dispatch_imsg(struct imsgbuf *ib + } + break; + case IMSG_CTL_KROUTE: +- case IMSG_CTL_KROUTE6: + case IMSG_CTL_KROUTE_ADDR: + case IMSG_CTL_SHOW_NEXTHOP: + case IMSG_CTL_SHOW_INTERFACE: ++ case IMSG_CTL_SHOW_FIB_TABLES: + if (idx != PFD_PIPE_MAIN) + fatalx("ctl kroute request not from parent"); + control_imsg_relay(&imsg); +@@ -2469,7 +2833,6 @@ session_dispatch_imsg(struct imsgbuf *ib + case 
IMSG_CTL_SHOW_RIB_ATTR: + case IMSG_CTL_SHOW_RIB_MEM: + case IMSG_CTL_SHOW_NETWORK: +- case IMSG_CTL_SHOW_NETWORK6: + case IMSG_CTL_SHOW_NEIGHBOR: + if (idx != PFD_PIPE_ROUTE_CTL) + fatalx("ctl rib request not from RDE"); +@@ -2531,6 +2894,40 @@ session_dispatch_imsg(struct imsgbuf *ib + break; + } + break; ++ case IMSG_SESSION_RESTARTED: ++ if (idx != PFD_PIPE_ROUTE) ++ fatalx("update request not from RDE"); ++ if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { ++ log_warnx("RDE sent invalid restart msg"); ++ break; ++ } ++ if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { ++ log_warnx("no such peer: id=%u", ++ imsg.hdr.peerid); ++ break; ++ } ++ memcpy(&aid, imsg.data, sizeof(aid)); ++ if (aid >= AID_MAX) ++ fatalx("IMSG_SESSION_RESTARTED: bad AID"); ++ if (p->capa.neg.grestart.flags[aid] & ++ CAPA_GR_RESTARTING && ++ p->capa.neg.grestart.flags[aid] & ++ CAPA_GR_FORWARD) { ++ log_peer_warnx(&p->conf, ++ "graceful restart of %s finished", ++ aid2str(aid)); ++ p->capa.neg.grestart.flags[aid] &= ++ ~CAPA_GR_RESTARTING; ++ timer_stop(p, Timer_RestartTimeout); ++ ++ /* signal back to RDE to cleanup stale routes */ ++ if (imsg_compose(ibuf_rde, ++ IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, ++ -1, &aid, sizeof(aid)) == -1) ++ fatal("imsg_compose: " ++ "IMSG_SESSION_RESTARTED"); ++ } ++ break; + default: + break; + } +@@ -2612,29 +3009,23 @@ getpeerbydesc(const char *descr) + struct peer * + getpeerbyip(struct sockaddr *ip) + { ++ struct bgpd_addr addr; + struct peer *p, *newpeer, *loose = NULL; + u_int32_t id; + ++ sa2addr(ip, &addr); ++ + /* we might want a more effective way to find peers by IP */ + for (p = peers; p != NULL; p = p->next) + if (!p->conf.template && +- p->conf.remote_addr.af == ip->sa_family) { +- if (p->conf.remote_addr.af == AF_INET && +- p->conf.remote_addr.v4.s_addr == +- ((struct sockaddr_in *)ip)->sin_addr.s_addr) +- return (p); +- if (p->conf.remote_addr.af == AF_INET6 && +- !bcmp(&p->conf.remote_addr.v6, +- &((struct sockaddr_in6 
*)ip)->sin6_addr, +- sizeof(p->conf.remote_addr.v6))) +- return (p); +- } ++ !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) ++ return (p); + + /* try template matching */ + for (p = peers; p != NULL; p = p->next) + if (p->conf.template && +- p->conf.remote_addr.af == ip->sa_family && +- session_match_mask(p, ip)) ++ p->conf.remote_addr.aid == addr.aid && ++ session_match_mask(p, &addr)) + if (loose == NULL || loose->conf.remote_masklen < + p->conf.remote_masklen) + loose = p; +@@ -2653,21 +3044,19 @@ getpeerbyip(struct sockaddr *ip) + break; + } + } +- if (newpeer->conf.remote_addr.af == AF_INET) { +- newpeer->conf.remote_addr.v4.s_addr = +- ((struct sockaddr_in *)ip)->sin_addr.s_addr; ++ sa2addr(ip, &newpeer->conf.remote_addr); ++ switch (ip->sa_family) { ++ case AF_INET: + newpeer->conf.remote_masklen = 32; +- } +- if (newpeer->conf.remote_addr.af == AF_INET6) { +- memcpy(&p->conf.remote_addr.v6, +- &((struct sockaddr_in6 *)ip)->sin6_addr, +- sizeof(newpeer->conf.remote_addr.v6)); ++ break; ++ case AF_INET6: + newpeer->conf.remote_masklen = 128; ++ break; + } + newpeer->conf.template = 0; + newpeer->conf.cloned = 1; + newpeer->state = newpeer->prev_state = STATE_NONE; +- newpeer->conf.reconf_action = RECONF_REINIT; ++ newpeer->conf.reconf_action = RECONF_KEEP; + newpeer->rbuf = NULL; + init_peer(newpeer); + bgp_fsm(newpeer, EVNT_START); +@@ -2680,40 +3069,24 @@ getpeerbyip(struct sockaddr *ip) + } + + int +-session_match_mask(struct peer *p, struct sockaddr *ip) ++session_match_mask(struct peer *p, struct bgpd_addr *a) + { +- int i; + in_addr_t v4mask; +- struct in6_addr *in; +- struct in6_addr mask; ++ struct in6_addr masked; + +- if (p->conf.remote_addr.af == AF_INET) { ++ switch (p->conf.remote_addr.aid) { ++ case AID_INET: + v4mask = htonl(prefixlen2mask(p->conf.remote_masklen)); +- if (p->conf.remote_addr.v4.s_addr == +- ((((struct sockaddr_in *)ip)->sin_addr.s_addr) & v4mask)) ++ if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask)) + return 
(1); +- else +- return (0); +- } +- +- if (p->conf.remote_addr.af == AF_INET6) { +- bzero(&mask, sizeof(mask)); +- for (i = 0; i < p->conf.remote_masklen / 8; i++) +- mask.s6_addr[i] = 0xff; +- i = p->conf.remote_masklen % 8; +- if (i) +- mask.s6_addr[p->conf.remote_masklen / 8] = 0xff00 >> i; +- +- in = &((struct sockaddr_in6 *)ip)->sin6_addr; +- +- for (i = 0; i < 16; i++) +- if ((in->s6_addr[i] & mask.s6_addr[i]) != +- p->conf.remote_addr.addr8[i]) +- return (0); ++ return (0); ++ case AID_INET6: ++ inet6applymask(&masked, &a->v6, p->conf.remote_masklen); + +- return (1); ++ if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked))) ++ return (1); ++ return (0); + } +- + return (0); + } + +@@ -2733,6 +3106,7 @@ getpeerbyid(u_int32_t peerid) + void + session_down(struct peer *peer) + { ++ bzero(&peer->capa.neg, sizeof(peer->capa.neg)); + peer->stats.last_updown = time(NULL); + if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1, + NULL, 0) == -1) +@@ -2744,39 +3118,17 @@ session_up(struct peer *p) + { + struct session_up sup; + +- if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, +- &p->conf, sizeof(p->conf)) == -1) +- fatalx("imsg_compose error"); ++ if (!session_graceful_is_restarting(p)) ++ if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, ++ &p->conf, sizeof(p->conf)) == -1) ++ fatalx("imsg_compose error"); + +- switch (p->sa_local.ss_family) { +- case AF_INET: +- sup.local_addr.af = AF_INET; +- memcpy(&sup.local_addr.v4, +- &((struct sockaddr_in *)&p->sa_local)->sin_addr, +- sizeof(sup.local_addr.v4)); +- sup.remote_addr.af = AF_INET; +- memcpy(&sup.remote_addr.v4, +- &((struct sockaddr_in *)&p->sa_remote)->sin_addr, +- sizeof(sup.remote_addr.v4)); +- break; +- case AF_INET6: +- sup.local_addr.af = AF_INET6; +- memcpy(&sup.local_addr.v6, +- &((struct sockaddr_in6 *)&p->sa_local)->sin6_addr, +- sizeof(sup.local_addr.v6)); +- sup.remote_addr.af = AF_INET6; +- memcpy(&sup.remote_addr.v6, +- &((struct sockaddr_in6 
*)&p->sa_remote)->sin6_addr, +- sizeof(sup.remote_addr.v6)); +- break; +- default: +- fatalx("session_up: unsupported address family"); +- } ++ sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); ++ sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); + + sup.remote_bgpid = p->remote_bgpid; + sup.short_as = p->short_as; +- memcpy(&sup.capa_announced, &p->capa.ann, sizeof(sup.capa_announced)); +- memcpy(&sup.capa_received, &p->capa.peer, sizeof(sup.capa_received)); ++ memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); + p->stats.last_updown = time(NULL); + if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1, + &sup, sizeof(sup)) == -1) +@@ -2784,9 +3136,10 @@ session_up(struct peer *p) + } + + int +-imsg_compose_parent(int type, pid_t pid, void *data, u_int16_t datalen) ++imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data, ++ u_int16_t datalen) + { +- return (imsg_compose(ibuf_main, type, 0, pid, -1, data, datalen)); ++ return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); + } + + int +@@ -2795,34 +3148,6 @@ imsg_compose_rde(int type, pid_t pid, vo + return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen)); + } + +-static struct sockaddr * +-addr2sa(struct bgpd_addr *addr, u_int16_t port) +-{ +- static struct sockaddr_storage ss; +- struct sockaddr_in *sa_in = (struct sockaddr_in *)&ss; +- struct sockaddr_in6 *sa_in6 = (struct sockaddr_in6 *)&ss; +- +- bzero(&ss, sizeof(ss)); +- switch (addr->af) { +- case AF_INET: +- sa_in->sin_family = AF_INET; +- sa_in->sin_len = sizeof(struct sockaddr_in); +- sa_in->sin_addr.s_addr = addr->v4.s_addr; +- sa_in->sin_port = htons(port); +- break; +- case AF_INET6: +- sa_in6->sin6_family = AF_INET6; +- sa_in6->sin6_len = sizeof(struct sockaddr_in6); +- memcpy(&sa_in6->sin6_addr, &addr->v6, +- sizeof(sa_in6->sin6_addr)); +- sa_in6->sin6_port = htons(port); +- sa_in6->sin6_scope_id = addr->scope_id; +- break; +- } +- +- return ((struct sockaddr *)&ss); +-} +- + void 
+ session_demote(struct peer *p, int level) + { +@@ -2837,3 +3162,19 @@ session_demote(struct peer *p, int level + + p->demoted += level; + } ++ ++void ++session_stop(struct peer *peer, u_int8_t subcode) ++{ ++ switch (peer->state) { ++ case STATE_OPENSENT: ++ case STATE_OPENCONFIRM: ++ case STATE_ESTABLISHED: ++ session_notification(peer, ERR_CEASE, subcode, NULL, 0); ++ break; ++ default: ++ /* session not open, no need to send notification */ ++ break; ++ } ++ bgp_fsm(peer, EVNT_STOP); ++} |