1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Zebra; see the file COPYING. If not, write to the Free
18 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
35 #include "bgpd/bgpd.h"
36 #include "bgpd/bgp_table.h"
37 #include "bgpd/bgp_route.h"
38 #include "bgpd/bgp_attr.h"
39 #include "bgpd/bgp_nexthop.h"
40 #include "bgpd/bgp_debug.h"
41 #include "bgpd/bgp_nht.h"
42 #include "bgpd/bgp_fsm.h"
43 #include "bgpd/bgp_zebra.h"
45 extern struct zclient *zclient;
46 extern struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
48 static void register_nexthop(struct bgp_nexthop_cache *bnc);
49 static void unregister_nexthop (struct bgp_nexthop_cache *bnc);
50 static void evaluate_paths(struct bgp_nexthop_cache *bnc);
51 static int make_prefix(int afi, struct bgp_info *ri, struct prefix *p);
52 static void path_nh_map(struct bgp_info *path, struct bgp_nexthop_cache *bnc,
/* bgp_nexthop_check - decide whether the nexthop cached on a path is usable.
 *
 * path      - path whose path->nexthop cache entry (bnc) is inspected.
 * connected - when nonzero, the entry is additionally tested for the
 *             BGP_NEXTHOP_CONNECTED flag.
 *
 * The final return is true when bgp_zebra_num_connects() is 0 or when the
 * entry carries BGP_NEXTHOP_VALID.
 *
 * NOTE(review): the embedded line numbers skip here (56 -> 58 -> 63 and
 * 71 -> 74), so any NULL check on bnc and the statement executed when the
 * `connected` test fails are not visible in this listing - confirm against
 * the complete file.
 */
56 bgp_nexthop_check (struct bgp_info *path, int connected)
58 struct bgp_nexthop_cache *bnc = path->nexthop;
/* Optional trace of which cache entry is being checked. */
63 if (BGP_DEBUG(nht, NHT))
65 char buf[INET6_ADDRSTRLEN];
66 zlog_debug("%s: NHT checking %s",
68 bnc_str (bnc, buf, INET6_ADDRSTRLEN));
/* Caller requires a connected nexthop but the entry is not flagged as such. */
71 if (connected && !(CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)))
/* Usable when bgp_zebra_num_connects() reports 0, or the entry is VALID. */
74 return (bgp_zebra_num_connects() == 0 ||
75 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
78 /* Helper to get the rn for the appropriate nexthop for path or peer.
79 * returns the locked rn - caller must bump down the refcnt.
81 * may return NULL in error cases.
/* bgp_get_nexthop_rn - build the lookup prefix for a path or a peer and
 * fetch the matching node from bgp_nexthop_cache_table[afi].
 *
 * path - when used, the AFI comes from path->net->p.family and the prefix is
 *        filled in by make_prefix().
 * peer - when used, the AFI comes from peer->su.sa.sa_family and the prefix
 *        is the peer's own address at full host length (IPv4 or IPv6).
 *
 * At least one of the two must be non-NULL (asserted).
 * Returns whatever bgp_node_get() returns for the constructed prefix.
 *
 * NOTE(review): the statements selecting between the path branch and the
 * peer branch, the handling of a negative make_prefix() result, and the
 * p.family assignments are on lines missing from this listing.
 */
85 bgp_get_nexthop_rn (struct bgp_info *path, struct peer *peer)
90 assert (path || peer);
/* Path case: derive the prefix from the path's nexthop attribute. */
97 afi = family2afi (path->net->p.family);
98 if (make_prefix(afi, path, &p) < 0)
/* Peer case: derive the prefix from the peer's socket address. */
103 afi = family2afi(peer->su.sa.sa_family);
107 p.prefixlen = IPV4_MAX_BITLEN;
108 p.u.prefix4 = peer->su.sin.sin_addr;
110 else if (afi == AFI_IP6)
113 p.prefixlen = IPV6_MAX_BITLEN;
114 p.u.prefix6 = peer->su.sin6.sin6_addr;
/* Hand back the node for this prefix from the per-AFI nexthop cache table. */
120 return bgp_node_get (bgp_nexthop_cache_table[afi], &p);
/* bgp_find_nexthop - look up the nexthop cache entry for a path or a peer.
 *
 * Obtains the node through bgp_get_nexthop_rn() and releases the node
 * reference with bgp_unlock_node() before returning. bnc starts as NULL.
 *
 * NOTE(review): the lines that test rn, copy the entry into bnc and return
 * it are not part of this listing. Also, the lookup goes through
 * bgp_node_get() inside bgp_get_nexthop_rn(); if that call creates missing
 * nodes, a pure "find" could leave an empty node behind - confirm.
 */
124 struct bgp_nexthop_cache *
125 bgp_find_nexthop (struct bgp_info *path, struct peer *peer)
127 struct bgp_nexthop_cache *bnc = NULL;
128 struct bgp_node *rn = bgp_get_nexthop_rn (path, peer);
/* Drop the reference taken by the lookup. */
134 bgp_unlock_node (rn);
/* bgp_unlink_nexthop_check - tear down a nexthop cache entry once nothing
 * refers to it any more.
 *
 * An entry is considered unused when its path list is empty and it has no
 * peer reference (nht_info is NULL). In that case the entry is unregistered
 * via unregister_nexthop(), detached from its table node (node->info set to
 * NULL) and the node reference is released.
 *
 * NOTE(review): the debug message says the bnc is being freed; the call that
 * actually frees it is not among the visible lines - confirm it follows.
 */
140 bgp_unlink_nexthop_check (struct bgp_nexthop_cache *bnc)
/* Only act when neither a path nor a peer still uses this entry. */
142 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info)
144 if (BGP_DEBUG(nht, NHT))
146 char buf[INET6_ADDRSTRLEN];
147 zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
148 bnc_str (bnc, buf, INET6_ADDRSTRLEN));
/* Stop notifications for this nexthop, then detach it from the table node. */
150 unregister_nexthop(bnc);
151 bnc->node->info = NULL;
152 bgp_unlock_node (bnc->node);
/* bgp_unlink_nexthop - break the association between a path and its
 * nexthop cache entry, then release the entry if it became unused.
 *
 * NOTE(review): original lines 162-165 are not shown; any early exit for a
 * path without a nexthop would live there - confirm.
 */
159 bgp_unlink_nexthop (struct bgp_info *path)
/* Remember the entry first: path_nh_map() clears path->nexthop. */
161 struct bgp_nexthop_cache *bnc = path->nexthop;
166 if (BGP_DEBUG(nht, NHT))
168 char buf[INET6_ADDRSTRLEN];
169 zlog_debug("%s: NHT unlinking %s",
170 __FUNCTION__, bnc_str (bnc, buf, INET6_ADDRSTRLEN));
/* make == 0: only remove the existing path-to-nexthop link. */
173 path_nh_map(path, NULL, 0);
/* Free the entry if no path and no peer reference remains. */
175 bgp_unlink_nexthop_check (bnc);
/* bgp_unlink_nexthop_by_peer - drop a peer's reference on its nexthop cache
 * entry and release the entry if it became unused.
 *
 * The entry is located with bgp_find_nexthop(NULL, peer), i.e. by the peer's
 * own address rather than by a path.
 *
 * NOTE(review): bgp_find_nexthop() initialises its result to NULL; the check
 * for a NULL bnc (original lines 182-185) is not visible here - confirm it
 * exists before bnc is dereferenced below.
 */
179 bgp_unlink_nexthop_by_peer (struct peer *peer)
181 struct bgp_nexthop_cache *bnc = bgp_find_nexthop (NULL, peer);
186 if (BGP_DEBUG(nht, NHT))
187 zlog_debug("%s: NHT unlinking %s",
188 __FUNCTION__, peer->host);
/* Clear the peer back-reference stored by bgp_ensure_nexthop(). */
190 bnc->nht_info = NULL;
/* Free the entry if no path reference remains either. */
192 bgp_unlink_nexthop_check (bnc);
/* bgp_ensure_nexthop - make sure a nexthop cache entry exists and is
 * registered for a path (ri) or a peer, and attach the caller to it.
 *
 * Visible steps: obtain the table node via bgp_get_nexthop_rn(); register
 * the entry with Zebra if it is not yet flagged BGP_NEXTHOP_REGISTERED;
 * link the path to the entry with path_nh_map() and copy the entry's metric
 * into the path's igpmetric, or store the peer pointer in nht_info.
 *
 * Returns true when bgp_zebra_num_connects() is 0 or the entry carries
 * BGP_NEXTHOP_VALID.
 *
 * NOTE(review): the remaining parameter(s), the creation of a new entry, the
 * condition under which BGP_NEXTHOP_CONNECTED is set, and the if/else that
 * separates the path case from the peer case are on lines missing from this
 * listing (196 -> 200, 206 -> 218, 225 -> 229, 232 -> 234 -> 237).
 */
196 bgp_ensure_nexthop (struct bgp_info *ri, struct peer *peer,
200 struct bgp_nexthop_cache *bnc;
202 rn = bgp_get_nexthop_rn (ri, peer);
206 zlog_debug("%s: NHT could not ensure, failed to get rn!",
218 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
/* Release the reference taken by bgp_get_nexthop_rn(). */
222 bgp_unlock_node (rn);
/* register_nexthop() repeats this flag test itself, so the guard here only
 * avoids the call. */
224 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
225 register_nexthop(bnc);
229 path_nh_map(ri, bnc, 1); /* updates NHT ri list reference */
/* Seed the path's IGP metric from the cached entry when it is valid and
 * non-zero. */
231 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
232 (bgp_info_extra_get(ri))->igpmetric = bnc->metric;
/* NOTE(review): ri->extra is dereferenced here; the guarding else-branch
 * condition (original line 233) is not shown - confirm extra is checked. */
234 ri->extra->igpmetric = 0;
237 bnc->nht_info = (void *)peer; /* NHT peer reference */
239 if (BGP_DEBUG(nht, NHT))
241 char buf[INET6_ADDRSTRLEN];
242 zlog_debug("%s: NHT ensured %s",
243 __FUNCTION__, bnc_str (bnc, buf, INET6_ADDRSTRLEN));
/* Same usability rule as bgp_nexthop_check(). */
246 return (bgp_zebra_num_connects() == 0 ||
247 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
/* bgp_parse_nexthop_update - decode a nexthop update message and refresh
 * the matching nexthop cache entry.
 *
 * Visible flow:
 *   1. Read the prefix (family, prefix length, address) from stream s.
 *   2. Look the prefix up in bgp_nexthop_cache_table; without a node or
 *      without attached info the update is only logged under NHT debug.
 *   3. Stamp last_update, clear change_flags, read metric and nexthop count.
 *   4. Set BGP_NEXTHOP_METRIC_CHANGED / BGP_NEXTHOP_CHANGED in change_flags
 *      when metric or count differ from the cached values.
 *   5. Parse each nexthop into a temporary list, compare against the old
 *      list, then swap the new list in; the alternative path clears
 *      BGP_NEXTHOP_VALID and BGP_NEXTHOP_PEER_NOTIFIED and frees the list.
 *
 * NOTE(review): the initialisation of s, the assignment of bnc, several
 * if/else/break/return lines and the end of the function are not part of
 * this listing (see the gaps in the embedded line numbers).
 */
251 bgp_parse_nexthop_update (void)
255 struct bgp_nexthop_cache *bnc;
256 struct nexthop *nexthop;
257 struct nexthop *oldnh;
258 struct nexthop *nhlist_head = NULL;
259 struct nexthop *nhlist_tail = NULL;
/* Decode the prefix this update refers to: family, prefix length, then the
 * address (IPv4 through stream_get_ipv4, IPv6 as 16 raw bytes). */
267 memset(&p, 0, sizeof(struct prefix));
268 p.family = stream_getw(s);
269 p.prefixlen = stream_getc(s);
273 p.u.prefix4.s_addr = stream_get_ipv4 (s);
276 stream_get(&p.u.prefix6, s, 16);
/* Find the cache entry registered for that prefix. */
282 rn = bgp_node_lookup(bgp_nexthop_cache_table[family2afi(p.family)], &p);
283 if (!rn || !rn->info)
285 if (BGP_DEBUG(nht, NHT))
/* NOTE(review): prefix2str() presumably appends the prefix length; a buffer
 * of INET6_ADDRSTRLEN may then truncate long IPv6 prefixes - confirm. */
287 char buf[INET6_ADDRSTRLEN];
288 prefix2str(&p, buf, INET6_ADDRSTRLEN);
289 zlog_debug("parse nexthop update(%s): rn not found", buf);
/* NOTE(review): this branch is entered when rn may be NULL; whether a guard
 * precedes the unlock (original lines 290-291 are not shown) should be
 * confirmed. */
292 bgp_unlock_node (rn);
/* Normal path: drop the lookup reference; bnc (assigned on a line not shown,
 * presumably from rn->info) is used from here on. */
297 bgp_unlock_node (rn);
298 bnc->last_update = bgp_clock();
299 bnc->change_flags = 0;
300 metric = stream_getl (s);
301 nexthop_num = stream_getc (s);
303 /* debug print the input */
304 if (BGP_DEBUG(nht, NHT))
306 char buf[INET6_ADDRSTRLEN];
307 prefix2str(&p, buf, INET6_ADDRSTRLEN);
308 zlog_debug("parse nexthop update(%s): metric=%d, #nexthop=%d", buf,
309 metric, nexthop_num);
/* Record in change_flags what differs from the cached state. */
312 if (metric != bnc->metric)
313 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
315 if(nexthop_num != bnc->nexthop_num)
316 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
/* Mark the entry valid and store the new metric and count. The condition
 * guarding this section (original lines 317-319) is not shown. */
320 bnc->flags |= BGP_NEXTHOP_VALID;
321 bnc->metric = metric;
322 bnc->nexthop_num = nexthop_num;
/* Read each nexthop record: a type byte followed by a type-dependent gateway
 * address and/or interface index. */
324 for (i = 0; i < nexthop_num; i++)
326 nexthop = nexthop_new();
327 nexthop->type = stream_getc (s);
328 switch (nexthop->type)
330 case ZEBRA_NEXTHOP_IPV4:
331 nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
333 case ZEBRA_NEXTHOP_IFINDEX:
334 case ZEBRA_NEXTHOP_IFNAME:
335 nexthop->ifindex = stream_getl (s);
337 case ZEBRA_NEXTHOP_IPV4_IFINDEX:
338 case ZEBRA_NEXTHOP_IPV4_IFNAME:
339 nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
340 nexthop->ifindex = stream_getl (s);
343 case ZEBRA_NEXTHOP_IPV6:
344 stream_get (&nexthop->gate.ipv6, s, 16);
346 case ZEBRA_NEXTHOP_IPV6_IFINDEX:
347 case ZEBRA_NEXTHOP_IPV6_IFNAME:
348 stream_get (&nexthop->gate.ipv6, s, 16);
349 nexthop->ifindex = stream_getl (s);
/* Append the parsed nexthop to the temporary list kept in nhlist_head and
 * nhlist_tail; the if/else choosing between "append" and "first element" is
 * not shown. */
359 nhlist_tail->next = nexthop;
360 nhlist_tail = nexthop;
364 nhlist_tail = nexthop;
365 nhlist_head = nexthop;
368 /* No need to evaluate the nexthop if we have already determined
369 * that there has been a change.
371 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
374 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
375 if (nexthop_same_no_recurse(oldnh, nexthop))
379 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
381 bnc_nexthop_free(bnc);
382 bnc->nexthop = nhlist_head;
386 bnc->flags &= ~BGP_NEXTHOP_VALID;
387 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
388 bnc_nexthop_free(bnc);
396 * make_prefix - make a prefix structure from the path (essentially
/* make_prefix - fill *p with a full-length host prefix for the nexthop
 * carried in a path's attributes.
 *
 * afi - address family selector for the branch to take.
 * ri  - path whose attributes supply the nexthop address.
 * p   - output prefix; zeroed first.
 *
 * IPv4: prefix length IPV4_MAX_BITLEN, address ri->attr->nexthop.
 * IPv6: family AF_INET6, prefix length IPV6_MAX_BITLEN, address
 *       ri->attr->extra->mp_nexthop_global. A nexthop whose mp_nexthop_len
 *       is not 16, or whose global address is link-local, takes a separate
 *       branch first.
 *
 * Callers in this file treat a negative return as failure.
 *
 * NOTE(review): the switch/case lines, the IPv4 family assignment, the
 * statement executed for the rejected IPv6 case and the return statements
 * are not part of this listing. ri->attr->extra is dereferenced without a
 * visible NULL check - confirm.
 */
400 make_prefix (int afi, struct bgp_info *ri, struct prefix *p)
402 memset (p, 0, sizeof (struct prefix));
/* IPv4 nexthop taken straight from the attribute. */
407 p->prefixlen = IPV4_MAX_BITLEN;
408 p->u.prefix4 = ri->attr->nexthop;
/* IPv6: only a 16-byte, non-link-local global nexthop is accepted. */
412 if (ri->attr->extra->mp_nexthop_len != 16
413 || IN6_IS_ADDR_LINKLOCAL (&ri->attr->extra->mp_nexthop_global))
416 p->family = AF_INET6;
417 p->prefixlen = IPV6_MAX_BITLEN;
418 p->u.prefix6 = ri->attr->extra->mp_nexthop_global;
428 * sendmsg_nexthop -- Format and send a nexthop register/Unregister
431 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
432 * int command -- either ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER
/* sendmsg_nexthop - build and send a nexthop register/unregister message
 * to Zebra for one cache entry, then update BGP_NEXTHOP_REGISTERED.
 *
 * bnc     - nexthop cache entry the message is about.
 * command - ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER.
 *
 * Message layout as written below: header from zclient_create_header()
 * (VRF_DEFAULT), a field that depends on BGP_NEXTHOP_CONNECTED, the prefix
 * family, the prefix length, the address (IPv4 in_addr or 16 bytes of
 * IPv6), and finally the total length patched in at offset 0.
 *
 * NOTE(review): the debug text for a missing Zebra connection always says
 * "NH register", even when command is an unregister. The assignments of s
 * and p, the value written for the CONNECTED case and the test on ret are on
 * lines missing from this listing; from what is visible the REGISTERED flag
 * is updated after the send attempt regardless of an existing "TBD" on
 * failure handling - confirm.
 */
437 sendmsg_nexthop (struct bgp_nexthop_cache *bnc, int command)
/* Nothing can be sent without an established Zebra client socket. */
444 if (!zclient || zclient->sock < 0)
446 zlog_debug("%s: Can't send NH register, Zebra client not established",
454 zclient_create_header (s, command, VRF_DEFAULT);
455 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
/* Prefix being (un)registered: family, length, then the address bytes. */
460 stream_putw(s, PREFIX_FAMILY(p));
461 stream_putc(s, p->prefixlen);
462 switch (PREFIX_FAMILY(p))
465 stream_put_in_addr (s, &p->u.prefix4);
468 stream_put(s, &(p->u.prefix6), 16);
/* Back-patch the message length at the start of the stream. */
473 stream_putw_at (s, 0, stream_get_endp (s));
475 ret = zclient_send_message(zclient);
476 /* TBD: handle the failure */
478 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
/* Track the registration state locally so register/unregister are not
 * repeated. */
480 if (command == ZEBRA_NEXTHOP_REGISTER)
481 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
482 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
483 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
488 * register_nexthop - register a nexthop with Zebra for notification
489 * when the route to the nexthop changes.
491 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
/* register_nexthop - send ZEBRA_NEXTHOP_REGISTER for an entry, with a test
 * of BGP_NEXTHOP_REGISTERED in front of the send.
 *
 * NOTE(review): the statement controlled by the flag test (original line
 * 500) is not shown; presumably it returns early so that an already
 * registered entry is not registered twice - confirm.
 */
496 register_nexthop (struct bgp_nexthop_cache *bnc)
498 /* Check if we have already registered */
499 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
501 sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_REGISTER);
505 * unregister_nexthop -- Unregister the nexthop from Zebra.
507 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
/* unregister_nexthop - send ZEBRA_NEXTHOP_UNREGISTER for an entry, with a
 * test for a missing BGP_NEXTHOP_REGISTERED flag in front of the send.
 *
 * NOTE(review): the statement controlled by the flag test (original lines
 * 516-517) is not shown; presumably it returns early because an entry that
 * was never registered has nothing to unregister - confirm.
 */
512 unregister_nexthop (struct bgp_nexthop_cache *bnc)
514 /* Check if we have already registered */
515 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
518 sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_UNREGISTER);
522 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
524 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
/* evaluate_paths - re-evaluate every path that uses a nexthop cache entry
 * after the entry has been updated, and notify the associated peer once.
 *
 * bnc - nexthop cache entry whose state (flags, metric, change_flags) has
 *       just been refreshed.
 *
 * For each path on bnc->paths that is a normal BGP route:
 *   - BGP_INFO_VALID is brought in line with BGP_NEXTHOP_VALID, adjusting
 *     aggregates around the flag change;
 *   - the entry's metric is copied into the path's igpmetric (or cleared);
 *   - BGP_INFO_IGP_CHANGED is set when the update recorded a metric or
 *     nexthop change;
 *   - the route node is queued with bgp_process().
 * Afterwards the peer stored in nht_info, if any, is handled once per
 * BGP_NEXTHOP_PEER_NOTIFIED cycle and change_flags is reset.
 *
 * Fix: the "changed" bits are written to and tested in bnc->change_flags
 * everywhere else in this file (and that is the field reset at the end of
 * this function), so the IGP-changed test below now reads change_flags
 * instead of flags.
 *
 * NOTE(review): declarations of rn/afi, the assignment of rn, the trailing
 * arguments of the aggregate calls and the peer notification call are on
 * lines missing from this listing.
 */
529 evaluate_paths (struct bgp_nexthop_cache *bnc)
532 struct bgp_info *path;
533 struct bgp *bgp = bgp_get_default();
535 struct peer *peer = (struct peer *)bnc->nht_info;
537 LIST_FOREACH(path, &(bnc->paths), nh_thread)
539 if (!(path->type == ZEBRA_ROUTE_BGP &&
540 path->sub_type == BGP_ROUTE_NORMAL))
544 afi = family2afi(rn->p.family);
546 /* Path becomes valid/invalid depending on whether the nexthop
547 * reachable/unreachable.
549 if ((CHECK_FLAG(path->flags, BGP_INFO_VALID) ? 1 : 0) !=
550 (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) ? 1 : 0))
552 if (CHECK_FLAG (path->flags, BGP_INFO_VALID))
554 bgp_aggregate_decrement (bgp, &rn->p, path,
556 bgp_info_unset_flag (rn, path, BGP_INFO_VALID);
560 bgp_info_set_flag (rn, path, BGP_INFO_VALID);
561 bgp_aggregate_increment (bgp, &rn->p, path,
566 /* Copy the metric to the path. Will be used for bestpath computation */
567 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
568 (bgp_info_extra_get(path))->igpmetric = bnc->metric;
569 else if (path->extra)
570 path->extra->igpmetric = 0;
/* The *_CHANGED bits live in change_flags, not in flags. */
572 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) ||
573 CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED))
574 SET_FLAG(path->flags, BGP_INFO_IGP_CHANGED);
576 bgp_process(bgp, rn, afi, SAFI_UNICAST);
/* Peer-tracking entries: act only once until PEER_NOTIFIED is cleared. */
579 if (peer && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED))
581 if (BGP_DEBUG(nht, NHT))
582 zlog_debug("%s: Updating peer (%s) status with NHT", __FUNCTION__, peer->host);
583 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
/* All recorded changes have been consumed. */
586 RESET_FLAG(bnc->change_flags);
590 * path_nh_map - make or break path-to-nexthop association.
592 * path - pointer to the path structure
593 * bnc - pointer to the nexthop structure
594 * make - if set, make the association. if unset, just break the existing
/* path_nh_map - make or break the association between a path and a nexthop
 * cache entry.
 *
 * path - path to link or unlink.
 * bnc  - entry to link to (callers pass NULL when only unlinking).
 * make - nonzero to create a link; zero to only remove the existing one.
 *
 * Unlinking removes the path from the entry's path list, decrements the
 * entry's path_count and clears path->nexthop. Linking inserts the path at
 * the head of bnc->paths and increments path_count.
 *
 * NOTE(review): the conditions around the two halves and the assignment of
 * path->nexthop when linking (original line 609) are not part of this
 * listing; the increment below relies on path->nexthop already pointing at
 * the new entry - confirm.
 */
598 path_nh_map (struct bgp_info *path, struct bgp_nexthop_cache *bnc, int make)
/* Break the current association. */
602 LIST_REMOVE(path, nh_thread);
603 path->nexthop->path_count--;
604 path->nexthop = NULL;
/* Create the new association. */
608 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
610 path->nexthop->path_count++;