/* $QuaggaId: Format:%an, %ai, %h$ $
 *
 * BGP Multipath
 * Copyright (C) 2010 Google Inc.
 *
 * This file is part of Quagga
 *
 * Quagga is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * Quagga is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Quagga; see the file COPYING.  If not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

29 #include "sockunion.h"
33 #include "bgpd/bgpd.h"
34 #include "bgpd/bgp_table.h"
35 #include "bgpd/bgp_route.h"
36 #include "bgpd/bgp_attr.h"
37 #include "bgpd/bgp_debug.h"
38 #include "bgpd/bgp_aspath.h"
39 #include "bgpd/bgp_community.h"
40 #include "bgpd/bgp_ecommunity.h"
41 #include "bgpd/bgp_lcommunity.h"
42 #include "bgpd/bgp_mpath.h"
45 bgp_mpath_is_configured_sort (struct bgp *bgp, bgp_peer_sort_t sort,
46 afi_t afi, safi_t safi)
48 struct bgp_maxpaths_cfg *cfg = &bgp->maxpaths[afi][safi];
50 /* XXX: BGP_DEFAULT_MAXPATHS is 1, and this test only seems to make sense
51 * if if it stays 1, so not sure the DEFAULT define is that useful.
56 return cfg->maxpaths_ibgp != BGP_DEFAULT_MAXPATHS;
58 return cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS;
65 bgp_mpath_is_configured (struct bgp *bgp, afi_t afi, safi_t safi)
67 return bgp_mpath_is_configured_sort (bgp, BGP_PEER_IBGP, afi, safi)
68 || bgp_mpath_is_configured_sort (bgp, BGP_PEER_EBGP, afi, safi);
72 * bgp_maximum_paths_set
74 * Record maximum-paths configuration for BGP instance
77 bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi,
78 int peertype, u_int16_t maxpaths)
80 if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
86 bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths;
89 bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths;
99 * bgp_maximum_paths_unset
101 * Remove maximum-paths configuration from BGP instance
104 bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi,
107 if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
113 bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
116 bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
126 * bgp_info_nexthop_cmp
128 * Compare the nexthops of two paths. Return value is less than, equal to,
129 * or greater than zero if bi1 is respectively less than, equal to,
130 * or greater than bi2.
133 bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2)
135 struct attr_extra *ae1, *ae2;
138 ae1 = bi1->attr->extra;
139 ae2 = bi2->attr->extra;
141 compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop);
143 if (!compare && ae1 && ae2)
145 if (ae1->mp_nexthop_len == ae2->mp_nexthop_len)
147 switch (ae1->mp_nexthop_len)
151 compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in,
152 &ae2->mp_nexthop_global_in);
155 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
156 &ae2->mp_nexthop_global);
159 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
160 &ae2->mp_nexthop_global);
162 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local,
163 &ae2->mp_nexthop_local);
168 /* This can happen if one IPv6 peer sends you global and link-local
169 * nexthops but another IPv6 peer only sends you global
171 else if (ae1->mp_nexthop_len == 16 || ae1->mp_nexthop_len == 32)
173 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
174 &ae2->mp_nexthop_global);
177 if (ae1->mp_nexthop_len < ae2->mp_nexthop_len)
191 * This function determines our multipath list ordering. By ordering
192 * the list we can deterministically select which paths are included
193 * in the multipath set. The ordering also helps in detecting changes
194 * in the multipath selection so we can detect whether to send an
197 * The order of paths is determined first by received nexthop, and then
198 * by peer address if the nexthops are the same.
201 bgp_info_mpath_cmp (void *val1, void *val2)
203 struct bgp_info *bi1, *bi2;
209 compare = bgp_info_nexthop_cmp (bi1, bi2);
212 compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote);
220 * Initialize the mp_list, which holds the list of multipaths
221 * selected by bgp_best_selection
224 bgp_mp_list_init (struct list *mp_list)
227 memset (mp_list, 0, sizeof (struct list));
228 mp_list->cmp = bgp_info_mpath_cmp;
/*
 * bgp_mp_list_clear
 *
 * Clears all entries out of the mp_list.
 *
 * NOTE(review): the `assert (mp_list)` line is restored from a gap in
 * the listing - confirm against upstream.
 */
void
bgp_mp_list_clear (struct list *mp_list)
{
  assert (mp_list);

  list_delete_all_node (mp_list);
}

/*
 * bgp_mp_list_add
 *
 * Adds a multipath entry to the mp_list.  The entry is inserted with
 * listnode_add_sort(); both arguments must be non-NULL.
 */
void
bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo)
{
  assert (mp_list && mpinfo);

  listnode_add_sort (mp_list, mpinfo);
}

258 * Allocate and zero memory for a new bgp_info_mpath element
260 static struct bgp_info_mpath *
261 bgp_info_mpath_new (void)
263 struct bgp_info_mpath *new_mpath;
264 new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath));
269 * bgp_info_mpath_free
271 * Release resources for a bgp_info_mpath element and zero out pointer
274 bgp_info_mpath_free (struct bgp_info_mpath **mpath)
278 if ((*mpath)->mp_attr)
279 bgp_attr_unintern (&(*mpath)->mp_attr);
280 XFREE (MTYPE_BGP_MPATH_INFO, *mpath);
288 * Fetch the mpath element for the given bgp_info. Used for
289 * doing lazy allocation.
291 static struct bgp_info_mpath *
292 bgp_info_mpath_get (struct bgp_info *binfo)
294 struct bgp_info_mpath *mpath;
297 mpath = bgp_info_mpath_new();
300 binfo->mpath = mpath;
301 mpath->mp_info = binfo;
307 * bgp_info_mpath_enqueue
309 * Enqueue a path onto the multipath list given the previous multipath
313 bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo)
315 struct bgp_info_mpath *prev, *mpath;
317 prev = bgp_info_mpath_get (prev_info);
318 mpath = bgp_info_mpath_get (binfo);
322 mpath->mp_next = prev->mp_next;
323 mpath->mp_prev = prev;
325 prev->mp_next->mp_prev = mpath;
326 prev->mp_next = mpath;
328 SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
332 * bgp_info_mpath_dequeue
334 * Remove a path from the multipath list
337 bgp_info_mpath_dequeue (struct bgp_info *binfo)
339 struct bgp_info_mpath *mpath = binfo->mpath;
343 mpath->mp_prev->mp_next = mpath->mp_next;
345 mpath->mp_next->mp_prev = mpath->mp_prev;
346 mpath->mp_next = mpath->mp_prev = NULL;
347 UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
351 * bgp_info_mpath_next
353 * Given a bgp_info, return the next multipath entry
356 bgp_info_mpath_next (struct bgp_info *binfo)
358 if (!binfo->mpath || !binfo->mpath->mp_next)
360 return binfo->mpath->mp_next->mp_info;
/*
 * bgp_info_mpath_first
 *
 * Given bestpath bgp_info, return the first multipath entry.  This is
 * simply the entry that follows the bestpath on its multipath list.
 */
struct bgp_info *
bgp_info_mpath_first (struct bgp_info *binfo)
{
  struct bgp_info *first = bgp_info_mpath_next (binfo);

  return first;
}

375 * bgp_info_mpath_count
377 * Given the bestpath bgp_info, return the number of multipath entries
380 bgp_info_mpath_count (struct bgp_info *binfo)
384 return binfo->mpath->mp_count;
388 * bgp_info_mpath_count_set
390 * Sets the count of multipaths into bestpath's mpath element
393 bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count)
395 struct bgp_info_mpath *mpath;
396 if (!count && !binfo->mpath)
398 mpath = bgp_info_mpath_get (binfo);
401 mpath->mp_count = count;
405 * bgp_info_mpath_attr
407 * Given bestpath bgp_info, return aggregated attribute set used
408 * for advertising the multipath route
411 bgp_info_mpath_attr (struct bgp_info *binfo)
415 return binfo->mpath->mp_attr;
419 * bgp_info_mpath_attr_set
421 * Sets the aggregated attribute into bestpath's mpath element
424 bgp_info_mpath_attr_set (struct bgp_info *binfo, struct attr *attr)
426 struct bgp_info_mpath *mpath;
427 if (!attr && !binfo->mpath)
429 mpath = bgp_info_mpath_get (binfo);
432 mpath->mp_attr = attr;
436 * bgp_info_mpath_update
438 * Compare and sync up the multipath list with the mp_list generated by
442 bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best,
443 struct bgp_info *old_best, struct list *mp_list,
444 afi_t afi, safi_t safi)
446 u_int16_t maxpaths, mpath_count, old_mpath_count;
447 struct listnode *mp_node, *mp_next_node;
448 struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath;
449 int mpath_changed, debug;
450 char pfx_buf[INET6_ADDRSTRLEN], nh_buf[2][INET6_ADDRSTRLEN];
451 struct bgp_maxpaths_cfg *mpath_cfg = NULL;
454 maxpaths = BGP_DEFAULT_MAXPATHS;
458 prev_mpath = new_best;
459 mp_node = listhead (mp_list);
461 debug = BGP_DEBUG (events, EVENTS);
464 prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf));
468 mpath_cfg = &new_best->peer->bgp->maxpaths[afi][safi];
470 if (new_best != old_best)
471 bgp_info_mpath_dequeue (new_best);
472 maxpaths = (new_best->peer->sort == BGP_PEER_IBGP) ?
473 mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp;
478 cur_mpath = bgp_info_mpath_first (old_best);
479 old_mpath_count = bgp_info_mpath_count (old_best);
480 bgp_info_mpath_count_set (old_best, 0);
481 bgp_info_mpath_dequeue (old_best);
485 * We perform an ordered walk through both lists in parallel.
486 * The reason for the ordered walk is that if there are paths
487 * that were previously multipaths and are still multipaths, the walk
488 * should encounter them in both lists at the same time. Otherwise
489 * there will be paths that are in one list or another, and we
490 * will deal with these separately.
492 * Note that new_best might be somewhere in the mp_list, so we need
495 while (mp_node || cur_mpath)
498 * We can bail out of this loop if all existing paths on the
499 * multipath list have been visited (for cleanup purposes) and
500 * the maxpath requirement is fulfulled
502 if (!cur_mpath && (mpath_count >= maxpaths))
505 mp_next_node = mp_node ? listnextnode (mp_node) : NULL;
506 next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL;
509 * If equal, the path was a multipath and is still a multipath.
510 * Insert onto new multipath list if maxpaths allows.
512 if (mp_node && (listgetdata (mp_node) == cur_mpath))
514 list_delete_node (mp_list, mp_node);
515 bgp_info_mpath_dequeue (cur_mpath);
516 if ((mpath_count < maxpaths) &&
517 bgp_info_nexthop_cmp (prev_mpath, cur_mpath))
519 bgp_info_mpath_enqueue (prev_mpath, cur_mpath);
520 prev_mpath = cur_mpath;
527 zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
528 inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
529 nh_buf[0], sizeof (nh_buf[0])),
530 sockunion2str (cur_mpath->peer->su_remote,
531 nh_buf[1], sizeof (nh_buf[1])));
533 mp_node = mp_next_node;
534 cur_mpath = next_mpath;
538 if (cur_mpath && (!mp_node ||
539 (bgp_info_mpath_cmp (cur_mpath,
540 listgetdata (mp_node)) < 0)))
543 * If here, we have an old multipath and either the mp_list
544 * is finished or the next mp_node points to a later
545 * multipath, so we need to purge this path from the
548 bgp_info_mpath_dequeue (cur_mpath);
551 zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
552 inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
553 nh_buf[0], sizeof (nh_buf[0])),
554 sockunion2str (cur_mpath->peer->su_remote,
555 nh_buf[1], sizeof (nh_buf[1])));
556 cur_mpath = next_mpath;
561 * If here, we have a path on the mp_list that was not previously
562 * a multipath (due to non-equivalance or maxpaths exceeded),
563 * or the matching multipath is sorted later in the multipath
564 * list. Before we enqueue the path on the new multipath list,
565 * make sure its not on the old_best multipath list or referenced
567 * - If next_mpath points to this new path, update next_mpath to
568 * point to the multipath after this one
569 * - Dequeue the path from the multipath list just to make sure
571 new_mpath = listgetdata (mp_node);
572 list_delete_node (mp_list, mp_node);
573 if ((mpath_count < maxpaths) && (new_mpath != new_best) &&
574 bgp_info_nexthop_cmp (prev_mpath, new_mpath))
576 if (new_mpath == next_mpath)
577 next_mpath = bgp_info_mpath_next (new_mpath);
578 bgp_info_mpath_dequeue (new_mpath);
580 bgp_info_mpath_enqueue (prev_mpath, new_mpath);
581 prev_mpath = new_mpath;
585 zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf,
586 inet_ntop (AF_INET, &new_mpath->attr->nexthop,
587 nh_buf[0], sizeof (nh_buf[0])),
588 sockunion2str (new_mpath->peer->su_remote,
589 nh_buf[1], sizeof (nh_buf[1])));
591 mp_node = mp_next_node;
597 bgp_info_mpath_count_set (new_best, mpath_count-1);
598 if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count))
599 SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG);
604 * bgp_mp_dmed_deselect
606 * Clean up multipath information for BGP_INFO_DMED_SELECTED path that
607 * is not selected as best path
610 bgp_mp_dmed_deselect (struct bgp_info *dmed_best)
612 struct bgp_info *mpinfo, *mpnext;
617 for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext)
619 mpnext = bgp_info_mpath_next (mpinfo);
620 bgp_info_mpath_dequeue (mpinfo);
623 bgp_info_mpath_count_set (dmed_best, 0);
624 UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG);
625 assert (bgp_info_mpath_first (dmed_best) == 0);
629 * bgp_info_mpath_aggregate_update
631 * Set the multipath aggregate attribute. We need to see if the
632 * aggregate has changed and then set the ATTR_CHANGED flag on the
633 * bestpath info so that a peer update will be generated. The
634 * change is detected by generating the current attribute,
635 * interning it, and then comparing the interned pointer with the
636 * current value. We can skip this generate/compare step if there
637 * is no change in multipath selection and no attribute change in
641 bgp_info_mpath_aggregate_update (struct bgp_info *new_best,
642 struct bgp_info *old_best)
644 struct bgp_info *mpinfo;
645 struct aspath *aspath;
646 struct aspath *asmerge;
647 struct attr *new_attr, *old_attr;
648 u_char origin, attr_chg;
649 struct community *community, *commerge;
650 struct ecommunity *ecomm, *ecommerge;
651 struct lcommunity *lcomm, *lcommerge;
652 struct attr_extra *ae;
653 struct attr attr = { 0 };
655 if (old_best && (old_best != new_best) &&
656 (old_attr = bgp_info_mpath_attr (old_best)))
658 bgp_attr_unintern (&old_attr);
659 bgp_info_mpath_attr_set (old_best, NULL);
665 if (!bgp_info_mpath_count (new_best))
667 if ((new_attr = bgp_info_mpath_attr (new_best)))
669 bgp_attr_unintern (&new_attr);
670 bgp_info_mpath_attr_set (new_best, NULL);
671 SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
677 * Bail out here if the following is true:
678 * - MULTIPATH_CHG bit is not set on new_best, and
679 * - No change in bestpath, and
680 * - ATTR_CHANGED bit is not set on new_best or any of the multipaths
682 if (!CHECK_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG) &&
683 (old_best == new_best))
687 if (CHECK_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED))
690 for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
691 mpinfo = bgp_info_mpath_next (mpinfo))
693 if (CHECK_FLAG (mpinfo->flags, BGP_INFO_ATTR_CHANGED))
702 assert (bgp_info_mpath_attr (new_best));
707 bgp_attr_dup (&attr, new_best->attr);
709 /* aggregate attribute from multipath constituents */
710 aspath = aspath_dup (attr.aspath);
711 origin = attr.origin;
712 community = attr.community ? community_dup (attr.community) : NULL;
714 ecomm = (ae && ae->ecommunity) ? ecommunity_dup (ae->ecommunity) : NULL;
716 lcomm = (ae && ae->lcommunity) ? lcommunity_dup (ae->lcommunity) : NULL;
718 for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
719 mpinfo = bgp_info_mpath_next (mpinfo))
721 asmerge = aspath_aggregate_mpath (aspath, mpinfo->attr->aspath);
722 aspath_free (aspath);
725 if (origin < mpinfo->attr->origin)
726 origin = mpinfo->attr->origin;
728 if (mpinfo->attr->community)
732 commerge = community_merge (community, mpinfo->attr->community);
733 community = community_uniq_sort (commerge);
734 community_free (commerge);
737 community = community_dup (mpinfo->attr->community);
740 ae = mpinfo->attr->extra;
741 if (ae && ae->ecommunity)
745 ecommerge = ecommunity_merge (ecomm, ae->ecommunity);
746 ecomm = ecommunity_uniq_sort (ecommerge);
747 ecommunity_free (&ecommerge);
750 ecomm = ecommunity_dup (ae->ecommunity);
753 if (ae && ae->lcommunity)
757 lcommerge = lcommunity_merge (lcomm, ae->lcommunity);
758 lcomm = lcommunity_uniq_sort (lcommerge);
759 lcommunity_free (&lcommerge);
762 lcomm = lcommunity_dup (ae->lcommunity);
766 attr.aspath = aspath;
767 attr.origin = origin;
770 attr.community = community;
771 attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_COMMUNITIES);
775 ae = bgp_attr_extra_get (&attr);
776 ae->ecommunity = ecomm;
777 attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES);
780 /* Zap multipath attr nexthop so we set nexthop to self */
781 attr.nexthop.s_addr = 0;
783 memset (&attr.extra->mp_nexthop_global, 0, sizeof (struct in6_addr));
785 /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */
787 new_attr = bgp_attr_intern (&attr);
788 bgp_attr_extra_free (&attr);
790 if (new_attr != bgp_info_mpath_attr (new_best))
792 if ((old_attr = bgp_info_mpath_attr (new_best)))
793 bgp_attr_unintern (&old_attr);
794 bgp_info_mpath_attr_set (new_best, new_attr);
795 SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
798 bgp_attr_unintern (&new_attr);