1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Zebra; see the file COPYING. If not, write to the Free
18 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include <net/if_arp.h>
25 /* Hack for GNU libc version 2. */
27 #define MSG_TRUNC 0x20
28 #endif /* MSG_TRUNC */
34 #include "connected.h"
43 #include "zebra/zserv.h"
45 #include "zebra/redistribute.h"
46 #include "zebra/interface.h"
47 #include "zebra/debug.h"
49 #include "rt_netlink.h"
/* Printable names for rtnetlink message types; handed to lookup() when the
 * functions in this file format their log messages. */
51 static const struct message nlmsg_str[] = {
52 {RTM_NEWROUTE, "RTM_NEWROUTE"},
53 {RTM_DELROUTE, "RTM_DELROUTE"},
54 {RTM_GETROUTE, "RTM_GETROUTE"},
55 {RTM_NEWLINK, "RTM_NEWLINK"},
56 {RTM_DELLINK, "RTM_DELLINK"},
57 {RTM_GETLINK, "RTM_GETLINK"},
58 {RTM_NEWADDR, "RTM_NEWADDR"},
59 {RTM_DELADDR, "RTM_DELADDR"},
60 {RTM_GETADDR, "RTM_GETADDR"},
64 extern struct zebra_t zebrad;
66 extern struct zebra_privs_t zserv_privs;
68 extern u_int32_t nl_rcvbufsize;
75 /* Note: on netlink systems, there should be a 1-to-1 mapping between interface
76 names and ifindex values. */
/* Store ifi_index as the ifindex of ifp.
 *
 * When if_lookup_by_index() already returns a different interface (oifp) for
 * that index, the clash is reported first: an error if the kernel supplied
 * the reserved IFINDEX_INTERNAL value, a debug message describing the rename,
 * and an error about a rename on an interface that is up.  if_delete_update()
 * is called on the previous holder of the index.
 *
 * NOTE(review): the exact nesting of those branches is not visible in this
 * listing; confirm which of them lead to if_delete_update(). */
78 set_ifindex(struct interface *ifp, ifindex_t ifi_index)
80 struct interface *oifp;
/* Is the index currently held by some other interface? */
82 if (((oifp = if_lookup_by_index(ifi_index)) != NULL) && (oifp != ifp))
84 if (ifi_index == IFINDEX_INTERNAL)
85 zlog_err("Netlink is setting interface %s ifindex to reserved "
86 "internal value %u", ifp->name, ifi_index);
89 if (IS_ZEBRA_DEBUG_KERNEL)
90 zlog_debug("interface index %d was renamed from %s to %s",
91 ifi_index, oifp->name, ifp->name);
93 zlog_err("interface rename detected on up interface: index %d "
94 "was renamed from %s to %s, results are uncertain!",
95 ifi_index, oifp->name, ifp->name);
96 if_delete_update(oifp);
/* Record the index on the interface being updated. */
99 ifp->ifindex = ifi_index;
/* Fallback definition for build environments whose headers do not provide
 * SO_RCVBUFFORCE. */
102 #ifndef SO_RCVBUFFORCE
103 #define SO_RCVBUFFORCE (33)
/* Resize the receive buffer of the netlink socket nl.
 *
 * Reads the current SO_RCVBUF size, tries SO_RCVBUFFORCE with privileges
 * raised, issues a plain SO_RCVBUF set as the fallback, reads the size back
 * and logs the old and new values at LOG_INFO.  Failures of the socket
 * option calls are logged at LOG_ERR with the socket name.
 *
 * NOTE(review): both setsockopt() calls pass the global nl_rcvbufsize; in the
 * visible lines the newsize parameter only serves as the destination of the
 * final getsockopt().  Confirm that ignoring the caller's value is intended. */
107 netlink_recvbuf (struct nlsock *nl, uint32_t newsize)
110 socklen_t newlen = sizeof(newsize);
111 socklen_t oldlen = sizeof(oldsize);
/* Remember the size in effect before the change, for the log line below. */
114 ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
117 zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
118 safe_strerror (errno));
122 /* Try force option (linux >= 2.6.14) and fall back to normal set */
123 if ( zserv_privs.change (ZPRIVS_RAISE) )
124 zlog_err ("routing_socket: Can't raise privileges");
125 ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE, &nl_rcvbufsize,
126 sizeof(nl_rcvbufsize));
127 if ( zserv_privs.change (ZPRIVS_LOWER) )
128 zlog_err ("routing_socket: Can't lower privileges");
130 ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &nl_rcvbufsize,
131 sizeof(nl_rcvbufsize));
134 zlog (NULL, LOG_ERR, "Can't set %s receive buffer size: %s", nl->name,
135 safe_strerror (errno));
/* Read back the size the kernel actually applied. */
139 ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
142 zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
143 safe_strerror (errno));
147 zlog (NULL, LOG_INFO,
148 "Setting netlink socket receive buffer size: %u -> %u",
153 /* Make socket for Linux netlink interface. */
/* Opens an AF_NETLINK / SOCK_RAW / NETLINK_ROUTE socket for VRF vrf_id through
 * vrf_socket() and binds it to the multicast groups given in `groups`.
 *
 * Privileges are raised before the socket is created and lowered again after
 * bind(); the bind error is reported through save_errno.  getsockname() is
 * then used to read the address back, and a result whose length differs from
 * sizeof(struct sockaddr_nl) is treated as a failure.
 *
 * @param nl      netlink socket descriptor; nl->name is used in log messages
 * @param groups  multicast group mask stored in snl.nl_groups
 * @param vrf_id  VRF passed to vrf_socket() */
155 netlink_socket (struct nlsock *nl, unsigned long groups, vrf_id_t vrf_id)
158 struct sockaddr_nl snl;
163 if (zserv_privs.change (ZPRIVS_RAISE))
165 zlog (NULL, LOG_ERR, "Can't raise privileges");
169 sock = vrf_socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, vrf_id);
172 zlog (NULL, LOG_ERR, "Can't open %s socket: %s", nl->name,
173 safe_strerror (errno));
/* Only the family and the group mask are set; everything else stays zero. */
177 memset (&snl, 0, sizeof snl);
178 snl.nl_family = AF_NETLINK;
179 snl.nl_groups = groups;
181 /* Bind the socket to the netlink structure for anything. */
182 ret = bind (sock, (struct sockaddr *) &snl, sizeof snl);
184 if (zserv_privs.change (ZPRIVS_LOWER))
185 zlog (NULL, LOG_ERR, "Can't lower privileges");
189 zlog (NULL, LOG_ERR, "Can't bind %s socket to group 0x%x: %s",
190 nl->name, snl.nl_groups, safe_strerror (save_errno));
195 /* multiple netlink sockets will have different nl_pid */
196 namelen = sizeof snl;
197 ret = getsockname (sock, (struct sockaddr *) &snl, (socklen_t *) &namelen);
198 if (ret < 0 || namelen != sizeof snl)
200 zlog (NULL, LOG_ERR, "Can't get %s socket name: %s", nl->name,
201 safe_strerror (errno));
211 /* Request a dump of the given message type from netlink. */
/* Builds a zeroed request whose header carries `type`, the flags
 * NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST, the socket's own nl_pid and the
 * next sequence number (nl->seq is incremented first), with `family` stored
 * in the rtgen_family field.  The request is sent with sendto() to a zeroed
 * AF_NETLINK address.  Privileges are raised for the send and lowered
 * afterwards; a send failure is logged using save_errno.
 *
 * @param family  address family placed in req.g.rtgen_family
 * @param type    rtnetlink message type placed in req.nlh.nlmsg_type
 * @param nl      socket to send on; an inactive socket is reported as an error */
213 netlink_request (int family, int type, struct nlsock *nl)
216 struct sockaddr_nl snl;
226 /* Check netlink socket. */
229 zlog (NULL, LOG_ERR, "%s socket isn't active.", nl->name);
233 memset (&snl, 0, sizeof snl);
234 snl.nl_family = AF_NETLINK;
236 memset (&req, 0, sizeof req);
237 req.nlh.nlmsg_len = sizeof req;
238 req.nlh.nlmsg_type = type;
239 req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
240 req.nlh.nlmsg_pid = nl->snl.nl_pid;
241 req.nlh.nlmsg_seq = ++nl->seq;
242 req.g.rtgen_family = family;
244 /* linux appears to check capabilities on every message
245 * have to raise caps for every message sent
247 if (zserv_privs.change (ZPRIVS_RAISE))
249 zlog (NULL, LOG_ERR, "Can't raise privileges");
253 ret = sendto (nl->sock, (void *) &req, sizeof req, 0,
254 (struct sockaddr *) &snl, sizeof snl);
257 if (zserv_privs.change (ZPRIVS_LOWER))
258 zlog (NULL, LOG_ERR, "Can't lower privileges");
262 zlog (NULL, LOG_ERR, "%s sendto failed: %s", nl->name,
263 safe_strerror (save_errno));
270 /* Receive messages from the netlink socket and pass each of them
271 to the given filter function. */
/* Reads from nl->sock with recvmsg() into the nl_rcvbuf buffer and walks the
 * received bytes message by message with NLMSG_OK / NLMSG_NEXT:
 *
 *  - NLMSG_DONE is recognised as the end of the reply;
 *  - NLMSG_ERROR with a zero error field is an ACK, otherwise the error is
 *    logged -- only at debug level for the link-handling races on the command
 *    socket (RTM_DELROUTE with ENODEV/ESRCH, RTM_NEWROUTE with EEXIST);
 *  - messages whose nlmsg_pid is the command socket's nl_pid but which arrive
 *    on another socket are recognised (RTM_NEWADDR / RTM_DELADDR excepted);
 *  - every other message is handed to filter (&snl, h, zvrf->vrf_id).
 *
 * A truncated datagram (MSG_TRUNC) and leftover bytes after the walk are both
 * reported as errors.
 *
 * @param filter  callback invoked for each ordinary message
 * @param nl      socket to read from; nl->name is used in log messages
 * @param zvrf    VRF context supplying vrf_id and the command socket */
273 netlink_parse_info (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *,
275 struct nlsock *nl, struct zebra_vrf *zvrf)
284 .iov_base = nl_rcvbuf.p,
285 .iov_len = nl_rcvbuf.size,
287 struct sockaddr_nl snl;
288 struct msghdr msg = {
289 .msg_name = (void *) &snl,
290 .msg_namelen = sizeof snl,
296 status = recvmsg (nl->sock, &msg, 0);
301 if (errno == EWOULDBLOCK || errno == EAGAIN)
303 zlog (NULL, LOG_ERR, "%s recvmsg overrun: %s",
304 nl->name, safe_strerror(errno));
310 zlog (NULL, LOG_ERR, "%s EOF", nl->name);
/* The sender address must be a full sockaddr_nl. */
314 if (msg.msg_namelen != sizeof snl)
316 zlog (NULL, LOG_ERR, "%s sender address length error: length %d",
317 nl->name, msg.msg_namelen);
321 for (h = (struct nlmsghdr *) nl_rcvbuf.p;
322 NLMSG_OK (h, (unsigned int) status);
323 h = NLMSG_NEXT (h, status))
325 /* Finished reading. */
326 if (h->nlmsg_type == NLMSG_DONE)
329 /* Error handling. */
330 if (h->nlmsg_type == NLMSG_ERROR)
332 struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA (h);
333 int errnum = err->error;
334 int msg_type = err->msg.nlmsg_type;
336 /* If the error field is zero, then this is an ACK */
339 if (IS_ZEBRA_DEBUG_KERNEL)
341 zlog_debug ("%s: %s ACK: type=%s(%u), seq=%u, pid=%u",
342 __FUNCTION__, nl->name,
343 lookup (nlmsg_str, err->msg.nlmsg_type),
344 err->msg.nlmsg_type, err->msg.nlmsg_seq,
348 /* return if not a multipart message, otherwise continue */
349 if (!(h->nlmsg_flags & NLM_F_MULTI))
/* An error message must be large enough to hold a struct nlmsgerr. */
356 if (h->nlmsg_len < NLMSG_LENGTH (sizeof (struct nlmsgerr)))
358 zlog (NULL, LOG_ERR, "%s error: message truncated",
363 /* Deal with errors that occur because of races in link handling */
364 if (nl == &zvrf->netlink_cmd
365 && ((msg_type == RTM_DELROUTE &&
366 (-errnum == ENODEV || -errnum == ESRCH))
367 || (msg_type == RTM_NEWROUTE && -errnum == EEXIST)))
369 if (IS_ZEBRA_DEBUG_KERNEL)
370 zlog_debug ("%s: error: %s type=%s(%u), seq=%u, pid=%u",
371 nl->name, safe_strerror (-errnum),
372 lookup (nlmsg_str, msg_type),
373 msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid);
377 zlog_err ("%s error: %s, type=%s(%u), seq=%u, pid=%u",
378 nl->name, safe_strerror (-errnum),
379 lookup (nlmsg_str, msg_type),
380 msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid);
384 /* OK we got netlink message. */
385 if (IS_ZEBRA_DEBUG_KERNEL)
386 zlog_debug ("netlink_parse_info: %s type %s(%u), seq=%u, pid=%u",
388 lookup (nlmsg_str, h->nlmsg_type), h->nlmsg_type,
389 h->nlmsg_seq, h->nlmsg_pid);
391 /* skip unsolicited messages originating from command socket
392 * linux sets the originators port-id for {NEW|DEL}ADDR messages,
393 * so this has to be checked here. */
394 if (nl != &zvrf->netlink_cmd
395 && h->nlmsg_pid == zvrf->netlink_cmd.snl.nl_pid
396 && (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR))
398 if (IS_ZEBRA_DEBUG_KERNEL)
399 zlog_debug ("netlink_parse_info: %s packet comes from %s",
400 zvrf->netlink_cmd.name, nl->name);
/* Hand the message to the caller-supplied filter. */
404 error = (*filter) (&snl, h, zvrf->vrf_id);
407 zlog (NULL, LOG_ERR, "%s filter function error", nl->name);
412 /* Checks after the message walk: truncation and leftover bytes. */
413 if (msg.msg_flags & MSG_TRUNC)
415 zlog (NULL, LOG_ERR, "%s error: message truncated!", nl->name);
417 "Must restart with larger --nl-bufsize value!");
422 zlog (NULL, LOG_ERR, "%s error: data remnant size %d", nl->name,
430 /* Utility function to index a chain of rtattr entries by type. */
/* Walks the attributes starting at rta for as long as RTA_OK() accepts them
 * and stores a pointer to each one in tb[rta_type].  Attributes whose type is
 * greater than max are skipped, so tb needs max + 1 slots.  When a type occurs
 * more than once the last occurrence wins.  Slots for absent types are left
 * untouched; the callers in this file zero tb with memset() beforehand. */
432 netlink_parse_rtattr (struct rtattr **tb, int max, struct rtattr *rta,
435 while (RTA_OK (rta, len))
437 if (rta->rta_type <= max)
438 tb[rta->rta_type] = rta;
439 rta = RTA_NEXT (rta, len);
443 /* Utility function to parse hardware link-layer address and update ifp */
/* Does nothing unless tb[IFLA_ADDRESS] is present.  The payload length of the
 * attribute is taken as the address length; a length above
 * INTERFACE_HWADDR_MAX is reported with a warning.  Otherwise the bytes are
 * copied into ifp->hw_addr.  The copied address is then scanned: if every
 * byte is zero, ifp->hw_addr_len is set to 0, else to the real length.
 *
 * @param tb   attribute table filled by netlink_parse_rtattr()
 * @param ifp  interface whose hw_addr / hw_addr_len are updated */
445 netlink_interface_update_hw_addr (struct rtattr **tb, struct interface *ifp)
449 if (tb[IFLA_ADDRESS])
453 hw_addr_len = RTA_PAYLOAD (tb[IFLA_ADDRESS]);
455 if (hw_addr_len > INTERFACE_HWADDR_MAX)
456 zlog_warn ("Hardware address is too large: %d", hw_addr_len);
459 ifp->hw_addr_len = hw_addr_len;
460 memcpy (ifp->hw_addr, RTA_DATA (tb[IFLA_ADDRESS]), hw_addr_len);
/* Look for the first non-zero byte of the address. */
462 for (i = 0; i < hw_addr_len; i++)
463 if (ifp->hw_addr[i] != 0)
/* i reaching hw_addr_len means no non-zero byte was found. */
466 if (i == hw_addr_len)
467 ifp->hw_addr_len = 0;
469 ifp->hw_addr_len = hw_addr_len;
/* Translate a kernel ARPHRD_* hardware type into the matching ZEBRA_LLT_*
 * link-layer type.  Values without a case below map to ZEBRA_LLT_UNKNOWN.
 * Most names correspond one to one; the exceptions are ARPHRD_CISCO
 * (ZEBRA_LLT_CHDLC), ARPHRD_TUNNEL (ZEBRA_LLT_IPIP) and ARPHRD_TUNNEL6
 * (ZEBRA_LLT_IPIP6). */
474 static enum zebra_link_type
475 netlink_to_zebra_link_type (unsigned int hwt)
479 case ARPHRD_ETHER: return ZEBRA_LLT_ETHER;
480 case ARPHRD_EETHER: return ZEBRA_LLT_EETHER;
481 case ARPHRD_AX25: return ZEBRA_LLT_AX25;
482 case ARPHRD_PRONET: return ZEBRA_LLT_PRONET;
483 case ARPHRD_IEEE802: return ZEBRA_LLT_IEEE802;
484 case ARPHRD_ARCNET: return ZEBRA_LLT_ARCNET;
485 case ARPHRD_APPLETLK: return ZEBRA_LLT_APPLETLK;
486 case ARPHRD_DLCI: return ZEBRA_LLT_DLCI;
487 case ARPHRD_ATM: return ZEBRA_LLT_ATM;
488 case ARPHRD_METRICOM: return ZEBRA_LLT_METRICOM;
489 case ARPHRD_IEEE1394: return ZEBRA_LLT_IEEE1394;
490 case ARPHRD_EUI64: return ZEBRA_LLT_EUI64;
491 case ARPHRD_INFINIBAND: return ZEBRA_LLT_INFINIBAND;
492 case ARPHRD_SLIP: return ZEBRA_LLT_SLIP;
493 case ARPHRD_CSLIP: return ZEBRA_LLT_CSLIP;
494 case ARPHRD_SLIP6: return ZEBRA_LLT_SLIP6;
495 case ARPHRD_CSLIP6: return ZEBRA_LLT_CSLIP6;
496 case ARPHRD_RSRVD: return ZEBRA_LLT_RSRVD;
497 case ARPHRD_ADAPT: return ZEBRA_LLT_ADAPT;
498 case ARPHRD_ROSE: return ZEBRA_LLT_ROSE;
499 case ARPHRD_X25: return ZEBRA_LLT_X25;
500 case ARPHRD_PPP: return ZEBRA_LLT_PPP;
501 case ARPHRD_CISCO: return ZEBRA_LLT_CHDLC;
502 case ARPHRD_LAPB: return ZEBRA_LLT_LAPB;
503 case ARPHRD_RAWHDLC: return ZEBRA_LLT_RAWHDLC;
504 case ARPHRD_TUNNEL: return ZEBRA_LLT_IPIP;
505 case ARPHRD_TUNNEL6: return ZEBRA_LLT_IPIP6;
506 case ARPHRD_FRAD: return ZEBRA_LLT_FRAD;
507 case ARPHRD_SKIP: return ZEBRA_LLT_SKIP;
508 case ARPHRD_LOOPBACK: return ZEBRA_LLT_LOOPBACK;
509 case ARPHRD_LOCALTLK: return ZEBRA_LLT_LOCALTLK;
510 case ARPHRD_FDDI: return ZEBRA_LLT_FDDI;
511 case ARPHRD_SIT: return ZEBRA_LLT_SIT;
512 case ARPHRD_IPDDP: return ZEBRA_LLT_IPDDP;
513 case ARPHRD_IPGRE: return ZEBRA_LLT_IPGRE;
514 case ARPHRD_PIMREG: return ZEBRA_LLT_PIMREG;
515 case ARPHRD_HIPPI: return ZEBRA_LLT_HIPPI;
516 case ARPHRD_ECONET: return ZEBRA_LLT_ECONET;
517 case ARPHRD_IRDA: return ZEBRA_LLT_IRDA;
518 case ARPHRD_FCPP: return ZEBRA_LLT_FCPP;
519 case ARPHRD_FCAL: return ZEBRA_LLT_FCAL;
520 case ARPHRD_FCPL: return ZEBRA_LLT_FCPL;
521 case ARPHRD_FCFABRIC: return ZEBRA_LLT_FCFABRIC;
522 case ARPHRD_IEEE802_TR: return ZEBRA_LLT_IEEE802_TR;
523 case ARPHRD_IEEE80211: return ZEBRA_LLT_IEEE80211;
524 case ARPHRD_IEEE802154: return ZEBRA_LLT_IEEE802154;
526 case ARPHRD_IP6GRE: return ZEBRA_LLT_IP6GRE;
528 #ifdef ARPHRD_IEEE802154_PHY
529 case ARPHRD_IEEE802154_PHY: return ZEBRA_LLT_IEEE802154_PHY;
532 default: return ZEBRA_LLT_UNKNOWN;
/* Filter callback used for the RTM_GETLINK dump.  For an RTM_NEWLINK message
 * it parses the IFLA_* attributes, obtains the interface named by IFLA_IFNAME
 * in vrf_id through if_get_by_name_vrf(), and fills in its ifindex, flags,
 * MTU (mtu and mtu6), link-layer type and hardware address. */
536 /* Called from interface_lookup_netlink(). This function is only used
539 netlink_interface (struct sockaddr_nl *snl, struct nlmsghdr *h,
543 struct ifinfomsg *ifi;
544 struct rtattr *tb[IFLA_MAX + 1];
545 struct interface *ifp;
548 ifi = NLMSG_DATA (h);
550 if (h->nlmsg_type != RTM_NEWLINK)
553 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg));
557 /* Looking up interface name. */
558 memset (tb, 0, sizeof tb);
559 netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len);
562 /* check for wireless messages to ignore */
563 if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0))
565 if (IS_ZEBRA_DEBUG_KERNEL)
566 zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__);
569 #endif /* IFLA_WIRELESS */
/* A link message without a name cannot be matched to an interface. */
571 if (tb[IFLA_IFNAME] == NULL)
573 name = (char *) RTA_DATA (tb[IFLA_IFNAME]);
576 ifp = if_get_by_name_vrf (name, vrf_id);
577 set_ifindex(ifp, ifi->ifi_index);
/* NOTE(review): the mask literal has nine hex digits (value 0xfffff);
 * confirm that this, rather than 0x0000ffff, is intended. */
578 ifp->flags = ifi->ifi_flags & 0x0000fffff;
/* NOTE(review): no NULL check of tb[IFLA_MTU] is visible before this
 * dereference; confirm the attribute is always present. */
579 ifp->mtu6 = ifp->mtu = *(uint32_t *) RTA_DATA (tb[IFLA_MTU]);
582 /* Hardware type and address. */
583 ifp->ll_type = netlink_to_zebra_link_type (ifi->ifi_type);
584 netlink_interface_update_hw_addr (tb, ifp);
591 /* Lookup interface IPv4/IPv6 address. */
/* Filter callback for RTM_NEWADDR / RTM_DELADDR messages of family AF_INET
 * (and AF_INET6 when HAVE_IPV6 is defined).
 *
 * Parses the IFA_* attributes, finds the interface by ifa_index in vrf_id,
 * optionally dumps the attributes at debug level, and then registers the
 * address with connected_add_ipv4 / connected_add_ipv6 or removes it with
 * connected_delete_ipv4 / connected_delete_ipv6.
 *
 * The second address handed to those functions is either the peer address
 * (IFA_ADDRESS when it differs from IFA_LOCAL, flagged ZEBRA_IFA_PEER) or the
 * IFA_BROADCAST attribute, which may be absent (NULL). */
593 netlink_interface_addr (struct sockaddr_nl *snl, struct nlmsghdr *h,
597 struct ifaddrmsg *ifa;
598 struct rtattr *tb[IFA_MAX + 1];
599 struct interface *ifp;
605 ifa = NLMSG_DATA (h);
607 if (ifa->ifa_family != AF_INET
609 && ifa->ifa_family != AF_INET6
610 #endif /* HAVE_IPV6 */
614 if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
617 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifaddrmsg));
621 memset (tb, 0, sizeof tb);
622 netlink_parse_rtattr (tb, IFA_MAX, IFA_RTA (ifa), len);
624 ifp = if_lookup_by_index_vrf (ifa->ifa_index, vrf_id);
627 zlog_err ("netlink_interface_addr can't find interface by index %d vrf %u",
628 ifa->ifa_index, vrf_id);
632 if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */
635 zlog_debug ("netlink_interface_addr %s %s vrf %u:",
636 lookup (nlmsg_str, h->nlmsg_type), ifp->name, vrf_id);
638 zlog_debug (" IFA_LOCAL %s/%d",
639 inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_LOCAL]),
640 buf, BUFSIZ), ifa->ifa_prefixlen);
642 zlog_debug (" IFA_ADDRESS %s/%d",
643 inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_ADDRESS]),
644 buf, BUFSIZ), ifa->ifa_prefixlen);
645 if (tb[IFA_BROADCAST])
646 zlog_debug (" IFA_BROADCAST %s/%d",
647 inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_BROADCAST]),
648 buf, BUFSIZ), ifa->ifa_prefixlen);
649 if (tb[IFA_LABEL] && strcmp (ifp->name, RTA_DATA (tb[IFA_LABEL])))
650 zlog_debug (" IFA_LABEL %s", (char *)RTA_DATA (tb[IFA_LABEL]));
652 if (tb[IFA_CACHEINFO])
654 struct ifa_cacheinfo *ci = RTA_DATA (tb[IFA_CACHEINFO]);
655 zlog_debug (" IFA_CACHEINFO pref %d, valid %d",
656 ci->ifa_prefered, ci->ifa_valid);
660 /* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */
/* Each of IFA_LOCAL / IFA_ADDRESS defaults to the other when missing, so
 * after these two statements both are set or both are NULL. */
661 if (tb[IFA_LOCAL] == NULL)
662 tb[IFA_LOCAL] = tb[IFA_ADDRESS];
663 if (tb[IFA_ADDRESS] == NULL)
664 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
666 /* local interface address */
667 addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL);
669 /* is there a peer address? */
670 if (tb[IFA_ADDRESS] &&
671 memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_ADDRESS])))
673 broad = RTA_DATA(tb[IFA_ADDRESS]);
674 SET_FLAG (flags, ZEBRA_IFA_PEER);
677 /* seeking a broadcast address */
678 broad = (tb[IFA_BROADCAST] ? RTA_DATA(tb[IFA_BROADCAST]) : NULL);
680 /* addr is primary key, SOL if we don't have one */
683 zlog_debug ("%s: NULL address", __func__);
688 if (ifa->ifa_flags & IFA_F_SECONDARY)
689 SET_FLAG (flags, ZEBRA_IFA_SECONDARY);
693 label = (char *) RTA_DATA (tb[IFA_LABEL]);
/* NOTE(review): the label is compared with the interface name here; the
 * action taken on a match is on a line not visible in this listing. */
695 if (ifp && label && strcmp (ifp->name, label) == 0)
698 /* Register interface address to the interface. */
699 if (ifa->ifa_family == AF_INET)
701 if (h->nlmsg_type == RTM_NEWADDR)
702 connected_add_ipv4 (ifp, flags,
703 (struct in_addr *) addr, ifa->ifa_prefixlen,
704 (struct in_addr *) broad, label);
706 connected_delete_ipv4 (ifp, flags,
707 (struct in_addr *) addr, ifa->ifa_prefixlen,
708 (struct in_addr *) broad);
711 if (ifa->ifa_family == AF_INET6)
713 if (h->nlmsg_type == RTM_NEWADDR)
714 connected_add_ipv6 (ifp, flags,
715 (struct in6_addr *) addr, ifa->ifa_prefixlen,
716 (struct in6_addr *) broad, label);
/* Unlike the IPv4 variant, the IPv6 delete call takes no flags argument. */
718 connected_delete_ipv6 (ifp,
719 (struct in6_addr *) addr, ifa->ifa_prefixlen,
720 (struct in6_addr *) broad);
722 #endif /* HAVE_IPV6 */
727 /* Looking up routing table by netlink interface. */
/* Filter callback for the RTM_GETROUTE dump.  Takes RTM_NEWROUTE messages of
 * type RTN_UNICAST, parses the RTA_* attributes and adds the route to the RIB
 * as ZEBRA_ROUTE_KERNEL:
 *
 *  - IPv4 without RTA_MULTIPATH: rib_add_ipv4();
 *  - IPv4 with RTA_MULTIPATH: a struct rib is allocated, one nexthop is added
 *    per rtnexthop entry, and the rib is passed to rib_add_ipv4_multipath()
 *    (or freed again when no nexthop was added);
 *  - IPv6: rib_add_ipv6().
 *
 * Routes with protocol RTPROT_ZEBRA get ZEBRA_FLAG_SELFROUTE.  The message is
 * also tested for RTM_F_CLONED, RTPROT_REDIRECT, RTPROT_KERNEL and a non-zero
 * rtm_src_len.
 * NOTE(review): what happens when those tests match is on lines not visible
 * in this listing. */
729 netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h,
734 struct rtattr *tb[RTA_MAX + 1];
737 char anyaddr[16] = { 0 };
747 rtm = NLMSG_DATA (h);
749 if (h->nlmsg_type != RTM_NEWROUTE)
751 if (rtm->rtm_type != RTN_UNICAST)
754 table = rtm->rtm_table;
755 #if 0 /* we weed them out later in rib_weed_tables () */
756 if (table != RT_TABLE_MAIN && table != zebrad.rtm_table_default)
760 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct rtmsg));
764 memset (tb, 0, sizeof tb);
765 netlink_parse_rtattr (tb, RTA_MAX, RTM_RTA (rtm), len);
767 if (rtm->rtm_flags & RTM_F_CLONED)
769 if (rtm->rtm_protocol == RTPROT_REDIRECT)
771 if (rtm->rtm_protocol == RTPROT_KERNEL)
774 if (rtm->rtm_src_len != 0)
777 /* Route which was inserted by Zebra. */
778 if (rtm->rtm_protocol == RTPROT_ZEBRA)
779 flags |= ZEBRA_FLAG_SELFROUTE;
/* Pull out interface index, destination, preferred source and gateway. */
787 index = *(int *) RTA_DATA (tb[RTA_OIF]);
790 dest = RTA_DATA (tb[RTA_DST]);
795 src = RTA_DATA (tb[RTA_PREFSRC]);
798 gate = RTA_DATA (tb[RTA_GATEWAY]);
802 struct rtattr *mxrta[RTAX_MAX+1];
/* RTA_METRICS is itself a nested attribute list; RTAX_MTU is read from it. */
804 memset (mxrta, 0, sizeof mxrta);
805 netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
806 RTA_PAYLOAD(tb[RTA_METRICS]));
809 mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]);
812 if (rtm->rtm_family == AF_INET)
814 struct prefix_ipv4 p;
816 memcpy (&p.prefix, dest, 4);
817 p.prefixlen = rtm->rtm_dst_len;
819 if (!tb[RTA_MULTIPATH])
820 rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, src, index,
821 vrf_id, table, 0, mtu, 0, SAFI_UNICAST);
824 /* This is a multipath route */
827 struct rtnexthop *rtnh =
828 (struct rtnexthop *) RTA_DATA (tb[RTA_MULTIPATH]);
830 len = RTA_PAYLOAD (tb[RTA_MULTIPATH]);
832 rib = XCALLOC (MTYPE_RIB, sizeof (struct rib));
833 rib->type = ZEBRA_ROUTE_KERNEL;
838 rib->vrf_id = vrf_id;
840 rib->nexthop_num = 0;
841 rib->uptime = time (NULL);
/* Bounds test for the current rtnexthop entry against the bytes left. */
845 if (len < (int) sizeof (*rtnh) || rtnh->rtnh_len > len)
848 index = rtnh->rtnh_ifindex;
/* An entry longer than the bare header carries its own attributes; tb is
 * reused to parse them. */
850 if (rtnh->rtnh_len > sizeof (*rtnh))
852 memset (tb, 0, sizeof (tb));
853 netlink_parse_rtattr (tb, RTA_MAX, RTNH_DATA (rtnh),
854 rtnh->rtnh_len - sizeof (*rtnh));
856 gate = RTA_DATA (tb[RTA_GATEWAY]);
862 rib_nexthop_ipv4_ifindex_add (rib, gate, src, index);
864 rib_nexthop_ipv4_add (rib, gate, src);
867 rib_nexthop_ifindex_add (rib, index);
/* Advance to the next rtnexthop entry. */
869 len -= NLMSG_ALIGN(rtnh->rtnh_len);
870 rtnh = RTNH_NEXT(rtnh);
873 if (rib->nexthop_num == 0)
874 XFREE (MTYPE_RIB, rib);
876 rib_add_ipv4_multipath (&p, rib, SAFI_UNICAST);
880 if (rtm->rtm_family == AF_INET6)
882 struct prefix_ipv6 p;
884 memcpy (&p.prefix, dest, 16);
885 p.prefixlen = rtm->rtm_dst_len;
887 rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, index, vrf_id,
888 table, 0, mtu, 0, SAFI_UNICAST);
890 #endif /* HAVE_IPV6 */
/* Printable names for the RTPROT_* route origin codes; used through lookup()
 * in the debug output of netlink_route_change(). */
895 static const struct message rtproto_str[] = {
896 {RTPROT_REDIRECT, "redirect"},
897 {RTPROT_KERNEL, "kernel"},
898 {RTPROT_BOOT, "boot"},
899 {RTPROT_STATIC, "static"},
900 {RTPROT_GATED, "GateD"},
901 {RTPROT_RA, "router advertisement"},
903 {RTPROT_ZEBRA, "Zebra"},
905 {RTPROT_BIRD, "BIRD"},
906 #endif /* RTPROT_BIRD */
910 /* Routing information change from the kernel. */
/* Handles RTM_NEWROUTE / RTM_DELROUTE notifications; any other message type
 * is reported with a warning.
 *
 * Only RTN_UNICAST routes are processed, and the table is compared against
 * RT_TABLE_MAIN and zebrad.rtm_table_default.  After parsing the RTA_*
 * attributes:
 *
 *  - RTM_NEWROUTE is added to the RIB as ZEBRA_ROUTE_KERNEL with
 *    rib_add_ipv4(), rib_add_ipv4_multipath() (one nexthop per rtnexthop
 *    entry of RTA_MULTIPATH) or rib_add_ipv6(); the MTU is taken from
 *    RTAX_MTU inside RTA_METRICS when present;
 *  - RTM_DELROUTE is removed with rib_delete_ipv4() / rib_delete_ipv6(),
 *    passing ZEBRA_FLAG_SELFROUTE for routes of protocol RTPROT_ZEBRA.
 *
 * The message is also tested for RTM_F_CLONED, RTPROT_REDIRECT, RTPROT_KERNEL
 * and for RTPROT_ZEBRA combined with RTM_NEWROUTE.
 * NOTE(review): what happens when those tests match is on lines not visible
 * in this listing. */
912 netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h,
917 struct rtattr *tb[RTA_MAX + 1];
918 u_char zebra_flags = 0;
920 char anyaddr[16] = { 0 };
930 rtm = NLMSG_DATA (h);
932 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE))
934 /* If this is not a route add/delete message, print a warning. */
935 zlog_warn ("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
939 /* Connected route. */
940 if (IS_ZEBRA_DEBUG_KERNEL)
941 zlog_debug ("%s %s %s proto %s vrf %u",
943 RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
944 rtm->rtm_family == AF_INET ? "ipv4" : "ipv6",
945 rtm->rtm_type == RTN_UNICAST ? "unicast" : "multicast",
946 lookup (rtproto_str, rtm->rtm_protocol),
949 if (rtm->rtm_type != RTN_UNICAST)
954 table = rtm->rtm_table;
955 if (table != RT_TABLE_MAIN && table != zebrad.rtm_table_default)
960 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct rtmsg));
964 memset (tb, 0, sizeof tb);
965 netlink_parse_rtattr (tb, RTA_MAX, RTM_RTA (rtm), len);
967 if (rtm->rtm_flags & RTM_F_CLONED)
969 if (rtm->rtm_protocol == RTPROT_REDIRECT)
971 if (rtm->rtm_protocol == RTPROT_KERNEL)
974 if (rtm->rtm_protocol == RTPROT_ZEBRA && h->nlmsg_type == RTM_NEWROUTE)
976 if (rtm->rtm_protocol == RTPROT_ZEBRA)
977 SET_FLAG(zebra_flags, ZEBRA_FLAG_SELFROUTE);
/* NOTE(review): the warning text says "no src len" although the condition
 * visible here is a non-zero rtm_src_len; confirm the wording. */
979 if (rtm->rtm_src_len != 0)
981 zlog_warn ("netlink_route_change(): no src len, vrf %u", vrf_id);
/* Pull out interface index, destination, gateway and preferred source. */
991 index = *(int *) RTA_DATA (tb[RTA_OIF]);
994 dest = RTA_DATA (tb[RTA_DST]);
999 gate = RTA_DATA (tb[RTA_GATEWAY]);
1001 if (tb[RTA_PREFSRC])
1002 src = RTA_DATA (tb[RTA_PREFSRC]);
1004 if (h->nlmsg_type == RTM_NEWROUTE)
1006 if (tb[RTA_METRICS])
1008 struct rtattr *mxrta[RTAX_MAX+1];
/* RTA_METRICS is itself a nested attribute list. */
1010 memset (mxrta, 0, sizeof mxrta);
1011 netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
1012 RTA_PAYLOAD(tb[RTA_METRICS]));
1014 if (mxrta[RTAX_MTU])
1015 mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]);
1019 if (rtm->rtm_family == AF_INET)
1021 struct prefix_ipv4 p;
1023 memcpy (&p.prefix, dest, 4);
1024 p.prefixlen = rtm->rtm_dst_len;
1026 if (IS_ZEBRA_DEBUG_KERNEL)
1028 char buf[PREFIX_STRLEN];
1029 zlog_debug ("%s %s vrf %u",
1030 h->nlmsg_type == RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
1031 prefix2str (&p, buf, sizeof(buf)), vrf_id);
1034 if (h->nlmsg_type == RTM_NEWROUTE)
/* The flags argument of the add calls is the literal 0 here; zebra_flags is
 * only handed to the delete calls. */
1036 if (!tb[RTA_MULTIPATH])
1037 rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, src, index, vrf_id,
1038 table, 0, mtu, 0, SAFI_UNICAST);
1041 /* This is a multipath route */
1044 struct rtnexthop *rtnh =
1045 (struct rtnexthop *) RTA_DATA (tb[RTA_MULTIPATH]);
1047 len = RTA_PAYLOAD (tb[RTA_MULTIPATH]);
1049 rib = XCALLOC (MTYPE_RIB, sizeof (struct rib));
1050 rib->type = ZEBRA_ROUTE_KERNEL;
1055 rib->vrf_id = vrf_id;
1057 rib->nexthop_num = 0;
1058 rib->uptime = time (NULL);
/* Bounds test for the current rtnexthop entry against the bytes left. */
1062 if (len < (int) sizeof (*rtnh) || rtnh->rtnh_len > len)
1065 index = rtnh->rtnh_ifindex;
/* An entry longer than the bare header carries its own attributes; tb is
 * reused to parse them. */
1067 if (rtnh->rtnh_len > sizeof (*rtnh))
1069 memset (tb, 0, sizeof (tb));
1070 netlink_parse_rtattr (tb, RTA_MAX, RTNH_DATA (rtnh),
1071 rtnh->rtnh_len - sizeof (*rtnh));
1072 if (tb[RTA_GATEWAY])
1073 gate = RTA_DATA (tb[RTA_GATEWAY]);
1079 rib_nexthop_ipv4_ifindex_add (rib, gate, src, index);
1081 rib_nexthop_ipv4_add (rib, gate, src);
1084 rib_nexthop_ifindex_add (rib, index);
/* Advance to the next rtnexthop entry. */
1086 len -= NLMSG_ALIGN(rtnh->rtnh_len);
1087 rtnh = RTNH_NEXT(rtnh);
/* A rib without any nexthop is discarded instead of being installed. */
1090 if (rib->nexthop_num == 0)
1091 XFREE (MTYPE_RIB, rib);
1093 rib_add_ipv4_multipath (&p, rib, SAFI_UNICAST);
1097 rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p, gate,
1098 index, vrf_id, SAFI_UNICAST);
1102 if (rtm->rtm_family == AF_INET6)
1104 struct prefix_ipv6 p;
1106 p.family = AF_INET6;
1107 memcpy (&p.prefix, dest, 16);
1108 p.prefixlen = rtm->rtm_dst_len;
1110 if (IS_ZEBRA_DEBUG_KERNEL)
1112 char buf[PREFIX_STRLEN];
1113 zlog_debug ("%s %s vrf %u",
1114 h->nlmsg_type == RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
1115 prefix2str (&p, buf, sizeof(buf)), vrf_id);
1118 if (h->nlmsg_type == RTM_NEWROUTE)
1119 rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, vrf_id, table,
1120 0, mtu, 0, SAFI_UNICAST);
1122 rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p, gate, index, vrf_id,
1125 #endif /* HAVE_IPV6 */
/* Handles RTM_NEWLINK / RTM_DELLINK notifications; any other message type is
 * reported with a warning.
 *
 * RTM_NEWLINK: the interface is looked up by name in vrf_id.  If it is
 * unknown or not marked ZEBRA_INTERFACE_ACTIVE, it is obtained through
 * if_get_by_name_vrf(), filled in (ifindex, flags, MTU, hardware address)
 * and announced with if_add_update().  Otherwise the message is treated as a
 * status change: ifindex, MTU and hardware address are refreshed, and the
 * operative state before and after applying the new flags is compared.
 *
 * RTM_DELLINK: the interface is looked up by name, a warning is logged when
 * it cannot be found, and if_delete_update() is called.
 *
 * NOTE(review): no NULL check of tb[IFLA_MTU] is visible before it is
 * dereferenced below; confirm the attribute is always present. */
1131 netlink_link_change (struct sockaddr_nl *snl, struct nlmsghdr *h,
1135 struct ifinfomsg *ifi;
1136 struct rtattr *tb[IFLA_MAX + 1];
1137 struct interface *ifp;
1140 ifi = NLMSG_DATA (h);
1142 if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK))
1144 /* If this is not a link add/delete message, print a warning. */
1145 zlog_warn ("netlink_link_change: wrong kernel message %d vrf %u\n",
1146 h->nlmsg_type, vrf_id);
1150 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg));
1154 /* Looking up interface name. */
1155 memset (tb, 0, sizeof tb);
1156 netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len);
1158 #ifdef IFLA_WIRELESS
1159 /* check for wireless messages to ignore */
1160 if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0))
1162 if (IS_ZEBRA_DEBUG_KERNEL)
1163 zlog_debug ("%s: ignoring IFLA_WIRELESS message, vrf %u", __func__,
1167 #endif /* IFLA_WIRELESS */
/* A link message without a name cannot be matched to an interface. */
1169 if (tb[IFLA_IFNAME] == NULL)
1171 name = (char *) RTA_DATA (tb[IFLA_IFNAME]);
1173 /* Add interface. */
1174 if (h->nlmsg_type == RTM_NEWLINK)
1176 ifp = if_lookup_by_name_vrf (name, vrf_id);
1178 if (ifp == NULL || !CHECK_FLAG (ifp->status, ZEBRA_INTERFACE_ACTIVE))
1181 ifp = if_get_by_name_vrf (name, vrf_id);
1183 set_ifindex(ifp, ifi->ifi_index);
1184 ifp->flags = ifi->ifi_flags & 0x0000fffff;
1185 ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]);
1188 netlink_interface_update_hw_addr (tb, ifp);
1190 /* If new link is added. */
1191 if_add_update (ifp);
1195 /* Interface status change. */
1196 set_ifindex(ifp, ifi->ifi_index);
1197 ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]);
1200 netlink_interface_update_hw_addr (tb, ifp);
/* Compare the operative state before and after applying the new flags. */
1202 if (if_is_operative (ifp))
1204 ifp->flags = ifi->ifi_flags & 0x0000fffff;
1205 if (!if_is_operative (ifp))
1208 /* Must notify client daemons of new interface status. */
1209 zebra_interface_up_update (ifp);
1213 ifp->flags = ifi->ifi_flags & 0x0000fffff;
1214 if (if_is_operative (ifp))
/* RTM_DELLINK handling: find the interface and report its removal. */
1222 ifp = if_lookup_by_name_vrf (name, vrf_id);
1226 zlog_warn ("interface %s vrf %u is deleted but can't find",
1231 if_delete_update (ifp);
/* Dispatcher for netlink notifications.  Messages whose sender nl_pid is
 * non-zero are logged and ignored.  The remaining ones are routed by
 * nlmsg_type to netlink_route_change(), netlink_link_change() or
 * netlink_interface_addr(); an unrecognised type is reported with a warning.
 * The case labels of the switch are not visible in this listing. */
1238 netlink_information_fetch (struct sockaddr_nl *snl, struct nlmsghdr *h,
1241 /* JF: Ignore messages that aren't from the kernel */
1242 if ( snl->nl_pid != 0 )
1244 zlog ( NULL, LOG_ERR, "Ignoring message from pid %u", snl->nl_pid );
1248 switch (h->nlmsg_type)
1251 return netlink_route_change (snl, h, vrf_id);
1254 return netlink_route_change (snl, h, vrf_id);
1257 return netlink_link_change (snl, h, vrf_id);
1260 return netlink_link_change (snl, h, vrf_id);
1263 return netlink_interface_addr (snl, h, vrf_id);
1266 return netlink_interface_addr (snl, h, vrf_id);
1269 zlog_warn ("Unknown netlink nlmsg_type %d vrf %u\n", h->nlmsg_type,
1276 /* Interface lookup by netlink socket. */
/* Issues three dump requests on the command socket zvrf->netlink_cmd and
 * parses each reply with netlink_parse_info():
 *   1. RTM_GETLINK (family AF_PACKET), handled by netlink_interface();
 *   2. RTM_GETADDR for AF_INET, handled by netlink_interface_addr();
 *   3. RTM_GETADDR for AF_INET6, handled by netlink_interface_addr(). */
1278 interface_lookup_netlink (struct zebra_vrf *zvrf)
1282 /* Get interface information. */
1283 ret = netlink_request (AF_PACKET, RTM_GETLINK, &zvrf->netlink_cmd);
1286 ret = netlink_parse_info (netlink_interface, &zvrf->netlink_cmd, zvrf);
1290 /* Get IPv4 address of the interfaces. */
1291 ret = netlink_request (AF_INET, RTM_GETADDR, &zvrf->netlink_cmd);
1294 ret = netlink_parse_info (netlink_interface_addr, &zvrf->netlink_cmd, zvrf);
1299 /* Get IPv6 address of the interfaces. */
1300 ret = netlink_request (AF_INET6, RTM_GETADDR, &zvrf->netlink_cmd);
1303 ret = netlink_parse_info (netlink_interface_addr, &zvrf->netlink_cmd, zvrf);
1306 #endif /* HAVE_IPV6 */
/* Requests an RTM_GETROUTE dump on the command socket zvrf->netlink_cmd, first
 * for AF_INET and then for AF_INET6, and parses each reply with
 * netlink_parse_info() using netlink_routing_table() as the filter. */
1311 /* Routing table read function using netlink interface. Only called
1314 netlink_route_read (struct zebra_vrf *zvrf)
1318 /* Get IPv4 routing table. */
1319 ret = netlink_request (AF_INET, RTM_GETROUTE, &zvrf->netlink_cmd);
1322 ret = netlink_parse_info (netlink_routing_table, &zvrf->netlink_cmd, zvrf);
1327 /* Get IPv6 routing table. */
1328 ret = netlink_request (AF_INET6, RTM_GETROUTE, &zvrf->netlink_cmd);
1331 ret = netlink_parse_info (netlink_routing_table, &zvrf->netlink_cmd, zvrf);
1334 #endif /* HAVE_IPV6 */
1339 /* Utility function taken from iproute2.
1340 Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */
/* Appends one attribute to the netlink message n.
 *
 * The attribute is placed at the aligned end of the message, its type is set
 * to `type`, alen bytes are copied from data into its payload, and
 * n->nlmsg_len is advanced by RTA_LENGTH(alen) from the aligned old length.
 * The bound test below compares the resulting length with maxlen, the size
 * of the buffer holding n.
 *
 * @param n       message being built
 * @param maxlen  capacity of the buffer that contains n
 * @param type    attribute type
 * @param data    payload bytes
 * @param alen    payload length in bytes */
1342 addattr_l (struct nlmsghdr *n, size_t maxlen, int type, void *data, size_t alen)
1347 len = RTA_LENGTH (alen);
1349 if (NLMSG_ALIGN (n->nlmsg_len) + len > maxlen)
1352 rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len));
1353 rta->rta_type = type;
1355 memcpy (RTA_DATA (rta), data, alen);
1356 n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + len;
/* Appends a nested attribute to the attribute rta.
 *
 * The sub-attribute is placed at the aligned end of rta, given `type` and a
 * length of RTA_LENGTH(alen), and alen bytes are copied from data into its
 * payload; rta->rta_len is then advanced accordingly.  The bound test below
 * compares the resulting length with maxlen.
 *
 * NOTE(review): the final length update aligns with NLMSG_ALIGN whereas the
 * bound test and the pointer computation use RTA_ALIGN; confirm both macros
 * use the same alignment on the target headers. */
1362 rta_addattr_l (struct rtattr *rta, size_t maxlen, int type, void *data,
1366 struct rtattr *subrta;
1368 len = RTA_LENGTH (alen);
1370 if (RTA_ALIGN (rta->rta_len) + len > maxlen)
1373 subrta = (struct rtattr *) (((char *) rta) + RTA_ALIGN (rta->rta_len));
1374 subrta->rta_type = type;
1375 subrta->rta_len = len;
1376 memcpy (RTA_DATA (subrta), data, alen);
1377 rta->rta_len = NLMSG_ALIGN (rta->rta_len) + len;
1382 /* Utility function taken from iproute2.
1383 Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */
/* Appends one attribute with a 4-byte payload to the netlink message n.
 *
 * Works like addattr_l() with a fixed payload length of 4: the attribute is
 * placed at the aligned end of the message, its type is set, the first four
 * bytes of `data` are copied in, and n->nlmsg_len is advanced.  The bound
 * test below compares the resulting length with maxlen. */
1385 addattr32 (struct nlmsghdr *n, size_t maxlen, int type, int data)
1390 len = RTA_LENGTH (4);
1392 if (NLMSG_ALIGN (n->nlmsg_len) + len > maxlen)
1395 rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len));
1396 rta->rta_type = type;
1398 memcpy (RTA_DATA (rta), &data, 4);
1399 n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + len;
/* Filter handed to netlink_parse_info() by netlink_talk(): logs a warning with
 * the message type for every message it receives. */
1405 netlink_talk_filter (struct sockaddr_nl *snl, struct nlmsghdr *h,
1408 zlog_warn ("netlink_talk: ignoring message type 0x%04x vrf %u", h->nlmsg_type,
1413 /* sendmsg() to netlink socket then recvmsg(). */
/* Sends the prepared message n on nl and processes the reply.
 *
 * The next sequence number is stamped into n (nl->seq is incremented first)
 * and NLM_F_ACK is added to its flags.  The message is sent with sendmsg()
 * to a zeroed AF_NETLINK address while privileges are raised; a send failure
 * is logged using save_errno.  The reply is then read with
 * netlink_parse_info(), using netlink_talk_filter() as the filter.
 *
 * @param n     message to send; its nlmsg_seq and nlmsg_flags are modified
 * @param nl    socket to talk on
 * @param zvrf  VRF context forwarded to netlink_parse_info() */
1415 netlink_talk (struct nlmsghdr *n, struct nlsock *nl, struct zebra_vrf *zvrf)
1418 struct sockaddr_nl snl;
1419 struct iovec iov = {
1420 .iov_base = (void *) n,
1421 .iov_len = n->nlmsg_len
1423 struct msghdr msg = {
1424 .msg_name = (void *) &snl,
1425 .msg_namelen = sizeof snl,
1431 memset (&snl, 0, sizeof snl);
1432 snl.nl_family = AF_NETLINK;
1434 n->nlmsg_seq = ++nl->seq;
1436 /* Request an acknowledgement by setting NLM_F_ACK */
1437 n->nlmsg_flags |= NLM_F_ACK;
1439 if (IS_ZEBRA_DEBUG_KERNEL)
1440 zlog_debug ("netlink_talk: %s type %s(%u), seq=%u", nl->name,
1441 lookup (nlmsg_str, n->nlmsg_type), n->nlmsg_type,
1444 /* Send message to netlink interface. */
1445 if (zserv_privs.change (ZPRIVS_RAISE))
1446 zlog (NULL, LOG_ERR, "Can't raise privileges");
1447 status = sendmsg (nl->sock, &msg, 0);
1449 if (zserv_privs.change (ZPRIVS_LOWER))
1450 zlog (NULL, LOG_ERR, "Can't lower privileges");
1454 zlog (NULL, LOG_ERR, "netlink_talk sendmsg() error: %s",
1455 safe_strerror (save_errno));
1461 * Get reply from netlink socket.
1462 * The reply should either be an acknowlegement or an error.
1464 return netlink_parse_info (netlink_talk_filter, nl, zvrf);
1467 /* This function takes a nexthop as argument and adds
1468 * the appropriate netlink attributes to an existing
1471 * @param routedesc: Human readable description of route type
1472 * (direct/recursive, single-/multipath)
1473 * @param bytelen: Length of addresses in bytes.
1474 * @param nexthop: Nexthop information
1475 * @param nlmsg: nlmsghdr structure to fill in.
1476 * @param req_size: The size allocated for the message.
1479 _netlink_route_build_singlepath(
1480 const char *routedesc,
1482 struct nexthop *nexthop,
1483 struct nlmsghdr *nlmsg,
1484 struct rtmsg *rtmsg,
1487 if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ONLINK))
1488 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1489 if (nexthop->type == NEXTHOP_TYPE_IPV4
1490 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
1492 addattr_l (nlmsg, req_size, RTA_GATEWAY,
1493 &nexthop->gate.ipv4, bytelen);
1494 if (nexthop->src.ipv4.s_addr)
1495 addattr_l (nlmsg, req_size, RTA_PREFSRC,
1496 &nexthop->src.ipv4, bytelen);
1498 if (IS_ZEBRA_DEBUG_KERNEL)
1499 zlog_debug("netlink_route_multipath() (%s): "
1500 "nexthop via %s if %u",
1502 inet_ntoa (nexthop->gate.ipv4),
1506 if (nexthop->type == NEXTHOP_TYPE_IPV6
1507 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
1508 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
1510 addattr_l (nlmsg, req_size, RTA_GATEWAY,
1511 &nexthop->gate.ipv6, bytelen);
1513 if (IS_ZEBRA_DEBUG_KERNEL)
1514 zlog_debug("netlink_route_multipath() (%s): "
1515 "nexthop via %s if %u",
1517 inet6_ntoa (nexthop->gate.ipv6),
1520 #endif /* HAVE_IPV6 */
1521 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
1522 || nexthop->type == NEXTHOP_TYPE_IFNAME
1523 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
1525 addattr32 (nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1527 if (nexthop->src.ipv4.s_addr)
1528 addattr_l (nlmsg, req_size, RTA_PREFSRC,
1529 &nexthop->src.ipv4, bytelen);
1531 if (IS_ZEBRA_DEBUG_KERNEL)
1532 zlog_debug("netlink_route_multipath() (%s): "
1533 "nexthop via if %u", routedesc, nexthop->ifindex);
1536 if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX
1537 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME)
1539 addattr32 (nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1541 if (IS_ZEBRA_DEBUG_KERNEL)
1542 zlog_debug("netlink_route_multipath() (%s): "
1543 "nexthop via if %u", routedesc, nexthop->ifindex);
1547 /* This function takes a nexthop as argument and
1548 * appends to the given rtattr/rtnexthop pair the
1549 * representation of the nexthop. If the nexthop
1550 * defines a preferred source, the src parameter
1551 * will be modified to point to that src, otherwise
1552 * it will be kept unmodified.
1554 * @param routedesc: Human readable description of route type
1555 * (direct/recursive, single-/multipath)
1556 * @param bytelen: Length of addresses in bytes.
1557 * @param nexthop: Nexthop information
1558 * @param rta: rtnetlink attribute structure
1559 * @param rtnh: pointer to an rtnetlink nexthop structure
1560 * @param src: pointer pointing to a location where
1561 * the prefsrc should be stored.
1564 _netlink_route_build_multipath(
1565 const char *routedesc,
1567 struct nexthop *nexthop,
1569 struct rtnexthop *rtnh,
1573 rtnh->rtnh_len = sizeof (*rtnh);
1574 rtnh->rtnh_flags = 0;
1575 rtnh->rtnh_hops = 0;
1576 rta->rta_len += rtnh->rtnh_len;
1578 if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ONLINK))
1579 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1581 if (nexthop->type == NEXTHOP_TYPE_IPV4
1582 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
1584 rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTA_GATEWAY,
1585 &nexthop->gate.ipv4, bytelen);
1586 rtnh->rtnh_len += sizeof (struct rtattr) + bytelen;
1588 if (nexthop->src.ipv4.s_addr)
1589 *src = &nexthop->src;
1591 if (IS_ZEBRA_DEBUG_KERNEL)
1592 zlog_debug("netlink_route_multipath() (%s): "
1593 "nexthop via %s if %u",
1595 inet_ntoa (nexthop->gate.ipv4),
1599 if (nexthop->type == NEXTHOP_TYPE_IPV6
1600 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
1601 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
1603 rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTA_GATEWAY,
1604 &nexthop->gate.ipv6, bytelen);
1605 rtnh->rtnh_len += sizeof (struct rtattr) + bytelen;
1607 if (IS_ZEBRA_DEBUG_KERNEL)
1608 zlog_debug("netlink_route_multipath() (%s): "
1609 "nexthop via %s if %u",
1611 inet6_ntoa (nexthop->gate.ipv6),
1614 #endif /* HAVE_IPV6 */
1616 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1617 || nexthop->type == NEXTHOP_TYPE_IFINDEX
1618 || nexthop->type == NEXTHOP_TYPE_IFNAME)
1620 rtnh->rtnh_ifindex = nexthop->ifindex;
1621 if (nexthop->src.ipv4.s_addr)
1622 *src = &nexthop->src;
1623 if (IS_ZEBRA_DEBUG_KERNEL)
1624 zlog_debug("netlink_route_multipath() (%s): "
1625 "nexthop via if %u", routedesc, nexthop->ifindex);
1627 else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
1628 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
1630 rtnh->rtnh_ifindex = nexthop->ifindex;
1632 if (IS_ZEBRA_DEBUG_KERNEL)
1633 zlog_debug("netlink_route_multipath() (%s): "
1634 "nexthop via if %u", routedesc, nexthop->ifindex);
1638 rtnh->rtnh_ifindex = 0;
1642 /* Log debug information for netlink_route_multipath
1643 * if debug logging is enabled.
1645 * @param cmd: Netlink command which is to be processed
1646 * @param p: Prefix for which the change is due
1647 * @param nexthop: Nexthop which is currently processed
1648 * @param routedesc: Semantic annotation for nexthop
1649 * (recursive, multipath, etc.)
1650 * @param family: Address family which the change concerns
1653 _netlink_route_debug(
1656 struct nexthop *nexthop,
1657 const char *routedesc,
1659 struct zebra_vrf *zvrf)
1661 if (IS_ZEBRA_DEBUG_KERNEL)
1663 char buf[PREFIX_STRLEN];
1664 zlog_debug ("netlink_route_multipath() (%s): %s %s vrf %u type %s",
1666 lookup (nlmsg_str, cmd),
1667 prefix2str (p, buf, sizeof(buf)),
1669 nexthop_type_to_str (nexthop->type));
/* netlink_route_multipath: build an rtnetlink route request for prefix p
 * from rib and send it with netlink_talk() on the VRF's command socket.
 *
 * cmd: stored in nlmsg_type; the code below distinguishes RTM_NEWROUTE
 *      and RTM_DELROUTE.
 * p:   destination prefix (RTA_DST and rtm_dst_len).
 * rib: supplies vrf_id, table, flags, MTU values and the nexthop list.
 *
 * The final visible statement returns the result of netlink_talk().
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps), so not every statement of the function is visible.
 */
1673 /* Routing table change via netlink interface. */
1675 netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib)
1678 struct sockaddr_nl snl;
1679 struct nexthop *nexthop = NULL, *tnexthop;
1683 int family = PREFIX_FAMILY(p);
1684 const char *routedesc;
1690 char buf[NL_PKT_BUF_SIZE];
1693 struct zebra_vrf *zvrf = vrf_info_lookup (rib->vrf_id);
/* Zero all of req except its last NL_PKT_BUF_SIZE bytes. */
1695 memset (&req, 0, sizeof req - NL_PKT_BUF_SIZE);
/* Address length: 4 bytes for AF_INET, 16 for anything else. */
1697 bytelen = (family == AF_INET ? 4 : 16);
1699 req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
1700 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
1701 req.n.nlmsg_type = cmd;
1702 req.r.rtm_family = family;
1703 req.r.rtm_table = rib->table;
1704 req.r.rtm_dst_len = p->prefixlen;
1705 req.r.rtm_protocol = RTPROT_ZEBRA;
/* Link scope by default; widened to universe scope further down as soon as
 * a usable nexthop is not a plain interface nexthop. */
1706 req.r.rtm_scope = RT_SCOPE_LINK;
/* Blackhole and reject routes are treated as discard routes. */
1708 if ((rib->flags & ZEBRA_FLAG_BLACKHOLE) || (rib->flags & ZEBRA_FLAG_REJECT))
/* The route type is only filled in when adding a route. */
1713 if (cmd == RTM_NEWROUTE)
1717 if (rib->flags & ZEBRA_FLAG_BLACKHOLE)
1718 req.r.rtm_type = RTN_BLACKHOLE;
1719 else if (rib->flags & ZEBRA_FLAG_REJECT)
1720 req.r.rtm_type = RTN_UNREACHABLE;
/* NOTE(review): the asserted expression compares two different constants,
 * so it looks always-true and the assert would never fire; confirm intent. */
1722 assert (RTN_BLACKHOLE != RTN_UNREACHABLE); /* false */
1725 req.r.rtm_type = RTN_UNICAST;
1728 addattr_l (&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
/* Every route is sent with the same fixed priority value. */
1731 addattr32 (&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
/* Optional MTU: take rib->mtu, or rib->nexthop_mtu when rib->mtu is zero or
 * the nexthop MTU is non-zero and smaller.  It is built as an RTAX_MTU entry
 * in a scratch rtattr and then copied into the request as RTA_METRICS. */
1733 if (rib->mtu || rib->nexthop_mtu)
1735 char buf[NL_PKT_BUF_SIZE];
1736 struct rtattr *rta = (void *) buf;
1737 u_int32_t mtu = rib->mtu;
1738 if (!mtu || (rib->nexthop_mtu && rib->nexthop_mtu < mtu))
1739 mtu = rib->nexthop_mtu;
1740 rta->rta_type = RTA_METRICS;
1741 rta->rta_len = RTA_LENGTH(0);
1742 rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
/* NOTE(review): the limit passed here is NL_PKT_BUF_SIZE, while the other
 * addattr calls on req.n pass sizeof req. */
1743 addattr_l (&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA (rta),
/* When adding, mark nexthops with NEXTHOP_FLAG_FIB (recursive-flagged ones
 * are tested for first; the statement taken for them is not visible). */
1749 if (cmd == RTM_NEWROUTE)
1750 for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
1752 /* We shouldn't encounter recursive nexthops on discard routes,
1753 * but it is probably better to handle that case correctly anyway.
1755 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1757 SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
1762 /* Count overall nexthops so we can decide whether to use singlepath
1763 * or multipath case. */
1765 for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
1767 if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
/* Usable means ACTIVE when adding and FIB (previously installed) when
 * deleting; the same pair of tests is repeated in both loops below. */
1769 if (cmd == RTM_NEWROUTE && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
1771 if (cmd == RTM_DELROUTE && !CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
1774 if (nexthop->type != NEXTHOP_TYPE_IFINDEX &&
1775 nexthop->type != NEXTHOP_TYPE_IFNAME)
1776 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1781 /* Singlepath case. */
1782 if (nexthop_num == 1 || MULTIPATH_NUM == 1)
1785 for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
1787 if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1790 if ((cmd == RTM_NEWROUTE
1791 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
1792 || (cmd == RTM_DELROUTE
1793 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
1795 routedesc = recursing ? "recursive, 1 hop" : "single hop";
1797 _netlink_route_debug(cmd, p, nexthop, routedesc, family, zvrf);
/* The nexthop's attributes go straight into the request message. */
1798 _netlink_route_build_singlepath(routedesc, bytelen,
1799 nexthop, &req.n, &req.r,
1802 if (cmd == RTM_NEWROUTE)
1803 SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
/* Multipath case: rtnexthop entries are collected in a scratch RTA_MULTIPATH
 * attribute, which is copied into the request after the loop. */
1812 char buf[NL_PKT_BUF_SIZE];
1813 struct rtattr *rta = (void *) buf;
1814 struct rtnexthop *rtnh;
1815 union g_addr *src = NULL;
1817 rta->rta_type = RTA_MULTIPATH;
1818 rta->rta_len = RTA_LENGTH (0);
1819 rtnh = RTA_DATA (rta);
1822 for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
/* Never encode more than MULTIPATH_NUM nexthops. */
1824 if (nexthop_num >= MULTIPATH_NUM)
1827 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1830 if ((cmd == RTM_NEWROUTE
1831 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
1832 || (cmd == RTM_DELROUTE
1833 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
1835 routedesc = recursing ? "recursive, multihop" : "multihop";
1838 _netlink_route_debug(cmd, p, nexthop,
1839 routedesc, family, zvrf);
1840 _netlink_route_build_multipath(routedesc, bytelen,
1841 nexthop, rta, rtnh, &src);
/* Advance to where the next rtnexthop entry will be written. */
1842 rtnh = RTNH_NEXT (rtnh);
1844 if (cmd == RTM_NEWROUTE)
1845 SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
/* src is the preferred source recorded by the multipath builder; a single
 * RTA_PREFSRC is added for the whole route.
 * NOTE(review): src starts out NULL and the guard on this call is not
 * visible in this listing. */
1849 addattr_l (&req.n, sizeof req, RTA_PREFSRC, &src->ipv4, bytelen);
/* Attach RTA_MULTIPATH only if at least one nexthop was encoded. */
1851 if (rta->rta_len > RTA_LENGTH (0))
1852 addattr_l (&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH, RTA_DATA (rta),
1856 /* If there is no useful nexthop then return. */
1857 if (nexthop_num == 0)
1859 if (IS_ZEBRA_DEBUG_KERNEL)
1860 zlog_debug ("netlink_route_multipath(): No useful nexthop.");
/* NOTE(review): snl is zeroed and given AF_NETLINK here but is not passed
 * to netlink_talk() below; it looks unused. */
1866 /* Destination netlink address. */
1867 memset (&snl, 0, sizeof snl);
1868 snl.nl_family = AF_NETLINK;
1870 /* Talk to netlink socket. */
1871 return netlink_talk (&req.n, &zvrf->netlink_cmd, zvrf);
/* kernel_route_rib: push a RIB change for prefix p to the kernel.
 * Every visible path delegates to netlink_route_multipath() and returns its
 * result: RTM_NEWROUTE with new, RTM_DELROUTE with old, and RTM_NEWROUTE
 * with new again for the replace case.
 *
 * NOTE(review): the conditions selecting the first two returns are not
 * visible in this listing (source lines 1877 and 1879 are absent);
 * presumably they test which of old/new is set.  Confirm in the full file.
 */
1875 kernel_route_rib (struct prefix *p, struct rib *old, struct rib *new)
1878 return netlink_route_multipath (RTM_NEWROUTE, p, new);
1880 return netlink_route_multipath (RTM_DELROUTE, p, old);
1882 /* Replace, can be done atomically if metric does not change;
1883 * netlink uses [prefix, tos, priority] to identify prefix.
1884 * Now metric is not sent to kernel, so we can just do atomic replace. */
1885 return netlink_route_multipath (RTM_NEWROUTE, p, new);
/* netlink_address: build an address request for interface ifp from the
 * connected address ifc and send it with netlink_talk() on the command
 * socket of the interface's VRF.
 *
 * cmd:    stored in nlmsg_type; RTM_NEWADDR is the only value tested here.
 * family: address family; selects a 4- or 16-byte address length.
 * ifp:    interface whose ifindex and vrf_id are used.
 * ifc:    supplies destination, flags and label.
 *
 * The visible return statement passes back the result of netlink_talk().
 *
 * NOTE(review): the declaration of req and the initial assignment of p are
 * not visible in this listing (embedded line numbers have gaps).
 */
1888 /* Interface address modification. */
1890 netlink_address (int cmd, int family, struct interface *ifp,
1891 struct connected *ifc)
1899 struct ifaddrmsg ifa;
1900 char buf[NL_PKT_BUF_SIZE];
/* NOTE(review): zvrf is dereferenced at the end of the function and no NULL
 * check is visible. */
1903 struct zebra_vrf *zvrf = vrf_info_lookup (ifp->vrf_id);
/* Zero all of req except its last NL_PKT_BUF_SIZE bytes. */
1906 memset (&req, 0, sizeof req - NL_PKT_BUF_SIZE);
1908 bytelen = (family == AF_INET ? 4 : 16);
1910 req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifaddrmsg));
1911 req.n.nlmsg_flags = NLM_F_REQUEST;
1912 req.n.nlmsg_type = cmd;
1913 req.ifa.ifa_family = family;
1915 req.ifa.ifa_index = ifp->ifindex;
1916 req.ifa.ifa_prefixlen = p->prefixlen;
1918 addattr_l (&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen);
/* IPv4 add only: for a non-peer address that has a destination, p is
 * re-pointed at the destination, which is sent as IFA_BROADCAST. */
1920 if (family == AF_INET && cmd == RTM_NEWADDR)
1922 if (!CONNECTED_PEER(ifc) && ifc->destination)
1924 p = ifc->destination;
1925 addattr_l (&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix,
1930 if (CHECK_FLAG (ifc->flags, ZEBRA_IFA_SECONDARY))
1931 SET_FLAG (req.ifa.ifa_flags, IFA_F_SECONDARY);
/* The label is sent with its terminating NUL (strlen + 1).
 * NOTE(review): any guard against a NULL ifc->label is not visible here. */
1934 addattr_l (&req.n, sizeof req, IFA_LABEL, ifc->label,
1935 strlen (ifc->label) + 1);
1937 return netlink_talk (&req.n, &zvrf->netlink_cmd, zvrf);
/* Add the IPv4 address ifc on interface ifp: forwards to netlink_address()
 * with RTM_NEWADDR / AF_INET and returns its result. */
1941 kernel_address_add_ipv4 (struct interface *ifp, struct connected *ifc)
1943 return netlink_address (RTM_NEWADDR, AF_INET, ifp, ifc);
/* Delete the IPv4 address ifc from interface ifp: forwards to
 * netlink_address() with RTM_DELADDR / AF_INET and returns its result. */
1947 kernel_address_delete_ipv4 (struct interface *ifp, struct connected *ifc)
1949 return netlink_address (RTM_DELADDR, AF_INET, ifp, ifc);
/* NOTE(review): kernel_read below schedules on zebrad.master, not on this
 * symbol; this declaration may be unused in this part of the file. */
1953 extern struct thread_master *master;
/* kernel_read: read handler for a VRF's netlink listen socket.
 * The thread argument is the struct zebra_vrf.  Messages are parsed with
 * netlink_information_fetch as the callback (the parse result is ignored
 * here), then the handler re-registers itself for the next read on
 * zvrf->netlink.sock and stores the new thread in zvrf->t_netlink.
 */
1955 /* Kernel route reflection. */
1957 kernel_read (struct thread *thread)
1959 struct zebra_vrf *zvrf = (struct zebra_vrf *)THREAD_ARG (thread);
1960 netlink_parse_info (netlink_information_fetch, &zvrf->netlink, zvrf);
1961 zvrf->t_netlink = thread_add_read (zebrad.master, kernel_read, zvrf,
1962 zvrf->netlink.sock);
1967 /* Filter out messages from self that occur on listener socket,
1968 caused by our actions on the command socket
1970 static void netlink_install_filter (int sock, __u32 pid)
1972 struct sock_filter filter[] = {
1974 BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
1975 /* 1: jeq 0x18 jt 3 jf 6 */
1976 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 1, 0),
1977 /* 2: jeq 0x19 jt 3 jf 6 */
1978 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 0, 3),
1980 BPF_STMT(BPF_LD|BPF_ABS|BPF_W, offsetof(struct nlmsghdr, nlmsg_pid)),
1981 /* 4: jeq XX jt 5 jf 6 */
1982 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htonl(pid), 0, 1),
1983 /* 5: ret 0 (skip) */
1984 BPF_STMT(BPF_RET|BPF_K, 0),
1985 /* 6: ret 0xffff (keep) */
1986 BPF_STMT(BPF_RET|BPF_K, 0xffff),
1989 struct sock_fprog prog = {
1990 .len = array_size(filter),
1994 if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
1995 zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno));
1998 /* Exported interface function. This function simply calls
1999 netlink_socket (). */
2001 kernel_init (struct zebra_vrf *zvrf)
2003 unsigned long groups;
2005 groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR;
2007 groups |= RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR;
2008 #endif /* HAVE_IPV6 */
2009 netlink_socket (&zvrf->netlink, groups, zvrf->vrf_id);
2010 netlink_socket (&zvrf->netlink_cmd, 0, zvrf->vrf_id);
2012 /* Register kernel socket. */
2013 if (zvrf->netlink.sock > 0)
2015 size_t bufsize = MAX(nl_rcvbufsize, 2 * sysconf(_SC_PAGESIZE));
2017 /* Only want non-blocking on the netlink event socket */
2018 if (fcntl (zvrf->netlink.sock, F_SETFL, O_NONBLOCK) < 0)
2019 zlog_err ("Can't set %s socket flags: %s", zvrf->netlink.name,
2020 safe_strerror (errno));
2022 /* Set receive buffer size if it's set from command line */
2024 netlink_recvbuf (&zvrf->netlink, nl_rcvbufsize);
2026 nl_rcvbuf.p = XMALLOC (MTYPE_NETLINK_RCVBUF, bufsize);
2027 nl_rcvbuf.size = bufsize;
2029 netlink_install_filter (zvrf->netlink.sock, zvrf->netlink_cmd.snl.nl_pid);
2030 zvrf->t_netlink = thread_add_read (zebrad.master, kernel_read, zvrf,
2031 zvrf->netlink.sock);
2036 kernel_terminate (struct zebra_vrf *zvrf)
2038 THREAD_READ_OFF (zvrf->t_netlink);
2040 if (zvrf->netlink.sock >= 0)
2042 close (zvrf->netlink.sock);
2043 zvrf->netlink.sock = -1;
2046 if (zvrf->netlink_cmd.sock >= 0)
2048 close (zvrf->netlink_cmd.sock);
2049 zvrf->netlink_cmd.sock = -1;
/* Return the name of a netlink message type by looking msg_type up in
 * nlmsg_str, the table of RTM_* names defined near the top of this file. */
2054 * nl_msg_type_to_str
2057 nl_msg_type_to_str (uint16_t msg_type)
2059 return lookup (nlmsg_str, msg_type);
/* Return the result of looking rtproto up in rtproto_str (a table defined
 * outside the part of the file visible here). */
2066 nl_rtproto_to_str (u_char rtproto)
2068 return lookup (rtproto_str, rtproto);