/* Copyright (c) 2008, 2009 Nicira Networks
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "cfg.h"
#include "command-line.h"
#include "coverage.h"
#include "daemon.h"
#include "dirs.h"
#include "dpif.h"
#include "fatal-signal.h"
#include "fault.h"
#include "leak-checker.h"
#include "netdev.h"
#include "netlink.h"
#include "ofpbuf.h"
#include "openvswitch/brcompat-netlink.h"
#include "poll-loop.h"
#include "process.h"
#include "signals.h"
#include "svec.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
#include "vlog.h"
#define THIS_MODULE VLM_brcompatd
/* XXX: This daemon just hangs if the datapath module is removed and
* reinserted (rmmod/insmod). It should learn to reconnect. */
/* Kinds of change that can be made to the bridge compatibility
 * configuration.  The explicit values match the implicit numbering. */
enum bmc_action {
    BMC_ADD_DP = 0,
    BMC_DEL_DP = 1,
    BMC_ADD_PORT = 2,
    BMC_DEL_PORT = 3
};
/* Command-line handling; both functions are defined at the end of this
* file. usage() prints the help text and exits. */
static void parse_options(int argc, char *argv[]);
static void usage(void) NO_RETURN;
/* Rate limiter shared by the VLOG_*_RL() calls in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60);
/* Maximum number of milliseconds to wait for the config file to be
* unlocked. If set to zero, no waiting will occur. Overridden by the
* --lock-timeout option. */
static int lock_timeout = 500;
/* Maximum number of milliseconds to wait before pruning port entries that
* no longer exist. If set to zero, ports are never pruned. Overridden by
* the --prune-timeout option, whose argument is given in seconds. */
static int prune_timeout = 5000;
/* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). Taken
* from the single non-option command-line argument. */
static char *config_file;
/* Command to run (via system()) to reload the ovs-vswitchd configuration
* file. A default is built in parse_options(); --reload-command overrides
* it. */
static char *reload_command;
/* Netlink socket to listen for interface changes. Only created by main()
* when 'prune_timeout' is nonzero. */
static struct nl_sock *rtnl_sock;
/* Netlink socket to bridge compatibility kernel module. Opened by
* brc_open() from main(). */
static struct nl_sock *brc_sock;
/* The Generic Netlink family number used for bridge compatibility. Filled
* in by nl_lookup_genl_family() in brc_open(). */
static int brc_family;
/* Attribute policy applied to the reply to a BRC_GENL_C_QUERY_MC request
* (see lookup_brc_multicast_group()): the multicast group is a 32-bit
* unsigned attribute. */
static const struct nl_policy brc_multicast_policy[] = {
[BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 }
};
/* Attribute policy applied to rtnetlink link messages in
* rtnl_recv_update(): the interface name is a mandatory string and the
* master interface index is an optional 32-bit unsigned attribute. */
static const struct nl_policy rtnlgrp_link_policy[] = {
[IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
[IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
};
static int
lookup_brc_multicast_group(int *multicast_group)
{
struct nl_sock *sock;
struct ofpbuf request, *reply;
struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)];
int retval;
retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock);
if (retval) {
return retval;
}
ofpbuf_init(&request, 0);
nl_msg_put_genlmsghdr(&request, sock, 0, brc_family,
NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1);
retval = nl_sock_transact(sock, &request, &reply);
ofpbuf_uninit(&request);
if (retval) {
nl_sock_destroy(sock);
return retval;
}
if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN,
brc_multicast_policy, attrs,
ARRAY_SIZE(brc_multicast_policy))) {
nl_sock_destroy(sock);
ofpbuf_delete(reply);
return EPROTO;
}
*multicast_group = nl_attr_get_u32(attrs[BRC_GENL_A_MC_GROUP]);
nl_sock_destroy(sock);
ofpbuf_delete(reply);
return 0;
}
/* Opens a socket for brcompat notifications. Returns 0 if successful,
* otherwise a positive errno value. */
static int
brc_open(struct nl_sock **sock)
{
int multicast_group = 0;
int retval;
retval = nl_lookup_genl_family(BRC_GENL_FAMILY_NAME, &brc_family);
if (retval) {
return retval;
}
retval = lookup_brc_multicast_group(&multicast_group);
if (retval) {
return retval;
}
retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, 0, sock);
if (retval) {
return retval;
}
return 0;
}
/* Attribute policy with a single string attribute, BRC_GENL_A_DP_NAME.
* NOTE(review): its user is not visible in this part of the file; presumably
* it is for commands that name only a bridge -- confirm. */
static const struct nl_policy brc_dp_policy[] = {
[BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
};
/* Returns true if the configuration has a "bridge.<name>" section, false
 * otherwise. */
static bool
bridge_exists(const char *name)
{
    bool found = cfg_has_section("bridge.%s", name);

    return found;
}
/* If the in-memory configuration has unsaved changes, writes it out, reads
 * it back in, and runs 'reload_command' (via system()) so that ovs-vswitchd
 * picks up the change.  All three steps are attempted even if an earlier one
 * fails.
 *
 * Returns 0 if the configuration was clean or every step succeeded.
 * Otherwise returns the cfg_write() error if there was one, else the
 * cfg_read() error if there was one, else ECHILD to indicate that the reload
 * command could not be run or exited with a nonzero status. */
static int
rewrite_and_reload_config(void)
{
    long long int reload_start, elapsed;
    int write_error, read_error;
    int status, system_errno;

    if (!cfg_is_dirty()) {
        return 0;
    }

    write_error = cfg_write();
    read_error = cfg_read();

    reload_start = time_msec();
    status = system(reload_command);
    /* Capture errno right away: the timing, coverage and logging calls below
     * may overwrite it before the failure is reported. */
    system_errno = errno;
    elapsed = time_msec() - reload_start;

    COVERAGE_INC(brcompatd_reload);
    if (elapsed > 0) {
        VLOG_INFO("reload command executed in %lld ms", elapsed);
    }

    if (status == -1) {
        VLOG_ERR("failed to execute reload command: %s",
                 strerror(system_errno));
    } else if (status != 0) {
        char *msg = process_status_msg(status);
        VLOG_ERR("reload command exited with error (%s)", msg);
        free(msg);
    }

    return write_error ? write_error
           : read_error ? read_error
           : status ? ECHILD
           : 0;
}
/* Go through the configuration file and remove any bridge ports that no
* longer exist. */
static void
prune_ports(void)
{
int i, j;
int error;
struct svec bridges, delete;
if (cfg_lock(NULL, 0)) {
/* Couldn't lock config file. */
return;
}
svec_init(&bridges);
svec_init(&delete);
cfg_get_subsections(&bridges, "bridge");
for (i=0; idata)->nlmsg_seq;
*br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]);
if (port_name) {
*port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]);
}
return 0;
}
static void
send_reply(uint32_t seq, int error)
{
struct ofpbuf msg;
int retval;
/* Compose reply. */
ofpbuf_init(&msg, 0);
nl_msg_put_genlmsghdr(&msg, brc_sock, 32, brc_family, NLM_F_REQUEST,
BRC_GENL_C_DP_RESULT, 1);
((struct nlmsghdr *) msg.data)->nlmsg_seq = seq;
nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error);
/* Send reply. */
retval = nl_sock_send(brc_sock, &msg, false);
if (retval) {
VLOG_WARN_RL(&rl, "replying to brcompat request: %s",
strerror(retval));
}
ofpbuf_uninit(&msg);
}
static int
handle_bridge_cmd(struct ofpbuf *buffer, bool add)
{
const char *br_name;
uint32_t seq;
int error;
error = parse_command(buffer, &seq, &br_name, NULL);
if (!error) {
error = add ? add_bridge(br_name) : del_bridge(br_name);
if (!error) {
error = rewrite_and_reload_config();
}
send_reply(seq, error);
}
return error;
}
/* Attribute policy with two string attributes: BRC_GENL_A_DP_NAME and
* BRC_GENL_A_PORT_NAME.
* NOTE(review): its user is not visible in this part of the file; presumably
* it is for commands that name both a bridge and a port -- confirm. */
static const struct nl_policy brc_port_policy[] = {
[BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
[BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING },
};
static void
del_port(const char *br_name, const char *port_name)
{
cfg_del_entry("bridge.%s.port=%s", br_name, port_name);
cfg_del_match("bonding.*.slave=%s", port_name);
cfg_del_match("vlan.%s.*", port_name);
}
/* Handles a port add ('add' true) or port delete ('add' false) request
 * contained in 'buffer'.
 *
 * If the request cannot be parsed, returns the parse error and sends no
 * reply.  Otherwise the result is EINVAL if the named bridge is not in the
 * configuration or the named network device does not exist; else it is the
 * result of updating, rewriting and reloading the configuration.  That
 * result is sent back via send_reply() and returned. */
static int
handle_port_cmd(struct ofpbuf *buffer, bool add)
{
    const char *br_name, *port_name;
    const char *cmd_name;
    uint32_t seq;
    int error;

    error = parse_command(buffer, &seq, &br_name, &port_name);
    if (error) {
        return error;
    }

    /* Name used in log messages. */
    if (add) {
        cmd_name = "add-if";
    } else {
        cmd_name = "del-if";
    }

    if (!bridge_exists(br_name)) {
        VLOG_WARN("%s %s %s: no bridge named %s",
                  cmd_name, br_name, port_name, br_name);
        error = EINVAL;
    } else if (!netdev_exists(port_name)) {
        VLOG_WARN("%s %s %s: no network device named %s",
                  cmd_name, br_name, port_name, port_name);
        error = EINVAL;
    } else {
        if (add) {
            cfg_add_entry("bridge.%s.port=%s", br_name, port_name);
        } else {
            del_port(br_name, port_name);
        }
        VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name);
        error = rewrite_and_reload_config();
    }

    send_reply(seq, error);
    return error;
}
/* Receives one message from 'brc_sock' without blocking and dispatches it to
 * the matching bridge or port handler while holding the configuration lock.
 *
 * Receive attempts that fail with ENOBUFS, and received messages that are
 * Netlink errors or NLMSG_DONE, are skipped.
 *
 * Returns the receive error if receiving failed (EAGAIN is not logged),
 * EAGAIN if the configuration lock could not be taken, EPROTO for an
 * unrecognized command, and otherwise the handler's result.  A message that
 * is too short or is not for the brcompat family is dropped and 0 is
 * returned. */
static int
brc_recv_update(void)
{
    struct genlmsghdr *genlmsghdr;
    struct ofpbuf *buffer = NULL;
    int retval;

    for (;;) {
        ofpbuf_delete(buffer);
        retval = nl_sock_recv(brc_sock, &buffer, false);
        if (retval == ENOBUFS) {
            /* Socket buffer overflowed; try again. */
            continue;
        }
        if (retval) {
            break;
        }
        if (!nl_msg_nlmsgerr(buffer, NULL)
            && nl_msg_nlmsghdr(buffer)->nlmsg_type != NLMSG_DONE) {
            /* A message worth looking at. */
            break;
        }
    }
    if (retval) {
        if (retval != EAGAIN) {
            VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval));
        }
        return retval;
    }

    genlmsghdr = nl_msg_genlmsghdr(buffer);
    if (!genlmsghdr) {
        VLOG_WARN_RL(&rl, "received packet too short for generic NetLink");
        goto done;
    }

    if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) {
        VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)",
                    nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family);
        goto done;
    }

    if (cfg_lock(NULL, lock_timeout)) {
        /* Couldn't lock config file. */
        retval = EAGAIN;
        goto done;
    }

    switch (genlmsghdr->cmd) {
    case BRC_GENL_C_DP_ADD:
    case BRC_GENL_C_DP_DEL:
        retval = handle_bridge_cmd(buffer,
                                   genlmsghdr->cmd == BRC_GENL_C_DP_ADD);
        break;

    case BRC_GENL_C_PORT_ADD:
    case BRC_GENL_C_PORT_DEL:
        retval = handle_port_cmd(buffer,
                                 genlmsghdr->cmd == BRC_GENL_C_PORT_ADD);
        break;

    default:
        retval = EPROTO;
        break;
    }

    cfg_unlock();

done:
    ofpbuf_delete(buffer);
    return retval;
}
/* Check for interface configuration changes announced through RTNL. */
static void
rtnl_recv_update(void)
{
struct ofpbuf *buf;
int error = nl_sock_recv(rtnl_sock, &buf, false);
if (error == EAGAIN) {
/* Nothing to do. */
} else if (error == ENOBUFS) {
VLOG_WARN_RL(&rl, "network monitor socket overflowed");
} else if (error) {
VLOG_WARN_RL(&rl, "error on network monitor socket: %s",
strerror(error));
} else {
struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
struct nlmsghdr *nlh;
struct ifinfomsg *iim;
nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim);
if (!iim) {
VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)");
ofpbuf_delete(buf);
return;
}
if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
rtnlgrp_link_policy,
attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
VLOG_WARN_RL(&rl,"received bad rtnl message (policy)");
ofpbuf_delete(buf);
return;
}
if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) {
const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]);
char br_name[IFNAMSIZ];
uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]);
struct svec ports;
if (!if_indextoname(br_idx, br_name)) {
ofpbuf_delete(buf);
return;
}
if (cfg_lock(NULL, lock_timeout)) {
/* Couldn't lock config file. */
/* xxx this should try again and print error msg. */
ofpbuf_delete(buf);
return;
}
svec_init(&ports);
cfg_get_all_keys(&ports, "bridge.%s.port", br_name);
svec_sort(&ports);
if (svec_contains(&ports, port_name)) {
del_port(br_name, port_name);
rewrite_and_reload_config();
}
cfg_unlock();
}
ofpbuf_delete(buf);
}
}
int
main(int argc, char *argv[])
{
struct unixctl_server *unixctl;
int retval;
set_program_name(argv[0]);
register_fault_handlers();
time_init();
vlog_init();
parse_options(argc, argv);
signal(SIGPIPE, SIG_IGN);
process_init();
die_if_already_running();
daemonize();
retval = unixctl_server_create(NULL, &unixctl);
if (retval) {
ovs_fatal(retval, "could not listen for vlog connections");
}
if (brc_open(&brc_sock)) {
ovs_fatal(0, "could not open brcompat socket. Check "
"\"brcompat\" kernel module.");
}
if (prune_timeout) {
if (nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock)) {
ovs_fatal(0, "could not create rtnetlink socket");
}
}
cfg_read();
for (;;) {
unixctl_server_run(unixctl);
brc_recv_update();
/* If 'prune_timeout' is non-zero, we actively prune from the
* config file any 'bridge..port' entries that are no
* longer valid. We use two methods:
*
* 1) The kernel explicitly notifies us of removed ports
* through the RTNL messages.
*
* 2) We periodically check all ports associated with bridges
* to see if they no longer exist.
*/
if (prune_timeout) {
rtnl_recv_update();
prune_ports();
nl_sock_wait(rtnl_sock, POLLIN);
poll_timer_wait(prune_timeout);
}
nl_sock_wait(brc_sock, POLLIN);
unixctl_server_wait(unixctl);
poll_block();
}
return 0;
}
/* Parses 'arg', the argument given to long option '--option_name', as a
 * decimal integer and returns it.  Terminates the program with a fatal error
 * if 'arg' is empty, has trailing non-numeric characters, or lies outside
 * 'min'...'max'. */
static int
brc_parse_int_arg(const char *option_name, const char *arg, int min, int max)
{
    char *tail;
    long int value;

    errno = 0;
    value = strtol(arg, &tail, 10);
    if (tail == arg || *tail != '\0' || errno == ERANGE
        || value < min || value > max) {
        ovs_fatal(0, "argument \"%s\" to --%s must be an integer "
                  "between %d and %d", arg, option_name, min, max);
    }
    return (int) value;
}

/* Parses the command line in 'argv' (of 'argc' elements).
 *
 * Sets 'lock_timeout', 'prune_timeout' (converted from seconds to
 * milliseconds), 'reload_command' and 'config_file', and hands the daemon,
 * logging and leak-checker options to their own handlers.  'reload_command'
 * always ends up pointing to heap-allocated memory.
 *
 * Exits the program for --help and --version, and with a fatal error on an
 * invalid option or numeric argument, when the number of non-option
 * arguments is not exactly one, or when the configuration file cannot be
 * registered with cfg_set_file(). */
static void
parse_options(int argc, char *argv[])
{
    enum {
        OPT_LOCK_TIMEOUT = UCHAR_MAX + 1,
        OPT_PRUNE_TIMEOUT,
        OPT_RELOAD_COMMAND,
        VLOG_OPTION_ENUMS,
        LEAK_CHECKER_OPTION_ENUMS
    };
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {"version", no_argument, 0, 'V'},
        {"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT},
        {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT},
        {"reload-command", required_argument, 0, OPT_RELOAD_COMMAND},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        LEAK_CHECKER_LONG_OPTIONS,
        {0, 0, 0, 0},
    };
    char *short_options = long_options_to_short_options(long_options);
    int error;

    /* Default reload command; usage() prints it, so it must be set before
     * any option is processed. */
    reload_command = xasprintf("%s/ovs-appctl -t "
                               "%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl "
                               "-e vswitchd/reload 2>&1 "
                               "| /usr/bin/logger -t brcompatd-reload",
                               ovs_bindir, ovs_rundir, ovs_rundir);
    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        case 'H':
        case 'h':
            usage();

        case 'V':
            OVS_PRINT_VERSION(0, 0);
            exit(EXIT_SUCCESS);

        case OPT_LOCK_TIMEOUT:
            lock_timeout = brc_parse_int_arg("lock-timeout", optarg,
                                             INT_MIN, INT_MAX);
            break;

        case OPT_PRUNE_TIMEOUT:
            /* The range is limited so that the conversion from seconds to
             * milliseconds cannot overflow. */
            prune_timeout = brc_parse_int_arg("prune-timeout", optarg,
                                              INT_MIN / 1000,
                                              INT_MAX / 1000) * 1000;
            break;

        case OPT_RELOAD_COMMAND:
            /* Drop the previous value (the default, or an earlier
             * --reload-command) instead of leaking it.  A copy is stored so
             * that the pointer can always be passed to free(). */
            free(reload_command);
            reload_command = xstrdup(optarg);
            break;

        VLOG_OPTION_HANDLERS
        DAEMON_OPTION_HANDLERS
        LEAK_CHECKER_OPTION_HANDLERS

        case '?':
            exit(EXIT_FAILURE);

        default:
            abort();
        }
    }
    free(short_options);

    argc -= optind;
    argv += optind;

    if (argc != 1) {
        ovs_fatal(0, "exactly one non-option argument required; "
                  "use --help for usage");
    }

    config_file = argv[0];
    error = cfg_set_file(config_file);
    if (error) {
        ovs_fatal(error, "failed to add configuration file \"%s\"",
                  config_file);
    }
}
/* Prints the command-line help text to stdout, including the sections
* supplied by the daemon, vlog and leak-checker modules and the current
* value of 'reload_command', then exits with EXIT_SUCCESS. Never returns. */
static void
usage(void)
{
printf("%s: bridge compatibility front-end for ovs-vswitchd\n"
"usage: %s [OPTIONS] CONFIG\n"
"CONFIG is the configuration file used by ovs-vswitchd.\n",
program_name, program_name);
printf("\nConfiguration options:\n"
" --reload-command=COMMAND shell command to reload ovs-vswitchd\n"
" --prune-timeout=SECS wait at most SECS before pruning ports\n"
" --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n"
);
daemon_usage();
vlog_usage();
printf("\nOther options:\n"
" -h, --help display this help message\n"
" -V, --version display version information\n");
leak_checker_usage();
/* 'reload_command' holds the built-in default unless a --reload-command
* option was processed before the help option. */
printf("\nThe default reload command is:\n%s\n", reload_command);
exit(EXIT_SUCCESS);
}