aboutsummaryrefslogtreecommitdiff
path: root/server/failover.c
diff options
context:
space:
mode:
Diffstat (limited to 'server/failover.c')
-rw-r--r--server/failover.c6391
1 files changed, 6391 insertions, 0 deletions
diff --git a/server/failover.c b/server/failover.c
new file mode 100644
index 0000000..97e7d73
--- /dev/null
+++ b/server/failover.c
@@ -0,0 +1,6391 @@
+/* failover.c
+
+ Failover protocol support code... */
+
+/*
+ * Copyright (c) 2004-2011 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999-2003 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Internet Systems Consortium, Inc.
+ * 950 Charter Street
+ * Redwood City, CA 94063
+ * <info@isc.org>
+ * https://www.isc.org/
+ *
+ * This software has been written for Internet Systems Consortium
+ * by Ted Lemon in cooperation with Vixie Enterprises and Nominum, Inc.
+ * To learn more about Internet Systems Consortium, see
+ * ``https://www.isc.org/''. To learn more about Vixie Enterprises,
+ * see ``http://www.vix.com''. To learn more about Nominum, Inc., see
+ * ``http://www.nominum.com''.
+ */
+
+#include "dhcpd.h"
+#include <omapip/omapip_p.h>
+
+#if defined (FAILOVER_PROTOCOL)
+dhcp_failover_state_t *failover_states; /* Head of the failover state list; entries are chained through ->next. */
+static isc_result_t do_a_failover_option (omapi_object_t *, /* Parses one option of an incoming message; defined later in this file. */
+ dhcp_failover_link_t *);
+dhcp_failover_listener_t *failover_listeners; /* Head of the failover listener list; entries are chained through ->next. */
+
+static isc_result_t failover_message_reference (failover_message_t **, /* Forward declaration; definition is outside this part of the file. */
+ failover_message_t *,
+ const char *file, int line);
+static isc_result_t failover_message_dereference (failover_message_t **, /* Used below to release link->imsg; definition is outside this part of the file. */
+ const char *file, int line);
+
+static void dhcp_failover_pool_balance(dhcp_failover_state_t *state); /* Forward declarations of the pool balancing helpers follow. */
+static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state);
+static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state,
+ isc_boolean_t *sendreq);
+static inline int secondary_not_hoarding(dhcp_failover_state_t *state,
+ struct pool *p);
+
+
+/* Start every configured failover relationship.  For each entry on the
+   failover_states list: run the "startup" state transition, insist that
+   at least one pool refers to the peer, try to connect to the peer, and
+   start listening.  A failed connect or listen attempt is retried by a
+   timeout scheduled 90 seconds from cur_time. */
+
+void dhcp_failover_startup ()
+{
+	dhcp_failover_state_t *fs;
+	isc_result_t rc;
+	struct timeval when;
+
+	fs = failover_states;
+	while (fs) {
+		dhcp_failover_state_transition (fs, "startup");
+
+		/* A peer nobody refers to is a configuration error. */
+		if (fs -> pool_count == 0) {
+			log_error ("failover peer declaration with no %s",
+				   "referring pools.");
+			log_error ("In order to use failover, you MUST %s",
+				   "refer to your main failover declaration");
+			log_error ("in each pool declaration.   You MUST %s",
+				   "NOT use range declarations outside");
+			log_fatal ("of pool declarations.");
+		}
+
+		/* The peer may already be up, so attempt the connection
+		   right away; anything other than success or "incomplete"
+		   schedules a reconnect. */
+		rc = dhcp_failover_link_initiate ((omapi_object_t *)fs);
+		if (!(rc == ISC_R_SUCCESS || rc == DHCP_R_INCOMPLETE)) {
+#if defined (DEBUG_FAILOVER_TIMING)
+			log_info ("add_timeout +90 dhcp_failover_reconnect");
+#endif
+			when . tv_usec = 0;
+			when . tv_sec = cur_time + 90;
+			add_timeout (&when, dhcp_failover_reconnect, fs,
+				     (tvref_t)dhcp_failover_state_reference,
+				     (tvunref_t)
+				     dhcp_failover_state_dereference);
+			log_error ("failover peer %s: %s", fs -> name,
+				   isc_result_totext (rc));
+		}
+
+		/* Same retry policy for the listening side. */
+		rc = dhcp_failover_listen ((omapi_object_t *)fs);
+		if (rc != ISC_R_SUCCESS) {
+#if defined (DEBUG_FAILOVER_TIMING)
+			log_info ("add_timeout +90 %s",
+				  "dhcp_failover_listener_restart");
+#endif
+			when . tv_usec = 0;
+			when . tv_sec = cur_time + 90;
+			add_timeout (&when, dhcp_failover_listener_restart,
+				     fs,
+				     (tvref_t)omapi_object_reference,
+				     (tvunref_t)omapi_object_dereference);
+		}
+
+		fs = fs -> next;
+	}
+}
+
+/* Call write_failover_state() on every entry of the failover_states
+   list.  Returns 0 as soon as one call returns zero, otherwise 1 (also
+   when the list is empty). */
+
+int dhcp_failover_write_all_states ()
+{
+	dhcp_failover_state_t *fs = failover_states;
+
+	while (fs) {
+		if (write_failover_state (fs) == 0)
+			return 0;
+		fs = fs -> next;
+	}
+	return 1;
+}
+
+/* Add PEER to the head of the failover_states list unless a state with
+   the same name is already on it.
+
+   Returns ISC_R_SUCCESS when the peer was added, ISC_R_EXISTS when a
+   state of that name was found, or whatever other status
+   find_failover_peer() reported. */
+
+isc_result_t enter_failover_peer (peer)
+	dhcp_failover_state_t *peer;
+{
+	dhcp_failover_state_t *dup = (dhcp_failover_state_t *)0;
+	isc_result_t status;
+
+	status = find_failover_peer (&dup, peer -> name, MDL);
+	if (status == ISC_R_NOTFOUND) {
+		/* Splice the new peer in front of the current head: the
+		   old head moves into peer -> next, then the list head is
+		   repointed at peer. */
+		if (failover_states) {
+			dhcp_failover_state_reference (&peer -> next,
+						       failover_states, MDL);
+			dhcp_failover_state_dereference (&failover_states,
+							 MDL);
+		}
+		dhcp_failover_state_reference (&failover_states, peer, MDL);
+		return ISC_R_SUCCESS;
+	}
+
+	/* Only drop the lookup reference if one was actually handed back;
+	   on a lookup error dup is still null and must not be passed to
+	   the dereference routine. */
+	if (dup)
+		dhcp_failover_state_dereference (&dup, MDL);
+	if (status == ISC_R_SUCCESS)
+		return ISC_R_EXISTS;
+	return status;
+}
+
+/* Look up the failover state whose name equals NAME.  On a match, a
+   reference to it is stored through PEER (FILE and LINE are passed on
+   to the reference routine) and that routine's status is returned.
+   Returns ISC_R_NOTFOUND when no state on the list has that name. */
+
+isc_result_t find_failover_peer (peer, name, file, line)
+	dhcp_failover_state_t **peer;
+	const char *name;
+	const char *file;
+	int line;
+{
+	dhcp_failover_state_t *candidate = failover_states;
+
+	while (candidate) {
+		if (strcmp (name, candidate -> name) == 0)
+			return dhcp_failover_state_reference (peer, candidate,
+							      file, line);
+		candidate = candidate -> next;
+	}
+	return ISC_R_NOTFOUND;
+}
+
+/* The failover protocol has three objects associated with it. For
+ each failover partner declaration in the dhcpd.conf file, primary
+ or secondary, there is a failover_state object. For any primary or
+ secondary state object that has a connection to its peer, there is
+ also a failover_link object, which has its own input state separate
+ from the failover protocol state for managing the actual bytes
+ coming in off the wire. Finally, there will be one listener object
+ for every distinct port number associated with a secondary
+ failover_state object. Normally all secondary failover_state
+ objects are expected to listen on the same port number, so there
+ need be only one listener object, but if different port numbers are
+ specified for each failover object, there could be as many as one
+ listener object for each secondary failover_state object. */
+
+/* This, then, is the implementation of the failover link object. */
+
+/* Start an outgoing connection to the failover partner.
+
+   H is any object in a chain that contains a failover state object.
+   A new link object is allocated, the partner's address option is
+   evaluated into a list of IPv4 addresses, the local address is worked
+   out, and omapi_connect_list() is called.
+
+   Returns DHCP_R_INVALIDARG if no failover state is found in the chain
+   or the local 'address' does not evaluate to exactly one IPv4
+   address, ISC_R_UNEXPECTED if the partner address cannot be
+   evaluated, an allocation status on failure, or otherwise the status
+   of omapi_connect_list(). */
+
+isc_result_t dhcp_failover_link_initiate (omapi_object_t *h)
+{
+	isc_result_t status;
+	dhcp_failover_link_t *obj;
+	dhcp_failover_state_t *state;
+	omapi_object_t *o;
+	unsigned i;
+	struct data_string ds;
+	omapi_addr_list_t *addrs = (omapi_addr_list_t *)0;
+	omapi_addr_t local_addr;
+
+	/* Find the failover state in the object chain: walk to the
+	   outermost object, then inward until the type matches. */
+	for (o = h; o -> outer; o = o -> outer)
+		;
+	for (; o; o = o -> inner) {
+		if (o -> type == dhcp_type_failover_state)
+			break;
+	}
+	if (!o)
+		return DHCP_R_INVALIDARG;
+	state = (dhcp_failover_state_t *)o;
+
+	obj = (dhcp_failover_link_t *)0;
+	status = dhcp_failover_link_allocate (&obj, MDL);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	option_cache_reference (&obj -> peer_address,
+				state -> partner.address, MDL);
+	obj -> peer_port = state -> partner.port;
+	dhcp_failover_state_reference (&obj -> state_object, state, MDL);
+
+	memset (&ds, 0, sizeof ds);
+	if (!evaluate_option_cache (&ds, (struct packet *)0, (struct lease *)0,
+				    (struct client_state *)0,
+				    (struct option_state *)0,
+				    (struct option_state *)0,
+				    &global_scope, obj -> peer_address, MDL)) {
+		dhcp_failover_link_dereference (&obj, MDL);
+		return ISC_R_UNEXPECTED;
+	}
+
+	/* Make an omapi address list out of a buffer containing zero or more
+	   IPv4 addresses. */
+	status = omapi_addr_list_new (&addrs, ds.len / 4, MDL);
+	if (status != ISC_R_SUCCESS) {
+		/* The evaluated partner address is still held in ds at
+		   this point; release it so this error path does not
+		   leak it. */
+		data_string_forget (&ds, MDL);
+		dhcp_failover_link_dereference (&obj, MDL);
+		return status;
+	}
+
+	for (i = 0; i < addrs -> count; i++) {
+		addrs -> addresses [i].addrtype = AF_INET;
+		addrs -> addresses [i].addrlen = sizeof (struct in_addr);
+		memcpy (addrs -> addresses [i].address,
+			&ds.data [i * 4], sizeof (struct in_addr));
+		addrs -> addresses [i].port = obj -> peer_port;
+	}
+	data_string_forget (&ds, MDL);
+
+	/* Now figure out the local address that we're supposed to use.
+	   Start from an all-zero address so that no field is left
+	   uninitialized on either path below. */
+	memset (&local_addr, 0, sizeof local_addr);
+	if (!state -> me.address ||
+	    !evaluate_option_cache (&ds, (struct packet *)0,
+				    (struct lease *)0,
+				    (struct client_state *)0,
+				    (struct option_state *)0,
+				    (struct option_state *)0,
+				    &global_scope, state -> me.address,
+				    MDL)) {
+		/* No usable local address configured: connect from the
+		   zero address, but insist on a server identifier. */
+		local_addr.addrtype = AF_INET;
+		local_addr.addrlen = sizeof (struct in_addr);
+		if (!state -> server_identifier.len) {
+			log_fatal ("failover peer %s: no local address.",
+				   state -> name);
+		}
+	} else {
+		if (ds.len != sizeof (struct in_addr)) {
+			log_error("failover peer %s: 'address' parameter "
+				  "fails to resolve to an IPv4 address",
+				  state->name);
+			data_string_forget (&ds, MDL);
+			dhcp_failover_link_dereference (&obj, MDL);
+			omapi_addr_list_dereference (&addrs, MDL);
+			return DHCP_R_INVALIDARG;
+		}
+		local_addr.addrtype = AF_INET;
+		local_addr.addrlen = ds.len;
+		memcpy (local_addr.address, ds.data, ds.len);
+		/* Use the local address as the server identifier when
+		   none has been set yet. */
+		if (!state -> server_identifier.len)
+			data_string_copy (&state -> server_identifier,
+					  &ds, MDL);
+		data_string_forget (&ds, MDL);
+		local_addr.port = 0;  /* Let the O.S. choose. */
+	}
+
+	status = omapi_connect_list ((omapi_object_t *)obj,
+				     addrs, &local_addr);
+	omapi_addr_list_dereference (&addrs, MDL);
+
+	/* Drop the reference taken by the allocation above. */
+	dhcp_failover_link_dereference (&obj, MDL);
+	return status;
+}
+
+/* Signal handler for failover link objects.
+
+   Handles these signal names:
+     "connect"    - a primary sends its CONNECT message; in every case
+                    a 15 second startup timeout is armed.
+     "disconnect" - marks the link disconnected, runs the state
+                    transition if this is the state's current link, and
+                    schedules a reconnect in 5 seconds.
+     "status"     - takes an isc_result_t from AP; host-unreachable and
+                    timed-out are handled like a disconnect.
+     "ready"      - input has arrived; runs the message reading state
+                    machine below.
+   Any other name is handed to the inner object's signal handler, or
+   ISC_R_NOTFOUND is returned if there is none.
+
+   Returns ISC_R_UNEXPECTED if H is not a failover link or a message
+   could not be read, DHCP_R_INVALIDARG if there is no outer connection
+   object or no state could be associated with the link, and
+   ISC_R_SUCCESS (or the send status for "connect") otherwise. */
+
+isc_result_t dhcp_failover_link_signal (omapi_object_t *h,
+					const char *name, va_list ap)
+{
+	isc_result_t status;
+	dhcp_failover_link_t *link;
+	omapi_object_t *c;
+	dhcp_failover_state_t *s, *state = (dhcp_failover_state_t *)0;
+	char *sname;
+	int slen;
+	struct timeval tv;
+
+	if (h -> type != dhcp_type_failover_link) {
+		/* XXX shouldn't happen.   Put an assert here? */
+		return ISC_R_UNEXPECTED;
+	}
+	link = (dhcp_failover_link_t *)h;
+
+	if (!strcmp (name, "connect")) {
+		if (link -> state_object -> i_am == primary) {
+			status = dhcp_failover_send_connect (h);
+			if (status != ISC_R_SUCCESS) {
+				log_info ("dhcp_failover_send_connect: %s",
+					  isc_result_totext (status));
+				omapi_disconnect (h -> outer, 1);
+			}
+		} else
+			status = ISC_R_SUCCESS;
+		/* Allow the peer fifteen seconds to send us a
+		   startup message. */
+#if defined (DEBUG_FAILOVER_TIMING)
+		log_info ("add_timeout +15 %s",
+			  "dhcp_failover_link_startup_timeout");
+#endif
+		tv . tv_sec = cur_time + 15;
+		tv . tv_usec = 0;
+		add_timeout (&tv,
+			     dhcp_failover_link_startup_timeout,
+			     link,
+			     (tvref_t)dhcp_failover_link_reference,
+			     (tvunref_t)dhcp_failover_link_dereference);
+		return status;
+	}
+
+	if (!strcmp (name, "disconnect")) {
+		if (link -> state_object) {
+			/* Hold our own reference to the state for the
+			   duration of this branch. */
+			dhcp_failover_state_reference (&state,
+						       link -> state_object,
+						       MDL);
+			link -> state = dhcp_flink_disconnected;
+
+			/* Make the transition. */
+			if (state->link_to_peer == link)
+				dhcp_failover_state_transition
+					(link->state_object, name);
+
+			/* Schedule an attempt to reconnect. */
+#if defined (DEBUG_FAILOVER_TIMING)
+			log_info("add_timeout +5 dhcp_failover_reconnect");
+#endif
+			tv.tv_sec = cur_time + 5;
+			tv.tv_usec = cur_tv.tv_usec;
+			add_timeout(&tv, dhcp_failover_reconnect, state,
+				    (tvref_t)dhcp_failover_state_reference,
+				    (tvunref_t)dhcp_failover_state_dereference);
+
+			dhcp_failover_state_dereference (&state, MDL);
+		}
+		return ISC_R_SUCCESS;
+	}
+
+	if (!strcmp (name, "status")) {
+		if (link -> state_object) {
+			status = va_arg(ap, isc_result_t);
+
+			if ((status == ISC_R_HOSTUNREACH) ||
+			    (status == ISC_R_TIMEDOUT)) {
+				dhcp_failover_state_reference
+					(&state, link -> state_object, MDL);
+				link -> state = dhcp_flink_disconnected;
+
+				/* Make the transition. */
+				dhcp_failover_state_transition
+					(link -> state_object, "disconnect");
+
+				/* Start trying to reconnect. */
+#if defined (DEBUG_FAILOVER_TIMING)
+				log_info ("add_timeout +5 %s",
+					  "dhcp_failover_reconnect");
+#endif
+				tv . tv_sec = cur_time + 5;
+				tv . tv_usec = 0;
+				add_timeout
+					(&tv, dhcp_failover_reconnect,
+					 state,
+					 (tvref_t)
+					 dhcp_failover_state_reference,
+					 (tvunref_t)
+					 dhcp_failover_state_dereference);
+
+				/* The reference is only taken inside this
+				   branch, so it is only released here;
+				   for any other status value state is
+				   still null and must not be passed to
+				   the dereference routine. */
+				dhcp_failover_state_dereference (&state, MDL);
+			}
+		}
+		return ISC_R_SUCCESS;
+	}
+
+	/* Not a signal we recognize? */
+	if (strcmp (name, "ready")) {
+		if (h -> inner && h -> inner -> type -> signal_handler)
+			return (*(h -> inner -> type -> signal_handler))
+				(h -> inner, name, ap);
+		return ISC_R_NOTFOUND;
+	}
+
+	if (!h -> outer || h -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+	c = h -> outer;
+
+	/* We get here because we requested that we be woken up after
+	   some number of bytes were read, and that number of bytes
+	   has in fact been read.   The cases below deliberately fall
+	   through into each other whenever the bytes needed by the next
+	   stage are already available. */
+	switch (link -> state) {
+	      case dhcp_flink_start:
+		link -> state = dhcp_flink_message_length_wait;
+		if ((omapi_connection_require (c, 2)) != ISC_R_SUCCESS)
+			break;
+		/* FALLTHROUGH */
+	      case dhcp_flink_message_length_wait:
+	      next_message:
+		link -> state = dhcp_flink_message_wait;
+		link -> imsg = dmalloc (sizeof (failover_message_t), MDL);
+		if (!link -> imsg) {
+			status = ISC_R_NOMEMORY;
+			/* Shared failure exit: later stages jump here with
+			   status already set. */
+		      dhcp_flink_fail:
+			if (link -> imsg) {
+				failover_message_dereference (&link->imsg,
+							      MDL);
+			}
+			link -> state = dhcp_flink_disconnected;
+			log_info ("message length wait: %s",
+				  isc_result_totext (status));
+			omapi_disconnect (c, 1);
+			/* XXX just blow away the protocol state now?
+			   XXX or will disconnect blow it away? */
+			return ISC_R_UNEXPECTED;
+		}
+		memset (link -> imsg, 0, sizeof (failover_message_t));
+		link -> imsg -> refcnt = 1;
+		/* Get the length: */
+		omapi_connection_get_uint16 (c, &link -> imsg_len);
+		link -> imsg_count = 0;	/* Bytes read. */
+
+		/* Ensure the message is of valid length. */
+		if (link->imsg_len < DHCP_FAILOVER_MIN_MESSAGE_SIZE ||
+		    link->imsg_len > DHCP_FAILOVER_MAX_MESSAGE_SIZE) {
+			status = ISC_R_UNEXPECTED;
+			goto dhcp_flink_fail;
+		}
+
+		if ((omapi_connection_require (c, link -> imsg_len - 2U)) !=
+		    ISC_R_SUCCESS)
+			break;
+		/* FALLTHROUGH */
+	      case dhcp_flink_message_wait:
+		/* Read in the message.  At this point we have the
+		   entire message in the input buffer.  For each
+		   incoming value ID, set a bit in the bitmask
+		   indicating that we've gotten it.  Maybe flag an
+		   error message if the bit is already set.  Once
+		   we're done reading, we can check the bitmask to
+		   make sure that the required fields for each message
+		   have been included. */
+
+		link -> imsg_count += 2;	/* Count the length as read. */
+
+		/* Get message type. */
+		omapi_connection_copyout (&link -> imsg -> type, c, 1);
+		link -> imsg_count++;
+
+		/* Get message payload offset. */
+		omapi_connection_copyout (&link -> imsg_payoff, c, 1);
+		link -> imsg_count++;
+
+		/* Get message time. */
+		omapi_connection_get_uint32 (c, &link -> imsg -> time);
+		link -> imsg_count += 4;
+
+		/* Get transaction ID. */
+		omapi_connection_get_uint32 (c, &link -> imsg -> xid);
+		link -> imsg_count += 4;
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+# if !defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
+		if (link->imsg->type == FTM_CONTACT)
+			goto skip_contact;
+# endif
+		log_info ("link: message %s  payoff %d  time %ld  xid %ld",
+			  dhcp_failover_message_name (link -> imsg -> type),
+			  link -> imsg_payoff,
+			  (unsigned long)link -> imsg -> time,
+			  (unsigned long)link -> imsg -> xid);
+# if !defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
+	      skip_contact:
+# endif
+#endif
+		/* The payload offset was read off the wire.  It may not
+		   point back into the header bytes already consumed
+		   (the subtraction below would wrap) nor beyond the end
+		   of the message. */
+		if (link -> imsg_payoff < link -> imsg_count ||
+		    link -> imsg_payoff > link -> imsg_len) {
+			status = ISC_R_UNEXPECTED;
+			goto dhcp_flink_fail;
+		}
+
+		/* Skip over any portions of the message header that we
+		   don't understand. */
+		if (link -> imsg_payoff > link -> imsg_count) {
+			omapi_connection_copyout ((unsigned char *)0, c,
+						  (link -> imsg_payoff -
+						   link -> imsg_count));
+			link -> imsg_count = link -> imsg_payoff;
+		}
+
+		/* Now start sucking options off the wire. */
+		while (link -> imsg_count < link -> imsg_len) {
+			status = do_a_failover_option (c, link);
+			if (status != ISC_R_SUCCESS)
+				goto dhcp_flink_fail;
+		}
+
+		/* If it's a connect message, try to associate it with
+		   a state object. */
+		/* XXX this should be authenticated! */
+		if (link -> imsg -> type == FTM_CONNECT) {
+			const char *errmsg;
+			int reason;
+
+			if (!(link->imsg->options_present &
+			      FTB_RELATIONSHIP_NAME)) {
+				errmsg = "missing relationship-name";
+				reason = FTR_INVALID_PARTNER;
+				goto badconnect;
+			}
+
+			/* See if we can find a failover_state object that
+			   matches this connection.  This message should only
+			   be received by a secondary from a primary. */
+			for (s = failover_states; s; s = s -> next) {
+				if (dhcp_failover_state_match_by_name(s,
+				    &link->imsg->relationship_name))
+					state = s;
+			}
+
+			/* If we can't find a failover protocol state
+			   for this remote host, drop the connection */
+			if (!state) {
+				errmsg = "unknown failover relationship name";
+				reason = FTR_INVALID_PARTNER;
+
+			      badconnect:
+				/* XXX Send a refusal message first?
+				   XXX Look in protocol spec for guidance. */
+
+				/* Pick the best name available for the
+				   log message. */
+				if (state != NULL) {
+					sname = state->name;
+					slen = strlen(sname);
+				} else if (link->imsg->options_present &
+					   FTB_RELATIONSHIP_NAME) {
+					sname = (char *)link->imsg->
+						relationship_name.data;
+					slen = link->imsg->relationship_name.count;
+				} else {
+					sname = "unknown";
+					slen = strlen(sname);
+				}
+
+				log_error("Failover CONNECT from %.*s: %s",
+					  slen, sname, errmsg);
+				dhcp_failover_send_connectack
+					((omapi_object_t *)link, state,
+					 reason, errmsg);
+				log_info ("failover: disconnect: %s", errmsg);
+				omapi_disconnect (c, 0);
+				link -> state = dhcp_flink_disconnected;
+				return ISC_R_SUCCESS;
+			}
+
+			/* Refuse peers whose clock differs from ours by
+			   more than 60 seconds in either direction. */
+			if ((cur_time > link -> imsg -> time &&
+			     cur_time - link -> imsg -> time > 60) ||
+			    (cur_time < link -> imsg -> time &&
+			     link -> imsg -> time - cur_time > 60)) {
+				errmsg = "time offset too large";
+				reason = FTR_TIMEMISMATCH;
+				goto badconnect;
+			}
+
+			if (!(link -> imsg -> options_present & FTB_HBA) ||
+			    link -> imsg -> hba.count != 32) {
+				errmsg = "invalid HBA";
+				reason = FTR_HBA_CONFLICT; /* XXX */
+				goto badconnect;
+			}
+			/* Replace any previous HBA with the 32 bytes the
+			   peer sent. */
+			if (state -> hba)
+				dfree (state -> hba, MDL);
+			state -> hba = dmalloc (32, MDL);
+			if (!state -> hba) {
+				errmsg = "no memory";
+				reason = FTR_MISC_REJECT;
+				goto badconnect;
+			}
+			memcpy (state -> hba, link -> imsg -> hba.data, 32);
+
+			if (!link -> state_object)
+				dhcp_failover_state_reference
+					(&link -> state_object, state, MDL);
+			if (!link -> peer_address)
+				option_cache_reference
+					(&link -> peer_address,
+					 state -> partner.address, MDL);
+		}
+
+		/* If we don't have a state object at this point, it's
+		   some kind of bogus situation, so just drop the
+		   connection. */
+		if (!link -> state_object) {
+			log_info ("failover: connect: no matching state.");
+			omapi_disconnect (c, 1);
+			link -> state = dhcp_flink_disconnected;
+			return DHCP_R_INVALIDARG;
+		}
+
+		/* Once we have the entire message, and we've validated
+		   it as best we can here, pass it to the parent. */
+		omapi_signal ((omapi_object_t *)link -> state_object,
+			      "message", link);
+		link -> state = dhcp_flink_message_length_wait;
+		if (link -> imsg)
+			failover_message_dereference (&link -> imsg, MDL);
+		/* XXX This is dangerous because we could get into a tight
+		   XXX loop reading input without servicing any other stuff.
+		   XXX There needs to be a way to relinquish control but
+		   XXX get it back immediately if there's no other work to
+		   XXX do. */
+		if ((omapi_connection_require (c, 2)) == ISC_R_SUCCESS)
+			goto next_message;
+		break;
+
+	      default:
+		log_fatal("Impossible case at %s:%d.", MDL);
+		break;
+	}
+	return ISC_R_SUCCESS;
+}
+
+/* Read one option (code, length, data) of the message currently being
+   received on LINK from connection C, store its value in link -> imsg
+   where the ft_options table says it belongs, and advance
+   link -> imsg_count by the number of bytes consumed.
+
+   Unknown options and options not allowed for this message type are
+   skipped.  Returns ISC_R_SUCCESS, or DHCP_R_PROTOCOLERROR when the
+   option overruns the message, is duplicated, has the wrong size, or
+   memory for its data cannot be allocated. */
+
+static isc_result_t do_a_failover_option (c, link)
+	omapi_object_t *c;
+	dhcp_failover_link_t *link;
+{
+	u_int16_t option_code;
+	u_int16_t option_len;
+	unsigned char *op;
+	unsigned op_size;
+	unsigned op_count;
+	unsigned i;
+
+	if (link -> imsg_count + 2 > link -> imsg_len) {
+		log_error ("FAILOVER: message overflow at option code.");
+		return DHCP_R_PROTOCOLERROR;
+	}
+
+	/* Get option code. */
+	omapi_connection_get_uint16 (c, &option_code);
+	link -> imsg_count += 2;
+
+	if (link -> imsg_count + 2 > link -> imsg_len) {
+		log_error ("FAILOVER: message overflow at length.");
+		return DHCP_R_PROTOCOLERROR;
+	}
+
+	/* Get option length. */
+	omapi_connection_get_uint16 (c, &option_len);
+	link -> imsg_count += 2;
+
+	if (link -> imsg_count + option_len > link -> imsg_len) {
+		log_error ("FAILOVER: message overflow at data.");
+		return DHCP_R_PROTOCOLERROR;
+	}
+
+	/* If it's an unknown code, skip over it. */
+	if ((option_code > FTO_MAX) ||
+	    (ft_options[option_code].type == FT_UNDEF)) {
+#if defined (DEBUG_FAILOVER_MESSAGES)
+		log_debug ("  option code %d (%s) len %d (not recognized)",
+			   option_code,
+			   dhcp_failover_option_name (option_code),
+			   option_len);
+#endif
+		omapi_connection_copyout ((unsigned char *)0, c, option_len);
+		link -> imsg_count += option_len;
+		return ISC_R_SUCCESS;
+	}
+
+	/* If it's the digest, do it now. */
+	if (ft_options [option_code].type == FT_DIGEST) {
+		link -> imsg_count += option_len;
+		if (link -> imsg_count != link -> imsg_len) {
+			log_error ("FAILOVER: digest not at end of message");
+			return DHCP_R_PROTOCOLERROR;
+		}
+#if defined (DEBUG_FAILOVER_MESSAGES)
+		log_debug ("  option %s len %d",
+			   ft_options [option_code].name, option_len);
+#endif
+		/* For now, just dump it. */
+		omapi_connection_copyout ((unsigned char *)0, c, option_len);
+		return ISC_R_SUCCESS;
+	}
+
+	/* Only accept an option once. */
+	if (link -> imsg -> options_present & ft_options [option_code].bit) {
+		log_error ("FAILOVER: duplicate option %s",
+			   ft_options [option_code].name);
+		return DHCP_R_PROTOCOLERROR;
+	}
+
+	/* Make sure the option is appropriate for this type of message.
+	   Really, any option is generally allowed for any message, and the
+	   cases where this is not true are too complicated to represent in
+	   this way - what this code is doing is to just avoid saving the
+	   value of an option we don't have any way to use, which allows
+	   us to make the failover_message structure smaller. */
+	/* NOTE(review): link -> imsg -> type is read off the wire and is
+	   used here as an index into fto_allowed without a visible range
+	   check - confirm the table covers every possible value. */
+	if (ft_options [option_code].bit &&
+	    !(fto_allowed [link -> imsg -> type] &
+	      ft_options [option_code].bit)) {
+		omapi_connection_copyout ((unsigned char *)0, c, option_len);
+		link -> imsg_count += option_len;
+		return ISC_R_SUCCESS;
+	}
+
+	/* Figure out how many elements, how big they are, and where
+	   to store them. */
+	if (ft_options [option_code].num_present) {
+		/* If this option takes a fixed number of elements,
+		   we expect the space for them to be preallocated,
+		   and we can just read the data in. */
+
+		op = ((unsigned char *)link -> imsg) +
+			ft_options [option_code].offset;
+		op_size = ft_sizes [ft_options [option_code].type];
+		op_count = ft_options [option_code].num_present;
+
+		if (option_len != op_size * op_count) {
+			log_error ("FAILOVER: option size (%d:%d), option %s",
+				   option_len,
+				   (ft_sizes [ft_options [option_code].type] *
+				    ft_options [option_code].num_present),
+				   ft_options [option_code].name);
+			return DHCP_R_PROTOCOLERROR;
+		}
+	} else {
+		failover_option_t *fo;
+
+		/* FT_DDNS* are special - one or two bytes of status
+		   followed by the client FQDN.   Both variants must be
+		   accepted here: FT_DDNS1 carries one status byte, the
+		   other variant carries two. */
+		if (ft_options [option_code].type == FT_DDNS ||
+		    ft_options [option_code].type == FT_DDNS1) {
+			ddns_fqdn_t *ddns =
+				((ddns_fqdn_t *)
+				 (((char *)link -> imsg) +
+				  ft_options [option_code].offset));
+
+			op_count = (ft_options [option_code].type == FT_DDNS1
+				    ? 1 : 2);
+
+			/* The option has to be at least as long as its
+			   status bytes, or the length computed for the
+			   FQDN below would wrap around. */
+			if (option_len < op_count) {
+				log_error ("FAILOVER: option %s too short",
+					   ft_options [option_code].name);
+				return DHCP_R_PROTOCOLERROR;
+			}
+
+			omapi_connection_copyout (&ddns -> codes [0],
+						  c, op_count);
+			link -> imsg_count += op_count;
+			if (op_count == 1)
+				ddns -> codes [1] = 0;
+			op_size = 1;
+			op_count = option_len - op_count;
+
+			ddns -> length = op_count;
+			ddns -> data = dmalloc (op_count, MDL);
+			if (!ddns -> data) {
+				log_error ("FAILOVER: no memory getting%s(%d)",
+					   " DNS data ", op_count);
+
+				/* Actually, NO_MEMORY, but if we lose here
+				   we have to drop the connection. */
+				return DHCP_R_PROTOCOLERROR;
+			}
+			omapi_connection_copyout (ddns -> data, c, op_count);
+			/* Account for the FQDN bytes as well, so the
+			   caller's byte count stays in step with what has
+			   been taken from the connection. */
+			link -> imsg_count += op_count;
+			goto out;
+		}
+
+		/* A zero for num_present means that any number of
+		   elements can appear, so we have to figure out how
+		   many we got from the length of the option, and then
+		   fill out a failover_option structure describing the
+		   data. */
+		op_size = ft_sizes [ft_options [option_code].type];
+
+		/* Make sure that option data length is a multiple of the
+		   size of the data type being sent. */
+		if (op_size > 1 && option_len % op_size) {
+			log_error ("FAILOVER: option_len %d not %s%d",
+				   option_len, "multiple of ", op_size);
+			return DHCP_R_PROTOCOLERROR;
+		}
+
+		op_count = option_len / op_size;
+
+		fo = ((failover_option_t *)
+		      (((char *)link -> imsg) +
+		       ft_options [option_code].offset));
+
+		fo -> count = op_count;
+		fo -> data = dmalloc (option_len, MDL);
+		if (!fo -> data) {
+			log_error ("FAILOVER: no memory getting %s (%d)",
+				   "option data", op_count);
+
+			return DHCP_R_PROTOCOLERROR;
+		}
+		op = fo -> data;
+	}
+
+	/* For single-byte message values and multi-byte values that
+	   don't need swapping, just read them in all at once. */
+	if (op_size == 1 || ft_options [option_code].type == FT_IPADDR) {
+		omapi_connection_copyout ((unsigned char *)op, c, option_len);
+		link -> imsg_count += option_len;
+
+		/*
+		 * As of 3.1.0, many option codes were changed to conform to
+		 * draft revision 12 (which alphabetized, then renumbered all
+		 * the option codes without preserving the version option code
+		 * nor bumping its value).  As it turns out, the message codes
+		 * for CONNECT and CONNECTACK turn out the same, so it tries
+		 * its darndest to connect, and falls short (when TLS_REQUEST
+		 * comes up size 2 rather than size 1 as draft revision 12 also
+		 * mandates).
+		 *
+		 * The VENDOR_CLASS code in 3.0.x was 11, which is now the HBA
+		 * code.  Both work out to be arbitrarily long text-or-byte
+		 * strings, so they pass parsing.
+		 *
+		 * Note that it is possible (or intentional), if highly
+		 * improbable, for the HBA bit array to exactly match
+		 * isc-V3.0.x.  Warning here is not an issue; if it really is
+		 * 3.0.x, there will be a protocol error later on.  If it isn't
+		 * actually 3.0.x, then I guess the lucky user will have to
+		 * live with a weird warning.
+		 */
+		if ((option_code == 11) && (option_len > 9) &&
+		    (strncmp((const char *)op, "isc-V3.0.", 9) == 0)) {
+			log_error("WARNING: failover as of versions 3.1.0 and "
+				  "on are not reverse compatible with "
+				  "versions 3.0.x.");
+		}
+
+		goto out;
+	}
+
+	/* For values that require swapping, read them in one at a time
+	   using routines that swap bytes. */
+	for (i = 0; i < op_count; i++) {
+		switch (ft_options [option_code].type) {
+		      case FT_UINT32:
+			omapi_connection_get_uint32 (c, (u_int32_t *)op);
+			op += 4;
+			link -> imsg_count += 4;
+			break;
+
+		      case FT_UINT16:
+			omapi_connection_get_uint16 (c, (u_int16_t *)op);
+			op += 2;
+			link -> imsg_count += 2;
+			break;
+
+		      default:
+			/* Everything else should have been handled
+			   already. */
+			log_error ("FAILOVER: option %s: bad type %d",
+				   ft_options [option_code].name,
+				   ft_options [option_code].type);
+			return DHCP_R_PROTOCOLERROR;
+		}
+	}
+      out:
+	/* Remember that we got this option. */
+	link -> imsg -> options_present |= ft_options [option_code].bit;
+	return ISC_R_SUCCESS;
+}
+
+/* set_value handler for failover link objects.
+
+   The published link values ("link-port", "link-name", "link-state")
+   are read-only, so attempts to set them return ISC_R_NOPERM.  Any
+   other name is passed to the inner object's set_value handler, or
+   ISC_R_NOTFOUND is returned when there is none.  Returns
+   DHCP_R_INVALIDARG if H is not a failover link object. */
+
+isc_result_t dhcp_failover_link_set_value (omapi_object_t *h,
+					   omapi_object_t *id,
+					   omapi_data_string_t *name,
+					   omapi_typed_data_t *value)
+{
+	/* Check for the failover link type, as the other link handlers
+	   in this file (signal, destroy, stuff_values) do. */
+	if (h -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+
+	/* Never valid to set these. */
+	if (!omapi_ds_strcmp (name, "link-port") ||
+	    !omapi_ds_strcmp (name, "link-name") ||
+	    !omapi_ds_strcmp (name, "link-state"))
+		return ISC_R_NOPERM;
+
+	if (h -> inner && h -> inner -> type -> set_value)
+		return (*(h -> inner -> type -> set_value))
+			(h -> inner, id, name, value);
+	return ISC_R_NOTFOUND;
+}
+
+/* get_value handler for failover link objects.
+
+   "link-port" yields the peer port as an integer value and
+   "link-state" yields the name of the link's current input state (or
+   "invalid link state" if it is out of range).  Any other name is
+   passed to the inner object's get_value handler, or ISC_R_NOTFOUND is
+   returned when there is none.  Returns DHCP_R_INVALIDARG if H is not
+   a failover link object. */
+
+isc_result_t dhcp_failover_link_get_value (omapi_object_t *h,
+					   omapi_object_t *id,
+					   omapi_data_string_t *name,
+					   omapi_value_t **value)
+{
+	dhcp_failover_link_t *link;
+
+	/* H is cast to a failover link just below, so it has to be
+	   verified as one; this also matches the check used by the other
+	   link handlers in this file. */
+	if (h -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)h;
+
+	if (!omapi_ds_strcmp (name, "link-port")) {
+		return omapi_make_int_value (value, name,
+					     (int)link -> peer_port, MDL);
+	} else if (!omapi_ds_strcmp (name, "link-state")) {
+		/* Guard the name table lookup against a bad state. */
+		if (link -> state < 0 ||
+		    link -> state >= dhcp_flink_state_max)
+			return omapi_make_string_value (value, name,
+							"invalid link state",
+							MDL);
+		return omapi_make_string_value
+			(value, name,
+			 dhcp_flink_state_names [link -> state], MDL);
+	}
+
+	if (h -> inner && h -> inner -> type -> get_value)
+		return (*(h -> inner -> type -> get_value))
+			(h -> inner, id, name, value);
+	return ISC_R_NOTFOUND;
+}
+
+/* Destroy handler for failover link objects: release whichever of the
+   peer address, pending input message and state object the link still
+   holds.  FILE and LINE are passed on to the dereference routines.
+   Returns DHCP_R_INVALIDARG if H is not a failover link, otherwise
+   ISC_R_SUCCESS. */
+
+isc_result_t dhcp_failover_link_destroy (omapi_object_t *h,
+					 const char *file, int line)
+{
+	dhcp_failover_link_t *doomed;
+
+	if (h -> type == dhcp_type_failover_link) {
+		doomed = (dhcp_failover_link_t *)h;
+
+		if (doomed -> peer_address)
+			option_cache_dereference (&doomed -> peer_address,
+						  file, line);
+		if (doomed -> imsg)
+			failover_message_dereference (&doomed -> imsg,
+						      file, line);
+		if (doomed -> state_object)
+			dhcp_failover_state_dereference
+				(&doomed -> state_object, file, line);
+		return ISC_R_SUCCESS;
+	}
+	return DHCP_R_INVALIDARG;
+}
+
+/* Write all the published values associated with the object through the
+ specified connection. */
+
+/* Emit the link's published values on connection C: "link-port" as a
+   length-prefixed integer and "link-state" as a string, then let the
+   inner object (if it has a stuff_values handler) add its own.
+   Returns DHCP_R_INVALIDARG if L is not a failover link, the first
+   failing write status, or the inner handler's status / ISC_R_SUCCESS. */
+
+isc_result_t dhcp_failover_link_stuff_values (omapi_object_t *c,
+					      omapi_object_t *id,
+					      omapi_object_t *l)
+{
+	dhcp_failover_link_t *flink;
+	isc_result_t rc;
+
+	if (l -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	flink = (dhcp_failover_link_t *)l;
+
+	/* link-port: name, value length, value. */
+	if ((rc = omapi_connection_put_name (c, "link-port"))
+	    != ISC_R_SUCCESS)
+		return rc;
+	if ((rc = omapi_connection_put_uint32 (c, sizeof (int)))
+	    != ISC_R_SUCCESS)
+		return rc;
+	if ((rc = omapi_connection_put_uint32 (c, flink -> peer_port))
+	    != ISC_R_SUCCESS)
+		return rc;
+
+	/* link-state: name, then the state's text (with a fallback for an
+	   out-of-range state). */
+	if ((rc = omapi_connection_put_name (c, "link-state"))
+	    != ISC_R_SUCCESS)
+		return rc;
+	if (flink -> state >= 0 && flink -> state < dhcp_flink_state_max)
+		rc = (omapi_connection_put_string
+		      (c, dhcp_flink_state_names [flink -> state]));
+	else
+		rc = omapi_connection_put_string (c, "invalid link state");
+	if (rc != ISC_R_SUCCESS)
+		return rc;
+
+	if (!flink -> inner || !flink -> inner -> type -> stuff_values)
+		return ISC_R_SUCCESS;
+	return (*(flink -> inner -> type -> stuff_values)) (c, id,
+							    flink -> inner);
+}
+
+/* Set up a listener for the failover protocol. The reference stored in
+ h -> outer points to a listener object, not a link object. */
+
+/* Make sure there is a listener for the "local-port" / "local-address"
+   values published by H.
+
+   If a listener for the same port and address is already on the
+   failover_listeners list nothing more is done.  Otherwise a new
+   listener object is allocated, started with omapi_listen_addr(),
+   linked to H (h -> outer / listener -> inner) and put at the head of
+   the list.
+
+   Returns ISC_R_SUCCESS when a listener exists or was created,
+   DHCP_R_INVALIDARG when a value is missing or the address is not a
+   data value of IPv4 address size, or the status of whichever call
+   failed. */
+
+isc_result_t dhcp_failover_listen (omapi_object_t *h)
+{
+	isc_result_t status;
+	dhcp_failover_listener_t *obj, *l;
+	omapi_value_t *value = (omapi_value_t *)0;
+	omapi_addr_t local_addr;
+	unsigned long port;
+
+	/* The whole structure is copied into the listener below, so start
+	   from a fully initialized value. */
+	memset (&local_addr, 0, sizeof local_addr);
+
+	status = omapi_get_value_str (h, (omapi_object_t *)0,
+				      "local-port", &value);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	if (!value -> value) {
+		omapi_value_dereference (&value, MDL);
+		return DHCP_R_INVALIDARG;
+	}
+
+	status = omapi_get_int_value (&port, value -> value);
+	omapi_value_dereference (&value, MDL);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	local_addr.port = port;
+
+	status = omapi_get_value_str (h, (omapi_object_t *)0,
+				      "local-address", &value);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	if (!value -> value) {
+	      nogood:
+		omapi_value_dereference (&value, MDL);
+		return DHCP_R_INVALIDARG;
+	}
+
+	/* Only a raw data value of exactly IPv4 address size is usable. */
+	if (value -> value -> type != omapi_datatype_data ||
+	    value -> value -> u.buffer.len != sizeof (struct in_addr))
+		goto nogood;
+
+	memcpy (local_addr.address, value -> value -> u.buffer.value,
+		value -> value -> u.buffer.len);
+	local_addr.addrlen = value -> value -> u.buffer.len;
+	local_addr.addrtype = AF_INET;
+
+	omapi_value_dereference (&value, MDL);
+
+	/* Are we already listening on this port and address? */
+	for (l = failover_listeners; l; l = l -> next) {
+		if (l -> address.port == local_addr.port &&
+		    l -> address.addrtype == local_addr.addrtype &&
+		    l -> address.addrlen == local_addr.addrlen &&
+		    !memcmp (l -> address.address, local_addr.address,
+			     local_addr.addrlen))
+			break;
+	}
+	/* Already listening. */
+	if (l)
+		return ISC_R_SUCCESS;
+
+	obj = (dhcp_failover_listener_t *)0;
+	status = dhcp_failover_listener_allocate (&obj, MDL);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	obj -> address = local_addr;
+
+	status = omapi_listen_addr ((omapi_object_t *)obj, &obj -> address, 1);
+	if (status != ISC_R_SUCCESS) {
+		/* Release the listener allocated above, as the other
+		   error paths below do, instead of leaking it. */
+		dhcp_failover_listener_dereference (&obj, MDL);
+		return status;
+	}
+
+	status = omapi_object_reference (&h -> outer,
+					 (omapi_object_t *)obj, MDL);
+	if (status != ISC_R_SUCCESS) {
+		dhcp_failover_listener_dereference (&obj, MDL);
+		return status;
+	}
+	status = omapi_object_reference (&obj -> inner, h, MDL);
+	if (status != ISC_R_SUCCESS) {
+		dhcp_failover_listener_dereference (&obj, MDL);
+		return status;
+	}
+
+	/* Put this listener on the list: the old head moves into
+	   obj -> next, then the list head is repointed at obj. */
+	if (failover_listeners) {
+		dhcp_failover_listener_reference (&obj -> next,
+						  failover_listeners, MDL);
+		dhcp_failover_listener_dereference (&failover_listeners, MDL);
+	}
+	dhcp_failover_listener_reference (&failover_listeners, obj, MDL);
+
+	/* Drop the reference taken by the allocation above. */
+	return dhcp_failover_listener_dereference (&obj, MDL);
+}
+
+/* Signal handler for the failover listener.  Any signal other than
+   "connect" is handed to the inner object's signal handler when there
+   is one (ISC_R_NOTFOUND otherwise).  For "connect", the new connection
+   is matched by its remote address against the failover_states list;
+   when a state matches, a link object is allocated, tied to both the
+   connection and that state, and sent a "connect" signal of its own.
+   A connection with no matching state is disconnected. */
+
+isc_result_t dhcp_failover_listener_signal (omapi_object_t *o,
+					    const char *name, va_list ap)
+{
+	isc_result_t status;
+	omapi_connection_object_t *conn;
+	dhcp_failover_link_t *link = (dhcp_failover_link_t *)0;
+	dhcp_failover_listener_t *listener;
+	dhcp_failover_state_t *match;
+
+	if (!o || o -> type != dhcp_type_failover_listener)
+		return DHCP_R_INVALIDARG;
+	listener = (dhcp_failover_listener_t *)o;
+
+	/* Everything except "connect" belongs to whoever is below us. */
+	if (strcmp (name, "connect") != 0) {
+		if (!listener -> inner ||
+		    !listener -> inner -> type -> signal_handler)
+			return ISC_R_NOTFOUND;
+		return (*(listener -> inner -> type -> signal_handler))
+			(listener -> inner, name, ap);
+	}
+
+	conn = va_arg (ap, omapi_connection_object_t *);
+	if (!conn || conn -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	/* Walk the failover states until one matches the peer's address;
+	   match is left null if the list is exhausted. */
+	for (match = failover_states; match; match = match -> next) {
+		if (dhcp_failover_state_match
+		    (match, (u_int8_t *)&conn -> remote_addr.sin_addr,
+		     sizeof conn -> remote_addr.sin_addr))
+			break;
+	}
+	if (!match) {
+		log_info ("failover: listener: no matching state");
+		omapi_disconnect ((omapi_object_t *)conn, 1);
+		return(ISC_R_NOTFOUND);
+	}
+
+	status = dhcp_failover_link_allocate (&link, MDL);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	link -> peer_port = ntohs (conn -> remote_addr.sin_port);
+
+	/* Hook the link up to the connection (both directions) and to
+	   the state, stopping at the first reference that fails. */
+	status = omapi_object_reference (&link -> outer,
+					 (omapi_object_t *)conn, MDL);
+	if (status == ISC_R_SUCCESS)
+		status = omapi_object_reference (&conn -> inner,
+						 (omapi_object_t *)link, MDL);
+	if (status == ISC_R_SUCCESS)
+		status = dhcp_failover_state_reference (&link -> state_object,
+							match, MDL);
+	if (status != ISC_R_SUCCESS) {
+		dhcp_failover_link_dereference (&link, MDL);
+		log_info ("failover: listener: picayune failure.");
+		omapi_disconnect ((omapi_object_t *)conn, 1);
+		return status;
+	}
+
+	omapi_signal_in ((omapi_object_t *)link, "connect");
+
+	/* Drop the allocation reference now that the link is hooked up. */
+	return dhcp_failover_link_dereference (&link, MDL);
+}
+
+/* Set a value on a failover listener.  The listener itself accepts no
+   values: the request is forwarded to the inner object's set_value
+   method when one exists, and ISC_R_NOTFOUND is returned otherwise.
+   Returns DHCP_R_INVALIDARG if h is not a failover listener. */
+
+isc_result_t dhcp_failover_listener_set_value (omapi_object_t *h,
+					       omapi_object_t *id,
+					       omapi_data_string_t *name,
+					       omapi_typed_data_t *value)
+{
+	omapi_object_t *below;
+
+	if (h -> type != dhcp_type_failover_listener)
+		return DHCP_R_INVALIDARG;
+
+	below = h -> inner;
+	if (!below || !below -> type -> set_value)
+		return ISC_R_NOTFOUND;
+
+	return (*(below -> type -> set_value)) (below, id, name, value);
+}
+
+/* Get a value from a failover listener.  The listener itself publishes
+   no values: the request is forwarded to the inner object's get_value
+   method when one exists, and ISC_R_NOTFOUND is returned otherwise.
+   Returns DHCP_R_INVALIDARG if h is not a failover listener. */
+
+isc_result_t dhcp_failover_listener_get_value (omapi_object_t *h,
+					       omapi_object_t *id,
+					       omapi_data_string_t *name,
+					       omapi_value_t **value)
+{
+	omapi_object_t *below;
+
+	if (h -> type != dhcp_type_failover_listener)
+		return DHCP_R_INVALIDARG;
+
+	below = h -> inner;
+	if (!below || !below -> type -> get_value)
+		return ISC_R_NOTFOUND;
+
+	return (*(below -> type -> get_value)) (below, id, name, value);
+}
+
+/* Destructor for a failover listener: releases the reference the
+   listener holds on its successor in the listener list, if any.
+   file and line identify the caller and are passed through to the
+   dereference.  Returns DHCP_R_INVALIDARG if h is not a failover
+   listener, ISC_R_SUCCESS otherwise. */
+
+isc_result_t dhcp_failover_listener_destroy (omapi_object_t *h,
+					     const char *file, int line)
+{
+	dhcp_failover_listener_t *listener;
+
+	if (h -> type != dhcp_type_failover_listener)
+		return DHCP_R_INVALIDARG;
+
+	listener = (dhcp_failover_listener_t *)h;
+	if (listener -> next) {
+		dhcp_failover_listener_dereference (&listener -> next,
+						    file, line);
+	}
+
+	return ISC_R_SUCCESS;
+}
+
+/* Write all the published values associated with the object through the
+   specified connection.  A failover listener publishes nothing of its
+   own, so this only forwards to the inner object's stuff_values method
+   when there is one; with nothing below, it succeeds trivially.
+   Returns DHCP_R_INVALIDARG if p is not a failover listener. */
+
+isc_result_t dhcp_failover_listener_stuff (omapi_object_t *c,
+					   omapi_object_t *id,
+					   omapi_object_t *p)
+{
+	omapi_object_t *below;
+
+	if (p -> type != dhcp_type_failover_listener)
+		return DHCP_R_INVALIDARG;
+
+	below = p -> inner;
+	if (!below || !below -> type -> stuff_values)
+		return ISC_R_SUCCESS;
+
+	return (*(below -> type -> stuff_values)) (c, id, below);
+}
+
+/* Set up master state machine for the failover protocol.  The port is
+   read from the "local-port" value published by h; a new failover state
+   object is allocated, told to listen on that port, and cross-linked
+   with h (h -> outer points at the state object, the state's inner
+   pointer at h).
+
+   Returns DHCP_R_INVALIDARG if "local-port" has no value, otherwise the
+   status of the first step that fails, or of the final reference. */
+
+isc_result_t dhcp_failover_register (omapi_object_t *h)
+{
+	isc_result_t status;
+	dhcp_failover_state_t *obj;
+	unsigned long port;
+	omapi_value_t *value = (omapi_value_t *)0;
+
+	status = omapi_get_value_str (h, (omapi_object_t *)0,
+				      "local-port", &value);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	if (!value -> value) {
+		omapi_value_dereference (&value, MDL);
+		return DHCP_R_INVALIDARG;
+	}
+
+	status = omapi_get_int_value (&port, value -> value);
+	omapi_value_dereference (&value, MDL);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	/* Check the allocation before touching the object: on failure
+	   obj is still null and must not be dereferenced. */
+	obj = (dhcp_failover_state_t *)0;
+	status = dhcp_failover_state_allocate (&obj, MDL);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	obj -> me.port = port;
+
+	status = omapi_listen ((omapi_object_t *)obj, port, 1);
+	if (status != ISC_R_SUCCESS) {
+		dhcp_failover_state_dereference (&obj, MDL);
+		return status;
+	}
+
+	status = omapi_object_reference (&h -> outer, (omapi_object_t *)obj,
+					 MDL);
+	if (status != ISC_R_SUCCESS) {
+		dhcp_failover_state_dereference (&obj, MDL);
+		return status;
+	}
+	status = omapi_object_reference (&obj -> inner, h, MDL);
+
+	/* Whether or not the last reference worked, our local reference
+	   is no longer needed. */
+	dhcp_failover_state_dereference (&obj, MDL);
+	return status;
+}
+
+/* Signal handler for protocol state machine.
+
+   "disconnect": drops the state's link to the peer, runs the
+   "disconnect" state transition and, on the primary, schedules
+   dhcp_failover_reconnect 90 seconds from now.
+
+   "message": dispatches on the type of the message held in the link's
+   imsg (CONNECT, CONNECTACK, DISCONNECT, BNDUPD, BNDACK, UPDREQ,
+   UPDREQALL, UPDDONE, POOLREQ, POOLRESP, STATE) and, if the message
+   arrived on the state's current, still-connected link, re-arms
+   dhcp_failover_timeout.
+
+   Any other signal is passed to the inner object's signal handler when
+   there is one; otherwise ISC_R_NOTFOUND is returned.  Returns
+   DHCP_R_INVALIDARG if o is not a failover state object; handled
+   signals return ISC_R_SUCCESS. */
+
+isc_result_t dhcp_failover_state_signal (omapi_object_t *o,
+					 const char *name, va_list ap)
+{
+	isc_result_t status;
+	dhcp_failover_state_t *state;
+	dhcp_failover_link_t *link;
+	struct timeval tv;
+
+	if (!o || o -> type != dhcp_type_failover_state)
+		return DHCP_R_INVALIDARG;
+	state = (dhcp_failover_state_t *)o;
+
+	/* Not a signal we recognize? */
+	if (strcmp (name, "disconnect") &&
+	    strcmp (name, "message")) {
+		if (state -> inner && state -> inner -> type -> signal_handler)
+			return (*(state -> inner -> type -> signal_handler))
+				(state -> inner, name, ap);
+		return ISC_R_NOTFOUND;
+	}
+
+	/* Handle disconnect signals by dropping the peer link, seeing
+	   what state we're in and potentially doing a state transition. */
+	if (!strcmp (name, "disconnect")) {
+		/* The signalling link is fetched from the argument list
+		   but not otherwise consulted here. */
+		link = va_arg (ap, dhcp_failover_link_t *);
+
+		/* NOTE(review): link_to_peer is released unconditionally,
+		   without comparing it to the link that signalled, and
+		   before the transition below runs, so the transition's
+		   own link_to_peer cleanup sees a null pointer - confirm
+		   this is intended. */
+		dhcp_failover_link_dereference (&state -> link_to_peer, MDL);
+		dhcp_failover_state_transition (state, "disconnect");
+		if (state -> i_am == primary) {
+#if defined (DEBUG_FAILOVER_TIMING)
+			log_info ("add_timeout +90 %s",
+				  "dhcp_failover_reconnect");
+#endif
+			tv . tv_sec = cur_time + 90;
+			tv . tv_usec = 0;
+			add_timeout (&tv, dhcp_failover_reconnect,
+				     state,
+				     (tvref_t)dhcp_failover_state_reference,
+				     (tvunref_t)
+				     dhcp_failover_state_dereference);
+		}
+	} else if (!strcmp (name, "message")) {
+		link = va_arg (ap, dhcp_failover_link_t *);
+
+		if (link -> imsg -> type == FTM_CONNECT) {
+			/* If we already have a link to the peer, reject
+			   the new connection as a duplicate.
+			   XXX Is this the right thing to do?
+			   XXX Probably not - what if both peers start at
+			   XXX the same time? */
+			if (state -> link_to_peer) {
+				dhcp_failover_send_connectack
+					((omapi_object_t *)link, state,
+					 FTR_DUP_CONNECTION,
+					 "already connected");
+				omapi_disconnect (link -> outer, 1);
+				return ISC_R_SUCCESS;
+			}
+			/* A CONNECT without an MCLT option is refused. */
+			if (!(link -> imsg -> options_present & FTB_MCLT)) {
+				dhcp_failover_send_connectack
+					((omapi_object_t *)link, state,
+					 FTR_INVALID_MCLT,
+					 "no MCLT provided");
+				omapi_disconnect (link -> outer, 1);
+				return ISC_R_SUCCESS;
+			}
+
+			/* Adopt the link, then acknowledge; if the ack
+			   can't be sent, give the link up again. */
+			dhcp_failover_link_reference (&state -> link_to_peer,
+						      link, MDL);
+			status = (dhcp_failover_send_connectack
+				  ((omapi_object_t *)link, state, 0, 0));
+			if (status != ISC_R_SUCCESS) {
+				dhcp_failover_link_dereference
+					(&state -> link_to_peer, MDL);
+				log_info ("dhcp_failover_send_connectack: %s",
+					  isc_result_totext (status));
+				omapi_disconnect (link -> outer, 1);
+				return ISC_R_SUCCESS;
+			}
+			/* Record the parameters the peer announced. */
+			if (link -> imsg -> options_present & FTB_MAX_UNACKED)
+				state -> partner.max_flying_updates =
+					link -> imsg -> max_unacked;
+			if (link -> imsg -> options_present & FTB_RECEIVE_TIMER)
+				state -> partner.max_response_delay =
+					link -> imsg -> receive_timer;
+			state -> mclt = link -> imsg -> mclt;
+			dhcp_failover_send_state (state);
+			cancel_timeout (dhcp_failover_link_startup_timeout,
+					link);
+		} else if (link -> imsg -> type == FTM_CONNECTACK) {
+			const char *errmsg;
+			char errbuf[1024];
+			int reason;
+
+			cancel_timeout (dhcp_failover_link_startup_timeout,
+					link);
+
+			if (!(link->imsg->options_present &
+			      FTB_RELATIONSHIP_NAME)) {
+				errmsg = "missing relationship-name";
+				reason = FTR_INVALID_PARTNER;
+				goto badconnectack;
+			}
+
+			if (link->imsg->options_present & FTB_REJECT_REASON) {
+				/* XXX: add message option to text output. */
+				log_error ("Failover CONNECT to %s rejected: %s",
+					   state -> name,
+					   (dhcp_failover_reject_reason_print
+					    (link -> imsg -> reject_reason)));
+				/* XXX print message from peer if peer sent message. */
+				omapi_disconnect (link -> outer, 1);
+				return ISC_R_SUCCESS;
+			}
+
+			if (!dhcp_failover_state_match_by_name(state,
+				&link->imsg->relationship_name)) {
+				/* XXX: Overflow results in log truncation, safe. */
+				snprintf(errbuf, sizeof(errbuf), "remote failover "
+					 "relationship name %.*s does not match",
+					 (int)link->imsg->relationship_name.count,
+					 link->imsg->relationship_name.data);
+				errmsg = errbuf;
+				reason = FTR_INVALID_PARTNER;
+				/* Common exit for every rejected CONNECTACK:
+				   log it, tell the peer why, and hang up. */
+			      badconnectack:
+				log_error("Failover CONNECTACK from %s: %s",
+					  state->name, errmsg);
+				dhcp_failover_send_disconnect ((omapi_object_t *)link,
+							       reason, errmsg);
+				omapi_disconnect (link -> outer, 0);
+				return ISC_R_SUCCESS;
+			}
+
+			if (state -> link_to_peer) {
+				errmsg = "already connected";
+				reason = FTR_DUP_CONNECTION;
+				goto badconnectack;
+			}
+
+			/* Refuse a peer whose clock differs from ours by
+			   more than 60 seconds in either direction. */
+			if ((cur_time > link -> imsg -> time &&
+			     cur_time - link -> imsg -> time > 60) ||
+			    (cur_time < link -> imsg -> time &&
+			     link -> imsg -> time - cur_time > 60)) {
+				errmsg = "time offset too large";
+				reason = FTR_TIMEMISMATCH;
+				goto badconnectack;
+			}
+
+			dhcp_failover_link_reference (&state -> link_to_peer,
+						      link, MDL);
+#if 0
+			/* XXX This is probably the right thing to do, but
+			   XXX for release three, to make the smallest possible
+			   XXX change, we are doing this when the peer state
+			   XXX changes instead. */
+			if (state -> me.state == startup)
+				dhcp_failover_set_state (state,
+							 state -> saved_state);
+			else
+#endif
+				dhcp_failover_send_state (state);
+
+			/* Record the parameters the peer announced. */
+			if (link -> imsg -> options_present & FTB_MAX_UNACKED)
+				state -> partner.max_flying_updates =
+					link -> imsg -> max_unacked;
+			if (link -> imsg -> options_present & FTB_RECEIVE_TIMER)
+				state -> partner.max_response_delay =
+					link -> imsg -> receive_timer;
+
+			/* Schedule dhcp_failover_send_contact at a third of
+			   the partner's maximum response delay. */
+#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
+			log_info ("add_timeout +%d %s",
+				  (int)state -> partner.max_response_delay / 3,
+				  "dhcp_failover_send_contact");
+#endif
+			tv . tv_sec = cur_time +
+				(int)state -> partner.max_response_delay / 3;
+			tv . tv_usec = 0;
+			add_timeout (&tv,
+				     dhcp_failover_send_contact, state,
+				     (tvref_t)dhcp_failover_state_reference,
+				     (tvunref_t)dhcp_failover_state_dereference);
+
+			/* Schedule dhcp_failover_timeout at our own maximum
+			   response delay. */
+#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
+			log_info ("add_timeout +%d %s",
+				  (int)state -> me.max_response_delay,
+				  "dhcp_failover_timeout");
+#endif
+			tv . tv_sec = cur_time +
+				(int)state -> me.max_response_delay;
+			tv . tv_usec = 0;
+			add_timeout (&tv,
+				     dhcp_failover_timeout, state,
+				     (tvref_t)dhcp_failover_state_reference,
+				     (tvunref_t)dhcp_failover_state_dereference);
+		} else if (link -> imsg -> type == FTM_DISCONNECT) {
+			if (link -> imsg -> reject_reason) {
+				log_error ("Failover DISCONNECT from %s: %s",
+					   state -> name,
+					   (dhcp_failover_reject_reason_print
+					    (link -> imsg -> reject_reason)));
+			}
+			omapi_disconnect (link -> outer, 1);
+		} else if (link -> imsg -> type == FTM_BNDUPD) {
+			dhcp_failover_process_bind_update (state,
+							   link -> imsg);
+		} else if (link -> imsg -> type == FTM_BNDACK) {
+			dhcp_failover_process_bind_ack (state, link -> imsg);
+		} else if (link -> imsg -> type == FTM_UPDREQ) {
+			dhcp_failover_process_update_request (state,
+							      link -> imsg);
+		} else if (link -> imsg -> type == FTM_UPDREQALL) {
+			dhcp_failover_process_update_request_all
+				(state, link -> imsg);
+		} else if (link -> imsg -> type == FTM_UPDDONE) {
+			dhcp_failover_process_update_done (state,
+							   link -> imsg);
+		} else if (link -> imsg -> type == FTM_POOLREQ) {
+			dhcp_failover_pool_reqbalance(state);
+		} else if (link -> imsg -> type == FTM_POOLRESP) {
+			/* The count is cast to unsigned long, so it has to
+			   be printed with an unsigned conversion. */
+			log_info ("pool response: %lu leases",
+				  (unsigned long)
+				  link -> imsg -> addresses_transferred);
+		} else if (link -> imsg -> type == FTM_STATE) {
+			dhcp_failover_peer_state_changed (state,
+							  link -> imsg);
+		}
+
+		/* Add a timeout so that if the partner doesn't send
+		   another message within our maximum response delay,
+		   dhcp_failover_timeout runs.  This is only done when the
+		   message came in on the link we currently hold to the
+		   peer and that link is not marked disconnected. */
+		if (state -> link_to_peer &&
+		    state -> link_to_peer == link &&
+		    state -> link_to_peer -> state != dhcp_flink_disconnected)
+		{
+#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
+			log_info ("add_timeout +%d %s",
+				  (int)state -> me.max_response_delay,
+				  "dhcp_failover_timeout");
+#endif
+			tv . tv_sec = cur_time +
+				(int)state -> me.max_response_delay;
+			tv . tv_usec = 0;
+			add_timeout (&tv,
+				     dhcp_failover_timeout, state,
+				     (tvref_t)dhcp_failover_state_reference,
+				     (tvunref_t)dhcp_failover_state_dereference);
+
+		}
+	}
+
+	/* Both recognized signals report success once handled. */
+	return ISC_R_SUCCESS;
+}
+
+/* Apply a named event to the local failover state machine.  The events
+   understood are "disconnect", "connect", "startup" and
+   "connect-timeout"; what happens depends on our current state (for
+   "disconnect" while in startup, on the state saved when startup was
+   entered).  Returns the status of whatever action the event triggers,
+   ISC_R_SUCCESS when no action is needed, and DHCP_R_INVALIDARG for an
+   unrecognized event name. */
+
+isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
+					      const char *name)
+{
+	isc_result_t result;
+	enum failover_state effective;
+
+	/* XXX Check these state transitions against the spec! */
+
+	if (strcmp (name, "disconnect") == 0) {
+		/* Let go of the peer link, and of the reference that link
+		   holds back to a state object. */
+		if (state -> link_to_peer) {
+			log_info ("peer %s: disconnected", state -> name);
+			if (state -> link_to_peer -> state_object)
+				dhcp_failover_state_dereference
+					(&state -> link_to_peer -> state_object,
+					 MDL);
+			dhcp_failover_link_dereference (&state -> link_to_peer,
+							MDL);
+		}
+
+		/* None of the per-connection timers apply any more. */
+		cancel_timeout (dhcp_failover_send_contact, state);
+		cancel_timeout (dhcp_failover_timeout, state);
+		cancel_timeout (dhcp_failover_startup_timeout, state);
+
+		/* While in startup, decide on the basis of the state we
+		   were in before startup. */
+		if (state -> me.state == startup)
+			effective = state -> saved_state;
+		else
+			effective = state -> me.state;
+
+		switch (effective) {
+		case potential_conflict:
+		case unknown_state:
+			return dhcp_failover_set_state
+				(state, resolution_interrupted);
+
+		case normal:
+			return dhcp_failover_set_state
+				(state, communications_interrupted);
+
+		/* In these situations, we remain in the current
+		 * state, or if in startup enter those states.
+		 */
+		case communications_interrupted:
+		case conflict_done:
+		case partner_down:
+		case paused:
+		case recover:
+		case recover_done:
+		case recover_wait:
+		case resolution_interrupted:
+		case shut_down:
+			if (state -> me.state != startup)
+				return ISC_R_SUCCESS;
+			return (dhcp_failover_set_state
+				(state, state -> saved_state));
+
+		default:
+			log_fatal("Impossible case at %s:%d.", MDL);
+			break;	/* can't happen. */
+		}
+		return DHCP_R_INVALIDARG;
+	}
+
+	if (strcmp (name, "connect") == 0) {
+		switch (state -> me.state) {
+		case communications_interrupted:
+			/* Back in touch: go to normal and push updates. */
+			result = dhcp_failover_set_state (state, normal);
+			dhcp_failover_send_updates (state);
+			return result;
+
+		case resolution_interrupted:
+			return dhcp_failover_set_state (state,
+							potential_conflict);
+
+		/* Every other state just announces itself to the peer. */
+		case conflict_done:
+		case partner_down:
+		case potential_conflict:
+		case normal:
+		case recover:
+		case shut_down:
+		case paused:
+		case unknown_state:
+		case recover_done:
+		case startup:
+		case recover_wait:
+			return dhcp_failover_send_state (state);
+
+		default:
+			log_fatal("Impossible case at %s:%d.", MDL);
+			break;
+		}
+		return DHCP_R_INVALIDARG;
+	}
+
+	if (strcmp (name, "startup") == 0) {
+		/* The result of the state change is not propagated. */
+		dhcp_failover_set_state (state, startup);
+		return ISC_R_SUCCESS;
+	}
+
+	if (strcmp (name, "connect-timeout") == 0) {
+		switch (state -> me.state) {
+		case normal:
+		case recover:
+		case recover_wait:
+		case recover_done:
+		case unknown_state:
+			return dhcp_failover_set_state
+				(state, communications_interrupted);
+
+		case potential_conflict:
+			return dhcp_failover_set_state
+				(state, resolution_interrupted);
+
+		/* Nothing changes in these states. */
+		case communications_interrupted:
+		case partner_down:
+		case resolution_interrupted:
+		case paused:
+		case startup:
+		case shut_down:
+		case conflict_done:
+			return ISC_R_SUCCESS;
+
+		default:
+			log_fatal("Impossible case at %s:%d.", MDL);
+			break;
+		}
+		return DHCP_R_INVALIDARG;
+	}
+
+	/* Not an event we know about. */
+	return DHCP_R_INVALIDARG;
+}
+
+/* Derive the service state of a failover relationship, and the text
+   stored in state -> nrr to go with it, from our own failover state
+   and then from the partner's.  Always returns ISC_R_SUCCESS; an
+   unrecognized local state is reported through log_fatal. */
+
+isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state)
+{
+	/* First pass: what our own state implies. */
+	switch (state -> me.state) {
+	case unknown_state:
+		state -> service_state = not_responding;
+		state -> nrr = " (my state unknown)";
+		break;
+
+	case partner_down:
+		state -> service_state = service_partner_down;
+		state -> nrr = "";
+		break;
+
+	case normal:
+		state -> service_state = cooperating;
+		state -> nrr = "";
+		break;
+
+	case communications_interrupted:
+		state -> service_state = not_cooperating;
+		state -> nrr = "";
+		break;
+
+	case resolution_interrupted:
+	case potential_conflict:
+	case conflict_done:
+		state -> service_state = not_responding;
+		state -> nrr = " (resolving conflicts)";
+		break;
+
+	case recover:
+		state -> service_state = not_responding;
+		state -> nrr = " (recovering)";
+		break;
+
+	case shut_down:
+		state -> service_state = not_responding;
+		state -> nrr = " (shut down)";
+		break;
+
+	case paused:
+		state -> service_state = not_responding;
+		state -> nrr = " (paused)";
+		break;
+
+	case recover_wait:
+		state -> service_state = not_responding;
+		state -> nrr = " (recover wait)";
+		break;
+
+	case recover_done:
+		state -> service_state = not_responding;
+		state -> nrr = " (recover done)";
+		break;
+
+	case startup:
+		state -> service_state = service_startup;
+		state -> nrr = " (startup)";
+		break;
+
+	default:
+		/* No trailing newline, matching the other "Impossible
+		   case" reports in this file. */
+		log_fatal("Impossible case at %s:%d.", MDL);
+		break;
+	}
+
+	/* Some peer states can require us not to respond, even if our
+	   state doesn't. */
+	/* XXX hm.   I suspect this isn't true anymore. */
+	if (state -> service_state != not_responding) {
+		switch (state -> partner.state) {
+		case partner_down:
+			state -> service_state = not_responding;
+			state -> nrr = " (peer demands: recovering)";
+			break;
+
+		case potential_conflict:
+		case conflict_done:
+		case resolution_interrupted:
+			state -> service_state = not_responding;
+			state -> nrr = " (peer demands: resolving conflicts)";
+			break;
+
+		/* Other peer states don't affect our behaviour. */
+		default:
+			break;
+		}
+	}
+
+	return ISC_R_SUCCESS;
+}
+
+/* Move the local side of a failover relationship into new_state.
+
+   In order: any updates awaiting acknowledgement are put back at the
+   front of the update queue (when leaving normal, potential_conflict
+   or partner_down); the new state is recorded with
+   write_failover_state and commit_leases; the service state is
+   recomputed; the peer is told if a link is up; and the actions
+   specific to the state being entered are performed.
+
+   Returns ISC_R_IOERROR, with me.state and me.stos restored, if the
+   new state cannot be recorded; ISC_R_SUCCESS otherwise. */
+
+isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
+				       enum failover_state new_state)
+{
+	enum failover_state saved_state;
+	TIME saved_stos;
+	struct pool *p;
+	struct shared_network *s;
+	struct lease *l;
+	struct timeval tv;
+
+	/* If we're in certain states where we're sending updates, and the
+	 * state changes, we need to re-schedule any pending updates just to
+	 * be on the safe side.  This results in retransmission.
+	 */
+	switch (state -> me.state) {
+	case normal:
+	case potential_conflict:
+	case partner_down:
+		if (state -> ack_queue_tail) {
+			struct lease *lp;
+
+			/* Zap the flags. */
+			for (lp = state -> ack_queue_head; lp;
+			     lp = lp -> next_pending)
+				lp -> flags = ((lp -> flags & ~ON_ACK_QUEUE) |
+					       ON_UPDATE_QUEUE);
+
+			/* Now hook the ack queue to the beginning of the
+			   update queue. */
+			if (state -> update_queue_head) {
+				lease_reference
+					(&state -> ack_queue_tail -> next_pending,
+					 state -> update_queue_head, MDL);
+				lease_dereference (&state -> update_queue_head,
+						   MDL);
+			}
+			lease_reference (&state -> update_queue_head,
+					 state -> ack_queue_head, MDL);
+			/* With no previous update queue, the ack queue's
+			   tail becomes the update queue's tail too. */
+			if (!state -> update_queue_tail) {
+#if defined (POINTER_DEBUG)
+				if (state -> ack_queue_tail -> next_pending) {
+					log_error ("next pending on ack queue tail.");
+					abort ();
+				}
+#endif
+				lease_reference (&state -> update_queue_tail,
+						 state -> ack_queue_tail, MDL);
+			}
+			lease_dereference (&state -> ack_queue_tail, MDL);
+			lease_dereference (&state -> ack_queue_head, MDL);
+			state -> cur_unacked_updates = 0;
+		}
+		/* We will re-queue a timeout later, if applicable. */
+		cancel_timeout (dhcp_failover_keepalive, state);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Tentatively make the transition. */
+	saved_state = state -> me.state;
+	saved_stos = state -> me.stos;
+
+	/* Keep the old stos if we're going into recover_wait or if we're
+	   coming into or out of startup. */
+	if (new_state != recover_wait && new_state != startup &&
+	    saved_state != startup)
+		state -> me.stos = cur_time;
+
+	/* If we're in shutdown, peer is in partner_down, and we're moving
+	   to recover, we can skip waiting for MCLT to expire.   This happens
+	   when a server is moved administratively into shutdown prior to
+	   actually shutting down.   Of course, if there are any updates
+	   pending we can't actually do this. */
+	if (new_state == recover && saved_state == shut_down &&
+	    state -> partner.state == partner_down &&
+	    !state -> update_queue_head && !state -> ack_queue_head)
+		state -> me.stos = cur_time - state -> mclt;
+
+	state -> me.state = new_state;
+	/* Remember where we came from when entering startup. */
+	if (new_state == startup && saved_state != startup)
+		state -> saved_state = saved_state;
+
+	/* If we can't record the new state, we can't make a state transition. */
+	if (!write_failover_state (state) || !commit_leases ()) {
+		log_error ("Unable to record current failover state for %s",
+			   state -> name);
+		state -> me.state = saved_state;
+		state -> me.stos = saved_stos;
+		return ISC_R_IOERROR;
+	}
+
+	log_info ("failover peer %s: I move from %s to %s",
+		  state -> name, dhcp_failover_state_name_print (saved_state),
+		  dhcp_failover_state_name_print (state -> me.state));
+
+	/* If we were in startup and we just left it, cancel the timeout. */
+	if (new_state != startup && saved_state == startup)
+		cancel_timeout (dhcp_failover_startup_timeout, state);
+
+	/*
+	 * If the state changes for any reason, cancel 'delayed auto state
+	 * changes' (currently there is just the one).
+	 */
+	cancel_timeout(dhcp_failover_auto_partner_down, state);
+
+	/* Set our service state. */
+	dhcp_failover_set_service_state (state);
+
+	/* Tell the peer about it. */
+	if (state -> link_to_peer)
+		dhcp_failover_send_state (state);
+
+	switch (new_state) {
+	case communications_interrupted:
+		/*
+		 * There is an optional feature to automatically enter partner
+		 * down after a timer expires, upon entering comms-interrupted.
+		 * This feature is generally not safe except in specific
+		 * circumstances.
+		 *
+		 * A zero value (also the default) disables it.
+		 */
+		if (state->auto_partner_down == 0)
+			break;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+		log_info("add_timeout +%lu dhcp_failover_auto_partner_down",
+			 (unsigned long)state->auto_partner_down);
+#endif
+		tv.tv_sec = cur_time + state->auto_partner_down;
+		tv.tv_usec = 0;
+		add_timeout(&tv, dhcp_failover_auto_partner_down, state,
+			    (tvref_t)omapi_object_reference,
+			    (tvunref_t)omapi_object_dereference);
+		break;
+
+	case normal:
+		/* Upon entering normal state, the server is expected to retransmit
+		 * all pending binding updates.  This is a good opportunity to
+		 * rebalance the pool (potentially making new pending updates),
+		 * which also schedules the next pool rebalance.
+		 */
+		dhcp_failover_pool_balance(state);
+		dhcp_failover_generate_update_queue(state, 0);
+
+		if (state->update_queue_tail != NULL) {
+			dhcp_failover_send_updates(state);
+			log_info("Sending updates to %s.", state->name);
+		}
+
+		break;
+
+	case potential_conflict:
+		if (state -> i_am == primary)
+			dhcp_failover_send_update_request (state);
+		break;
+
+	case startup:
+		/* Give startup 15 seconds before
+		   dhcp_failover_startup_timeout runs. */
+#if defined (DEBUG_FAILOVER_TIMING)
+		log_info ("add_timeout +15 %s",
+			  "dhcp_failover_startup_timeout");
+#endif
+		tv . tv_sec = cur_time + 15;
+		tv . tv_usec = 0;
+		add_timeout (&tv,
+			     dhcp_failover_startup_timeout,
+			     state,
+			     (tvref_t)omapi_object_reference,
+			     (tvunref_t)
+			     omapi_object_dereference);
+		break;
+
+	/* If we come back in recover_wait and there's still waiting
+	   to do, set a timeout. */
+	case recover_wait:
+		if (state -> me.stos + state -> mclt > cur_time) {
+#if defined (DEBUG_FAILOVER_TIMING)
+			/* Log the remaining delay (expiry minus now) and
+			   the function that is actually scheduled below. */
+			log_info ("add_timeout +%d %s",
+				  (int)(state -> me.stos + state -> mclt -
+					cur_time),
+				  "dhcp_failover_recover_done");
+#endif
+			tv . tv_sec = (int)(state -> me.stos + state -> mclt);
+			tv . tv_usec = 0;
+			add_timeout (&tv,
+				     dhcp_failover_recover_done,
+				     state,
+				     (tvref_t)omapi_object_reference,
+				     (tvunref_t)
+				     omapi_object_dereference);
+		} else
+			dhcp_failover_recover_done (state);
+		break;
+
+	case recover:
+		/* XXX: We're supposed to calculate if updreq or updreqall is
+		 * needed.  In theory, we should only have to updreqall if we
+		 * are positive we lost our stable storage.
+		 */
+		if (state -> link_to_peer)
+			dhcp_failover_send_update_request_all (state);
+		break;
+
+	case partner_down:
+		/* For every expired lease, set a timeout for it to become free. */
+		for (s = shared_networks; s; s = s -> next) {
+			for (p = s -> pools; p; p = p -> next) {
+				if (p -> failover_peer != state)
+					continue;
+
+				/* Each expired lease sorts at the later of
+				   stos + MCLT and its own end time. */
+				for (l = p->expired ; l ; l = l->next) {
+					l->tsfp = state->me.stos + state->mclt;
+					l->sort_time = (l->tsfp > l->ends) ?
+						l->tsfp : l->ends;
+				}
+
+				/* Pull the pool timer in if the first
+				   expired lease is due before it. */
+				if (p->expired &&
+				    (p->expired->sort_time <
+				     p->next_event_time)) {
+
+					p->next_event_time =
+						p->expired->sort_time;
+#if defined (DEBUG_FAILOVER_TIMING)
+					/* Log the delay until the event,
+					   not its negation. */
+					log_info ("add_timeout +%d %s",
+						  (int)(p->next_event_time -
+							cur_time),
+						  "pool_timer");
+#endif
+					tv.tv_sec = p->next_event_time;
+					tv.tv_usec = 0;
+					add_timeout(&tv, pool_timer, p,
+						    (tvref_t)pool_reference,
+						    (tvunref_t)pool_dereference);
+				}
+			}
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return ISC_R_SUCCESS;
+}
+
+isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
+ failover_message_t *msg)
+{
+ enum failover_state previous_state = state -> partner.state;
+ enum failover_state new_state;
+ int startupp;
+
+ new_state = msg -> server_state;
+ startupp = (msg -> server_flags & FTF_SERVER_STARTUP) ? 1 : 0;
+
+ if (state -> partner.state == new_state && state -> me.state) {
+ switch (state -> me.state) {
+ case startup:
+ dhcp_failover_set_state (state, state -> saved_state);
+ return ISC_R_SUCCESS;
+
+ case unknown_state:
+ case normal:
+ case potential_conflict:
+ case recover_done:
+ case shut_down:
+ case paused:
+ case recover_wait:
+ return ISC_R_SUCCESS;
+
+ /* If we get a peer state change when we're
+ disconnected, we always process it. */
+ case partner_down:
+ case communications_interrupted:
+ case resolution_interrupted:
+ case recover:
+ case conflict_done:
+ break;
+
+ default:
+ log_fatal("Impossible case at %s:%d.", MDL);
+ break;
+ }
+ }
+
+ state -> partner.state = new_state;
+
+ log_info ("failover peer %s: peer moves from %s to %s",
+ state -> name,
+ dhcp_failover_state_name_print (previous_state),
+ dhcp_failover_state_name_print (state -> partner.state));
+
+ if (!write_failover_state (state) || !commit_leases ()) {
+ /* This is bad, but it's not fatal. Of course, if we
+ can't write to the lease database, we're not going to
+ get much done anyway. */
+ log_error ("Unable to record current failover state for %s",
+ state -> name);
+ }
+
+ /* Quickly validate the new state as being one of the 13 known
+ * states.
+ */
+ switch (new_state) {
+ case unknown_state:
+ case startup:
+ case normal:
+ case communications_interrupted:
+ case partner_down:
+ case potential_conflict:
+ case recover:
+ case paused:
+ case shut_down:
+ case recover_done:
+ case resolution_interrupted:
+ case conflict_done:
+ case recover_wait:
+ break;
+
+ default:
+ log_error("failover peer %s: Invalid state: %d", state->name,
+ new_state);
+ dhcp_failover_set_state(state, shut_down);
+ return ISC_R_SUCCESS;
+ }
+
+ /* Do any state transitions that are required as a result of the
+ peer's state transition. */
+
+ switch (state -> me.state == startup ?
+ state -> saved_state : state -> me.state) {
+ case normal:
+ switch (new_state) {
+ case normal:
+ dhcp_failover_state_pool_check (state);
+ break;
+
+ case partner_down:
+ if (state -> me.state == startup)
+ dhcp_failover_set_state (state, recover);
+ else
+ dhcp_failover_set_state (state,
+ potential_conflict);
+ break;
+
+ case potential_conflict:
+ case resolution_interrupted:
+ case conflict_done:
+ /* None of these transitions should ever occur. */
+ log_error("Peer %s: Invalid state transition %s "
+ "to %s.", state->name,
+ dhcp_failover_state_name_print(previous_state),
+ dhcp_failover_state_name_print(new_state));
+ dhcp_failover_set_state (state, shut_down);
+ break;
+
+ case recover:
+ case shut_down:
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ case paused:
+ dhcp_failover_set_state (state,
+ communications_interrupted);
+ break;
+
+ default:
+ /* recover_wait, recover_done, unknown_state, startup,
+ * communications_interrupted
+ */
+ break;
+ }
+ break;
+
+ case recover:
+ switch (new_state) {
+ case recover:
+ log_info ("failover peer %s: requesting %s",
+ state -> name, "full update from peer");
+ /* Don't send updreqall if we're really in the
+ startup state, because that will result in two
+ being sent. */
+ if (state -> me.state == recover)
+ dhcp_failover_send_update_request_all (state);
+ break;
+
+ case potential_conflict:
+ case resolution_interrupted:
+ case conflict_done:
+ case normal:
+ dhcp_failover_set_state (state, potential_conflict);
+ break;
+
+ case partner_down:
+ case communications_interrupted:
+ /* We're supposed to send an update request at this
+ point. */
+ /* XXX we don't currently have code here to do any
+ XXX clever detection of when we should send an
+ XXX UPDREQALL message rather than an UPDREQ
+ XXX message. What to do, what to do? */
+ /* Currently when we enter recover state, no matter
+ * the reason, we send an UPDREQALL. So, it makes
+ * the most sense to stick to that until something
+ * better is done.
+ * Furthermore, we only want to send the update
+ * request if we are not in startup state.
+ */
+ if (state -> me.state == recover)
+ dhcp_failover_send_update_request_all (state);
+ break;
+
+ case shut_down:
+ /* XXX We're not explicitly told what to do in this
+ XXX case, but this transition is consistent with
+ XXX what is elsewhere in the draft. */
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ /* We can't really do anything in this case. */
+ default:
+ /* paused, recover_done, recover_wait, unknown_state,
+ * startup.
+ */
+ break;
+ }
+ break;
+
+ case potential_conflict:
+ switch (new_state) {
+ case normal:
+ /* This is an illegal transition. */
+ log_error("Peer %s moves to normal during conflict "
+ "resolution - panic, shutting down.",
+ state->name);
+ dhcp_failover_set_state(state, shut_down);
+ break;
+
+ case conflict_done:
+ if (previous_state == potential_conflict)
+ dhcp_failover_send_update_request (state);
+ else
+ log_error("Peer %s: Unexpected move to "
+ "conflict-done.", state->name);
+ break;
+
+ case recover_done:
+ case recover_wait:
+ case potential_conflict:
+ case partner_down:
+ case communications_interrupted:
+ case resolution_interrupted:
+ case paused:
+ break;
+
+ case recover:
+ dhcp_failover_set_state (state, recover);
+ break;
+
+ case shut_down:
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ default:
+ /* unknown_state, startup */
+ break;
+ }
+ break;
+
+ case conflict_done:
+ switch (new_state) {
+ case normal:
+ case shut_down:
+ dhcp_failover_set_state(state, new_state);
+ break;
+
+ default:
+ log_fatal("Peer %s: Invalid attempt to move from %s "
+ "to %s while local state is conflict-done.",
+ state->name,
+ dhcp_failover_state_name_print(previous_state),
+ dhcp_failover_state_name_print(new_state));
+ }
+ break;
+
+ case partner_down:
+ /* Take no action if other server is starting up. */
+ if (startupp)
+ break;
+
+ switch (new_state) {
+ /* This is where we should be. */
+ case recover:
+ case recover_wait:
+ break;
+
+ case recover_done:
+ dhcp_failover_set_state (state, normal);
+ break;
+
+ case normal:
+ case potential_conflict:
+ case partner_down:
+ case communications_interrupted:
+ case resolution_interrupted:
+ case conflict_done:
+ dhcp_failover_set_state (state, potential_conflict);
+ break;
+
+ default:
+ /* shut_down, paused, unknown_state, startup */
+ break;
+ }
+ break;
+
+ case communications_interrupted:
+ switch (new_state) {
+ case paused:
+ /* Stick with the status quo. */
+ break;
+
+ /* If we're in communications-interrupted and an
+ amnesic peer connects, go to the partner_down
+ state immediately. */
+ case recover:
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ case normal:
+ case communications_interrupted:
+ case recover_done:
+ case recover_wait:
+ /* XXX so we don't need to do this specially in
+ XXX the CONNECT and CONNECTACK handlers. */
+ dhcp_failover_send_updates (state);
+ dhcp_failover_set_state (state, normal);
+ break;
+
+ case potential_conflict:
+ case partner_down:
+ case resolution_interrupted:
+ case conflict_done:
+ dhcp_failover_set_state (state, potential_conflict);
+ break;
+
+ case shut_down:
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ default:
+ /* unknown_state, startup */
+ break;
+ }
+ break;
+
+ case resolution_interrupted:
+ switch (new_state) {
+ case normal:
+ case recover:
+ case potential_conflict:
+ case partner_down:
+ case communications_interrupted:
+ case resolution_interrupted:
+ case conflict_done:
+ case recover_done:
+ case recover_wait:
+ dhcp_failover_set_state (state, potential_conflict);
+ break;
+
+ case shut_down:
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ default:
+ /* paused, unknown_state, startup */
+ break;
+ }
+ break;
+
+ /* Make no transitions while in recover_wait...just wait. */
+ case recover_wait:
+ break;
+
+ case recover_done:
+ switch (new_state) {
+ case recover_done:
+ log_error("Both servers have entered recover-done!");
+ case normal:
+ dhcp_failover_set_state (state, normal);
+ break;
+
+ case shut_down:
+ dhcp_failover_set_state (state, partner_down);
+ break;
+
+ default:
+ /* potential_conflict, partner_down,
+ * communications_interrupted, resolution_interrupted,
+ * paused, recover, recover_wait, unknown_state,
+ * startup.
+ */
+ break;
+ }
+ break;
+
+ /* We are essentially dead in the water when we're in
+ either shut_down or paused states, and do not do any
+ automatic state transitions. */
+ case shut_down:
+ case paused:
+ break;
+
+ /* XXX: Shouldn't this be a fatal condition? */
+ case unknown_state:
+ break;
+
+ default:
+ log_fatal("Impossible condition at %s:%d.", MDL);
+ break;
+
+ }
+
+ /* If we didn't make a transition out of startup as a result of
+ the peer's state change, do it now as a result of the fact that
+ we got a state change from the peer. */
+ if (state -> me.state == startup && state -> saved_state != startup)
+ dhcp_failover_set_state (state, state -> saved_state);
+
+ /* For now, just set the service state based on the peer's state
+ if necessary. */
+ dhcp_failover_set_service_state (state);
+
+ return ISC_R_SUCCESS;
+}
+
+/*
+ * Manual rebalance entry point, used at startup and on entry to normal
+ * state.  NULL is passed as the POOLREQ flag, so this path never asks the
+ * peer for leases: the peer is likely about to schedule its own rebalance
+ * when it enters normal itself.
+ */
+static void
+dhcp_failover_pool_balance(dhcp_failover_state_t *state)
+{
+	/* A timer-driven rebalance that is still pending is now redundant. */
+	cancel_timeout(dhcp_failover_pool_rebalance, state);
+	state->sched_balance = 0;
+
+	/* The number of leases queued is not needed on this path. */
+	(void)dhcp_failover_pool_dobalance(state, NULL);
+}
+
+/*
+ * Timer callback for a scheduled pool rebalance.  Once per timer interval
+ * is the only time we want to emit a POOLREQ (which interrupts the peer),
+ * so this entry point hands dhcp_failover_pool_dobalance() a request flag.
+ */
+void
+dhcp_failover_pool_rebalance(void *failover_state)
+{
+	dhcp_failover_state_t *peer = (dhcp_failover_state_t *)failover_state;
+	isc_boolean_t want_poolreq = ISC_FALSE;
+	int queued;
+
+	/* The scheduled event has fired, so nothing is pending any more. */
+	peer->sched_balance = 0;
+
+	/* Transmit whatever leases the balance pass queued for the peer. */
+	queued = dhcp_failover_pool_dobalance(peer, &want_poolreq);
+	if (queued)
+		dhcp_failover_send_updates(peer);
+
+	/* The balance pass set the flag: ask the peer to rebalance too. */
+	if (want_poolreq)
+		dhcp_failover_send_poolreq(peer);
+}
+
+/*
+ * Rebalance in response to a POOLREQ from the peer.  NULL is passed as the
+ * POOLREQ flag so that answering a POOLREQ can never trigger another
+ * POOLREQ (which would ping-pong between the servers).
+ */
+static void
+dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state)
+{
+	int moved;
+
+	/* This pass replaces any timer-driven rebalance still pending. */
+	cancel_timeout(dhcp_failover_pool_rebalance, state);
+	state->sched_balance = 0;
+
+	moved = dhcp_failover_pool_dobalance(state, NULL);
+
+	/* The response carries the number of leases we queued (maybe 0). */
+	dhcp_failover_send_poolresp(state, moved);
+
+	if (!moved) {
+		log_info("peer %s: Got POOLREQ, answering negatively! "
+			 "Peer may be out of leases or database inconsistent.",
+			 state->name);
+		return;
+	}
+
+	dhcp_failover_send_updates(state);
+}
+
+/*
+ * Do the meat of the work common to all forms of pool rebalance. If the
+ * caller deems it appropriate to transmit POOLREQ messages, it can use the
+ * sendreq pointer to pass in the address of a FALSE value which this function
+ * will conditionally turn TRUE if a POOLREQ is determined to be necessary.
+ * A NULL value may be passed, in which case *sendreq is never written and
+ * no POOLREQ is requested.
+ *
+ * Returns the number of leases selected for transfer to the peer, summed
+ * over every pool whose failover_peer is this state.  A lease is counted
+ * even if committing it then fails (that failure is only logged).  Returns
+ * 0 immediately, with no side effects, when the local state is not normal.
+ *
+ * Side effects: records cur_time in state->last_balance, calls
+ * dhcp_failover_pool_check() on every pool visited, and calls
+ * commit_leases() once at the end if at least one lease was selected.
+ */
+static int
+dhcp_failover_pool_dobalance(dhcp_failover_state_t *state,
+ isc_boolean_t *sendreq)
+{
+ int lts, total, thresh, hold, panic, pass;
+ int leases_queued = 0;
+ struct lease *lp = (struct lease *)0;
+ struct lease *next = (struct lease *)0;
+ struct shared_network *s;
+ struct pool *p;
+ binding_state_t peer_lease_state;
+ binding_state_t my_lease_state;
+ struct lease **lq;
+ int (*log_func)(const char *, ...);
+ const char *result, *reqlog;
+
+ /* Balancing is only performed while the local server is in normal. */
+ if (state -> me.state != normal)
+ return 0;
+
+ state->last_balance = cur_time;
+
+ for (s = shared_networks ; s ; s = s->next) {
+ for (p = s->pools ; p ; p = p->next) {
+ /* Skip pools that belong to a different failover relationship. */
+ if (p->failover_peer != state)
+ continue;
+
+ /* Right now we're giving the peer half of the free leases.
+ If we have more leases than the peer (i.e., more than
+ half), then the number of leases we have, less the number
+ of leases the peer has, will be how many more leases we
+ have than the peer has. So if we send half that number
+ to the peer, we should be even.
+
+ lts ("leases to send") is positive when we hold the surplus
+ and negative when the peer does. The primary's own list is
+ p->free and the secondary's is p->backup. */
+ if (p->failover_peer->i_am == primary) {
+ lts = (p->free_leases - p->backup_leases) / 2;
+ peer_lease_state = FTS_BACKUP;
+ my_lease_state = FTS_FREE;
+ lq = &p->free;
+ } else {
+ lts = (p->backup_leases - p->free_leases) / 2;
+ peer_lease_state = FTS_FREE;
+ my_lease_state = FTS_BACKUP;
+ lq = &p->backup;
+ }
+
+ total = p->backup_leases + p->free_leases;
+
+ /* Both limits are percentages of total; adding 50 before the
+ * division by 100 rounds to the nearest whole lease.
+ */
+ thresh = ((total * state->max_lease_misbalance) + 50) / 100;
+ hold = ((total * state->max_lease_ownership) + 50) / 100;
+
+ /*
+ * If we need leases (so lts is negative) more than negative
+ * double the thresh%, panic and send poolreq to hopefully wake
+ * up the peer (but more likely the db is inconsistent). But,
+ * if this comes out zero, switch to -1 so that the POOLREQ is
+ * sent on lts == -2 rather than right away at -1.
+ *
+ * Note that we do not subtract -1 from panic all the time
+ * because thresh% and hold% may come out to the same number,
+ * and that is correct operation...where thresh% and hold% are
+ * both -1, we want to send poolreq when lts reaches -3. So,
+ * "-3 < -2", lts < panic.
+ */
+ panic = thresh * -2;
+
+ if (panic == 0)
+ panic = -1;
+
+ /* Only callers that supplied a flag can request a POOLREQ. */
+ if ((sendreq != NULL) && (lts < panic)) {
+ reqlog = " (requesting peer rebalance!)";
+ *sendreq = ISC_TRUE;
+ } else
+ reqlog = "";
+
+ /* NOTE: the value printed after "total" is p->lease_count, not
+ * the local variable total (free + backup) computed above.
+ */
+ log_info("balancing pool %lx %s total %d free %d "
+ "backup %d lts %d max-own (+/-)%d%s",
+ (unsigned long)p,
+ (p->shared_network ?
+ p->shared_network->name : ""), p->lease_count,
+ p->free_leases, p->backup_leases, lts, hold,
+ reqlog);
+
+ /* In the first pass, try to allocate leases to the
+ * peer which it would normally be responsible for (if
+ * the lease has a hardware address or client-identifier,
+ * and the load-balance-algorithm chooses the peer to
+ * answer that address), up to a hold% excess in the peer's
+ * favor. In the second pass, just send the oldest (first
+ * on the list) leases up to a hold% excess in our favor.
+ *
+ * This could make for additional pool rebalance
+ * events, but preserving MAC possession should be
+ * worth it.
+ */
+ pass = 0;
+ lease_reference(&lp, *lq, MDL);
+
+ while (lp) {
+ /* Hold a reference on the successor before lp is touched;
+ * presumably supersede_lease() below can relink lp, which
+ * would make lp->next unusable afterwards (confirm).
+ */
+ if (next)
+ lease_dereference(&next, MDL);
+ if (lp->next)
+ lease_reference(&next, lp->next, MDL);
+
+ /*
+ * Stop if the pool is 'balanced enough.'
+ *
+ * The pool is balanced enough if:
+ *
+ * 1) We're on the first run through and the peer has
+ * its fair share of leases already (lts reaches
+ * -hold).
+ * 2) We're on the second run through, we are shifting
+ * never-used leases, and there is a perfectly even
+ * balance (lts reaches zero).
+ * 3) Second run through, we are shifting previously
+ * used leases, and the local system has its fair
+ * share but no more (lts reaches hold).
+ *
+ * Note that this is implemented below in 3,2,1 order.
+ * A nonzero lp->ends is what marks a lease as previously
+ * used here.
+ */
+ if (pass) {
+ if (lp->ends) {
+ if (lts <= hold)
+ break;
+ } else {
+ if (lts <= 0)
+ break;
+ }
+ } else if (lts <= -hold)
+ break;
+
+ /* Pass 0 only moves leases the peer would serve anyway;
+ * pass 1 moves whatever comes next on the list.
+ */
+ if (pass || peer_wants_lease(lp)) {
+ --lts;
+ ++leases_queued;
+ lp->next_binding_state = peer_lease_state;
+ lp->tstp = cur_time;
+ lp->starts = cur_time;
+
+ if (!supersede_lease(lp, NULL, 0, 1, 0) ||
+ !write_lease(lp))
+ log_error("can't commit lease %s on "
+ "giveaway", piaddr(lp->ip_addr));
+ }
+
+ /* Advance to the saved successor; when the first pass runs
+ * off the end of the list, restart from the head for the
+ * second pass.
+ */
+ lease_dereference(&lp, MDL);
+ if (next)
+ lease_reference(&lp, next, MDL);
+ else if (!pass) {
+ pass = 1;
+ lease_reference(&lp, *lq, MDL);
+ }
+ }
+
+ /* Drop whatever references are still held after a break. */
+ if (next)
+ lease_dereference(&next, MDL);
+ if (lp)
+ lease_dereference(&lp, MDL);
+
+ /* A surplus still above thresh after both passes is reported
+ * through log_error rather than log_info.
+ */
+ if (lts > thresh) {
+ result = "IMBALANCED";
+ log_func = log_error;
+ } else {
+ result = "balanced";
+ log_func = log_info;
+ }
+
+ log_func("%s pool %lx %s total %d free %d backup %d "
+ "lts %d max-misbal %d", result, (unsigned long)p,
+ (p->shared_network ?
+ p->shared_network->name : ""), p->lease_count,
+ p->free_leases, p->backup_leases, lts, thresh);
+
+ /* Recalculate next rebalance event timer. */
+ dhcp_failover_pool_check(p);
+ }
+ }
+
+ /* One commit covers every lease changed in this call. */
+ if (leases_queued)
+ commit_leases();
+
+ return leases_queued;
+}
+
+/* dhcp_failover_pool_check: Called whenever FREE or BACKUP leases change
+ * states, on both servers. Check the scheduled time to rebalance the pool
+ * and lower it if applicable.
+ *
+ * Does nothing when the pool has no failover peer or the local state is
+ * not normal. Otherwise a candidate rebalance time is computed; the
+ * dhcp_failover_pool_rebalance timer is (re)armed only if nothing is
+ * scheduled yet (peer->sched_balance is zero) or the candidate is strictly
+ * earlier than the time already scheduled.
+ */
+void
+dhcp_failover_pool_check(struct pool *pool)
+{
+ dhcp_failover_state_t *peer;
+ TIME est1, est2;
+ struct timeval tv;
+
+ peer = pool->failover_peer;
+
+ if(!peer || peer->me.state != normal)
+ return;
+
+ /* Estimate the time left until lease exhaustion.
+ * The first lease on the backup or free lists is also the oldest
+ * lease. It is reasonable to guess that it will take at least
+ * as much time for a pool to run out of leases, as the present
+ * age of the oldest lease (seconds since it expired).
+ *
+ * Note that this isn't so sane of an assumption if the oldest
+ * lease is a virgin (ends = 0), we wind up sending this against
+ * the max_balance bounds check.
+ *
+ * est1 is derived from the free list and est2 from the backup list;
+ * an empty list, or a head lease that has not yet ended, yields 0.
+ */
+ if(pool->free && pool->free->ends < cur_time)
+ est1 = cur_time - pool->free->ends;
+ else
+ est1 = 0;
+
+ if(pool->backup && pool->backup->ends < cur_time)
+ est2 = cur_time - pool->backup->ends;
+ else
+ est2 = 0;
+
+ /* We don't want to schedule rebalance for when we think we'll run
+ * out of leases, we want to schedule the rebalance for when we think
+ * the disparity will be 'large enough' to warrant action.
+ * (Scale by the misbalance percentage, rounded to nearest.)
+ */
+ est1 = ((est1 * peer->max_lease_misbalance) + 50) / 100;
+ est2 = ((est2 * peer->max_lease_misbalance) + 50) / 100;
+
+ /* Guess when the local system will begin issuing POOLREQ panic
+ * attacks because "max_lease_misbalance*2" has been exceeded.
+ * The doubled estimate is the free-list one on the primary and the
+ * backup-list one otherwise.
+ */
+ if(peer->i_am == primary)
+ est1 *= 2;
+ else
+ est2 *= 2;
+
+ /* Select the smallest time. */
+ if(est1 > est2)
+ est1 = est2;
+
+ /* Bounded by the maximum configured value. */
+ if(est1 > peer->max_balance)
+ est1 = peer->max_balance;
+
+ /* Project this time into the future. From here on est1 is an
+ * absolute time rather than an interval.
+ */
+ est1 += cur_time;
+
+ /* Do not move the time down under the minimum. est2 is reused here
+ * as the earliest permitted time; the bound is skipped when
+ * last_balance is zero.
+ */
+ est2 = peer->last_balance + peer->min_balance;
+ if(peer->last_balance && (est1 < est2))
+ est1 = est2;
+
+ /* Introduce a random delay (0 to 4 seconds). */
+ est1 += random() % 5;
+
+ /* Do not move the time forward, or reset to the same time. */
+ if(peer->sched_balance) {
+ if (est1 >= peer->sched_balance)
+ return;
+
+ /* We are about to schedule the time down, cancel the
+ * current timeout.
+ */
+ cancel_timeout(dhcp_failover_pool_rebalance, peer);
+ }
+
+ /* The time is different, and lower, use it. */
+ peer->sched_balance = est1;
+
+#if defined(DEBUG_FAILOVER_TIMING)
+ log_info("add_timeout +%d dhcp_failover_pool_rebalance",
+ (int)(est1 - cur_time));
+#endif
+ /* est1 already includes cur_time, so it is used as tv_sec as-is. */
+ tv.tv_sec = est1;
+ tv.tv_usec = 0;
+ add_timeout(&tv, dhcp_failover_pool_rebalance, peer,
+ (tvref_t)dhcp_failover_state_reference,
+ (tvunref_t)dhcp_failover_state_dereference);
+}
+
+/*
+ * Run dhcp_failover_pool_check() on every pool, in every shared network,
+ * whose failover peer is the given state.  Always returns 0.
+ */
+int dhcp_failover_state_pool_check (dhcp_failover_state_t *state)
+{
+	struct shared_network *net;
+	struct pool *candidate;
+
+	for (net = shared_networks; net; net = net->next) {
+		for (candidate = net->pools; candidate;
+		     candidate = candidate->next) {
+			/* Only pools bound to this failover relationship. */
+			if (candidate->failover_peer == state)
+				dhcp_failover_pool_check(candidate);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Send queued lease updates to the peer, moving each lease that was sent
+ * from the update queue to the tail of the ack queue.
+ *
+ * Sending continues while the update queue is non-empty and
+ * cur_unacked_updates is below partner.max_flying_updates.
+ *
+ * Returns ISC_R_SUCCESS when there is no link to the peer (nothing is
+ * done) or when the loop finishes. If dhcp_failover_send_bind_update()
+ * fails, its status is returned at once; the failing lease has not been
+ * dequeued at that point, so it stays at the head of the update queue.
+ */
+isc_result_t dhcp_failover_send_updates (dhcp_failover_state_t *state)
+{
+ struct lease *lp = (struct lease *)0;
+ isc_result_t status;
+
+ /* Can't update peer if we're not talking to it! */
+ if (!state -> link_to_peer)
+ return ISC_R_SUCCESS;
+
+ /* If there are acks pending, transmit them prior to potentially
+ * sending new updates for the same lease.
+ * (The return value of dhcp_failover_send_acks is not checked.)
+ */
+ if (state->toack_queue_head != NULL)
+ dhcp_failover_send_acks(state);
+
+ while ((state -> partner.max_flying_updates >
+ state -> cur_unacked_updates) && state -> update_queue_head) {
+ /* Grab the head of the update queue. */
+ lease_reference (&lp, state -> update_queue_head, MDL);
+
+ /* Send the update to the peer. */
+ status = dhcp_failover_send_bind_update (state, lp);
+ if (status != ISC_R_SUCCESS) {
+ lease_dereference (&lp, MDL);
+ return status;
+ }
+ lp -> flags &= ~ON_UPDATE_QUEUE;
+
+ /* Take it off the head of the update queue and put the next
+ item in the update queue at the head. If there is no next
+ item the queue is now empty, so the tail reference is
+ dropped as well. */
+ lease_dereference (&state -> update_queue_head, MDL);
+ if (lp -> next_pending) {
+ lease_reference (&state -> update_queue_head,
+ lp -> next_pending, MDL);
+ lease_dereference (&lp -> next_pending, MDL);
+ } else {
+ lease_dereference (&state -> update_queue_tail, MDL);
+ }
+
+ /* Append lp to the ack queue: link it after the current tail,
+ or make it the head if the ack queue is empty. The tail
+ pointer itself is set further down. */
+ if (state -> ack_queue_head) {
+ lease_reference
+ (&state -> ack_queue_tail -> next_pending,
+ lp, MDL);
+ lease_dereference (&state -> ack_queue_tail, MDL);
+ } else {
+ lease_reference (&state -> ack_queue_head, lp, MDL);
+ }
+#if defined (POINTER_DEBUG)
+ if (lp -> next_pending) {
+ log_error ("ack_queue_tail: lp -> next_pending");
+ abort ();
+ }
+#endif
+ lease_reference (&state -> ack_queue_tail, lp, MDL);
+ lp -> flags |= ON_ACK_QUEUE;
+ lease_dereference (&lp, MDL);
+
+ /* Count the object as an unacked update. */
+ state -> cur_unacked_updates++;
+ }
+ return ISC_R_SUCCESS;
+}
+
+/* Queue an update for a lease. Always returns 1 at this point - it's
+ not an error for this to be called on a lease for which there's no
+ failover peer.
+
+ The lease is appended to the tail of the peer's update queue. If
+ immediate is nonzero, dhcp_failover_send_updates() is called right
+ after queueing; its result is not reflected in the return value. */
+
+int dhcp_failover_queue_update (struct lease *lease, int immediate)
+{
+ dhcp_failover_state_t *state;
+
+ /* Leases without a pool, or in a pool with no failover peer, have
+ nothing to update. */
+ if (!lease -> pool ||
+ !lease -> pool -> failover_peer)
+ return 1;
+
+ /* If it's already on the update queue, leave it there. */
+ if (lease -> flags & ON_UPDATE_QUEUE)
+ return 1;
+
+ /* Get the failover state structure for this lease. */
+ state = lease -> pool -> failover_peer;
+
+ /* If it's on the ack queue, take it off. */
+ if (lease -> flags & ON_ACK_QUEUE)
+ dhcp_failover_ack_queue_remove (state, lease);
+
+ /* Link the lease after the current tail, or make it the head if
+ the queue is empty. The tail pointer itself is set below. */
+ if (state -> update_queue_head) {
+ lease_reference (&state -> update_queue_tail -> next_pending,
+ lease, MDL);
+ lease_dereference (&state -> update_queue_tail, MDL);
+ } else {
+ lease_reference (&state -> update_queue_head, lease, MDL);
+ }
+#if defined (POINTER_DEBUG)
+ if (lease -> next_pending) {
+ log_error ("next pending on update queue lease.");
+#if defined (DEBUG_RC_HISTORY)
+ dump_rc_history (lease);
+#endif
+ abort ();
+ }
+#endif
+ lease_reference (&state -> update_queue_tail, lease, MDL);
+ lease -> flags |= ON_UPDATE_QUEUE;
+ if (immediate)
+ dhcp_failover_send_updates (state);
+ return 1;
+}
+
+/* Send a positive bind ack for every message on the to-ack queue, emptying
+ the queue and resetting pending_acks to zero. Returns 0 without sending
+ anything (the queue is left untouched) if commit_leases() fails, and 1
+ otherwise. */
+int dhcp_failover_send_acks (dhcp_failover_state_t *state)
+{
+ failover_message_t *msg = (failover_message_t *)0;
+
+ /* Must commit all leases prior to acking them. */
+ if (!commit_leases ())
+ return 0;
+
+ while (state -> toack_queue_head) {
+ /* Pop the head: hold it in msg and advance the head to
+ msg -> next, if there is one. */
+ failover_message_reference
+ (&msg, state -> toack_queue_head, MDL);
+ failover_message_dereference
+ (&state -> toack_queue_head, MDL);
+ if (msg -> next) {
+ failover_message_reference
+ (&state -> toack_queue_head, msg -> next, MDL);
+ }
+
+ /* The return value of the ack send is not checked. */
+ dhcp_failover_send_bind_ack (state, msg, 0, (const char *)0);
+
+ failover_message_dereference (&msg, MDL);
+ }
+
+ /* The queue is empty now, so release the tail reference too. */
+ if (state -> toack_queue_tail)
+ failover_message_dereference (&state -> toack_queue_tail, MDL);
+ state -> pending_acks = 0;
+
+ return 1;
+}
+
+/*
+ * Timer callback that flushes the to-ack queue of the failover state passed
+ * in as the opaque argument.
+ */
+void dhcp_failover_toack_queue_timeout (void *vs)
+{
+	dhcp_failover_state_t *peer = (dhcp_failover_state_t *)vs;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("dhcp_failover_toack_queue_timeout");
+#endif
+
+	/* The flush result is not needed by the timer machinery. */
+	(void)dhcp_failover_send_acks (peer);
+}
+
+/* Queue an ack for a message. There is currently no way to queue a
+ negative ack -- these need to be sent directly.
+
+ The message is appended to the tail of the to-ack queue and
+ pending_acks is incremented. Always returns 1. */
+
+int dhcp_failover_queue_ack (dhcp_failover_state_t *state,
+ failover_message_t *msg)
+{
+ struct timeval tv;
+
+ /* Link msg after the current tail, or make it the head if the
+ queue is empty. The tail pointer itself is set just below. */
+ if (state -> toack_queue_head) {
+ failover_message_reference
+ (&state -> toack_queue_tail -> next, msg, MDL);
+ failover_message_dereference (&state -> toack_queue_tail, MDL);
+ } else {
+ failover_message_reference (&state -> toack_queue_head,
+ msg, MDL);
+ }
+ failover_message_reference (&state -> toack_queue_tail, msg, MDL);
+
+ state -> pending_acks++;
+
+ /* Flush the toack queue whenever we exceed half the number of
+ allowed unacked updates. */
+ if (state -> pending_acks >= state -> partner.max_flying_updates / 2) {
+ dhcp_failover_send_acks (state);
+ }
+
+ /* Schedule a timeout to flush the ack queue. A successful flush
+ above resets pending_acks to zero, in which case no timeout is
+ added. */
+ if (state -> pending_acks > 0) {
+#if defined (DEBUG_FAILOVER_TIMING)
+ log_info ("add_timeout +2 %s",
+ "dhcp_failover_toack_queue_timeout");
+#endif
+ tv . tv_sec = cur_time + 2;
+ tv . tv_usec = 0;
+ add_timeout (&tv,
+ dhcp_failover_toack_queue_timeout, state,
+ (tvref_t)dhcp_failover_state_reference,
+ (tvunref_t)dhcp_failover_state_dereference);
+ }
+
+ return 1;
+}
+
+/* Unlink a lease from the ack queue of the given failover state.
+
+ Does nothing if the lease does not carry the ON_ACK_QUEUE flag. If the
+ flag is set but the lease is not found on the queue, the function
+ returns early and leaves the flag, last_xid and cur_unacked_updates
+ untouched. On a successful removal it clears the flag, zeroes
+ lease->last_xid, decrements cur_unacked_updates and, when that count
+ reaches zero, calls commit_leases(). */
+void dhcp_failover_ack_queue_remove (dhcp_failover_state_t *state,
+ struct lease *lease)
+{
+ struct lease *lp;
+
+ if (!(lease -> flags & ON_ACK_QUEUE))
+ return;
+
+ if (state -> ack_queue_head == lease) {
+ /* Removing the head: promote the successor, or drop the tail
+ reference as well if the lease was the only entry. */
+ lease_dereference (&state -> ack_queue_head, MDL);
+ if (lease -> next_pending) {
+ lease_reference (&state -> ack_queue_head,
+ lease -> next_pending, MDL);
+ lease_dereference (&lease -> next_pending, MDL);
+ } else {
+ lease_dereference (&state -> ack_queue_tail, MDL);
+ }
+ } else {
+ /* Find the entry whose successor is the lease. */
+ for (lp = state -> ack_queue_head;
+ lp && lp -> next_pending != lease;
+ lp = lp -> next_pending)
+ ;
+
+ /* Flagged as queued but not actually on this queue. */
+ if (!lp)
+ return;
+
+ /* Splice the lease out: the predecessor takes over the
+ lease's successor, or becomes the new tail if the lease
+ was last. */
+ lease_dereference (&lp -> next_pending, MDL);
+ if (lease -> next_pending) {
+ lease_reference (&lp -> next_pending,
+ lease -> next_pending, MDL);
+ lease_dereference (&lease -> next_pending, MDL);
+ } else {
+ lease_dereference (&state -> ack_queue_tail, MDL);
+ if (lp -> next_pending) {
+ log_error ("state -> ack_queue_tail");
+ abort ();
+ }
+ lease_reference (&state -> ack_queue_tail, lp, MDL);
+ }
+ }
+
+ lease -> flags &= ~ON_ACK_QUEUE;
+ /* Multiple acks on one XID is an error and may cause badness. */
+ lease->last_xid = 0;
+ /* XXX: this violates draft-failover. We can't send another
+ * update just because we forgot about an old one that hasn't
+ * been acked yet.
+ */
+ state -> cur_unacked_updates--;
+
+ /*
+ * When updating leases as a result of an ack, we defer the commit
+ * for performance reasons. When there are no more acks pending,
+ * do a commit.
+ */
+ if (state -> cur_unacked_updates == 0) {
+ commit_leases();
+ }
+}
+
+/*
+ * OMAPI set_value handler for failover state objects.
+ *
+ * Only "local-state" has an effect: its integer value is handed to
+ * dhcp_failover_set_state().  Every other attribute this object publishes
+ * is accepted and silently ignored (ISC_R_SUCCESS).  That list of
+ * successful returns is completely wrong, but it is the fastest way to
+ * make dhcpctl do something vaguely sane when the local state is changed.
+ * Unrecognised names are passed to the inner object's set_value handler,
+ * if it has one; otherwise ISC_R_NOTFOUND is returned.
+ *
+ * Returns DHCP_R_INVALIDARG if h is not a failover state object.
+ */
+isc_result_t dhcp_failover_state_set_value (omapi_object_t *h,
+					    omapi_object_t *id,
+					    omapi_data_string_t *name,
+					    omapi_typed_data_t *value)
+{
+	/* Attribute names that are acknowledged but never stored. */
+	static const char *const ignored_names[] = {
+		"name",
+		"partner-address",
+		"local-address",
+		"partner-port",
+		"local-port",
+		"max-outstanding-updates",
+		"mclt",
+		"load-balance-max-secs",
+		"load-balance-hba",
+		"partner-state",
+		"partner-stos",
+		"local-stos",
+		"hierarchy",
+		"last-packet-sent",
+		"last-timestamp-received",
+		"skew",
+		"max-response-delay",
+		"cur-unacked-updates"
+	};
+	isc_result_t status;
+	unsigned long requested_state;
+	unsigned idx;
+
+	if (h -> type != dhcp_type_failover_state)
+		return DHCP_R_INVALIDARG;
+
+	/* The one attribute that actually changes something. */
+	if (omapi_ds_strcmp (name, "local-state") == 0) {
+		status = omapi_get_int_value (&requested_state, value);
+		if (status != ISC_R_SUCCESS)
+			return status;
+		return dhcp_failover_set_state ((dhcp_failover_state_t *)h,
+						requested_state);
+	}
+
+	for (idx = 0;
+	     idx < sizeof ignored_names / sizeof ignored_names [0]; idx++) {
+		if (omapi_ds_strcmp (name, ignored_names [idx]) == 0)
+			return ISC_R_SUCCESS;
+	}
+
+	/* Not one of ours: defer to the inner object when it can set. */
+	if (h -> inner && h -> inner -> type -> set_value)
+		return (*(h -> inner -> type -> set_value))
+			(h -> inner, id, name, value);
+	return ISC_R_NOTFOUND;
+}
+
+/* Keepalive hook.  The body does nothing, so calling it has no effect. */
+void dhcp_failover_keepalive (void *vs)
+{
+	(void)vs;	/* argument is unused */
+}
+
+/*
+ * Timer callback: try to open the link to the failover peer.  If the
+ * attempt fails outright (neither success nor "incomplete"), log the
+ * reason and schedule another attempt 90 seconds from now.
+ */
+void dhcp_failover_reconnect (void *vs)
+{
+	dhcp_failover_state_t *peer = vs;
+	struct timeval retry_at;
+	isc_result_t rc;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("dhcp_failover_reconnect");
+#endif
+	/* If we already connected the other way, let the connection
+	   recovery code initiate any retry that may be required. */
+	if (peer -> link_to_peer)
+		return;
+
+	rc = dhcp_failover_link_initiate ((omapi_object_t *)peer);
+	if (rc == ISC_R_SUCCESS || rc == DHCP_R_INCOMPLETE)
+		return;
+
+	log_info ("failover peer %s: %s", peer -> name,
+		  isc_result_totext (rc));
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info("add_timeout +90 dhcp_failover_reconnect");
+#endif
+	retry_at . tv_sec = cur_time + 90;
+	retry_at . tv_usec = 0;
+	add_timeout(&retry_at, dhcp_failover_reconnect, peer,
+		    (tvref_t)dhcp_failover_state_reference,
+		    (tvunref_t)dhcp_failover_state_dereference);
+}
+
+/*
+ * Timer callback for the startup timeout: feeds a "disconnect" event to
+ * the state machine of the failover state passed as the opaque argument.
+ */
+void dhcp_failover_startup_timeout (void *vs)
+{
+	dhcp_failover_state_t *peer = (dhcp_failover_state_t *)vs;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("dhcp_failover_startup_timeout");
+#endif
+
+	(void)dhcp_failover_state_transition (peer, "disconnect");
+}
+
+/*
+ * Timer callback for a failover link that did not finish starting up in
+ * time: locate the connection object in the link's object chain and, if
+ * there is one, log the timeout and disconnect it.
+ */
+void dhcp_failover_link_startup_timeout (void *vl)
+{
+	omapi_object_t *obj = (omapi_object_t *)vl;
+
+	/* Descend to the innermost object of the chain... */
+	while (obj -> inner)
+		obj = obj -> inner;
+
+	/* ...then climb outward until a connection object is found. */
+	while (obj && obj -> type != omapi_type_connection)
+		obj = obj -> outer;
+
+	if (!obj)
+		return;
+
+	log_info ("failover: link startup timeout");
+	omapi_disconnect (obj, 1);
+}
+
+/*
+ * Timer callback: try to start the failover listener again.  On failure
+ * the reason is logged and another attempt is scheduled 90 seconds from
+ * now.
+ */
+void dhcp_failover_listener_restart (void *vs)
+{
+	dhcp_failover_state_t *peer = vs;
+	struct timeval retry_at;
+	isc_result_t rc;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("dhcp_failover_listener_restart");
+#endif
+
+	rc = dhcp_failover_listen ((omapi_object_t *)peer);
+	if (rc == ISC_R_SUCCESS)
+		return;
+
+	log_info ("failover peer %s: %s", peer -> name,
+		  isc_result_totext (rc));
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("add_timeout +90 %s",
+		  "dhcp_failover_listener_restart");
+#endif
+	retry_at . tv_sec = cur_time + 90;
+	retry_at . tv_usec = 0;
+	add_timeout (&retry_at,
+		     dhcp_failover_listener_restart, peer,
+		     (tvref_t)dhcp_failover_state_reference,
+		     (tvunref_t)dhcp_failover_state_dereference);
+}
+
+/*
+ * Timer callback that moves the failover state passed as the opaque
+ * argument into partner_down.
+ */
+void
+dhcp_failover_auto_partner_down(void *vs)
+{
+	dhcp_failover_state_t *peer = (dhcp_failover_state_t *)vs;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info("dhcp_failover_auto_partner_down");
+#endif
+
+	(void)dhcp_failover_set_state(peer, partner_down);
+}
+
+/* OMAPI get_value handler for failover state objects.
+
+ Looks up the attribute called name on the failover state h and stores a
+ newly made OMAPI value in *value. Returns DHCP_R_INVALIDARG if h is not
+ a failover state object, and ISC_R_NOTFOUND when a recognised attribute
+ has nothing to report (no name, no hba, or an address that does not
+ evaluate). Names not handled here are passed to the inner object's
+ get_value handler if it has one, otherwise ISC_R_NOTFOUND is returned.
+ The id argument is only forwarded to the inner handler. */
+isc_result_t dhcp_failover_state_get_value (omapi_object_t *h,
+ omapi_object_t *id,
+ omapi_data_string_t *name,
+ omapi_value_t **value)
+{
+ dhcp_failover_state_t *s;
+ struct option_cache *oc;
+ struct data_string ds;
+ isc_result_t status;
+
+ if (h -> type != dhcp_type_failover_state)
+ return DHCP_R_INVALIDARG;
+ s = (dhcp_failover_state_t *)h;
+
+ if (!omapi_ds_strcmp (name, "name")) {
+ if (s -> name)
+ return omapi_make_string_value (value,
+ name, s -> name, MDL);
+ return ISC_R_NOTFOUND;
+ } else if (!omapi_ds_strcmp (name, "partner-address")) {
+ oc = s -> partner.address;
+ /* Shared by "partner-address" and "local-address" (which jumps
+ here with oc set to the local address): evaluate oc in the
+ global scope and return the resulting bytes. */
+ getaddr:
+ memset (&ds, 0, sizeof ds);
+ if (!evaluate_option_cache (&ds, (struct packet *)0,
+ (struct lease *)0,
+ (struct client_state *)0,
+ (struct option_state *)0,
+ (struct option_state *)0,
+ &global_scope, oc, MDL)) {
+ return ISC_R_NOTFOUND;
+ }
+ status = omapi_make_const_value (value,
+ name, ds.data, ds.len, MDL);
+ /* Disgusting kludge: as a side effect of this lookup, when oc is
+ the local address and no server identifier is stored yet, the
+ evaluated address is copied into s -> server_identifier. */
+ if (oc == s -> me.address && !s -> server_identifier.len)
+ data_string_copy (&s -> server_identifier, &ds, MDL);
+ data_string_forget (&ds, MDL);
+ return status;
+ } else if (!omapi_ds_strcmp (name, "local-address")) {
+ oc = s -> me.address;
+ goto getaddr;
+ } else if (!omapi_ds_strcmp (name, "partner-port")) {
+ return omapi_make_int_value (value, name,
+ s -> partner.port, MDL);
+ } else if (!omapi_ds_strcmp (name, "local-port")) {
+ return omapi_make_int_value (value,
+ name, s -> me.port, MDL);
+ } else if (!omapi_ds_strcmp (name, "max-outstanding-updates")) {
+ return omapi_make_uint_value (value, name,
+ s -> me.max_flying_updates,
+ MDL);
+ } else if (!omapi_ds_strcmp (name, "mclt")) {
+ return omapi_make_uint_value (value, name, s -> mclt, MDL);
+ } else if (!omapi_ds_strcmp (name, "load-balance-max-secs")) {
+ return omapi_make_int_value (value, name,
+ s -> load_balance_max_secs, MDL);
+ } else if (!omapi_ds_strcmp (name, "load-balance-hba")) {
+ /* The hba is always reported as 32 bytes. */
+ if (s -> hba)
+ return omapi_make_const_value (value, name,
+ s -> hba, 32, MDL);
+ return ISC_R_NOTFOUND;
+ } else if (!omapi_ds_strcmp (name, "partner-state")) {
+ return omapi_make_uint_value (value, name,
+ s -> partner.state, MDL);
+ } else if (!omapi_ds_strcmp (name, "local-state")) {
+ return omapi_make_uint_value (value, name,
+ s -> me.state, MDL);
+ } else if (!omapi_ds_strcmp (name, "partner-stos")) {
+ return omapi_make_int_value (value, name,
+ s -> partner.stos, MDL);
+ } else if (!omapi_ds_strcmp (name, "local-stos")) {
+ return omapi_make_int_value (value, name,
+ s -> me.stos, MDL);
+ } else if (!omapi_ds_strcmp (name, "hierarchy")) {
+ return omapi_make_uint_value (value, name, s -> i_am, MDL);
+ } else if (!omapi_ds_strcmp (name, "last-packet-sent")) {
+ return omapi_make_int_value (value, name,
+ s -> last_packet_sent, MDL);
+ } else if (!omapi_ds_strcmp (name, "last-timestamp-received")) {
+ return omapi_make_int_value (value, name,
+ s -> last_timestamp_received,
+ MDL);
+ } else if (!omapi_ds_strcmp (name, "skew")) {
+ return omapi_make_int_value (value, name, s -> skew, MDL);
+ } else if (!omapi_ds_strcmp (name, "max-response-delay")) {
+ return omapi_make_uint_value (value, name,
+ s -> me.max_response_delay,
+ MDL);
+ } else if (!omapi_ds_strcmp (name, "cur-unacked-updates")) {
+ return omapi_make_int_value (value, name,
+ s -> cur_unacked_updates, MDL);
+ }
+
+ /* Not one of ours: defer to the inner object when it can answer. */
+ if (h -> inner && h -> inner -> type -> get_value)
+ return (*(h -> inner -> type -> get_value))
+ (h -> inner, id, name, value);
+ return ISC_R_NOTFOUND;
+}
+
+/*
+ * OMAPI destroy handler for failover state objects.
+ *
+ * Releases everything the state still holds: the link to the peer, the
+ * name string, both address option caches, the hba buffer, the head and
+ * tail references of the update and ack queues, the send_update_done
+ * lease and the head and tail of the to-ack message queue.  Each member is
+ * released only if it is set.
+ *
+ * file and line identify the caller and are passed to every release call,
+ * including the dfree() of the name (which previously passed MDL, i.e.
+ * this file's own location, unlike every other release in the function).
+ *
+ * Returns DHCP_R_INVALIDARG if h is not a failover state object, and
+ * ISC_R_SUCCESS otherwise.
+ */
+isc_result_t dhcp_failover_state_destroy (omapi_object_t *h,
+					  const char *file, int line)
+{
+	dhcp_failover_state_t *s;
+
+	if (h -> type != dhcp_type_failover_state)
+		return DHCP_R_INVALIDARG;
+	s = (dhcp_failover_state_t *)h;
+
+	if (s -> link_to_peer)
+		dhcp_failover_link_dereference (&s -> link_to_peer, file, line);
+	if (s -> name) {
+		dfree (s -> name, file, line);
+		s -> name = (char *)0;
+	}
+	if (s -> partner.address)
+		option_cache_dereference (&s -> partner.address, file, line);
+	if (s -> me.address)
+		option_cache_dereference (&s -> me.address, file, line);
+	if (s -> hba) {
+		dfree (s -> hba, file, line);
+		s -> hba = (u_int8_t *)0;
+	}
+
+	/* Lease queues: heads and tails hold separate references. */
+	if (s -> update_queue_head)
+		lease_dereference (&s -> update_queue_head, file, line);
+	if (s -> update_queue_tail)
+		lease_dereference (&s -> update_queue_tail, file, line);
+	if (s -> ack_queue_head)
+		lease_dereference (&s -> ack_queue_head, file, line);
+	if (s -> ack_queue_tail)
+		lease_dereference (&s -> ack_queue_tail, file, line);
+	if (s -> send_update_done)
+		lease_dereference (&s -> send_update_done, file, line);
+
+	/* Pending-ack message queue. */
+	if (s -> toack_queue_head)
+		failover_message_dereference (&s -> toack_queue_head,
+					      file, line);
+	if (s -> toack_queue_tail)
+		failover_message_dereference (&s -> toack_queue_tail,
+					      file, line);
+	return ISC_R_SUCCESS;
+}
+
+/* Write one 32-bit attribute to connection C: the attribute NAME, a
+   32-bit length word holding sizeof (u_int32_t), then VALUE.  Returns
+   the status of the first write that fails, otherwise the status of
+   the final write. */
+
+static isc_result_t failover_state_stuff_uint32 (omapi_object_t *c,
+						 const char *name,
+						 u_int32_t value)
+{
+	isc_result_t status;
+
+	status = omapi_connection_put_name (c, name);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	status = omapi_connection_put_uint32 (c, sizeof (u_int32_t));
+	if (status != ISC_R_SUCCESS)
+		return status;
+	return omapi_connection_put_uint32 (c, value);
+}
+
+/* Write one opaque attribute to connection C: the attribute NAME, a
+   32-bit length word holding LEN, then LEN bytes copied from DATA.
+   Returns the status of the first write that fails, otherwise the
+   status of the final write. */
+
+static isc_result_t failover_state_stuff_bytes (omapi_object_t *c,
+						const char *name,
+						u_int8_t *data,
+						unsigned len)
+{
+	isc_result_t status;
+
+	status = omapi_connection_put_name (c, name);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	status = omapi_connection_put_uint32 (c, len);
+	if (status != ISC_R_SUCCESS)
+		return status;
+	return omapi_connection_copyin (c, data, len);
+}
+
+/* Write all the published values associated with the object through the
+   specified connection.
+
+   C must be an omapi connection object and H a failover state object;
+   otherwise DHCP_R_INVALIDARG is returned and nothing is written.  The
+   attributes are emitted in a fixed order and the first failing write
+   aborts the sequence, returning its status.  Once the failover
+   attributes are out, the inner object's stuff_values handler (if H has
+   an inner object that provides one) is invoked and its result
+   returned; otherwise the result is ISC_R_SUCCESS. */
+
+isc_result_t dhcp_failover_state_stuff (omapi_object_t *c,
+					omapi_object_t *id,
+					omapi_object_t *h)
+{
+	dhcp_failover_state_t *s;
+	isc_result_t status;
+
+	if (c -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+	if (h -> type != dhcp_type_failover_state)
+		return DHCP_R_INVALIDARG;
+	s = (dhcp_failover_state_t *)h;
+
+	/* The relationship name is the only string-valued attribute. */
+	status = omapi_connection_put_name (c, "name");
+	if (status != ISC_R_SUCCESS)
+		return status;
+	status = omapi_connection_put_string (c, s -> name);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	/* NOTE(review): partner.address is handed to
+	   evaluate_option_cache() elsewhere in this file, which suggests
+	   that it (and me.address) is a pointer.  If so, the bytes
+	   published here are those of the pointer itself rather than of
+	   an address - confirm.  Behavior is intentionally unchanged. */
+	status = failover_state_stuff_bytes (c, "partner-address",
+					     (u_int8_t *)&s -> partner.address,
+					     sizeof s -> partner.address);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "partner-port",
+					      (u_int32_t)s -> partner.port);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_bytes (c, "local-address",
+					     (u_int8_t *)&s -> me.address,
+					     sizeof s -> me.address);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "local-port",
+					      (u_int32_t)s -> me.port);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "max-outstanding-updates",
+					      s -> me.max_flying_updates);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "mclt", s -> mclt);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32
+		(c, "load-balance-max-secs",
+		 (u_int32_t)s -> load_balance_max_secs);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	/* The hash bucket array is published only when one is present,
+	   and always as exactly 32 bytes. */
+	if (s -> hba) {
+		status = failover_state_stuff_bytes (c, "load-balance-hba",
+						     s -> hba, 32);
+		if (status != ISC_R_SUCCESS)
+			return status;
+	}
+
+	status = failover_state_stuff_uint32 (c, "partner-state",
+					      s -> partner.state);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "local-state",
+					      s -> me.state);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "partner-stos",
+					      (u_int32_t)s -> partner.stos);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "local-stos",
+					      (u_int32_t)s -> me.stos);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "hierarchy", s -> i_am);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32
+		(c, "last-packet-sent", (u_int32_t)s -> last_packet_sent);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32
+		(c, "last-timestamp-received",
+		 (u_int32_t)s -> last_timestamp_received);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32 (c, "skew",
+					      (u_int32_t)s -> skew);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32
+		(c, "max-response-delay",
+		 (u_int32_t)s -> me.max_response_delay);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	status = failover_state_stuff_uint32
+		(c, "cur-unacked-updates",
+		 (u_int32_t)s -> cur_unacked_updates);
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	/* Let the object chained inside H publish its own values too. */
+	if (h -> inner && h -> inner -> type -> stuff_values)
+		return (*(h -> inner -> type -> stuff_values)) (c, id,
+								h -> inner);
+	return ISC_R_SUCCESS;
+}
+
+/* Find the failover state object described by REF and store a
+   reference to it in *SP.
+
+   REF may carry a "handle" value, a "name" value, or both.  When both
+   are given they must designate the same object.  Returns:
+     DHCP_R_NOKEYS       REF is null, or supplied neither key;
+     DHCP_R_INVALIDARG   the handle names an object of another type;
+     DHCP_R_KEYCONFLICT  handle and name designate different objects;
+     ISC_R_NOTFOUND      no failover state has the given name;
+     ISC_R_SUCCESS       *SP now references the failover state.
+   A failed handle lookup returns that lookup's own status. */
+
+isc_result_t dhcp_failover_state_lookup (omapi_object_t **sp,
+					 omapi_object_t *id,
+					 omapi_object_t *ref)
+{
+	omapi_value_t *key = (omapi_value_t *)0;
+	dhcp_failover_state_t *match;
+	isc_result_t status;
+
+	if (!ref)
+		return DHCP_R_NOKEYS;
+
+	/* Key one: an object handle. */
+	if (omapi_get_value_str (ref, id, "handle", &key) == ISC_R_SUCCESS) {
+		status = omapi_handle_td_lookup (sp, key -> value);
+		omapi_value_dereference (&key, MDL);
+		if (status != ISC_R_SUCCESS)
+			return status;
+
+		/* A handle to some other kind of object is no use. */
+		if ((*sp) -> type != dhcp_type_failover_state) {
+			omapi_object_dereference (sp, MDL);
+			return DHCP_R_INVALIDARG;
+		}
+	}
+
+	/* Key two: the failover peer name. */
+	if (omapi_get_value_str (ref, id, "name", &key) == ISC_R_SUCCESS) {
+		match = failover_states;
+		while (match) {
+			unsigned namelen = strlen (match -> name);
+
+			if (namelen == key -> value -> u.buffer.len &&
+			    memcmp (match -> name,
+				    key -> value -> u.buffer.value,
+				    namelen) == 0)
+				break;
+			match = match -> next;
+		}
+		omapi_value_dereference (&key, MDL);
+
+		if (*sp) {
+			/* The handle already produced an object; the name
+			   has to agree with it. */
+			if (*sp != (omapi_object_t *)match) {
+				omapi_object_dereference (sp, MDL);
+				return DHCP_R_KEYCONFLICT;
+			}
+		} else if (!match) {
+			return ISC_R_NOTFOUND;
+		} else {
+			/* XXX fix so that hash lookup itself creates
+			   XXX the reference. */
+			omapi_object_reference (sp, (omapi_object_t *)match,
+						MDL);
+		}
+	}
+
+	/* Still nothing in hand means no usable key was specified. */
+	return *sp ? ISC_R_SUCCESS : DHCP_R_NOKEYS;
+}
+
+/* Creation of failover state objects is not supported: both arguments
+   are ignored and ISC_R_NOTIMPLEMENTED is always returned. */
+
+isc_result_t dhcp_failover_state_create (omapi_object_t **sp,
+					 omapi_object_t *id)
+{
+	(void)sp;
+	(void)id;
+
+	return ISC_R_NOTIMPLEMENTED;
+}
+
+/* Removal of failover state objects is not supported: both arguments
+   are ignored and ISC_R_NOTIMPLEMENTED is always returned. */
+
+isc_result_t dhcp_failover_state_remove (omapi_object_t *sp,
+					 omapi_object_t *id)
+{
+	(void)sp;
+	(void)id;
+
+	return ISC_R_NOTIMPLEMENTED;
+}
+
+/* Report whether the ADDRLEN bytes at ADDR occur among the values that
+   STATE's partner.address expression evaluates to.
+
+   The evaluated data is scanned in consecutive ADDRLEN-sized pieces.
+   Returns 1 on the first piece equal to ADDR, and 0 when no piece
+   matches or the expression does not evaluate. */
+
+int dhcp_failover_state_match (dhcp_failover_state_t *state,
+			       u_int8_t *addr, unsigned addrlen)
+{
+	struct data_string addrs;
+	int found = 0;
+	int off;
+
+	memset (&addrs, 0, sizeof addrs);
+	if (!evaluate_option_cache (&addrs, (struct packet *)0,
+				    (struct lease *)0,
+				    (struct client_state *)0,
+				    (struct option_state *)0,
+				    (struct option_state *)0,
+				    &global_scope,
+				    state -> partner.address, MDL))
+		return 0;
+
+	/* Only whole pieces are compared; a short tail is ignored. */
+	for (off = 0; off + addrlen - 1 < addrs.len; off += addrlen) {
+		if (memcmp (&addrs.data [off], addr, addrlen) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	data_string_forget (&addrs, MDL);
+	return found;
+}
+
+/* Report whether the relationship name carried in the received option
+   NAME equals STATE's name.
+
+   NAME is not NUL-terminated: name->count gives its length in bytes,
+   so the lengths are compared first and then the bytes themselves.
+   Returns 1 on an exact match, 0 otherwise.
+
+   Written as a prototype-form definition; the old-style parameter
+   declaration list is obsolescent and rejected by C23 compilers. */
+
+int
+dhcp_failover_state_match_by_name(dhcp_failover_state_t *state,
+				  failover_option_t *name)
+{
+	if (strlen(state->name) != name->count)
+		return 0;
+
+	return memcmp(state->name, name->data, name->count) == 0;
+}
+
+/* Return descriptive text for a failover reject REASON code.
+
+   A value outside 0..255 gets a fixed "out of range" string.  A value
+   inside that range that matches none of the codes below is formatted
+   into a static buffer, so that particular result is overwritten by
+   the next call that takes the same path. */
+
+const char *dhcp_failover_reject_reason_print (int reason)
+{
+	static char resbuf[sizeof("Undefined-255: This reason code is not defined "
+				  "in the protocol standard.")];
+	const char *text;
+
+	if ((reason < 0) || (reason > 0xff))
+		return "Reason code out of range.";
+
+	switch (reason) {
+	case FTR_ILLEGAL_IP_ADDR:
+		text = "Illegal IP address (not part of any address pool).";
+		break;
+	case FTR_FATAL_CONFLICT:
+		text = "Fatal conflict exists: address in use by other client.";
+		break;
+	case FTR_MISSING_BINDINFO:
+		text = "Missing binding information.";
+		break;
+	case FTR_TIMEMISMATCH:
+		text = "Connection rejected, time mismatch too great.";
+		break;
+	case FTR_INVALID_MCLT:
+		text = "Connection rejected, invalid MCLT.";
+		break;
+	case FTR_MISC_REJECT:
+		text = "Connection rejected, unknown reason.";
+		break;
+	case FTR_DUP_CONNECTION:
+		text = "Connection rejected, duplicate connection.";
+		break;
+	case FTR_INVALID_PARTNER:
+		text = "Connection rejected, invalid failover partner.";
+		break;
+	case FTR_TLS_UNSUPPORTED:
+		text = "TLS not supported.";
+		break;
+	case FTR_TLS_UNCONFIGURED:
+		text = "TLS supported but not configured.";
+		break;
+	case FTR_TLS_REQUIRED:
+		text = "TLS required but not supported by partner.";
+		break;
+	case FTR_DIGEST_UNSUPPORTED:
+		text = "Message digest not supported.";
+		break;
+	case FTR_DIGEST_UNCONFIGURED:
+		text = "Message digest not configured.";
+		break;
+	case FTR_VERSION_MISMATCH:
+		text = "Protocol version mismatch.";
+		break;
+	case FTR_OUTDATED_BIND_INFO:
+		text = "Outdated binding information.";
+		break;
+	case FTR_LESS_CRIT_BIND_INFO:
+		text = "Less critical binding information.";
+		break;
+	case FTR_NO_TRAFFIC:
+		text = "No traffic within sufficient time.";
+		break;
+	case FTR_HBA_CONFLICT:
+		text = "Hash bucket assignment conflict.";
+		break;
+	case FTR_IP_NOT_RESERVED:
+		text = "IP not reserved on this server.";
+		break;
+	case FTR_IP_DIGEST_FAILURE:
+		text = "Message digest failed to compare.";
+		break;
+	case FTR_IP_MISSING_DIGEST:
+		text = "Missing message digest.";
+		break;
+	case FTR_UNKNOWN:
+		text = "Unknown Error.";
+		break;
+	default:
+		/* resbuf is sized for the widest in-range code (255). */
+		sprintf(resbuf, "Undefined-%d: This reason code is not defined in the "
+			"protocol standard.", reason);
+		text = resbuf;
+		break;
+	}
+
+	return text;
+}
+
+/* Return the printable name of a failover STATE.  Any value that is
+   not one of the states listed below is reported as
+   "unknown-state". */
+
+const char *dhcp_failover_state_name_print (enum failover_state state)
+{
+	switch (state) {
+	case partner_down:		 return "partner-down";
+	case normal:			 return "normal";
+	case conflict_done:		 return "conflict-done";
+	case communications_interrupted: return "communications-interrupted";
+	case resolution_interrupted:	 return "resolution-interrupted";
+	case potential_conflict:	 return "potential-conflict";
+	case recover:			 return "recover";
+	case recover_done:		 return "recover-done";
+	case recover_wait:		 return "recover-wait";
+	case shut_down:			 return "shutdown";
+	case paused:			 return "paused";
+	case startup:			 return "startup";
+
+	case unknown_state:
+	default:
+		return "unknown-state";
+	}
+}
+
+/* Return the printable name of failover message TYPE.
+
+   A value above 255 yields "invalid-message".  A value in range that
+   matches none of the message types below is formatted into a static
+   buffer, so that particular result is overwritten by the next call
+   that takes the same path. */
+
+const char *dhcp_failover_message_name (unsigned type)
+{
+	static char messbuf[sizeof("unknown-message-255")];
+
+	if (type > 0xff)
+		return "invalid-message";
+
+	switch (type) {
+	case FTM_POOLREQ:	return "pool-request";
+	case FTM_POOLRESP:	return "pool-response";
+	case FTM_BNDUPD:	return "bind-update";
+	case FTM_BNDACK:	return "bind-ack";
+	case FTM_CONNECT:	return "connect";
+	case FTM_CONNECTACK:	return "connect-ack";
+	case FTM_UPDREQ:	return "update-request";
+	case FTM_UPDDONE:	return "update-done";
+	case FTM_UPDREQALL:	return "update-request-all";
+	case FTM_STATE:		return "state";
+	case FTM_CONTACT:	return "contact";
+	case FTM_DISCONNECT:	return "disconnect";
+	default:
+		break;
+	}
+
+	/* messbuf is sized for the widest in-range value (255). */
+	sprintf(messbuf, "unknown-message-%u", type);
+	return messbuf;
+}
+
+/* Return the printable name of failover option code TYPE.
+
+   A value above 65535 yields "invalid-option".  A value in range that
+   matches none of the option codes below is formatted into a static
+   buffer, so that particular result is overwritten by the next call
+   that takes the same path. */
+
+const char *dhcp_failover_option_name (unsigned type)
+{
+	static char optbuf[sizeof("unknown-option-65535")];
+
+	if (type > 0xffff)
+		return "invalid-option";
+
+	switch (type) {
+	case FTO_ADDRESSES_TRANSFERRED:	return "addresses-transferred";
+	case FTO_ASSIGNED_IP_ADDRESS:	return "assigned-ip-address";
+	case FTO_BINDING_STATUS:	return "binding-status";
+	case FTO_CLIENT_IDENTIFIER:	return "client-identifier";
+	case FTO_CHADDR:		return "chaddr";
+	case FTO_CLTT:			return "cltt";
+	case FTO_DDNS:			return "ddns";
+	case FTO_DELAYED_SERVICE:	return "delayed-service";
+	case FTO_HBA:			return "hba";
+	case FTO_IP_FLAGS:		return "ip-flags";
+	case FTO_LEASE_EXPIRY:		return "lease-expiry";
+	case FTO_MAX_UNACKED:		return "max-unacked";
+	case FTO_MCLT:			return "mclt";
+	case FTO_MESSAGE:		return "message";
+	case FTO_MESSAGE_DIGEST:	return "message-digest";
+	case FTO_POTENTIAL_EXPIRY:	return "potential-expiry";
+	case FTO_PROTOCOL_VERSION:	return "protocol-version";
+	case FTO_RECEIVE_TIMER:		return "receive-timer";
+	case FTO_REJECT_REASON:		return "reject-reason";
+	case FTO_RELATIONSHIP_NAME:	return "relationship-name";
+	case FTO_REPLY_OPTIONS:		return "reply-options";
+	case FTO_REQUEST_OPTIONS:	return "request-options";
+	case FTO_SERVER_FLAGS:		return "server-flags";
+	case FTO_SERVER_STATE:		return "server-state";
+	case FTO_STOS:			return "stos";
+	case FTO_TLS_REPLY:		return "tls-reply";
+	case FTO_TLS_REQUEST:		return "tls-request";
+	case FTO_VENDOR_CLASS:		return "vendor-class";
+	case FTO_VENDOR_OPTIONS:	return "vendor-options";
+	default:
+		break;
+	}
+
+	/* optbuf is sized for the widest in-range value (65535). */
+	sprintf(optbuf, "unknown-option-%u", type);
+	return optbuf;
+}
+
+/* Format text printf-style and wrap it in a failover option.
+
+   code     option code handed on to dhcp_failover_make_option().
+   obuf, obufix, obufmax
+            debug-trace buffer arguments, passed through untouched.
+   fmt, ... printf-style format and its arguments.
+
+   The text is formatted into a 256-byte scratch buffer.  Returns
+   whatever dhcp_failover_make_option() returns for the resulting
+   string. */
+
+failover_option_t *dhcp_failover_option_printf (unsigned code,
+						char *obuf,
+						unsigned *obufix,
+						unsigned obufmax,
+						const char *fmt, ...)
+{
+	va_list va;
+	char tbuf [256];
+	int len;
+
+	/* %Audit% Truncation causes panic. %2004.06.17,Revisit%
+	 * It is unclear what the effects of truncation here are, or
+	 * how that condition should be handled.  It seems that this
+	 * function is used for formatting messages in the failover
+	 * command channel.  For now the safest thing is for
+	 * overflow-truncation to cause a fatal log.
+	 */
+	va_start (va, fmt);
+	len = vsnprintf (tbuf, sizeof tbuf, fmt, va);
+	va_end (va);
+
+	/* A negative result is an output error; treat it like truncation
+	   explicitly instead of relying on it turning into a huge
+	   unsigned value in the comparison. */
+	if (len < 0 || (size_t)len >= sizeof tbuf)
+		log_fatal ("%s: vsnprintf would truncate",
+			   "dhcp_failover_option_printf");
+
+	/* dhcp_failover_make_option() fetches the length with
+	   va_arg (va, int), so hand it an int rather than the size_t that
+	   strlen() yields. */
+	return dhcp_failover_make_option (code, obuf, obufix, obufmax,
+					  (int)strlen (tbuf), tbuf);
+}
+
+/* Build one failover option in wire format and return it in a freshly
+ * allocated failover_option_t.
+ *
+ * code     option code; it must lie in 1..FTO_MAX and have a defined
+ *          type in ft_options[], else &null_failover_option is returned.
+ * obuf, obufix, obufmax
+ *          debug text buffer, its fill index and its capacity; they are
+ *          only used (through failover_print) when
+ *          DEBUG_FAILOVER_MESSAGES is defined.
+ * ...      option-specific arguments, consumed in this order:
+ *          - FT_DDNS / FT_DDNS1: an int byte count, then one (FT_DDNS)
+ *            or two (FT_DDNS1) unsigned values stored as single bytes,
+ *            then a u_int8_t * to that many bytes of data.
+ *          - every other type: an int element count, but only when the
+ *            ft_options[] entry has num_present == 0; after that
+ *              FT_TEXT, FT_TEXT_OR_BYTES: a char * to count bytes;
+ *              FT_IPADDR: an unsigned address length, then count
+ *                u_int8_t * address pointers;
+ *              FT_UINT8, FT_UINT32: count values fetched as unsigned;
+ *              FT_UINT16: count values fetched as u_int32_t;
+ *              FT_BYTES, FT_DIGEST: a u_int8_t * to count bytes.
+ *
+ * On success the returned option's data holds the 2-byte option code,
+ * the 2-byte payload length and the payload, and its count is the total
+ * number of bytes in data.  Both the structure and its data come from
+ * dmalloc().  &null_failover_option is returned for a bad code, when an
+ * allocation fails, or when an FT_IPADDR address length is not 4.
+ */
+
+failover_option_t *dhcp_failover_make_option (unsigned code,
+ char *obuf, unsigned *obufix,
+ unsigned obufmax, ...)
+{
+ va_list va;
+ struct failover_option_info *info;
+ int i;
+ unsigned size, count;
+ unsigned val;
+ u_int8_t *iaddr;
+ unsigned ilen = 0;
+ u_int8_t *bval;
+ char *txt = NULL;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ char tbuf [256];
+#endif
+
+ /* Note that the failover_option structure is used differently on
+ input than on output - on input, count is an element count, and
+ on output it's the number of bytes total in the option, including
+ the option code and option length. */
+ failover_option_t option, *op;
+
+
+ /* Bogus option code? */
+ if (code < 1 || code > FTO_MAX || ft_options [code].type == FT_UNDEF) {
+ return &null_failover_option;
+ }
+ info = &ft_options [code];
+
+ va_start (va, obufmax);
+
+ /* Get the number of elements and the size of the buffer we need
+ to allocate. */
+ if (info -> type == FT_DDNS || info -> type == FT_DDNS1) {
+ /* For the DDNS types, count is the number of leading single-byte
+ values (one or two) and the int argument is the length of the
+ data that follows them. */
+ count = info -> type == FT_DDNS ? 1 : 2;
+ size = va_arg (va, int) + count;
+ } else {
+ /* Find out how many items in this list. */
+ if (info -> num_present)
+ count = info -> num_present;
+ else
+ count = va_arg (va, int);
+
+ /* Figure out size. */
+ switch (info -> type) {
+ case FT_UINT8:
+ case FT_BYTES:
+ case FT_DIGEST:
+ size = count;
+ break;
+
+ case FT_TEXT_OR_BYTES:
+ case FT_TEXT:
+ /* The text pointer is fetched here, during sizing, and
+ copied further down. */
+ txt = va_arg (va, char *);
+ size = count;
+ break;
+
+ case FT_IPADDR:
+ /* The per-address length precedes the address pointers. */
+ ilen = va_arg (va, unsigned);
+ size = count * ilen;
+ break;
+
+ case FT_UINT32:
+ size = count * 4;
+ break;
+
+ case FT_UINT16:
+ size = count * 2;
+ break;
+
+ default:
+ /* shouldn't get here. */
+ log_fatal ("bogus type in failover_make_option: %d",
+ info -> type);
+ return &null_failover_option;
+ }
+ }
+
+ /* Make room for the 2-byte option code and 2-byte option length
+ that precede the payload. */
+ size += 4;
+
+ /* Allocate a buffer for the option. */
+ option.count = size;
+ option.data = dmalloc (option.count, MDL);
+ if (!option.data) {
+ va_end (va);
+ return &null_failover_option;
+ }
+
+ /* Put in the option code and option length. */
+ putUShort (option.data, code);
+ putUShort (&option.data [2], size - 4);
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ /* %Audit% Truncation causes panic. %2004.06.17,Revisit%
+ * It is unclear what the effects of truncation here are, or
+ * how that condition should be handled. It seems that this
+ * message may be sent over the failover command channel.
+ * For now the safest thing is for overflow-truncation to cause
+ * a fatal log.
+ */
+ if (snprintf (tbuf, sizeof tbuf, " (%s<%d>", info -> name,
+ option.count) >= sizeof tbuf)
+ log_fatal ("dhcp_failover_make_option: tbuf overflow");
+ failover_print (obuf, obufix, obufmax, tbuf);
+#endif
+
+ /* Now put in the data. */
+ switch (info -> type) {
+ case FT_UINT8:
+ for (i = 0; i < count; i++) {
+ val = va_arg (va, unsigned);
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ /* %Audit% Cannot exceed 24 bytes. %2004.06.17,Safe% */
+ sprintf (tbuf, " %d", val);
+ failover_print (obuf, obufix, obufmax, tbuf);
+#endif
+ option.data [i + 4] = val;
+ }
+ break;
+
+ case FT_IPADDR:
+ for (i = 0; i < count; i++) {
+ iaddr = va_arg (va, u_int8_t *);
+ /* The address length is only validated here, after the
+ buffer has been allocated, so the failure path has to
+ release that buffer itself. */
+ if (ilen != 4) {
+ dfree (option.data, MDL);
+ log_error ("IP addrlen=%d, should be 4.",
+ ilen);
+ va_end (va);
+ return &null_failover_option;
+ }
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ /*%Audit% Cannot exceed 17 bytes. %2004.06.17,Safe%*/
+ sprintf (tbuf, " %u.%u.%u.%u",
+ iaddr [0], iaddr [1], iaddr [2], iaddr [3]);
+ failover_print (obuf, obufix, obufmax, tbuf);
+#endif
+ memcpy (&option.data [4 + i * ilen], iaddr, ilen);
+ }
+ break;
+
+ case FT_UINT32:
+ for (i = 0; i < count; i++) {
+ val = va_arg (va, unsigned);
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ /*%Audit% Cannot exceed 24 bytes. %2004.06.17,Safe%*/
+ sprintf (tbuf, " %d", val);
+ failover_print (obuf, obufix, obufmax, tbuf);
+#endif
+ putULong (&option.data [4 + i * 4], val);
+ }
+ break;
+
+ case FT_BYTES:
+ case FT_DIGEST:
+ bval = va_arg (va, u_int8_t *);
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ for (i = 0; i < count; i++) {
+ /* 23 bytes plus nul, safe. */
+ sprintf (tbuf, " %d", bval [i]);
+ failover_print (obuf, obufix, obufmax, tbuf);
+ }
+#endif
+ memcpy (&option.data [4], bval, count);
+ break;
+
+ /* On output, TEXT_OR_BYTES is _always_ text, and always NUL
+ terminated. Note that the caller should be careful not
+ to provide a format and data that amount to more than 256
+ bytes of data, since it will cause a fatal error. */
+ case FT_TEXT_OR_BYTES:
+ case FT_TEXT:
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ /* %Audit% Truncation causes panic. %2004.06.17,Revisit%
+ * It is unclear what the effects of truncation here are, or
+ * how that condition should be handled. It seems that this
+ * function is used for formatting messages in the failover
+ * command channel. For now the safest thing is for
+ * overflow-truncation to cause a fatal log.
+ */
+ if (snprintf (tbuf, sizeof tbuf, "\"%s\"", txt) >= sizeof tbuf)
+ log_fatal ("dhcp_failover_make_option: tbuf overflow");
+ failover_print (obuf, obufix, obufmax, tbuf);
+#endif
+ /* Exactly count bytes are copied; no terminator is appended. */
+ memcpy (&option.data [4], txt, count);
+ break;
+
+ case FT_DDNS:
+ case FT_DDNS1:
+ option.data [4] = va_arg (va, unsigned);
+ if (count == 2)
+ option.data [5] = va_arg (va, unsigned);
+ bval = va_arg (va, u_int8_t *);
+ /* size - count - 4 is the data length that was fetched from the
+ argument list while sizing the option. */
+ memcpy (&option.data [4 + count], bval, size - count - 4);
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ for (i = 4; i < size; i++) {
+ /*%Audit% Cannot exceed 24 bytes. %2004.06.17,Safe%*/
+ sprintf (tbuf, " %d", option.data [i]);
+ failover_print (obuf, obufix, obufmax, tbuf);
+ }
+#endif
+ break;
+
+ case FT_UINT16:
+ for (i = 0; i < count; i++) {
+ val = va_arg (va, u_int32_t);
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ /*%Audit% Cannot exceed 24 bytes. %2004.06.17,Safe%*/
+ sprintf (tbuf, " %d", val);
+ failover_print (obuf, obufix, obufmax, tbuf);
+#endif
+ putUShort (&option.data [4 + i * 2], val);
+ }
+ break;
+
+ case FT_UNDEF:
+ default:
+ break;
+ }
+
+#if defined DEBUG_FAILOVER_MESSAGES
+ failover_print (obuf, obufix, obufmax, ")");
+#endif
+ va_end (va);
+
+ /* Now allocate a place to store what we just set up. */
+ op = dmalloc (sizeof (failover_option_t), MDL);
+ if (!op) {
+ dfree (option.data, MDL);
+ return &null_failover_option;
+ }
+
+ /* Structure copy: the heap structure takes over the data buffer. */
+ *op = option;
+ return op;
+}
+
+/* Assemble a complete failover message - the 12-byte header followed
+   by the supplied options - and queue it on CONNECTION.
+
+   link        failover link the message belongs to.
+   connection  omapi connection the bytes are written to.
+   msg_type    message type, stored as a single byte.
+   xid         transaction id, sent as a 32-bit value.
+   ...         failover_option_t pointers, ended by a null pointer.
+               &skip_failover_option entries are ignored;
+               &null_failover_option marks an option that could not be
+               built.
+
+   Every option other than those two sentinels is consumed: both its
+   data and the option structure are released with dfree() before this
+   function returns, on success and on failure alike.
+
+   Returns DHCP_R_INVALIDARG if a null option was among the arguments
+   and ISC_R_NOMEMORY if the payload buffer could not be allocated; in
+   both cases nothing has been written to the connection.  If a write
+   fails, the connection is disconnected and the failing status
+   returned.  After a successful send on the link that is its state
+   object's link_to_peer, dhcp_failover_send_contact is scheduled for
+   one third of the partner's max_response_delay from now. */
+
+isc_result_t dhcp_failover_put_message (dhcp_failover_link_t *link,
+					omapi_object_t *connection,
+					int msg_type, u_int32_t xid, ...)
+{
+	unsigned size = 0;
+	int bad_option = 0;
+	int opix = 0;
+	va_list list;
+	failover_option_t *option;
+	unsigned char *opbuf = (unsigned char *)0;
+	isc_result_t status = ISC_R_SUCCESS;
+	unsigned char cbuf;
+	struct timeval tv;
+
+	/* Run through the argument list once to compute the length of
+	   the option portion of the message. */
+	va_start (list, xid);
+	while ((option = va_arg (list, failover_option_t *))) {
+		if (option != &skip_failover_option)
+			size += option -> count;
+		if (option == &null_failover_option)
+			bad_option = 1;
+	}
+	va_end (list);
+
+	/* Allocate an option buffer, unless we got an error. */
+	if (!bad_option && size) {
+		opbuf = dmalloc (size, MDL);
+		if (!opbuf)
+			status = ISC_R_NOMEMORY;
+	}
+
+	/* Second pass: copy each option into the buffer (when there is
+	   one) and release it.  The options are released even when the
+	   message cannot be sent, because the caller has handed them
+	   over. */
+	va_start (list, xid);
+	while ((option = va_arg (list, failover_option_t *))) {
+		if (option == &skip_failover_option)
+			continue;
+		if (!bad_option && opbuf)
+			memcpy (&opbuf [opix],
+				option -> data, option -> count);
+		if (option != &null_failover_option) {
+			opix += option -> count;
+			dfree (option -> data, MDL);
+			dfree (option, MDL);
+		}
+	}
+	va_end (list);
+
+	/* No buffer is ever allocated when an option was bad, so there
+	   is nothing left to release here. */
+	if (bad_option)
+		return DHCP_R_INVALIDARG;
+
+	/* Without a payload buffer the message must not go out: the
+	   header would announce SIZE bytes of options that never follow.
+	   Nothing has been written yet, so the connection is left
+	   alone. */
+	if (status != ISC_R_SUCCESS)
+		return status;
+
+	/* Now send the message header. */
+
+	/* Message length. */
+	status = omapi_connection_put_uint16 (connection, size + 12);
+	if (status != ISC_R_SUCCESS)
+		goto err;
+
+	/* Message type. */
+	cbuf = msg_type;
+	status = omapi_connection_copyin (connection, &cbuf, 1);
+	if (status != ISC_R_SUCCESS)
+		goto err;
+
+	/* Payload offset. */
+	cbuf = 12;
+	status = omapi_connection_copyin (connection, &cbuf, 1);
+	if (status != ISC_R_SUCCESS)
+		goto err;
+
+	/* Current time. */
+	status = omapi_connection_put_uint32 (connection, (u_int32_t)cur_time);
+	if (status != ISC_R_SUCCESS)
+		goto err;
+
+	/* Transaction ID. */
+	status = omapi_connection_put_uint32(connection, xid);
+	if (status != ISC_R_SUCCESS)
+		goto err;
+
+	/* Payload. */
+	if (opbuf) {
+		status = omapi_connection_copyin (connection, opbuf, size);
+		if (status != ISC_R_SUCCESS)
+			goto err;
+		dfree (opbuf, MDL);
+	}
+
+	/* Something was just sent to the peer, so push the next contact
+	   message out to a third of the partner's response delay. */
+	if (link -> state_object &&
+	    link -> state_object -> link_to_peer == link) {
+#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
+		log_info ("add_timeout +%d %s",
+			  (int)(link -> state_object ->
+				partner.max_response_delay) / 3,
+			  "dhcp_failover_send_contact");
+#endif
+		tv . tv_sec = cur_time +
+			(int)(link -> state_object ->
+			      partner.max_response_delay) / 3;
+		tv . tv_usec = 0;
+		add_timeout (&tv,
+			     dhcp_failover_send_contact, link -> state_object,
+			     (tvref_t)dhcp_failover_state_reference,
+			     (tvunref_t)dhcp_failover_state_dereference);
+	}
+	return status;
+
+      err:
+	/* Part of the message may already be queued, so the stream can
+	   no longer be trusted: drop the connection. */
+	if (opbuf)
+		dfree (opbuf, MDL);
+	log_info ("dhcp_failover_put_message: something went wrong.");
+	omapi_disconnect (connection, 1);
+	return status;
+}
+
+/* Handle expiry of the wait for the failover peer.
+
+   VSTATE is expected to be a failover state object.  Nothing happens
+   unless it is one and its link_to_peer sits on an omapi connection.
+   In that case the timeout is logged and the connection is
+   disconnected. */
+
+void dhcp_failover_timeout (void *vstate)
+{
+	dhcp_failover_state_t *peer = vstate;
+	dhcp_failover_link_t *lnk;
+
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("dhcp_failover_timeout");
+#endif
+
+	if (peer == NULL)
+		return;
+	if (peer -> type != dhcp_type_failover_state)
+		return;
+
+	lnk = peer -> link_to_peer;
+	if (lnk != NULL &&
+	    lnk -> outer != NULL &&
+	    lnk -> outer -> type == omapi_type_connection) {
+		log_error ("timeout waiting for failover peer %s",
+			   peer -> name);
+
+		/* If we haven't gotten a timely response, blow away the
+		   connection.  This will cause the state to change
+		   automatically. */
+		omapi_disconnect (lnk -> outer, 1);
+	}
+}
+
+/* Send an option-less CONTACT message to the failover peer.
+
+   VSTATE is expected to be a failover state object.  Nothing is sent
+   unless it is one and its link_to_peer sits on an omapi connection.
+   The link's transaction id is consumed and advanced.  The send
+   status is only reported in the debug trace. */
+
+void dhcp_failover_send_contact (void *vstate)
+{
+	dhcp_failover_state_t *peer = vstate;
+	dhcp_failover_link_t *lnk;
+	isc_result_t status;
+
+#if defined(DEBUG_FAILOVER_MESSAGES) && \
+    defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+	failover_print(obuf, &obufix, sizeof(obuf), "(contact");
+#endif
+
+#if defined (DEBUG_FAILOVER_CONTACT_TIMING)
+	log_info ("dhcp_failover_send_contact");
+#endif
+
+	if (peer == NULL)
+		return;
+	if (peer -> type != dhcp_type_failover_state)
+		return;
+
+	lnk = peer -> link_to_peer;
+	if (lnk == NULL)
+		return;
+	if (lnk -> outer == NULL ||
+	    lnk -> outer -> type != omapi_type_connection)
+		return;
+
+	status = dhcp_failover_put_message (lnk, lnk -> outer,
+					    FTM_CONTACT, lnk->xid++,
+					    (failover_option_t *)0);
+
+#if defined(DEBUG_FAILOVER_MESSAGES) && \
+    defined(DEBUG_FAILOVER_CONTACT_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print(obuf, &obufix, sizeof(obuf), " (failed)");
+	failover_print(obuf, &obufix, sizeof(obuf), ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+}
+
+/* Send a STATE message to the failover peer.
+
+   The message carries the server state (saved_state is reported in
+   place of me.state while me.state is startup), the server flags
+   (FTF_SERVER_STARTUP while service_state is service_startup, else 0)
+   and this server's stos value.
+
+   Returns DHCP_R_INVALIDARG if STATE is not a failover state object
+   or has no link_to_peer on an omapi connection; otherwise returns
+   the status of dhcp_failover_put_message(), so that a failed send is
+   reported to the caller in the same way the other senders in this
+   file report it. */
+
+isc_result_t dhcp_failover_send_state (dhcp_failover_state_t *state)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(state");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!state || state -> type != dhcp_type_failover_state)
+		return DHCP_R_INVALIDARG;
+	link = state -> link_to_peer;
+	if (!link ||
+	    !link -> outer ||
+	    link -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	/* dhcp_failover_make_option() fetches integer values from its
+	   argument list as unsigned, so me.stos is narrowed explicitly,
+	   matching the cast applied where it is published as a 32-bit
+	   attribute.  NOTE(review): the declared type of stos is not
+	   visible here; the cast is a no-op if it is already 32 bits. */
+	status = (dhcp_failover_put_message
+		  (link, link -> outer,
+		   FTM_STATE, link->xid++,
+		   dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+					      (state -> me.state == startup
+					       ? state -> saved_state
+					       : state -> me.state)),
+		   dhcp_failover_make_option
+		   (FTO_SERVER_FLAGS, FMA,
+		    (state -> service_state == service_startup
+		     ? FTF_SERVER_STARTUP : 0)),
+		   dhcp_failover_make_option (FTO_STOS, FMA,
+					      (u_int32_t)state -> me.stos),
+		   (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return status;
+}
+
+/* Send a CONNECT message on failover link L.
+
+   The message carries the relationship name, this server's
+   max_flying_updates and max_response_delay, a vendor class string
+   built from PACKAGE_VERSION, the protocol version, a TLS request,
+   the MCLT and - when the state has one - the 32-byte hash bucket
+   array.
+
+   Returns DHCP_R_INVALIDARG if L is not a failover link, is not on an
+   omapi connection, or has no state object to describe; otherwise
+   returns the status of dhcp_failover_put_message(). */
+
+isc_result_t dhcp_failover_send_connect (omapi_object_t *l)
+{
+	dhcp_failover_link_t *link;
+	dhcp_failover_state_t *state;
+	isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(connect");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!l || l -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)l;
+	state = link -> state_object;
+	if (!l -> outer || l -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	/* Every option below is read out of the state object, and a link
+	   need not have one (dhcp_failover_put_message() tests for that
+	   case), so refuse rather than dereference a null pointer. */
+	if (!state)
+		return DHCP_R_INVALIDARG;
+
+	/* dhcp_failover_make_option() fetches lengths with
+	   va_arg (va, int) and integer values as unsigned, so strlen()'s
+	   size_t result and max_response_delay are narrowed explicitly
+	   before they travel through the variable argument list. */
+	status =
+	    (dhcp_failover_put_message
+	     (link, l -> outer,
+	      FTM_CONNECT, link->xid++,
+	      dhcp_failover_make_option(FTO_RELATIONSHIP_NAME, FMA,
+					(int)strlen(state->name),
+					state->name),
+	      dhcp_failover_make_option (FTO_MAX_UNACKED, FMA,
+					 state -> me.max_flying_updates),
+	      dhcp_failover_make_option
+	      (FTO_RECEIVE_TIMER, FMA,
+	       (u_int32_t)state -> me.max_response_delay),
+	      dhcp_failover_option_printf(FTO_VENDOR_CLASS, FMA,
+					  "isc-%s", PACKAGE_VERSION),
+	      dhcp_failover_make_option (FTO_PROTOCOL_VERSION, FMA,
+					 DHCP_FAILOVER_VERSION),
+	      dhcp_failover_make_option (FTO_TLS_REQUEST, FMA,
+					 0, 0),
+	      dhcp_failover_make_option (FTO_MCLT, FMA,
+					 state -> mclt),
+	      (state -> hba
+	       ? dhcp_failover_make_option (FTO_HBA, FMA, 32, state -> hba)
+	       : &skip_failover_option),
+	      (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return status;
+}
+
+/* Send a CONNECTACK message on failover link L, answering the message
+   currently held in the link's imsg (its transaction id is echoed).
+
+   l       the failover link; it must be on an omapi connection.
+   state   the failover state the connection was matched to, or null
+           if there is none.  With a state, its name, max_flying_updates
+           and max_response_delay are reported.  Without one, the
+           relationship name received in imsg is echoed back if it was
+           present, and the other two options are left out.
+   reason  reject reason code; 0 leaves out the reject-reason option.
+   errmsg  optional text, sent only when REASON is non-zero.
+
+   A TLS reply is included only if the incoming message carried a TLS
+   request.  Returns DHCP_R_INVALIDARG if L is not a failover link on
+   an omapi connection; otherwise returns the status of
+   dhcp_failover_put_message(). */
+
+isc_result_t dhcp_failover_send_connectack (omapi_object_t *l,
+					    dhcp_failover_state_t *state,
+					    int reason, const char *errmsg)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(connectack");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!l || l -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)l;
+	if (!l -> outer || l -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	/* The received relationship name must be re-encoded, not passed
+	   along as it is: dhcp_failover_put_message() releases every
+	   option it is given with dfree(), and copies option data
+	   assuming it already starts with the code/length header.  The
+	   option inside imsg is an embedded member in input form (count
+	   is the name length, data the bare name bytes), so it satisfies
+	   neither requirement.
+
+	   dhcp_failover_make_option() fetches lengths with
+	   va_arg (va, int), hence the explicit narrowing of strlen()'s
+	   size_t result and of the unsigned count. */
+	status =
+	    (dhcp_failover_put_message
+	     (link, l -> outer,
+	      FTM_CONNECTACK, link->imsg->xid,
+	      state
+	      ? dhcp_failover_make_option(FTO_RELATIONSHIP_NAME, FMA,
+					  (int)strlen(state->name),
+					  state->name)
+	      : (link->imsg->options_present & FTB_RELATIONSHIP_NAME)
+		? dhcp_failover_make_option
+		  (FTO_RELATIONSHIP_NAME, FMA,
+		   (int)link->imsg->relationship_name.count,
+		   (char *)link->imsg->relationship_name.data)
+		: &skip_failover_option,
+	      state
+	      ? dhcp_failover_make_option (FTO_MAX_UNACKED, FMA,
+					   state -> me.max_flying_updates)
+	      : &skip_failover_option,
+	      state
+	      ? dhcp_failover_make_option
+		(FTO_RECEIVE_TIMER, FMA,
+		 (u_int32_t)state -> me.max_response_delay)
+	      : &skip_failover_option,
+	      dhcp_failover_option_printf(FTO_VENDOR_CLASS, FMA,
+					  "isc-%s", PACKAGE_VERSION),
+	      dhcp_failover_make_option (FTO_PROTOCOL_VERSION, FMA,
+					 DHCP_FAILOVER_VERSION),
+	      (link->imsg->options_present & FTB_TLS_REQUEST)
+	      ? dhcp_failover_make_option(FTO_TLS_REPLY, FMA,
+					  0, 0)
+	      : &skip_failover_option,
+	      reason
+	      ? dhcp_failover_make_option (FTO_REJECT_REASON,
+					   FMA, reason)
+	      : &skip_failover_option,
+	      (reason && errmsg)
+	      ? dhcp_failover_make_option (FTO_MESSAGE, FMA,
+					   (int)strlen (errmsg), errmsg)
+	      : &skip_failover_option,
+	      (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return status;
+}
+
+/* Send a DISCONNECT message on failover link L.
+
+   l        the failover link; it must be on an omapi connection.
+   reason   reject reason code, always sent.
+   message  optional explanatory text.  If it is null and REASON is
+            non-zero, the text from
+            dhcp_failover_reject_reason_print() is used; if it is null
+            and REASON is zero, no message option is sent.
+
+   The link's transaction id is consumed and advanced.  Returns
+   DHCP_R_INVALIDARG if L is not a failover link on an omapi
+   connection; otherwise returns the status of
+   dhcp_failover_put_message(). */
+
+isc_result_t dhcp_failover_send_disconnect (omapi_object_t *l,
+					    int reason,
+					    const char *message)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(disconnect");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!l || l -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)l;
+	if (!l -> outer || l -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	if (!message && reason)
+		message = dhcp_failover_reject_reason_print (reason);
+
+	/* dhcp_failover_make_option() fetches the text length with
+	   va_arg (va, int), so strlen()'s size_t result is narrowed
+	   explicitly. */
+	status = (dhcp_failover_put_message
+		  (link, l -> outer,
+		   FTM_DISCONNECT, link->xid++,
+		   dhcp_failover_make_option (FTO_REJECT_REASON,
+					      FMA, reason),
+		   (message
+		    ? dhcp_failover_make_option (FTO_MESSAGE, FMA,
+						 (int)strlen (message),
+						 message)
+		    : &skip_failover_option),
+		   (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return status;
+}
+
+/*
+ * Send a Bind Update (BNDUPD) message describing 'lease' to the peer.
+ *
+ * The lease is stamped with a fresh, non-zero transaction id (last_xid)
+ * taken from the link.  If lease->rewind_binding_state differs from
+ * lease->binding_state it is brought up to date and the lease is written
+ * and committed before the message is sent.
+ *
+ * The binding status put on the wire is 'transmit_state': normally the
+ * lease's desired_binding_state, but for a RESERVED_LEASE that sits in our
+ * own free state it is toggled to the peer's free state (see below).
+ *
+ * Returns DHCP_R_INVALIDARG if state->link_to_peer is not a failover link
+ * on top of an OMAPI connection, otherwise the status returned by
+ * dhcp_failover_put_message().
+ */
+
+isc_result_t dhcp_failover_send_bind_update (dhcp_failover_state_t *state,
+					     struct lease *lease)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+	int flags = 0;
+	binding_state_t transmit_state;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(bndupd");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!state -> link_to_peer ||
+	    state -> link_to_peer -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+	if (!link -> outer || link -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	transmit_state = lease->desired_binding_state;
+	if (lease->flags & RESERVED_LEASE) {
+		/* If we are listing an allocable (not yet ACTIVE etc) lease
+		 * as reserved, toggle to the peer's 'free state', per the
+		 * draft.  This gives the peer permission to alloc it to the
+		 * chaddr/uid-named client.
+		 */
+		if ((state->i_am == primary) && (transmit_state == FTS_FREE))
+			transmit_state = FTS_BACKUP;
+		else if ((state->i_am == secondary) &&
+			 (transmit_state == FTS_BACKUP))
+			transmit_state = FTS_FREE;
+
+		flags |= FTF_IP_FLAG_RESERVE;
+	}
+	if (lease->flags & BOOTP_LEASE)
+		flags |= FTF_IP_FLAG_BOOTP;
+
+	/* last_xid == 0 is illegal, seek past zero if we hit it. */
+	if (link->xid == 0)
+		link->xid = 1;
+
+	lease->last_xid = link->xid++;
+
+	/*
+	 * Our very next action is to transmit a binding update relating to
+	 * this lease over the wire, and although there is a BNDACK, there is
+	 * no BNDACKACK or BNDACKACKACK...the basic issue as we send a BNDUPD,
+	 * we may not receive a BNDACK.  This non-reception does not imply the
+	 * peer did not receive and process the BNDUPD.  So at this point, we
+	 * must divest any state that would be dangerous to retain under the
+	 * impression the peer has been updated.  Normally state changes like
+	 * this are processed in supersede_lease(), but in this case we need a
+	 * very late binding.
+	 *
+	 * In failover rules, a server is permitted to work forward in certain
+	 * directions from a given lease's state; active leases may be
+	 * extended, so forth.  There is an 'optimization' in the failover
+	 * draft that permits a server to 'rewind' any work they have not
+	 * informed the peer.  Since we can't know if the peer received our
+	 * update but was unable to acknowledge it, we make this change on
+	 * transmit rather than upon receiving the acknowledgement.
+	 *
+	 * XXX: Frequent lease commits are undesirable.  This should hopefully
+	 * only trigger when a server is sending a lease /state change/, and
+	 * not merely an update such as with a renewal.
+	 */
+	if (lease->rewind_binding_state != lease->binding_state) {
+		lease->rewind_binding_state = lease->binding_state;
+
+		write_lease(lease);
+		commit_leases();
+	}
+
+	/* Send the update.  The binding status carries transmit_state (not
+	 * the raw desired_binding_state) so that the reserved-lease toggle
+	 * computed above actually reaches the peer.
+	 */
+	status = (dhcp_failover_put_message
+		  (link, link -> outer,
+		   FTM_BNDUPD, lease->last_xid,
+		   dhcp_failover_make_option (FTO_ASSIGNED_IP_ADDRESS, FMA,
+					      lease -> ip_addr.len,
+					      lease -> ip_addr.iabuf),
+		   dhcp_failover_make_option (FTO_BINDING_STATUS, FMA,
+					      transmit_state),
+		   lease -> uid_len
+		   ? dhcp_failover_make_option (FTO_CLIENT_IDENTIFIER, FMA,
+						lease -> uid_len,
+						lease -> uid)
+		   : &skip_failover_option,
+		   lease -> hardware_addr.hlen
+		   ? dhcp_failover_make_option (FTO_CHADDR, FMA,
+						lease -> hardware_addr.hlen,
+						lease -> hardware_addr.hbuf)
+		   : &skip_failover_option,
+		   dhcp_failover_make_option (FTO_LEASE_EXPIRY, FMA,
+					      lease -> ends),
+		   dhcp_failover_make_option (FTO_POTENTIAL_EXPIRY, FMA,
+					      lease -> tstp),
+		   dhcp_failover_make_option (FTO_STOS, FMA,
+					      lease -> starts),
+		   (lease->cltt != 0) ?
+			dhcp_failover_make_option(FTO_CLTT, FMA, lease->cltt) :
+			&skip_failover_option, /* No CLTT */
+		   flags ? dhcp_failover_make_option(FTO_IP_FLAGS, FMA,
+						     flags) :
+			   &skip_failover_option, /* No IP_FLAGS */
+		   &skip_failover_option,	/* XXX DDNS */
+		   &skip_failover_option,	/* XXX request options */
+		   &skip_failover_option,	/* XXX reply options */
+		   (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return status;
+}
+
+/* Send a Bind ACK message.
+ *
+ * Acknowledges (or, when 'reason' is non-zero, rejects) the binding update
+ * 'msg' by sending an FTM_BNDACK that reuses msg->xid and echoes the
+ * assigned address.  A non-zero 'reason' adds a REJECT_REASON option and a
+ * MESSAGE option; if 'message' is NULL the text comes from
+ * dhcp_failover_reject_reason_print().  The remaining BNDUPD options are
+ * echoed only when built with DO_BNDACK_SHOULD_NOT.
+ *
+ * Returns DHCP_R_INVALIDARG when state->link_to_peer is not a failover
+ * link on top of an OMAPI connection, otherwise the result of
+ * dhcp_failover_put_message().
+ */
+
+isc_result_t dhcp_failover_send_bind_ack (dhcp_failover_state_t *state,
+ failover_message_t *msg,
+ int reason, const char *message)
+{
+ dhcp_failover_link_t *link;
+ isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ char obuf [64];
+ unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+ failover_print (FMA, "(bndack");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+ if (!state -> link_to_peer ||
+ state -> link_to_peer -> type != dhcp_type_failover_link)
+ return DHCP_R_INVALIDARG;
+ link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+ if (!link -> outer || link -> outer -> type != omapi_type_connection)
+ return DHCP_R_INVALIDARG;
+
+ if (!message && reason) /* no text given: use the canned text for 'reason' */
+ message = dhcp_failover_reject_reason_print (reason);
+
+ /* Send the update.  skip_failover_option is passed in place of every
+  * option that is not being sent, and the argument list ends with a
+  * null failover_option_t pointer. */
+ status = (dhcp_failover_put_message
+ (link, link -> outer,
+ FTM_BNDACK, msg->xid,
+ dhcp_failover_make_option (FTO_ASSIGNED_IP_ADDRESS, FMA,
+ sizeof msg -> assigned_addr,
+ &msg -> assigned_addr),
+#ifdef DO_BNDACK_SHOULD_NOT
+ dhcp_failover_make_option (FTO_BINDING_STATUS, FMA,
+ msg -> binding_status),
+ (msg -> options_present & FTB_CLIENT_IDENTIFIER)
+ ? dhcp_failover_make_option (FTO_CLIENT_IDENTIFIER, FMA,
+ msg -> client_identifier.count,
+ msg -> client_identifier.data)
+ : &skip_failover_option,
+ (msg -> options_present & FTB_CHADDR)
+ ? dhcp_failover_make_option (FTO_CHADDR, FMA,
+ msg -> chaddr.count,
+ msg -> chaddr.data)
+ : &skip_failover_option,
+ dhcp_failover_make_option (FTO_LEASE_EXPIRY, FMA,
+ msg -> expiry),
+ dhcp_failover_make_option (FTO_POTENTIAL_EXPIRY, FMA,
+ msg -> potential_expiry),
+ dhcp_failover_make_option (FTO_STOS, FMA,
+ msg -> stos),
+ (msg->options_present & FTB_CLTT) ?
+ dhcp_failover_make_option(FTO_CLTT, FMA, msg->cltt) :
+ &skip_failover_option, /* No CLTT in the msg to ack. */
+ ((msg->options_present & FTB_IP_FLAGS) && msg->ip_flags) ?
+ dhcp_failover_make_option(FTO_IP_FLAGS, FMA,
+ msg->ip_flags)
+ : &skip_failover_option,
+#endif /* DO_BNDACK_SHOULD_NOT */
+ reason
+ ? dhcp_failover_make_option(FTO_REJECT_REASON, FMA, reason)
+ : &skip_failover_option,
+ (reason && message)
+ ? dhcp_failover_make_option (FTO_MESSAGE, FMA,
+ strlen (message), message)
+ : &skip_failover_option,
+#ifdef DO_BNDACK_SHOULD_NOT
+ &skip_failover_option, /* XXX DDNS */
+ &skip_failover_option, /* XXX request options */
+ &skip_failover_option, /* XXX reply options */
+#endif /* DO_BNDACK_SHOULD_NOT */
+ (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+ if (status != ISC_R_SUCCESS)
+ failover_print (FMA, " (failed)");
+ failover_print (FMA, ")");
+ if (obufix) {
+ log_debug ("%s", obuf);
+ }
+#endif
+ return status;
+}
+
+/*
+ * Send a POOLREQ message, which carries no options, to the failover peer.
+ *
+ * The message uses the next transaction id from the link (link xid is
+ * post-incremented).  Returns DHCP_R_INVALIDARG when state->link_to_peer
+ * is not a failover link on top of an OMAPI connection, otherwise the
+ * status returned by dhcp_failover_put_message().
+ */
+isc_result_t dhcp_failover_send_poolreq (dhcp_failover_state_t *state)
+{
+	dhcp_failover_link_t *peer_link;
+	isc_result_t result;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(poolreq");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	/* There must be a link object, and it must be a failover link... */
+	if (!state->link_to_peer)
+		return DHCP_R_INVALIDARG;
+	if (state->link_to_peer->type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	peer_link = (dhcp_failover_link_t *)state->link_to_peer;
+
+	/* ...whose outer object is an OMAPI connection. */
+	if (!peer_link->outer)
+		return DHCP_R_INVALIDARG;
+	if (peer_link->outer->type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	result = dhcp_failover_put_message (peer_link, peer_link->outer,
+					    FTM_POOLREQ, peer_link->xid++,
+					    (failover_option_t *)0);
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (result != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return result;
+}
+
+/*
+ * Send a POOLRESP message to the failover peer.
+ *
+ * The response reuses the transaction id of the message currently held in
+ * the link's imsg field and carries a single ADDRESSES_TRANSFERRED option
+ * whose value is 'leases'.
+ *
+ * Returns DHCP_R_INVALIDARG when state->link_to_peer is not a failover
+ * link on top of an OMAPI connection, otherwise the status returned by
+ * dhcp_failover_put_message().
+ */
+isc_result_t dhcp_failover_send_poolresp (dhcp_failover_state_t *state,
+					  int leases)
+{
+	dhcp_failover_link_t *peer_link;
+	isc_result_t result;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(poolresp");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	/* There must be a link object, and it must be a failover link... */
+	if (!state->link_to_peer)
+		return DHCP_R_INVALIDARG;
+	if (state->link_to_peer->type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	peer_link = (dhcp_failover_link_t *)state->link_to_peer;
+
+	/* ...whose outer object is an OMAPI connection. */
+	if (!peer_link->outer)
+		return DHCP_R_INVALIDARG;
+	if (peer_link->outer->type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	result = dhcp_failover_put_message
+			(peer_link, peer_link->outer,
+			 FTM_POOLRESP, peer_link->imsg->xid,
+			 dhcp_failover_make_option (FTO_ADDRESSES_TRANSFERRED,
+						    FMA, leases),
+			 (failover_option_t *)0);
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (result != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+	return result;
+}
+
+/*
+ * Send an UPDREQ message (no options) to the failover peer.
+ *
+ * Refuses with ISC_R_ALREADYRUNNING if state->curUPD is already set.  On a
+ * successful send, state->curUPD is set to FTM_UPDREQ.
+ *
+ * Returns DHCP_R_INVALIDARG when state->link_to_peer is not a failover
+ * link on top of an OMAPI connection, ISC_R_ALREADYRUNNING as above, or
+ * the status returned by dhcp_failover_put_message().
+ */
+isc_result_t dhcp_failover_send_update_request (dhcp_failover_state_t *state)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(updreq");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!state -> link_to_peer ||
+	    state -> link_to_peer -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+	if (!link -> outer || link -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	if (state -> curUPD)
+		return ISC_R_ALREADYRUNNING;
+
+	status = (dhcp_failover_put_message
+		  (link, link -> outer,
+		   FTM_UPDREQ, link->xid++,
+		   (failover_option_t *)0));
+
+	if (status == ISC_R_SUCCESS)
+		state -> curUPD = FTM_UPDREQ;
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+
+	/* Only claim the message was sent when it actually was. */
+	if (status == ISC_R_SUCCESS)
+		log_info ("Sent update request message to %s",
+			  state -> name);
+	else
+		log_error ("Failed to send update request message to %s",
+			   state -> name);
+
+	return status;
+}
+
+/*
+ * Send an UPDREQALL message (no options) to the failover peer.
+ *
+ * An UPDREQ already in progress (state->curUPD == FTM_UPDREQ) is allowed
+ * and is upgraded; any other non-zero curUPD yields ISC_R_ALREADYRUNNING.
+ * On a successful send, state->curUPD is set to FTM_UPDREQALL.
+ *
+ * Returns DHCP_R_INVALIDARG when state->link_to_peer is not a failover
+ * link on top of an OMAPI connection, ISC_R_ALREADYRUNNING as above, or
+ * the status returned by dhcp_failover_put_message().
+ */
+isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t
+						    *state)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(updreqall");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!state -> link_to_peer ||
+	    state -> link_to_peer -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+	if (!link -> outer || link -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	/* If there is an UPDREQ in progress, then upgrade to UPDREQALL. */
+	if (state -> curUPD && (state -> curUPD != FTM_UPDREQ))
+		return ISC_R_ALREADYRUNNING;
+
+	status = (dhcp_failover_put_message
+		  (link, link -> outer,
+		   FTM_UPDREQALL, link->xid++,
+		   (failover_option_t *)0));
+
+	if (status == ISC_R_SUCCESS)
+		state -> curUPD = FTM_UPDREQALL;
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+
+	/* Only claim the message was sent when it actually was. */
+	if (status == ISC_R_SUCCESS)
+		log_info ("Sent update request all message to %s",
+			  state -> name);
+	else
+		log_error ("Failed to send update request all message to %s",
+			   state -> name);
+
+	return status;
+}
+
+/*
+ * Send an UPDDONE message (no options) to the failover peer, using the
+ * transaction id saved in state->updxid.
+ *
+ * Whether or not the send succeeds, state->updxid is decremented afterwards
+ * so that it no longer matches, and the lease file is committed.
+ *
+ * Returns DHCP_R_INVALIDARG when state->link_to_peer is not a failover
+ * link on top of an OMAPI connection (nothing else is done in that case),
+ * otherwise the status returned by dhcp_failover_put_message().
+ */
+isc_result_t dhcp_failover_send_update_done (dhcp_failover_state_t *state)
+{
+	dhcp_failover_link_t *link;
+	isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	char obuf [64];
+	unsigned obufix = 0;
+
+# define FMA obuf, &obufix, sizeof obuf
+	failover_print (FMA, "(upddone");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+	if (!state -> link_to_peer ||
+	    state -> link_to_peer -> type != dhcp_type_failover_link)
+		return DHCP_R_INVALIDARG;
+	link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+	if (!link -> outer || link -> outer -> type != omapi_type_connection)
+		return DHCP_R_INVALIDARG;
+
+	status = (dhcp_failover_put_message
+		  (link, link -> outer,
+		   FTM_UPDDONE, state->updxid,
+		   (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+	if (status != ISC_R_SUCCESS)
+		failover_print (FMA, " (failed)");
+	failover_print (FMA, ")");
+	if (obufix) {
+		log_debug ("%s", obuf);
+	}
+#endif
+
+	/* Only claim the message was sent when it actually was. */
+	if (status == ISC_R_SUCCESS)
+		log_info ("Sent update done message to %s", state -> name);
+	else
+		log_error ("Failed to send update done message to %s",
+			   state -> name);
+
+	state->updxid--; /* Paranoia, just so it mismatches. */
+
+	/* There may be uncommitted leases at this point (since
+	   dhcp_failover_process_bind_ack() doesn't commit leases);
+	   commit the lease file. */
+	commit_leases();
+
+	return status;
+}
+
+/*
+ * failover_lease_is_better() decides a "dueling update": it weighs the
+ * peer's binding update in 'msg' against our own view of 'lease'.
+ *
+ * Returns ISC_TRUE when the local lease should win (the peer's update must
+ * not overwrite it) and ISC_FALSE when the peer's update should win.
+ *
+ * Our side of the comparison is the lease's desired_binding_state whenever
+ * that differs from binding_state.  The peer's CLTT counts as zero when the
+ * message did not carry one.  The rules are:
+ *
+ *   - ACTIVE here, ACTIVE there: the side with the later CLTT wins; on a
+ *     tie the primary wins.
+ *   - ACTIVE here, EXPIRED there: the peer wins.
+ *   - Everything else: an ACTIVE update from the peer wins; otherwise the
+ *     primary wins.
+ *
+ * This is deliberately simple and may need refining with experience.
+ */
+static isc_boolean_t
+failover_lease_is_better(dhcp_failover_state_t *state, struct lease *lease,
+			 failover_message_t *msg)
+{
+	binding_state_t ours;
+	TIME peer_cltt = 0;
+
+	ours = (lease->binding_state == lease->desired_binding_state)
+		? lease->binding_state
+		: lease->desired_binding_state;
+
+	if (msg->options_present & FTB_CLTT)
+		peer_cltt = msg->cltt;
+
+	switch (ours) {
+	      case FTS_ACTIVE:
+		if (msg->binding_status == FTS_EXPIRED)
+			return ISC_FALSE;
+
+		if (msg->binding_status == FTS_ACTIVE) {
+			/* Later client contact wins; primary breaks ties. */
+			if (peer_cltt != lease->cltt)
+				return (peer_cltt < lease->cltt)
+					? ISC_TRUE : ISC_FALSE;
+			return (state->i_am == primary)
+				? ISC_TRUE : ISC_FALSE;
+		}
+		/* FALL THROUGH */
+
+	      case FTS_FREE:
+	      case FTS_BACKUP:
+	      case FTS_EXPIRED:
+	      case FTS_RELEASED:
+	      case FTS_ABANDONED:
+	      case FTS_RESET:
+		if (msg->binding_status == FTS_ACTIVE)
+			return ISC_FALSE;
+		return (state->i_am == primary) ? ISC_TRUE : ISC_FALSE;
+
+	      default:
+		/* Not one of the binding states handled above. */
+		log_fatal("Impossible condition at %s:%d.", MDL);
+	}
+
+	log_fatal("Impossible condition at %s:%d.", MDL);
+	/* Silence compiler warning. */
+	return ISC_FALSE;
+}
+/*
+ * Process a binding update (BNDUPD) 'msg' received from the failover peer
+ * for the relationship 'state'.
+ *
+ * Steps, in order:
+ *   1. Require the assigned-address and binding-status options.
+ *   2. Look the lease up by address and check that its pool belongs to
+ *      this failover relationship.
+ *   3. If the lease is on our ACK queue (we have an update of our own
+ *      outstanding), let failover_lease_is_better() pick the winner.
+ *   4. Copy the lease into 'lt' and apply the hardware address, client
+ *      identifier, times and IP flags from the message to the copy.
+ *   5. Check the requested state transition; an invalid one is answered
+ *      with a FTR_FATAL_CONFLICT BNDACK.
+ *   6. Install the copy with supersede_lease()/write_lease() and queue a
+ *      positive ack, then optionally hand the lease to the backup for
+ *      MAC address affinity.
+ *
+ * Every rejection is reported to the peer in a BNDACK carrying 'reason'
+ * and 'message'; the function itself always returns ISC_R_SUCCESS.
+ */
+isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
+ failover_message_t *msg)
+{
+ struct lease *lt, *lease;
+ struct iaddr ia;
+ int reason = FTR_MISC_REJECT; /* used by 'bad' paths that set no reason */
+ const char *message;
+ int new_binding_state;
+ int send_to_backup = 0;
+ int required_options;
+ isc_boolean_t chaddr_changed = ISC_FALSE;
+ isc_boolean_t ident_changed = ISC_FALSE;
+
+ /* Validate the binding update: both the assigned address and the
+  * binding status must be present. */
+ required_options = FTB_ASSIGNED_IP_ADDRESS | FTB_BINDING_STATUS;
+ if ((msg->options_present & required_options) != required_options) {
+ message = "binding update lacks required options";
+ reason = FTR_MISSING_BINDINFO;
+ goto bad;
+ }
+
+ ia.len = sizeof msg -> assigned_addr;
+ memcpy (ia.iabuf, &msg -> assigned_addr, ia.len);
+
+ lease = (struct lease *)0;
+ lt = (struct lease *)0;
+ if (!find_lease_by_ip_addr (&lease, ia, MDL)) {
+ message = "unknown IP address";
+ reason = FTR_ILLEGAL_IP_ADDR;
+ goto bad;
+ }
+
+ /*
+ * If this lease is covered by a different failover peering
+ * relationship, assert an error.
+ */
+ if ((lease->pool == NULL) || (lease->pool->failover_peer == NULL) ||
+ (lease->pool->failover_peer != state)) {
+ message = "IP address is covered by a different failover "
+ "relationship state";
+ reason = FTR_ILLEGAL_IP_ADDR;
+ goto bad;
+ }
+
+ /*
+ * Dueling updates: This happens when both servers send a BNDUPD
+ * at the same time. We want the best update to win, which means
+ * we reject if we think ours is better, or cancel if we think the
+ * peer's is better. We only assert a problem if the lease is on
+ * the ACK queue, not on the UPDATE queue. This means that after
+ * accepting this server's BNDUPD, we will send our own BNDUPD
+ * /after/ sending the BNDACK (this order was recently enforced in
+ * queue processing).
+ */
+ if ((lease->flags & ON_ACK_QUEUE) != 0) {
+ if (failover_lease_is_better(state, lease, msg)) {
+ message = "incoming update is less critical than "
+ "outgoing update";
+ reason = FTR_LESS_CRIT_BIND_INFO;
+ goto bad;
+ } else {
+ /* This makes it so we ignore any spurious ACKs. */
+ dhcp_failover_ack_queue_remove(state, lease);
+ }
+ }
+
+ /* Install the new info. Start by taking a copy to markup. */
+ if (!lease_copy (&lt, lease, MDL)) {
+ message = "no memory";
+ goto bad;
+ }
+
+ if (msg -> options_present & FTB_CHADDR) {
+ if (msg->binding_status == FTS_ABANDONED) {
+ message = "BNDUPD to ABANDONED with a CHADDR";
+ goto bad;
+ }
+ if (msg -> chaddr.count > sizeof lt -> hardware_addr.hbuf) {
+ message = "chaddr too long";
+ goto bad;
+ }
+
+ if ((lt->hardware_addr.hlen != msg->chaddr.count) ||
+ (memcmp(lt->hardware_addr.hbuf, msg->chaddr.data,
+ msg->chaddr.count) != 0))
+ chaddr_changed = ISC_TRUE;
+
+ lt -> hardware_addr.hlen = msg -> chaddr.count;
+ memcpy (lt -> hardware_addr.hbuf, msg -> chaddr.data,
+ msg -> chaddr.count);
+ } else if (msg->binding_status == FTS_ACTIVE ||
+ msg->binding_status == FTS_EXPIRED ||
+ msg->binding_status == FTS_RELEASED) {
+ message = "BNDUPD without CHADDR";
+ reason = FTR_MISSING_BINDINFO;
+ goto bad;
+ } else if (msg->binding_status == FTS_ABANDONED) {
+ chaddr_changed = ISC_TRUE;
+ lt->hardware_addr.hlen = 0;
+ if (lt->scope)
+ binding_scope_dereference(&lt->scope, MDL);
+ }
+
+ /* There is no explicit message content to indicate that the client
+ * supplied no client-identifier. So if we don't hear of a value,
+ * we discard the last one.
+ */
+ if (msg->options_present & FTB_CLIENT_IDENTIFIER) {
+ if (msg->binding_status == FTS_ABANDONED) {
+ message = "BNDUPD to ABANDONED with client-id";
+ goto bad;
+ }
+
+ if ((lt->uid_len != msg->client_identifier.count) ||
+ (lt->uid == NULL) || /* Sanity; should never happen. */
+ (memcmp(lt->uid, msg->client_identifier.data,
+ lt->uid_len) != 0))
+ ident_changed = ISC_TRUE;
+
+ lt->uid_len = msg->client_identifier.count;
+
+ /* Allocate the lt->uid buffer if we haven't already, or
+ * re-allocate the lt-uid buffer if we have one that is not
+ * large enough. Otherwise, just use the extant buffer.
+ */
+ if (!lt->uid || lt->uid == lt->uid_buf ||
+ lt->uid_len > lt->uid_max) {
+ if (lt->uid && lt->uid != lt->uid_buf)
+ dfree(lt->uid, MDL);
+
+ if (lt->uid_len > sizeof(lt->uid_buf)) {
+ lt->uid_max = lt->uid_len;
+ lt->uid = dmalloc(lt->uid_len, MDL);
+ if (!lt->uid) {
+ message = "no memory";
+ goto bad;
+ }
+ } else {
+ lt->uid_max = sizeof(lt->uid_buf);
+ lt->uid = lt->uid_buf;
+ }
+ }
+ memcpy (lt -> uid,
+ msg -> client_identifier.data, lt -> uid_len);
+ } else if (lt->uid && msg->binding_status != FTS_RESET &&
+ msg->binding_status != FTS_FREE &&
+ msg->binding_status != FTS_BACKUP) {
+ ident_changed = ISC_TRUE;
+ if (lt->uid != lt->uid_buf)
+ dfree (lt->uid, MDL);
+ lt->uid = NULL;
+ lt->uid_max = lt->uid_len = 0;
+ }
+
+ /*
+ * A server's configuration can assign a 'binding scope';
+ *
+ * set var = "value";
+ *
+ * The problem with these binding scopes is that they are refreshed
+ * when the server processes a client's DHCP packet. A local binding
+ * scope is trash, then, when the lease has been assigned by the
+ * partner server. There is no real way to detect this, a peer may
+ * be updating us (as through potential conflict) with a binding we
+ * sent them, but we can trivially detect the /problematic/ case;
+ *
+ * lease is free.
+ * primary allocates lease to client A, assigns ddns name A.
+ * primary fails.
+ * secondary enters partner down.
+ * lease expires, and is set free.
+ * lease is allocated to client B and given ddns name B.
+ * primary recovers.
+ *
+ * The binding update in this case will be active->active, but the
+ * client identification on the lease will have changed. The ddns
+ * update on client A will have leaked if we just remove the binding
+ * scope blindly.
+ */
+ if (msg->binding_status == FTS_ACTIVE &&
+ (chaddr_changed || ident_changed)) {
+ ddns_removals(lease, NULL, NULL);
+
+ if (lease->scope != NULL)
+ binding_scope_dereference(&lease->scope, MDL);
+ }
+
+ /* XXX Times may need to be adjusted based on clock skew! */
+ if (msg -> options_present & FTB_STOS) {
+ lt -> starts = msg -> stos;
+ }
+ if (msg -> options_present & FTB_LEASE_EXPIRY) {
+ lt -> ends = msg -> expiry;
+ }
+ if (msg->options_present & FTB_POTENTIAL_EXPIRY) {
+ lt->atsfp = lt->tsfp = msg->potential_expiry;
+ }
+ if (msg->options_present & FTB_IP_FLAGS) {
+ if (msg->ip_flags & FTF_IP_FLAG_RESERVE) {
+ if ((((state->i_am == primary) &&
+ (lease->binding_state == FTS_FREE)) ||
+ ((state->i_am == secondary) &&
+ (lease->binding_state == FTS_BACKUP))) &&
+ !(lease->flags & RESERVED_LEASE)) {
+ message = "Address is not reserved.";
+ reason = FTR_IP_NOT_RESERVED;
+ goto bad;
+ }
+
+ lt->flags |= RESERVED_LEASE;
+ } else
+ lt->flags &= ~RESERVED_LEASE;
+
+ if (msg->ip_flags & FTF_IP_FLAG_BOOTP) {
+ if ((((state->i_am == primary) &&
+ (lease->binding_state == FTS_FREE)) ||
+ ((state->i_am == secondary) &&
+ (lease->binding_state == FTS_BACKUP))) &&
+ !(lease->flags & BOOTP_LEASE)) {
+ message = "Address is not allocated to BOOTP.";
+ goto bad;
+ }
+ lt->flags |= BOOTP_LEASE;
+ } else
+ lt->flags &= ~BOOTP_LEASE;
+
+ if (msg->ip_flags & ~(FTF_IP_FLAG_RESERVE | FTF_IP_FLAG_BOOTP))
+ log_info("Unknown IP-flags set in BNDUPD (0x%x).",
+ msg->ip_flags);
+ } else /* Flags may only not appear if the values are zero. */
+ lt->flags &= ~(RESERVED_LEASE | BOOTP_LEASE);
+
+#if defined (DEBUG_LEASE_STATE_TRANSITIONS)
+ log_info ("processing state transition for %s: %s to %s",
+ piaddr (lease -> ip_addr),
+ binding_state_print (lease -> binding_state),
+ binding_state_print (msg -> binding_status));
+#endif
+
+ /* If we're in normal state, make sure the state transition
+ we got is valid. */
+ if (state -> me.state == normal) {
+ new_binding_state =
+ (normal_binding_state_transition_check
+ (lease, state, msg -> binding_status,
+ msg -> potential_expiry));
+ /* XXX if the transition the peer asked for isn't
+ XXX allowed, maybe we should make the transition
+ XXX into potential-conflict at this point. */
+ } else {
+ new_binding_state =
+ (conflict_binding_state_transition_check
+ (lease, state, msg -> binding_status,
+ msg -> potential_expiry));
+ }
+ if (new_binding_state != msg -> binding_status) {
+ char outbuf [100];
+
+ if (snprintf (outbuf, sizeof outbuf,
+ "%s: invalid state transition: %s to %s",
+ piaddr (lease -> ip_addr),
+ binding_state_print (lease -> binding_state),
+ binding_state_print (msg -> binding_status))
+ >= sizeof outbuf)
+ log_fatal ("%s: impossible outbuf overflow",
+ "dhcp_failover_process_bind_update");
+
+ dhcp_failover_send_bind_ack (state, msg,
+ FTR_FATAL_CONFLICT,
+ outbuf);
+ goto out;
+ }
+ if (new_binding_state == FTS_EXPIRED ||
+ new_binding_state == FTS_RELEASED ||
+ new_binding_state == FTS_RESET) {
+ lt -> next_binding_state = FTS_FREE;
+
+ /* Mac address affinity. Assign the lease to
+ * BACKUP state if we are the primary and the
+ * peer is more likely to reallocate this lease
+ * to a returning client.
+ */
+ if ((state->i_am == primary) &&
+ !(lt->flags & (RESERVED_LEASE | BOOTP_LEASE)))
+ send_to_backup = peer_wants_lease(lt);
+ } else {
+ lt -> next_binding_state = new_binding_state;
+ }
+ msg -> binding_status = lt -> next_binding_state;
+
+ /*
+ * If we accept a peer's binding update, then we can't rewind a
+ * lease behind the peer's state.
+ */
+ lease->rewind_binding_state = lt->next_binding_state;
+
+ /* Try to install the new information. */
+ if (!supersede_lease (lease, lt, 0, 0, 0) ||
+ !write_lease (lease)) {
+ message = "database update failed";
+ bad: /* all 'goto bad' rejections join here and NAK the update */
+ dhcp_failover_send_bind_ack (state, msg, reason, message);
+ goto out;
+ } else {
+ dhcp_failover_queue_ack (state, msg);
+ }
+
+ /* If it is probably wise, assign lease to backup state if the peer
+ * is not already hoarding leases.
+ */
+ if (send_to_backup && secondary_not_hoarding(state, lease->pool)) {
+ lease->next_binding_state = FTS_BACKUP;
+ lease->tstp = cur_time;
+ lease->starts = cur_time;
+
+ if (!supersede_lease(lease, NULL, 0, 1, 0) ||
+ !write_lease(lease))
+ log_error("can't commit lease %s for mac addr "
+ "affinity", piaddr(lease->ip_addr));
+
+ dhcp_failover_send_updates(state);
+ }
+
+ out: /* release the working copy and the lease reference, if held */
+ if (lt)
+ lease_dereference (&lt, MDL);
+ if (lease)
+ lease_dereference (&lease, MDL);
+
+ return ISC_R_SUCCESS; /* failures were already reported in the BNDACK */
+}
+
+/* Split out of its callers because the test was too hairy for a single
+ * if statement.
+ *
+ * Returns: True if the secondary is allowed to get more leases based upon
+ * MAC address affinity.  False otherwise.
+ */
+static inline int
+secondary_not_hoarding(dhcp_failover_state_t *state, struct pool *p) {
+	/* Leases that are currently in either server's free state. */
+	int pool_total = p->free_leases + p->backup_leases;
+
+	/* How many of those one side is allowed to "hold": the
+	 * max_lease_ownership percentage of the total, rounded to nearest.
+	 */
+	int max_hold = ((pool_total * state->max_lease_ownership) + 50) / 100;
+
+	/* How many leases a rebalance would move towards the secondary;
+	 * negative when the secondary would be handing leases back to us.
+	 */
+	int would_send = (p->free_leases - p->backup_leases) / 2;
+
+	/* The peer is not hoarding as long as it would not have to give
+	 * back as many as (or more than) the amount it is allowed to hold.
+	 */
+	return(would_send > -max_hold);
+}
+
+/*
+ * Process a BNDACK 'msg' received from the failover peer.
+ *
+ * The lease is looked up by the assigned address in the message.
+ *   - A BNDACK carrying a reject reason is logged and the lease is simply
+ *     taken off the ack queue.
+ *   - An ack for a lease with last_xid == 0 (not updated by us, or already
+ *     acked) is silently taken off the ack queue.
+ *   - An ack whose xid does not match lease->last_xid is logged and
+ *     otherwise ignored.
+ *   - Otherwise tsfp/atsfp are set from the message's potential-expiry (or
+ *     from lease->tstp when that option is absent) and the lease is moved
+ *     on to the state we asked the peer to acknowledge.
+ *
+ * After unqueueing, a pending UPDDONE is sent if this was the lease it was
+ * waiting for, the lease may be re-queued for the backup (MAC address
+ * affinity), and more queued updates are sent.
+ *
+ * Always returns ISC_R_SUCCESS; problems are only logged.
+ */
+isc_result_t dhcp_failover_process_bind_ack (dhcp_failover_state_t *state,
+					     failover_message_t *msg)
+{
+	struct lease *lease = (struct lease *)0;
+	struct iaddr ia;
+	const char *message = "no memory";
+	u_int32_t pot_expire;
+	int send_to_backup = ISC_FALSE;
+	struct timeval tv;
+
+	ia.len = sizeof msg -> assigned_addr;
+	memcpy (ia.iabuf, &msg -> assigned_addr, ia.len);
+
+	if (!find_lease_by_ip_addr (&lease, ia, MDL)) {
+		message = "no such lease";
+		goto bad;
+	}
+
+	/* XXX check for conflicts. */
+	if (msg -> options_present & FTB_REJECT_REASON) {
+		log_error ("bind update on %s from %s rejected: %.*s",
+			   piaddr (ia), state -> name,
+			   (int)((msg -> options_present & FTB_MESSAGE)
+				 ? msg -> message.count
+				 : strlen (dhcp_failover_reject_reason_print
+					   (msg -> reject_reason))),
+			   (msg -> options_present & FTB_MESSAGE)
+			   ? (const char *)(msg -> message.data)
+			   : (dhcp_failover_reject_reason_print
+			      (msg -> reject_reason)));
+		goto unqueue;
+	}
+
+	/* Silently discard acks for leases we did not update (or multiple
+	 * acks).
+	 */
+	if (!lease->last_xid)
+		goto unqueue;
+
+	if (lease->last_xid != msg->xid) {
+		message = "xid mismatch";
+		goto bad;
+	}
+
+	/* XXX Times may need to be adjusted based on clock skew! */
+	/* options_present is a mask of FTB_* presence bits; testing it
+	 * against the FTO_* option code would look at unrelated bits.
+	 */
+	if (msg->options_present & FTB_POTENTIAL_EXPIRY)
+		pot_expire = msg->potential_expiry;
+	else
+		pot_expire = lease->tstp;
+
+	/* If the lease was desired to enter a binding state, we set
+	 * such a value upon transmitting a bndupd.  We do not clear it
+	 * if we receive a bndupd in the meantime (or change the state
+	 * of the lease again ourselves), but we do set binding_state
+	 * if we get a bndupd.
+	 *
+	 * So desired_binding_state tells us what we sent a bndupd for,
+	 * and binding_state tells us what we have since determined in
+	 * the meantime.
+	 */
+	if (lease->desired_binding_state == FTS_EXPIRED ||
+	    lease->desired_binding_state == FTS_RESET ||
+	    lease->desired_binding_state == FTS_RELEASED)
+	{
+		/* It is not a problem to do this directly as we call
+		 * supersede_lease immediately after: the lease is requeued
+		 * even if its sort order (tsfp) has changed.
+		 */
+		lease->atsfp = lease->tsfp = pot_expire;
+		if ((state->i_am == secondary) &&
+		    (lease->flags & RESERVED_LEASE))
+			lease->next_binding_state = FTS_BACKUP;
+		else
+			lease->next_binding_state = FTS_FREE;
+
+		/* Clear this condition for the next go-round. */
+		lease->desired_binding_state = lease->next_binding_state;
+
+		/* The peer will have made this state change, so set rewind. */
+		lease->rewind_binding_state = lease->next_binding_state;
+
+		supersede_lease(lease, (struct lease *)0, 0, 0, 0);
+		write_lease(lease);
+
+		/* Lease has returned to FREE state from the
+		 * transitional states.  If the lease 'belongs'
+		 * to a client that would be served by the
+		 * peer, process a binding update now to send
+		 * the lease to backup state.  But not if we
+		 * think we already have.
+		 */
+		if (state->i_am == primary &&
+		    !(lease->flags & (RESERVED_LEASE | BOOTP_LEASE)) &&
+		    peer_wants_lease(lease))
+			send_to_backup = ISC_TRUE;
+
+		if (!send_to_backup && state->me.state == normal)
+			commit_leases();
+	} else {
+		/* XXX It could be a problem to do this directly if the lease
+		 * XXX is sorted by tsfp.
+		 */
+		lease->atsfp = lease->tsfp = pot_expire;
+		if (lease->desired_binding_state != lease->binding_state) {
+			lease->next_binding_state =
+				lease->desired_binding_state;
+			supersede_lease(lease,
+					(struct lease *)0, 0, 0, 0);
+		}
+		write_lease(lease);
+		/* Commit the lease only after a two-second timeout,
+		   so that if we get a bunch of acks in quick
+		   succession (e.g., when stealing leases from the
+		   secondary), we do not do an immediate commit for
+		   each one. */
+		tv.tv_sec = cur_time + 2;
+		tv.tv_usec = 0;
+		add_timeout(&tv, commit_leases_timeout, (void *)0, 0, 0);
+	}
+
+      unqueue:
+	dhcp_failover_ack_queue_remove (state, lease);
+
+	/* If we are supposed to send an update done after we send
+	   this lease, go ahead and send it. */
+	if (state -> send_update_done == lease) {
+		lease_dereference (&state -> send_update_done, MDL);
+		dhcp_failover_send_update_done (state);
+	}
+
+	/* Now that the lease is off the ack queue, consider putting it
+	 * back on the update queue for mac address affinity.
+	 */
+	if (send_to_backup && secondary_not_hoarding(state, lease->pool)) {
+		lease->next_binding_state = FTS_BACKUP;
+		lease->tstp = lease->starts = cur_time;
+
+		if (!supersede_lease(lease, NULL, 0, 1, 0) ||
+		    !write_lease(lease))
+			log_error("can't commit lease %s for "
+				  "client affinity", piaddr(lease->ip_addr));
+
+		if (state->me.state == normal)
+			commit_leases();
+	}
+
+	/* If there are updates pending, we've created space to send at
+	   least one. */
+	dhcp_failover_send_updates (state);
+
+      out:
+	/* 'lease' is still NULL when the lookup above failed, so only drop
+	 * the reference if we actually hold one.
+	 */
+	if (lease)
+		lease_dereference (&lease, MDL);
+
+	return ISC_R_SUCCESS;
+
+      bad:
+	log_info ("bind update on %s got ack from %s: %s.",
+		  piaddr (ia), state -> name, message);
+	goto out;
+}
+
+/*
+ * Put leases belonging to the failover relationship 'state' on the update
+ * queue.
+ *
+ * Every pool of every shared network whose failover_peer is 'state' is
+ * scanned, list by list (free, active, expired, abandoned, backup,
+ * reserved).  A lease that is not already flagged ON_QUEUE is queued when
+ * any of these holds:
+ *   - 'everythingp' is non-zero,
+ *   - its tstp is later than its atsfp,
+ *   - it sits on the pool's expired list.
+ * Before queueing, desired_binding_state is set to the lease's current
+ * binding_state.
+ *
+ * Always returns ISC_R_SUCCESS.
+ */
+isc_result_t dhcp_failover_generate_update_queue (dhcp_failover_state_t *state,
+						  int everythingp)
+{
+	struct shared_network *net;
+	struct pool *pool;
+	struct lease *lp;
+	int which;
+#define FREE_LEASES 0
+#define ACTIVE_LEASES 1
+#define EXPIRED_LEASES 2
+#define ABANDONED_LEASES 3
+#define BACKUP_LEASES 4
+#define RESERVED_LEASES 5
+	struct lease **lists[RESERVED_LEASES+1];
+
+	for (net = shared_networks; net; net = net->next) {
+		for (pool = net->pools; pool; pool = pool->next) {
+			/* Only pools served by this peering. */
+			if (pool->failover_peer != state)
+				continue;
+
+			lists[FREE_LEASES] = &pool->free;
+			lists[ACTIVE_LEASES] = &pool->active;
+			lists[EXPIRED_LEASES] = &pool->expired;
+			lists[ABANDONED_LEASES] = &pool->abandoned;
+			lists[BACKUP_LEASES] = &pool->backup;
+			lists[RESERVED_LEASES] = &pool->reserved;
+
+			for (which = FREE_LEASES;
+			     which <= RESERVED_LEASES; which++) {
+				for (lp = *(lists[which]); lp; lp = lp->next) {
+					/* Already queued: leave it alone. */
+					if (lp->flags & ON_QUEUE)
+						continue;
+
+					/* Nothing makes this lease due. */
+					if (!everythingp &&
+					    !(lp->tstp > lp->atsfp) &&
+					    which != EXPIRED_LEASES)
+						continue;
+
+					lp->desired_binding_state =
+						lp->binding_state;
+					dhcp_failover_queue_update (lp, 0);
+				}
+			}
+		}
+	}
+	return ISC_R_SUCCESS;
+}
+
+/*
+ * Handle an UPDREQ from the failover peer.
+ *
+ * Any UPDDONE trigger left over from an earlier request is dropped, a
+ * fresh update queue is generated (not forcing every lease), and the
+ * request's xid is remembered in state->updxid.  If the queue is non-empty
+ * its tail lease is recorded in state->send_update_done and updates start
+ * flowing; otherwise UPDDONE is sent right away.
+ *
+ * Always returns ISC_R_SUCCESS.
+ */
+isc_result_t
+dhcp_failover_process_update_request (dhcp_failover_state_t *state,
+				      failover_message_t *msg)
+{
+	if (state->send_update_done != NULL) {
+		log_info("Received update request while old update still "
+			 "flying!  Silently discarding old request.");
+		lease_dereference(&state->send_update_done, MDL);
+	}
+
+	/* Generate a fresh update queue. */
+	dhcp_failover_generate_update_queue (state, 0);
+
+	state->updxid = msg->xid;
+
+	if (!state->update_queue_tail) {
+		/* Nothing is queued, so there is nothing to wait for:
+		   report completion immediately. */
+		dhcp_failover_send_update_done (state);
+		log_info ("Update request from %s: nothing pending",
+			  state->name);
+		return ISC_R_SUCCESS;
+	}
+
+	/* Something is on the update queue (there shouldn't be anything
+	   on the ack queue): remember the last queued lease so that the
+	   ack for it triggers the update done message. */
+	lease_reference (&state->send_update_done,
+			 state->update_queue_tail, MDL);
+	dhcp_failover_send_updates (state);
+	log_info ("Update request from %s: sending update",
+		  state->name);
+
+	return ISC_R_SUCCESS;
+}
+
+/* Handle an "update request all" from the failover peer.
+
+   Works like dhcp_failover_process_update_request(), except that the
+   update queue is regenerated with every lease of the peer's pools on
+   it (everythingp = 1).
+
+   Always returns ISC_R_SUCCESS. */
+isc_result_t
+dhcp_failover_process_update_request_all (dhcp_failover_state_t *state,
+					  failover_message_t *msg)
+{
+	if (state->send_update_done) {
+		log_info("Received update request while old update "
+			 "still flying! Silently discarding old request.");
+		lease_dereference(&state->send_update_done, MDL);
+	}
+
+	/* Rebuild the queue so that it covers every lease. */
+	dhcp_failover_generate_update_queue (state, 1);
+
+	state->updxid = msg->xid;
+
+	if (!state -> update_queue_tail) {
+		/* Not really expected, but a server that currently has no
+		   leases configured ends up here. */
+		dhcp_failover_send_update_done (state);
+		log_info ("Update request all from %s: nothing pending",
+			  state -> name);
+		return ISC_R_SUCCESS;
+	}
+
+	/* The tail lease marks where the update-done message is due. */
+	lease_reference (&state -> send_update_done,
+			 state -> update_queue_tail, MDL);
+	dhcp_failover_send_updates (state);
+	log_info ("Update request all from %s: sending update",
+		  state -> name);
+
+	return ISC_R_SUCCESS;
+}
+
+/* Handle the peer's update-done message.
+
+   Clears state->curUPD and then acts on our own failover state:
+     - potential_conflict: if the partner is already in conflict_done,
+       a secondary moves to normal, otherwise an error is logged and we
+       move to shut_down.  If the partner is not in conflict_done, a
+       primary moves to conflict_done and anything else logs the
+       message as spurious.
+     - conflict_done: the message is logged as spurious.
+     - recover: if me.stos + mclt is still in the future and the partner
+       is in neither recover nor recover_done, move to recover_wait and
+       schedule dhcp_failover_recover_done() for me.stos + mclt;
+       otherwise call dhcp_failover_recover_done() immediately.
+     - the remaining listed states: nothing is done.
+
+   MSG is not examined.  Always returns ISC_R_SUCCESS. */
+isc_result_t
+dhcp_failover_process_update_done (dhcp_failover_state_t *state,
+				   failover_message_t *msg)
+{
+	struct timeval tv;
+
+	log_info ("failover peer %s: peer update completed.",
+		  state -> name);
+
+	state -> curUPD = 0;
+
+	switch (state -> me.state) {
+	      case unknown_state:
+	      case partner_down:
+	      case normal:
+	      case communications_interrupted:
+	      case resolution_interrupted:
+	      case shut_down:
+	      case paused:
+	      case recover_done:
+	      case startup:
+	      case recover_wait:
+		break;	/* shouldn't happen. */
+
+		/* We got the UPDDONE, so we can go into normal state! */
+	      case potential_conflict:
+		if (state->partner.state == conflict_done) {
+			if (state->i_am == secondary) {
+				dhcp_failover_set_state (state, normal);
+			} else {
+				log_error("Secondary is in conflict_done "
+					  "state after conflict resolution, "
+					  "this is illegal.");
+				dhcp_failover_set_state (state, shut_down);
+			}
+		} else {
+			if (state->i_am == primary)
+				dhcp_failover_set_state (state, conflict_done);
+			else
+				log_error("Spurious update-done message.");
+		}
+
+		break;
+
+	      case conflict_done:
+		log_error("Spurious update-done message.");
+		break;
+
+	      case recover:
+		/* Wait for MCLT to expire before moving to recover_done,
+		   except that if both peers come up in recover, there is
+		   no point in waiting for MCLT to expire - this probably
+		   indicates the initial startup of a newly-configured
+		   failover pair. */
+		if (state -> me.stos + state -> mclt > cur_time &&
+		    state -> partner.state != recover &&
+		    state -> partner.state != recover_done) {
+			dhcp_failover_set_state (state, recover_wait);
+#if defined (DEBUG_FAILOVER_TIMING)
+			/* The timer below fires at stos + mclt, so the
+			   delay from now is that instant minus cur_time
+			   (the previous expression, cur_time - stos + mclt,
+			   did not describe the timer that is armed). */
+			log_info ("add_timeout +%d %s",
+				  (int)(state -> me.stos + state -> mclt -
+					cur_time),
+				  "dhcp_failover_recover_done");
+#endif
+			/* Absolute expiry time; the state object is
+			   referenced and dereferenced through the two
+			   omapi callbacks passed to add_timeout(). */
+			tv . tv_sec = (int)(state -> me.stos + state -> mclt);
+			tv . tv_usec = 0;
+			add_timeout (&tv,
+				     dhcp_failover_recover_done,
+				     state,
+				     (tvref_t)omapi_object_reference,
+				     (tvunref_t)
+				     omapi_object_dereference);
+		} else
+			dhcp_failover_recover_done (state);
+	}
+
+	return ISC_R_SUCCESS;
+}
+
+/* Move the failover state object passed as SP to the recover_done state.
+   The untyped argument lets the function double as the callback given
+   to add_timeout() by dhcp_failover_process_update_done(). */
+void dhcp_failover_recover_done (void *sp)
+{
+#if defined (DEBUG_FAILOVER_TIMING)
+	log_info ("dhcp_failover_recover_done");
+#endif
+
+	dhcp_failover_set_state ((dhcp_failover_state_t *)sp, recover_done);
+}
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+/* Print hunks of failover messages, doing line breaks as appropriate.
+   Note that this assumes syslog is being used, rather than, e.g., the
+   Windows NT logging facility, where just dumping the whole message in
+   one hunk would be more appropriate.
+
+   OBUF is the line buffer, *OBUFIX the number of characters currently
+   held in it, OBUFMAX its size and S the hunk to append.
+
+   If S does not fit behind the buffered text, the buffered text is
+   logged and the buffer restarted.  If S does not fit even into an
+   empty buffer, S is logged directly and the buffer is left empty.
+   The buffer is only ever logged when it actually holds characters:
+   the earlier loop logged OBUF again after resetting *OBUFIX, which
+   repeated the line just flushed (or printed whatever the buffer
+   happened to contain when it was empty on entry). */
+
+void failover_print (char *obuf,
+		     unsigned *obufix, unsigned obufmax, const char *s)
+{
+	size_t len = strlen (s);
+
+	if (len + *obufix + 1 >= obufmax) {
+		/* Flush what has been collected so far, if anything. */
+		if (*obufix) {
+			log_debug ("%s", obuf);
+			*obufix = 0;
+			obuf [0] = 0;
+		}
+
+		/* Too long for the buffer on its own: emit it as is. */
+		if (len + 1 >= obufmax) {
+			log_debug ("%s", s);
+			return;
+		}
+	}
+	strcpy (&obuf [*obufix], s);
+	*obufix += len;
+}
+#endif /* defined (DEBUG_FAILOVER_MESSAGES) */
+
+/* Taken from draft-ietf-dhc-loadb-01.txt: */
+/* A "mixing table" of 256 distinct values, in pseudo-random order.
+   loadb_p_hash() below indexes it with (running hash XOR key byte), so
+   each lookup yields the next running hash value. */
+unsigned char loadb_mx_tbl[256] = {
+ 251, 175, 119, 215, 81, 14, 79, 191, 103, 49,
+ 181, 143, 186, 157, 0, 232, 31, 32, 55, 60,
+ 152, 58, 17, 237, 174, 70, 160, 144, 220, 90,
+ 57, 223, 59, 3, 18, 140, 111, 166, 203, 196,
+ 134, 243, 124, 95, 222, 179, 197, 65, 180, 48,
+ 36, 15, 107, 46, 233, 130, 165, 30, 123, 161,
+ 209, 23, 97, 16, 40, 91, 219, 61, 100, 10,
+ 210, 109, 250, 127, 22, 138, 29, 108, 244, 67,
+ 207, 9, 178, 204, 74, 98, 126, 249, 167, 116,
+ 34, 77, 193, 200, 121, 5, 20, 113, 71, 35,
+ 128, 13, 182, 94, 25, 226, 227, 199, 75, 27,
+ 41, 245, 230, 224, 43, 225, 177, 26, 155, 150,
+ 212, 142, 218, 115, 241, 73, 88, 105, 39, 114,
+ 62, 255, 192, 201, 145, 214, 168, 158, 221, 148,
+ 154, 122, 12, 84, 82, 163, 44, 139, 228, 236,
+ 205, 242, 217, 11, 187, 146, 159, 64, 86, 239,
+ 195, 42, 106, 198, 118, 112, 184, 172, 87, 2,
+ 173, 117, 176, 229, 247, 253, 137, 185, 99, 164,
+ 102, 147, 45, 66, 231, 52, 141, 211, 194, 206,
+ 246, 238, 56, 110, 78, 248, 63, 240, 189, 93,
+ 92, 51, 53, 183, 19, 171, 72, 50, 33, 104,
+ 101, 69, 8, 252, 83, 120, 76, 135, 85, 54,
+ 202, 125, 188, 213, 96, 235, 136, 208, 162, 129,
+ 190, 132, 156, 38, 47, 1, 7, 254, 24, 4,
+ 216, 131, 89, 21, 28, 133, 37, 153, 149, 80,
+ 170, 68, 6, 169, 234, 151 };
+
+static unsigned char loadb_p_hash (const unsigned char *, unsigned);
+
+/* Hash the LEN bytes at KEY down to a single byte.  The running value
+   starts out as LEN (truncated to a byte) and the key is consumed from
+   its last byte to its first, each step replacing the running value by
+   the loadb_mx_tbl entry selected by (running value XOR key byte). */
+static unsigned char loadb_p_hash (const unsigned char *key, unsigned len)
+{
+	unsigned char mixed = len;
+	int pos = len;
+
+	while (pos > 0) {
+		pos--;
+		mixed = loadb_mx_tbl [mixed ^ (key [pos])];
+	}
+	return mixed;
+}
+
+/* Decide whether this server should answer PACKET under load balancing.
+
+   Returns nonzero when the packet is ours.  A packet whose "secs" field
+   exceeds state->load_balance_max_secs is always claimed; without a
+   hash bucket array (state->hba) nothing is claimed.  Otherwise the
+   client identifier option - or, when it is absent or cannot be
+   evaluated, the hardware address from the packet header - is hashed
+   with loadb_p_hash() and the corresponding bit of the hba is looked
+   up: a set bit assigns the client to the primary, a clear bit to the
+   secondary. */
+int load_balance_mine (struct packet *packet, dhcp_failover_state_t *state)
+{
+	struct option_cache *client_id;
+	struct data_string id;
+	unsigned char bucket;
+	int bit;
+
+	if (ntohs (packet -> raw -> secs) > state -> load_balance_max_secs)
+		return 1;
+
+	/* With no hash bucket array there is no way to tell whose
+	   client this is, so it is assumed not to be ours. */
+	if (!state -> hba)
+		return 0;
+
+	client_id = lookup_option (&dhcp_universe, packet -> options,
+				   DHO_DHCP_CLIENT_IDENTIFIER);
+	memset (&id, 0, sizeof id);
+	if (client_id &&
+	    evaluate_option_cache (&id, packet, (struct lease *)0,
+				   (struct client_state *)0,
+				   packet -> options,
+				   (struct option_state *)0,
+				   &global_scope, client_id, MDL)) {
+		bucket = loadb_p_hash (id.data, id.len);
+
+		data_string_forget(&id, MDL);
+	} else {
+		bucket = loadb_p_hash (packet -> raw -> chaddr,
+				       packet -> raw -> hlen);
+	}
+
+	/* Upper five bits pick the hba byte, lower three the bit in it. */
+	bit = state->hba[(bucket >> 3) & 0x1F] & (1 << (bucket & 0x07));
+
+	return (state -> i_am == primary) ? bit : !bit;
+}
+
+/* The inverse of load_balance_mine ("load balance theirs"): returns
+ * nonzero when the hash bucket array assigns the client recorded in LP
+ * to the peer.  load_balance_mine() cannot simply be inverted for this,
+ * because a missing HBA has to yield false here as well.  False is
+ * also returned when the lease has no pool, the pool has no failover
+ * peer, or the lease carries neither a uid nor a usable hardware
+ * address.
+ */
+int
+peer_wants_lease(struct lease *lp)
+{
+	dhcp_failover_state_t *peer;
+	unsigned char bucket;
+	int bit;
+
+	if (!lp->pool)
+		return 0;
+
+	peer = lp->pool->failover_peer;
+	if (!peer || !peer->hba)
+		return 0;
+
+	if (lp->uid_len) {
+		bucket = loadb_p_hash(lp->uid, lp->uid_len);
+	} else if (lp->hardware_addr.hlen > 1) {
+		/* hbuf[0] holds the hardware type, which is not part of
+		 * the chaddr field hashed by load_balance_mine(), so it
+		 * is left out here; the rest should match chaddr.
+		 */
+		bucket = loadb_p_hash(lp->hardware_addr.hbuf + 1,
+				      lp->hardware_addr.hlen - 1);
+	} else {
+		/* Nothing to hash: cannot be placed in the LBA. */
+		return 0;
+	}
+
+	bit = peer->hba[(bucket >> 3) & 0x1F] & (1 << (bucket & 0x07));
+
+	return (peer->i_am == primary) ? !bit : bit;
+}
+
+/* This deals with what to do with bind updates when
+ we're in the normal state
+
+ Note that tsfp had better be set from the latest bind update
+ _before_ this function is called!
+
+ LEASE is our record of the lease, STATE the failover state object
+ (only its i_am role is consulted) and BINDING_STATE the state the
+ peer proposes for the lease. TSFP is compared against cur_time when
+ an ACTIVE, EXPIRED or RESET lease is asked to become FREE or BACKUP.
+
+ Returns the binding state to adopt: BINDING_STATE when the transition
+ is accepted, or the lease's current binding_state when it is refused.
+ A state value that is not listed in the switches below ends in
+ log_fatal(). */
+
+binding_state_t
+normal_binding_state_transition_check (struct lease *lease,
+ dhcp_failover_state_t *state,
+ binding_state_t binding_state,
+ u_int32_t tsfp)
+{
+ binding_state_t new_state;
+
+ /* If there is no transition, it's no problem. */
+ if (binding_state == lease -> binding_state)
+ return binding_state;
+
+ switch (lease -> binding_state) {
+ case FTS_FREE:
+ case FTS_ABANDONED:
+ switch (binding_state) {
+ case FTS_ACTIVE:
+ case FTS_ABANDONED:
+ case FTS_BACKUP:
+ case FTS_EXPIRED:
+ case FTS_RELEASED:
+ case FTS_RESET:
+ /* If the lease was free, and our peer is primary,
+ then it can make it active, or abandoned, or
+ backup. Abandoned is treated like free in
+ this case. */
+ if (state -> i_am == secondary)
+ return binding_state;
+
+ /* Otherwise, it can't legitimately do any sort of
+ state transition. Because the lease was free,
+ and the error has already been made, we allow the
+ peer to change its state anyway, but log a warning
+ message in hopes that the error will be fixed.
+ NOTE(review): no log call is made on this path; the
+ code below simply accepts the proposed state. */
+ case FTS_FREE: /* for compiler */
+ new_state = binding_state;
+ goto out;
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+ case FTS_ACTIVE:
+ /* The secondary can't change the state of an active
+ lease. */
+ if (state -> i_am == primary) {
+ /* Except that the client may send the DHCPRELEASE
+ to the secondary, and we have to accept that. */
+ if (binding_state == FTS_RELEASED)
+ return binding_state;
+ new_state = lease -> binding_state;
+ goto out;
+ }
+
+ /* So this is only for transitions made by the primary: */
+ switch (binding_state) {
+ case FTS_FREE:
+ case FTS_BACKUP:
+ /* Can't set a lease to free or backup until the
+ peer agrees that it's expired. */
+ if (tsfp > cur_time) {
+ new_state = lease -> binding_state;
+ goto out;
+ }
+ return binding_state;
+
+ case FTS_EXPIRED:
+ /* XXX 65 should be the clock skew between the peers
+ XXX plus a fudge factor. This code will result
+ XXX in problems if MCLT is really short or the
+ XXX max-lease-time is really short (less than the
+ XXX fudge factor. */
+ if (lease -> ends - 65 > cur_time) {
+ new_state = lease -> binding_state;
+ goto out;
+ } /* FALL THROUGH: otherwise the expiry is accepted below. */
+
+ case FTS_RELEASED:
+ case FTS_ABANDONED:
+ case FTS_RESET:
+ case FTS_ACTIVE:
+ return binding_state;
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+ break; /* Not reached: every inner case returns or jumps to out. */
+ case FTS_EXPIRED:
+ switch (binding_state) {
+ case FTS_BACKUP:
+ case FTS_FREE:
+ /* Can't set a lease to free or backup until the
+ peer agrees that it's expired. */
+ if (tsfp > cur_time) {
+ new_state = lease -> binding_state;
+ goto out;
+ }
+ return binding_state;
+
+ case FTS_ACTIVE:
+ case FTS_RELEASED:
+ case FTS_ABANDONED:
+ case FTS_RESET:
+ case FTS_EXPIRED:
+ return binding_state;
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+ case FTS_RELEASED:
+ switch (binding_state) {
+ case FTS_FREE:
+ case FTS_BACKUP:
+
+ /* These are invalid state transitions - should we
+ prevent them? */
+ case FTS_EXPIRED:
+ case FTS_ABANDONED:
+ case FTS_RESET:
+ case FTS_ACTIVE:
+ case FTS_RELEASED:
+ return binding_state;
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+ case FTS_RESET:
+ switch (binding_state) {
+ case FTS_FREE:
+ case FTS_BACKUP:
+ /* Can't set a lease to free or backup until the
+ peer agrees that it's expired. */
+ if (tsfp > cur_time) {
+ new_state = lease -> binding_state;
+ goto out;
+ }
+ return binding_state;
+
+ case FTS_ACTIVE:
+ case FTS_EXPIRED:
+ case FTS_RELEASED:
+ case FTS_ABANDONED:
+ case FTS_RESET:
+ return binding_state;
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+ case FTS_BACKUP:
+ switch (binding_state) {
+ case FTS_ACTIVE:
+ case FTS_ABANDONED:
+ case FTS_EXPIRED:
+ case FTS_RELEASED:
+ case FTS_RESET:
+ /* If the lease was in backup, and our peer
+ is secondary, then it can make it active
+ or abandoned. */
+ if (state -> i_am == primary)
+ return binding_state;
+
+ /* Either the primary or the secondary can
+ reasonably move a lease from the backup
+ state to the free state. The cases above fall
+ through to here when we are not primary, so the
+ proposed state is accepted whichever role we have. */
+ case FTS_FREE:
+ return binding_state;
+
+ case FTS_BACKUP:
+ new_state = lease -> binding_state;
+ goto out;
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+
+ default:
+ log_fatal ("Impossible case at %s:%d.", MDL);
+ return FTS_RESET;
+ }
+ out: /* Common exit for the paths that set new_state. */
+ return new_state;
+}
+
+/* Determine whether the state transition is okay when we're potentially
+   in conflict with the peer.
+
+   LEASE is our record of the lease and BINDING_STATE the state the
+   partner proposes; STATE and TSFP are not consulted.  Returns the
+   binding state to adopt: the proposed state when it is accepted, the
+   lease's current state when it is refused. */
+binding_state_t
+conflict_binding_state_transition_check (struct lease *lease,
+					 dhcp_failover_state_t *state,
+					 binding_state_t binding_state,
+					 u_int32_t tsfp)
+{
+	/* No transition at all: nothing to decide. */
+	if (binding_state == lease -> binding_state)
+		return binding_state;
+
+	switch (lease -> binding_state) {
+		/* We do not consider the lease to be in use, so
+		   whatever state the partner chose is acceptable. */
+	      case FTS_FREE:
+	      case FTS_ABANDONED:
+	      case FTS_EXPIRED:
+	      case FTS_RELEASED:
+	      case FTS_RESET:
+	      case FTS_BACKUP:
+		return binding_state;
+
+		/* We do consider the lease to be in use, so a partner
+		   that believes it is free does not get its way. */
+	      case FTS_ACTIVE:
+		switch (binding_state) {
+		      case FTS_FREE:
+		      case FTS_BACKUP:
+			return lease -> binding_state;
+
+		      case FTS_EXPIRED:
+			/* Expiry is only accepted when we agree on it.
+			 * The 65 seconds are meant to cover the maximum
+			 * clock skew (60) plus some fudge.
+			 * XXX: should we refetch cur_time?
+			 */
+			return ((lease->ends - 65) > cur_time)
+				? lease->binding_state
+				: binding_state;
+
+			/* RELEASED, RESET and ABANDONED mean the
+			 * partner knows something about this lease
+			 * that we did not witness, so the partner
+			 * wins.
+			 */
+		      case FTS_RELEASED:
+		      case FTS_RESET:
+		      case FTS_ABANDONED:
+			return binding_state;
+
+		      default:
+			log_fatal ("Impossible case at %s:%d.", MDL);
+			return FTS_RESET;
+		}
+
+	      default:
+		log_fatal ("Impossible case at %s:%d.", MDL);
+		return FTS_RESET;
+	}
+}
+
+/* Helper for lease_mine_to_reallocate(): nonzero when PEER is in the
+   partner-down service state and the later of (our stos, the lease's
+   tsfp) lies more than MCLT in the past. */
+static int partner_down_mclt_elapsed (struct lease *lease,
+				      dhcp_failover_state_t *peer)
+{
+	if (peer->service_state != service_partner_down)
+		return 0;
+
+	if (lease->tsfp < peer->me.stos)
+		return (peer->me.stos + peer->mclt < cur_time);
+
+	return (lease->tsfp + peer->mclt < cur_time);
+}
+
+/* We can reallocate a lease under the following circumstances:
+
+   (1) It belongs to us - it's FTS_FREE, and we're primary, or it's
+       FTS_BACKUP, and we're secondary.
+   (2) We're in partner_down, and the lease is not active, and we
+       can be sure that the other server didn't make it active.
+       That is only certain in partner_down when either
+       (a) the time sent from the peer is earlier than the time we
+	   entered partner_down, and at least MCLT has gone by since
+	   we entered partner_down, or
+       (b) the time sent from the peer is later than the time we
+	   entered partner_down, and the current time is later than
+	   the time sent from the peer by at least MCLT.
+
+   Returns nonzero when LEASE may be reallocated by this server.  A
+   null LEASE yields 0; a lease without a pool or without a failover
+   peer is ours whenever it is FTS_FREE or FTS_BACKUP. */
+
+int lease_mine_to_reallocate (struct lease *lease)
+{
+	dhcp_failover_state_t *peer;
+
+	if (!lease)
+		return 0;
+
+	peer = lease->pool ? lease->pool->failover_peer
+			   : (dhcp_failover_state_t *)0;
+
+	/* No failover relationship governs this lease. */
+	if (!peer)
+		return(lease->binding_state == FTS_FREE ||
+		       lease->binding_state == FTS_BACKUP);
+
+	/*
+	 * Besides the normal rules governing whether a server may
+	 * operate on a lease, the server may act from the standpoint of
+	 * the most conservative guess of the peer's state for the lease.
+	 */
+	switch (lease->binding_state) {
+	      case FTS_ACTIVE:
+		/* ACTIVE leases may not be reallocated. */
+		return 0;
+
+	      case FTS_FREE:
+	      case FTS_ABANDONED:
+		/* FREE leases belong to the primary; the secondary may
+		 * only take them in partner_down once stos+mclt or
+		 * tsfp+mclt, whichever is greater, has expired.
+		 *
+		 * ABANDONED is handled exactly like FREE here; servers
+		 * only try abandoned leases as a last resort anyway.
+		 */
+		return (peer -> i_am == primary)
+			? 1
+			: partner_down_mclt_elapsed (lease, peer);
+
+	      case FTS_RELEASED:
+	      case FTS_EXPIRED:
+		/*
+		 * Normally untouchable until the peer acknowledges the
+		 * state change.  Since that cannot happen while the
+		 * peer is offline, the failover protocol permits an
+		 * 'optimization': rewind the lease to the state last
+		 * acknowledged by the peer, if the server is allowed to
+		 * operate on that state.
+		 *
+		 * A lease that was free, was allocated by this server
+		 * and expired without ever being transmitted to the
+		 * peer can thus legally go back to free and be handed
+		 * to any new client.
+		 */
+		if ((peer->i_am == primary) &&
+		    (lease->rewind_binding_state == FTS_FREE))
+			return 1;
+		if ((peer->i_am == secondary) &&
+		    (lease->rewind_binding_state == FTS_BACKUP))
+			return 1;
+
+		/* FALL THROUGH (released, expired, reset) */
+	      case FTS_RESET:
+		/*
+		 * Released, expired and reset leases share the
+		 * 'expired' queue.  On entry into partner-down state
+		 * the tsfp of the leases on that queue is set to
+		 * stos+mclt, the point at which the server may take
+		 * them out of these transitional states.
+		 *
+		 * That tsfp may now lie beyond the value actually
+		 * received from the peer, but no special action is
+		 * needed: tsfp equals the current time when the lease
+		 * becomes free, so it cannot be used to grant clients
+		 * lease times longer than the MCLT, which is the only
+		 * danger of such a modification.
+		 */
+		return((peer->service_state == service_partner_down) &&
+		       (lease->tsfp < cur_time));
+
+	      case FTS_BACKUP:
+		/* BACKUP leases belong to the secondary; the primary
+		 * may only take them in partner_down once TSFP+MCLT or
+		 * STOS+MCLT, whichever is greater, has expired.
+		 */
+		return (peer->i_am == secondary)
+			? 1
+			: partner_down_mclt_elapsed (lease, peer);
+
+	      default:
+		/* All lease states appear above. */
+		log_fatal("Impossible case at %s:%d.", MDL);
+		break;
+	}
+	return 0;
+}
+
+/* Store M in *MP and count the new reference on M.  FILE and LINE
+   identify the caller and are not used here.  Always returns
+   ISC_R_SUCCESS. */
+static isc_result_t failover_message_reference (failover_message_t **mp,
+						failover_message_t *m,
+						const char *file, int line)
+{
+	*mp = m;
+	++(m -> refcnt);
+
+	return ISC_R_SUCCESS;
+}
+
+static isc_result_t failover_message_dereference (failover_message_t **mp,
+ const char *file, int line)
+{
+ failover_message_t *m;
+ m = (*mp);
+ m -> refcnt--;
+ if (m -> refcnt == 0) {
+ if (m -> next)
+ failover_message_dereference (&m -> next,
+ file, line);
+ if (m -> chaddr.data)
+ dfree (m -> chaddr.data, file, line);
+ if (m -> client_identifier.data)
+ dfree (m -> client_identifier.data, file, line);
+ if (m -> hba.data)
+ dfree (m -> hba.data, file, line);
+ if (m -> message.data)
+ dfree (m -> message.data, file, line);
+ if (m -> reply_options.data)
+ dfree (m -> reply_options.data, file, line);
+ if (m -> request_options.data)
+ dfree (m -> request_options.data, file, line);
+ if (m -> vendor_class.data)
+ dfree (m -> vendor_class.data, file, line);
+ if (m -> vendor_options.data)
+ dfree (m -> vendor_options.data, file, line);
+ if (m -> ddns.data)
+ dfree (m -> ddns.data, file, line);
+ dfree (*mp, file, line);
+ }
+ *mp = 0;
+ return ISC_R_SUCCESS;
+}
+
+OMAPI_OBJECT_ALLOC (dhcp_failover_state, dhcp_failover_state_t,
+ dhcp_type_failover_state) /* NOTE(review): OMAPI_OBJECT_ALLOC is
+ defined outside this section; presumably each invocation expands to
+ the allocate/reference/dereference helpers for the named object
+ type - confirm against the omapip headers. The three invocations
+ cover the failover state, listener and link object types. */
+OMAPI_OBJECT_ALLOC (dhcp_failover_listener, dhcp_failover_listener_t,
+ dhcp_type_failover_listener)
+OMAPI_OBJECT_ALLOC (dhcp_failover_link, dhcp_failover_link_t,
+ dhcp_type_failover_link)
+#endif /* defined (FAILOVER_PROTOCOL) */
+
+/* Return the lower-case name of the binding state STATE ("free",
+   "active", "expired", "released", "abandoned", "reset" or "backup"),
+   or "unknown" for any other value.  The result is a string literal
+   and must not be modified or freed. */
+const char *binding_state_print (enum failover_state state)
+{
+	const char *name = "unknown";
+
+	switch (state) {
+	      case FTS_FREE:
+		name = "free";
+		break;
+
+	      case FTS_ACTIVE:
+		name = "active";
+		break;
+
+	      case FTS_EXPIRED:
+		name = "expired";
+		break;
+
+	      case FTS_RELEASED:
+		name = "released";
+		break;
+
+	      case FTS_ABANDONED:
+		name = "abandoned";
+		break;
+
+	      case FTS_RESET:
+		name = "reset";
+		break;
+
+	      case FTS_BACKUP:
+		name = "backup";
+		break;
+
+	      default:
+		break;
+	}
+
+	return name;
+}