use %016llx when printing dpids
[actl.git] / app-l3_lb / l3_lb.c
blob66ad90076357ba89b81eaacc6a00c58effc95bd9
/*
 * Copyright (c) 2016 Mohamed Aslan <maslan@sce.carleton.ca>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <math.h>
21 #include <getopt.h>
22 #include <err.h>
23 #include <time.h>
25 #include <libof.h>
26 #include <of10.h>
27 #include <hashtab.h>
29 #include <net/if_arp.h>
30 #include <netinet/in.h>
31 #include <netinet/if_ether.h>
32 #include <arpa/inet.h>
33 #include <netinet/ip.h>
34 #include <sys/queue.h>
36 #include <actl.h>
37 #include "l3_lb.h"
39 #ifdef __linux__
40 #include <netinet/ether.h>
41 #endif
/*
 * when a packet arrives:
 *	(1) learn (in port and src mac)
 *	(2) check whether it's an arp or ip packet
 *		a. in case of arp:
 *			i. if dst is vserver:
 *				fake arp response
 *			ii. otherwise:
 *				flood
 *		b. in case of ip:
 *			i. if dst is vserver:
 *				loadbalance
 *			ii. otherwise:
 *				flood or forward
 */
/*
 * Datapath id of switch `dp': reads datapath_id from dp->sw_features
 * (viewed as a struct of10_switch_features) and converts it from big-endian
 * to host byte order.  The argument is evaluated once.
 */
#define DPID(dp) (be64toh(((struct of10_switch_features *)((dp)->sw_features))->datapath_id))
/*
 * mactable's key entry: a (switch, station MAC address) pair.
 *
 * learn() and lookup() hand the table sizeof(struct entry) bytes as the key,
 * so every byte of the struct, including any compiler-inserted padding,
 * takes part in the key.
 */
struct entry {
	uint64_t dpid;				/* datapath id, host byte order */
	uint8_t mac_addr[ETHER_ADDR_LEN];	/* station MAC address */
};
/* datapaths (switches) registered by handle_connection_up() */
static SLIST_HEAD(, dp_ptr) dataplane_list;
struct dp_ptr {
	struct of_dataplane *dp;	/* the connected datapath */
	SLIST_ENTRY(dp_ptr) next;	/* list linkage */
};
/* physical servers behind the virtual server, one per `-s' option */
static SLIST_HEAD(, server) server_list;
struct server {
	struct in_addr ip_addr;		/* server's IP, network byte order */
	struct ether_addr mac_addr;	/* server's MAC, valid once ip_resolved */
	int ip_resolved;		/* set to 1 by learn_servers() */
	uint64_t load;			/* last computed load, bytes/sec */
	uint64_t load_bytes;		/* tx_bytes counter at the last sample */
	time_t load_ts;			/* time of the last sample */
	SLIST_ENTRY(server) next;	/* list linkage */
};
static struct actl_ctx *ctx;			/* set by init() */
static struct hashtab mactable;			/* (dpid, MAC) -> in_port (L2 learning) */
static struct in_addr vserver_ip;		/* virtual server's IP */
static struct ether_addr vserver_mac;		/* virtual server's MAC */
static int n_servers = 0;			/* number of entries in server_list */
static int auto_arp = 0;			/* `-a yes': ARP for the servers on connection up */
static uint16_t ttl_idle = DEFAULT_TIMEOUT;	/* idle timeout of installed flows (`-t') */
static uint16_t ttl_hard = 0;			/* hard timeout of installed flows (`-h') */
/* long command-line options; each maps to the short option parsed in main() */
static struct option longopts[] = {
	{ "ip_addr",		required_argument,	NULL,	'i' },	/* virtual server IP */
	{ "mac_addr",		required_argument,	NULL,	'm' },	/* virtual server MAC */
	{ "servers",		required_argument,	NULL,	's' },	/* one physical server IP */
	{ "arp_servers",	required_argument,	NULL,	'a'},	/* "yes" or "no" */
	{ "poll",		required_argument,	NULL,	'p'},	/* polling period */
	{ "kpi",		required_argument,	NULL,	'k'},	/* target KPI */
	{ "ttl_idle",		required_argument,	NULL,	't'},	/* flow idle timeout */
	{ "ttl_hard",		required_argument,	NULL,	'h'},	/* flow hard timeout */
	{ NULL,			0,			NULL,	0 }
};
107 static void
108 learn(struct of_dataplane *dp, struct of10_packet_in *p_in)
110 struct ether_header *eh;
111 struct entry e;
112 struct ether_addr broadcast_addr;
114 eh = (struct ether_header *)(p_in->data);
115 memset(&broadcast_addr, 0xff, ETHER_ADDR_LEN);
117 /* skip the load-balacing address from the learning process */
118 if (!memcmp(eh->ether_shost, &vserver_mac, ETHER_ADDR_LEN))
119 return;
120 /* skip broadcast address */
121 if (!memcmp(eh->ether_shost, &broadcast_addr, ETHER_ADDR_LEN))
122 return;
124 /* L2 learning (update mactable) */
125 e.dpid = be64toh(((struct of10_switch_features *)(dp->sw_features))->datapath_id);
126 memcpy(e.mac_addr, eh->ether_shost, ETHER_ADDR_LEN);
127 hashtab_put(&mactable, &e, sizeof(struct entry), &p_in->in_port, sizeof(uint16_t));
128 #ifdef DEBUG
129 printf("[dpid=0x%016llx] learned that %s is at port %hu.\n", DPID(dp), ether_ntoa(eh->ether_shost), ntohs(p_in->in_port));
130 #endif
133 static int
134 lookup(struct of_dataplane *dp, struct ether_addr *ea, uint16_t *port)
136 uint16_t *p;
137 size_t len;
138 struct entry e;
140 e.dpid = be64toh(((struct of10_switch_features *)(dp->sw_features))->datapath_id);
141 memcpy(e.mac_addr, ea, ETHER_ADDR_LEN);
142 #ifdef DEBUG
143 printf("[dpid=0x%016llx] looking up %s.\n", DPID(dp), ether_ntoa(ea));
144 #endif
145 if (hashtab_get(&mactable, &e, sizeof(struct entry), &p, &len)) {
146 *port = *p;
147 return 1;
149 return 0;
152 static void
153 flood(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in)
155 struct of10_packet_out *msg_out;
156 struct of10_action_output *action;
158 msg_out = (struct of10_packet_out *)malloc(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output));
159 if (msg_out == NULL)
160 return;
161 msg_out->hdr.version = OFP_VERSION_10;
162 msg_out->hdr.type = OFPT10_PACKET_OUT;
163 msg_out->hdr.length = htons(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output));
164 msg_out->hdr.xid = 0;
165 msg_out->buffer_id = p_in->buffer_id;
166 msg_out->in_port = p_in->in_port;
167 msg_out->actions_len = htons(sizeof(struct of10_action_output));
168 action = (struct of10_action_output *)(msg_out->actions);
169 action->type = htons(OFPAT10_OUTPUT);
170 action->len = htons(sizeof(struct of10_action_output));
171 action->port = htons(OFPP10_FLOOD);
172 action->max_len = 0;
173 ctl->send(ctl, dp, (struct ofp_header *)msg_out);
174 free(msg_out);
175 #ifdef DEBUG
176 printf("packet flooded.\n");
177 #endif
180 static void
181 forward_install(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in, uint16_t out_port)
183 struct ether_header *eh;
184 struct of10_flow_mod *msg_mod;
185 struct of10_action_output *action;
187 eh = (struct ether_header *)(p_in->data);
188 msg_mod = (struct of10_flow_mod *)malloc(sizeof(struct of10_flow_mod) + sizeof(struct of10_action_output));
189 if (msg_mod == NULL)
190 return;
191 /* header */
192 msg_mod->hdr.version = OFP_VERSION_10;
193 msg_mod->hdr.type = OFPT10_FLOW_MOD;
194 msg_mod->hdr.length = htons(sizeof(struct of10_flow_mod) + sizeof(struct of10_action_output));
195 msg_mod->hdr.xid = 0;
196 /* match */
197 msg_mod->match.wildcards = htonl(OFPFW10_ALL & ~(OFPFW10_DL_DST));
198 memcpy(msg_mod->match.dl_dst, eh->ether_dhost, OFP10_ETH_ALEN);
199 /* others */
200 msg_mod->cookie = 0;
201 msg_mod->command = htons(OFPFC10_ADD);
202 msg_mod->idle_timeout = htons(ttl_idle);
203 msg_mod->hard_timeout = htons(ttl_hard);
204 msg_mod->priority = htons(0xffff);
205 msg_mod->buffer_id = p_in->buffer_id;
206 msg_mod->out_port = out_port; /* already in net byte order */
207 msg_mod->flags = 0;
208 /* action */
209 action = (struct of10_action_output *)(msg_mod->actions);
210 action->type = htons(OFPAT10_OUTPUT);
211 action->len = htons(sizeof(struct of10_action_output));
212 action->port = out_port; /* already in net byte order */
213 action->max_len = 0;
214 ctl->send(ctl, dp, (struct ofp_header *)msg_mod);
215 free(msg_mod);
218 static void
219 rewrite_install(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in, struct server *srv, uint16_t out_port)
221 struct ether_header *eh;
222 struct of10_flow_mod *msg_mod;
223 struct of10_action_nw_addr *action0;
224 struct of10_action_dl_addr *action1;
225 struct of10_action_output *action2;
227 eh = (struct ether_header *)(p_in->data);
228 msg_mod = (struct of10_flow_mod *)malloc(sizeof(struct of10_flow_mod) + 3 * sizeof(struct of10_action_output));
229 if (msg_mod == NULL)
230 return;
232 /* source -> server */
233 /* header */
234 msg_mod->hdr.version = OFP_VERSION_10;
235 msg_mod->hdr.type = OFPT10_FLOW_MOD;
236 msg_mod->hdr.length = htons(sizeof(struct of10_flow_mod) + sizeof(struct of10_action_nw_addr) +
237 sizeof(struct of10_action_dl_addr) + sizeof(struct of10_action_output));
238 msg_mod->hdr.xid = 0;
239 /* match */
240 msg_mod->match.wildcards = htonl(OFPFW10_ALL & ~(OFPFW10_DL_SRC | OFPFW10_DL_DST));
241 memcpy(msg_mod->match.dl_src, eh->ether_shost, OFP10_ETH_ALEN);
242 memcpy(msg_mod->match.dl_dst, eh->ether_dhost, OFP10_ETH_ALEN);
243 /* others */
244 msg_mod->cookie = 0;
245 msg_mod->command = htons(OFPFC10_ADD);
246 msg_mod->idle_timeout = htons(ttl_idle);
247 msg_mod->hard_timeout = htons(ttl_hard);
248 msg_mod->priority = htons(0xffff);
249 msg_mod->buffer_id = p_in->buffer_id;
250 msg_mod->out_port = out_port; /* already in net byte order */
251 msg_mod->flags = 0;
252 /* actions */
253 /* rewrite ip address */
254 action0 = (struct of10_action_nw_addr *)(msg_mod->actions);
255 action0->type = htons(OFPAT10_SET_NW_DST);
256 action0->len = htons(sizeof(struct of10_action_nw_addr));
257 action0->nw_addr = srv->ip_addr.s_addr; /* already in net byte order */
258 /* rewrite ether address */
259 action1 = (struct of10_action_dl_addr *)(++action0);
260 action1->type = htons(OFPAT10_SET_DL_DST);
261 action1->len = htons(sizeof(struct of10_action_dl_addr));
262 memcpy(&action1->dl_addr, &srv->mac_addr, OFP10_ETH_ALEN);
263 /* forward */
264 action2 = (struct of10_action_output *)(++action1);
265 action2->type = htons(OFPAT10_OUTPUT);
266 action2->len = htons(sizeof(struct of10_action_output));
267 action2->port = out_port; /* already in net byte order */
268 action2->max_len = 0;
269 /* send flow mod message */
270 ctl->send(ctl, dp, (struct ofp_header *)msg_mod);
271 #ifdef DEBUG
272 printf("[LB] rewrite and install (1).\n");
273 #endif
275 /* server -> source */
276 /* reuse msg_mod */
277 memcpy(msg_mod->match.dl_src, &srv->mac_addr, OFP10_ETH_ALEN);
278 memcpy(msg_mod->match.dl_dst, eh->ether_shost, OFP10_ETH_ALEN);
279 msg_mod->buffer_id = htonl(-1);
280 /* rewrite ip address */
281 action0 = (struct of10_action_nw_addr *)(msg_mod->actions);
282 action0->type = htons(OFPAT10_SET_NW_SRC);
283 action0->nw_addr = vserver_ip.s_addr; /* already in net byte order */
284 /* rewrite ether address */
285 action1 = (struct of10_action_dl_addr *)(++action0);
286 action1->type = htons(OFPAT10_SET_DL_SRC);
287 memcpy(&action1->dl_addr, &vserver_mac, OFP10_ETH_ALEN);
288 /* forward */
289 action2 = (struct of10_action_output *)(++action1);
290 action2->port = p_in->in_port;
291 /* send flow mod message */
292 ctl->send(ctl, dp, (struct ofp_header *)msg_mod);
293 free(msg_mod);
294 #ifdef DEBUG
295 printf("[LB] rewrite and install (2).\n");
296 #endif
299 static void
300 loadbalance(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in)
302 uint64_t least_load;
303 uint16_t llsrv_port;
304 struct server *srv, *llsrv = NULL;
306 if (!n_servers)
307 return;
309 /* find least-loaded server */
310 srv = SLIST_FIRST(&server_list);
311 least_load = srv->load;
312 SLIST_FOREACH(srv, &server_list, next) {
313 printf("[LB] server %s -> load is %lld bytes/sec (status is %d).\n", inet_ntoa(srv->ip_addr), srv->load, srv->ip_resolved);
314 if (!srv->ip_resolved)
315 continue; /* next server */
316 if ((llsrv == NULL) || (srv->load < least_load)) {
317 least_load = srv->load;
318 llsrv = srv;
321 if (llsrv == NULL)
322 return; /* TODO: drop packet */
323 done:
324 printf("[LB] least-loaded server is %s (with load = %lld bytes/sec).\n", inet_ntoa(llsrv->ip_addr), llsrv->load);
325 if (!lookup(dp, &llsrv->mac_addr, &llsrv_port))
326 llsrv_port = htons(OFPP10_FLOOD);
327 printf("[LB] least-loaded server's port is %hu.\n", ntohs(llsrv_port));
328 rewrite_install(ctl, dp, p_in, llsrv, llsrv_port);
331 static void
332 learn_servers(struct ether_arp *arp)
334 struct ether_addr *arp_sha;
335 struct in_addr *arp_spa;
336 struct server *srv;
338 arp_sha = (struct ether_addr *)arp->arp_sha;
339 arp_spa = (struct in_addr *)arp->arp_spa;
341 /* check it's one of the servers (L3 learning) */
342 SLIST_FOREACH(srv, &server_list, next) {
343 if (arp_spa->s_addr == srv->ip_addr.s_addr) {
344 memcpy(&srv->mac_addr, arp_sha, ETHER_ADDR_LEN);
345 srv->ip_resolved = 1;
346 printf("learned (L3) that server %s is at %s.\n", inet_ntoa(srv->ip_addr), ether_ntoa(&srv->mac_addr));
347 break;
352 static void
353 handle_arp(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *ph_in)
355 struct ether_header *eh_in, *eh_out;
356 struct ether_arp *arp_in, *arp_out;
357 struct of10_packet_out *msg_out;
358 struct of10_action_output *action;
359 struct in_addr *arp_in_tpa;
360 uint8_t *p;
362 eh_in = (struct ether_header *)(ph_in->data);
363 arp_in = (struct ether_arp *)(ph_in->data + ETHER_HDR_LEN);
365 /* check if it's valid or not */
366 if (!(arp_in->ea_hdr.ar_hrd == htons(ARPHRD_ETHER)) ||
367 !(arp_in->ea_hdr.ar_pro == htons(ETHERTYPE_IP)))
368 return;
369 /* check if it's an arp reply */
370 if (arp_in->ea_hdr.ar_op == htons(ARPOP_REPLY)) {
371 printf("[ARP] %u.%u.%u.%u is at %02x:%02x:%02x:%02x:%02x:%02x\n",
372 arp_in->arp_spa[0], arp_in->arp_spa[1], arp_in->arp_spa[2], arp_in->arp_spa[3],
373 arp_in->arp_sha[0], arp_in->arp_sha[1], arp_in->arp_sha[2], arp_in->arp_sha[3],
374 arp_in->arp_sha[4], arp_in->arp_sha[5]);
375 learn_servers(arp_in); /* L3 learn about the servers */
376 flood(ctl, dp, ph_in); /* XXX: lookup() */
377 return;
380 /* now, ensure it's an arp request */
381 if (arp_in->ea_hdr.ar_op != htons(ARPOP_REQUEST))
382 return;
383 /* okay */
384 printf("[ARP] who has %u.%u.%u.%u tell %u.%u.%u.%u\n",
385 arp_in->arp_tpa[0], arp_in->arp_tpa[1], arp_in->arp_tpa[2], arp_in->arp_tpa[3],
386 arp_in->arp_spa[0], arp_in->arp_spa[1], arp_in->arp_spa[2], arp_in->arp_spa[3]);
388 /* check if it's destined to vserver or not */
389 arp_in_tpa = (struct in_addr *)arp_in->arp_tpa;
390 if (arp_in_tpa->s_addr != vserver_ip.s_addr) {
391 printf("[ARP] arp is not for a load balanced host.\n");
392 flood(ctl, dp, ph_in);
393 return;
395 printf("[ARP] arp is for a load balanced host.\n");
397 /* fake arp response */
398 msg_out = (struct of10_packet_out *)
399 malloc(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output)
400 + ETHER_HDR_LEN + sizeof(struct ether_arp));
401 if (msg_out == NULL)
402 return;
404 eh_out = (struct ether_header *)(msg_out + sizeof(struct of10_packet_out) +
405 sizeof(struct of10_action_output));
406 arp_out = (struct ether_arp *)(msg_out + sizeof(struct of10_packet_out) +
407 sizeof(struct of10_action_output) + ETHER_HDR_LEN);
410 p = (uint8_t *)(msg_out) + sizeof(struct of10_packet_out) +
411 sizeof(struct of10_action_output);
412 eh_out = (struct ether_header *)p;
413 p += ETHER_HDR_LEN;
414 arp_out = (struct ether_arp *)p;
416 /* ether_arp */
417 arp_out->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
418 arp_out->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
419 arp_out->ea_hdr.ar_op = htons(ARPOP_REPLY);
420 arp_out->ea_hdr.ar_hln = arp_in->ea_hdr.ar_hln;
421 arp_out->ea_hdr.ar_pln = arp_in->ea_hdr.ar_pln;
422 memcpy(arp_out->arp_tha, arp_in->arp_sha, sizeof(arp_out->arp_tha));
423 memcpy(arp_out->arp_tpa, arp_in->arp_spa, sizeof(arp_out->arp_tpa));
424 memcpy(arp_out->arp_sha, &vserver_mac, sizeof(arp_out->arp_sha));
425 memcpy(arp_out->arp_spa, &vserver_ip, sizeof(arp_out->arp_spa));
426 /* ether_header */
427 eh_out->ether_type = ntohs(ETHERTYPE_ARP);
428 memcpy(eh_out->ether_dhost, eh_in->ether_shost, sizeof(eh_out->ether_dhost));
429 memcpy(eh_out->ether_shost, &vserver_mac, sizeof(eh_out->ether_shost));
430 /* packet_out */
431 msg_out->hdr.version = OFP_VERSION_10;
432 msg_out->hdr.type = OFPT10_PACKET_OUT;
433 msg_out->hdr.length = htons(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output) + ETHER_HDR_LEN + sizeof(struct ether_arp));
434 msg_out->hdr.xid = 0;
435 msg_out->buffer_id = htonl(-1);
436 msg_out->in_port = ph_in->in_port;
437 msg_out->actions_len = htons(sizeof(struct of10_action_output));
438 action = (struct of10_action_output *)(msg_out->actions);
439 action->type = htons(OFPAT10_OUTPUT);
440 action->len = htons(sizeof(struct of10_action_output));
441 action->port = htons(OFPP10_IN_PORT);
442 action->max_len = 0;
443 ctl->send(ctl, dp, (struct ofp_header *)msg_out);
444 free(msg_out);
447 static void
448 handle_ip(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in)
450 struct ether_header *eh_in;
451 struct ip *ip_in;
452 uint16_t out_port;
454 eh_in = (struct ether_header *)(p_in->data);
455 ip_in = (struct ether_arp *)(p_in->data + ETHER_HDR_LEN);
457 char *x = strdup(inet_ntoa(ip_in->ip_src));
458 char *y = strdup(inet_ntoa(ip_in->ip_dst));
459 printf("[IP] from %s to %s\n", x, y);
460 free(x);
461 free(y);
463 /* check if it's destined to vserver or not */
464 if (ip_in->ip_dst.s_addr == vserver_ip.s_addr) {
465 printf("[IP] packet is for a load balanced host.\n");
466 loadbalance(ctl, dp, p_in);
467 return;
470 if (lookup(dp, (struct ether_addr *)eh_in->ether_dhost, &out_port)) {
471 printf("[IP] forward (port = %hu) and install flow rule.\n", ntohs(out_port));
472 forward_install(ctl, dp, p_in, out_port);
474 else{
475 printf("[IP] flood (%s not found).\n", ether_ntoa(eh_in->ether_dhost));
476 flood(ctl, dp, p_in);
480 static void
481 handle_stats(struct of_controller *ctl, struct of_dataplane *dp, struct ofp_header *msg)
483 int i, n;
484 struct of10_stats_reply *sts;
485 struct of10_port_stats *psts;
486 struct server *srv;
487 struct host_info **hosts;
488 time_t now;
491 sts = (struct of10_stats_reply *)msg;
492 #ifdef DEBUG
493 printf("[DEBUG] stats received type=0x%x, size=0x%x.\n", ntohs(sts->type), ntohs(sts->hdr.length));
494 #endif
495 /* process only port stats for now */
496 if (ntohs(sts->type) != OFPST10_PORT)
497 return;
498 psts = (struct of10_flow_stats *)sts->body;
499 if (psts == NULL)
500 return;
501 #ifdef DEBUG
502 printf("[DEBUG] dpid=0x%016llx,port=%u load is now %llu bytes.\n", DPID(dp), ntohs(psts->port_no), be64toh(psts->tx_bytes));
503 #endif
504 /* read static topology information */
505 ctx->topo(ctx, &hosts, &n);
506 for (i = 0 ; i < n ; i++) {
507 if ((DPID(dp) == hosts[i]->dpid) && (ntohs(psts->port_no) == hosts[i]->port)) {
508 SLIST_FOREACH(srv, &server_list, next) {
509 if (srv->ip_addr.s_addr == hosts[i]->ip_addr.s_addr) {
510 now = time(NULL);
511 srv->load = (uint64_t)(be64toh(psts->tx_bytes) - srv->load_bytes) / (now - srv->load_ts);
512 srv->load_bytes = (uint64_t)be64toh(psts->tx_bytes);
513 srv->load_ts = now;
514 printf("[INFO] server %s load is now %llu bytes/sec.\n", inet_ntoa(srv->ip_addr), srv->load);
515 break;
522 static void
523 handle_packet_in(struct of_controller *ctl, struct of_dataplane *dp, struct ofp_header *msg_in)
525 struct ether_header *eh;
526 struct of10_packet_in *p_in;
529 p_in = (struct of10_packet_in *)msg_in;
530 eh = (struct ether_header *)(p_in->data);
532 #ifdef DEBUG
533 printf("in_port: %hu, ", ntohs(p_in->in_port));
534 printf("%02x:%02x:%02x:%02x:%02x:%02x ->", eh->ether_shost[0], eh->ether_shost[1],
535 eh->ether_shost[2], eh->ether_shost[3], eh->ether_shost[4],
536 eh->ether_shost[5]);
537 printf(" %02x:%02x:%02x:%02x:%02x:%02x, ", eh->ether_dhost[0], eh->ether_dhost[1],
538 eh->ether_dhost[2], eh->ether_dhost[3], eh->ether_dhost[4],
539 eh->ether_dhost[5]);
540 printf("type: 0x%hx ", ntohs(eh->ether_type));
541 if (ntohs(eh->ether_type) == ETHERTYPE_ARP)
542 printf("[ARP]");
543 else if (ntohs(eh->ether_type) == ETHERTYPE_IP)
544 printf("[IP4]");
545 printf("\n");
546 printf("packet_in reason: 0x%x\n", (unsigned int)p_in->reason);
547 #endif
549 /* learn */
550 learn(dp, p_in);
552 /* check if it's an arp packet */
553 if (ntohs(eh->ether_type) == ETHERTYPE_ARP)
554 handle_arp(ctl, dp, p_in);
555 /* check if it's an ip packet */
556 else if (ntohs(eh->ether_type) == ETHERTYPE_IP)
557 handle_ip(ctl, dp, p_in);
560 static void
561 handle_ofp_messages(struct of_controller *ctl, struct of_dataplane *dp, struct ofp_header *msg_in)
563 printf("ofp_messages [dpid = 0x%016llx, type=0x%x].\n", DPID(dp), (unsigned int)msg_in->type);
564 if (msg_in->type == OFPT10_PACKET_IN)
565 handle_packet_in(ctl, dp, msg_in);
566 else if (msg_in->type == OFPT10_STATS_REPLY)
567 handle_stats(ctl, dp, msg_in);
570 static struct of10_packet_out *
571 arp_request(struct in_addr addr)
573 struct ether_header *eh;
574 struct ether_arp *arp;
575 struct of10_packet_out *msg;
576 struct of10_action_output *action;
577 uint8_t *p;
579 msg = (struct of10_packet_out *)
580 malloc(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output)
581 + ETHER_HDR_LEN + sizeof(struct ether_arp));
582 if (msg == NULL)
583 return NULL;
585 p = (uint8_t *)(msg) + sizeof(struct of10_packet_out) +
586 sizeof(struct of10_action_output);
587 eh = (struct ether_header *)p;
588 p += ETHER_HDR_LEN;
589 arp = (struct ether_arp *)p;
591 /* ether_arp */
592 arp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
593 arp->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
594 arp->ea_hdr.ar_op = htons(ARPOP_REQUEST);
595 arp->ea_hdr.ar_hln = ETHER_ADDR_LEN;
596 arp->ea_hdr.ar_pln = sizeof(in_addr_t);
597 memset(arp->arp_tha, 0xff, ETHER_ADDR_LEN);
598 memcpy(arp->arp_tpa, &addr.s_addr, sizeof(arp->arp_tpa));
599 memcpy(arp->arp_sha, &vserver_mac, ETHER_ADDR_LEN);
600 memcpy(arp->arp_spa, &vserver_ip.s_addr, sizeof(arp->arp_spa));
601 /* ether_header */
602 eh->ether_type = ntohs(ETHERTYPE_ARP);
603 memset(eh->ether_dhost, 0xff, ETHER_ADDR_LEN);
604 memcpy(eh->ether_shost, &vserver_mac, ETHER_ADDR_LEN);
605 /* packet_out */
606 msg->hdr.version = OFP_VERSION_10;
607 msg->hdr.type = OFPT10_PACKET_OUT;
608 msg->hdr.length = htons(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output) + ETHER_HDR_LEN + sizeof(struct ether_arp));
609 msg->hdr.xid = 0;
610 msg->buffer_id = htonl(-1);
611 msg->in_port = 0xffff; /* XXX: OFPP10_NONE */
612 msg->actions_len = htons(sizeof(struct of10_action_output));
613 action = (struct of10_action_output *)(msg->actions);
614 action->type = htons(OFPAT10_OUTPUT);
615 action->len = htons(sizeof(struct of10_action_output));
616 action->port = htons(OFPP10_FLOOD);
617 action->max_len = 0;
618 return msg;
621 static void
622 vserver_stats_request(struct of_controller *ctl, struct of_dataplane *dp, uint16_t port)
624 struct of10_stats_request *msg;
625 struct of10_port_stats_request *body;
627 msg = (struct of10_stats_request *)malloc(sizeof(struct of10_stats_request) + sizeof(struct of10_port_stats_request));
628 if (msg == NULL)
629 return;
630 /* header */
631 msg->hdr.version = OFP_VERSION_10;
632 msg->hdr.type = OFPT10_STATS_REQUEST;
633 msg->hdr.length = htons(sizeof(struct of10_stats_request) + sizeof(struct of10_port_stats_request));
634 msg->hdr.xid = 0;
635 /* type */
636 msg->type = htons(OFPST10_PORT);
637 /* flags */
638 msg->flags = 0;
639 /* body */
640 body = (struct of10_port_stats_request *)(msg->body);
641 /* port */
642 body->port_no = htons(port);
643 /* send message */
644 ctl->send(ctl, dp, (struct ofp_header *)msg);
645 free(msg);
648 static struct of_dataplane *
649 dpid2ptr(uint64_t dpid)
651 struct dp_ptr *dptr;
653 SLIST_FOREACH(dptr, &dataplane_list, next)
654 if ((dptr->dp) && (DPID(dptr->dp) == dpid))
655 return dptr->dp;
656 return NULL;
659 static void
660 poll(struct of_controller *ctl)
662 int i, n;
663 struct host_info **hosts;
664 struct of_dataplane *dp;
665 struct server *srv;
668 /* read static topology information */
669 ctx->topo(ctx, &hosts, &n);
671 /* poll local servers only */
672 SLIST_FOREACH(srv, &server_list, next) {
673 if (!srv->ip_resolved)
674 continue;
675 for (i = 0 ; i < n ; i++) {
676 if (srv->ip_addr.s_addr == hosts[i]->ip_addr.s_addr) {
677 dp = dpid2ptr(hosts[i]->dpid);
678 if (!dp)
679 continue;
680 #ifdef DEBUG
681 printf("[DEBUG] poll(): server %s at 0x%016llx.\n", inet_ntoa(srv->ip_addr), DPID(dp));
682 #endif
683 vserver_stats_request(ctl, dp, hosts[i]->port);
684 break; /* XXX: support multi-connections */
690 static void
691 handle_connection_up(struct of_controller *ctl, struct of_dataplane *dp)
693 struct ofp_header *msg;
694 struct server *srv;
695 struct dp_ptr *dptr;
696 uint64_t dpid;
698 dpid = DPID(dp);
699 printf("[INFO] connection_up [dpid = 0x%016llx].\n", dpid);
701 if (auto_arp) {
702 printf("[INFO] auto_arp is ON.\n");
703 SLIST_FOREACH(srv, &server_list, next) {
704 printf("[INFO] learning about server %s.\n", inet_ntoa(srv->ip_addr));
705 msg = arp_request(srv->ip_addr);
706 ctl->send(ctl, dp, (struct ofp_header *)msg);
707 free(msg);
710 dptr = (struct dp_ptr *)malloc(sizeof(struct dp_ptr));
711 dptr->dp = dp;
712 SLIST_INSERT_HEAD(&dataplane_list, dptr, next);
713 printf("[INFO] switch [dpid = 0x%016llx] is ready.\n", dpid);
717 init(struct actl_ctx *c)
719 ctx = c;
720 printf("l3_lb initialized.\n");
721 return 0;
725 main(int argc, char **argv)
727 int i, ch, poll_period = DEFAULT_POLL_PERIOD;
728 double tar_kpi;
729 const char *errstr;
730 struct ether_addr *ea;
731 struct server *srv;
734 #ifdef DEBUG
735 for (i = 0 ; i < argc ; i++) {
736 printf("arg[%d] = %s.\n", i, argv[i]);
738 #endif
740 SLIST_INIT(&dataplane_list);
741 SLIST_INIT(&server_list);
742 while ((ch = getopt_long(argc, argv, "i:m:s:a:p:k:t:h:", longopts, NULL)) != -1) {
743 switch (ch) {
744 case 'i':
745 if (!inet_aton(optarg, &vserver_ip))
746 errx(1, "invalid IP address");
747 break;
748 case 'm':
749 if ((ea = ether_aton(optarg)) == NULL)
750 errx(1, "invalid MAC address");
751 vserver_mac = *ea;
752 break;
753 case 's':
754 srv = (struct server *)malloc(sizeof(struct server));
755 if (!inet_aton(optarg, &srv->ip_addr))
756 errx(1, "invalid IP address");
757 srv->ip_resolved = 0;
758 srv->load = 0;
759 srv->load_bytes = 0;
760 srv->load_ts = time(NULL);
761 SLIST_INSERT_HEAD(&server_list, srv, next);
762 ++n_servers;
763 break;
764 case 'a':
765 if (!strcmp(optarg, "yes"))
766 auto_arp = 1;
767 else if (!strcmp(optarg, "no"))
768 auto_arp = 0;
769 else
770 errx(1, "incorrect option for argument '-a'");
771 break;
772 case 'p':
773 poll_period = strtonum(optarg, 1, 60, &errstr);
774 if (errstr != NULL)
775 errx(1, "invalid polling period %s: %s", errstr, optarg);
776 break;
777 case 'k':
778 tar_kpi = (double)strtonum(optarg, 1, 100, &errstr);
779 if (errstr)
780 errx(1, "invalid target KPI %s:%s", errstr, optarg);
781 ctx->adapt(ctx, tar_kpi);
782 break;
783 case 't':
784 ttl_idle = (uint16_t)strtonum(optarg, 0, 1024, &errstr);
785 if (errstr)
786 errx(1, "flow idle ttl out-of-range %s:%s", errstr, optarg);
787 break;
788 case 'h':
789 ttl_hard = (uint16_t)strtonum(optarg, 0, 1024, &errstr);
790 if (errstr)
791 errx(1, "flow hard ttl out-of-range %s:%s", errstr, optarg);
792 break;
794 /* TODO: default -> usage() */
796 argc -= optind;
797 argv += optind;
799 /* initialize the mactable */
800 if (!hashtab_init(&mactable, 12, NULL))
801 return 1;
803 printf("Virtual Server IP: %s\n", inet_ntoa(vserver_ip));
804 printf("Virtual Server MAC: %s\n", ether_ntoa(&vserver_mac));
805 printf("Number of Physical Servers = %d.\n", n_servers);
806 i = 0;
807 SLIST_FOREACH(srv, &server_list, next)
808 printf("Server%d IP: %s, MAC: %s\n", ++i, inet_ntoa(srv->ip_addr), ether_ntoa(&srv->mac_addr));
809 printf("Target KPI = %lf.\n", tar_kpi);
810 printf("TTL IDLE = %hu, HARD = %hu.\n", ttl_idle, ttl_hard);
812 printf("L3 LB initialized.\n");
814 /* polling timer */
815 ctx->cntl->timer(ctx->cntl, poll_period * 1000, poll);
817 return 0;
820 void
821 handler(struct of_event *ev)
823 #ifdef DEBUG
824 printf("l3_lb_handler.\n");
825 #endif
826 if (ev->type == OFEV_CONNECTION_UP)
827 handle_connection_up(ctx->cntl, ev->dp);
828 else if (ev->type == OFEV_PROTO_MESSAGE)
829 handle_ofp_messages(ctx->cntl, ev->dp, ev->ofp_hdr);
832 double
833 kpi()
835 double avg, stddev = 0;
836 int n = 0;
837 struct server *srv;
838 uint64_t load = 0;
840 #ifdef DEBUG
841 printf("l3_lb_kpi.\n");
842 #endif
843 if (!n_servers)
844 return 0;
846 /* mean */
847 srv = SLIST_FIRST(&server_list);
848 SLIST_FOREACH(srv, &server_list, next) {
849 if (!srv->ip_resolved)
850 continue;
851 load += srv->load;
852 n++;
854 if (!n)
855 return 0;
856 avg = load / n;
857 /* stddev */
858 srv = SLIST_FIRST(&server_list);
859 SLIST_FOREACH(srv, &server_list, next) {
860 if (!srv->ip_resolved)
861 continue;
862 stddev += (srv->load - avg) * (srv->load - avg);
864 stddev /= n;
865 stddev = sqrt(stddev);
866 #ifdef DEBUG
867 printf("l3_lb_kpi: KPI = %lf.\n", stddev);
868 #endif
869 return stddev;