send ack needed
[cor.git] / net / cor / cor.h
blobc84b3fe3c8d94d89469e526d93792c3458c1165b
1 /**
2 * Connection oriented routing
3 * Copyright (C) 2007-2021 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
21 #include <asm/atomic.h>
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/interrupt.h>
26 #include <linux/sched.h>
27 #include <linux/netdevice.h>
28 #include <linux/skbuff.h>
29 #include <linux/spinlock.h>
30 #include <linux/workqueue.h>
31 #include <linux/kref.h>
32 #include <linux/ktime.h>
33 #include <linux/rbtree.h>
35 #include <linux/socket.h>
36 #include <net/sock.h>
38 #include <linux/math64.h>
40 #include "settings.h"
43 #define ETH_P_COR 0x1022
44 #define AF_COR 99
45 #define PF_COR AF_COR
47 #define PROTO_COR_RAW 0
48 #define PROTO_COR_RDEAMON 1
50 #define SOCKADDRTYPE_PORT 1
51 #define SOCKADDRTYPE_ADDRPORT 2
/* wire/userspace address of a cor socket: family, port, fixed-size node addr */
struct cor_sockaddr {
	__u16 sin_family;

	__be32 port;
	char addr[64];
};
59 #define COR_PASS_ON_CLOSE 1
61 #define COR_PUBLISH_SERVICE 2
63 #define COR_TOS 3
64 #define COR_TOS_DEFAULT 0
65 #define COR_TOS_LOW_LATENCY 1
66 #define COR_TOS_HIGH_LATENCY 2
68 #define MAX_CONN_CMD_LEN 64
70 #define PACKET_TYPE_NONE 0
71 #define PACKET_TYPE_ANNOUNCE 1
72 #define PACKET_TYPE_CMSG_NOACK 2
73 #define PACKET_TYPE_CMSG_ACKSLOW 3
74 #define PACKET_TYPE_CMSG_ACKFAST 4
75 #define PACKET_TYPE_CONNDATA 5
76 #define PACKET_TYPE_CONNDATA_LOWBUFDELAYED 6
77 #define PACKET_TYPE_CONNDATA_FLUSH 7
78 #define PACKET_TYPE_CONNDATA_LOWBUFDELAYED_FLUSH 8
81 /**
82 * Announce data format:
83 * version [2]
84 * is 0, may be increased if the protocol changes
85 * min_version [2]
86 * is 0, must be increased if a future version of the protocol is incompatible
87 * to the current version
88 * [data]
90 * Data format of the announce packet "data" field:
91 *{command [2] commandlength [2] commanddata [commandlength]}[...]
94 /* Commands */
96 /* ANNCMD_VERSION: version[2] minversion[2] */
97 #define ANNCMD_VERSION 1
99 /* ANNCMD_ADDR: addrlen[2] addr[addrlen] */
100 #define ANNCMD_ADDR 2
104 * Kernel packet data - these commands are sent by the neighbor
105 * The end nodes may cause these commands to be sent, but they see them beyond
106 * the first hop.
108 #define KP_MISC 0
109 #define KP_ACK_CONN 1
110 #define KP_CONN_DATA 2
112 /* KP_PADDING[1] */
113 #define KP_MISC_PADDING 0
116 * KP_INIT_SESSION[1] sessionid[4]
118 * finishes neighbor discovery and starts a session
120 * Before this is received all other commands are ignored. The sessionid is used
121 * to prevent usage of old neighbor discovery data (e.g. addresses)
123 #define KP_MISC_INIT_SESSION 1
125 #define KP_MISC_INIT_SESSION_CMDLEN 5
128 * KP_PING[1] cookie[4]
129 * KP_PONG[1] cookie[4] respdelay_full[4] respdelay_netonly[4]
131 * This is needed to find out whether the other node is reachable. After a new
132 * neighbor is seen, ping requests are sent and the neighbor is only reachable
133 * after a few pongs are received. These requests are also used to find out
134 * whether a neighbor is gone.
136 * respdelay:
137 * The receiver of a ping may delay the sending of the pong e.g. to create
138 * bigger packets. The respdelay is the time in microseconds the packet was
139 * delayed.
141 #define KP_MISC_PING 2
143 #define KP_MISC_PING_CMDLEN 5
145 #define KP_MISC_PONG 3
147 /* KP_ACK[1] seqno[6] */
148 #define KP_MISC_ACK 4
151 * NOTE on connection ids:
152 * connection ids we receive with most significant bit 0 have been generated by
153 * us
154 * connection ids we receive with most significant bit 1 have been generated by
155 * the other side
157 * ATTENTION: the priority seqno are reversed:
158 * priority seqnos we send are used when we send updates
159 * priority seqnos we received are used when we receive updates
163 * incoming connection
164 * seqno1... used to ack data sent from the side which initiated the connection
165 * seqno2... used to ack data sent to the side which initiated the connection
166 * KP_CONNECT[1] conn_id[4] seqno1[6] seqno2[6] window[1] priority_seqno[1]
167 * priority[1]
169 #define KP_MISC_CONNECT 5
172 * incoming connection successful,
173 * KP_CONNECT_SUCCESS[1] conn_id[4] window[1] is_highlatency[1]
175 #define KP_MISC_CONNECT_SUCCESS 6
178 * KP_RESET_CONN[1] conn_id[4]
179 * We send this, if there is an established connection we want to close.
181 #define KP_MISC_RESET_CONN 7
184 * KP_SET_MAX_CMSG_DELAY[1] cpacket_ack_delay[4] data_ack_lowlatency_delay[4]
185 * data_ack_highlatency_delay[4] cmsg_delay[4]
186 * Sent after connecting and at any change
187 * delay is specified in microseconds
189 #define KP_MISC_SET_MAX_CMSG_DELAY 8
191 #define KP_MISC_SET_MAX_CMSG_DELAY_CMDLEN 17
195 * KP_ACK_CONN[1] conn_id[4] delay_remaining[1] seqno[6] window[1]
196 * bufsize_changerate[1] seqno_ooo[6]
197 * length[1-4] priority_seqno[1] priority[1]
199 * conn_id is the conn_id we use if we sent something through this conn and
200 * *not* the conn_id that the neighbor used to send us the data
202 * delay_remaining = time the ack_conn could have remained in the queue
203 * 255 means the ack_conn has been sent immediately
204 * 0 means it has been delayed by as much the delay set by SET_MAX_CMSG_DELAY
206 * seqno = the seqno which is expected in the next non-out-of-order packet
208 * window = amount of data which can be sent without receiving the next ack
209 * packets with lower seqno do not overwrite the last window size
210 * The window may also be reduced. However, this only indicates a wish.
211 * Packets must be accepted if they exceed the new window, but not the old
212 * one.
214 * decode:
215 * 0 = 0
216 * 1...255 = 64*2^((value-1)/7) end result is rounded down to an integer
218 * bufsize_changerate = if the next router(s) is increasing or decreasing its
219 * buffer size
220 * 0 = for every byte we can send, the end host will receive 2 bytes
221 * 64 = for every byte we can send, the end host will receive 1 byte
222 * 128 = for every 2 byte we can send, the end host will receive 1 byte
223 * ...
225 * seqno_ooo, length = This packet was received out of order. Maybe a previous
226 * packet has been lost. Out of order data should not be retransmitted.
227 * Multiple ooo packets may be merged into a single ack. Ooo packets may be
228 * partially accepted, so that the length does not cover the full packet and/
229 * or the seqno starts in the middle of a packet
231 #define KP_ACK_CONN_FLAGS_SEQNO 1
232 #define KP_ACK_CONN_FLAGS_WINDOW 2
233 #define KP_ACK_CONN_FLAGS_OOO 12 /* 4+8 */
234 #define KP_ACK_CONN_FLAGS_PRIORITY 16
/*
 * Encode the byte count needed for an out-of-order length field into the
 * KP_ACK_CONN_FLAGS_OOO bits: 0 = no ooo range, 4 = 1-byte length field,
 * 8 = 2-byte length field, 12 = 4-byte length field.
 */
static inline __u8 cor_ooolen_to_flags(__u32 len)
{
	__u8 flags = 0;

	if (len >= 65536)
		flags = 12;
	else if (len >= 256)
		flags = 8;
	else if (len != 0)
		flags = 4;

	return flags;
}
/*
 * Decode the KP_ACK_CONN_FLAGS_OOO bits (== 3 << 2, i.e. bits 2-3) back
 * into the size of the ooo length field: 0, 1, 2 or 4 bytes.
 */
static inline int cor_ooolen(__u8 flags)
{
	int fieldlen = (flags >> 2) & 3;

	/* the encoded value 3 stands for a 4 byte length field (rare case) */
	if (fieldlen == 3)
		fieldlen = 4;
	return fieldlen;
}
255 static inline int cor_ack_conn_len(__u8 flags)
257 int len = 0;
258 if ((flags & KP_ACK_CONN_FLAGS_SEQNO) != 0) {
259 len += 6;
260 if ((flags & KP_ACK_CONN_FLAGS_WINDOW) != 0)
261 len += 2;
264 if (cor_ooolen(flags) != 0) {
265 len += 6;
266 len += cor_ooolen(flags);
269 if ((flags & KP_ACK_CONN_FLAGS_SEQNO) != 0 ||
270 cor_ooolen(flags) != 0)
271 len++;
273 if (flags & KP_ACK_CONN_FLAGS_PRIORITY)
274 len += 2;
276 return len;
279 /* KP_CONN_DATA[1] conn_id[4] seqno[6] length[1-2] data[length] */
280 #define KP_CONN_DATA_FLAGS_LEN 3
281 #define KP_CONN_DATA_FLAGS_FLUSH 4
282 #define KP_CONN_DATA_FLAGS_LOWBUFDELAYED 8
/*
 * Total wire length of a KP_CONN_DATA packet carrying datalen payload
 * bytes: type[1] + conn_id[4] + seqno[6] + length[1-2] + data.
 * The length field needs a second byte once datalen exceeds 767.
 */
static inline __u32 get_kp_conn_data_length(__u32 datalen)
{
	__u32 overhead = (datalen <= 767) ? 12 : 13;

	return overhead + datalen;
}
/*
 * Pack a kernel packet command code: major number in the top 2 bits,
 * minor number in the low 6 bits (see kp_maj()/kp_min()).
 * BUILD_BUG_ON works here because callers pass compile-time constants and
 * the function is always inlined.
 */
static inline __u8 get_kp_code(__u8 maj, __u8 min)
{
	BUILD_BUG_ON(maj > 3);
	BUG_ON(min > 63);
	return (maj << 6) + min;
}
/* extract the major command number (top 2 bits) from a kp command code */
static inline __u8 kp_maj(__u8 code)
{
	return (code & 0xc0) >> 6;
}
/* extract the minor command number (low 6 bits) from a kp command code */
static inline __u8 kp_min(__u8 code)
{
	return code & 0x3f;
}
312 * Connection data which is interpreted when the connection has no target yet
313 * These commands are sent by the end node.
315 * Format:
316 * cmd[2] length[1-4] parameter[length]
317 * unrecognized commands are ignored
318 * parameters which are longer than expected are ignored as well
321 #define CD_CONTINUE_ON_ERROR_FLAG 32768
323 /* outgoing connection: CD_CONNECT_NB[2] length[1-4]
324 * addrlen[1-4] addr[addrlen] */
325 #define CD_CONNECT_NB 1
327 /* connection to local open part: CD_CONNECT_PORT[2] length[1-4] port[4] */
328 #define CD_CONNECT_PORT 2
331 * CD_LIST_NEIGH sends CDR_BINDATA if the command was successful. The response
332 * format is:
334 * numneighs[1-4]
335 * numfields[1-4] (field[2] fieldlen[1-4])[numfields]
336 * rows[responserows]:
337 * fieldlen[1-4], only if fieldlen in the header was "0"
338 * fielddata[fieldlen]
340 * Future versions may append data to field definition. Therefore clients must
341 * silently discard data at the end they do not expect.
344 /* list connected neighbors: CD_LIST_NEIGH[2] length[1-4] */
345 #define CD_LIST_NEIGH 3
348 * addr[fieldlen]
350 #define LIST_NEIGH_FIELD_ADDR 1
353 * latency_in_microsecs[1] (64_11 encoding)
354 * Only raw network latency in measured. Delays caused by the priority queues
355 * are *not* included.
357 #define LIST_NEIGH_FIELD_LATENCY 2
359 /* list services: CD_LIST_SERVICES[2] length[1-4] */
360 #define CD_LIST_SERVICES 4
364 * Connection data response
365 * Format is the same as with connection data
369 * CDR_EXECOK[1]
371 #define CDR_EXECOK 1
374 * CDR_EXECFAILED[1] reasoncode[2]
376 #define CDR_EXECFAILED 2
377 #define CDR_EXECFAILED_INVALID_COMMAND 1
378 #define CDR_EXECFAILED_TEMPORARILY_OUT_OF_RESSOURCES 2
379 #define CDR_EXECFAILED_NB_DOESNTEXIST 3
380 #define CDR_EXECFAILED_PORTCLOSED 4
383 * must be sent after CDR_EXEC{OK|FAILED}
384 * CDR_EXEOK_BINDATA[1] bindatalen[1-4] bindata[bindatalen] */
385 #define CDR_BINDATA 3
388 * routing daemon sock
389 * format:
390 * cmdcode[4] length[4] cmddata[length]
394 #define CRD_KTU_SUPPORTEDVERSIONS 1
396 * CRD_KTU_SUPPORTEDVERSIONS[4] length[4] min[4] max[4]
399 #define CRD_KTU_CONNECT 2
401 * CRD_KTU_KTOU_CONNECT[4] length[4] cookie[8] targetlen[4] target[targetlen]
404 #define CRD_UTK_VERSION 1
406 * CRD_UTK_VERSION[4] length[4] version[4]
409 #define CRD_UTK_UP 2
410 #define CRD_UTK_UP_FLAGS_INTERFACES 1
412 * CRD_UTK_UP[4] length[4] flags[8] addrlen[4] addr[addrlen]
413 * if CRD_UTK_UP_FLAGS_INTERFACES:
414 * num_interfaces[4] (length[4] interface[length])[num_interfaces]
418 #define CRD_UTK_CONNECTERROR 3
420 * CRD_UTK_CONNECTERROR[4] length[4] cookie[8] error[4]
423 #define CRD_UTK_CONNECTERROR_ACCES 1
424 #define CRD_UTK_CONNECTERROR_NETUNREACH 2
425 #define CRD_UTK_CONNECTERROR_TIMEDOUT 3
426 #define CRD_UTK_CONNECTERROR_REFUSED 4
428 #define CONN_MNGD_HEADERLEN 2
429 #define CONN_MNGD_CHECKSUMLEN 4
431 #define CONN_MNGD_HASDATA (1 << 15)
432 #define CONN_MNGD_EOF (1 << 0)
433 #define CONN_MNGD_RCVEND (1 << 1)
434 #define CONN_MNGD_DATALEN 4095
436 #define CONN_MNGD_MAX_SEGMENT_SIZE (CONN_MNGD_DATALEN + 1)
438 #define PRIORITY_MAX 15384774
/* one configured network interface name (not NUL-terminated, see name_len) */
struct cor_interface_config {
	char *name;
	__u32 name_len;
};
446 #define CONGSTATUS_NONE 0
447 #define CONGSTATUS_CONNDATA 1
448 #define CONGSTATUS_ANNOUNCE 2
449 #define CONGSTATUS_RETRANS 3
450 #define CONGSTATUS_KPACKETS 4
/* per-device send queue with priority classes, drained by qos_resume_thread */
struct cor_qos_queue {
	spinlock_t qlock;

	struct kref ref;

	struct list_head queue_list;

	struct net_device *dev; /* may not change while queue is in list */

	struct task_struct *qos_resume_thread;
	wait_queue_head_t qos_resume_wq;
	atomic_t qos_resume_scheduled; /* 0/1 flag, see cor_schedule_qos_resume() */
	unsigned long jiffies_lastprogress;

	/* waiting senders, one list per QOS_CALLER_* class */
	struct list_head kpackets_waiting;
	struct list_head conn_retrans_waiting;
	struct list_head announce_waiting;
	struct list_head neighbors_waiting;
	struct list_head neighbors_waiting_nextpass;

	unsigned long jiffies_nb_pass_start;
	unsigned long jiffies_nb_lastduration;

	unsigned long jiffies_lastdrop; /* set by cor_qos_set_lastdrop() */

	__u32 numconns;
	__u64 priority_sum;

	atomic_t cong_status; /* CONGSTATUS_* */
};
486 * switch to and from RB_INQUEUE_NBCONGWIN is only done with nbcongwin.lock
487 * *and* qlock held
489 #define RB_INQUEUE_FALSE 0
490 #define RB_INQUEUE_TRUE 1
491 #define RB_INQUEUE_NBCONGWIN 2 /* only for nb->rb */
/* linkage for enqueueing a sender into a cor_qos_queue waiting list */
struct cor_resume_block{
	struct list_head lh;
	__u8 in_queue; /* RB_INQUEUE_* */
};
498 #define ANNOUNCE_TYPE_BROADCAST 1
499 #define ANNOUNCE_TYPE_UNICAST 2
/* state of one in-progress announce (broadcast or unicast, see type) */
struct cor_announce_data{
	struct kref ref;

	struct list_head lh;
	__u8 type; /* ANNOUNCE_TYPE_* */
	__u16 sndcnt;
	struct net_device *dev;
	char mac[MAX_ADDR_LEN];
	struct delayed_work announce_work;
	struct cor_resume_block rb;
};
/*
 * per-neighbor state accumulated during neighbor discovery, before the
 * cor_neighbor itself is created (see cor_add_neighbor())
 */
struct cor_neighbor_discdata{
	struct list_head lh;
	unsigned long jiffies_created;

	__be32 sessionid;

	struct net_device *dev;
	char mac[MAX_ADDR_LEN];

	__u8 nb_allocated;

	/* which announce commands have been received so far */
	__u8 rcvd_version;
	__u8 rcvd_addr;

	__u16 version;
	__u16 minversion;

	char *addr;
	__u16 addrlen;
};
/* one outstanding ping; matched against the cookie echoed in the pong */
struct cor_ping_cookie{
	ktime_t time_created;
	ktime_t time_sent;
	unsigned long jiffies_sent;

	__u32 cookie;
	__u8 pongs; /* count of pongs for pings sent after this one */
};
543 #define NEIGHBOR_STATE_INITIAL 0
544 #define NEIGHBOR_STATE_ACTIVE 1
545 #define NEIGHBOR_STATE_STALLED 2
546 #define NEIGHBOR_STATE_KILLED 3
548 #define NBCONGWIN_SHIFT 16
549 #define NBCONGWIN_MUL (1 << NBCONGWIN_SHIFT)
/*
 * per-neighbor state: control message (cmsg) queues, ping bookkeeping,
 * latency estimates, connid tables and retransmit state
 */
struct cor_neighbor{
	struct list_head nb_list;
	__u8 in_nb_list;

	struct kref ref;

	struct net_device *dev;
	char mac[MAX_ADDR_LEN];
	struct cor_qos_queue *queue;

	__be32 sessionid;

	atomic_t sessionid_rcv_needed;
	atomic_t sessionid_snd_needed;

	char *addr;
	__u16 addrlen;

	/* control message queues, protected by cmsg_lock */
	atomic64_t cmsg_timer_timeout;
	struct timer_list cmsg_timer;
	spinlock_t cmsg_lock;
	struct list_head cmsg_queue_pong;
	struct list_head cmsg_queue_ack;
	struct list_head cmsg_queue_ackconn_urgent;
	struct list_head cmsg_queue_ackconn_lowlat;
	struct list_head cmsg_queue_ackconn_highlat;
	struct list_head cmsg_queue_conndata_lowlat;
	struct list_head cmsg_queue_conndata_highlat;
	struct list_head cmsg_queue_other;
	__u8 add_retrans_needed;
	__u64 kpacket_seqno; /* not locked, only accessed by single tasklet */

	struct rb_root pending_conn_resets_rb;

	__u32 cmsg_pongslength;
	__u32 cmsg_otherlength;

	__u32 cmsg_pongscnt; /* size of queue only, protected by cmsg_lock */
	atomic_t cmsg_pongs_retrans_cnt; /* number of retransmits only */
	atomic_t cmsg_othercnt; /* size of queue + retransmits */

	atomic_t cmsg_bulk_readds;

	atomic_t cmsg_delay_conndata;

	/* not locked, only accessed by single tasklet */
	__u8 max_cmsg_delay_sent;

	/* protected by cor_qos_queue->qlock */
	struct cor_resume_block rb_kp;
	struct cor_resume_block rb_cr;
	struct cor_resume_block rb;
	unsigned long cmsg_send_start_j;
	ktime_t cmsg_send_start_kt;

	struct{
		spinlock_t lock;
		struct list_head lh;
		struct list_head lh_nextpass;
		__u32 cnt;
		__u64 priority_sum;
	}conns_waiting;

	/* congestion window towards this neighbor (see COR_NBCONGWIN) */
	struct{
		spinlock_t lock;
		atomic64_t data_intransit;
		atomic64_t cwin;
		__u64 cwin_shrinkto;
	}nbcongwin;

	spinlock_t state_lock;
	unsigned long last_ping_time;

	struct cor_ping_cookie cookies[PING_COOKIES_PER_NEIGH];
	__u32 ping_intransit;
	__u32 lastcookie;
	__u32 cookie_unsent;
	__u64 latency_variance_retrans_us; /* microsecs */
	atomic_t latency_retrans_us; /* microsecs */
	atomic_t latency_stddev_retrans_us; /* microsecs */
	atomic_t latency_advertised_us; /* microsecs */
	atomic_t max_remote_ack_delay_us; /* microsecs */
	atomic_t max_remote_ackconn_lowlat_delay_us; /* microsecs */
	atomic_t max_remote_ackconn_highlat_delay_us; /* microsecs */
	atomic_t max_remote_pong_delay_us; /* microsecs */

	union {
		unsigned long initial_state_since;/* initial state */
		/*
		 * last_roundtrip:
		 * time of the last sent packet which has been acked or
		 * otherwise responded to (e.g. pong)
		 */
		unsigned long last_roundtrip;/* active/stalled state */
	}state_time;
	ktime_t last_roundtrip_end;
	__u16 ping_success;
	__u8 state; /* NEIGHBOR_STATE_* */

	__u8 str_timer_pending;
	struct delayed_work stalltimeout_timer;

	spinlock_t connid_lock;
	struct rb_root connid_rb;

	spinlock_t connid_reuse_lock;
	struct rb_root connid_reuse_rb;
	struct list_head connid_reuse_list;
	__u16 connid_reuse_pingcnt;

	atomic64_t priority_sum;

	/*
	 * connections which receive data from/send data to this node
	 * used when terminating all connections of a neighbor and terminating
	 * inactive connections
	 */
	spinlock_t conn_list_lock;
	struct list_head rcv_conn_list;

	spinlock_t stalledconn_lock;
	struct work_struct stalledconn_work;
	__u8 stalledconn_work_scheduled;
	struct list_head stalledconn_list;

	/*
	 * the timer has to be inited when adding the neighbor
	 * timer_setup(...);
	 * add_timer(struct timer_list * timer);
	 */
	spinlock_t retrans_lock;
	struct timer_list retrans_timer;
	struct list_head retrans_list;
	struct rb_root kp_retransmits_rb;

	spinlock_t retrans_conn_lock;
	struct timer_list retrans_conn_timer;
	struct list_head retrans_conn_lowlatency_list;
	struct list_head retrans_conn_highlatency_list;

	struct work_struct reset_neigh_work;
};
694 #define DATABUF_BUF 0
695 #define DATABUF_SKB 1
/* one chunk of buffered connection data (heap buffer or skb-backed) */
struct cor_data_buf_item{
	struct list_head buf_list;

	char *buf;
	__u16 datalen;
	__u16 buflen;

	__u8 type; /* presumably DATABUF_BUF or DATABUF_SKB — confirm */
};
/* entry in nb->connid_reuse_rb/list blocking reuse of a recently freed connid */
struct cor_connid_reuse_item{
	struct rb_node rbn;

	struct list_head lh;

	struct kref ref;
	__u32 conn_id;
	__u16 pingcnt;
};
717 #define SNDSPEED_INIT 0
718 #define SNDSPEED_ACTIVE 1
/* send-rate estimate for a source-sock connection */
struct cor_snd_speed{
	__u8 state; /* presumably SNDSPEED_INIT/SNDSPEED_ACTIVE — confirm */
	__u8 flushed;
	unsigned long jiffies_last_refresh;
	__u32 bytes_sent;

	/* bytes per second */
	__u32 speed;
	__u32 speed_limited;
};
730 struct cor_sock;
733 * There are 2 conn objects per bi-directional connection. They refer to each
734 * other with in the reversedir field.
738 * Naming:
740 * cn: conn we do not know what is inside
741 * src_in, trgt_unconn, trgt_out, ...: A conn with the specified source or
742 * targettype. In the unlocked case the types are only a guess, because they
743 * might have changed since the last access. After locking the
744 * source/destination parameters have to be checked whether they still are what
745 * we expect. This includes source/targettype, neighbor, conn_id
747 * Exception: they may not change after they are set to source/target sock
748 * until the socket is released.
751 * Naming suffixes:
752 * no suffix: unlocked
754 * _l: this direction is locked
756 * _ll: both directions are locked
758 * _lx: this direction is locked, the other direction may be locked
760 * _o: unlocked, but source or target is known for sure, because an outside
761 * lock is taken; For variables on the heap this means that an outside lock must
762 * be taken before accessing the struct which points to the conn can be
763 * accessed.
767 * Locking:
768 * The following fields are immutable after the conn has been allocated:
769 * is_client, reversedir
771 * Most fields are protected by rcv_lock. Fields which which control
772 * source and destination of the data flow require both directions to
773 * to be locked and external references to be cleared before the change can
774 * happen. This includes fields like sourcetype, targettype, connid,
775 * list_heads, ???. In this case the side with is_client == 1 needs to be locked
776 * first.
778 * Some other fields are locked outside (e.g. at struct neighbor).
780 #define SOURCE_UNCONNECTED 0
781 #define SOURCE_IN 1
782 #define SOURCE_SOCK 2
784 #define TARGET_UNCONNECTED 0
785 #define TARGET_OUT 1
786 #define TARGET_SOCK 2
787 #define TARGET_DISCARD 3
789 #define BUFSIZE_NOACTION 0
790 #define BUFSIZE_DECR 1
791 #define BUFSIZE_DECR_FAST 2
792 #define BUFSIZE_INCR 3
793 #define BUFSIZE_INCR_FAST 4
795 #define JIFFIES_LAST_IDLE_SHIFT 8
796 #define BUFSIZE_SHIFT 5
798 #define SOCKTYPE_RAW 0
799 #define SOCKTYPE_MANAGED 1
801 #define RCV_BUF_STATE_OK 0
802 #define RCV_BUF_STATE_INCOMPLETE 1
803 #define RCV_BUF_STATE_RESET 2
805 #define SND_BUF_STATE_INCOMPLETE 0
806 #define SND_BUF_STATE_FILLED 1
/*
 * one direction of a connection; the opposite direction is reversedir.
 * The active union members are selected by sourcetype/targettype.
 * See the locking/naming comment block above.
 */
struct cor_conn{
	__u8 sourcetype:4, /* SOURCE_* */
		targettype:4; /* TARGET_* */

	__u8 is_client;

	/*
	 * isreset values:
	 * 0... connection active
	 * 1... connection is about to be reset, target does not need to be
	 *	notified
	 * 2... connection is reset
	 * 3... connection is reset + no pointers to
	 *	"struct cor_conn *reversedir" remaining except from this conn
	 */
	__u8 isreset;

	__u8 flush:1,
		is_highlatency:1;

	struct kref ref;

	spinlock_t rcv_lock;

	union{
		/* data arrives from a neighbor (SOURCE_IN) */
		struct{
			struct cor_neighbor *nb;
			/* list of all connections from this neighbor */
			struct list_head nb_list;

			struct list_head reorder_queue;
			__u32 reorder_memused;

			struct rb_node rbn;
			struct cor_connid_reuse_item *cir;
			__u32 conn_id;
			__u64 next_seqno;

			/* number of ack sent, not data seqno */
			__u32 ack_seqno;

			__u16 small_ooo_packets;

			__u32 priority;
			__u8 priority_seqno;
			__u8 inorder_ack_needed;

			__u8 established;

			__u64 window_seqnolimit;
			__u64 window_seqnolimit_remote;

			/* protected by nb->cmsg_lock */
			struct list_head acks_pending;

			unsigned long jiffies_last_act;
		}in;

		/* data comes from a local socket (SOURCE_SOCK) */
		struct{
			struct cor_sock *cs;

			struct list_head cl_list;

			__u32 priority;

			struct cor_snd_speed snd_speed;

			/* protected by conn->lock */
			__u8 in_flushtoconn_oom_list;
			/* protected by flushtoconn_oom_lock */
			struct list_head flushtoconn_oom_lh;

			__u8 buf_ctrl_filled;
			__u8 buf_data_filled;

			__u8 send_eof_needed:1,
				send_rcvend_needed:1;

			__u8 snd_delayed_lowbuf;
			__u8 flush;

			__u16 sent;

			struct{
				char snd_hdr[CONN_MNGD_HEADERLEN];
				/*
				 * NOTE(review): zero-length snd_data makes
				 * the data region alias snd_chksum — looks
				 * intentional (ctrl segments carry no
				 * payload); confirm before changing
				 */
				char snd_data[0];
				char snd_chksum[CONN_MNGD_CHECKSUMLEN];
			}buf_ctrl;

			struct{
				char snd_hdr[CONN_MNGD_HEADERLEN];
				char snd_chksum[CONN_MNGD_CHECKSUMLEN];
				char *snd_data;
				__u16 snd_data_len;
			}buf_data;
		}sock;
	}source;

	union{
		/* not yet connected; command parser state (TARGET_UNCONNECTED) */
		struct{
			__u32 paramlen;
			__u32 cmdread;
			__u16 cmd;
			__u8 paramlen_read;
			char *cmdparams;
			char paramlen_buf[4];
		}unconnected;

		/* data is sent to a neighbor (TARGET_OUT) */
		struct{
			struct cor_neighbor *nb;
			/* protected by nb->retrans_conn_lock, sorted by seqno */
			struct list_head retrans_list;

			/* protected by nb->stalledconn_lock */
			struct list_head nbstalled_lh;

			__u32 conn_id;
			__u64 seqno_nextsend;
			__u64 seqno_acked;
			__u64 seqno_windowlimit;

			struct cor_resume_block rb;

			/* for bursting */
			unsigned long jiffies_idle_since;

			__u32 rb_priority;
			__u16 maxsend_extra;

			__u16 burst_bytes;

			__u8 remote_bufsize_changerate;

			__u8 priority_last;
			__u8 priority_seqno;
			__u8 priority_send_allowed;

			__u8 windowlimit_reached;

			__u8 established;

			/* protected by nb->retrans_conn_lock */
			__u16 retrans_lowwindow;
		}out;

		/* data is delivered to a local socket (TARGET_SOCK) */
		struct{
			__u8 waiting_for_userspace;
			unsigned long waiting_for_userspace_since;

			struct cor_sock *cs;

			__u8 socktype; /* SOCKTYPE_* */

			__u8 rcv_buf_state; /* RCV_BUF_STATE_* */
			char rcv_hdr[CONN_MNGD_HEADERLEN];
			char rcv_chksum[CONN_MNGD_CHECKSUMLEN];
			char *rcv_buf;
			__u16 rcvd;
			__u16 rcv_hdr_flags;
			__u16 rcv_data_len;
		}sock;
	}target;

	/* in-order buffered data waiting to be forwarded/read */
	struct{
		struct list_head items;
		struct cor_data_buf_item *nextread;
		__u64 first_offset;

		__u32 datasize;
		__u32 overhead;
		__u32 read_remaining;

		__u16 next_read_offset;
	}data_buf;

	__u32 bufspace_accounted;

	/* receive buffer auto-sizing state machine (BUFSIZE_*) */
	struct{
		__u32 bufsize; /* 32 ==> 1 byte, see BUFSIZE_SHIFT */
		__u32 ignore_rcv_lowbuf;
		union{
			struct{
				__u32 bytesleft;
			}noact;

			struct{
				__u32 size_start;
			}decr;

			struct{
				__u32 size_start;
				__u32 size_end;
			}incr;
		}act;

		__u32 state:3, /* BUFSIZE_* */
			bytes_rcvd:24;
	}bufsize;

	struct cor_conn *reversedir;
};
1012 #define CONN_RETRANS_INITIAL 0
1013 #define CONN_RETRANS_SCHEDULED 1
1014 #define CONN_RETRANS_LOWWINDOW 2
1015 #define CONN_RETRANS_SENDING 3
1016 #define CONN_RETRANS_ACKED 4
/* one retransmittable range [seqno, seqno+length) of an outgoing connection */
struct cor_conn_retrans {
	/* timeout_list and conn_list share a single ref */
	struct kref ref;
	/* only in timeout_list if state == CONN_RETRANS_SCHEDULED */
	struct list_head timeout_list;
	struct list_head conn_list;
	struct cor_conn *trgt_out_o;
	__u64 seqno;
	__u32 length;

	__u8 snd_delayed_lowbuf;
	__u8 state; /* CONN_RETRANS_* */
	unsigned long timeout;
};
1032 #define RCVOOO_BUF 0
1033 #define RCVOOO_SKB 1
/* common header of an out-of-order received range (see reorder_queue) */
struct cor_rcvooo{
	struct list_head lh;
	__u64 seqno;
	__u8 type; /* presumably RCVOOO_BUF or RCVOOO_SKB — confirm */
	__u8 flush;
};
/* heap-buffer backed out-of-order range (type == RCVOOO_BUF) */
struct cor_rcvooo_buf{
	struct cor_rcvooo r;
	char *data;
	__u32 len;
};
/* inside skb->cb */
struct cor_skb_procstate{
	union{
		struct{
			struct work_struct work;
		}announce1;

		struct{
			__u32 offset;
		}announce2;

		/* skb queued as out-of-order data */
		struct{
			__u32 skb_memused;
			struct cor_rcvooo r;
		}rcv_ooo;

		/* skb queued as in-order data */
		struct{
			struct cor_data_buf_item dbi;
		}rcv;
	}funcstate;
};
1069 #define CS_TYPE_UNCONNECTED 0
1070 #define CS_TYPE_LISTENER 1
1071 #define CS_TYPE_CONN_RAW 2
1072 #define CS_TYPE_CONN_MANAGED 3
1074 #define CS_CONNECTSTATE_UNCONNECTED 0
1075 #define CS_CONNECTSTATE_CONNECTING 1
1076 #define CS_CONNECTSTATE_CONNECTED 2
1077 #define CS_CONNECTSTATE_ERROR 3
/*
 * cor socket; the active union member is selected by type (CS_TYPE_*):
 * listener, raw connection or managed connection
 */
struct cor_sock {
	struct sock sk; /* must be first */

	struct mutex lock;
	struct kref ref;

	/* type may not change once it is set to != CS_TYPE_UNCONNECTED */
	__u8 type; /* CS_TYPE_* */
	__u8 isreleased;

	__u8 publish_service;

	__u8 is_highlatency;

	union {
		struct {
			/* listener is protected by cor_bindnodes */
			struct list_head lh;
			__be32 port;
			__u8 publish_service;
			__u32 queue_maxlen;
			__u32 queue_len;
			struct list_head conn_queue;
		}listener;

		struct {
			struct cor_conn *src_sock;
			struct cor_conn *trgt_sock;

			struct cor_data_buf_item *rcvitem;
			__u16 rcvoffset;

			__u8 snd_delayed_lowbuf;

			struct cor_sock *pass_on_close;
		}conn_raw;

		struct {
			struct cor_sockaddr remoteaddr;

			struct list_head rd_msgs;
			struct list_head crd_lh;
			__u8 in_crd_list;

			__u8 connect_state; /* CS_CONNECTSTATE_* */

			__u8 is_reset;

			__u8 flush;

			__u8 shutdown_rd:1,
				shutdown_wr:1,
				sent_eof:1,
				sent_rcvend:1,
				rcvd_eof:1,
				rcvd_rcvend:1;

			__u8 snd_delayed_lowbuf;

			__be64 cookie;
			struct rb_node rbn;

			struct cor_conn *src_sock;
			struct cor_conn *trgt_sock;

			/* sending */
			char *snd_buf;
			__u16 snd_segment_size;
			__u16 snd_data_len;
			__u8 send_in_progress;

			/* receiving */
			char rcv_hdr[CONN_MNGD_HEADERLEN];
			char rcv_chksum[CONN_MNGD_CHECKSUMLEN];
			char *rcv_buf;
			__u16 rcv_hdr_flags;
			__u16 rcv_data_len;
			__u16 rcvbuf_consumed;
			__u8 rcv_buf_state; /* RCV_BUF_STATE_* */
		}conn_managed;
	}data;

	struct work_struct readfromconn_work;
	atomic_t readfromconn_work_scheduled;

	atomic_t ready_to_read;
	atomic_t ready_to_write;
	atomic_t ready_to_accept;
};
1169 #define ACK_NEEDED_NO 0
1170 #define ACK_NEEDED_SLOW 1
1171 #define ACK_NEEDED_FAST 2
1173 /* config.c */
1174 extern spinlock_t cor_local_addr_lock;
1175 extern char *cor_local_addr;
1176 extern __u32 cor_local_addrlen;
1177 extern __be32 cor_local_addr_sessionid;
1179 extern int cor_is_device_configurated(struct net_device *dev);
1181 extern void cor_set_interface_config(struct cor_interface_config *new_config,
1182 __u32 new_num_interfaces, int new_all_interfaces);
1184 extern void cor_config_down(void);
1186 extern int cor_config_up(char *addr2, __u32 addrlen2);
1188 extern int cor_is_clientmode(void);
1190 /* dev.c */
1191 extern void cor_qos_set_lastdrop(struct cor_qos_queue *q);
#ifdef DEBUG_QOS_SLOWSEND
extern int _cor_dev_queue_xmit(struct sk_buff *skb, int caller);
#else
/* without DEBUG_QOS_SLOWSEND the caller id is unused and xmit is direct */
static inline int _cor_dev_queue_xmit(struct sk_buff *skb, int caller)
{
	return dev_queue_xmit(skb);
}
#endif
1202 static inline int cor_dev_queue_xmit(struct sk_buff *skb,
1203 struct cor_qos_queue *q, int caller)
1205 int rc = _cor_dev_queue_xmit(skb, caller);
1206 if (unlikely(rc != NET_XMIT_SUCCESS))
1207 cor_qos_set_lastdrop(q);
1208 return rc;
1211 extern void cor_free_qos(struct kref *ref);
#ifdef COR_NBCONGWIN

extern void cor_nbcongwin_data_retransmitted(struct cor_neighbor *nb,
		__u64 bytes_sent);

extern void cor_nbcongwin_data_acked(struct cor_neighbor *nb,
		__u64 bytes_acked);

extern void cor_nbcongwin_data_sent(struct cor_neighbor *nb, __u32 bytes_sent);

extern int cor_nbcongwin_send_allowed(struct cor_neighbor *nb);

#else

/* no-op stubs when the per-neighbor congestion window is compiled out */

static inline void cor_nbcongwin_data_retransmitted(struct cor_neighbor *nb,
		__u64 bytes_sent)
{
}

static inline void cor_nbcongwin_data_acked(struct cor_neighbor *nb,
		__u64 bytes_acked)
{
}

static inline void cor_nbcongwin_data_sent(struct cor_neighbor *nb,
		__u32 bytes_sent)
{
}

/* sending is always allowed when congestion windowing is disabled */
static inline int cor_nbcongwin_send_allowed(struct cor_neighbor *nb)
{
	return 1;
}

#endif
1247 extern unsigned long cor_get_conn_idletime(struct cor_conn *trgt_out_l);
1249 extern struct cor_qos_queue *cor_get_queue(struct net_device *dev);
1251 extern int cor_destroy_queue(struct net_device *dev);
1253 extern int cor_create_queue(struct net_device *dev);
1255 #define QOS_RESUME_DONE 0
1256 #define QOS_RESUME_CONG 1
1257 #define QOS_RESUME_NEXTNEIGHBOR 2 /* cor_resume_neighbors() internal */
1258 #define QOS_RESUME_EXIT 3
1260 #define QOS_CALLER_KPACKET 0
1261 #define QOS_CALLER_CONN_RETRANS 1
1262 #define QOS_CALLER_ANNOUNCE 2
1263 #define QOS_CALLER_NEIGHBOR 3
/*
 * Wake the qos_resume thread of q. The 0 -> 1 cmpxchg on
 * qos_resume_scheduled ensures concurrent callers issue only one wakeup
 * until the thread resets the flag.
 */
static inline void cor_schedule_qos_resume(struct cor_qos_queue *q)
{
	if (atomic_cmpxchg(&(q->qos_resume_scheduled), 0, 1) == 0) {
		barrier();
		wake_up(&(q->qos_resume_wq));
	}
}
1273 extern void cor_qos_enqueue(struct cor_qos_queue *q,
1274 struct cor_resume_block *rb, unsigned long cmsg_send_start_j,
1275 ktime_t cmsg_send_start_kt, int caller);
1277 extern void cor_qos_remove_conn(struct cor_conn *trgt_out_l);
1279 extern int cor_may_send_announce(struct net_device *dev);
1281 extern struct sk_buff *cor_create_packet_cmsg(struct cor_neighbor *nb, int size,
1282 gfp_t alloc_flags, __u64 seqno);
1284 extern struct sk_buff *cor_create_packet(struct cor_neighbor *nb, int size,
1285 gfp_t alloc_flags);
1287 extern struct sk_buff *cor_create_packet_conndata(struct cor_neighbor *nb,
1288 int size, gfp_t alloc_flags, __u32 conn_id, __u64 seqno,
1289 __u8 snd_delayed_lowbuf, __u8 flush);
1291 extern void cor_qos_enqueue_conn(struct cor_conn *trgt_out_lx);
1293 extern void cor_dev_down(void);
1295 extern int cor_dev_up(void);
1297 extern void __exit cor_dev_exit1(void);
1299 extern int __init cor_dev_init(void);
1301 /* util.c */
1302 extern __u8 __attribute__((const)) cor_enc_log_256_16(__u32 value);
1304 extern __u32 __attribute__((const)) cor_dec_log_256_16(__u8 value);
1306 extern __u8 __attribute__((const)) cor_enc_log_64_11(__u32 value);
1308 extern __u32 __attribute__((const)) cor_dec_log_64_11(__u8 value);
1310 extern __u8 __attribute__((const)) cor_enc_log_64_7(__u64 value);
1312 extern __u64 __attribute__((const)) cor_dec_log_64_7(__u8 value);
1314 extern void cor_swap_list_items(struct list_head *lh1, struct list_head *lh2);
1316 extern void cor_kreffree_bug(struct kref *ref);
1318 extern int __init cor_util_init(void);
1320 /* neigh.c */
1321 extern atomic_t cor_num_neighs;
1323 extern void cor_neighbor_free(struct kref *ref);
1325 extern int cor_is_from_nb(struct sk_buff *skb, struct cor_neighbor *nb);
1327 extern struct cor_neighbor *_cor_get_neigh_by_mac(struct net_device *dev,
1328 char *source_hw);
1330 extern struct cor_neighbor *cor_get_neigh_by_mac(struct sk_buff *skb);
1332 extern struct cor_neighbor *cor_find_neigh(char *addr, __u16 addrlen);
1334 extern __u32 cor_generate_neigh_list(char *buf, __u32 buflen);
1336 extern void cor_reset_neighbors(struct net_device *dev);
1338 extern int cor_get_neigh_state(struct cor_neighbor *nb);
1340 extern void cor_ping_resp(struct cor_neighbor *nb, __u32 cookie,
1341 __u32 respdelay);
1343 extern __u32 cor_add_ping_req(struct cor_neighbor *nb,
1344 unsigned long *last_ping_time);
1346 extern void cor_ping_sent(struct cor_neighbor *nb, __u32 cookie);
1348 extern void cor_unadd_ping_req(struct cor_neighbor *nb, __u32 cookie,
1349 unsigned long last_ping_time, int congested);
1351 #define TIMETOSENDPING_NO 0
1352 #define TIMETOSENDPING_YES 1
1353 #define TIMETOSENDPING_FORCE 2
1354 extern int cor_time_to_send_ping(struct cor_neighbor *nb);
1356 extern unsigned long cor_get_next_ping_time(struct cor_neighbor *nb);
1358 extern void cor_add_neighbor(struct cor_neighbor_discdata *nb_dd);
1360 extern struct cor_conn *cor_get_conn(struct cor_neighbor *nb, __u32 conn_id);
1362 extern int cor_insert_connid(struct cor_neighbor *nb,
1363 struct cor_conn *src_in_ll);
1365 extern void cor_insert_connid_reuse(struct cor_neighbor *nb,
1366 struct cor_connid_reuse_item *ins);
1368 extern int cor_connid_alloc(struct cor_neighbor *nb,
1369 struct cor_conn *src_in_ll);
1371 extern int __init cor_neighbor_init(void);
1373 extern void __exit cor_neighbor_exit2(void);
1375 /* neigh_ann_rcv.c */
1376 extern int cor_rcv_announce(struct sk_buff *skb);
1378 extern int __init cor_neigh_ann_rcv_init(void);
1380 extern void __exit cor_neigh_ann_rcv_exit2(void);
1382 /* neigh_ann_snd.c */
1383 extern int _cor_send_announce(struct cor_announce_data *ann, int fromqos,
1384 int *sent);
1386 extern void cor_announce_data_free(struct kref *ref);
1388 extern void cor_announce_send_start(struct net_device *dev, char *mac,
1389 int type);
1391 extern void cor_announce_send_stop(struct net_device *dev, char *mac, int type);
1393 /* neigh_rcv.c */
1394 extern void cor_kernel_packet(struct cor_neighbor *nb, struct sk_buff *skb,
1395 int ackneeded);
1397 /* neigh_snd.c */
1398 struct cor_control_msg_out;
1400 #define ACM_PRIORITY_LOW 1 /* oom recovery easy */
1401 #define ACM_PRIORITY_MED 2 /* oom may cause timeouts */
1402 #define ACM_PRIORITY_HIGH 3 /* cm acks - needed for freeing old cms */
1404 extern struct cor_control_msg_out *cor_alloc_control_msg(
1405 struct cor_neighbor *nb, int priority);
1407 extern void cor_free_control_msg(struct cor_control_msg_out *cm);
1409 extern void cor_retransmit_timerfunc(struct timer_list *retrans_timer);
1411 extern void cor_kern_ack_rcvd(struct cor_neighbor *nb, __u64 seqno);
1413 extern int cor_send_messages(struct cor_neighbor *nb,
1414 unsigned long cmsg_send_start_j, ktime_t cmsg_send_start_kt,
1415 int *sent);
1417 extern void cor_controlmsg_timerfunc(struct timer_list *cmsg_timer);
1419 extern void cor_schedule_controlmsg_timer(struct cor_neighbor *nb_cmsglocked);
1421 extern void cor_send_pong(struct cor_neighbor *nb, __u32 cookie,
1422 ktime_t ping_rcvtime);
1424 extern int cor_send_reset_conn(struct cor_neighbor *nb, __u32 conn_id,
1425 int lowprio);
1427 extern void cor_send_ack(struct cor_neighbor *nb, __u64 seqno);
1429 extern void cor_send_ack_conn_ifneeded(struct cor_conn *src_in_l, __u64 seqno_ooo,
1430 __u32 ooo_length);
1432 extern void cor_send_priority(struct cor_conn *trgt_out_ll, int force,
1433 __u8 priority);
1435 extern void cor_free_ack_conns(struct cor_conn *src_in_lx);
1437 extern void cor_send_connect_success(struct cor_control_msg_out *cm,
1438 __u32 conn_id, struct cor_conn *src_in);
1440 extern void cor_send_connect_nb(struct cor_control_msg_out *cm, __u32 conn_id,
1441 __u64 seqno1, __u64 seqno2, struct cor_conn *src_in_ll);
1443 extern void cor_send_conndata(struct cor_control_msg_out *cm, __u32 conn_id,
1444 __u64 seqno, char *data_orig, char *data, __u32 datalen,
1445 __u8 snd_delayed_lowbuf, __u8 flush, __u8 highlatency,
1446 struct cor_conn_retrans *cr);
1448 extern int __init cor_kgen_init(void);
1450 extern void __exit cor_kgen_exit2(void);
1452 /* conn.c */
1453 extern struct kmem_cache *cor_connid_reuse_slab;
1455 extern atomic_t cor_num_conns;
1457 extern spinlock_t cor_bindnodes;
1459 extern int cor_newconn_checkpriority(struct cor_neighbor *nb, __u8 priority);
1461 extern __u32 cor_refresh_conn_priority(struct cor_conn *cn, int locked);
1463 extern void cor_set_conn_in_priority(struct cor_neighbor *nb, __u32 conn_id,
1464 struct cor_conn *src_in, __u8 priority_seqno, __u8 priority);
1466 extern void _cor_set_last_act(struct cor_conn *src_in_l);
1468 extern void cor_free_conn(struct kref *ref);
1470 extern int cor_conn_init_out(struct cor_conn *trgt_unconn_ll,
1471 struct cor_neighbor *nb, __u32 rcvd_connid,
1472 int use_rcvd_connid);
1474 extern void cor_conn_init_sock_source(struct cor_conn *cn);
1476 extern void cor_conn_init_sock_target(struct cor_conn *cn);
1478 extern __u32 cor_list_services(char *buf, __u32 buflen);
1480 extern void cor_set_publish_service(struct cor_sock *cs, __u8 value);
1482 extern void cor_close_port(struct cor_sock *cs);
1484 extern int cor_open_port(struct cor_sock *cs_l, __be32 port);
1486 extern int cor_connect_port(struct cor_conn *trgt_unconn_ll, __be32 port);
1488 extern int cor_connect_neigh(struct cor_conn *trgt_unconn_ll, char *addr,
1489 __u16 addrlen);
1491 extern struct cor_conn* cor_alloc_conn(gfp_t allocflags, __u8 is_highlatency);
1493 extern void cor_reset_conn_locked(struct cor_conn *cn_ll);
1495 extern void cor_reset_conn(struct cor_conn *cn);
1497 /* conn_src_in.c */
1498 extern void cor_reset_ooo_queue(struct cor_conn *src_in_lx);
1500 extern void cor_drain_ooo_queue(struct cor_conn *src_in_l);
1502 extern void cor_conn_rcv(struct cor_neighbor *nb,
1503 struct sk_buff *skb, char *data, __u32 len,
1504 __u32 conn_id, __u64 seqno,
1505 int rcv_delayed_lowbuf, __u8 flush);
1507 extern int __init cor_rcv_init(void);
1509 extern void __exit cor_rcv_exit2(void);
1511 /* conn_src_sock.c */
1512 extern void cor_update_src_sock_sndspeed(struct cor_conn *src_sock_l,
1513 __u32 bytes_sent);
1515 extern int cor_sock_sndbufavailable(struct cor_conn *src_sock_lx,
1516 int for_wakeup);
1518 #define RC_FTC_OK 0
1519 #define RC_FTC_OOM 1
1520 #define RC_FTC_ERR 2
1521 extern int _cor_mngdsocket_flushtoconn(struct cor_conn *src_sock_l);
1523 extern int cor_mngdsocket_flushtoconn_ctrl(struct cor_sock *cs_m_l,
1524 __u8 send_eof, __u8 send_rcvend);
1526 extern int cor_mngdsocket_flushtoconn(struct cor_sock *cs_m_l);
1528 extern int __init cor_conn_src_sock_init1(void);
1530 extern void __exit cor_conn_src_sock_exit1(void);
1532 /* conn_trgt_unconn.c */
1533 extern int cor_encode_len(char *buf, int buflen, __u32 len);
1535 extern void cor_proc_cpacket(struct cor_conn *trgt_unconn);
1537 /* conn_trgt_out.c */
1538 extern void cor_reschedule_conn_retrans_timer(
1539 struct cor_neighbor *nb_retranslocked);
1541 extern void cor_cancel_all_conn_retrans(struct cor_conn *trgt_out_l);
1543 extern int cor_send_retrans(struct cor_neighbor *nb, int *sent);
1545 extern void cor_retransmit_conn_timerfunc(struct timer_list *retrans_timer_conn);
1547 extern void cor_conn_ack_ooo_rcvd(struct cor_neighbor *nb, __u32 conn_id,
1548 struct cor_conn *trgt_out, __u64 seqno_ooo, __u32 length,
1549 __u64 *bytes_acked);
1551 extern void cor_conn_ack_rcvd(struct cor_neighbor *nb, __u32 conn_id,
1552 struct cor_conn *trgt_out, __u64 seqno, int setwindow,
1553 __u8 window, __u8 bufsize_changerate, __u64 *bytes_acked);
1555 extern void cor_schedule_retransmit_conn(struct cor_conn_retrans *cr,
1556 int connlocked, int nbretrans_locked);
1558 extern int cor_srcin_buflimit_reached(struct cor_conn *src_in_lx);
1560 /* RC_FLUSH_CONN_OUT_SENT | RC_FLUSH_CONN_OUT_{^SENT} */
1561 #define RC_FLUSH_CONN_OUT_OK 1
1562 #define RC_FLUSH_CONN_OUT_SENT_CONG 2 /* cor_flush_out internal only */
1563 #define RC_FLUSH_CONN_OUT_NBNOTACTIVE 3
1564 #define RC_FLUSH_CONN_OUT_CONG 4
1565 #define RC_FLUSH_CONN_OUT_MAXSENT 5
1566 #define RC_FLUSH_CONN_OUT_OOM 6
1568 extern int _cor_flush_out(struct cor_conn *trgt_out_lx, __u32 maxsend,
1569 __u32 *sent, int from_qos, int maxsend_forcedelay);
1571 static inline int cor_flush_out(struct cor_conn *trgt_out_lx, __u32 *sent)
1573 int rc = _cor_flush_out(trgt_out_lx, 1 << 30, sent, 0, 0);
1575 if (rc == RC_FLUSH_CONN_OUT_CONG || rc == RC_FLUSH_CONN_OUT_MAXSENT ||
1576 rc == RC_FLUSH_CONN_OUT_OOM)
1577 cor_qos_enqueue_conn(trgt_out_lx);
1579 return rc;
1582 extern void cor_resume_nbstalled_conns(struct work_struct *work);
1584 extern int __init cor_snd_init(void);
1586 extern void __exit cor_snd_exit2(void);
1588 /* conn_trgt_sock.c */
1589 extern void cor_flush_sock_managed(struct cor_conn *trgt_sock_lx,
1590 int from_recvmsg, __u8 *do_wake_sender);
1592 extern void cor_flush_sock(struct cor_conn *trgt_sock_lx);
1594 /* conn_databuf.c */
1595 extern struct kmem_cache *cor_data_buf_item_slab;
1597 extern void cor_databuf_init(struct cor_conn *cn_init);
1599 extern void cor_bufsize_init(struct cor_conn *cn_l, __u32 bufsize);
1601 extern int cor_account_bufspace(struct cor_conn *cn_lx);
1603 extern int cor_cpacket_write_allowed(struct cor_conn *src_unconn_lx);
1605 extern void cor_update_windowlimit(struct cor_conn *src_in_lx);
1607 extern __u8 _cor_bufsize_update_get_changerate(struct cor_conn *cn_lx);
1609 static inline int cor_bufsize_initial_phase(struct cor_conn *cn_lx)
1611 return unlikely(cn_lx->bufsize.bytes_rcvd != (1 << 24) - 1 &&
1612 cn_lx->bufsize.bytes_rcvd < cn_lx->bufsize.bufsize);
1615 static inline int cor_ackconn_urgent(struct cor_conn *cn_lx)
1617 return cor_bufsize_initial_phase(cn_lx) ||
1618 cn_lx->bufsize.state == BUFSIZE_INCR_FAST;
1621 extern void cor_bufsize_read_to_sock(struct cor_conn *trgt_sock_lx);
1623 extern void cor_databuf_ackdiscard(struct cor_conn *cn_lx);
1625 extern void cor_reset_seqno(struct cor_conn *cn_l, __u64 initseqno);
1627 extern void cor_databuf_pull(struct cor_conn *cn_lx, char *dst, __u32 len);
1629 static inline __u32 cor_databuf_trypull(struct cor_conn *cn_l, char *dst,
1630 __u32 len)
1632 if (len > cn_l->data_buf.read_remaining)
1633 len = cn_l->data_buf.read_remaining;
1634 cor_databuf_pull(cn_l, dst, len);
1635 return len;
1638 extern void cor_databuf_unpull_dpi(struct cor_conn *trgt_sock, struct cor_sock *cs,
1639 struct cor_data_buf_item *item, __u16 next_read_offset);
1641 extern void cor_databuf_pull_dbi(struct cor_sock *cs_rl,
1642 struct cor_conn *trgt_sock_l);
1644 extern void cor_databuf_unpull(struct cor_conn *trgt_out_l, __u32 bytes);
1646 extern void cor_databuf_pullold(struct cor_conn *trgt_out_l, __u64 startpos,
1647 char *dst, int len);
1649 extern void cor_databuf_ack(struct cor_conn *trgt_out_l, __u64 pos);
1651 extern void cor_databuf_ackread(struct cor_conn *cn_lx);
1653 extern __u32 cor_receive_buf(struct cor_conn *cn_lx, char *buf, __u32 datalen,
1654 int rcv_delayed_lowbuf, __u8 flush);
1656 extern __u32 cor_receive_skb(struct cor_conn *src_in_l, struct sk_buff *skb,
1657 int rcv_delayed_lowbuf, __u8 flush);
1659 extern void cor_wake_sender(struct cor_conn *cn);
1661 extern int __init cor_forward_init(void);
1663 extern void __exit cor_forward_exit2(void);
1665 /* sock_rdaemon.c */
1666 extern int cor_is_device_configurated(struct net_device *dev);
1668 extern int cor_create_rdaemon_sock(struct net *net, struct socket *sock,
1669 int protocol, int kern);
1671 extern int cor_rdreq_connect(struct cor_sock *cs);
1673 extern void cor_usersock_release(struct cor_sock *cs);
1675 extern int __init cor_rd_init1(void);
1677 extern int __init cor_rd_init2(void);
1679 extern void __exit cor_rd_exit1(void);
1681 extern void __exit cor_rd_exit2(void);
1683 /* sock_raw.c */
1684 extern int cor_create_raw_sock(struct net *net, struct socket *sock,
1685 int protocol, int kern);
1687 /* sock_managed.c */
1688 extern struct cor_sock *cor_get_sock_by_cookie(__be64 cookie);
1690 extern void __cor_set_sock_connecterror(struct cor_sock *cs_m_l, int errorno);
1692 extern void _cor_set_sock_connecterror(struct cor_sock *cs, int errorno);
1694 extern void cor_mngdsocket_chksum(char *hdr, __u32 hdrlen,
1695 char *data, __u32 datalen,
1696 char *chksum, __u32 chksum_len);
/* Look up the managed socket for cookie and set its connect error.
 * NOTE(review): cor_get_sock_by_cookie() may plausibly return NULL for
 * an unknown cookie; _cor_set_sock_connecterror() is assumed to tolerate
 * a NULL cs - confirm against sock_managed.c. */
static inline void cor_set_sock_connecterror(__be64 cookie, int errorno)
{
	struct cor_sock *cs = cor_get_sock_by_cookie(cookie);
	_cor_set_sock_connecterror(cs, errorno);
}
1704 extern void cor_mngdsocket_readfromconn_fromatomic(struct cor_sock *cs);
1706 extern void cor_mngdsocket_readfromconn_wq(struct work_struct *work);
1708 extern int cor_create_managed_sock(struct net *net, struct socket *sock,
1709 int protocol, int kern);
1711 extern int __init cor_sock_managed_init1(void);
1713 /* sock.c */
1714 extern void cor_free_sock(struct kref *ref);
1716 extern int cor_socket_setsockopt_tos(struct socket *sock,
1717 char __user *optval, unsigned int optlen);
1719 extern int cor_socket_socketpair(struct socket *sock1, struct socket *sock2);
1721 extern int cor_socket_getname(struct socket *sock, struct sockaddr *addr,
1722 int peer);
1724 extern int cor_socket_mmap(struct file *file, struct socket *sock,
1725 struct vm_area_struct *vma);
1727 extern int _cor_createsock(struct net *net, struct socket *sock, int protocol,
1728 int kern);
1730 extern int __init cor_sock_init1(void);
1732 extern int __init cor_sock_init2(void);
1734 extern void __exit cor_sock_exit1(void);
1737 static inline struct cor_skb_procstate *cor_skb_pstate(struct sk_buff *skb)
1739 BUILD_BUG_ON(sizeof(struct cor_skb_procstate) > sizeof(skb->cb));
1740 return (struct cor_skb_procstate *) &(skb->cb[0]);
1743 static inline struct sk_buff *cor_skb_from_pstate(struct cor_skb_procstate *ps)
1745 return (struct sk_buff *) (((char *)ps) - offsetof(struct sk_buff,cb));
1748 static inline int cor_qos_fastsend_allowed_conn_retrans(struct cor_neighbor *nb)
1750 return atomic_read(&(nb->queue->cong_status)) < CONGSTATUS_RETRANS;
1753 static inline int cor_qos_fastsend_allowed_announce(struct net_device *dev)
1755 int rc;
1756 struct cor_qos_queue *q = cor_get_queue(dev);
1758 if (q == 0)
1759 return 0;
1761 rc = atomic_read(&(q->cong_status)) < CONGSTATUS_ANNOUNCE;
1763 kref_put(&(q->ref), cor_free_qos);
1765 return rc;
1768 static inline int cor_qos_fastsend_allowed_conn(struct cor_conn *trgt_out_lx)
1770 struct cor_qos_queue *q = trgt_out_lx->target.out.nb->queue;
1771 return atomic_read(&(q->cong_status)) < CONGSTATUS_CONNDATA;
/* Maximum payload per frame: device MTU minus link layer headroom minus
 * the caller supplied cor header overhead.
 * NOTE(review): the __u32 result wraps if mtu is smaller than the
 * reserved space plus l3overhead - presumably ruled out by device
 * configuration checks; confirm. */
static inline __u32 cor_mss(struct cor_neighbor *nb, __u32 l3overhead)
{
	return nb->dev->mtu - LL_RESERVED_SPACE(nb->dev) - l3overhead;
}
1779 static inline __u32 cor_mss_cmsg(struct cor_neighbor *nb)
1781 return cor_mss(nb, 7);
1784 static inline __u32 cor_mss_conndata(struct cor_neighbor *nb, int highlatency)
1786 __u32 mss_tmp = cor_mss(nb, 11);
1787 __u32 i;
1789 if (mss_tmp < 256 || highlatency)
1790 return mss_tmp;
1792 for (i=256;i<4096;i*=2) {
1793 if (i*2 > mss_tmp)
1794 return i;
1797 return mss_tmp - mss_tmp%4096;
1800 static inline __u32 cor_send_conndata_as_skb(struct cor_neighbor *nb,
1801 __u32 size)
1803 return size >= cor_mss_conndata(nb, 0)/2;
/* Compute an absolute retransmit timeout (in jiffies) from the measured
 * latency, its standard deviation and the peer's maximum ack delay, all
 * in microseconds. The timeout is roughly
 *	latency * 1.25 + 3 * stddev + ack_delay.
 * For inputs above the thresholds the microsecond sum could overflow
 * 32 bits, so the same formula is evaluated with millisecond precision
 * instead (/4000 is the 25% latency margin, /333 approximates the
 * 3 * stddev term in milliseconds). */
static inline long cor_calc_timeout(__u32 latency_us, __u32 latency_stddev_us,
		__u32 max_remote_ack_delay_us)
{
	unsigned long addto;

	if (unlikely(unlikely(latency_us > 1000000000) ||
			unlikely(latency_stddev_us > 500000000) ||
			unlikely(max_remote_ack_delay_us > 1000000000))) {
		addto = msecs_to_jiffies(latency_us/1000 + latency_us/4000 +
				latency_stddev_us/333 +
				max_remote_ack_delay_us/1000);
	} else {
		addto = usecs_to_jiffies(latency_us + latency_us/4 +
				latency_stddev_us*3 + max_remote_ack_delay_us);
	}

	/*
	 * 2 is added because
	 * 1) _to_jiffies rounds down, but should round up, so add 1 to
	 *    compensate
	 * 2) even if latency is 0, we never want to schedule the retransmit
	 *    to run right now, so add 1 more
	 */
	return jiffies + 2 + addto;
}
1831 static inline void cor_put_be64(char *dst, __be64 value)
1833 char *p_value = (char *) &value;
1835 dst[0] = p_value[0];
1836 dst[1] = p_value[1];
1837 dst[2] = p_value[2];
1838 dst[3] = p_value[3];
1839 dst[4] = p_value[4];
1840 dst[5] = p_value[5];
1841 dst[6] = p_value[6];
1842 dst[7] = p_value[7];
1845 static inline void cor_put_u64(char *dst, __u64 value)
1847 cor_put_be64(dst, cpu_to_be64(value));
1850 static inline void cor_put_u48(char *dst, __u64 value)
1852 char *p_value = (char *) &value;
1854 value = cpu_to_be64(value);
1856 dst[0] = p_value[2];
1857 dst[1] = p_value[3];
1858 dst[2] = p_value[4];
1859 dst[3] = p_value[5];
1860 dst[4] = p_value[6];
1861 dst[5] = p_value[7];
1864 static inline void cor_put_be32(char *dst, __be32 value)
1866 char *p_value = (char *) &value;
1867 dst[0] = p_value[0];
1868 dst[1] = p_value[1];
1869 dst[2] = p_value[2];
1870 dst[3] = p_value[3];
1873 static inline void cor_put_u32(char *dst, __u32 value)
1875 cor_put_be32(dst, cpu_to_be32(value));
1878 static inline void cor_put_be16(char *dst, __be16 value)
1880 char *p_value = (char *) &value;
1881 dst[0] = p_value[0];
1882 dst[1] = p_value[1];
1885 static inline void cor_put_u16(char *dst, __u16 value)
1887 cor_put_be16(dst, cpu_to_be16(value));
/* Consume len bytes from the head of skb; returns a pointer to the
 * consumed bytes, or 0 if the skb is too short. */
static inline char *cor_pull_skb(struct sk_buff *skb, unsigned int len)
{
	char *new_head = skb_pull(skb, len);

	if (unlikely(new_head == 0))
		return 0;

	/* skb_pull() returns the new head; the data starts len before it */
	return new_head - len;
}
1900 static inline __be64 cor_parse_be64(char *buf)
1902 __u64 ret = 0;
1904 BUG_ON(buf == 0);
1906 ((char *)&ret)[0] = buf[0];
1907 ((char *)&ret)[1] = buf[1];
1908 ((char *)&ret)[2] = buf[2];
1909 ((char *)&ret)[3] = buf[3];
1910 ((char *)&ret)[4] = buf[4];
1911 ((char *)&ret)[5] = buf[5];
1912 ((char *)&ret)[6] = buf[6];
1913 ((char *)&ret)[7] = buf[7];
1915 return ret;
1918 static inline __u64 cor_parse_u64(char *buf)
1920 return be64_to_cpu(cor_parse_be64(buf));
1923 static inline __u64 cor_parse_u48(char *ptr)
1925 __u64 ret = 0;
1927 ((char *)&ret)[0] = 0;
1928 ((char *)&ret)[1] = 0;
1929 ((char *)&ret)[2] = ptr[0];
1930 ((char *)&ret)[3] = ptr[1];
1931 ((char *)&ret)[4] = ptr[2];
1932 ((char *)&ret)[5] = ptr[3];
1933 ((char *)&ret)[6] = ptr[4];
1934 ((char *)&ret)[7] = ptr[5];
1936 return be64_to_cpu(ret);
1939 static inline __be32 cor_parse_be32(char *ptr)
1941 __u32 ret = 0;
1943 BUG_ON(ptr == 0);
1945 ((char *)&ret)[0] = ptr[0];
1946 ((char *)&ret)[1] = ptr[1];
1947 ((char *)&ret)[2] = ptr[2];
1948 ((char *)&ret)[3] = ptr[3];
1950 return ret;
1953 static inline __u32 cor_parse_u32(char *ptr)
1955 return be32_to_cpu(cor_parse_be32(ptr));
1958 static inline __be16 cor_parse_be16(char *ptr)
1960 __u16 ret = 0;
1962 BUG_ON(ptr == 0);
1964 ((char *)&ret)[0] = ptr[0];
1965 ((char *)&ret)[1] = ptr[1];
1967 return ret;
1970 static inline __u16 cor_parse_u16(char *ptr)
1972 return be16_to_cpu(cor_parse_be16(ptr));
1975 static inline __u8 cor_parse_u8(char *ptr)
1977 BUG_ON(ptr == 0);
1978 return (__u8) ptr[0];
1981 static inline __u64 cor_pull_u48(struct sk_buff *skb)
1983 return cor_parse_u48(cor_pull_skb(skb, 6));
1986 static inline __be32 cor_pull_be32(struct sk_buff *skb)
1988 return cor_parse_be32(cor_pull_skb(skb, 4));
1991 static inline __u32 cor_pull_u32(struct sk_buff *skb)
1993 return cor_parse_u32(cor_pull_skb(skb, 4));
1996 static inline __u16 cor_pull_u16(struct sk_buff *skb)
1998 return cor_parse_u16(cor_pull_skb(skb, 2));
2001 static inline __u8 cor_pull_u8(struct sk_buff *skb)
2003 char *ptr = cor_pull_skb(skb, 1);
2004 BUG_ON(ptr == 0);
2005 return *ptr;
2008 static inline int cor_is_conn_in(struct cor_conn *cn_l, struct cor_neighbor *nb,
2009 __u32 conn_id)
2011 if (unlikely(unlikely(cn_l->sourcetype != SOURCE_IN) ||
2012 unlikely(cn_l->source.in.nb != nb) ||
2013 unlikely(cn_l->source.in.conn_id != conn_id) ||
2014 unlikely(cn_l->isreset != 0)))
2015 return 0;
2016 return 1;
2019 static inline int cor_is_src_sock(struct cor_conn *cn_l, struct cor_sock *cs)
2021 if (unlikely(unlikely(cn_l->sourcetype != SOURCE_SOCK) ||
2022 unlikely(cn_l->source.sock.cs != cs)))
2023 return 0;
2024 return 1;
2027 static inline int cor_is_trgt_sock(struct cor_conn *cn_l, struct cor_sock *cs)
2029 if (unlikely(unlikely(cn_l->targettype != TARGET_SOCK) ||
2030 unlikely(cn_l->target.sock.cs != cs)))
2031 return 0;
2032 return 1;
/* Refresh the conn activity timestamp, but only when it is older than
 * CONN_ACTIVITY_UPDATEINTERVAL_SEC, to avoid updating on every packet.
 * Caller holds the conn lock (src_in_l suffix). */
static inline void cor_set_last_act(struct cor_conn *src_in_l)
{
	unsigned long jiffies_tmp = jiffies;

	BUG_ON(src_in_l->sourcetype != SOURCE_IN);

	if (unlikely(time_after(jiffies_tmp,
			src_in_l->source.in.jiffies_last_act +
			HZ * CONN_ACTIVITY_UPDATEINTERVAL_SEC)))
		_cor_set_last_act(src_in_l);
}
2047 #define BUFLEN_MIN 128
2048 #define BUFLEN_MAX 4096
2049 #define PAGESIZE (1 << PAGE_SHIFT)
2051 static inline __u32 cor_buf_optlen(__u32 datalen)
2053 __u32 optlen = BUFLEN_MIN;
2054 while (optlen < datalen && optlen < PAGESIZE && optlen < BUFLEN_MAX)
2055 optlen = (optlen << 1);
2056 return optlen;
2060 inline static void cor_databuf_item_free(struct cor_data_buf_item *item)
2062 if (item->type == DATABUF_BUF) {
2063 kfree(item->buf);
2064 kmem_cache_free(cor_data_buf_item_slab, item);
2065 } else if (item->type == DATABUF_SKB) {
2066 struct sk_buff *skb = cor_skb_from_pstate(container_of(item,
2067 struct cor_skb_procstate, funcstate.rcv.dbi));
2068 kfree_skb(skb);
2069 } else {
2070 BUG();
2074 static inline __u64 cor_seqno_clean(__u64 seqno)
2076 return seqno & ((1LL << 48) - 1);
2079 static inline int cor_seqno_eq(__u64 seqno1, __u64 seqno2)
2081 seqno1 = seqno1 << 16;
2082 seqno2 = seqno2 << 16;
2083 return seqno1 == seqno2;
2086 static inline int cor_seqno_before(__u64 seqno1, __u64 seqno2)
2088 seqno1 = seqno1 << 16;
2089 seqno2 = seqno2 << 16;
2090 return (seqno1 - seqno2) >= (1LL << 63);
2093 static inline int cor_seqno_before_eq(__u64 seqno1, __u64 seqno2)
2095 return cor_seqno_eq(seqno1, seqno2) || cor_seqno_before(seqno1, seqno2);
2098 static inline int cor_seqno_after(__u64 seqno1, __u64 seqno2)
2100 return cor_seqno_before_eq(seqno1, seqno2) ? 0 : 1;
2103 static inline int cor_seqno_after_eq(__u64 seqno1, __u64 seqno2)
2105 return cor_seqno_before(seqno1, seqno2) ? 0 : 1;
2108 static inline int ktime_before_eq(ktime_t time1, ktime_t time2)
2110 return ktime_after(time1, time2) ? 0 : 1;
2113 static inline int ktime_after_eq(ktime_t time1, ktime_t time2)
2115 return ktime_before(time1, time2) ? 0 : 1;
/* Atomically replace the contribution oldvalue with newvalue in a
 * shared 64 bit sum, retrying the cmpxchg until no concurrent update
 * intervenes; returns the resulting sum. The BUG_ONs catch accounting
 * errors: removing more than the sum holds, or overflowing it. */
static inline __u64 cor_update_atomic_sum(atomic64_t *atomic_sum, __u32 oldvalue,
		__u32 newvalue)
{
	__u64 sum_old = atomic64_read(atomic_sum);
	__u64 sum;

	while (1) {
		__u64 cmpxchg_ret;

		sum = sum_old;

		BUG_ON(sum < oldvalue);
		sum -= oldvalue;

		BUG_ON(sum + newvalue < sum);
		sum += newvalue;

		cmpxchg_ret = atomic64_cmpxchg(atomic_sum, sum_old, sum);

		if (likely(cmpxchg_ret == sum_old))
			break;

		/* lost the race - retry from the value cmpxchg read back */
		sum_old = cmpxchg_ret;
	}

	return sum;
}
/* Mark the socket ready to write and invoke its write space callback;
 * the barrier keeps the flag store ordered before the callback. */
static inline void cor_sk_write_space(struct cor_sock *cs)
{
	atomic_set(&(cs->ready_to_write), 1);
	barrier();
	cs->sk.sk_write_space(&(cs->sk));
}
/* Mark the socket ready to read and invoke its data ready callback;
 * the barrier keeps the flag store ordered before the callback. */
static inline void cor_sk_data_ready(struct cor_sock *cs)
{
	atomic_set(&(cs->ready_to_read), 1);
	barrier();
	cs->sk.sk_data_ready(&(cs->sk));
}
2160 /* the other direction may be locked only if called from cor_proc_cpacket */
2161 static inline void cor_flush_buf(struct cor_conn *cn_lx)
2163 if (unlikely(cn_lx->targettype == TARGET_UNCONNECTED)) {
2164 cor_proc_cpacket(cn_lx);
2165 } else if (cn_lx->targettype == TARGET_SOCK) {
2166 cor_flush_sock(cn_lx);
2167 } else if (cn_lx->targettype == TARGET_OUT) {
2168 __u32 bytessent = 0;
2169 cor_flush_out(cn_lx, &bytessent);
2170 } else if (unlikely(cn_lx->targettype == TARGET_DISCARD)) {
2171 cor_databuf_ackdiscard(cn_lx);
2172 } else {
2173 BUG();