2 * Connection oriented routing
3 * Copyright (C) 2007-2013 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <asm/atomic.h>
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/interrupt.h>
26 #include <linux/sched.h>
27 #include <linux/netdevice.h>
28 #include <linux/skbuff.h>
29 #include <linux/spinlock.h>
30 #include <linux/workqueue.h>
31 #include <linux/kref.h>
32 #include <linux/ktime.h>
33 #include <linux/rbtree.h>
35 #include <linux/socket.h>
38 #include <linux/math64.h>
43 #define ETH_P_COR 0x1022
47 #define PROTO_COR_RAW 0
48 #define PROTO_COR_RDEAMON 1
50 #define SOCKADDRTYPE_PORT 1
51 #define SOCKADDRTYPE_ADDRPORT 2
59 #define COR_PASS_ON_CLOSE 1
60 #define COR_PUBLISH_SERVICE 2
62 #define MAX_CONN_CMD_LEN 4096
65 #define PACKET_TYPE_ANNOUNCE 1
66 #define PACKET_TYPE_CMSG 2
67 #define PACKET_TYPE_CONNDATA 3
70 * Kernel packet data - these commands are sent by the neighbor
71 * The end nodes may cause these commands to be sent, but they see them beyond
79 * KP_INIT_SESSION[1] sessionid[4]
81 * finishes neighbor discovery and starts a session
83 * Before this is received all other commands are ignored. The sessionid is used
84 * to prevent usage of old neighbor discovery data (e.g. addresses)
86 #define KP_INIT_SESSION 1
89 * KP_PING[1] cookie[4]
90 * KP_PONG[1] cookie[4] respdelay[4]
92 * This is needed to find out whether the other node is reachable. After a new
93 * neighbor is seen, ping requests are sent and the neighbor is only reachable
94 * after a few pongs are received. These requests are also used to find out
95 * whether a neighbor is gone.
98 * The receiver of a ping may delay the sending of the pong e.g. to create
99 * bigger packets. The respdelay is the time in microseconds the packet was
105 /* KP_ACK[1] seqno[6] */
109 * KP_ACK_CONN[1] conn_id[4] flags[1] seqno[6] window[1] seqno_ooo[6]
110 * length[1-4] priority_seqno[1] priority[1]
112 * conn_id is the conn_id we use if we sent something through this conn and
113 * *not* the conn_id that the neighbor used to send us the data
115 * flags defines which of the following fields are sent
117 * seqno = the seqno which is expected in the next non-out-of-order packet
119 * window = amount of data which can be sent without receiving the next ack
120 * packets with lower seqno do not overwrite the last window size
121 * The window may also be reduced. However, this only indicates a wish.
122 * Packets must be accepted if they exceed the new window, but not the old
127 * 1...255 = 64*2^((value-1)/7) end result is rounded down to an integer
129 * seqno_ooo, length = This packet was received out of order. Maybe a previous
130 * packet has been lost. Out of order data should not be retransmitted.
131 * Multiple ooo packets may be merged into a single ack. Ooo packets may be
132 * partially accepted, so that the length does not cover the full packet and/
133 * or the seqno starts in the middle of a packet
135 #define KP_ACK_CONN 5
137 #define KP_ACK_CONN_FLAGS_SEQNO 1
138 #define KP_ACK_CONN_FLAGS_WINDOW 2
139 #define KP_ACK_CONN_FLAGS_OOO 12 /* 4+8 */
140 #define KP_ACK_CONN_FLAGS_PRIORITY 16
142 static inline __u8
ooolen_to_flags(__u32 len
)
153 static inline int ooolen(__u8 flags
)
155 int len
= ((flags
& KP_ACK_CONN_FLAGS_OOO
) >> 2);
156 if (unlikely(len
== 3))
161 static inline int ack_conn_len(__u8 flags
)
164 if ((flags
& KP_ACK_CONN_FLAGS_SEQNO
) != 0) {
166 if ((flags
& KP_ACK_CONN_FLAGS_WINDOW
) != 0)
170 if (ooolen(flags
) != 0) {
172 len
+= ooolen(flags
);
175 if (flags
& KP_ACK_CONN_FLAGS_PRIORITY
)
182 * NOTE on connection ids:
183 * connection ids we receive with most significant bit 0 have been generated by
185 * connection ids we receive with most significant bit 1 have been generated by
188 * ATTENTION: the priority seqno are reversed:
189 * priority seqnos we send are used when we send updates
190 * priority seqnos we received are used when we receive updates
194 * incoming connection
195 * seqno1... used to ack data sent from the side which initiated the connection
196 * seqno2... used to ack data sent to the side which initiated the connection
197 * KP_CONNECT[1] conn_id[4] seqno1[6] seqno2[6] window[1] priority_seqno[1]
203 * incoming connection successful,
204 * KP_CONNECT_SUCCESS[1] conn_id[4] window[1]
206 #define KP_CONNECT_SUCCESS 7
208 /* KP_CONN_DATA[1] conn_id[4] seqno[6] length[2] data[length] */
209 #define KP_CONN_DATA 8
212 * KP_RESET_CONN[1] conn_id[4]
213 * We send this, if there is an established connection we want to close.
215 #define KP_RESET_CONN 9
218 * KP_SET_MAX_CMSG_DELAY[1] cpacket_ack_delay[4] data_ack_delay[4] cmsg_delay[4]
219 * Sent after connecting and at any change
220 * delays are specified in microsecs
222 #define KP_SET_MAX_CMSG_DELAY 10
226 * Connection data which is interpreted when the connection has no target yet
227 * These commands are sent by the end node.
230 * cmd[2] length[1-4] parameter[length]
231 * unrecognized commands are ignored
232 * parameters which are longer than expected are ignored as well
235 /* outgoing connection: CD_CONNECT_NB[2] length[1-4]
236 * addrlen[1-4] addr[addrlen] */
237 #define CD_CONNECT_NB 1
239 /* connection to local open part: CD_CONNECT_PORT[2] length[1-4] port[2] */
240 #define CD_CONNECT_PORT 2
243 * CD_LIST_NEIGH sends CDR_BINDATA if the command was successful. The response
247 * numfields[1-4] (field[2] fieldlen[1-4])[numfields]
248 * rows[responserows]:
249 * fieldlen[1-4], only if fieldlen in the header was "0"
250 * fielddata[fieldlen]
252 * Future versions may append data to field definition. Therefore clients must
253 * silently discard data at the end they do not expect.
256 /* list connected neighbors: CD_LIST_NEIGH[2] length[1-4] */
257 #define CD_LIST_NEIGH 3
262 #define LIST_NEIGH_FIELD_ADDR 1
265 * latency_in_microsecs[1] (64_11 encoding)
266 * Only raw network latency is measured. Delays caused by the priority queues
267 * are *not* included.
269 #define LIST_NEIGH_FIELD_LATENCY 2
271 /* list services: CD_LIST_SERVICES[2] length[1-4] */
272 #define CD_LIST_SERVICES 4
274 /* list extensions: CD_CAPABLITIES[2] length[1-4] page[2] */
275 #define CD_CAPABLITIES 5
280 * Connection data response
281 * Format is the same as with connection data
290 * CDR_EXECFAILED[1] reasoncode[2]
292 #define CDR_EXECFAILED 2
293 #define CDR_EXECFAILED_INVALID_COMMAND 1
294 #define CDR_EXECFAILED_TEMPORARILY_OUT_OF_RESSOURCES 2
295 #define CDR_EXECFAILED_NB_DOESNTEXIST 3
296 #define CDR_EXECFAILED_PORTCLOSED 4
299 * must be sent after CDR_EXEC{OK|FAILED}
300 * CDR_BINDATA[1] bindatalen[1-4] bindata[bindatalen] */
301 #define CDR_BINDATA 3
304 * routing daemon sock
306 * cmdcode[4] length[4] cmddata[length]
310 #define CRD_KTU_SUPPORTEDVERSIONS 1
312 * CRD_KTU_SUPPORTEDVERSIONS[4] length[4] min[4] max[4]
315 #define CRD_KTU_CONNECT 2
317 * CRD_KTU_CONNECT[4] length[4] cookie[8] targetlen[4] target[targetlen]
320 #define CRD_UTK_VERSION 1
322 * CRD_UTK_VERSION[4] length[4] version[4]
328 * CRD_UTK_UP[4] length[4] flags[8] addrlen[4] addr[addrlen]
333 #define CRD_UTK_CONNECTERROR 3
335 * CRD_UTK_CONNECTERROR[4] length[4] cookie[8] error[4]
338 #define CRD_UTK_CONNECTERROR_ACCES 1
339 #define CRD_UTK_CONNECTERROR_NETUNREACH 2
340 #define CRD_UTK_CONNECTERROR_TIMEDOUT 3
341 #define CRD_UTK_CONNECTERROR_REFUSED 4
343 #define CONN_MNGD_HEADERLEN 2
344 #define CONN_MNGD_CHECKSUMLEN 4
346 #define CONN_MNGD_HASDATA (1 << 15)
347 #define CONN_MNGD_EOF (1 << 14)
348 #define CONN_MNGD_RCVEND (1 << 13)
349 #define CONN_MNGD_DATALEN 32767
351 #define CONN_MNGD_MAX_SEGMENT_SIZE (CONN_MNGD_DATALEN + 1)
353 #define PRIORITY_MAX 15384774
358 /* result codes for rcv.c/proc_packet */
360 #define RC_FINISHED 1
362 #define RC_RCV1_ANNOUNCE 2
363 #define RC_RCV1_KERNEL 3
364 #define RC_RCV1_CONN 4
367 struct heap_element
*up
;
368 struct heap_element
*left
;
369 struct heap_element
*right
;
375 struct heap_element
*top
;
378 struct heap_definition
{
385 int (*compare_elements
)(struct heap_element
*el1
,
386 struct heap_element
*el2
);
390 __u64 speed
;/* moving average, 1000 == 1byte/sec */
391 unsigned long jiffies_last_update
;
395 struct buffertracker
{
396 __u32 buffered
;/* moving average */
397 unsigned long jiffies_last_update
;
406 struct list_head queue_list
;
408 struct net_device
*dev
; /* may not change while queue is in list */
410 struct timer_list qos_resume_timer
;
411 struct tasklet_struct qos_resume_task
;
412 int qos_resume_scheduled
;
413 unsigned long jiffies_lastprogress
;
415 struct list_head kpackets_waiting
;
416 struct list_head conn_retrans_waiting
;
417 struct list_head announce_waiting
;
418 struct list_head conns_waiting
;
421 atomic64_t priority_sum
;
429 #define ANNOUNCE_TYPE_BROADCAST 1
430 #define ANNOUNCE_TYPE_UNICAST 2
432 struct announce_data
{
438 struct net_device
*dev
;
439 char mac
[MAX_ADDR_LEN
];
440 struct delayed_work announce_work
;
441 struct resume_block rb
;
445 ktime_t time_created
;
447 unsigned long jiffies_sent
;
450 __u8 pongs
; /* count of pongs for pings sent after this one */
453 #define NEIGHBOR_STATE_INITIAL 0
454 #define NEIGHBOR_STATE_ACTIVE 1
455 #define NEIGHBOR_STATE_STALLED 2
456 #define NEIGHBOR_STATE_KILLED 3
459 struct list_head nb_list
;
463 struct net_device
*dev
;
464 char mac
[MAX_ADDR_LEN
];
465 struct qos_queue
*queue
;
469 atomic_t sessionid_rcv_needed
;
470 atomic_t sessionid_snd_needed
;
475 struct timer_list cmsg_timer
;
476 struct tasklet_struct cmsg_task
;
477 atomic_t cmsg_task_scheduled
;
478 atomic_t cmsg_timer_running
;
479 spinlock_t cmsg_lock
;
480 spinlock_t send_cmsg_lock
;
481 struct list_head cmsg_queue_pong
;
482 struct list_head cmsg_queue_ack
;
483 struct list_head cmsg_queue_ackconn
;
484 struct heap cmsg_queue_conndata
;
485 struct list_head cmsg_queue_other
;
487 struct rb_root pending_conn_resets_rb
;
489 unsigned long timeout
;
491 __u32 cmsg_pongslength
;
492 __u32 cmsg_otherlength
;
494 atomic_t cmsg_pongscnt
; /* size of queue only */
495 atomic_t cmsg_pongs_retrans_cnt
; /* number of retransmits only */
496 atomic_t cmsg_othercnt
; /* size of queue + retransmits */
498 atomic_t cmsg_bulk_readds
;
501 unsigned long jiffies_last_cmsg
;
502 __u32 cmsg_interval
; /* microsecs */
504 __u8 max_cmsg_delay_sent
;
506 /* protected by queues_lock */
507 struct resume_block rb_kp
;
508 struct resume_block rb_cr
;
510 spinlock_t state_lock
;
511 unsigned long last_ping_time
;
513 struct ping_cookie cookies
[PING_COOKIES_PER_NEIGH
];
514 __u32 ping_intransit
;
517 __u64 latency_variance_retrans_us
; /* microsecs */
518 atomic_t latency_retrans_us
; /* microsecs */
519 atomic_t latency_stddev_retrans_us
; /* microsecs */
520 atomic_t latency_advertised_us
; /* microsecs */
521 atomic_t max_remote_ack_delay_us
; /* microsecs */
522 atomic_t max_remote_ackconn_delay_us
; /* microsecs */
523 atomic_t max_remote_other_delay_us
; /* microsecs */
526 __u64 last_state_change
;/* initial state */
529 * time of the last sent packet which has been acked or
530 * otherwise responded to (e.g. pong)
532 unsigned long last_roundtrip
;/* active/stalled state */
534 ktime_t last_roundtrip_end
;
538 __u8 str_timer_pending
;
539 struct delayed_work stalltimeout_timer
;
541 spinlock_t connid_lock
;
542 struct rb_root connid_rb
;
544 spinlock_t connid_reuse_lock
;
545 struct rb_root connid_reuse_rb
;
546 struct list_head connid_reuse_list
;
547 __u16 connid_reuse_pingcnt
;
549 spinlock_t kp_retransmits_lock
;
550 struct rb_root kp_retransmits_rb
;
552 atomic64_t kpacket_seqno
;
554 atomic64_t priority_sum
;
557 * connections which receive data from/send data to this node
558 * used when terminating all connections of a neighbor
560 spinlock_t conn_list_lock
;
561 struct list_head rcv_conn_list
;
563 spinlock_t stalledconn_lock
;
564 struct work_struct stalledconn_work
;
565 __u8 stalledconn_work_scheduled
;
566 struct list_head stalledconn_list
;
569 * the timer has to be inited when adding the neighbor
571 * add_timer(struct timer_list * timer);
573 spinlock_t retrans_lock
;
574 struct timer_list retrans_timer_conn
;
575 struct timer_list retrans_timer
;
576 struct tasklet_struct retrans_task_conn
;
577 struct tasklet_struct retrans_task
;
578 __u8 retrans_timer_conn_running
;
579 __u8 retrans_conn_running
;
580 __u8 retrans_timer_running
;
582 struct list_head retrans_list
;
583 struct list_head retrans_list_conn
;
586 struct cor_sched_data
{
588 struct list_head conn_list
;
589 struct sk_buff_head requeue_queue
;
592 #define DATABUF_BUF 0
593 #define DATABUF_SKB 1
595 struct data_buf_item
{
596 struct list_head buf_list
;
605 struct connid_reuse_item
{
618 * There are 2 conn objects per bi-directional connection. They refer to each
619 * other via the reversedir field.
625 * cn: conn we have no clue what is inside
626 * src_in, trgt_unconn, trgt_out, ...: A conn with the specified source or
627 * targettype. In the unlocked case the types are actually just a guess,
628 * because they could have changed since the last access. After locking the
629 * source/destination parameters have to be checked whether they still are
630 * what we expect. This includes source/targettype, neighbor, conn_id
632 * Exception: they may not change after they are set to source/target sock
633 * until the socket is released.
637 * no suffix: unlocked
639 * _l: this direction is locked
641 * _ll: both directions are locked
643 * _o: unlocked, but source or target is known for sure, because an outside
644 * lock is taken; For variables on the heap this means that an outside lock must
645 * be taken before accessing the struct which points to the conn can be
651 * The following fields are immutable after the conn has been allocated:
652 * is_client, reversedir
654 * Most fields are protected by rcv_lock. Fields which control
655 * source and destination of the data flow require both directions to
656 * be locked and external references to be cleared before the change can
657 * happen. This includes fields like sourcetype, targettype, connid,
658 * list_heads, htab_entries, ???. In this case the side with is_client == 1
659 * needs to be locked first. Changes to conn_id and neighbor also require
660 * removing the conn from the htables first.
662 * Some other fields, like htab_entry and some list_heads, are locked
663 * outside (e.g. at struct neighbor).
665 #define SOURCE_UNCONNECTED 0
667 #define SOURCE_SOCK 2
669 #define TARGET_UNCONNECTED 0
671 #define TARGET_SOCK 2
672 #define TARGET_DISCARD 3
674 #define FLUSHDELAY_SENDNOW 0
675 #define FLUSHDELAY_SENDSOME 1
676 #define FLUSHDELAY_DELAY 2
681 __u8 target_congested
:1;
687 * 0... connection active
688 * 1... connection is about to be reset, target does not need to be
690 * 2... connection is reset
691 * 3... connection is reset + no pointers to "struct conn *reversedir"
692 * remaining except from this conn
696 struct list_head queue_list
;
702 struct speedtracker st
;
703 struct buffertracker bt
;
708 /* list of all connections from this neighbor */
709 struct list_head nb_list
;
711 struct list_head reorder_queue
;
714 struct connid_reuse_item
*cir
;
718 /* number of ack sent, not data seqno */
721 __u16 small_ooo_packets
;
725 __u8 inorder_ack_needed
;
729 __u64 window_seqnolimit
;
730 __u64 window_seqnolimit_remote
;
733 __u32 buffer_reserve
;
735 __u32 speedtracker_bytes
;
737 struct list_head buffer_list
;
739 /* protected by nb->cmsg_lock */
740 struct list_head acks_pending
;
742 unsigned long jiffies_bufferspeed_set
;
744 unsigned long jiffies_last_act
;
751 struct list_head cl_list
;
766 char paramlen_buf
[4];
768 __u8 in_buffer_wait_list
;
769 struct list_head buffer_wait_list
;
774 /* protected by nb->retrans_lock, sorted by seqno */
775 struct list_head retrans_list
;
777 /* protected by nb->stalledconn_lock */
778 struct list_head nbstalled_lh
;
781 __u64 seqno_nextsend
;
783 __u64 seqno_windowlimit
;
785 struct resume_block rb
;
793 __u8 priority_send_allowed
;
804 struct list_head items
;
805 struct data_buf_item
*nextread
;
810 __u32 read_remaining
;
812 __u16 next_read_offset
;
814 __u16 cpacket_buffer
;/* including overhead */
817 /* these 2 vars are protected by cpacket_bufferlimits_lock */
818 struct list_head cpacket_lh
;
819 unsigned long cpacket_bufused_since
;
821 struct conn
*reversedir
;
824 struct conn_retrans
{
825 /* timeout_list and conn_list share a single ref */
827 struct list_head timeout_list
;
828 struct list_head conn_list
;
829 struct conn
*trgt_out_o
;
835 unsigned long timeout
;
854 struct skb_procstate
{
857 struct work_struct work
;
869 struct data_buf_item dbi
;
874 #define CS_TYPE_UNCONNECTED 0
875 #define CS_TYPE_LISTENER 1
876 #define CS_TYPE_CONN_RAW 2
877 #define CS_TYPE_CONN_MANAGED 3
879 #define CS_CONNECTSTATE_UNCONNECTED 0
880 #define CS_CONNECTSTATE_CONNECTING 1
881 #define CS_CONNECTSTATE_CONNECTED 2
882 #define CS_CONNECTSTATE_ERROR 3
884 #define CS_SHUTDOWN_SHUTDOWN_RD (1 << 0)
885 #define CS_SHUTDOWN_SHUTDOWN_WR (1 << 1)
886 #define CS_SHUTDOWN_SENT_EOF (1 << 2)
887 #define CS_SHUTDOWN_SENT_RCVEND (1 << 3)
888 #define CS_SHUTDOWN_RCVD_EOF (1 << 4)
889 #define CS_SHUTDOWN_RCVD_RCVEND (1 << 5)
892 struct sock sk
; /* must be first */
897 /* type may not change once it is set to != CS_TYPE_UNCONNECTED */
901 __u8 publish_service
;
905 /* listener is protected by cor_bindnodes */
908 __u8 publish_service
;
911 struct list_head conn_queue
;
915 struct conn
*src_sock
;
916 struct conn
*trgt_sock
;
918 struct data_buf_item
*rcvitem
;
921 struct cor_sock
*pass_on_close
;
925 struct cor_sockaddr remoteaddr
;
927 struct list_head rd_msgs
;
928 struct list_head crd_lh
;
942 struct conn
*src_sock
;
943 struct conn
*trgt_sock
;
945 char snd_hdr
[CONN_MNGD_HEADERLEN
];
946 char snd_chksum
[CONN_MNGD_CHECKSUMLEN
];
951 /* protected by cor_sock->lock */
952 __u8 in_flushtoconn_oom_list
;
953 /* protected by flushtoconn_oom_lock */
954 struct list_head flushtoconn_oom_lh
;
957 char rcv_hdr
[CONN_MNGD_HEADERLEN
];
958 char rcv_chksum
[CONN_MNGD_CHECKSUMLEN
];
963 __u16 rcvbuf_consumed
;
968 struct work_struct flushtoconn_work
;
969 struct work_struct readfromconn_work
;
970 atomic_t flushtoconn_work_scheduled
;
971 atomic_t readfromconn_work_scheduled
;
977 extern atomic_t num_conns
;
979 extern spinlock_t cor_bindnodes
;
981 extern struct conn
*get_conn(struct neighbor
*nb
, __u32 conn_id
);
983 extern void connid_used_pingsuccess(struct neighbor
*nb
);
985 extern void _set_last_act(struct conn
*src_in_l
);
987 extern void free_conn(struct kref
*ref
);
989 extern int conn_init_out(struct conn
*trgt_unconn_ll
, struct neighbor
*nb
,
990 __u32 rcvd_connid
, int use_rcvd_connid
);
992 extern void conn_init_sock_source(struct conn
*cn
);
994 extern void conn_init_sock_target(struct conn
*cn
);
996 extern __u32
list_services(char *buf
, __u32 buflen
);
998 extern void set_publish_service(struct cor_sock
*cs
, __u8 value
);
1000 extern void close_port(struct cor_sock
*cs
);
1002 extern int open_port(struct cor_sock
*cs_l
, __be16 port
);
1004 extern int connect_port(struct conn
*trgt_unconn_l
, __be16 port
);
1006 extern int connect_neigh(struct conn
*trgt_unconn_l
, char *addr
, __u16 addrlen
);
1008 extern struct conn
* alloc_conn(gfp_t allocflags
);
1010 extern void reset_conn(struct conn
*cn
);
1013 extern __u8
__attribute__((const)) enc_log_256_16(__u32 value
);
1015 extern __u32
__attribute__((const)) dec_log_256_16(__u8 value
);
1017 extern __u8
__attribute__((const)) enc_log_64_11(__u32 value
);
1019 extern __u32
__attribute__((const)) dec_log_64_11(__u8 value
);
1021 extern __u8
__attribute__((const)) enc_log_64_7(__u64 value
);
1023 extern __u64
__attribute__((const)) dec_log_64_7(__u8 value
);
1025 static inline void mul(__u64 a
, __u64 b
, __u64
*reshigh
, __u64
*reslow
)
1028 __u32 ah
= (a
>> 32);
1030 __u32 bh
= (b
>> 32);
1032 __u64 r1
= ((__u64
) al
) * bl
;
1033 __u64 r20
= (r1
>> 32);
1034 __u64 r21
= ((__u64
) ah
) * bl
;
1035 __u64 r22
= ((__u64
) al
) * bh
;
1036 __u64 r2
= r20
+ ((r21
<< 32) >> 32) + ((r22
<< 32) >> 32);
1037 __u64 r30
= (r21
>> 32) + (r22
>> 32) + (r2
>> 32);
1038 __u64 r31
= ((__u64
) ah
) * bh
;
1040 BUG_ON(reshigh
== 0);
1041 BUG_ON(reslow
== 0);
1043 (*reslow
) = ((r1
<< 32) >> 32) + (r2
<< 32);
1044 (*reshigh
) = r30
+ r31
;
1047 /* calculate (a*b+rem)/c */
1048 static inline __u64
multiply_shift(__u64 a
, __u64 b
, __u64 rem
, int shift
,
1057 mul(a
, b
, &high
, &low
);
1059 if (low
+ ((__u64
) rem
) < low
)
1061 low
+= ((__u64
) rem
);
1063 if (remainder
!= 0) {
1067 (*remainder
) = ((low
<< (64 - shift
)) >> (64 - shift
));
1072 } else if (unlikely(((high
<< (64 - shift
)) >> (64 - shift
)) != high
)) {
1074 (*remainder
) = ((1LL << shift
) - 1);
1077 return (low
>> shift
) | (high
<< (64 - shift
));
1081 extern __u64
multiply_div2(__u64 a
, __u64 b
, __u64 rem
, __u64 c
,
1084 static inline __u64
__attribute__((const)) multiply_div(__u64 a
, __u64 b
,
1087 return multiply_div2(a
, b
, 0, c
, 0);
1090 extern void kreffree_bug(struct kref
*ref
);
1092 extern void cor_heap_remove(struct heap_definition
*hpd
, struct heap
*hp
,
1093 struct heap_element
*hpel
);
1095 extern void cor_heap_insert(struct heap_definition
*hpd
, struct heap
*hp
,
1096 struct heap_element
*hpel
);
1098 extern void cor_heap_init(struct heap
*hp
);
1100 extern void speedtracker_init(struct speedtracker
*st
);
1102 /* return 1000 == 1byte/sec */
1103 extern __u64
speedtracker_update(struct speedtracker
*st
, __u32 bytes
);
1105 extern void buffertracker_update(struct conn
*cn_l
);
1107 extern int __init
cor_util_init(void);
1110 extern int newconn_checkpriority(struct neighbor
*nb
, __u8 priority
);
1112 extern __u32
refresh_conn_priority(struct conn
*cn
, int locked
);
1114 extern void set_conn_in_priority(struct neighbor
*nb
, __u32 conn_id
,
1115 struct conn
*src_in
, __u8 priority_seqno
, __u8 priority
);
1117 extern void connreset_priority(struct conn
*cn
);
1119 extern int __init
credits_init(void);
1122 extern void neighbor_free(struct kref
*ref
);
1124 extern int is_from_nb(struct sk_buff
*skb
, struct neighbor
*nb
);
1126 extern struct neighbor
*get_neigh_by_mac(struct sk_buff
*skb
);
1128 extern struct neighbor
*find_neigh(char *addr
, __u16 addrlen
);
1130 extern __u32
generate_neigh_list(char *buf
, __u32 buflen
);
1132 extern int get_neigh_state(struct neighbor
*nb
);
1134 extern void ping_resp(struct neighbor
*nb
, __u32 cookie
, __u32 respdelay
);
1136 extern __u32
add_ping_req(struct neighbor
*nb
, unsigned long *last_ping_time
,
1139 extern void ping_sent(struct neighbor
*nb
, __u32 cookie
);
1141 extern void unadd_ping_req(struct neighbor
*nb
, __u32 cookie
,
1142 unsigned long last_ping_time
, int congested
);
1144 #define TIMETOSENDPING_NO 0
1145 #define TIMETOSENDPING_YES 1
1146 #define TIMETOSENDPING_FORCE 2
1147 extern int time_to_send_ping(struct neighbor
*nb
);
1149 extern unsigned long get_next_ping_time(struct neighbor
*nb
);
1151 extern int force_ping(struct neighbor
*nb
);
1153 extern int rcv_announce(struct sk_buff
*skb
);
1155 extern int _send_announce(struct announce_data
*ann
, int fromqos
);
1157 extern void announce_data_free(struct kref
*ref
);
1159 extern void announce_send_stop(struct net_device
*dev
, char *mac
, int type
);
1161 extern void cor_neighbor_down(void);
1163 extern int cor_neighbor_up(char *addr2
, __u32 addrlen2
);
1165 extern int is_clientmode(void);
1167 extern int __init
cor_neighbor_init(void);
1170 extern void update_windowlimit(struct conn
*src_in_l
);
1172 extern void reset_bufferusage(struct conn
*cn
);
1174 extern void reset_ooo_queue(struct conn
*src_in_l
);
1176 extern void drain_ooo_queue(struct conn
*src_in_l
);
1178 void conn_rcv(struct neighbor
*nb
, struct sk_buff
*skb
, char *data
,
1179 __u32 len
, __u32 conn_id
, __u64 seqno
);
1181 extern void cor_rcv_down(void);
1183 extern void cor_rcv_up(void);
1185 extern int __init
cor_rcv_init(void);
1187 /* kpacket_parse.c */
1188 extern void kernel_packet(struct neighbor
*nb
, struct sk_buff
*skb
,
1192 struct control_msg_out
;
1194 #define ACM_PRIORITY_LOW 1 /* oom recovery easy */
1195 #define ACM_PRIORITY_MED 2 /* oom may cause timeouts */
1196 #define ACM_PRIORITY_HIGH 3 /* cm acks - needed for freeing old cms */
1198 extern int may_alloc_control_msg(struct neighbor
*nb
, int priority
);
1200 extern struct control_msg_out
*alloc_control_msg(struct neighbor
*nb
,
1203 extern void free_control_msg(struct control_msg_out
*cm
);
1205 extern void retransmit_taskfunc(unsigned long arg
);
1207 extern void retransmit_timerfunc(struct timer_list
*retrans_timer
);
1209 extern void kern_ack_rcvd(struct neighbor
*nb
, __u64 seqno
);
1211 extern int send_messages(struct neighbor
*nb
, int resume
);
1213 extern void controlmsg_taskfunc(unsigned long nb
);
1215 extern void controlmsg_timerfunc(struct timer_list
*cmsg_timer
);
1217 extern void schedule_controlmsg_timer(struct neighbor
*nb_cmsglocked
);
1219 extern void send_pong(struct neighbor
*nb
,
1222 extern int send_reset_conn(struct neighbor
*nb
, __u32 conn_id
, int lowprio
);
1224 extern void send_ack(struct neighbor
*nb
,
1227 extern void send_ack_conn_ifneeded(struct conn
*src_in_l
, __u64 seqno_ooo
,
1230 extern void send_priority(struct conn
*trgt_out_ll
, int force
,
1233 extern void free_ack_conns(struct conn
*src_in_l
);
1235 extern void send_connect_success(struct control_msg_out
*cm
, __u32 conn_id
,
1236 struct conn
*src_in
);
1238 extern void send_connect_nb(struct control_msg_out
*cm
, __u32 conn_id
,
1239 __u64 seqno1
, __u64 seqno2
, struct conn
*src_in
);
1241 extern void send_conndata(struct control_msg_out
*cm
, __u32 conn_id
,
1242 __u64 seqno
, char *data_orig
, char *data
, __u32 datalen
,
1243 __u16 max_delay_hz
, struct conn_retrans
*cr
, int fromsplit
);
1245 extern int __init
cor_kgen_init(void);
1247 /* cpacket_parse.c */
1248 extern void mod_cpacket_buffer(struct conn
*cn_l
);
1250 extern void free_cpacket_buffer(__s32 amount
);
1252 extern void connreset_cpacket_buffer(struct conn
*trgt_unconn_l
);
1254 extern int encode_len(char *buf
, int buflen
, __u32 len
);
1256 extern int decode_len(char *buf
, int buflen
, __u32
*len
);
1258 extern void parse(struct conn
*trgt_unconn
, int fromresume
);
1260 extern int __init
cor_cpacket_init(void);
1263 #ifdef DEBUG_QOS_SLOWSEND
1264 extern int cor_dev_queue_xmit(struct sk_buff
*skb
, int caller
);
/*
 * Inline variant of cor_dev_queue_xmit(): passes skb directly to
 * dev_queue_xmit() and returns its result.
 *
 * caller is not used in this variant; the parameter is kept so the signature
 * is identical to the extern declaration guarded by DEBUG_QOS_SLOWSEND.
 */
static inline int cor_dev_queue_xmit(struct sk_buff *skb, int caller)
{
	return dev_queue_xmit(skb);
}
1272 extern void free_qos(struct kref
*ref
);
1274 extern struct qos_queue
*get_queue(struct net_device
*dev
);
1276 extern int destroy_queue(struct net_device
*dev
);
1278 extern int create_queue(struct net_device
*dev
);
1280 #define QOS_RESUME_DONE 0
1281 #define QOS_RESUME_CONG 1
1282 #define QOS_RESUME_CONG_NOPROGRESS 2
1284 #define QOS_CALLER_KPACKET 0
1285 #define QOS_CALLER_CONN_RETRANS 1
1286 #define QOS_CALLER_ANNOUNCE 2
1287 #define QOS_CALLER_CONN 3
1289 extern void qos_enqueue(struct qos_queue
*q
, struct resume_block
*rb
,
1292 extern void qos_remove_conn(struct conn
*trgt_out_l
);
1294 extern int may_send_announce(struct net_device
*dev
);
1296 extern struct sk_buff
*create_packet_cmsg(struct neighbor
*nb
, int size
,
1297 gfp_t alloc_flags
, __u64 seqno
);
1299 extern void reschedule_conn_retrans_timer(struct neighbor
*nb_retranslocked
);
1301 extern void cancel_conn_all_retrans(struct conn
*trgt_out_l
);
1303 extern void retransmit_conn_taskfunc(unsigned long nb
);
1305 extern void retransmit_conn_timerfunc(struct timer_list
*retrans_timer_conn
);
1307 extern void conn_ack_ooo_rcvd(struct neighbor
*nb
, __u32 conn_id
,
1308 struct conn
*trgt_out
, __u64 seqno_ooo
, __u32 length
);
1310 extern void conn_ack_rcvd(struct neighbor
*nb
, __u32 conn_id
,
1311 struct conn
*trgt_out
, __u64 seqno
, int setwindow
, __u8 window
);
1313 extern void schedule_retransmit_conn(struct conn_retrans
*cr
, int connlocked
);
1315 /* RC_FLUSH_CONN_OUT_SENT | RC_FLUSH_CONN_OUT_{^SENT} */
1316 #define RC_FLUSH_CONN_OUT_OK 1
1317 #define RC_FLUSH_CONN_OUT_NBNOTACTIVE 2
1318 #define RC_FLUSH_CONN_OUT_CONG 3
1319 #define RC_FLUSH_CONN_OUT_MAXSENT 4
1320 #define RC_FLUSH_CONN_OUT_OOM 5
1321 extern int flush_out(struct conn
*trgt_out_l
, __u32
*sent
);
1323 extern void resume_nbstalled_conns(struct work_struct
*work
);
1325 extern int __init
cor_snd_init(void);
1328 extern struct kmem_cache
*data_buf_item_slab
;
1330 extern void databuf_init(struct conn
*cn_init
);
1332 extern void databuf_ackdiscard(struct conn
*cn_l
);
1334 extern void reset_seqno(struct conn
*cn_l
, __u64 initseqno
);
1336 extern void databuf_pull(struct conn
*cn_l
, char *dst
, __u32 len
);
1338 static inline __u32
databuf_trypull(struct conn
*cn_l
, char *dst
, __u32 len
)
1340 if (len
> cn_l
->data_buf
.read_remaining
)
1341 len
= cn_l
->data_buf
.read_remaining
;
1342 databuf_pull(cn_l
, dst
, len
);
1346 extern void databuf_unpull_dpi(struct conn
*trgt_sock_l
, struct cor_sock
*cs
,
1347 struct data_buf_item
*item
, __u16 next_read_offset
);
1349 extern void databuf_pull_dbi(struct cor_sock
*cs_rl
, struct conn
*trgt_sock_l
);
1351 extern void databuf_unpull(struct conn
*trgt_out_l
, __u32 bytes
);
1353 extern void databuf_pullold(struct conn
*trgt_out_l
, __u64 startpos
, char *dst
,
1356 extern void databuf_ack(struct conn
*trgt_out_l
, __u64 pos
);
1358 extern void databuf_ackread(struct conn
*cn_l
);
1360 extern __u32
receive_buf(struct conn
*cn_l
, char *buf
, __u32 datalen
);
1362 extern void receive_cpacketresp(struct conn
*trgt_unconn_l
, char *buf
,
1363 __u32 len
, int *reset_needed
);
1365 extern __u32
receive_skb(struct conn
*src_in_l
, struct sk_buff
*skb
);
1367 extern void wake_sender(struct conn
*cn
);
1369 extern int flush_buf(struct conn
*cn
, int wakesender
);
1371 extern int __init
forward_init(void);
1373 /* sock_rdaemon.c */
1374 extern int cor_create_rdaemon_sock(struct net
*net
, struct socket
*sock
,
1375 int protocol
, int kern
);
1377 extern int rdreq_connect(struct cor_sock
*cs
);
1379 extern void cor_usersock_release(struct cor_sock
*cs
);
1381 extern int __init
cor_rd_init1(void);
1383 extern int __init
cor_rd_init2(void);
1386 extern int cor_create_raw_sock(struct net
*net
, struct socket
*sock
,
1387 int protocol
, int kern
);
1389 /* sock_managed.c */
1390 extern struct cor_sock
*get_corsock_by_cookie(__be64 cookie
);
1392 extern void __set_sock_connecterror(struct cor_sock
*cs_m_l
, int errorno
);
1394 extern void _set_sock_connecterror(struct cor_sock
*cs
, int errorno
);
1396 static inline void set_sock_connecterror(__be64 cookie
, int errorno
)
1398 struct cor_sock
*cs
= get_corsock_by_cookie(cookie
);
1399 _set_sock_connecterror(cs
, errorno
);
1402 extern int cor_mngdsocket_readfromconn(struct cor_sock
*cs_l
);
1404 extern int cor_create_managed_sock(struct net
*net
, struct socket
*sock
,
1405 int protocol
, int kern
);
1407 extern int __init
cor_sock_managed_init1(void);
1410 extern void free_sock(struct kref
*ref
);
1412 extern void cor_sock_flushtoconn(struct cor_sock
*cs
);
1414 extern void cor_sock_readfromconn(struct cor_sock
*cs
);
1416 extern int cor_socket_socketpair(struct socket
*sock1
, struct socket
*sock2
);
1418 extern int cor_socket_getname(struct socket
*sock
, struct sockaddr
*addr
,
/* Declaration only; takes the file, the socket and the VMA to map. */
extern int cor_socket_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma);
/*
 * NOTE(review): this prototype is cut off after "int protocol,"; the last
 * parameter(s) and the closing ");" are missing from this listing. Restore
 * them from the definition before relying on this declaration.
 */
1424 extern int _cor_createsock(struct net
*net
, struct socket
*sock
, int protocol
,
1427 extern int __init
cor_sock_init1(void);
1429 extern int __init
cor_sock_init2(void);
1432 static inline struct skb_procstate
*skb_pstate(struct sk_buff
*skb
)
1434 return (struct skb_procstate
*) &(skb
->cb
[0]);
1437 static inline struct sk_buff
*skb_from_pstate(struct skb_procstate
*ps
)
1439 return (struct sk_buff
*) (((char *)ps
) - offsetof(struct sk_buff
,cb
));
1442 static inline __u32
mss(struct neighbor
*nb
, __u32 l3overhead
)
1444 __u32 mtu
= (nb
->dev
->mtu
> 4096) ? 4096 : nb
->dev
->mtu
;
1445 return mtu
- LL_RESERVED_SPACE(nb
->dev
) - l3overhead
;
/*
 * mss_cmsg() takes a neighbor and returns a __u32.
 * NOTE(review): only the signature survived in this listing; the function
 * body is missing. Presumably a wrapper around mss() - confirm against the
 * original file.
 */
1448 static inline __u32
mss_cmsg(struct neighbor
*nb
)
/*
 * mss_conndata() takes a neighbor and returns a __u32.
 * NOTE(review): only the signature survived in this listing; the function
 * body is missing. Presumably a wrapper around mss() - confirm against the
 * original file.
 */
1453 static inline __u32
mss_conndata(struct neighbor
*nb
)
1458 static inline long calc_timeout(__u32 latency_us
, __u32 latency_stddev_us
,
1459 __u32 max_remote_ack_delay_us
)
1461 unsigned long addto
;
1462 if (unlikely(unlikely(latency_us
> 1000000000) ||
1463 unlikely(latency_stddev_us
> 500000000) ||
1464 unlikely(max_remote_ack_delay_us
> 1000000000))) {
1465 addto
= msecs_to_jiffies(latency_us
/1000 + latency_us
/4000 +
1466 latency_stddev_us
/333 +
1467 max_remote_ack_delay_us
/1000);
1469 addto
= usecs_to_jiffies(latency_us
+ latency_us
/4 +
1470 latency_stddev_us
*3 + max_remote_ack_delay_us
);
1474 * 2 is added because
1475 * 1) _to_jiffies rounds down, but should round up, so add 1 to
1477 * 2) even if latency is 0, we never want to schedule the retransmit
1478 * to run right now, so add 1 more
1480 return jiffies
+ 2 + addto
;
/*
 * Store the 8 bytes of the big-endian @value at @dst, byte by byte, in
 * memory order. Copying single bytes means @dst needs no particular
 * alignment.
 */
static inline void put_be64(char *dst, __be64 value)
{
	char *p_value = (char *) &value;

	dst[0] = p_value[0];
	dst[1] = p_value[1];
	dst[2] = p_value[2];
	dst[3] = p_value[3];
	dst[4] = p_value[4];
	dst[5] = p_value[5];
	dst[6] = p_value[6];
	dst[7] = p_value[7];
}
1497 static inline void put_u64(char *dst
, __u64 value
)
1499 put_be64(dst
, cpu_to_be64(value
));
1502 static inline void put_u48(char *dst
, __u64 value
)
1504 char *p_value
= (char *) &value
;
1506 value
= cpu_to_be64(value
);
1508 dst
[0] = p_value
[2];
1509 dst
[1] = p_value
[3];
1510 dst
[2] = p_value
[4];
1511 dst
[3] = p_value
[5];
1512 dst
[4] = p_value
[6];
1513 dst
[5] = p_value
[7];
/*
 * Store the 4 bytes of the big-endian @value at @dst, byte by byte, in
 * memory order. @dst needs no particular alignment.
 */
static inline void put_be32(char *dst, __be32 value)
{
	char *p_value = (char *) &value;

	dst[0] = p_value[0];
	dst[1] = p_value[1];
	dst[2] = p_value[2];
	dst[3] = p_value[3];
}
1525 static inline void put_u32(char *dst
, __u32 value
)
1527 put_be32(dst
, cpu_to_be32(value
));
/*
 * Store the 2 bytes of the big-endian @value at @dst, byte by byte, in
 * memory order. @dst needs no particular alignment.
 */
static inline void put_be16(char *dst, __be16 value)
{
	char *p_value = (char *) &value;

	dst[0] = p_value[0];
	dst[1] = p_value[1];
}
1537 static inline void put_u16(char *dst
, __u16 value
)
1539 put_be16(dst
, cpu_to_be16(value
));
/*
 * Calls skb_pull(skb, len) and tests the returned pointer against 0.
 * NOTE(review): the statements following the "if" (what happens on a 0
 * pointer and what is finally returned) are missing from this listing;
 * check the original file before changing callers.
 */
1542 static inline char *cor_pull_skb(struct sk_buff
*skb
, unsigned int len
)
1544 char *ptr
= skb_pull(skb
, len
);
1546 if (unlikely(ptr
== 0))
/*
 * Copies 8 bytes from @buf, byte by byte and in memory order, into a local
 * "ret" (no byte swapping is done here).
 * NOTE(review): the declaration of "ret", any argument check and the return
 * statement are missing from this listing; presumably "ret" is a __be64
 * that is returned - confirm against the original file.
 */
1552 static inline __be64
parse_be64(char *buf
)
1558 ((char *)&ret
)[0] = buf
[0];
1559 ((char *)&ret
)[1] = buf
[1];
1560 ((char *)&ret
)[2] = buf
[2];
1561 ((char *)&ret
)[3] = buf
[3];
1562 ((char *)&ret
)[4] = buf
[4];
1563 ((char *)&ret
)[5] = buf
[5];
1564 ((char *)&ret
)[6] = buf
[6];
1565 ((char *)&ret
)[7] = buf
[7];
1570 static inline __u64
parse_u64(char *buf
)
1572 return be64_to_cpu(parse_be64(buf
));
/*
 * Reads a 48 bit big-endian value from the 6 bytes at @ptr: the two leading
 * bytes of "ret" are zeroed, the 6 input bytes fill bytes 2..7, and the
 * result is converted with be64_to_cpu().
 * NOTE(review): the declaration of "ret" is missing from this listing, so
 * its exact type (__be64 vs. __u64) cannot be confirmed here.
 */
1575 static inline __u64
parse_u48(char *ptr
)
1579 ((char *)&ret
)[0] = 0;
1580 ((char *)&ret
)[1] = 0;
1581 ((char *)&ret
)[2] = ptr
[0];
1582 ((char *)&ret
)[3] = ptr
[1];
1583 ((char *)&ret
)[4] = ptr
[2];
1584 ((char *)&ret
)[5] = ptr
[3];
1585 ((char *)&ret
)[6] = ptr
[4];
1586 ((char *)&ret
)[7] = ptr
[5];
1588 return be64_to_cpu(ret
);
/*
 * Copies 4 bytes from @ptr, byte by byte and in memory order, into a local
 * "ret" (no byte swapping is done here).
 * NOTE(review): the declaration of "ret", any argument check and the return
 * statement are missing from this listing; presumably "ret" is a __be32
 * that is returned - confirm against the original file.
 */
1591 static inline __be32
parse_be32(char *ptr
)
1597 ((char *)&ret
)[0] = ptr
[0];
1598 ((char *)&ret
)[1] = ptr
[1];
1599 ((char *)&ret
)[2] = ptr
[2];
1600 ((char *)&ret
)[3] = ptr
[3];
1605 static inline __u32
parse_u32(char *ptr
)
1607 return be32_to_cpu(parse_be32(ptr
));
/*
 * Copies 2 bytes from @ptr, byte by byte and in memory order, into a local
 * "ret" (no byte swapping is done here).
 * NOTE(review): the declaration of "ret", any argument check and the return
 * statement are missing from this listing; presumably "ret" is a __be16
 * that is returned - confirm against the original file.
 */
1610 static inline __be16
parse_be16(char *ptr
)
1616 ((char *)&ret
)[0] = ptr
[0];
1617 ((char *)&ret
)[1] = ptr
[1];
1622 static inline __u16
parse_u16(char *ptr
)
1624 return be16_to_cpu(parse_be16(ptr
));
1627 static inline __u64
pull_u48(struct sk_buff
*skb
)
1629 return parse_u48(cor_pull_skb(skb
, 6));
1632 static inline __be32
pull_be32(struct sk_buff
*skb
)
1634 return parse_be32(cor_pull_skb(skb
, 4));
1637 static inline __u32
pull_u32(struct sk_buff
*skb
)
1639 return parse_u32(cor_pull_skb(skb
, 4));
1642 static inline __u16
pull_u16(struct sk_buff
*skb
)
1644 return parse_u16(cor_pull_skb(skb
, 2));
/*
 * Consumes 1 byte from @skb via cor_pull_skb().
 * NOTE(review): the remainder of the body (any pointer check and the return
 * of the byte) is missing from this listing.
 */
1647 static inline __u8
pull_u8(struct sk_buff
*skb
)
1649 char *ptr
= cor_pull_skb(skb
, 1);
/*
 * Checks whether @cn_l is an incoming connection for @nb and conn_id:
 * the condition is true when the source type is not SOURCE_IN, the source
 * neighbor differs from @nb, the source conn_id differs from conn_id, or
 * the connection is marked as reset (isreset != 0).
 * NOTE(review): the parameter line declaring conn_id and both return
 * statements are missing from this listing; presumably the mismatch branch
 * returns 0 and the fall-through returns 1 - confirm.
 */
1654 static inline int is_conn_in(struct conn
*cn_l
, struct neighbor
*nb
,
1657 if (unlikely(unlikely(cn_l
->sourcetype
!= SOURCE_IN
) ||
1658 unlikely(cn_l
->source
.in
.nb
!= nb
) ||
1659 unlikely(cn_l
->source
.in
.conn_id
!= conn_id
) ||
1660 unlikely(cn_l
->isreset
!= 0)))
/*
 * Checks whether the source of @cn_l is the socket @cs: the condition is
 * true when the source type is not SOURCE_SOCK or the source socket
 * pointer differs from @cs.
 * NOTE(review): the return statements are missing from this listing;
 * presumably the mismatch branch returns 0 and the fall-through returns 1 -
 * confirm.
 */
1665 static inline int is_src_sock(struct conn
*cn_l
, struct cor_sock
*cs
)
1667 if (unlikely(unlikely(cn_l
->sourcetype
!= SOURCE_SOCK
) ||
1668 unlikely(cn_l
->source
.sock
.cs
!= cs
)))
/*
 * Checks whether the target of @cn_l is the socket @cs: the condition is
 * true when the target type is not TARGET_SOCK or the target socket
 * pointer differs from @cs.
 * NOTE(review): the return statements are missing from this listing;
 * presumably the mismatch branch returns 0 and the fall-through returns 1 -
 * confirm.
 */
1673 static inline int is_trgt_sock(struct conn
*cn_l
, struct cor_sock
*cs
)
1675 if (unlikely(unlikely(cn_l
->targettype
!= TARGET_SOCK
) ||
1676 unlikely(cn_l
->target
.sock
.cs
!= cs
)))
/*
 * Rate-limited wrapper around _set_last_act() for an incoming connection.
 * Snapshots jiffies, asserts (BUG_ON) that the source type is SOURCE_IN, and
 * calls _set_last_act() only when a time_after() test involving
 * jiffies_last_act + HZ * CONN_ACTIVITY_UPDATEINTERVAL_SEC holds.
 * NOTE(review): the second argument of time_after() is missing from this
 * listing (presumably jiffies_tmp); verify the comparison direction in the
 * original file.
 */
1681 static inline void set_last_act(struct conn
*src_in_l
)
1683 unsigned long jiffies_tmp
= jiffies
;
1685 BUG_ON(src_in_l
->sourcetype
!= SOURCE_IN
);
1687 if (unlikely(time_after(src_in_l
->source
.in
.jiffies_last_act
+
1688 HZ
* CONN_ACTIVITY_UPDATEINTERVAL_SEC
,
1690 _set_last_act(src_in_l
);
/*
 * Limits used by buf_optlen(): the length starts at BUFLEN_MIN and grows
 * while it is below both PAGESIZE and BUFLEN_MAX.
 */
#define BUFLEN_MIN 128
#define BUFLEN_MAX 4096
#define PAGESIZE (1 << PAGE_SHIFT)
/*
 * Picks a buffer length for @datalen: starts at BUFLEN_MIN and doubles
 * while the length is smaller than @datalen, PAGESIZE and BUFLEN_MAX.
 * NOTE(review): the return statement is missing from this listing;
 * presumably optlen is returned - confirm.
 */
1697 static inline __u32
buf_optlen(__u32 datalen
)
1699 __u32 optlen
= BUFLEN_MIN
;
1700 while (optlen
< datalen
&& optlen
< PAGESIZE
&& optlen
< BUFLEN_MAX
)
1701 optlen
= (optlen
<< 1);
/*
 * Releases a data buffer item according to its type.
 * DATABUF_BUF: the item is returned to data_buf_item_slab.
 * DATABUF_SKB: the item is embedded in an skb's processing state
 * (funcstate.rcv.dbi); the owning skb is recovered with container_of() and
 * skb_from_pstate().
 * NOTE(review): several lines are missing from this listing (something
 * before kmem_cache_free() in the first branch, what is done with the skb,
 * and any final else branch); consult the original file.
 */
1706 inline static void databuf_item_free(struct data_buf_item
*item
)
1708 if (item
->type
== DATABUF_BUF
) {
1710 kmem_cache_free(data_buf_item_slab
, item
);
1711 } else if (item
->type
== DATABUF_SKB
) {
1712 struct sk_buff
*skb
= skb_from_pstate(container_of(item
,
1713 struct skb_procstate
, funcstate
.rcv
.dbi
));
/*
 * Sequence numbers are 48 bits wide; clear the upper 16 bits of @seqno.
 * The mask is built from an unsigned constant to match the __u64 operand.
 */
static inline __u64 seqno_clean(__u64 seqno)
{
	return seqno & ((1ULL << 48) - 1);
}
/*
 * Compare two 48 bit sequence numbers for equality.
 * Both values are shifted left by 16 so that the unused upper 16 bits do
 * not take part in the comparison.
 * Returns 1 if equal, 0 otherwise.
 */
static inline int seqno_eq(__u64 seqno1, __u64 seqno2)
{
	seqno1 = seqno1 << 16;
	seqno2 = seqno2 << 16;
	return seqno1 == seqno2;
}
/*
 * Wrap-around aware "seqno1 < seqno2" for 48 bit sequence numbers.
 *
 * Both values are shifted into the top 48 bits of a 64 bit word; seqno1 is
 * before seqno2 when the unsigned difference has its top bit set, i.e. when
 * seqno1 lies in the half of the number space preceding seqno2.
 *
 * The threshold is written as 1ULL << 63: shifting a signed 1LL into the
 * sign bit is undefined behaviour in C.
 *
 * Returns 1 if seqno1 is before seqno2, 0 otherwise (including equality).
 */
static inline int seqno_before(__u64 seqno1, __u64 seqno2)
{
	seqno1 = seqno1 << 16;
	seqno2 = seqno2 << 16;
	return (seqno1 - seqno2) >= (1ULL << 63);
}
1739 static inline int seqno_before_eq(__u64 seqno1
, __u64 seqno2
)
1741 return seqno_eq(seqno1
, seqno2
) || seqno_before(seqno1
, seqno2
);
1744 static inline int seqno_after(__u64 seqno1
, __u64 seqno2
)
1746 return seqno_before_eq(seqno1
, seqno2
) ? 0 : 1;
1749 static inline int seqno_after_eq(__u64 seqno1
, __u64 seqno2
)
1751 return seqno_before(seqno1
, seqno2
) ? 0 : 1;
1754 static inline int ktime_before_eq(ktime_t time1
, ktime_t time2
)
1756 return ktime_after(time1
, time2
) ? 0 : 1;
1759 static inline int ktime_after_eq(ktime_t time1
, ktime_t time2
)
1761 return ktime_before(time1
, time2
) ? 0 : 1;