2 * Connection oriented routing
3 * Copyright (C) 2007-2021 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
16 #include <linux/atomic.h>
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/interrupt.h>
21 #include <linux/sched.h>
22 #include <linux/netdevice.h>
23 #include <linux/skbuff.h>
24 #include <linux/spinlock.h>
25 #include <linux/workqueue.h>
26 #include <linux/kref.h>
27 #include <linux/ktime.h>
28 #include <linux/rbtree.h>
30 #include <linux/socket.h>
33 #include <linux/math64.h>
38 #define ETH_P_COR 0x1022
42 #define PROTO_COR_RAW 0
43 #define PROTO_COR_RDEAMON 1
52 #define COR_PASS_ON_CLOSE 1
54 #define COR_PUBLISH_SERVICE 2
57 #define COR_TOS_DEFAULT 0
58 #define COR_TOS_LOW_LATENCY 1
59 #define COR_TOS_HIGH_LATENCY 2
61 #define COR_PRIORITY 4
63 #define MAX_CONN_CMD_LEN 64
65 #define PACKET_TYPE_NONE 0
66 #define PACKET_TYPE_ANNOUNCE 1
67 #define PACKET_TYPE_CMSG_NOACK 2
68 #define PACKET_TYPE_CMSG_ACKSLOW 3
69 #define PACKET_TYPE_CMSG_ACKFAST 4
70 #define PACKET_TYPE_CONNDATA 64
72 #define PACKET_TYPE_CONNDATA_FLAGS 63
73 #define PACKET_TYPE_CONNDATA_FLAGS_FLUSH 32
74 #define PACKET_TYPE_CONNDATA_FLAGS_WINDOWUSED 31
78 * Announce data format:
80 * is 0, may be increased if the protocol changes
82 * is 0, must be increased if a future version of the protocol is incompatible
83 * to the current version
86 * Data format of the announce packet "data" field:
87 *{command [2] commandlength [2] commanddata [commandlength]}[...]
92 /* ANNCMD_VERSION: version[2] minversion[2] */
93 #define ANNCMD_VERSION 1
95 /* ANNCMD_ADDR: addr[8] */
98 /* ANNCMD_NOADDR: (no params) */
99 #define ANNCMD_NOADDR 3
103 * Kernel packet data - these commands are sent by the neighbor
104 * The end nodes may cause these commands to be sent, but they see them beyond
108 #define KP_ACK_CONN 1
109 #define KP_CONN_DATA 2
112 #define KP_MISC_PADDING 0
115 * KP_INIT_SESSION[1] sessionid[4]
117 * finishes neighbor discovery and starts a session
119 * Before this is received all other commands are ignored. The sessionid is used
120 * to prevent usage of old neighbor discovery data (e.g. addresses)
122 #define KP_MISC_INIT_SESSION 1
124 #define KP_MISC_INIT_SESSION_CMDLEN 5
127 * KP_PING[1] cookie[4]
128 * KP_PONG[1] cookie[4] respdelay_full[4] respdelay_netonly[4]
130 * This is needed to find out whether the other node is reachable. After a new
131 * neighbor is seen, ping requests are sent and the neighbor is only reachable
132 * after a few pongs are received. These requests are also used to find out
133 * whether a neighbor is gone.
136 * The receiver of a ping may delay the sending of the pong e.g. to create
137 * bigger packets. The respdelay is the time in microseconds the packet was
140 #define KP_MISC_PING 2
142 #define KP_MISC_PING_CMDLEN 5
144 #define KP_MISC_PONG 3
146 /* KP_ACK[1] seqno[6] */
147 #define KP_MISC_ACK 4
150 * NOTE on connection ids:
151 * connection ids we receive with most significant bit 0 have been generated by
153 * connection ids we receive with most significant bit 1 have been generated by
156 * ATTENTION: the priority seqnos are reversed:
157 * priority seqnos we send are used when we send updates
158 * priority seqnos we received are used when we receive updates
162 * incoming connection
163 * seqno1... used to ack data sent from the side which initiated the connection
164 * seqno2... used to ack data sent to the side which initiated the connection
165 * KP_CONNECT[1] conn_id[4] seqno1[6] seqno2[6] window[2] priority_seqno[0.5]
166 * priority[1.5] is_highlatency[1]
168 #define KP_MISC_CONNECT 5
171 * incoming connection successful,
172 * KP_CONNECT_SUCCESS[1] conn_id[4] window[2]
174 #define KP_MISC_CONNECT_SUCCESS 6
177 * KP_RESET_CONN[1] conn_id[4]
178 * We send this, if there is an established connection we want to close.
180 #define KP_MISC_RESET_CONN 7
183 * KP_SET_MAX_CMSG_DELAY[1] cpacket_ack_fast_delay[4] cpacket_ack_slow_delay[4]
184 * data_ack_lowlatency_delay[4] data_ack_highlatency_delay[4] cmsg_delay[4]
185 * Sent after connecting and at any change
186 * delays are specified in microseconds
188 #define KP_MISC_SET_MAX_CMSG_DELAY 8
190 #define KP_MISC_SET_MAX_CMSG_DELAY_CMDLEN 21
193 * KP_MISC_SET_RECEIVE_MTU[1] receive_mtu[4]
194 * Sent after connecting and at any change
196 #define KP_MISC_SET_RECEIVE_MTU 9
198 #define KP_MISC_SET_RECEIVE_MTU_CMDLEN 5
202 * KP_ACK_CONN[1] conn_id[4] delay_remaining[1] seqno[6] window[2]
203 * bufsize_changerate[1] seqno_ooo[6]
204 * length[1-4] priority_seqno[0.5] priority[1.5]
206 * conn_id is the conn_id we use if we sent something through this conn and
207 * *not* the conn_id that the neighbor used to send us the data
209 * delay_remaining = time the ack_conn could have remained in the queue
210 * 255 means the ack_conn has been sent immediately
211 * 0 means it has been delayed by as much as the delay set by SET_MAX_CMSG_DELAY
213 * seqno = the seqno which is expected in the next non-out-of-order packet
215 * window = amount of data which can be sent without receiving the next ack
216 * packets with lower seqno do not overwrite the last window size
217 * The window may also be reduced. However, this only indicates a wish.
218 * Packets must be accepted if they exceed the new window, but not the old
223 * 1...255 = 64*2^((value-1)/7) end result is rounded down to an integer
225 * bufsize_changerate = if the next router(s) is increasing or decreasing its
227 * 0 = for every byte we can send, the end host will receive 2 bytes
228 * 64 = for every byte we can send, the end host will receive 1 byte
229 * 128 = for every 2 bytes we can send, the end host will receive 1 byte
232 * seqno_ooo, length = This packet was received out of order. Maybe a previous
233 * packet has been lost. Out of order data should not be retransmitted.
234 * Multiple ooo packets may be merged into a single ack. Ooo packets may be
235 * partially accepted, so that the length does not cover the full packet and/
236 * or the seqno starts in the middle of a packet
238 #define KP_ACK_CONN_FLAGS_SEQNO 1
239 #define KP_ACK_CONN_FLAGS_WINDOW 2
240 #define KP_ACK_CONN_FLAGS_OOO 12 /* 4+8 */
241 #define KP_ACK_CONN_FLAGS_PRIORITY 16
243 static inline __u8
cor_ooolen_to_flags(__u32 len
)
254 static inline int cor_ooolen(__u8 flags
)
256 int len
= ((flags
& KP_ACK_CONN_FLAGS_OOO
) >> 2);
258 if (unlikely(len
== 3))
263 static inline int cor_ack_conn_len(__u8 flags
)
267 if ((flags
& KP_ACK_CONN_FLAGS_SEQNO
) != 0) {
269 if ((flags
& KP_ACK_CONN_FLAGS_WINDOW
) != 0)
273 if (cor_ooolen(flags
) != 0) {
275 len
+= cor_ooolen(flags
);
278 if ((flags
& KP_ACK_CONN_FLAGS_SEQNO
) != 0 ||
279 cor_ooolen(flags
) != 0)
282 if (flags
& KP_ACK_CONN_FLAGS_PRIORITY
)
288 /* KP_CONN_DATA[1] conn_id[4] seqno[6] length[1-2] data[length] */
289 #define KP_CONN_DATA_FLAGS_WINDOWUSED 31
290 #define KP_CONN_DATA_FLAGS_FLUSH 32
292 #define KP_CONN_DATA_MAXLEN (128+32767)
294 static inline __u32
get_kp_conn_data_length(__u32 datalen
)
302 static inline __u8
get_kp_code(__u8 maj
, __u8 min
)
304 BUILD_BUG_ON(maj
> 3);
306 return (maj
<< 6) + min
;
309 static inline __u8
kp_maj(__u8 code
)
314 static inline __u8
kp_min(__u8 code
)
321 * Connection data which is interpreted when the connection has no target yet
322 * These commands are sent by the end node.
325 * cmd[2] length[1-4] parameter[length]
326 * unrecognized commands are ignored
327 * parameters which are longer than expected are ignored as well
330 #define CD_CONTINUE_ON_ERROR_FLAG 32768
332 /* outgoing connection: CD_CONNECT_NB[2] length[1-4] addr[8] */
333 #define CD_CONNECT_NB 1
335 /* connection to local open part: CD_CONNECT_PORT[2] length[1-4] port[4] */
336 #define CD_CONNECT_PORT 2
339 * list connected neighbors: CD_LIST_NEIGH[2] length[1-4]
340 * responds with CDR_BINDATA if successful
345 * numfields[1-4] (field[2] fieldlen[1-4])[numfields]
346 * rows[responserows]:
347 * fieldlen[1-4], only if fieldlen in the header was "0"
348 * fielddata[fieldlen]
350 * Future versions may append data to field definition. Clients must silently
351 * discard fields they do not expect.
353 #define CD_LIST_NEIGH 3
358 #define LIST_NEIGH_FIELD_ADDR 1
361 * latency_in_microsecs[1] (64_11 encoding)
362 * Only raw network latency is measured. Delays caused by the priority queues
363 * are *not* included.
365 #define LIST_NEIGH_FIELD_LATENCY 2
368 * list services: CD_LIST_SERVICES[2] length[1-4]
369 * responds with CDR_BINDATA if successful
371 #define CD_LIST_SERVICES 4
374 * list l4 protocols: CD_LIST_L4PROTOCOLS[2] length[1-4]
375 * responds with CDR_BINDATA if successful
377 #define CD_LIST_L4PROTOCOLS 5
381 * Connection data response
382 * Format is the same as with connection data
391 * CDR_EXECOK_BINDATA[1] bindatalen[1-4] bindata[bindatalen]
393 #define CDR_EXECOK_BINDATA 2
396 * CDR_EXECFAILED[1] reasoncode[2]
398 #define CDR_EXECFAILED 3
399 #define CDR_EXECFAILED_INVALID_COMMAND 1
400 #define CDR_EXECFAILED_COMMAND_PARSE_ERROR 2
401 #define CDR_EXECFAILED_TEMPORARILY_OUT_OF_RESOURCES 3
402 #define CDR_EXECFAILED_NB_DOESNTEXIST 4
403 #define CDR_EXECFAILED_UNKNOWN_L4PROTOCOL 5
404 #define CDR_EXECFAILED_PORTCLOSED 6
406 #define L4PROTO_STREAM 42399
410 * routing daemon sock
412 * cmdcode[4] length[4] cmddata[length]
414 #define CRD_KTU_SUPPORTEDVERSIONS 1
416 * CRD_KTU_SUPPORTEDVERSIONS[4] length[4] min[4] max[4]
419 #define CRD_KTU_CONNECT 2
421 * CRD_KTU_CONNECT[4] length[4] cookie[8] targetlen[4] target[targetlen]
424 #define CRD_UTK_VERSION 1
426 * CRD_UTK_VERSION[4] length[4] version[4]
430 #define CRD_UTK_UP_FLAGS_ADDR 1
431 #define CRD_UTK_UP_FLAGS_INTERFACES 2
433 * CRD_UTK_UP[4] length[4] flags[8]
434 * if CRD_UTK_UP_FLAGS_ADDR
436 * if CRD_UTK_UP_FLAGS_INTERFACES:
437 * num_interfaces[4] (length[4] interface[length])[num_interfaces]
441 #define CRD_UTK_CONNECTERROR 3
443 * CRD_UTK_CONNECTERROR[4] length[4] cookie[8] error[4]
446 #define CRD_UTK_CONNECTERROR_ACCES 1
447 #define CRD_UTK_CONNECTERROR_NETUNREACH 2
448 #define CRD_UTK_CONNECTERROR_TIMEDOUT 3
449 #define CRD_UTK_CONNECTERROR_REFUSED 4
451 #define CONN_MNGD_HEADERLEN 2
452 #define CONN_MNGD_MAX_CTRL_DATALEN 8
453 #define CONN_MNGD_CHECKSUMLEN 4
455 #define CONN_MNGD_HASDATA (1 << 15)
456 #define CONN_MNGD_EOF (1 << 0)
457 #define CONN_MNGD_RCVEND (1 << 1)
458 #define CONN_MNGD_KEEPALIVE_REQ (1 << 2)
459 #define CONN_MNGD_KEEPALIVE_RESP (1 << 3)
460 #define CONN_MNGD_DATALEN 4095
462 #define CONN_MNGD_MAX_SEGMENT_SIZE (CONN_MNGD_DATALEN + 1)
465 struct cor_interface_config
{
470 #define CONGSTATUS_NONE 0
471 #define CONGSTATUS_CONNDATA 1
472 #define CONGSTATUS_ANNOUNCE 2
473 #define CONGSTATUS_RETRANS 3
474 #define CONGSTATUS_KPACKETS 4
476 struct cor_qos_queue
{
481 struct list_head queue_list
;
483 struct net_device
*dev
; /* may not change while queue is in list */
485 struct task_struct
*qos_resume_thread
;
486 wait_queue_head_t qos_resume_wq
;
487 atomic_t qos_resume_scheduled
;
488 unsigned long jiffies_lastprogress
;
490 struct list_head kpackets_waiting
;
491 struct list_head conn_retrans_waiting
;
492 struct list_head announce_waiting
;
493 struct list_head neighbors_waiting
;
494 struct list_head neighbors_waiting_nextpass
;
496 unsigned long jiffies_nb_pass_start
;
497 unsigned long jiffies_nb_lastduration
;
500 unsigned long jiffies_lastdrop
;
505 atomic_t cong_status
;
510 * switch to and from RB_INQUEUE_NBCONGWIN is only done with nbcongwin.lock
513 #define RB_INQUEUE_FALSE 0
514 #define RB_INQUEUE_TRUE 1
515 #define RB_INQUEUE_NBCONGWIN 2 /* only for nb->rb */
516 #define RB_INQUEUE_NBNOTACTIVE 3 /* only for nb->rb */
518 struct cor_resume_block
{
523 #define ANNOUNCE_TYPE_BROADCAST 1
524 #define ANNOUNCE_TYPE_UNICAST 2
526 struct cor_announce_data
{
532 struct net_device
*dev
;
533 char mac
[MAX_ADDR_LEN
];
534 struct delayed_work announce_work
;
535 struct cor_resume_block rb
;
538 struct cor_neighbor_discdata
{
540 unsigned long jiffies_created
;
544 struct net_device
*dev
;
545 char mac
[MAX_ADDR_LEN
];
559 struct cor_ping_cookie
{
560 ktime_t time_created
;
562 unsigned long jiffies_sent
;
565 __u8 pongs
; /* count of pongs for pings sent after this one */
568 #define NEIGHBOR_STATE_INITIAL 0
569 #define NEIGHBOR_STATE_ACTIVE 1
570 #define NEIGHBOR_STATE_STALLED 2
571 #define NEIGHBOR_STATE_KILLED 3
573 #define NBCONGWIN_SHIFT 16
574 #define NBCONGWIN_MUL (1 << NBCONGWIN_SHIFT)
576 struct cor_neighbor
{
577 struct list_head nb_list
;
582 struct net_device
*dev
;
583 char mac
[MAX_ADDR_LEN
];
584 struct cor_qos_queue
*queue
;
588 atomic_t sessionid_rcv_needed
;
589 atomic_t sessionid_snd_needed
;
594 atomic64_t cmsg_timer_timeout
;
595 struct timer_list cmsg_timer
;
596 spinlock_t cmsg_lock
;
597 struct list_head cmsg_queue_pong
;
598 struct list_head cmsg_queue_ack_fast
;
599 struct list_head cmsg_queue_ack_slow
;
600 struct list_head cmsg_queue_ackconn_urgent
;
601 struct list_head cmsg_queue_ackconn_lowlat
;
602 struct list_head cmsg_queue_ackconn_highlat
;
603 struct list_head cmsg_queue_conndata_lowlat
;
604 struct list_head cmsg_queue_conndata_highlat
;
605 struct list_head cmsg_queue_other
;
606 __u8 add_retrans_needed
;
607 __u64 kpacket_seqno
; /* not locked, only accessed by single tasklet */
609 struct rb_root pending_conn_resets_rb
;
611 __u32 cmsg_pongslength
;
612 __u32 cmsg_otherlength
;
614 __u32 cmsg_pongscnt
; /* size of queue only, protected by cmsg_lock */
615 atomic_t cmsg_pongs_retrans_cnt
; /* number of retransmits only */
616 atomic_t cmsg_othercnt
; /* size of queue + retransmits */
618 atomic_t cmsg_bulk_readds
;
620 atomic_t cmsg_delay_conndata
;
622 /* not locked, only accessed by single thread */
623 __u8 max_cmsg_delay_sent
;
625 atomic_t rcvmtu_sendneeded
;
628 /* protected by cor_qos_queue->qlock */
629 struct cor_resume_block rb_kp
;
630 struct cor_resume_block rb_cr
;
631 struct cor_resume_block rb
;
632 unsigned long cmsg_send_start_j
;
633 ktime_t cmsg_send_start_kt
;
638 struct list_head lh_nextpass
;
645 atomic64_t data_intransit
;
650 spinlock_t state_lock
;
651 unsigned long last_ping_time
;
652 struct cor_ping_cookie cookies
[PING_COOKIES_PER_NEIGH
];
653 __u32 ping_intransit
;
656 __u64 latency_variance_retrans_us
; /* microsecs */
657 atomic_t latency_retrans_us
; /* microsecs */
658 atomic_t latency_stddev_retrans_us
; /* microsecs */
659 atomic_t latency_advertised_us
; /* microsecs */
660 __u8 rcvmtu_delayed_send_needed
:1,
661 rcvmtu_allowed_countdown
:2;
663 atomic_t max_remote_ack_fast_delay_us
; /* microsecs */
664 atomic_t max_remote_ack_slow_delay_us
; /* microsecs */
665 atomic_t max_remote_ackconn_lowlat_delay_us
; /* microsecs */
666 atomic_t max_remote_ackconn_highlat_delay_us
; /* microsecs */
667 atomic_t max_remote_pong_delay_us
; /* microsecs */
669 atomic_t remote_rcvmtu
;
672 unsigned long initial_state_since
;/* initial state */
675 * time of the last sent packet which has been acked or
676 * otherwise responded to (e.g. pong)
678 unsigned long last_roundtrip
;/* active/stalled state */
680 ktime_t last_roundtrip_end
;
684 __u8 str_timer_pending
;
685 struct delayed_work stalltimeout_timer
;
687 spinlock_t connid_lock
;
688 struct rb_root connid_rb
;
690 spinlock_t connid_reuse_lock
;
691 struct rb_root connid_reuse_rb
;
692 struct list_head connid_reuse_list
;
693 __u16 connid_reuse_pingcnt
;
694 __u8 connid_reuse_oom_countdown
;
696 atomic64_t priority_sum
;
699 * connections which receive data from/send data to this node
700 * used when terminating all connections of a neighbor and terminating
701 * inactive connections
703 spinlock_t conn_list_lock
;
704 struct list_head snd_conn_idle_list
;
705 struct list_head snd_conn_busy_list
;
708 * the timer has to be inited when adding the neighbor
710 * add_timer(struct timer_list * timer);
712 spinlock_t retrans_lock
;
713 struct timer_list retrans_timer
;
714 struct list_head retrans_fast_list
;
715 struct list_head retrans_slow_list
;
716 struct rb_root kp_retransmits_rb
;
718 spinlock_t retrans_conn_lock
;
719 struct timer_list retrans_conn_timer
;
720 struct list_head retrans_conn_lowlatency_list
;
721 struct list_head retrans_conn_highlatency_list
;
723 struct work_struct reset_neigh_work
;
726 static inline void cor_nb_kref_get(struct cor_neighbor
*nb
, char *reason
)
728 /* printk(KERN_ERR "cor_nb_kref_get %p %s\n", nb, reason); */
732 extern void cor_neighbor_free(struct kref
*ref
); /* neigh.c */
734 static inline void cor_nb_kref_put(struct cor_neighbor
*nb
, char *reason
)
736 /* printk(KERN_ERR "cor_nb_kref_put %p %s\n", nb, reason); */
737 kref_put(&nb
->ref
, cor_neighbor_free
);
740 extern void cor_kreffree_bug(struct kref
*ref
); /* util.c */
742 static inline void cor_nb_kref_put_bug(struct cor_neighbor
*nb
, char *reason
)
744 /* printk(KERN_ERR "cor_nb_kref_put_bug %p %s\n", nb, reason); */
745 kref_put(&nb
->ref
, cor_kreffree_bug
);
749 #define DATABUF_BUF 0
750 #define DATABUF_SKB 1
752 struct cor_data_buf_item
{
753 struct list_head buf_list
;
762 struct cor_connid_reuse_item
{
772 #define SNDSPEED_INIT 0
773 #define SNDSPEED_ACTIVE 1
774 struct cor_snd_speed
{
777 unsigned long jiffies_last_refresh
;
780 /* bytes per second */
787 /* This struct helps keep struct cor_conn small. */
788 struct cor_conn_src_sock_extradata
{
793 struct cor_snd_speed snd_speed
;
795 __be32 keepalive_req_cookie
;
796 __be32 keepalive_resp_cookie
;
798 * keepalive_intransit == 0... last resp received
799 * keepalive_intransit == 1... req sent
801 unsigned long jiffies_keepalive_lastact
;
804 char snd_hdr
[CONN_MNGD_HEADERLEN
];
805 char snd_data
[CONN_MNGD_MAX_CTRL_DATALEN
];
806 char snd_chksum
[CONN_MNGD_CHECKSUMLEN
];
811 char snd_hdr
[CONN_MNGD_HEADERLEN
];
812 char snd_chksum
[CONN_MNGD_CHECKSUMLEN
];
821 * There are 2 conn objects per bi-directional connection. They refer to each
822 * other with in the reversedir field.
828 * cn: conn we do not know what is inside
829 * src_in, trgt_unconn, trgt_out, ...: A conn with the specified source or
830 * targettype. In the unlocked case the types are only a guess, because they
831 * might have changed since the last access. After locking the
832 * source/destination parameters have to be checked whether they still are what
833 * we expect. This includes source/targettype, neighbor, conn_id
837 * no suffix: unlocked
839 * _l: this direction is locked
841 * _ll: both directions are locked
843 * _lx: this direction is locked, the other direction may be locked
845 * _o: unlocked, but source or target is known for sure, because an outside
846 * lock is taken; For variables on the heap this means that an outside lock must
847 * be taken before accessing the struct which points to the conn can be
851 * Most fields are protected by rcv_lock. Fields which control
852 * source and destination of the data flow require both directions
853 * to be locked and external references to be cleared before the change can
854 * happen. This includes fields like sourcetype, targettype, connid,
855 * list_heads, ???. In this case the side with is_client == 1 needs to be locked
858 * Some other fields are locked outside (e.g. at struct neighbor).
860 #define SOURCE_UNCONNECTED 0
862 #define SOURCE_SOCK 2
864 #define TARGET_UNCONNECTED 0
866 #define TARGET_SOCK 2
867 #define TARGET_DISCARD 3
869 #define BUFSIZE_NOACTION 0
870 #define BUFSIZE_DECR 1
871 #define BUFSIZE_DECR_FAST 2
872 #define BUFSIZE_INCR 3
873 #define BUFSIZE_INCR_FAST 4
875 #define JIFFIES_LAST_IDLE_SHIFT 8
876 #define BUFSIZE_SHIFT 5
878 #define SOCKTYPE_RAW 0
879 #define SOCKTYPE_MANAGED 1
881 #define RCV_BUF_STATE_OK 0
882 #define RCV_BUF_STATE_INCOMPLETE 1
883 #define RCV_BUF_STATE_RESET 2
885 #define SND_BUF_STATE_INCOMPLETE 0
886 #define SND_BUF_STATE_FILLED 1
893 __u8 is_client
; /* immutable after allocated */
897 * 0... connection active
898 * 1... connection is about to be reset, target does not need to be
900 * 2... connection is reset
911 struct cor_neighbor
*nb
;
913 struct list_head reorder_queue
;
914 __u32 reorder_memused
;
920 /* number of acks sent, not data seqno */
923 __u16 small_ooo_packets
;
927 __u8 inorder_ack_needed
;
931 __u64 window_seqnolimit
;
932 __u64 window_seqnolimit_remote
;
934 /* protected by nb->cmsg_lock */
935 struct list_head acks_pending
;
939 struct cor_conn_src_sock_extradata
*ed
;
942 * cl_list and in_cl_list are protected by cor_bindnodes
944 struct list_head cl_list
;
948 * keepalive_lh and in_keepalive_list are protected by
949 * cor_keepalive_req_lock
951 struct timer_list keepalive_timer
;
952 struct list_head keepalive_lh
;
954 __u8 in_keepalive_list
;
956 /* protected by flushtoconn_oom_lock */
957 struct list_head flushtoconn_oom_lh
;
958 /* protected by conn->rcv_lock */
959 __u8 in_flushtoconn_oom_list
;
962 __u8 keepalive_intransit
:1,
965 send_keepalive_req_needed
:1,
966 send_keepalive_resp_needed
:1,
968 send_rcvend_needed
:1;
970 __u8 last_windowused
;
983 char paramlen_buf
[4];
987 struct cor_neighbor
*nb
;
989 /* list of all connections to this neighbor */
990 struct list_head nb_list
;
991 unsigned long jiffies_last_act
;
992 __u32 nblist_busy_remaining
;
995 __u64 seqno_nextsend
;
997 __u64 seqno_windowlimit
;
999 /* protected by nb->retrans_conn_lock, sorted by seqno
1001 struct list_head retrans_list
;
1003 struct cor_resume_block rb
;
1006 unsigned long jiffies_idle_since
;
1009 __u16 maxsend_extra
;
1013 __u8 lastsend_windowused
;
1015 __u8 remote_bufsize_changerate
;
1017 __u8 priority_send_allowed
:1,
1021 __u16 priority_last
:12,
1024 /* protected by nb->retrans_conn_lock */
1025 __u16 retrans_lowwindow
;
1029 __u8 waiting_for_userspace
;
1030 unsigned long waiting_for_userspace_since
;
1032 struct cor_sock
*cs
;
1037 char rcv_hdr
[CONN_MNGD_HEADERLEN
];
1038 char rcv_chksum
[CONN_MNGD_CHECKSUMLEN
];
1041 __u16 rcv_hdr_flags
;
1047 struct list_head items
;
1048 struct cor_data_buf_item
*nextread
;
1053 __u32 read_remaining
;
1055 __u16 next_read_offset
;
1058 __u32 bufspace_accounted
;
1061 __u32 bufsize
; /* 32 ==> 1 byte, see BUFSIZE_SHIFT */
1062 __u32 ignore_rcv_lowbuf
;
1083 static inline __u32
cor_get_connid_reverse(__u32 conn_id
)
1085 return conn_id
^ (1 << 31);
1088 struct cor_conn_bidir
{
1089 struct cor_conn cli
;
1090 struct cor_conn srv
;
1095 static inline struct cor_conn_bidir
* cor_get_conn_bidir(struct cor_conn
*cn
)
1098 return container_of(cn
, struct cor_conn_bidir
, cli
);
1100 return container_of(cn
, struct cor_conn_bidir
, srv
);
1103 static inline struct cor_conn
* cor_get_conn_reversedir(struct cor_conn
*cn
)
1105 if (cn
->is_client
) {
1106 struct cor_conn_bidir
*cnb
= container_of(cn
,
1107 struct cor_conn_bidir
, cli
);
1110 struct cor_conn_bidir
*cnb
= container_of(cn
,
1111 struct cor_conn_bidir
, srv
);
1116 static inline void cor_conn_kref_get(struct cor_conn
*cn
, char *reason
)
1118 /* printk(KERN_ERR "cor_conn_kref_get %p %s\n", cn, reason); */
1119 kref_get(&cor_get_conn_bidir(cn
)->ref
);
1122 extern void cor_free_conn(struct kref
*ref
); /* conn.c */
1124 static inline void cor_conn_kref_put(struct cor_conn
*cn
, char *reason
)
1126 /* printk(KERN_ERR "cor_conn_kref_put %p %s\n", cn, reason); */
1127 kref_put(&cor_get_conn_bidir(cn
)->ref
, cor_free_conn
);
1130 static inline void cor_conn_kref_put_bug(struct cor_conn
*cn
, char *reason
)
1132 /* printk(KERN_ERR "cor_conn_kref_put_bug %p %s\n", cn, reason); */
1133 kref_put(&cor_get_conn_bidir(cn
)->ref
, cor_kreffree_bug
);
1138 #define CONN_RETRANS_INITIAL 0
1139 #define CONN_RETRANS_SCHEDULED 1
1140 #define CONN_RETRANS_LOWWINDOW 2
1141 #define CONN_RETRANS_SENDING 3
1142 #define CONN_RETRANS_ACKED 4
1143 struct cor_conn_retrans
{
1144 /* timeout_list and conn_list share a single ref */
1146 /* only in timeout_list if state == CONN_RETRANS_SCHEDULED */
1147 struct list_head timeout_list
;
1148 struct list_head conn_list
;
1149 struct cor_conn
*trgt_out_o
;
1155 unsigned long timeout
;
1158 #define RCVOOO_BUF 0
1159 #define RCVOOO_SKB 1
1161 struct list_head lh
;
1168 struct cor_rcvooo_buf
{
1169 struct cor_rcvooo r
;
1174 /* inside skb->cb */
1175 struct cor_skb_procstate
{
1178 struct work_struct work
;
1187 struct cor_rcvooo r
;
1191 struct cor_data_buf_item dbi
;
1196 #define CS_TYPE_UNCONNECTED 0
1197 #define CS_TYPE_LISTENER 1
1198 #define CS_TYPE_CONN_RAW 2
1199 #define CS_TYPE_CONN_MANAGED 3
1201 #define CS_CONNECTSTATE_UNCONNECTED 0
1202 #define CS_CONNECTSTATE_CONNECTING 1
1203 #define CS_CONNECTSTATE_CONNECTED 2
1204 #define CS_CONNECTSTATE_ERROR 3
1207 struct sock sk
; /* must be first */
1212 /* type may not change once it is set to != CS_TYPE_UNCONNECTED */
1216 __u8 publish_service
;
1218 __u8 is_highlatency
;
1224 /* listener is protected by cor_bindnodes */
1225 struct list_head lh
;
1227 __u8 publish_service
;
1230 struct list_head conn_queue
;
1234 struct cor_conn
*src_sock
;
1235 struct cor_conn
*trgt_sock
;
1237 struct cor_data_buf_item
*rcvitem
;
1240 struct cor_sock
*pass_on_close
;
1244 struct cor_sockaddr remoteaddr
;
1246 struct list_head rd_msgs
;
1247 struct list_head crd_lh
;
1266 struct cor_conn
*src_sock
;
1267 struct cor_conn
*trgt_sock
;
1271 __u16 snd_segment_size
;
1273 __u8 send_in_progress
;
1278 __u16 rcvbuf_consumed
;
1283 struct work_struct readfromconn_work
;
1284 atomic_t readfromconn_work_scheduled
;
1286 atomic_t ready_to_read
;
1287 atomic_t ready_to_write
;
1288 atomic_t ready_to_accept
;
1291 #define ACK_NEEDED_NO 0
1292 #define ACK_NEEDED_SLOW 1
1293 #define ACK_NEEDED_FAST 2
1296 extern spinlock_t cor_local_addr_lock
;
1297 extern __u8 cor_local_has_addr
;
1298 extern __be64 cor_local_addr
;
1299 extern __be32 cor_local_addr_sessionid
;
1301 extern int cor_is_device_configurated(struct net_device
*dev
);
1303 extern void cor_set_interface_config(struct cor_interface_config
*new_config
,
1304 __u32 new_num_interfaces
, int new_all_interfaces
);
1306 extern void cor_config_down(void);
1308 extern int cor_config_up(__u8 has_addr
, __be64 addr
);
1310 extern int cor_is_clientmode(void);
1313 extern void cor_qos_set_lastdrop(struct cor_qos_queue
*q
);
1315 #ifdef DEBUG_QOS_SLOWSEND
1316 extern int _cor_dev_queue_xmit(struct sk_buff
*skb
, int caller
);
1318 static inline int _cor_dev_queue_xmit(struct sk_buff
*skb
, int caller
)
1320 return dev_queue_xmit(skb
);
1324 static inline int cor_dev_queue_xmit(struct sk_buff
*skb
,
1325 struct cor_qos_queue
*q
, int caller
)
1327 int rc
= _cor_dev_queue_xmit(skb
, caller
);
1329 if (unlikely(rc
!= NET_XMIT_SUCCESS
))
1330 cor_qos_set_lastdrop(q
);
1334 extern void cor_free_qos(struct kref
*ref
);
1336 #ifdef COR_NBCONGWIN
1337 extern void cor_nbcongwin_data_retransmitted(struct cor_neighbor
*nb
,
1340 extern void cor_nbcongwin_data_acked(struct cor_neighbor
*nb
,
1343 extern void cor_nbcongwin_data_sent(struct cor_neighbor
*nb
, __u32 bytes_sent
);
1345 extern int cor_nbcongwin_send_allowed(struct cor_neighbor
*nb
);
1349 static inline void cor_nbcongwin_data_retransmitted(struct cor_neighbor
*nb
,
1354 static inline void cor_nbcongwin_data_acked(struct cor_neighbor
*nb
,
1359 static inline void cor_nbcongwin_data_sent(struct cor_neighbor
*nb
,
1364 static inline int cor_nbcongwin_send_allowed(struct cor_neighbor
*nb
)
1370 extern unsigned long cor_get_conn_idletime(struct cor_conn
*trgt_out_l
);
1372 extern struct cor_qos_queue
*cor_get_queue(struct net_device
*dev
);
1374 extern int cor_destroy_queue(struct net_device
*dev
);
1376 extern int cor_create_queue(struct net_device
*dev
);
1378 #define QOS_RESUME_DONE 0
1379 #define QOS_RESUME_CONG 1
1380 #define QOS_RESUME_NEXTNEIGHBOR 2 /* cor_resume_neighbors() internal */
1381 #define QOS_RESUME_EXIT 3
1383 #define QOS_CALLER_KPACKET 0
1384 #define QOS_CALLER_CONN_RETRANS 1
1385 #define QOS_CALLER_ANNOUNCE 2
1386 #define QOS_CALLER_NEIGHBOR 3
1388 static inline void cor_schedule_qos_resume(struct cor_qos_queue
*q
)
1390 if (atomic_cmpxchg(&q
->qos_resume_scheduled
, 0, 1) == 0) {
1392 wake_up(&q
->qos_resume_wq
);
1396 extern void cor_qos_enqueue(struct cor_qos_queue
*q
,
1397 struct cor_resume_block
*rb
, unsigned long cmsg_send_start_j
,
1398 ktime_t cmsg_send_start_kt
, int caller
,
1399 int from_nbnotactive_resume
);
1401 extern void cor_qos_remove_conn(struct cor_conn
*trgt_out_l
);
1403 extern int cor_may_send_announce(struct net_device
*dev
);
1405 extern struct sk_buff
*cor_create_packet_cmsg(struct cor_neighbor
*nb
, int size
,
1406 gfp_t alloc_flags
, __u64 seqno
);
1408 extern struct sk_buff
*cor_create_packet(struct cor_neighbor
*nb
, int size
,
1411 extern struct sk_buff
*cor_create_packet_conndata(struct cor_neighbor
*nb
,
1412 int size
, gfp_t alloc_flags
, __u32 conn_id
, __u64 seqno
,
1413 __u8 windowused
, __u8 flush
);
1415 extern void cor_qos_enqueue_conn(struct cor_conn
*trgt_out_lx
);
1417 extern void cor_dev_down(void);
1419 extern int cor_dev_up(void);
1421 extern void __exit
cor_dev_exit1(void);
1423 extern int __init
cor_dev_init(void);
1426 static inline __u16
cor_enc_priority(__u32 value
)
1432 while ((value
>> exponent
) > 255) {
1435 BUG_ON(exponent
> 15);
1437 mantissa
= (value
>> exponent
);
1438 ret
= (mantissa
<< 4) | exponent
;
1443 static inline __u32
cor_dec_priority(__u16 priority
)
1445 __u32 mantissa
= (__u32
) (priority
>> 4);
1446 __u16 exponent
= (priority
& 15);
1448 BUG_ON(priority
> 4095);
1449 return (mantissa
<< exponent
);
1452 static inline __u32
cor_priority_max(void)
1454 return cor_dec_priority(4095);
1457 static inline __u16
cor_enc_window(__u64 value
)
1463 while ((value
>> exponent
) > 2047) {
1466 if (unlikely(exponent
> 31))
1469 mantissa
= (value
>> exponent
);
1470 ret
= (mantissa
<< 5) | exponent
;
1471 BUG_ON(ret
> 65535);
1475 static inline __u32
cor_dec_window(__u16 value
)
1477 __u64 mantissa
= (__u64
) (value
>> 5);
1478 __u16 exponent
= (value
& 31);
1480 return (mantissa
<< exponent
);
1483 extern __u8
__attribute__((const)) cor_enc_log_64_11(__u32 value
);
1485 extern __u32
__attribute__((const)) cor_dec_log_64_11(__u8 value
);
1487 extern void cor_swap_list_items(struct list_head
*lh1
, struct list_head
*lh2
);
1489 extern int __init
cor_util_init(void);
1494 extern atomic_t cor_num_neighs
;
1496 extern int cor_is_from_nb(struct sk_buff
*skb
, struct cor_neighbor
*nb
);
1498 extern struct cor_neighbor
*_cor_get_neigh_by_mac(struct net_device
*dev
,
1501 extern struct cor_neighbor
*cor_get_neigh_by_mac(struct sk_buff
*skb
);
1503 extern struct cor_neighbor
*cor_find_neigh(__be64 addr
);
1505 extern void cor_resend_rcvmtu(struct net_device
*dev
);
1507 extern __u32
cor_generate_neigh_list(char *buf
, __u32 buflen
);
1509 extern void cor_reset_neighbors(struct net_device
*dev
);
1511 extern int cor_get_neigh_state(struct cor_neighbor
*nb
);
1513 extern void cor_ping_resp(struct cor_neighbor
*nb
, __u32 cookie
,
1516 extern __u32
cor_add_ping_req(struct cor_neighbor
*nb
,
1517 unsigned long *last_ping_time
);
1519 extern void cor_ping_sent(struct cor_neighbor
*nb
, __u32 cookie
);
1521 extern void cor_unadd_ping_req(struct cor_neighbor
*nb
, __u32 cookie
,
1522 unsigned long last_ping_time
, int congested
);
1524 #define TIMETOSENDPING_NO 0
1525 #define TIMETOSENDPING_YES 1
1526 #define TIMETOSENDPING_FORCE 2
1527 extern int cor_time_to_send_ping(struct cor_neighbor
*nb
);
1529 extern unsigned long cor_get_next_ping_time(struct cor_neighbor
*nb
);
1531 extern void cor_add_neighbor(struct cor_neighbor_discdata
*nb_dd
);
1533 extern struct cor_conn
*cor_get_conn(struct cor_neighbor
*nb
, __u32 conn_id
);
1535 extern int cor_insert_connid(struct cor_neighbor
*nb
,
1536 struct cor_conn
*src_in_ll
);
1538 extern void cor_insert_connid_reuse(struct cor_neighbor
*nb
, __u32 conn_id
);
1540 extern int cor_connid_alloc(struct cor_neighbor
*nb
,
1541 struct cor_conn
*src_in_ll
);
1543 extern int __init
cor_neighbor_init(void);
1545 extern void __exit
cor_neighbor_exit2(void);
1547 /* neigh_ann_rcv.c */
1548 extern int cor_rcv_announce(struct sk_buff
*skb
);
1550 extern int __init
cor_neigh_ann_rcv_init(void);
1552 extern void __exit
cor_neigh_ann_rcv_exit2(void);
1554 /* neigh_ann_snd.c */
1555 extern int _cor_send_announce(struct cor_announce_data
*ann
, int fromqos
,
/* takes a struct kref pointer, i.e. has the shape of a kref release callback */
extern void cor_announce_data_free(struct kref *ref);
1560 extern void cor_announce_send_start(struct net_device
*dev
, char *mac
,
extern void cor_announce_send_stop(struct net_device *dev, char *mac,
		int type);
1566 extern void cor_kernel_packet(struct cor_neighbor
*nb
, struct sk_buff
*skb
,
1570 struct cor_control_msg_out
;
1572 #define ACM_PRIORITY_LOW 1 /* oom recovery easy */
1573 #define ACM_PRIORITY_MED 2 /* oom may cause timeouts */
1574 #define ACM_PRIORITY_HIGH 3 /* cm acks - needed for freeing old cms */
1576 extern struct cor_control_msg_out
*cor_alloc_control_msg(
1577 struct cor_neighbor
*nb
, int priority
);
1579 extern void cor_free_control_msg(struct cor_control_msg_out
*cm
);
1581 extern void cor_retransmit_timerfunc(struct timer_list
*retrans_timer
);
1583 extern void cor_kern_ack_rcvd(struct cor_neighbor
*nb
, __u64 seqno
);
1585 extern int cor_send_messages(struct cor_neighbor
*nb
,
1586 unsigned long cmsg_send_start_j
, ktime_t cmsg_send_start_kt
,
1589 extern void cor_controlmsg_timerfunc(struct timer_list
*cmsg_timer
);
1591 extern void cor_schedule_controlmsg_timer(struct cor_neighbor
*nb_cmsglocked
);
1593 extern void cor_send_rcvmtu(struct cor_neighbor
*nb
);
1595 extern void cor_send_pong(struct cor_neighbor
*nb
, __u32 cookie
,
1596 ktime_t ping_rcvtime
);
1598 extern int cor_send_reset_conn(struct cor_neighbor
*nb
, __u32 conn_id
,
1601 extern void cor_send_ack(struct cor_neighbor
*nb
, __u64 seqno
, __u8 fast
);
1603 extern void cor_send_ack_conn_ifneeded(struct cor_conn
*src_in_l
,
1604 __u64 seqno_ooo
, __u32 ooo_length
);
1606 extern void cor_send_priority(struct cor_conn
*trgt_out_ll
, __u16 priority
);
1608 extern void cor_free_ack_conns(struct cor_conn
*src_in_lx
);
1610 extern void cor_send_connect_success(struct cor_control_msg_out
*cm
,
1611 __u32 conn_id
, struct cor_conn
*src_in
);
1613 extern void cor_send_connect_nb(struct cor_control_msg_out
*cm
, __u32 conn_id
,
1614 __u64 seqno1
, __u64 seqno2
, struct cor_conn
*src_in_ll
);
1616 extern void cor_send_conndata(struct cor_control_msg_out
*cm
, __u32 conn_id
,
1617 __u64 seqno
, char *data_orig
, char *data
, __u32 datalen
,
1618 __u8 windowused
, __u8 flush
, __u8 highlatency
,
1619 struct cor_conn_retrans
*cr
);
1621 extern int __init
cor_kgen_init(void);
1623 extern void __exit
cor_kgen_exit2(void);
1626 extern struct kmem_cache
*cor_connid_reuse_slab
;
1628 extern atomic_t cor_num_conns
;
1630 extern spinlock_t cor_bindnodes
;
1632 extern int cor_new_incoming_conn_allowed(struct cor_neighbor
*nb
);
1634 extern __u32
_cor_conn_refresh_priority(struct cor_conn
*cn_lx
);
1636 extern __u32
cor_conn_refresh_priority(struct cor_conn
*cn
, int locked
);
1638 extern void cor_set_conn_in_priority(struct cor_neighbor
*nb
, __u32 conn_id
,
1639 struct cor_conn
*src_in
, __u8 priority_seqno
, __u16 priority
);
1641 extern void cor_conn_set_last_act(struct cor_conn
*trgt_out_lx
);
1643 extern int cor_conn_init_out(struct cor_conn
*trgt_unconn_ll
,
1644 struct cor_neighbor
*nb
, __u32 rcvd_connid
,
1645 int use_rcvd_connid
);
1647 extern int cor_conn_init_sock_source(struct cor_conn
*cn
);
1649 extern void cor_conn_init_sock_target(struct cor_conn
*cn
);
1651 extern __u32
cor_list_services(char *buf
, __u32 buflen
);
1653 extern void cor_set_publish_service(struct cor_sock
*cs
, __u8 value
);
1655 extern void cor_close_port(struct cor_sock
*cs
);
1657 extern int cor_open_port(struct cor_sock
*cs_l
, __be32 port
);
1659 #define CONNECT_PORT_OK 0
1660 #define CONNECT_PORT_PORTCLOSED 1
1661 #define CONNECT_PORT_TEMPORARILY_OUT_OF_RESOURCES 2
1663 extern int cor_connect_port(struct cor_conn
*trgt_unconn_ll
, __be32 port
);
1665 extern int cor_connect_neigh(struct cor_conn
*trgt_unconn_ll
, __be64 addr
);
1667 extern struct cor_conn_bidir
* cor_alloc_conn(gfp_t allocflags
,
1668 __u8 is_highlatency
);
1670 extern void cor_reset_conn_locked(struct cor_conn_bidir
*cnb_ll
);
1672 extern void cor_reset_conn(struct cor_conn
*cn
);
1675 extern void cor_reset_ooo_queue(struct cor_conn
*src_in_lx
);
1677 extern void cor_drain_ooo_queue(struct cor_conn
*src_in_l
);
1679 extern void cor_conn_rcv(struct cor_neighbor
*nb
,
1680 struct sk_buff
*skb
, char *data
, __u32 len
,
1681 __u32 conn_id
, __u64 seqno
,
1682 __u8 windowused
, __u8 flush
);
1684 extern int __init
cor_rcv_init(void);
1686 extern void __exit
cor_rcv_exit2(void);
1688 /* conn_src_sock.c */
1689 extern void cor_update_src_sock_sndspeed(struct cor_conn
*src_sock_l
,
1692 extern int cor_sock_sndbufavailable(struct cor_conn
*src_sock_lx
,
1696 #define RC_FTC_OOM 1
1697 #define RC_FTC_ERR 2
1698 extern int _cor_mngdsocket_flushtoconn(struct cor_conn
*src_sock_l
);
1700 extern int cor_mngdsocket_flushtoconn_ctrl(struct cor_sock
*cs_m_l
,
1701 __u8 send_eof
, __u8 send_rcvend
,
1702 __u8 send_keepalive_resp
, __be32 keepalive_resp_cookie
);
1704 extern int cor_mngdsocket_flushtoconn_data(struct cor_sock
*cs_m_l
);
1706 extern void cor_keepalive_req_timerfunc(struct timer_list
*retrans_conn_timer
);
1708 extern void cor_keepalive_req_sched_timer(struct cor_conn
*src_sock_lx
);
1710 extern void cor_keepalive_resp_rcvd(struct cor_sock
*cs_m_l
, __be32 cookie
);
1712 extern int __init
cor_conn_src_sock_init1(void);
1714 extern void __exit
cor_conn_src_sock_exit1(void);
1716 /* conn_trgt_unconn.c */
1717 extern int cor_encode_len(char *buf
, int buflen
, __u32 len
);
1719 extern void cor_proc_cpacket(struct cor_conn
*trgt_unconn
);
1721 /* conn_trgt_out.c */
1722 extern void cor_free_connretrans(struct kref
*ref
);
1724 extern void cor_reschedule_conn_retrans_timer(
1725 struct cor_neighbor
*nb_retranslocked
);
1727 extern void cor_cancel_all_conn_retrans(struct cor_conn
*trgt_out_l
);
1729 extern int cor_send_retrans(struct cor_neighbor
*nb
, int *sent
);
1731 extern void cor_retransmit_conn_timerfunc(
1732 struct timer_list
*retrans_timer_conn
);
1734 extern void cor_conn_ack_ooo_rcvd(struct cor_neighbor
*nb
, __u32 conn_id
,
1735 struct cor_conn
*trgt_out
, __u64 seqno_ooo
, __u32 length
,
1736 __u64
*bytes_acked
);
1738 extern void cor_conn_ack_rcvd(struct cor_neighbor
*nb
, __u32 conn_id
,
1739 struct cor_conn
*trgt_out
, __u64 seqno
, int setwindow
,
1740 __u16 window
, __u8 bufsize_changerate
, __u64
*bytes_acked
);
1742 extern void cor_schedule_retransmit_conn(struct cor_conn_retrans
*cr
,
1743 int connlocked
, int nbretrans_locked
);
1745 extern int cor_srcin_buflimit_reached(struct cor_conn
*src_in_lx
);
1747 /* RC_FLUSH_CONN_OUT_SENT | RC_FLUSH_CONN_OUT_{^SENT} */
1748 #define RC_FLUSH_CONN_OUT_OK 1
1749 #define RC_FLUSH_CONN_OUT_SENT_CONG 2 /* cor_flush_out internal only */
1750 #define RC_FLUSH_CONN_OUT_NBNOTACTIVE 3
1751 #define RC_FLUSH_CONN_OUT_CONG 4
1752 #define RC_FLUSH_CONN_OUT_MAXSENT 5
1753 #define RC_FLUSH_CONN_OUT_OOM 6
1755 extern int _cor_flush_out(struct cor_conn
*trgt_out_lx
, __u32 maxsend
,
1756 __u32
*sent
, int from_qos
, int maxsend_forcedelay
);
1758 static inline int cor_flush_out(struct cor_conn
*trgt_out_lx
, __u32
*sent
)
1760 int rc
= _cor_flush_out(trgt_out_lx
, 1 << 30, sent
, 0, 0);
1762 if (rc
== RC_FLUSH_CONN_OUT_CONG
|| rc
== RC_FLUSH_CONN_OUT_MAXSENT
||
1763 rc
== RC_FLUSH_CONN_OUT_OOM
||
1764 rc
== RC_FLUSH_CONN_OUT_NBNOTACTIVE
)
1765 cor_qos_enqueue_conn(trgt_out_lx
);
1770 extern int __init
cor_snd_init(void);
1772 extern void __exit
cor_snd_exit2(void);
1774 /* conn_trgt_sock.c */
1775 extern void cor_flush_sock_managed(struct cor_conn
*trgt_sock_lx
,
1776 int from_recvmsg
, __u8
*do_wake_sender
);
1778 extern void cor_flush_sock(struct cor_conn
*trgt_sock_lx
);
1780 /* conn_databuf.c */
1781 extern struct kmem_cache
*cor_data_buf_item_slab
;
1783 extern void cor_databuf_init(struct cor_conn
*cn_init
);
1785 extern void cor_bufsize_init(struct cor_conn
*cn_l
, __u32 bufsize
);
1787 extern int cor_account_bufspace(struct cor_conn
*cn_lx
);
1789 extern int cor_conn_src_unconn_write_allowed(struct cor_conn
*src_unconn_lx
);
1791 extern void cor_update_windowlimit(struct cor_conn
*src_in_lx
);
1793 extern __u8
_cor_bufsize_update_get_changerate(struct cor_conn
*cn_lx
);
1795 static inline int cor_bufsize_initial_phase(struct cor_conn
*cn_lx
)
1797 return unlikely(cn_lx
->bufsize
.bytes_rcvd
!= (1 << 24) - 1 &&
1798 cn_lx
->bufsize
.bytes_rcvd
< cn_lx
->bufsize
.bufsize
);
1801 static inline int cor_ackconn_urgent(struct cor_conn
*cn_lx
)
1803 return cor_bufsize_initial_phase(cn_lx
) ||
1804 cn_lx
->bufsize
.state
== BUFSIZE_INCR_FAST
;
1807 extern void cor_bufsize_read_to_sock(struct cor_conn
*trgt_sock_lx
);
1809 extern void cor_databuf_ackdiscard(struct cor_conn
*cn_lx
);
1811 extern void cor_reset_seqno(struct cor_conn
*cn_l
, __u64 initseqno
);
1813 extern void cor_databuf_pull(struct cor_conn
*cn_lx
, char *dst
, __u32 len
);
/*
 * Pull up to len bytes of buffered data into dst: len is first limited to
 * data_buf.read_remaining of the conn and then cor_databuf_pull() is called
 * with the (possibly reduced) length.
 *
 * NOTE(review): the declaration of len and the return statement are on lines
 * not visible in this excerpt; presumably the reduced len is returned -
 * confirm.
 */
1815 static inline __u32
cor_databuf_trypull(struct cor_conn
*cn_l
, char *dst
,
1818 if (len
> cn_l
->data_buf
.read_remaining
)
1819 len
= cn_l
->data_buf
.read_remaining
;
1820 cor_databuf_pull(cn_l
, dst
, len
);
1824 extern void cor_databuf_unpull_dpi(struct cor_conn
*trgt_sock
,
1825 struct cor_sock
*cs
, struct cor_data_buf_item
*item
,
1826 __u16 next_read_offset
);
1828 extern void cor_databuf_pull_dbi(struct cor_sock
*cs_rl
,
1829 struct cor_conn
*trgt_sock_l
);
1831 extern void cor_databuf_unpull(struct cor_conn
*trgt_out_l
, __u32 bytes
);
1833 extern void cor_databuf_pullold(struct cor_conn
*trgt_out_l
, __u64 startpos
,
1834 char *dst
, int len
);
1836 extern void cor_databuf_ack(struct cor_conn
*trgt_out_l
, __u64 pos
);
1838 extern void cor_databuf_ackread(struct cor_conn
*cn_lx
);
1840 extern __u32
_cor_receive_buf(struct cor_conn
*cn_lx
, char *buf
, __u32 datalen
,
1841 int from_sock
, __u8 windowused
, __u8 flush
);
1843 static inline __u32
cor_receive_buf(struct cor_conn
*cn_lx
, char *buf
,
1844 __u32 datalen
, __u8 windowused
, __u8 flush
)
1846 return _cor_receive_buf(cn_lx
, buf
, datalen
, 0, windowused
, flush
);
1849 static inline __u32
cor_receive_sock(struct cor_conn
*src_sock_l
, char *buf
,
1850 __u32 datalen
, __u8 flush
)
1854 BUG_ON(src_sock_l
->sourcetype
!= SOURCE_SOCK
);
1856 ret
= _cor_receive_buf(src_sock_l
, buf
, datalen
, 1,
1857 src_sock_l
->src
.sock
.last_windowused
, flush
);
1859 if (likely(ret
> 0)) {
1860 __u32 bufsize
= src_sock_l
->bufsize
.bufsize
>> BUFSIZE_SHIFT
;
1861 __u32 bufused
= src_sock_l
->data_buf
.read_remaining
;
1863 if (bufused
>= bufsize
)
1864 src_sock_l
->src
.sock
.last_windowused
= 31;
1865 else if (unlikely(bufused
* 31 > U32_MAX
))
1866 src_sock_l
->src
.sock
.last_windowused
=
1867 bufused
/(bufsize
/31);
1869 src_sock_l
->src
.sock
.last_windowused
=
1870 (bufused
*31)/bufsize
;
1876 extern __u32
cor_receive_skb(struct cor_conn
*src_in_l
, struct sk_buff
*skb
,
1877 __u8 windowused
, __u8 flush
);
1879 extern void cor_wake_sender(struct cor_conn
*cn
);
1881 extern int __init
cor_forward_init(void);
1883 extern void __exit
cor_forward_exit2(void);
1886 extern void cor_free_sock(struct kref
*ref
);
1888 extern int cor_socket_setsockopt_tos(struct socket
*sock
,
1889 char __user
*optval
, unsigned int optlen
);
1891 extern int cor_socket_setsockopt_priority(struct socket
*sock
,
1892 char __user
*optval
, unsigned int optlen
);
1894 extern int cor_socket_socketpair(struct socket
*sock1
, struct socket
*sock2
);
1896 extern int cor_socket_getname(struct socket
*sock
, struct sockaddr
*addr
,
1899 extern int cor_socket_mmap(struct file
*file
, struct socket
*sock
,
1900 struct vm_area_struct
*vma
);
1902 extern int _cor_createsock(struct net
*net
, struct socket
*sock
, int protocol
,
1903 int kern
, __u8 is_client
);
1905 extern int __init
cor_sock_init1(void);
1907 extern int __init
cor_sock_init2(void);
1909 extern void __exit
cor_sock_exit1(void);
1911 /* sock_rdaemon.c */
1912 extern int cor_is_device_configurated(struct net_device
*dev
);
1914 extern int cor_create_rdaemon_sock(struct net
*net
, struct socket
*sock
,
1915 int protocol
, int kern
);
1917 extern int cor_rdreq_connect(struct cor_sock
*cs
);
1919 extern void cor_usersock_release(struct cor_sock
*cs
);
1921 extern int __init
cor_rd_init1(void);
1923 extern int __init
cor_rd_init2(void);
1925 extern void __exit
cor_rd_exit1(void);
1927 extern void __exit
cor_rd_exit2(void);
1930 extern int cor_create_raw_sock(struct net
*net
, struct socket
*sock
,
1931 int protocol
, int kern
);
1933 /* sock_managed.c */
1934 extern struct cor_sock
*cor_get_sock_by_cookie(__be64 cookie
);
1936 extern void __cor_set_sock_connecterror(struct cor_sock
*cs_m_l
, int errorno
);
1938 extern void _cor_set_sock_connecterror(struct cor_sock
*cs
, int errorno
);
1940 extern void cor_mngdsocket_chksum(char *hdr
, __u32 hdrlen
,
1941 char *data
, __u32 datalen
,
1942 char *chksum
, __u32 chksum_len
);
1944 static inline void cor_set_sock_connecterror(__be64 cookie
, int errorno
)
1946 struct cor_sock
*cs
= cor_get_sock_by_cookie(cookie
);
1948 _cor_set_sock_connecterror(cs
, errorno
);
1949 kref_put(&cs
->ref
, cor_free_sock
);
1952 extern void cor_mngdsocket_readfromconn_fromatomic(struct cor_sock
*cs
);
1954 extern void cor_mngdsocket_readfromconn_wq(struct work_struct
*work
);
1956 extern int cor_create_managed_sock(struct net
*net
, struct socket
*sock
,
1957 int protocol
, int kern
);
1959 extern int __init
cor_sock_managed_init1(void);
1962 static inline struct cor_skb_procstate
*cor_skb_pstate(struct sk_buff
*skb
)
1964 BUILD_BUG_ON(sizeof(struct cor_skb_procstate
) > sizeof(skb
->cb
));
1965 return (struct cor_skb_procstate
*) &skb
->cb
[0];
1968 static inline struct sk_buff
*cor_skb_from_pstate(struct cor_skb_procstate
*ps
)
1970 return (struct sk_buff
*) (((char *)ps
) - offsetof(struct sk_buff
, cb
));
1973 static inline int cor_qos_fastsend_allowed_conn_retrans(struct cor_neighbor
*nb
)
1975 return atomic_read(&nb
->queue
->cong_status
) < CONGSTATUS_RETRANS
;
/*
 * Looks up the qos queue of dev with cor_get_queue(), compares the queue's
 * cong_status against CONGSTATUS_ANNOUNCE (result is nonzero if it is
 * below) and releases the queue reference again with kref_put().
 *
 * NOTE(review): the declaration of rc, whatever is done between the lookup
 * and the comparison and the return statement are on lines not visible in
 * this excerpt - confirm against the full source.
 */
1978 static inline int cor_qos_fastsend_allowed_announce(struct net_device
*dev
)
1981 struct cor_qos_queue
*q
= cor_get_queue(dev
);
1986 rc
= atomic_read(&q
->cong_status
) < CONGSTATUS_ANNOUNCE
;
1988 kref_put(&q
->ref
, cor_free_qos
);
1993 static inline int cor_qos_fastsend_allowed_conn(struct cor_conn
*trgt_out_lx
)
1995 struct cor_qos_queue
*q
= trgt_out_lx
->trgt
.out
.nb
->queue
;
1997 return atomic_read(&q
->cong_status
) < CONGSTATUS_CONNDATA
;
2000 static inline __u32
cor_rcv_mtu(struct cor_neighbor
*nb
)
2002 return nb
->dev
->mtu
;
2005 static inline __u32
cor_snd_mtu(struct cor_neighbor
*nb
)
2007 return min((__u32
) nb
->dev
->mtu
,
2008 (__u32
) atomic_read(&nb
->remote_rcvmtu
));
2011 static inline __u32
cor_mss(struct cor_neighbor
*nb
, __u32 l3overhead
)
2013 return cor_snd_mtu(nb
) - LL_RESERVED_SPACE(nb
->dev
) - l3overhead
;
2016 static inline __u32
cor_mss_cmsg(struct cor_neighbor
*nb
)
2018 return cor_mss(nb
, 7);
/*
 * Payload size for conn data packets, based on cor_mss() with an l3
 * overhead of 11 bytes. Visible logic: a special case if this mss is below
 * 256, if highlatency is set or if LOWLATENCY_LOWERMTU is 0; a loop over
 * the powers of two from 256 up to (excluding) 4096; and a final return
 * which rounds the mss down to a multiple of 4096.
 *
 * NOTE(review): the declaration of i and the bodies of the if statement and
 * of the loop are on lines not visible in this excerpt.
 */
2021 static inline __u32
cor_mss_conndata(struct cor_neighbor
*nb
, int highlatency
)
2023 __u32 mss_tmp
= cor_mss(nb
, 11);
2026 if (mss_tmp
< 256 || highlatency
|| LOWLATENCY_LOWERMTU
== 0)
2029 for (i
= 256; i
< 4096; i
*= 2) {
2034 return mss_tmp
- mss_tmp
%4096;
/*
 * Evaluates to nonzero if size is at least half of
 * cor_mss_conndata(nb, 0) or if size is larger than KP_CONN_DATA_MAXLEN.
 *
 * NOTE(review): the declaration of the size parameter is on a line not
 * visible in this excerpt.
 */
2037 static inline __u32
cor_send_conndata_as_skb(struct cor_neighbor
*nb
,
2040 return size
>= cor_mss_conndata(nb
, 0)/2 || size
> KP_CONN_DATA_MAXLEN
;
2043 static inline long cor_calc_timeout(__u32 latency_us
, __u32 latency_stddev_us
,
2044 __u32 max_remote_ack_delay_us
)
2046 unsigned long addto
;
2048 if (unlikely(unlikely(latency_us
> 1000000000) ||
2049 unlikely(latency_stddev_us
> 500000000) ||
2050 unlikely(max_remote_ack_delay_us
> 1000000000))) {
2051 addto
= msecs_to_jiffies(latency_us
/1000 + latency_us
/4000 +
2052 latency_stddev_us
/333 +
2053 max_remote_ack_delay_us
/1000);
2055 addto
= usecs_to_jiffies(latency_us
+ latency_us
/4 +
2056 latency_stddev_us
*3 + max_remote_ack_delay_us
);
2060 * 2 is added because
2061 * 1) _to_jiffies rounds down, but should round up, so add 1 to
2063 * 2) even if latency is 0, we never want to schedule the retransmit
2064 * to run right now, so add 1 more
2066 return jiffies
+ 2 + addto
;
/*
 * Store the 8 bytes of value at dst in memory order (no byte swapping).
 * The copy is done bytewise, so dst does not need to be aligned.
 */
static inline void cor_put_be64(char *dst, __be64 value)
{
	const char *src = (const char *) &value;
	unsigned int i;

	for (i = 0; i < 8; i++)
		dst[i] = src[i];
}
2083 static inline void cor_put_u64(char *dst
, __u64 value
)
2085 cor_put_be64(dst
, cpu_to_be64(value
));
2088 static inline void cor_put_u48(char *dst
, __u64 value
)
2090 char *p_value
= (char *) &value
;
2092 value
= cpu_to_be64(value
);
2094 dst
[0] = p_value
[2];
2095 dst
[1] = p_value
[3];
2096 dst
[2] = p_value
[4];
2097 dst
[3] = p_value
[5];
2098 dst
[4] = p_value
[6];
2099 dst
[5] = p_value
[7];
/*
 * Store the 4 bytes of value at dst in memory order (no byte swapping).
 * The copy is done bytewise, so dst does not need to be aligned.
 */
static inline void cor_put_be32(char *dst, __be32 value)
{
	const char *src = (const char *) &value;
	unsigned int i;

	for (i = 0; i < 4; i++)
		dst[i] = src[i];
}
2112 static inline void cor_put_u32(char *dst
, __u32 value
)
2114 cor_put_be32(dst
, cpu_to_be32(value
));
/*
 * Store the 2 bytes of value at dst in memory order (no byte swapping).
 * The copy is done bytewise, so dst does not need to be aligned.
 */
static inline void cor_put_be16(char *dst, __be16 value)
{
	const char *src = (const char *) &value;
	unsigned int i;

	for (i = 0; i < 2; i++)
		dst[i] = src[i];
}
2125 static inline void cor_put_u16(char *dst
, __u16 value
)
2127 cor_put_be16(dst
, cpu_to_be16(value
));
/*
 * Wrapper around skb_pull(skb, len) which checks the returned pointer
 * for 0.
 *
 * NOTE(review): the statement guarded by the check and the final return are
 * on lines not visible in this excerpt - confirm which pointer is returned
 * to the caller.
 */
2130 static inline char *cor_pull_skb(struct sk_buff
*skb
, unsigned int len
)
2132 char *ptr
= skb_pull(skb
, len
);
2134 if (unlikely(ptr
== 0))
/*
 * Assemble a __be64 from the 8 bytes at buf. The bytes are copied one by
 * one in memory order, so no byte swapping takes place and buf is only
 * accessed bytewise.
 *
 * NOTE(review): the declaration of ret, any statement preceding the copy
 * and the return statement are on lines not visible in this excerpt.
 */
2140 static inline __be64
cor_parse_be64(char *buf
)
2146 ((char *)&ret
)[0] = buf
[0];
2147 ((char *)&ret
)[1] = buf
[1];
2148 ((char *)&ret
)[2] = buf
[2];
2149 ((char *)&ret
)[3] = buf
[3];
2150 ((char *)&ret
)[4] = buf
[4];
2151 ((char *)&ret
)[5] = buf
[5];
2152 ((char *)&ret
)[6] = buf
[6];
2153 ((char *)&ret
)[7] = buf
[7];
2158 static inline __u64
cor_parse_u64(char *buf
)
2160 return be64_to_cpu(cor_parse_be64(buf
));
2163 static inline __u64
cor_parse_u48(char *ptr
)
2167 ((char *)&ret
)[0] = 0;
2168 ((char *)&ret
)[1] = 0;
2169 ((char *)&ret
)[2] = ptr
[0];
2170 ((char *)&ret
)[3] = ptr
[1];
2171 ((char *)&ret
)[4] = ptr
[2];
2172 ((char *)&ret
)[5] = ptr
[3];
2173 ((char *)&ret
)[6] = ptr
[4];
2174 ((char *)&ret
)[7] = ptr
[5];
2176 return be64_to_cpu(ret
);
/*
 * Assemble a __be32 from the 4 bytes at ptr. The bytes are copied one by
 * one in memory order, so no byte swapping takes place and ptr is only
 * accessed bytewise.
 *
 * NOTE(review): the declaration of ret, any statement preceding the copy
 * and the return statement are on lines not visible in this excerpt.
 */
2179 static inline __be32
cor_parse_be32(char *ptr
)
2185 ((char *)&ret
)[0] = ptr
[0];
2186 ((char *)&ret
)[1] = ptr
[1];
2187 ((char *)&ret
)[2] = ptr
[2];
2188 ((char *)&ret
)[3] = ptr
[3];
2193 static inline __u32
cor_parse_u32(char *ptr
)
2195 return be32_to_cpu(cor_parse_be32(ptr
));
/*
 * Assemble a __be16 from the 2 bytes at ptr. The bytes are copied one by
 * one in memory order, so no byte swapping takes place and ptr is only
 * accessed bytewise.
 *
 * NOTE(review): the declaration of ret, any statement preceding the copy
 * and the return statement are on lines not visible in this excerpt.
 */
2198 static inline __be16
cor_parse_be16(char *ptr
)
2204 ((char *)&ret
)[0] = ptr
[0];
2205 ((char *)&ret
)[1] = ptr
[1];
2210 static inline __u16
cor_parse_u16(char *ptr
)
2212 return be16_to_cpu(cor_parse_be16(ptr
));
/*
 * Returns the first byte at ptr, converted to __u8.
 *
 * NOTE(review): one line of the body preceding the return is not visible in
 * this excerpt.
 */
2215 static inline __u8
cor_parse_u8(char *ptr
)
2218 return (__u8
) ptr
[0];
2221 static inline __u64
cor_pull_u48(struct sk_buff
*skb
)
2223 return cor_parse_u48(cor_pull_skb(skb
, 6));
2226 static inline __be32
cor_pull_be32(struct sk_buff
*skb
)
2228 return cor_parse_be32(cor_pull_skb(skb
, 4));
2231 static inline __u32
cor_pull_u32(struct sk_buff
*skb
)
2233 return cor_parse_u32(cor_pull_skb(skb
, 4));
2236 static inline __u16
cor_pull_u16(struct sk_buff
*skb
)
2238 return cor_parse_u16(cor_pull_skb(skb
, 2));
/*
 * Pull 1 byte from the skb via cor_pull_skb().
 *
 * NOTE(review): how the returned pointer is checked and used is on lines
 * not visible in this excerpt; presumably the byte it points to is
 * returned - confirm.
 */
2241 static inline __u8
cor_pull_u8(struct sk_buff
*skb
)
2243 char *ptr
= cor_pull_skb(skb
, 1);
/*
 * Checks whether cn_l is an incoming conn of neighbor nb with the given
 * conn_id: the condition is true if the sourcetype is not SOURCE_IN, if
 * src.in.nb or src.in.conn_id do not match or if the conn is reset
 * (isreset != 0).
 *
 * NOTE(review): the declaration of conn_id and both return statements are
 * on lines not visible in this excerpt; presumably 0 is returned if the
 * condition is true and nonzero otherwise - confirm.
 */
2249 static inline int cor_is_conn_in(struct cor_conn
*cn_l
, struct cor_neighbor
*nb
,
2252 if (unlikely(unlikely(cn_l
->sourcetype
!= SOURCE_IN
) ||
2253 unlikely(cn_l
->src
.in
.nb
!= nb
) ||
2254 unlikely(cn_l
->src
.in
.conn_id
!= conn_id
) ||
2255 unlikely(cn_l
->isreset
!= 0)))
/*
 * Checks whether the source of cn_l is the socket cs: the condition is true
 * if the sourcetype is not SOURCE_SOCK or if src.sock.ed->cs is not cs.
 *
 * NOTE(review): both return statements are on lines not visible in this
 * excerpt; presumably 0 is returned if the condition is true and nonzero
 * otherwise - confirm.
 */
2260 static inline int cor_is_src_sock(struct cor_conn
*cn_l
, struct cor_sock
*cs
)
2262 if (unlikely(unlikely(cn_l
->sourcetype
!= SOURCE_SOCK
) ||
2263 unlikely(cn_l
->src
.sock
.ed
->cs
!= cs
)))
/*
 * Checks whether the target of cn_l is the socket cs: the condition is true
 * if the targettype is not TARGET_SOCK or if trgt.sock.cs is not cs.
 *
 * NOTE(review): both return statements are on lines not visible in this
 * excerpt; presumably 0 is returned if the condition is true and nonzero
 * otherwise - confirm.
 */
2268 static inline int cor_is_trgt_sock(struct cor_conn
*cn_l
, struct cor_sock
*cs
)
2270 if (unlikely(unlikely(cn_l
->targettype
!= TARGET_SOCK
) ||
2271 unlikely(cn_l
->trgt
.sock
.cs
!= cs
)))
/* lower and upper limit used by cor_buf_optlen() */
2276 #define BUFLEN_MIN 128
2277 #define BUFLEN_MAX 4096
2278 #define PAGESIZE (1 << PAGE_SHIFT)
/*
 * Choose a buffer length for datalen bytes: starting at BUFLEN_MIN, optlen
 * is doubled for as long as it is smaller than datalen, smaller than
 * PAGESIZE and smaller than BUFLEN_MAX.
 *
 * NOTE(review): the lines between the initialisation and the loop (where
 * from_sock is presumably evaluated) and the return statement are not
 * visible in this excerpt.
 */
2280 static inline __u32
cor_buf_optlen(__u32 datalen
, int from_sock
)
2282 __u32 optlen
= BUFLEN_MIN
;
2287 while (optlen
< datalen
&& optlen
< PAGESIZE
&& optlen
< BUFLEN_MAX
)
2288 optlen
= (optlen
<< 1);
/*
 * Release a data buffer item depending on its type:
 * - DATABUF_BUF: the item itself is returned to cor_data_buf_item_slab
 * - DATABUF_SKB: the item is embedded in the processing state of an skb
 *   (funcstate.rcv.dbi); the skb is looked up with container_of() and
 *   cor_skb_from_pstate()
 *
 * NOTE(review): one statement preceding kmem_cache_free(), the statements
 * releasing the skb and the handling of other types are on lines not
 * visible in this excerpt.
 */
2293 static inline void cor_databuf_item_free(struct cor_data_buf_item
*item
)
2295 if (item
->type
== DATABUF_BUF
) {
2297 kmem_cache_free(cor_data_buf_item_slab
, item
);
2298 } else if (item
->type
== DATABUF_SKB
) {
2299 struct sk_buff
*skb
= cor_skb_from_pstate(container_of(item
,
2300 struct cor_skb_procstate
, funcstate
.rcv
.dbi
));
/* Reduce a sequence number to its lower 48 bits. */
static inline __u64 cor_seqno_clean(__u64 seqno)
{
	const __u64 seqno_mask = (1ULL << 48) - 1;

	return seqno & seqno_mask;
}
/*
 * Compare two sequence numbers for equality. Only the lower 48 bits are
 * significant; the upper 16 bits are shifted out before the comparison.
 */
static inline int cor_seqno_eq(__u64 seqno1, __u64 seqno2)
{
	__u64 differing_bits = seqno1 ^ seqno2;

	return (differing_bits << 16) == 0;
}
/*
 * Returns 1 if seqno1 is before seqno2 in 48 bit wrap-around arithmetic,
 * 0 otherwise (including equality).
 *
 * Both numbers are shifted left by 16 so that their 48 significant bits
 * occupy the top of a 64 bit word; seqno1 is before seqno2 if the unsigned
 * difference then has its top bit set.
 */
static inline int cor_seqno_before(__u64 seqno1, __u64 seqno2)
{
	__u64 diff = (seqno1 << 16) - (seqno2 << 16);

	/* 1ULL, not 1LL: shifting a signed 1 into the sign bit is undefined */
	return diff >= (1ULL << 63);
}
2326 static inline int cor_seqno_before_eq(__u64 seqno1
, __u64 seqno2
)
2328 return cor_seqno_eq(seqno1
, seqno2
) || cor_seqno_before(seqno1
, seqno2
);
2331 static inline int cor_seqno_after(__u64 seqno1
, __u64 seqno2
)
2333 return cor_seqno_before_eq(seqno1
, seqno2
) ? 0 : 1;
2336 static inline int cor_seqno_after_eq(__u64 seqno1
, __u64 seqno2
)
2338 return cor_seqno_before(seqno1
, seqno2
) ? 0 : 1;
2341 static inline int ktime_before_eq(ktime_t time1
, ktime_t time2
)
2343 return ktime_after(time1
, time2
) ? 0 : 1;
2346 static inline int ktime_after_eq(ktime_t time1
, ktime_t time2
)
2348 return ktime_before(time1
, time2
) ? 0 : 1;
/*
 * Update a sum kept in an atomic64_t when one of its summands changes from
 * oldvalue to newvalue. The current sum is read, a new sum is calculated
 * and written back with atomic64_cmpxchg(); if the sum was changed by
 * somebody else in the meantime, the value returned by cmpxchg becomes the
 * new starting point. It is a BUG if the sum is smaller than oldvalue or if
 * adding newvalue would wrap around.
 *
 * NOTE(review): the calculation of sum, the surrounding loop and the return
 * statement are on lines not visible in this excerpt.
 */
2351 static inline __u64
cor_update_atomic_sum(atomic64_t
*atomic_sum
,
2352 __u32 oldvalue
, __u32 newvalue
)
2354 __u64 sum_old
= atomic64_read(atomic_sum
);
2362 BUG_ON(sum
< oldvalue
);
2365 BUG_ON(sum
+ newvalue
< sum
);
2368 cmpxchg_ret
= atomic64_cmpxchg(atomic_sum
, sum_old
, sum
);
2370 if (likely(cmpxchg_ret
== sum_old
))
2373 sum_old
= cmpxchg_ret
;
/*
 * Mark the socket as ready to write (cs->ready_to_write = 1) and then
 * invoke the sk_write_space callback of the embedded struct sock.
 */
2379 static inline void cor_sk_write_space(struct cor_sock
*cs
)
2381 atomic_set(&cs
->ready_to_write
, 1);
2383 cs
->sk
.sk_write_space(&cs
->sk
);
/*
 * Mark the socket as ready to read (cs->ready_to_read = 1) and then invoke
 * the sk_data_ready callback of the embedded struct sock.
 */
2386 static inline void cor_sk_data_ready(struct cor_sock
*cs
)
2388 atomic_set(&cs
->ready_to_read
, 1);
2390 cs
->sk
.sk_data_ready(&cs
->sk
);
2393 /* the other direction may be locked only if called from cor_proc_cpacket */
2394 static inline void cor_flush_buf(struct cor_conn
*cn_lx
)
2396 if (unlikely(cn_lx
->targettype
== TARGET_UNCONNECTED
)) {
2397 cor_proc_cpacket(cn_lx
);
2398 } else if (cn_lx
->targettype
== TARGET_SOCK
) {
2399 cor_flush_sock(cn_lx
);
2400 } else if (cn_lx
->targettype
== TARGET_OUT
) {
2401 __u32 bytessent
= 0;
2403 cor_flush_out(cn_lx
, &bytessent
);
2404 } else if (unlikely(cn_lx
->targettype
== TARGET_DISCARD
)) {
2405 cor_databuf_ackdiscard(cn_lx
);