2 * Connection oriented routing
3 * Copyright (C) 2007-2021 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include <linux/net.h>
23 #include <asm/uaccess.h>
/*
 * cor_rawsocket_release_trypasssocket() - on release of a raw conn socket,
 * try to hand its src/trgt conn pair over to the managed socket registered
 * via COR_PASS_ON_CLOSE (cs->data.conn_raw.pass_on_close).
 *
 * NOTE(review): this rendering of the file is corrupted -- lines containing
 * braces, returns, gotos and blanks (orig. lines 28, 32-33, 44-47, 58-59,
 * 66-68, 86-89, 93-94, 110, 112, 114-115, 119-120, 122-125) are missing.
 * Presumably orig. 45-46 were "if (passto == 0) return 0;" and the function
 * returns nonzero when the handover happened -- TODO confirm against VCS.
 * Only comments are added below; code bytes are untouched.
 */
27 static int cor_rawsocket_release_trypasssocket(struct cor_sock
*cs
)
29 struct cor_sock
*passto
;
30 struct cor_conn
*src_sock
;
31 struct cor_conn
*trgt_sock
;
/* Snapshot and clear pass_on_close, src_sock, trgt_sock under cs->lock. */
34 mutex_lock(&(cs
->lock
));
36 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
37 passto
= cs
->data
.conn_raw
.pass_on_close
;
38 cs
->data
.conn_raw
.pass_on_close
= 0;
40 src_sock
= cs
->data
.conn_raw
.src_sock
;
41 trgt_sock
= cs
->data
.conn_raw
.trgt_sock
;
43 mutex_unlock(&(cs
->lock
));
/* Lock order here: passto->lock (mutex), then both conn rcv_locks (bh). */
48 mutex_lock(&(passto
->lock
));
49 spin_lock_bh(&(src_sock
->rcv_lock
));
50 spin_lock_bh(&(trgt_sock
->rcv_lock
));
52 BUG_ON(src_sock
->is_client
== 0);
53 BUG_ON(passto
->type
!= CS_TYPE_CONN_MANAGED
);
/* Handover is only legal while the managed socket is still CONNECTING and
 * not released; the statement taken on failure (orig. line 58) is missing
 * from this rendering -- presumably a goto to an error/unlock path. */
55 if (unlikely(unlikely(passto
->isreleased
!= 0) ||
56 unlikely(passto
->data
.conn_managed
.connect_state
!=
57 CS_CONNECTSTATE_CONNECTING
)))
60 BUG_ON(passto
->data
.conn_managed
.src_sock
!= 0);
61 BUG_ON(passto
->data
.conn_managed
.trgt_sock
!= 0);
/* If either conn direction was already reset, report ENETUNREACH to the
 * managed socket (error path continuation at orig. 66-68 is missing). */
63 if (unlikely(unlikely(src_sock
->isreset
!= 0) ||
64 unlikely(trgt_sock
->isreset
!= 0))) {
65 __cor_set_sock_connecterror(passto
, -ENETUNREACH
);
69 BUG_ON(src_sock
->sourcetype
!= SOURCE_SOCK
);
70 BUG_ON(src_sock
->source
.sock
.cs
!= cs
);
71 BUG_ON(trgt_sock
->targettype
!= TARGET_SOCK
);
72 BUG_ON(trgt_sock
->target
.sock
.cs
!= cs
);
/* Re-point both conn directions from cs to passto; the two kref_get()s
 * below pair with the two kref_put(cs->ref) at the end (one reference per
 * conn direction that used to point at cs). */
74 passto
->data
.conn_managed
.src_sock
= src_sock
;
75 passto
->data
.conn_managed
.trgt_sock
= trgt_sock
;
76 src_sock
->source
.sock
.cs
= passto
;
77 trgt_sock
->target
.sock
.cs
= passto
;
78 kref_get(&(passto
->ref
));
79 kref_get(&(passto
->ref
));
/* Switch the target side to managed-socket receive buffering. */
81 BUG_ON(passto
->data
.conn_managed
.rcv_buf
== 0);
82 trgt_sock
->target
.sock
.socktype
= SOCKTYPE_MANAGED
;
83 trgt_sock
->target
.sock
.rcv_buf_state
= RCV_BUF_STATE_INCOMPLETE
;
84 trgt_sock
->target
.sock
.rcv_buf
= passto
->data
.conn_managed
.rcv_buf
;
85 trgt_sock
->target
.sock
.rcvd
= 0;
90 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
91 spin_unlock_bh(&(src_sock
->rcv_lock
));
92 mutex_unlock(&(passto
->lock
));
/* Detach the conns from the raw socket being released. */
95 mutex_lock(&(cs
->lock
));
96 cs
->data
.conn_raw
.src_sock
= 0;
97 cs
->data
.conn_raw
.trgt_sock
= 0;
98 mutex_unlock(&(cs
->lock
));
100 lock_sock(&(cs
->sk
));
101 cs
->sk
.sk_socket
->state
= SS_CONNECTED
;
102 release_sock(&(cs
->sk
));
/* Mark the managed socket connected and wake any writer waiting on it. */
104 mutex_lock(&(passto
->lock
));
105 BUG_ON(passto
->type
!= CS_TYPE_CONN_MANAGED
);
106 passto
->data
.conn_managed
.connect_state
=
107 CS_CONNECTSTATE_CONNECTED
;
108 if (likely(passto
->isreleased
== 0)) {
109 atomic_set(&(passto
->ready_to_write
), 1);
111 passto
->sk
.sk_state_change(&(passto
->sk
));
113 mutex_unlock(&(passto
->lock
));
/* Drop the two cs references formerly held by the conn directions, and the
 * pass_on_close reference on passto taken by setsockopt. */
116 /* pointers from struct cor_conn */
117 kref_put(&(cs
->ref
), cor_kreffree_bug
);
118 kref_put(&(cs
->ref
), cor_kreffree_bug
);
121 kref_put(&(passto
->ref
), cor_free_sock
);
/*
 * cor_rawsocket_release() - proto_ops.release for raw sockets.
 * Returns any pending receive item to the conn's data buffer, then either
 * passes the conn pair to a managed socket (COR_PASS_ON_CLOSE) or resets it.
 *
 * NOTE(review): corrupted rendering -- "type" is read under cs->lock in the
 * missing orig. lines 132-133 (the local declaration and assignment are
 * absent here), and the missing orig. 151/159/161 presumably were
 * "goto out;" and cor_kreffree_bug arguments. TODO restore from VCS.
 */
126 int cor_rawsocket_release(struct socket
*sock
)
128 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
131 mutex_lock(&(cs
->lock
));
134 mutex_unlock(&(cs
->lock
));
136 if (type
== CS_TYPE_UNCONNECTED
) {
137 } else if (type
== CS_TYPE_CONN_RAW
) {
138 mutex_lock(&(cs
->lock
));
139 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
/* Push a partially-consumed receive item back into the conn's databuf so
 * no data is lost on close. */
140 if (cs
->data
.conn_raw
.rcvitem
!= 0) {
141 BUG_ON(cs
->data
.conn_raw
.trgt_sock
== 0);
143 cor_databuf_unpull_dpi(cs
->data
.conn_raw
.trgt_sock
, cs
,
144 cs
->data
.conn_raw
.rcvitem
,
145 cs
->data
.conn_raw
.rcvoffset
);
146 cs
->data
.conn_raw
.rcvitem
= 0;
148 mutex_unlock(&(cs
->lock
));
/* If the conn pair was handed to a managed socket, nothing else to do. */
150 if (cor_rawsocket_release_trypasssocket(cs
) != 0)
153 mutex_lock(&(cs
->lock
));
154 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
/* Otherwise reset the connection and drop the conn references held by this
 * socket. */
155 if (cs
->data
.conn_raw
.src_sock
!= 0 &&
156 cs
->data
.conn_raw
.trgt_sock
!= 0) {
157 cor_reset_conn(cs
->data
.conn_raw
.src_sock
);
158 kref_put(&(cs
->data
.conn_raw
.src_sock
->ref
),
160 kref_put(&(cs
->data
.conn_raw
.trgt_sock
->ref
),
163 mutex_unlock(&(cs
->lock
));
/* Final reference held by the struct socket itself. */
169 kref_put(&(cs
->ref
), cor_free_sock
);
/*
 * cor_rawsocket_bind() - proto_ops.bind stub.
 * NOTE(review): the body (orig. lines 175-178) is missing from this
 * corrupted rendering; presumably it just returns an error such as
 * -EOPNOTSUPP -- TODO confirm against VCS.
 */
174 int cor_rawsocket_bind(struct socket
*sock
, struct sockaddr
*saddr
,
/*
 * cor_rawsocket_connect() - proto_ops.connect: allocate a conn pair
 * (src_sock and its reversedir) and attach it to this socket, turning an
 * UNCONNECTED cor_sock into CS_TYPE_CONN_RAW.
 *
 * NOTE(review): corrupted rendering -- missing orig. lines include the
 * braces, the returns (e.g. after the cor_alloc_conn failure check at 189,
 * presumably "return -ETIMEDOUT;" or -ENOMEM, and after the double-connect
 * path at 201) and the final "return 0;". TODO restore from VCS.
 */
180 int cor_rawsocket_connect(struct socket
*sock
, struct sockaddr
*saddr
,
181 int sockaddr_len
, int flags
)
183 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
185 struct cor_conn
*src_sock
;
187 src_sock
= cor_alloc_conn(GFP_KERNEL
, cs
->is_highlatency
);
189 if (unlikely(src_sock
== 0))
192 src_sock
->is_client
= 1;
/* Lock order: cs->lock (mutex), then src rcv_lock, then reversedir
 * rcv_lock (both bh). */
194 mutex_lock(&(cs
->lock
));
195 spin_lock_bh(&(src_sock
->rcv_lock
));
196 spin_lock_bh(&(src_sock
->reversedir
->rcv_lock
));
/* Already connected (or otherwise typed): undo everything and reset the
 * freshly allocated conn. The missing statement after orig. 201 is
 * presumably a "return -EISCONN;". */
197 if (cs
->type
!= CS_TYPE_UNCONNECTED
) {
198 spin_unlock_bh(&(src_sock
->reversedir
->rcv_lock
));
199 spin_unlock_bh(&(src_sock
->rcv_lock
));
200 mutex_unlock(&(cs
->lock
));
201 cor_reset_conn(src_sock
);
205 cor_conn_init_sock_source(src_sock
);
206 cor_conn_init_sock_target(src_sock
->reversedir
);
/* Wire the conn pair into the socket; each direction takes a reference on
 * the other object (two kref_get pairs below). */
208 memset(&(cs
->data
), 0, sizeof(cs
->data
));
209 cs
->type
= CS_TYPE_CONN_RAW
;
210 cs
->data
.conn_raw
.src_sock
= src_sock
;
211 cs
->data
.conn_raw
.trgt_sock
= src_sock
->reversedir
;
212 kref_get(&(src_sock
->ref
));
213 kref_get(&(src_sock
->reversedir
->ref
));
215 src_sock
->is_highlatency
= cs
->is_highlatency
;
216 src_sock
->reversedir
->is_highlatency
= cs
->is_highlatency
;
218 src_sock
->source
.sock
.cs
= cs
;
219 src_sock
->reversedir
->target
.sock
.cs
= cs
;
220 kref_get(&(cs
->ref
));
221 kref_get(&(cs
->ref
));
223 src_sock
->reversedir
->target
.sock
.socktype
= SOCKTYPE_RAW
;
225 spin_unlock_bh(&(src_sock
->reversedir
->rcv_lock
));
226 spin_unlock_bh(&(src_sock
->rcv_lock
));
227 mutex_unlock(&(cs
->lock
));
/* Publish the connected state to the socket layer. */
229 lock_sock(&(cs
->sk
));
230 sock
->state
= SS_CONNECTED
;
231 release_sock(&(cs
->sk
));
/*
 * cor_rawsocket_accept() - proto_ops.accept stub.
 * NOTE(review): body (orig. 237-240) missing from this corrupted rendering;
 * presumably returns an error -- TODO confirm against VCS.
 */
236 int cor_rawsocket_accept(struct socket
*sock
, struct socket
*newsock
, int flags
,
/*
 * cor_rawsocket_listen() - proto_ops.listen stub.
 * NOTE(review): body (orig. 243-245) missing; presumably returns an error.
 */
242 int cor_rawsocket_listen(struct socket
*sock
, int len
)
/*
 * cor_rawsocket_shutdown() - proto_ops.shutdown stub.
 * NOTE(review): body (orig. 248-250) missing; presumably returns 0 or an
 * error -- TODO confirm against VCS.
 */
247 int cor_rawsocket_shutdown(struct socket
*sock
, int flags
)
/*
 * cor_rawsocket_ioctl() - proto_ops.ioctl stub (also reused for
 * compat_ioctl in the proto_ops table below).
 * NOTE(review): trailing parameter(s) and body (orig. 253-256) missing.
 */
252 int cor_rawsocket_ioctl(struct socket
*sock
, unsigned int cmd
,
/*
 * cor_rawsocket_setsockopt_passonclose() - handle COR_PASS_ON_CLOSE:
 * read an 8-byte cookie from userspace, look up the managed socket it
 * identifies, and store it as this raw socket's pass_on_close target.
 *
 * NOTE(review): corrupted rendering -- the declarations of "notread",
 * "cookie" and "rc" (orig. 262-266), the early returns (-EINVAL/-EFAULT/
 * -EINVAL presumably, orig. 270/274/278), the error-path assignment to rc
 * (orig. 282-284) and the final "return rc;" are missing. Only comments
 * added. TODO restore from VCS.
 */
258 static int cor_rawsocket_setsockopt_passonclose(struct socket
*sock
,
259 char __user
*optval
, unsigned int optlen
)
261 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
267 struct cor_sock
*passto
;
/* Cookie is fixed-size: exactly 8 bytes. */
269 if (unlikely(optlen
!= 8))
272 notread
= copy_from_user((char *) &cookie
, optval
, 8);
273 if (unlikely(notread
!= 0))
/* cor_get_sock_by_cookie() presumably returns a referenced cor_sock;
 * the reference is either kept in pass_on_close or dropped below. */
276 passto
= cor_get_sock_by_cookie(cookie
);
277 if (unlikely(passto
== 0))
280 mutex_lock(&(cs
->lock
));
281 if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
286 BUG_ON(passto
->type
!= CS_TYPE_CONN_MANAGED
);
/* Replacing an earlier pass_on_close target drops its reference. */
288 if (unlikely(cs
->data
.conn_raw
.pass_on_close
!= 0))
289 kref_put(&(cs
->data
.conn_raw
.pass_on_close
->ref
),
292 cs
->data
.conn_raw
.pass_on_close
= passto
;
295 mutex_unlock(&(cs
->lock
));
/* On failure the lookup reference on passto is released. */
297 if (unlikely(rc
!= 0))
298 kref_put(&(passto
->ref
), cor_free_sock
);
/*
 * cor_rawsocket_setsockopt() - proto_ops.setsockopt dispatcher for
 * SOL_COR options (COR_PASS_ON_CLOSE, COR_TOS).
 * NOTE(review): corrupted rendering -- the returns for the non-SOL_COR
 * branch (orig. 307-308, presumably -ENOPROTOOPT) and the final else
 * branch (orig. 315-316) are missing, as is the optlen argument line
 * (orig. 312) of the passonclose call. TODO restore from VCS.
 */
303 int cor_rawsocket_setsockopt(struct socket
*sock
, int level
,
304 int optname
, char __user
*optval
, unsigned int optlen
)
306 if (unlikely(level
!= SOL_COR
)) {
310 if (optname
== COR_PASS_ON_CLOSE
) {
311 return cor_rawsocket_setsockopt_passonclose(sock
, optval
,
313 } else if (optname
== COR_TOS
) {
314 return cor_socket_setsockopt_tos(sock
, optval
, optlen
);
/*
 * cor_rawsocket_getsockopt() - proto_ops.getsockopt stub.
 * NOTE(review): body (orig. 322-324) missing; presumably returns
 * -ENOPROTOOPT or similar -- TODO confirm against VCS.
 */
320 int cor_rawsocket_getsockopt(struct socket
*sock
, int level
,
321 int optname
, char __user
*optval
, int __user
*optlen
)
/*
 * _cor_rawsocket_poll() - compute the poll mask for a raw conn socket:
 * POLLIN|POLLRDNORM when buffered receive data exists, POLLOUT|POLLWRNORM
 * when the send buffer has room.
 *
 * NOTE(review): corrupted rendering -- the third parameter (orig. 327),
 * the braces, the "return mask;"/error-mask returns after orig. 342/351,
 * and the bodies of the isreset branches (orig. 358, 368, presumably
 * "mask = POLLERR;"-style assignments) are missing. The BUG_ON at orig.
 * 337 next to the tolerant checks at 339-341 looks contradictory in this
 * rendering -- one of them is probably conditional on the missing third
 * parameter. TODO restore from VCS.
 */
326 static unsigned int _cor_rawsocket_poll(struct cor_sock
*cs
, __u32 writelen
,
329 unsigned int mask
= 0;
331 struct cor_conn
*trgt_sock
;
332 struct cor_conn
*src_sock
;
334 mutex_lock(&(cs
->lock
));
337 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
339 BUG_ON(cs
->type
!= CS_TYPE_UNCONNECTED
&&
340 cs
->type
!= CS_TYPE_CONN_RAW
);
341 if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
342 mutex_unlock(&(cs
->lock
));
347 trgt_sock
= cs
->data
.conn_raw
.trgt_sock
;
348 src_sock
= cs
->data
.conn_raw
.src_sock
;
350 if (unlikely(trgt_sock
== 0 || src_sock
== 0)) {
351 mutex_unlock(&(cs
->lock
));
/* Readable if a pulled item is pending or the conn databuf has data. */
355 spin_lock_bh(&(trgt_sock
->rcv_lock
));
356 if (unlikely(trgt_sock
->isreset
!= 0 ||
357 cor_is_trgt_sock(trgt_sock
, cs
) == 0)) {
359 } else if (cs
->data
.conn_raw
.rcvitem
!= 0 ||
360 trgt_sock
->data_buf
.read_remaining
!= 0) {
361 mask
|= (POLLIN
| POLLRDNORM
);
363 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
/* Writable if the send buffer has room; otherwise remember that the
 * writer saw a low buffer so the next send may use the delayed path. */
365 spin_lock_bh(&(src_sock
->rcv_lock
));
366 if (unlikely(src_sock
->isreset
!= 0 ||
367 cor_is_src_sock(src_sock
, cs
) == 0)) {
369 } else if (cor_sock_sndbufavailable(src_sock
)) {
370 mask
|= (POLLOUT
| POLLWRNORM
);
372 cs
->data
.conn_raw
.snd_delayed_lowbuf
= 1;
374 spin_unlock_bh(&(src_sock
->rcv_lock
));
376 mutex_unlock(&(cs
->lock
));
/*
 * ___cor_rawsocket_sendmsg() - innermost send step: push "bufread" bytes
 * from "buf" into src_sock's receive path via cor_receive_buf(), called
 * with cs_r_l->lock already held ("_r_l" suffix = raw, locked).
 *
 * NOTE(review): corrupted rendering -- the declarations of rc2 (and any
 * other locals, orig. 385-388), the returns after orig. 392/400/404-408
 * (presumably -ENOTCONN / -EPIPE / -EAGAIN) and the tail (orig. 420-425,
 * 427-428, 430-431, including the success return) are missing. Only
 * comments added. TODO restore from VCS.
 */
381 static int ___cor_rawsocket_sendmsg(char *buf
, __u32 bufread
,
382 __u32 buflen
, __u8 flush
, struct cor_sock
*cs_r_l
)
384 struct cor_conn
*src_sock
;
389 BUG_ON(cs_r_l
->type
!= CS_TYPE_CONN_RAW
);
391 src_sock
= cs_r_l
->data
.conn_raw
.src_sock
;
392 if (unlikely(src_sock
== 0)) {
396 spin_lock_bh(&(src_sock
->rcv_lock
));
398 if (unlikely(unlikely(cor_is_src_sock(src_sock
, cs_r_l
) == 0) ||
399 unlikely(src_sock
->isreset
!= 0))) {
400 spin_unlock_bh(&(src_sock
->rcv_lock
));
/* No room: clear ready_to_write so the sendmsg loop blocks on poll-style
 * wakeup, and flag the low-buffer condition. Missing lines presumably
 * unlock and return -EAGAIN. */
404 if (cor_sock_sndbufavailable(src_sock
) == 0) {
406 atomic_set(&(cs_r_l
->ready_to_write
), 0);
408 cs_r_l
->data
.conn_raw
.snd_delayed_lowbuf
= 1;
412 BUG_ON(bufread
> (1024 * 1024 * 1024));
413 BUG_ON(buflen
> (1024 * 1024 * 1024));
415 rc2
= cor_receive_buf(src_sock
, buf
, bufread
,
416 cs_r_l
->data
.conn_raw
.snd_delayed_lowbuf
, flush
);
418 BUG_ON(rc2
> (1024 * 1024 * 1024));
419 if (unlikely(rc2
== 0)) {
426 cor_flush_buf(src_sock
);
429 spin_unlock_bh(&(src_sock
->rcv_lock
));
/*
 * __cor_rawsocket_sendmsg() - copy up to cor_buf_optlen(totallen) bytes
 * from the user iov into a zeroed kernel buffer, then hand it to
 * ___cor_rawsocket_sendmsg().
 *
 * NOTE(review): corrupted rendering -- declarations of buf/bufread/st_rc/rc
 * (orig. 436-438, 441-443), the clamping of len to buflen (orig. 446-451,
 * presumably "if (len > buflen - bufread) len = buflen - bufread;"), the
 * returns after orig. 452/456 (presumably 0 / -ENOMEM), the -EFAULT path
 * after orig. 463, and the kfree(buf)/return tail (orig. 469-472) are
 * missing. TODO restore from VCS.
 */
434 static int __cor_rawsocket_sendmsg(struct msghdr
*msg
, __u32 totallen
,
435 __u8 flush
, struct cor_sock
*cs_r_l
)
439 __u32 buflen
= cor_buf_optlen(totallen
);
440 __u32 len
= totallen
;
444 BUG_ON(totallen
> (1024 * 1024 * 1024));
445 BUG_ON(buflen
> (1024 * 1024 * 1024));
452 if (unlikely(len
<= 0))
455 buf
= kmalloc(buflen
, GFP_KERNEL
);
456 if (unlikely(buf
== 0))
459 memset(buf
, 0, buflen
);
461 st_rc
= copy_from_iter(buf
+ bufread
, len
, &(msg
->msg_iter
));
463 if (unlikely(st_rc
!= len
)) {
468 rc
= ___cor_rawsocket_sendmsg(buf
, len
, buflen
, flush
, cs_r_l
);
/*
 * _cor_rawsocket_sendmsg() - lock cs, validate its type, and perform one
 * send attempt; clears the delayed-lowbuf flag once a full write succeeds.
 *
 * NOTE(review): corrupted rendering -- the declaration of "copied"
 * (orig. 477-479), the returns in the UNCONNECTED / wrong-type branches
 * (orig. 487/490, presumably -ENOTCONN / -EBADF) and the final
 * "return copied;" (orig. 500-501) are missing. TODO restore from VCS.
 */
475 static int _cor_rawsocket_sendmsg(struct msghdr
*msg
, __u32 totallen
,
476 struct cor_sock
*cs
, __u8 flush
)
480 BUG_ON(totallen
> (1024 * 1024 * 1024));
482 mutex_lock(&(cs
->lock
));
484 BUG_ON(cs
->type
!= CS_TYPE_UNCONNECTED
&& cs
->type
!= CS_TYPE_CONN_RAW
);
485 if (unlikely(cs
->type
== CS_TYPE_UNCONNECTED
)) {
486 mutex_unlock(&(cs
->lock
));
488 } else if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
489 mutex_unlock(&(cs
->lock
));
493 copied
= __cor_rawsocket_sendmsg(msg
, totallen
, flush
, cs
);
494 BUG_ON(copied
> 0 && ((__u32
) copied
) > totallen
);
/* A complete write means the low-buffer backpressure has cleared. */
496 if (copied
> 0 && copied
== totallen
)
497 cs
->data
.conn_raw
.snd_delayed_lowbuf
= 0;
499 mutex_unlock(&(cs
->lock
));
/*
 * cor_rawsocket_sendmsg() - proto_ops.sendmsg: loop over
 * _cor_rawsocket_sendmsg() until all of total_len is accepted, blocking
 * (bounded by sk_sndtimeo) on ready_to_write when -EAGAIN and nothing has
 * been copied yet. MSG_MORE suppresses the flush flag.
 *
 * NOTE(review): corrupted rendering -- the total_len parameter line
 * (orig. 505), declarations of rc/copied/totallen/waitret (orig. 509-512,
 * 516-517), the return in the too-large branch (orig. 520-521, presumably
 * -EMSGSIZE), the copied-accumulation inside the loop (orig. 528?),
 * the timeout argument (orig. 535-536), the -EAGAIN-on-timeout branch body
 * (orig. 540-544), the break statements (orig. 546/548) and the final
 * "return rc;"/"return copied;" (orig. 551-553) are missing.
 * TODO restore from VCS.
 */
504 int cor_rawsocket_sendmsg(struct socket
*sock
, struct msghdr
*msg
,
507 __u8 flush
= ((msg
->msg_flags
& MSG_MORE
) == 0) ? 1 : 0;
508 int blocking
= (msg
->msg_flags
& MSG_DONTWAIT
) == 0;
513 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
/* Hard cap of 1 GiB per call, mirrored by the BUG_ONs downstream. */
515 __u32 max
= (1024 * 1024 * 1024);
518 totallen
= total_len
;
519 if (unlikely(totallen
> max
|| total_len
> max
)) {
524 while (rc
>= 0 && copied
< totallen
) {
525 rc
= _cor_rawsocket_sendmsg(msg
, totallen
, cs
, flush
);
527 BUG_ON(rc
> 0 && unlikely((rc
> total_len
|| rc
> totallen
)));
/* Only block while nothing has been copied yet; partial progress is
 * returned to the caller instead. */
529 if (rc
== -EAGAIN
&& blocking
&& copied
== 0) {
532 waitret
= wait_event_interruptible_timeout(
533 *sk_sleep(&(cs
->sk
)),
534 atomic_read(&(cs
->ready_to_write
)) != 0,
537 if (unlikely(waitret
< 0))
538 rc
= sock_intr_errno(cs
->sk
.sk_sndtimeo
);
539 else if (unlikely(waitret
== 0))
545 if (rc
> 0 || copied
== 0)
547 if (unlikely(rc
== -EFAULT
))
550 BUG_ON(copied
> 0 && ((__u32
) copied
> totallen
));
/*
 * __cor_rawsocket_recvmsg() - copy data from the currently pulled
 * data-buf item (cs->data.conn_raw.rcvitem) into the user iov, advancing
 * rcvoffset and freeing the item once fully consumed. Called with
 * cs->lock held.
 *
 * NOTE(review): corrupted rendering -- the cs parameter line (orig. 557),
 * declarations of len/st_rc/written (orig. 560-564), the rcvitem == 0
 * early return (orig. 566-569, presumably -EAGAIN), the initial
 * "len = totallen" (orig. 571-572), the returns after orig. 576/582
 * (presumably 0 / -EFAULT), the written accumulation (orig. 583-585) and
 * the final "return written;" (orig. 594-595) are missing.
 * TODO restore from VCS.
 */
556 static int __cor_rawsocket_recvmsg(struct msghdr
*msg
, __u32 totallen
,
559 struct cor_data_buf_item
*dbi
= cs
->data
.conn_raw
.rcvitem
;
565 BUG_ON(totallen
> (1024 * 1024 * 1024));
570 BUG_ON(dbi
->datalen
<= cs
->data
.conn_raw
.rcvoffset
);
/* Clamp the copy to what remains of the current item. */
573 if (len
> (dbi
->datalen
- cs
->data
.conn_raw
.rcvoffset
))
574 len
= dbi
->datalen
- cs
->data
.conn_raw
.rcvoffset
;
576 if (unlikely(len
<= 0))
579 st_rc
= copy_to_iter(dbi
->buf
+ cs
->data
.conn_raw
.rcvoffset
, len
,
582 if (unlikely(st_rc
!= len
))
586 cs
->data
.conn_raw
.rcvoffset
+= len
;
/* Item fully consumed: free it and reset the cursor so the next pull can
 * fetch a new item. */
587 if (dbi
->datalen
== cs
->data
.conn_raw
.rcvoffset
) {
588 cor_databuf_item_free(cs
->data
.conn_raw
.rcvitem
);
589 cs
->data
.conn_raw
.rcvitem
= 0;
590 cs
->data
.conn_raw
.rcvoffset
= 0;
593 BUG_ON(written
> totallen
);
/*
 * _cor_rawsocket_recvmsg() - lock cs_r, repeatedly pull data-buf items
 * from trgt_sock and copy them to the user iov via
 * __cor_rawsocket_recvmsg(); wakes the sender when data was consumed.
 *
 * NOTE(review): corrupted rendering -- declarations of rc/copied
 * (orig. 600-603), the returns after orig. 613 (presumably -ENOTCONN) and
 * after the reset branch at 627 (presumably -EPIPE), the copied
 * accumulation (orig. 641?), the break statements (orig. 643/645) and the
 * final "return rc;"/"return copied;" (orig. 656-657) are missing.
 *
 * NOTE(review): orig. line 624 combines the two unlikely() tests with a
 * bitwise "|" whereas the parallel test at orig. line 398 uses "||".
 * Harmless here (both operands are 0/1 and side-effect free) but it should
 * read "||" for consistency -- cannot be changed in this comment-only pass.
 */
598 static int _cor_rawsocket_recvmsg(struct msghdr
*msg
, __u32 totallen
,
599 struct cor_sock
*cs_r
)
604 struct cor_conn
*trgt_sock
;
606 mutex_lock(&(cs_r
->lock
));
608 BUG_ON(cs_r
->type
!= CS_TYPE_CONN_RAW
);
610 trgt_sock
= cs_r
->data
.conn_raw
.trgt_sock
;
612 if (unlikely(cs_r
->data
.conn_raw
.src_sock
== 0 || trgt_sock
== 0)) {
613 mutex_unlock(&(cs_r
->lock
));
/* Hold a conn reference across the loop; dropped at the end. */
617 kref_get(&(trgt_sock
->ref
));
619 while (rc
>= 0 && copied
< totallen
) {
/* Skip the pull when a partially consumed item is still pending. */
620 if (cs_r
->data
.conn_raw
.rcvitem
!= 0)
623 spin_lock_bh(&(trgt_sock
->rcv_lock
));
624 if (unlikely(unlikely(cor_is_trgt_sock(trgt_sock
, cs_r
) == 0) |
625 unlikely(trgt_sock
->isreset
!= 0))) {
626 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
627 mutex_unlock(&(cs_r
->lock
));
631 cor_databuf_pull_dbi(cs_r
, trgt_sock
);
/* Nothing pulled: mark not-ready so the blocking recvmsg path waits. */
632 if (cs_r
->data
.conn_raw
.rcvitem
== 0)
633 atomic_set(&(cs_r
->ready_to_read
), 0);
635 cor_bufsize_read_to_sock(trgt_sock
);
637 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
640 rc
= __cor_rawsocket_recvmsg(msg
, totallen
- copied
, cs_r
);
642 if (rc
> 0 || copied
== 0)
644 if (unlikely(rc
== -EFAULT
))
647 BUG_ON(copied
> 0 && ((__u32
) copied
> totallen
));
650 mutex_unlock(&(cs_r
->lock
));
/* Consuming data freed buffer space -- let the sender make progress. */
652 if (likely(copied
> 0))
653 cor_wake_sender(trgt_sock
);
655 kref_put(&(trgt_sock
->ref
), cor_free_conn
);
/*
 * cor_rawsocket_recvmsg() - proto_ops.recvmsg: validate flags and socket
 * type, perform one receive pass, and when blocking retry after waiting
 * (interruptibly) for ready_to_read. MSG_PEEK is not supported.
 *
 * NOTE(review): corrupted rendering -- declarations of rc/totallen
 * (orig. 662, 664-667), the returns for too-large (-EMSGSIZE?, orig. 673),
 * MSG_PEEK (orig. 676, presumably -EOPNOTSUPP), UNCONNECTED (-ENOTCONN?,
 * orig. 682) and wrong-type (orig. 685-686) branches, the retry goto/loop
 * after the successful wait (orig. 697-699) and the final "return rc;"
 * (orig. 700-701) are missing. TODO restore from VCS.
 */
660 int cor_rawsocket_recvmsg(struct socket
*sock
, struct msghdr
*msg
,
661 size_t total_len
, int flags
)
663 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
665 int blocking
= (flags
& MSG_DONTWAIT
) == 0;
668 __u32 max
= (1024 * 1024 * 1024);
671 totallen
= total_len
;
672 if (unlikely(totallen
> max
|| total_len
> max
))
675 if (unlikely((flags
& MSG_PEEK
) != 0))
678 mutex_lock(&(cs
->lock
));
679 BUG_ON(cs
->type
!= CS_TYPE_UNCONNECTED
&& cs
->type
!= CS_TYPE_CONN_RAW
);
680 if (unlikely(cs
->type
== CS_TYPE_UNCONNECTED
)) {
681 mutex_unlock(&(cs
->lock
));
683 } else if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
684 mutex_unlock(&(cs
->lock
));
687 mutex_unlock(&(cs
->lock
));
690 rc
= _cor_rawsocket_recvmsg(msg
, totallen
, cs
);
692 BUG_ON(rc
> 0 && unlikely((rc
> total_len
|| rc
> totallen
)));
/* Blocking receive: wait until the receive path marks data ready, then
 * (in the missing lines) retry the receive. */
694 if (rc
== -EAGAIN
&& blocking
) {
695 if (wait_event_interruptible(*sk_sleep(&(cs
->sk
)),
696 atomic_read(&(cs
->ready_to_read
)) != 0) == 0)
/*
 * cor_rawsocket_poll() - proto_ops.poll: register with the socket wait
 * queue and delegate to _cor_rawsocket_poll() with an unbounded write
 * length.
 * NOTE(review): the wait-table parameter line (orig. 705-706) is missing
 * from this corrupted rendering.
 */
704 static unsigned int cor_rawsocket_poll(struct file
*file
, struct socket
*sock
,
707 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
708 sock_poll_wait(file
, sock
, wait
);
709 return _cor_rawsocket_poll(cs
, U32_MAX
, 1);
713 const struct proto_ops cor_raw_proto_ops
= {
715 .owner
= THIS_MODULE
,
716 .release
= cor_rawsocket_release
,
717 .bind
= cor_rawsocket_bind
,
718 .connect
= cor_rawsocket_connect
,
719 .accept
= cor_rawsocket_accept
,
720 .listen
= cor_rawsocket_listen
,
721 .shutdown
= cor_rawsocket_shutdown
,
722 .ioctl
= cor_rawsocket_ioctl
,
723 .setsockopt
= cor_rawsocket_setsockopt
,
724 .getsockopt
= cor_rawsocket_getsockopt
,
726 .combat_ioctl
= cor_rawsocket_ioctl
,
727 .compat_setsockopt
= cor_rawsocket_setsockopt
,
728 .compat_getsockopt
= cor_rawsocket_getsockopt
,
730 .sendmsg
= cor_rawsocket_sendmsg
,
731 .recvmsg
= cor_rawsocket_recvmsg
,
732 .poll
= cor_rawsocket_poll
,
733 .socketpair
= cor_socket_socketpair
,
734 .getname
= cor_socket_getname
,
735 .mmap
= cor_socket_mmap
,
737 /* sendpage, splice_read, are optional */
/*
 * cor_create_raw_sock() - socket-family create hook: build the underlying
 * cor sock via _cor_createsock() and install the raw proto_ops table.
 * NOTE(review): corrupted rendering -- the kern parameter line
 * (orig. 741-742), the rc error check (orig. 744-747, presumably
 * "if (rc != 0) return rc;") and the final "return 0;" (orig. 749-751)
 * are missing. TODO restore from VCS.
 */
740 int cor_create_raw_sock(struct net
*net
, struct socket
*sock
, int protocol
,
743 int rc
= _cor_createsock(net
, sock
, protocol
, kern
);
748 sock
->ops
= &cor_raw_proto_ops
;
753 MODULE_LICENSE("GPL");