2 * Connection oriented routing
3 * Copyright (C) 2007-2019 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include <linux/net.h>
23 #include <asm/uaccess.h>
27 static int cor_rawsocket_release_trypasssocket(struct cor_sock
*cs
)
29 struct cor_sock
*passto
;
30 struct conn
*src_sock
;
31 struct conn
*trgt_sock
;
34 mutex_lock(&(cs
->lock
));
36 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
37 passto
= cs
->data
.conn_raw
.pass_on_close
;
38 cs
->data
.conn_raw
.pass_on_close
= 0;
40 src_sock
= cs
->data
.conn_raw
.src_sock
;
41 trgt_sock
= cs
->data
.conn_raw
.trgt_sock
;
43 mutex_unlock(&(cs
->lock
));
48 mutex_lock(&(passto
->lock
));
49 spin_lock_bh(&(src_sock
->rcv_lock
));
50 spin_lock_bh(&(trgt_sock
->rcv_lock
));
52 BUG_ON(src_sock
->is_client
== 0);
53 BUG_ON(passto
->type
!= CS_TYPE_CONN_MANAGED
);
55 if (unlikely(unlikely(passto
->isreleased
!= 0) ||
56 unlikely(passto
->data
.conn_managed
.connect_state
!=
57 CS_CONNECTSTATE_CONNECTING
)))
60 BUG_ON(passto
->data
.conn_managed
.src_sock
!= 0);
61 BUG_ON(passto
->data
.conn_managed
.trgt_sock
!= 0);
63 if (unlikely(unlikely(src_sock
->isreset
!= 0) ||
64 unlikely(trgt_sock
->isreset
!= 0))) {
65 __set_sock_connecterror(passto
, -ENETUNREACH
);
69 BUG_ON(src_sock
->sourcetype
!= SOURCE_SOCK
);
70 BUG_ON(src_sock
->source
.sock
.cs
!= cs
);
71 BUG_ON(trgt_sock
->targettype
!= TARGET_SOCK
);
72 BUG_ON(trgt_sock
->target
.sock
.cs
!= cs
);
74 passto
->data
.conn_managed
.src_sock
= src_sock
;
75 passto
->data
.conn_managed
.trgt_sock
= trgt_sock
;
76 src_sock
->source
.sock
.cs
= passto
;
77 trgt_sock
->target
.sock
.cs
= passto
;
78 kref_get(&(passto
->ref
));
79 kref_get(&(passto
->ref
));
81 BUG_ON(passto
->data
.conn_managed
.rcv_buf
== 0);
82 trgt_sock
->target
.sock
.socktype
= SOCKTYPE_MANAGED
;
83 trgt_sock
->target
.sock
.rcv_buf_state
= RCV_BUF_STATE_INCOMPLETE
;
84 trgt_sock
->target
.sock
.rcv_buf
= passto
->data
.conn_managed
.rcv_buf
;
85 trgt_sock
->target
.sock
.rcvd
= 0;
90 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
91 spin_unlock_bh(&(src_sock
->rcv_lock
));
92 mutex_unlock(&(passto
->lock
));
95 mutex_lock(&(cs
->lock
));
96 cs
->data
.conn_raw
.src_sock
= 0;
97 cs
->data
.conn_raw
.trgt_sock
= 0;
98 mutex_unlock(&(cs
->lock
));
100 lock_sock(&(cs
->sk
));
101 cs
->sk
.sk_socket
->state
= SS_CONNECTED
;
102 release_sock(&(cs
->sk
));
104 mutex_lock(&(passto
->lock
));
105 BUG_ON(passto
->type
!= CS_TYPE_CONN_MANAGED
);
106 passto
->data
.conn_managed
.connect_state
=
107 CS_CONNECTSTATE_CONNECTED
;
108 if (likely(passto
->isreleased
== 0)) {
109 atomic_set(&(passto
->ready_to_write
), 1);
111 passto
->sk
.sk_state_change(&(passto
->sk
));
113 mutex_unlock(&(passto
->lock
));
116 /* pointers from struct conn */
117 kref_put(&(cs
->ref
), kreffree_bug
);
118 kref_put(&(cs
->ref
), kreffree_bug
);
121 kref_put(&(passto
->ref
), free_sock
);
126 int cor_rawsocket_release(struct socket
*sock
)
128 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
131 mutex_lock(&(cs
->lock
));
134 mutex_unlock(&(cs
->lock
));
136 if (type
== CS_TYPE_UNCONNECTED
) {
137 } else if (type
== CS_TYPE_CONN_RAW
) {
138 mutex_lock(&(cs
->lock
));
139 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
140 if (cs
->data
.conn_raw
.rcvitem
!= 0) {
141 BUG_ON(cs
->data
.conn_raw
.trgt_sock
== 0);
143 databuf_unpull_dpi(cs
->data
.conn_raw
.trgt_sock
, cs
,
144 cs
->data
.conn_raw
.rcvitem
,
145 cs
->data
.conn_raw
.rcvoffset
);
146 cs
->data
.conn_raw
.rcvitem
= 0;
148 mutex_unlock(&(cs
->lock
));
150 if (cor_rawsocket_release_trypasssocket(cs
) != 0)
153 mutex_lock(&(cs
->lock
));
154 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
155 if (cs
->data
.conn_raw
.src_sock
!= 0 &&
156 cs
->data
.conn_raw
.trgt_sock
!= 0) {
157 reset_conn(cs
->data
.conn_raw
.src_sock
);
158 kref_put(&(cs
->data
.conn_raw
.src_sock
->ref
), free_conn
);
159 kref_put(&(cs
->data
.conn_raw
.trgt_sock
->ref
),
162 mutex_unlock(&(cs
->lock
));
168 kref_put(&(cs
->ref
), free_sock
);
173 int cor_rawsocket_bind(struct socket
*sock
, struct sockaddr
*saddr
,
179 int cor_rawsocket_connect(struct socket
*sock
, struct sockaddr
*saddr
,
180 int sockaddr_len
, int flags
)
182 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
184 struct conn
*src_sock
;
186 src_sock
= alloc_conn(GFP_KERNEL
);
188 if (unlikely(src_sock
== 0))
191 src_sock
->is_client
= 1;
193 mutex_lock(&(cs
->lock
));
194 spin_lock_bh(&(src_sock
->rcv_lock
));
195 spin_lock_bh(&(src_sock
->reversedir
->rcv_lock
));
196 if (cs
->type
!= CS_TYPE_UNCONNECTED
) {
197 spin_unlock_bh(&(src_sock
->reversedir
->rcv_lock
));
198 spin_unlock_bh(&(src_sock
->rcv_lock
));
199 mutex_unlock(&(cs
->lock
));
200 reset_conn(src_sock
);
204 conn_init_sock_source(src_sock
);
205 conn_init_sock_target(src_sock
->reversedir
);
207 memset(&(cs
->data
), 0, sizeof(cs
->data
));
208 cs
->type
= CS_TYPE_CONN_RAW
;
209 cs
->data
.conn_raw
.src_sock
= src_sock
;
210 cs
->data
.conn_raw
.trgt_sock
= src_sock
->reversedir
;
211 kref_get(&(src_sock
->ref
));
212 kref_get(&(src_sock
->reversedir
->ref
));
214 src_sock
->source
.sock
.cs
= cs
;
215 src_sock
->reversedir
->target
.sock
.cs
= cs
;
216 kref_get(&(cs
->ref
));
217 kref_get(&(cs
->ref
));
219 spin_unlock_bh(&(src_sock
->reversedir
->rcv_lock
));
220 spin_unlock_bh(&(src_sock
->rcv_lock
));
221 mutex_unlock(&(cs
->lock
));
223 lock_sock(&(cs
->sk
));
224 sock
->state
= SS_CONNECTED
;
225 release_sock(&(cs
->sk
));
230 int cor_rawsocket_accept(struct socket
*sock
, struct socket
*newsock
, int flags
,
236 int cor_rawsocket_listen(struct socket
*sock
, int len
)
/* shutdown(): accepted but has no effect for cor raw sockets.
 *
 * NOTE(review): body lost in extraction; assuming a no-op success stub --
 * verify against upstream (could also be -EOPNOTSUPP).
 */
int cor_rawsocket_shutdown(struct socket *sock, int flags)
{
	return 0;
}
246 int cor_rawsocket_ioctl(struct socket
*sock
, unsigned int cmd
,
252 static int cor_rawsocket_setsockopt_passonclose(struct socket
*sock
,
253 char __user
*optval
, unsigned int optlen
)
255 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
261 struct cor_sock
*passto
;
263 if (unlikely(optlen
!= 8))
266 notread
= copy_from_user((char *) &cookie
, optval
, 8);
267 if (unlikely(notread
!= 0))
270 passto
= get_corsock_by_cookie(cookie
);
271 if (unlikely(passto
== 0))
274 mutex_lock(&(cs
->lock
));
275 if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
280 BUG_ON(passto
->type
!= CS_TYPE_CONN_MANAGED
);
282 if (unlikely(cs
->data
.conn_raw
.pass_on_close
!= 0))
283 kref_put(&(cs
->data
.conn_raw
.pass_on_close
->ref
), free_sock
);
285 cs
->data
.conn_raw
.pass_on_close
= passto
;
288 mutex_unlock(&(cs
->lock
));
290 if (unlikely(rc
!= 0))
291 kref_put(&(passto
->ref
), free_sock
);
296 int cor_rawsocket_setsockopt(struct socket
*sock
, int level
,
297 int optname
, char __user
*optval
, unsigned int optlen
)
299 if (unlikely(level
!= SOL_COR
)) {
303 if (optname
== COR_PASS_ON_CLOSE
) {
304 return cor_rawsocket_setsockopt_passonclose(sock
, optval
,
311 int cor_rawsocket_getsockopt(struct socket
*sock
, int level
,
312 int optname
, char __user
*optval
, int __user
*optlen
)
317 static unsigned int _cor_rawsocket_poll(struct cor_sock
*cs
, __u32 writelen
,
320 unsigned int mask
= 0;
322 struct conn
*trgt_sock
;
323 struct conn
*src_sock
;
325 mutex_lock(&(cs
->lock
));
328 BUG_ON(cs
->type
!= CS_TYPE_CONN_RAW
);
330 BUG_ON(cs
->type
!= CS_TYPE_UNCONNECTED
&&
331 cs
->type
!= CS_TYPE_CONN_RAW
);
332 if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
333 mutex_unlock(&(cs
->lock
));
338 trgt_sock
= cs
->data
.conn_raw
.trgt_sock
;
339 src_sock
= cs
->data
.conn_raw
.src_sock
;
341 if (unlikely(trgt_sock
== 0 || src_sock
== 0)) {
342 mutex_unlock(&(cs
->lock
));
346 spin_lock_bh(&(trgt_sock
->rcv_lock
));
347 if (unlikely(trgt_sock
->isreset
!= 0 ||
348 is_trgt_sock(trgt_sock
, cs
) == 0)) {
350 } else if (cs
->data
.conn_raw
.rcvitem
!= 0 ||
351 trgt_sock
->data_buf
.read_remaining
!= 0) {
352 mask
|= (POLLIN
| POLLRDNORM
);
354 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
356 spin_lock_bh(&(src_sock
->rcv_lock
));
357 if (unlikely(src_sock
->isreset
!= 0 ||
358 is_src_sock(src_sock
, cs
) == 0)) {
360 } else if (cor_sock_sndbufavailable(src_sock
)) {
361 mask
|= (POLLOUT
| POLLWRNORM
);
363 cs
->data
.conn_raw
.snd_delayed_lowbuf
= 1;
365 spin_unlock_bh(&(src_sock
->rcv_lock
));
367 mutex_unlock(&(cs
->lock
));
372 static int ___cor_rawsocket_sendmsg(char *buf
, __u32 bufread
,
373 __u32 buflen
, __u8 flush
, struct cor_sock
*cs_r_l
)
375 struct conn
*src_sock
;
380 BUG_ON(cs_r_l
->type
!= CS_TYPE_CONN_RAW
);
382 src_sock
= cs_r_l
->data
.conn_raw
.src_sock
;
383 if (unlikely(src_sock
== 0)) {
387 spin_lock_bh(&(src_sock
->rcv_lock
));
389 if (unlikely(unlikely(is_src_sock(src_sock
, cs_r_l
) == 0) ||
390 unlikely(src_sock
->isreset
!= 0))) {
391 spin_unlock_bh(&(src_sock
->rcv_lock
));
395 if (cor_sock_sndbufavailable(src_sock
) == 0) {
397 atomic_set(&(cs_r_l
->ready_to_write
), 0);
399 cs_r_l
->data
.conn_raw
.snd_delayed_lowbuf
= 1;
403 BUG_ON(bufread
> (1024 * 1024 * 1024));
404 BUG_ON(buflen
> (1024 * 1024 * 1024));
406 rc2
= receive_buf(src_sock
, buf
, bufread
,
407 cs_r_l
->data
.conn_raw
.snd_delayed_lowbuf
, flush
);
409 BUG_ON(rc2
> (1024 * 1024 * 1024));
410 if (unlikely(rc2
== 0)) {
420 spin_unlock_bh(&(src_sock
->rcv_lock
));
425 static int __cor_rawsocket_sendmsg(struct msghdr
*msg
, __u32 totallen
,
426 __u8 flush
, struct cor_sock
*cs_r_l
)
430 __u32 buflen
= buf_optlen(totallen
);
431 __u32 len
= totallen
;
435 BUG_ON(totallen
> (1024 * 1024 * 1024));
436 BUG_ON(buflen
> (1024 * 1024 * 1024));
443 if (unlikely(len
<= 0))
446 buf
= kmalloc(buflen
, GFP_KERNEL
);
447 if (unlikely(buf
== 0))
450 memset(buf
, 0, buflen
);
452 st_rc
= copy_from_iter(buf
+ bufread
, len
, &(msg
->msg_iter
));
454 if (unlikely(st_rc
!= len
)) {
459 rc
= ___cor_rawsocket_sendmsg(buf
, len
, buflen
, flush
, cs_r_l
);
466 static int _cor_rawsocket_sendmsg(struct msghdr
*msg
, __u32 totallen
,
467 struct cor_sock
*cs
, __u8 flush
)
471 BUG_ON(totallen
> (1024 * 1024 * 1024));
473 mutex_lock(&(cs
->lock
));
475 BUG_ON(cs
->type
!= CS_TYPE_UNCONNECTED
&& cs
->type
!= CS_TYPE_CONN_RAW
);
476 if (unlikely(cs
->type
== CS_TYPE_UNCONNECTED
)) {
477 mutex_unlock(&(cs
->lock
));
479 } else if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
480 mutex_unlock(&(cs
->lock
));
484 copied
= __cor_rawsocket_sendmsg(msg
, totallen
, flush
, cs
);
485 BUG_ON(copied
> 0 && ((__u32
) copied
) > totallen
);
487 if (copied
> 0 && copied
== totallen
)
488 cs
->data
.conn_raw
.snd_delayed_lowbuf
= 0;
490 mutex_unlock(&(cs
->lock
));
495 int cor_rawsocket_sendmsg(struct socket
*sock
, struct msghdr
*msg
,
498 __u8 flush
= ((msg
->msg_flags
& MSG_MORE
) == 0) ? 1 : 0;
499 int blocking
= (msg
->msg_flags
& MSG_DONTWAIT
) == 0;
504 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
506 __u32 max
= (1024 * 1024 * 1024);
509 totallen
= total_len
;
510 if (unlikely(totallen
> max
|| total_len
> max
)) {
515 while (rc
>= 0 && copied
< totallen
) {
516 rc
= _cor_rawsocket_sendmsg(msg
, totallen
, cs
, flush
);
518 BUG_ON(rc
> 0 && unlikely((rc
> total_len
|| rc
> totallen
)));
520 if (rc
== -EAGAIN
&& blocking
&& copied
== 0) {
523 waitret
= wait_event_interruptible_timeout(
524 *sk_sleep(&(cs
->sk
)),
525 atomic_read(&(cs
->ready_to_write
)) != 0,
528 if (unlikely(waitret
< 0))
529 rc
= sock_intr_errno(cs
->sk
.sk_sndtimeo
);
530 else if (unlikely(waitret
== 0))
536 if (rc
> 0 || copied
== 0)
538 if (unlikely(rc
== -EFAULT
))
541 BUG_ON(copied
> 0 && ((__u32
) copied
> totallen
));
547 static int __cor_rawsocket_recvmsg(struct msghdr
*msg
, __u32 totallen
,
550 struct data_buf_item
*dbi
= cs
->data
.conn_raw
.rcvitem
;
556 BUG_ON(totallen
> (1024 * 1024 * 1024));
561 BUG_ON(dbi
->datalen
<= cs
->data
.conn_raw
.rcvoffset
);
564 if (len
> (dbi
->datalen
- cs
->data
.conn_raw
.rcvoffset
))
565 len
= dbi
->datalen
- cs
->data
.conn_raw
.rcvoffset
;
567 if (unlikely(len
<= 0))
570 st_rc
= copy_to_iter(dbi
->buf
+ cs
->data
.conn_raw
.rcvoffset
, len
,
573 if (unlikely(st_rc
!= len
))
577 cs
->data
.conn_raw
.rcvoffset
+= len
;
578 if (dbi
->datalen
== cs
->data
.conn_raw
.rcvoffset
) {
579 databuf_item_free(cs
->data
.conn_raw
.rcvitem
);
580 cs
->data
.conn_raw
.rcvitem
= 0;
581 cs
->data
.conn_raw
.rcvoffset
= 0;
584 BUG_ON(written
> totallen
);
589 static int _cor_rawsocket_recvmsg(struct msghdr
*msg
, __u32 totallen
,
590 struct cor_sock
*cs_r
)
595 struct conn
*trgt_sock
;
597 mutex_lock(&(cs_r
->lock
));
599 BUG_ON(cs_r
->type
!= CS_TYPE_CONN_RAW
);
601 trgt_sock
= cs_r
->data
.conn_raw
.trgt_sock
;
603 if (unlikely(cs_r
->data
.conn_raw
.src_sock
== 0 || trgt_sock
== 0)) {
604 mutex_unlock(&(cs_r
->lock
));
608 kref_get(&(trgt_sock
->ref
));
610 while (rc
>= 0 && copied
< totallen
) {
611 if (cs_r
->data
.conn_raw
.rcvitem
!= 0)
614 spin_lock_bh(&(trgt_sock
->rcv_lock
));
615 if (unlikely(unlikely(is_trgt_sock(trgt_sock
, cs_r
) == 0) |
616 unlikely(trgt_sock
->isreset
!= 0))) {
617 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
618 mutex_unlock(&(cs_r
->lock
));
622 databuf_pull_dbi(cs_r
, trgt_sock
);
623 if (cs_r
->data
.conn_raw
.rcvitem
== 0)
624 atomic_set(&(cs_r
->ready_to_read
), 0);
626 spin_unlock_bh(&(trgt_sock
->rcv_lock
));
629 rc
= __cor_rawsocket_recvmsg(msg
, totallen
- copied
, cs_r
);
631 if (rc
> 0 || copied
== 0)
633 if (unlikely(rc
== -EFAULT
))
636 BUG_ON(copied
> 0 && ((__u32
) copied
> totallen
));
639 mutex_unlock(&(cs_r
->lock
));
641 if (likely(copied
> 0))
642 wake_sender(trgt_sock
);
644 kref_put(&(trgt_sock
->ref
), free_conn
);
649 int cor_rawsocket_recvmsg(struct socket
*sock
, struct msghdr
*msg
,
650 size_t total_len
, int flags
)
652 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
654 int blocking
= (flags
& MSG_DONTWAIT
) == 0;
657 __u32 max
= (1024 * 1024 * 1024);
660 totallen
= total_len
;
661 if (unlikely(totallen
> max
|| total_len
> max
))
664 if (unlikely((flags
& MSG_PEEK
) != 0))
667 mutex_lock(&(cs
->lock
));
668 BUG_ON(cs
->type
!= CS_TYPE_UNCONNECTED
&& cs
->type
!= CS_TYPE_CONN_RAW
);
669 if (unlikely(cs
->type
== CS_TYPE_UNCONNECTED
)) {
670 mutex_unlock(&(cs
->lock
));
672 } else if (unlikely(cs
->type
!= CS_TYPE_CONN_RAW
)) {
673 mutex_unlock(&(cs
->lock
));
676 mutex_unlock(&(cs
->lock
));
679 rc
= _cor_rawsocket_recvmsg(msg
, totallen
, cs
);
681 BUG_ON(rc
> 0 && unlikely((rc
> total_len
|| rc
> totallen
)));
683 if (rc
== -EAGAIN
&& blocking
) {
684 if (wait_event_interruptible(*sk_sleep(&(cs
->sk
)),
685 atomic_read(&(cs
->ready_to_read
)) != 0) == 0)
693 static unsigned int cor_rawsocket_poll(struct file
*file
, struct socket
*sock
,
696 struct cor_sock
*cs
= (struct cor_sock
*) sock
->sk
;
697 sock_poll_wait(file
, sock
, wait
);
698 return _cor_rawsocket_poll(cs
, U32_MAX
, 1);
702 const struct proto_ops cor_raw_proto_ops
= {
704 .owner
= THIS_MODULE
,
705 .release
= cor_rawsocket_release
,
706 .bind
= cor_rawsocket_bind
,
707 .connect
= cor_rawsocket_connect
,
708 .accept
= cor_rawsocket_accept
,
709 .listen
= cor_rawsocket_listen
,
710 .shutdown
= cor_rawsocket_shutdown
,
711 .ioctl
= cor_rawsocket_ioctl
,
712 .setsockopt
= cor_rawsocket_setsockopt
,
713 .getsockopt
= cor_rawsocket_getsockopt
,
715 .combat_ioctl
= cor_rawsocket_ioctl
,
716 .compat_setsockopt
= cor_rawsocket_setsockopt
,
717 .compat_getsockopt
= cor_rawsocket_getsockopt
,
719 .sendmsg
= cor_rawsocket_sendmsg
,
720 .recvmsg
= cor_rawsocket_recvmsg
,
721 .poll
= cor_rawsocket_poll
,
722 .socketpair
= cor_socket_socketpair
,
723 .getname
= cor_socket_getname
,
724 .mmap
= cor_socket_mmap
,
726 /* sendpage, splice_read, are optional */
729 int cor_create_raw_sock(struct net
*net
, struct socket
*sock
, int protocol
,
732 int rc
= _cor_createsock(net
, sock
, protocol
, kern
);
737 sock
->ops
= &cor_raw_proto_ops
;