2 * libvdeplug - A library to connect to a VDE Switch.
3 * Copyright (C) 2006 Renzo Davoli, University of Bologna
4 * (c) 2010 Renzo Davoli - stream + point2point
5 * (c) 2011 Renzo Davoli - udpconnect
7 * This library is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation version 2.1 of the License, or (at
10 * your option) any later version.
12 * This library is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
15 * General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23 #include <sys/socket.h>
31 #include <sys/types.h>
37 #include <netinet/in.h>
41 #include <vdecommon.h>
43 #include <libvdeplug.h>
46 /* Per-User standard switch definition */
47 /* This will be prefixed by getenv("HOME") */
48 /* it can be a symbolic link to the switch dir */
49 #define STDSWITCH "/.vde2/default.switch"
50 /* deprecated old name */
51 #define STDSOCK "/.vde2/stdsock"
55 /* AF_IPN has not been officially assigned yet
56 we "steal" unused AF_NETBEUI in the meanwhile
57 this code will be uncommented when AF_IPN is assigned. */
59 #define AF_IPN 0 /* IPN sockets: */
65 #define AF_NETBEUI PF_NETBEUI
70 #define AF_IPN_STOLEN AF_NETBEUI /* IPN temporary sockets */
71 #define PF_IPN_STOLEN AF_IPN_STOLEN
75 #define IPN_SO_DESCR 1
79 #define MIN(X,Y) (((X)<(Y))?(X):(Y))
82 /* Fallback names for the control socket, NULL-terminated array of absolute
84 char *fallback_sockname
[] = {
85 "/var/run/vde.ctl/ctl",
91 /* Fallback directories for the data socket, NULL-terminated array of absolute
92 * directory names, with no trailing /. */
93 const char *fallback_dirname
[] = {
105 struct sockaddr
*outsock
;
/* Magic value assigned to req.magic in vde_open_real() before the request
 * is sent on the control socket. */
108 #define SWITCH_MAGIC 0xfeedface
/* Bits kept in the local `flags` variable of vde_open_real(). */
/* Point-to-point connection over a named socket: vde_open_real() creates
 * its own AF_UNIX datagram socket when this bit is set. */
110 #define VDEFLAG_P2P_SOCKET 1
/* Set when the given socket name contains "->" followed by a ':'; the data
 * socket is then set up through getaddrinfo() with SOCK_DGRAM. */
111 #define VDEFLAG_UDP_SOCKET 2
/* Mask of both modes above. When either bit is set, vde_open_real() skips
 * the vde_realpath() canonicalization and the AF_IPN_STOLEN attempt. */
112 #define VDEFLAG_P2P (VDEFLAG_P2P_SOCKET | VDEFLAG_UDP_SOCKET)
/* Kind of request carried in the `type` field of the request structure.
 * vde_open_real() initializes the field to REQ_NEW_CONTROL and may replace
 * it with REQ_NEW_PORT0 while parsing a socket name that ends in "]"
 * (NOTE(review): the exact condition is not visible in this excerpt).
 * Before the request is sent, (port << 8) is added to the stored value, so
 * the enumerator itself occupies only the low byte. */
114 enum request_type
{ REQ_NEW_CONTROL
, REQ_NEW_PORT0
};
119 enum request_type type
;
120 struct sockaddr_un sock
;
121 char description
[MAXDESCR
];
122 } __attribute__((packed
));
124 VDECONN
*vde_open_real(char *given_sockname
, char *descr
,int interface_version
,
125 struct vde_open_args
*open_args
)
127 struct vdeconn
*conn
=NULL
;
128 struct passwd
*callerpwd
;
129 struct request_v3 req
;
137 char *std_sockname
=NULL
;
138 char *real_sockname
=NULL
;
140 char *ssh_client
= getenv("SSH_CLIENT");
143 if (open_args
!= NULL
) {
144 if (interface_version
== 1) {
145 port
=open_args
->port
;
146 group
=open_args
->group
;
147 mode
=open_args
->mode
;
149 flags
|= VDEFLAG_P2P_SOCKET
;
157 memset(&req
, 0, sizeof(req
));
158 if ((std_sockname
=(char *)calloc(PATH_MAX
,sizeof(char)))==NULL
) {
162 if ((real_sockname
=(char *)calloc(PATH_MAX
,sizeof(char)))==NULL
) {
166 sockname
= real_sockname
;
167 if ((conn
=calloc(1,sizeof(struct vdeconn
)))==NULL
)
172 conn
->fdctl
=conn
->fddata
=-1;
174 callerpwd
=getpwuid(getuid());
175 req
.type
= REQ_NEW_CONTROL
;
176 if (given_sockname
== NULL
|| *given_sockname
== '\0') {
177 char *homedir
= getenv("HOME");
178 given_sockname
= NULL
;
181 snprintf(std_sockname
, PATH_MAX
, "%s%s", homedir
, STDSWITCH
);
182 if (lstat(std_sockname
,&statbuf
)==0)
183 given_sockname
= std_sockname
;
185 snprintf(std_sockname
, PATH_MAX
, "%s%s", homedir
, STDSOCK
);
186 if (lstat(std_sockname
,&statbuf
)==0)
187 given_sockname
= std_sockname
;
192 if((split
= strstr(given_sockname
,"->")) != NULL
&& strrchr(split
,':') != NULL
)
193 flags
|= VDEFLAG_UDP_SOCKET
;
194 else if(given_sockname
[strlen(given_sockname
)-1] == ']'
195 && (split
=strrchr(given_sockname
,'[')) != NULL
) {
200 flags
|= VDEFLAG_P2P_SOCKET
;
202 req
.type
= REQ_NEW_PORT0
;
203 if (*given_sockname
==0)
204 given_sockname
= NULL
;
208 /* Canonicalize the sockname: we need to send an absolute pathname to the
209 * switch (we don't know its cwd) for the data socket. Appending
210 * given_sockname to getcwd() would be enough, but we could end up with a
211 * name longer than PATH_MAX that couldn't be used as sun_path. */
212 if (given_sockname
&& !(flags
& VDEFLAG_P2P
) &&
213 vde_realpath(given_sockname
, real_sockname
) == NULL
)
218 /* AF_IPN has not been officially assigned yet
219 we "steal" unused AF_NETBEUI in the meanwhile
220 this code will be uncommented when AF_IPN is assigned. */
221 if((conn
->fddata
= socket(AF_IPN
,SOCK_RAW
,IPN_ANY
)) >= 0) {
222 /* IPN service exists */
223 sockun
.sun_family
= AF_IPN
;
226 if((flags
& VDEFLAG_P2P
) == 0 &&
227 (conn
->fddata
= socket(AF_IPN_STOLEN
,SOCK_RAW
,IPN_ANY
)) >= 0) {
228 struct sockaddr_un sockun
;
229 memset(&sockun
, 0, sizeof(sockun
));
230 /* IPN_STOLEN service exists */
231 sockun
.sun_family
= AF_IPN_STOLEN
;
232 if (port
!= 0 || req
.type
== REQ_NEW_PORT0
)
233 setsockopt(conn
->fddata
,0,IPN_SO_PORT
,&port
,sizeof(port
));
234 /* If we're given a sockname, just try it */
237 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
), "%s", sockname
);
238 res
= connect(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
240 /* Else try all the fallback socknames, one by one */
244 for (i
= 0, res
= -1; fallback_sockname
[i
] && (res
!= 0); i
++)
246 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
), "%s", fallback_sockname
[i
]);
247 res
= connect(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
251 /* If one of the connect succeeded, we're done */
254 int descrlen
=snprintf(req
.description
,MAXDESCR
,"%s user=%s PID=%d",
255 descr
,(callerpwd
!= NULL
)?callerpwd
->pw_name
:"??",
258 char *endofip
=strchr(ssh_client
,' ');
259 if (endofip
) *endofip
=0;
260 snprintf(req
.description
+descrlen
,MAXDESCR
-descrlen
,
261 " SSH=%s", ssh_client
);
262 if (endofip
) *endofip
=' ';
264 setsockopt(conn
->fddata
,0,IPN_SO_DESCR
,req
.description
,
265 strlen(req
.description
+1));
273 if (flags
& VDEFLAG_UDP_SOCKET
) {
274 struct addrinfo hints
;
275 struct addrinfo
*result
,*rp
;
277 char *dst
=strstr(given_sockname
,"->");
278 char *src
=given_sockname
;
281 memset(&hints
,0,sizeof(hints
));
282 hints
.ai_socktype
=SOCK_DGRAM
;
285 dstport
=strrchr(dst
,':');
292 srcport
=strrchr(src
,':');
297 //fprintf(stderr,"UDP!%s:%s -> %s:%s \n",src,srcport,dst,dstport);
298 hints
.ai_flags
= AI_PASSIVE
;
299 s
= getaddrinfo(src
, srcport
, &hints
, &result
);
306 for (rp
= result
; rp
!= NULL
; rp
= rp
->ai_next
) {
307 conn
->fddata
= socket(rp
->ai_family
, rp
->ai_socktype
, rp
->ai_protocol
);
308 if (conn
->fddata
== -1)
311 if (bind(conn
->fddata
, rp
->ai_addr
, rp
->ai_addrlen
) == 0)
322 freeaddrinfo(result
);
325 s
= getaddrinfo(dst
, dstport
, &hints
, &result
);
331 /* for now it takes the first */
332 conn
->outlen
= result
->ai_addrlen
;
333 conn
->outsock
= malloc(result
->ai_addrlen
);
334 memcpy(conn
->outsock
, result
->ai_addr
, result
->ai_addrlen
);
336 freeaddrinfo(result
);
340 /* define a female socket for point2point connection */
341 if (flags
& VDEFLAG_P2P_SOCKET
) {
342 struct stat sockstat
;
343 struct sockaddr_un sockun
;
344 struct sockaddr_un
*sockout
;
345 memset(&sockun
, 0, sizeof(sockun
));
346 if(given_sockname
== NULL
) {
350 strcpy(sockname
,given_sockname
); /* XXX canonicalize should be better */
351 if((conn
->fddata
= socket(AF_UNIX
, SOCK_DGRAM
, 0)) < 0)
353 sockun
.sun_family
= AF_UNIX
;
354 memset(sockun
.sun_path
,0,sizeof(sockun
.sun_path
));
355 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
)-1, "%s", sockname
);
356 /* the socket already exists */
357 if(stat(sockun
.sun_path
,&sockstat
) == 0) {
358 if (S_ISSOCK(sockstat
.st_mode
)) {
359 /* the socket is already in use */
360 res
= connect(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
365 if (errno
== ECONNREFUSED
)
366 unlink(sockun
.sun_path
);
369 res
= bind(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
372 conn
->inpath
=strdup(sockun
.sun_path
);
373 conn
->outlen
= sizeof(struct sockaddr_un
);
374 conn
->outsock
= (struct sockaddr
*) (sockout
= calloc(1,sizeof(struct sockaddr_un
)));
375 if (conn
->outsock
==NULL
)
377 sockout
->sun_family
= AF_UNIX
;
378 snprintf(sockout
->sun_path
, sizeof(sockun
.sun_path
), "%s+", sockname
);
382 if ((gs
=getgrnam(group
)) == NULL
)
386 chown(sockun
.sun_path
,-1,gid
);
388 chmod(sockun
.sun_path
,mode
);
391 struct sockaddr_un sockun
;
392 struct sockaddr_un dataout
;
393 memset(&sockun
, 0, sizeof(sockun
));
394 memset(&dataout
, 0, sizeof(dataout
));
396 /* connection to a vde_switch */
397 if((conn
->fdctl
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0)
399 if((conn
->fddata
= socket(AF_UNIX
, SOCK_DGRAM
, 0)) < 0)
401 sockun
.sun_family
= AF_UNIX
;
403 /* If we're given a sockname, just try it (remember: sockname is the
404 * canonicalized version of given_sockname - though we don't strictly need
405 * the canonicalized version here). sockname should be the name of a
406 * *directory* which contains the control socket, named ctl. Older
407 * versions of VDE used a socket instead of a directory (so an additional
408 * attempt with %s instead of %s/ctl could be made), but they should
409 * really not be used anymore. */
412 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
), "%s/ctl", sockname
);
413 res
= connect(conn
->fdctl
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
415 /* Else try all the fallback socknames, one by one */
419 for (i
= 0, res
= -1; fallback_sockname
[i
] && (res
!= 0); i
++)
421 /* Remember sockname for the data socket directory */
422 sockname
= fallback_sockname
[i
];
423 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
), "%s", sockname
);
424 res
= connect(conn
->fdctl
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
429 struct stat sockstat
;
430 /* define a male plug for point2point connection */
433 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
), "%s", sockname
);
434 res
= connect(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
437 snprintf(sockun
.sun_path
, sizeof(sockun
.sun_path
), "%s+", sockname
);
438 if(stat(sockun
.sun_path
,&sockstat
) == 0) {
439 if (S_ISSOCK(sockstat
.st_mode
)) {
440 /* the socket is already in use */
441 res
= connect(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
446 if (errno
== ECONNREFUSED
)
447 unlink(sockun
.sun_path
);
450 res
= bind(conn
->fddata
, (struct sockaddr
*) &sockun
, sizeof(sockun
));
453 conn
->inpath
=strdup(sockun
.sun_path
);
457 if ((gs
=getgrnam(group
)) == NULL
)
461 chown(sockun
.sun_path
,-1,gid
);
463 chmod(sockun
.sun_path
,mode
);
469 req
.magic
=SWITCH_MAGIC
;
471 req
.type
=req
.type
+(port
<< 8);
472 req
.sock
.sun_family
=AF_UNIX
;
474 /* First choice, store the return socket from the switch in the control
475 * dir. We assume that given_sockname (hence sockname) is a directory.
476 * Should be a safe assumption unless someone modifies the previous group
477 * of connect() attempts (see the comments above for more information). */
478 memset(req
.sock
.sun_path
, 0, sizeof(req
.sock
.sun_path
));
481 /* Here sockname is the last successful one in the previous step. */
482 sprintf(req
.sock
.sun_path
, "%s/.%05d-%05d", sockname
, pid
, sockno
++);
483 res
=bind(conn
->fddata
, (struct sockaddr
*) &req
.sock
, sizeof (req
.sock
));
485 while (res
< 0 && errno
== EADDRINUSE
);
487 /* It didn't work. So we cycle on the fallback directories until we find a
488 * suitable one (or the list ends). */
492 for (i
= 0, res
= -1; fallback_dirname
[i
] && (res
!= 0); i
++)
494 memset(req
.sock
.sun_path
, 0, sizeof(req
.sock
.sun_path
));
497 sprintf(req
.sock
.sun_path
, "%s/vde.%05d-%05d", fallback_dirname
[i
], pid
, sockno
++);
498 res
= bind(conn
->fddata
, (struct sockaddr
*) &req
.sock
, sizeof (req
.sock
));
500 while (res
< 0 && errno
== EADDRINUSE
);
504 /* Nothing worked, so cleanup and return with an error. */
508 conn
->inpath
=strdup(req
.sock
.sun_path
);
513 if ((gs
=getgrnam(group
)) == NULL
)
517 chown(req
.sock
.sun_path
,-1,gid
);
519 /* when group is not defined, set permission for the reverse channel */
521 /* if no permission gets "voluntarily" granted to the socket */
522 if ((mode
& 077) == 0) {
523 if (stat(sockun
.sun_path
, &ctlstat
) == 0) {
524 /* if the switch is owned by root or by the same user it should
526 if (ctlstat
.st_uid
!= 0 && ctlstat
.st_uid
!= geteuid()) {
527 /* try to change the group ownership to the same of the switch */
528 /* this call succeeds if the vde user and the owner of the switch
529 belong to the group */
530 if (chown(req
.sock
.sun_path
,-1,ctlstat
.st_gid
) == 0)
538 chmod(req
.sock
.sun_path
,mode
);
540 #ifdef DESCR_INCLUDE_SOCK
541 descrlen
=snprintf(req
.description
,MAXDESCR
,"%s user=%s PID=%d SOCK=%s",
542 descr
,(callerpwd
!= NULL
)?callerpwd
->pw_name
:"??",
543 pid
,req
.sock
.sun_path
);
545 descrlen
=snprintf(req
.description
,MAXDESCR
,"%s user=%s PID=%d",
546 descr
,(callerpwd
!= NULL
)?callerpwd
->pw_name
:"??", pid
);
550 char *endofip
=strchr(ssh_client
,' ');
551 if (endofip
) *endofip
=0;
552 snprintf(req
.description
+descrlen
,MAXDESCR
-descrlen
," SSH=%s", ssh_client
);
553 if (endofip
) *endofip
=' ';
557 if (send(conn
->fdctl
,&req
,sizeof(req
)-MAXDESCR
+strlen(req
.description
),0)<0)
560 if (recv(conn
->fdctl
,&dataout
,sizeof(struct sockaddr_un
),0)<0)
563 if (connect(conn
->fddata
,(struct sockaddr
*)&dataout
,sizeof(struct sockaddr_un
))<0)
566 chmod(dataout
.sun_path
,mode
);
575 if (conn
->fdctl
>= 0)
577 if (conn
->fddata
>= 0)
579 if (conn
->inpath
!= NULL
)
580 unlink(conn
->inpath
);
581 if (conn
->outsock
!= NULL
)
591 if (std_sockname
) free(std_sockname
);
592 if (real_sockname
) free(real_sockname
);
598 ssize_t
vde_recv(VDECONN
*conn
,void *buf
,size_t len
,int flags
)
602 if (__builtin_expect(conn
!=0,1)) {
603 if (__builtin_expect(((retval
=recv(conn
->fddata
,buf
,len
,0)) > 0), 1))
606 if (retval
== 0 && conn
->outsock
!= NULL
) {
607 static struct sockaddr unspec
={AF_UNSPEC
};
608 connect(conn
->fddata
,&unspec
,sizeof(unspec
));
618 if (__builtin_expect(conn
!=0,1))
619 return recv(conn
->fddata
,buf
,len
,0);
627 ssize_t
vde_send(VDECONN
*conn
,const void *buf
,size_t len
,int flags
)
630 if (__builtin_expect(conn
!=0,1)) {
632 if (__builtin_expect(((retval
=send(conn
->fddata
,buf
,len
,0)) >= 0),1))
635 if (__builtin_expect(errno
== ENOTCONN
|| errno
== EDESTADDRREQ
,0)) {
636 if (__builtin_expect(conn
->outsock
!= NULL
,1)) {
637 connect(conn
->fddata
, conn
->outsock
,conn
->outlen
);
638 return send(conn
->fddata
,buf
,len
,0);
649 if (__builtin_expect(conn
!=0,1)) {
650 if (__builtin_expect(conn
->outsock
== NULL
,1))
651 return send(conn
->fddata
,buf
,len
,0);
653 return sendto(conn
->fddata
,buf
,len
,0,
654 conn
->outsock
,conn
->outlen
);
662 int vde_datafd(VDECONN
*conn
)
664 if (__builtin_expect(conn
!=0,1))
672 int vde_ctlfd(VDECONN
*conn
)
674 if (__builtin_expect(conn
!=0,1))
682 int vde_close(VDECONN
*conn
)
684 if (__builtin_expect(conn
!=0,1)) {
686 send(conn
->fddata
,NULL
,0,0);
688 if (conn
->inpath
!= NULL
)
689 unlink(conn
->inpath
);
690 if (conn
->outsock
!= NULL
)
704 #define MAXPACKET 1521
709 ssize_t (*frecv
)(void *opaque
, void *buf
, size_t count
);
710 void (*ferr
)(void *opaque
, int type
, char *format
, ...);
711 char fragment
[MAXPACKET
];
713 unsigned int rnx
,remaining
;
716 VDESTREAM
*vdestream_open(void *opaque
,
718 ssize_t (*frecv
)(void *opaque
, void *buf
, size_t count
),
719 void (*ferr
)(void *opaque
, int type
, char *format
, ...)
722 VDESTREAM
*vdestream
;
723 if ((vdestream
=calloc(1,sizeof(struct vdestream
)))==NULL
) {
727 vdestream
->opaque
=opaque
;
728 vdestream
->fdout
=fdout
;
729 vdestream
->frecv
=frecv
;
730 vdestream
->ferr
=ferr
;
735 ssize_t
vdestream_send(VDESTREAM
*vdestream
, const void *buf
, size_t len
)
737 if (len
<= MAXPACKET
) {
738 unsigned char header
[2];
739 struct iovec iov
[2]={{header
,2},{(void *)buf
,len
}};
741 header
[1]=len
& 0xff;
742 return writev(vdestream
->fdout
,iov
,2);
747 void vdestream_recv(VDESTREAM
*vdestream
, unsigned char *buf
, size_t len
)
749 //fprintf(stderr,"%s: splitpacket rnx=%d remaining=%d size=%d\n",myname,rnx,vdestream->remaining,len);
751 if (vdestream
->rnx
>0) {
752 register int amount
=MIN(vdestream
->remaining
,len
);
753 //fprintf(stderr,"%s: fragment amount %d\n",myname,amount);
754 memcpy(vdestream
->fragp
,buf
,amount
);
755 vdestream
->remaining
-=amount
;
756 vdestream
->fragp
+=amount
;
759 if (vdestream
->remaining
==0) {
760 //fprintf(stderr,"%s: delivered defrag %d\n",myname,vdestream->rnx);
761 vdestream
->frecv(vdestream
->opaque
,vdestream
->fragment
,vdestream
->rnx
);
766 vdestream
->rnx
=(buf
[0]<<8)+buf
[1];
768 //fprintf(stderr,"%s %d: packet %d size %d %x %x\n",myname,getpid(),vdestream->rnx,len,buf[0],buf[1]);
770 if (vdestream
->rnx
== 0)
772 if (vdestream
->rnx
> MAXPACKET
) {
773 if (vdestream
->ferr
!= NULL
)
774 vdestream
->ferr(vdestream
->opaque
,PACKET_LENGTH_ERROR
,
775 "size %d expected size %d",len
,vdestream
->rnx
);
779 if (vdestream
->rnx
> len
) {
780 //fprintf(stderr,"%s: begin defrag %d\n",myname,vdestream->rnx);
781 vdestream
->fragp
=vdestream
->fragment
;
782 memcpy(vdestream
->fragp
,buf
,len
);
783 vdestream
->remaining
=vdestream
->rnx
-len
;
784 vdestream
->fragp
+=len
;
787 //fprintf(stderr,"%s: deliver %d\n",myname,vdestream->rnx);
788 vdestream
->frecv(vdestream
->opaque
,buf
,vdestream
->rnx
);
796 void vdestream_close(VDESTREAM
*vdestream
)