1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
24 // The Windows CancelIo API cancels all outstanding IO for a particular
25 // socket on the current thread. To overcome that limitation, we run a
26 // special goroutine, locked to a single OS thread, that both starts
27 // and cancels IO. This means there are 2 unavoidable thread switches
29 // Some newer versions of Windows have a new CancelIoEx API that does
30 // not have that limitation and can be used from any thread. This
31 // package uses the CancelIoEx API if present; otherwise it falls back
35 canCancelIO
bool // determines if CancelIoEx API is present
37 hasLoadSetFileCompletionNotificationModes
bool
42 e
:= syscall
.WSAStartup(uint32(0x202), &d
)
46 canCancelIO
= syscall
.LoadCancelIoEx() == nil
47 hasLoadSetFileCompletionNotificationModes
= syscall
.LoadSetFileCompletionNotificationModes() == nil
48 if hasLoadSetFileCompletionNotificationModes
{
49 // It's not safe to use FILE_SKIP_COMPLETION_PORT_ON_SUCCESS if non IFS providers are installed:
50 // http://support.microsoft.com/kb/2568167
52 protos
:= [2]int32{syscall
.IPPROTO_TCP
, 0}
53 var buf
[32]syscall
.WSAProtocolInfo
54 len := uint32(unsafe
.Sizeof(buf
))
55 n
, err
:= syscall
.WSAEnumProtocols(&protos
[0], &buf
[0], &len)
59 for i
:= int32(0); i
< n
; i
++ {
60 if buf
[i
].ServiceFlags1
&syscall
.XP1_IFS_HANDLES
== 0 {
69 // operation contains a superset of the data necessary to perform all async IO.
70 type operation
struct {
71 // Used by the IOCP interface; it must be the first field
72 // of the struct, as our code relies on it.
75 // fields used by runtime.netpoll
81 // fields used only by net package
86 rsa
*syscall
.RawSockaddrAny
93 func (o
*operation
) InitBuf(buf
[]byte) {
94 o
.buf
.Len
= uint32(len(buf
))
101 func (o
*operation
) InitBufs(buf
*[][]byte) {
103 o
.bufs
= make([]syscall
.WSABuf
, 0, len(*buf
))
107 for _
, b
:= range *buf
{
112 o
.bufs
= append(o
.bufs
, syscall
.WSABuf
{Len
: uint32(len(b
)), Buf
: p
})
116 // ClearBufs clears all pointers to the Buffers parameter captured
117 // by InitBufs, so that it can be released by the garbage collector.
118 func (o
*operation
) ClearBufs() {
119 for i
:= range o
.bufs
{
125 // ioSrv executes net IO requests.
130 type ioSrvReq
struct {
132 submit
func(o
*operation
) error
// if nil, cancel the operation
135 // ProcessRemoteIO executes submitted IO requests on behalf
136 // of other goroutines, all on a single OS thread, so that it can
137 // cancel them later. The results of all operations are sent
138 // back to their requesters via the channel supplied in the request.
139 // It is used only when the CancelIoEx API is unavailable.
140 func (s
*ioSrv
) ProcessRemoteIO() {
141 runtime
.LockOSThread()
142 defer runtime
.UnlockOSThread()
143 for r
:= range s
.req
{
145 r
.o
.errc
<- r
.submit(r
.o
)
147 r
.o
.errc
<- syscall
.CancelIo(r
.o
.fd
.Sysfd
)
152 // ExecIO executes a single IO operation o. It submits and cancels
153 // IO in the current thread on systems where the Windows CancelIoEx API
154 // is available. Alternatively, it passes the request onto
155 // runtime netpoll and waits for completion or cancels the request.
156 func (s
*ioSrv
) ExecIO(o
*operation
, submit
func(o
*operation
) error
) (int, error
) {
157 if o
.fd
.pd
.runtimeCtx
== 0 {
158 return 0, errors
.New("internal error: polling on unsupported descriptor type")
162 onceStartServer
.Do(startServer
)
166 // Notify runtime netpoll about starting IO.
167 err
:= fd
.pd
.prepare(int(o
.mode
), fd
.isFile
)
175 // Send request to a special dedicated thread,
176 // so it can stop the IO with CancelIO later.
177 s
.req
<- ioSrvReq
{o
, submit
}
182 // IO completed immediately
183 if o
.fd
.skipSyncNotif
{
184 // No completion message will follow, so return immediately.
185 return int(o
.qty
), nil
187 // Need to get our completion message anyway.
188 case syscall
.ERROR_IO_PENDING
:
189 // IO started, and we have to wait for its completion.
194 // Wait for our request to complete.
195 err
= fd
.pd
.wait(int(o
.mode
), fd
.isFile
)
197 // All is good. Extract our IO results and return.
199 err
= syscall
.Errno(o
.errno
)
202 return int(o
.qty
), nil
204 // IO is interrupted by "close" or "timeout"
207 case ErrNetClosing
, ErrFileClosing
, ErrTimeout
:
208 // will deal with those.
210 panic("unexpected runtime.netpoll error: " + netpollErr
.Error())
212 // Cancel our request.
214 err
:= syscall
.CancelIoEx(fd
.Sysfd
, &o
.o
)
215 // Assuming ERROR_NOT_FOUND is returned, if IO is completed.
216 if err
!= nil && err
!= syscall
.ERROR_NOT_FOUND
{
217 // TODO(brainman): maybe do something other than panic.
221 s
.req
<- ioSrvReq
{o
, nil}
224 // Wait for cancelation to complete.
225 fd
.pd
.waitCanceled(int(o
.mode
))
227 err
= syscall
.Errno(o
.errno
)
228 if err
== syscall
.ERROR_OPERATION_ABORTED
{ // IO Canceled
233 // We issued a cancelation request, but it seems the IO operation succeeded
234 // before the cancelation request ran. We need to treat the IO operation as
235 // having succeeded (the bytes were actually sent to or received from the network).
236 return int(o
.qty
), nil
239 // Start helper goroutines.
241 var onceStartServer sync
.Once
244 // This is called, once, when only the CancelIo API is available.
245 // Start two special goroutines, both locked to an OS thread,
246 // that start and cancel IO requests.
247 // One will process read requests, while the other will do writes.
248 rsrv
.req
= make(chan ioSrvReq
)
249 go rsrv
.ProcessRemoteIO()
250 wsrv
.req
= make(chan ioSrvReq
)
251 go wsrv
.ProcessRemoteIO()
254 // FD is a file descriptor. The net and os packages embed this type in
255 // a larger type representing a network connection or OS file.
257 // Lock sysfd and serialize access to Read and Write methods.
260 // System file descriptor. Immutable until Close.
271 // Used to implement pread/pwrite.
276 lastbits
[]byte // first few bytes of the last incomplete rune in last write
277 readuint16
[]uint16 // buffer to hold uint16s obtained with ReadConsole
278 readbyte
[]byte // buffer to hold decoding of readuint16 from utf16 to utf8
279 readbyteOffset
int // readbyte[readbyteOffset:] is yet to be consumed with file.Read
283 // Whether this is a streaming descriptor, as opposed to a
284 // packet-based descriptor like a UDP socket.
287 // Whether a zero byte read indicates EOF. This is false for a
288 // message based socket connection.
291 // Whether this is a normal file.
294 // Whether this is a directory.
298 // Init initializes the FD. The Sysfd field should already be set.
299 // This can be called multiple times on a single FD.
300 // The net argument is a network name from the net package (e.g., "tcp"),
301 // or "file" or "console" or "dir".
302 func (fd
*FD
) Init(net
string) (string, error
) {
314 case "tcp", "tcp4", "tcp6":
315 case "udp", "udp4", "udp6":
316 case "ip", "ip4", "ip6":
317 case "unix", "unixgram", "unixpacket":
319 return "", errors
.New("internal error: unknown network type " + net
)
322 if !fd
.isFile
&& !fd
.isConsole
&& !fd
.isDir
{
323 // Only call init for a network socket.
324 // This means that we don't add files to the runtime poller.
325 // Adding files to the runtime poller can confuse matters
326 // if the user is doing their own overlapped I/O.
329 // In general the code below avoids calling the ExecIO
330 // method for non-network sockets. If some method does
331 // somehow call ExecIO, then ExecIO, and therefore the
332 // calling method, will return an error, because
333 // fd.pd.runtimeCtx will be 0.
334 if err
:= fd
.pd
.init(fd
); err
!= nil {
338 if hasLoadSetFileCompletionNotificationModes
{
339 // We do not use events, so we can skip them always.
340 flags
:= uint8(syscall
.FILE_SKIP_SET_EVENT_ON_HANDLE
)
341 // It's not safe to skip completion notifications for UDP:
342 // http://blogs.technet.com/b/winserverperformance/archive/2008/06/26/designing-applications-for-high-performance-part-iii.aspx
343 if skipSyncNotif
&& (net
== "tcp" || net
== "file") {
344 flags |
= syscall
.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS
346 err
:= syscall
.SetFileCompletionNotificationModes(fd
.Sysfd
, flags
)
347 if err
== nil && flags
&syscall
.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS
!= 0 {
348 fd
.skipSyncNotif
= true
351 // Disable SIO_UDP_CONNRESET behavior.
352 // http://support.microsoft.com/kb/263823
354 case "udp", "udp4", "udp6":
357 size
:= uint32(unsafe
.Sizeof(flag
))
358 err
:= syscall
.WSAIoctl(fd
.Sysfd
, syscall
.SIO_UDP_CONNRESET
, (*byte)(unsafe
.Pointer(&flag
)), size
, nil, 0, &ret
, nil, 0)
360 return "wsaioctl", err
367 fd
.rop
.runtimeCtx
= fd
.pd
.runtimeCtx
368 fd
.wop
.runtimeCtx
= fd
.pd
.runtimeCtx
370 fd
.rop
.errc
= make(chan error
)
371 fd
.wop
.errc
= make(chan error
)
376 func (fd
*FD
) destroy() error
{
377 if fd
.Sysfd
== syscall
.InvalidHandle
{
378 return syscall
.EINVAL
380 // The poller may want to unregister fd from the readiness notification mechanism,
381 // so this must be executed before CloseFunc.
384 if fd
.isFile || fd
.isConsole
{
385 err
= syscall
.CloseHandle(fd
.Sysfd
)
387 err
= syscall
.FindClose(fd
.Sysfd
)
389 // The net package uses the CloseFunc variable for testing.
390 err
= CloseFunc(fd
.Sysfd
)
392 fd
.Sysfd
= syscall
.InvalidHandle
396 // Close closes the FD. The underlying file descriptor is closed by
397 // the destroy method when there are no remaining references.
398 func (fd
*FD
) Close() error
{
399 if !fd
.fdmu
.increfAndClose() {
400 return errClosing(fd
.isFile
)
402 // unblock pending reader and writer
407 // Shutdown wraps the shutdown network call.
408 func (fd
*FD
) Shutdown(how
int) error
{
409 if err
:= fd
.incref(); err
!= nil {
413 return syscall
.Shutdown(fd
.Sysfd
, how
)
416 // Read implements io.Reader.
417 func (fd
*FD
) Read(buf
[]byte) (int, error
) {
418 if err
:= fd
.readLock(); err
!= nil {
421 defer fd
.readUnlock()
425 if fd
.isFile || fd
.isDir || fd
.isConsole
{
429 n
, err
= fd
.readConsole(buf
)
431 n
, err
= syscall
.Read(fd
.Sysfd
, buf
)
439 n
, err
= rsrv
.ExecIO(o
, func(o
*operation
) error
{
440 return syscall
.WSARecv(o
.fd
.Sysfd
, &o
.buf
, 1, &o
.qty
, &o
.flags
, &o
.o
, nil)
443 race
.Acquire(unsafe
.Pointer(&ioSync
))
447 err
= fd
.eofError(n
, err
)
452 var ReadConsole
= syscall
.ReadConsole
// changed for testing
454 // readConsole reads utf16 characters from console File,
455 // encodes them into utf8 and stores them in buffer b.
456 // It returns the number of utf8 bytes read and an error, if any.
457 func (fd
*FD
) readConsole(b
[]byte) (int, error
) {
462 if fd
.readuint16
== nil {
463 // Note: syscall.ReadConsole fails for very large buffers.
464 // The limit is somewhere around (but not exactly) 16384.
466 fd
.readuint16
= make([]uint16, 0, 10000)
467 fd
.readbyte
= make([]byte, 0, 4*cap(fd
.readuint16
))
470 for fd
.readbyteOffset
>= len(fd
.readbyte
) {
471 n
:= cap(fd
.readuint16
) - len(fd
.readuint16
)
476 err
:= ReadConsole(fd
.Sysfd
, &fd
.readuint16
[:len(fd
.readuint16
)+1][len(fd
.readuint16
)], uint32(n
), &nw
, nil)
480 uint16s
:= fd
.readuint16
[:len(fd
.readuint16
)+int(nw
)]
481 fd
.readuint16
= fd
.readuint16
[:0]
482 buf
:= fd
.readbyte
[:0]
483 for i
:= 0; i
< len(uint16s
); i
++ {
484 r
:= rune(uint16s
[i
])
485 if utf16
.IsSurrogate(r
) {
486 if i
+1 == len(uint16s
) {
488 // Save half surrogate pair for next time.
489 fd
.readuint16
= fd
.readuint16
[:1]
490 fd
.readuint16
[0] = uint16(r
)
495 r
= utf16
.DecodeRune(r
, rune(uint16s
[i
+1]))
496 if r
!= utf8
.RuneError
{
501 n
:= utf8
.EncodeRune(buf
[len(buf
):cap(buf
)], r
)
502 buf
= buf
[:len(buf
)+n
]
505 fd
.readbyteOffset
= 0
511 src
:= fd
.readbyte
[fd
.readbyteOffset
:]
513 for i
= 0; i
< len(src
) && i
< len(b
); i
++ {
515 if x
== 0x1A { // Ctrl-Z
523 fd
.readbyteOffset
+= i
527 // Pread emulates the Unix pread system call.
528 func (fd
*FD
) Pread(b
[]byte, off
int64) (int, error
) {
529 // Call incref, not readLock, because pread specifies the
530 // offset and is therefore independent of other reads.
531 if err
:= fd
.incref(); err
!= nil {
538 curoffset
, e
:= syscall
.Seek(fd
.Sysfd
, 0, io
.SeekCurrent
)
542 defer syscall
.Seek(fd
.Sysfd
, curoffset
, io
.SeekStart
)
543 o
:= syscall
.Overlapped
{
544 OffsetHigh
: uint32(off
>> 32),
548 e
= syscall
.ReadFile(fd
.Sysfd
, b
, &done
, &o
)
551 if e
== syscall
.ERROR_HANDLE_EOF
{
556 e
= fd
.eofError(int(done
), e
)
561 // ReadFrom wraps the recvfrom network call.
562 func (fd
*FD
) ReadFrom(buf
[]byte) (int, syscall
.Sockaddr
, error
) {
566 if err
:= fd
.readLock(); err
!= nil {
569 defer fd
.readUnlock()
572 n
, err
:= rsrv
.ExecIO(o
, func(o
*operation
) error
{
574 o
.rsa
= new(syscall
.RawSockaddrAny
)
576 o
.rsan
= int32(unsafe
.Sizeof(*o
.rsa
))
577 return syscall
.WSARecvFrom(o
.fd
.Sysfd
, &o
.buf
, 1, &o
.qty
, &o
.flags
, o
.rsa
, &o
.rsan
, &o
.o
, nil)
579 err
= fd
.eofError(n
, err
)
583 sa
, _
:= o
.rsa
.Sockaddr()
587 // Write implements io.Writer.
588 func (fd
*FD
) Write(buf
[]byte) (int, error
) {
589 if err
:= fd
.writeLock(); err
!= nil {
592 defer fd
.writeUnlock()
596 if fd
.isFile || fd
.isDir || fd
.isConsole
{
600 n
, err
= fd
.writeConsole(buf
)
602 n
, err
= syscall
.Write(fd
.Sysfd
, buf
)
609 race
.ReleaseMerge(unsafe
.Pointer(&ioSync
))
613 n
, err
= wsrv
.ExecIO(o
, func(o
*operation
) error
{
614 return syscall
.WSASend(o
.fd
.Sysfd
, &o
.buf
, 1, &o
.qty
, 0, &o
.o
, nil)
620 // writeConsole writes len(b) bytes to the console File.
621 // It returns the number of bytes written and an error, if any.
622 func (fd
*FD
) writeConsole(b
[]byte) (int, error
) {
624 runes
:= make([]rune
, 0, 256)
625 if len(fd
.lastbits
) > 0 {
626 b
= append(fd
.lastbits
, b
...)
630 for len(b
) >= utf8
.UTFMax || utf8
.FullRune(b
) {
631 r
, l
:= utf8
.DecodeRune(b
)
632 runes
= append(runes
, r
)
636 fd
.lastbits
= make([]byte, len(b
))
639 // syscall.WriteConsole seems to fail if given a large buffer,
640 // so limit the buffer to 16000 characters. This number was
641 // discovered by experimenting with syscall.WriteConsole.
642 const maxWrite
= 16000
650 uint16s
:= utf16
.Encode(chunk
)
651 for len(uint16s
) > 0 {
653 err
:= syscall
.WriteConsole(fd
.Sysfd
, &uint16s
[0], uint32(len(uint16s
)), &written
, nil)
657 uint16s
= uint16s
[written
:]
663 // Pwrite emulates the Unix pwrite system call.
664 func (fd
*FD
) Pwrite(b
[]byte, off
int64) (int, error
) {
665 // Call incref, not writeLock, because pwrite specifies the
666 // offset and is therefore independent of other writes.
667 if err
:= fd
.incref(); err
!= nil {
674 curoffset
, e
:= syscall
.Seek(fd
.Sysfd
, 0, io
.SeekCurrent
)
678 defer syscall
.Seek(fd
.Sysfd
, curoffset
, io
.SeekStart
)
679 o
:= syscall
.Overlapped
{
680 OffsetHigh
: uint32(off
>> 32),
684 e
= syscall
.WriteFile(fd
.Sysfd
, b
, &done
, &o
)
688 return int(done
), nil
691 // Writev emulates the Unix writev system call.
692 func (fd
*FD
) Writev(buf
*[][]byte) (int64, error
) {
696 if err
:= fd
.writeLock(); err
!= nil {
699 defer fd
.writeUnlock()
701 race
.ReleaseMerge(unsafe
.Pointer(&ioSync
))
705 n
, err
:= wsrv
.ExecIO(o
, func(o
*operation
) error
{
706 return syscall
.WSASend(o
.fd
.Sysfd
, &o
.bufs
[0], uint32(len(o
.bufs
)), &o
.qty
, 0, &o
.o
, nil)
710 consume(buf
, int64(n
))
714 // WriteTo wraps the sendto network call.
715 func (fd
*FD
) WriteTo(buf
[]byte, sa syscall
.Sockaddr
) (int, error
) {
719 if err
:= fd
.writeLock(); err
!= nil {
722 defer fd
.writeUnlock()
726 n
, err
:= wsrv
.ExecIO(o
, func(o
*operation
) error
{
727 return syscall
.WSASendto(o
.fd
.Sysfd
, &o
.buf
, 1, &o
.qty
, 0, o
.sa
, &o
.o
, nil)
732 // Call ConnectEx. This doesn't need any locking, since it is only
733 // called when the descriptor is first created. This is here rather
734 // than in the net package so that it can use fd.wop.
735 func (fd
*FD
) ConnectEx(ra syscall
.Sockaddr
) error
{
738 _
, err
:= wsrv
.ExecIO(o
, func(o
*operation
) error
{
739 return ConnectExFunc(o
.fd
.Sysfd
, o
.sa
, nil, 0, nil, &o
.o
)
744 func (fd
*FD
) acceptOne(s syscall
.Handle
, rawsa
[]syscall
.RawSockaddrAny
, o
*operation
) (string, error
) {
745 // Submit accept request.
747 o
.rsan
= int32(unsafe
.Sizeof(rawsa
[0]))
748 _
, err
:= rsrv
.ExecIO(o
, func(o
*operation
) error
{
749 return AcceptFunc(o
.fd
.Sysfd
, o
.handle
, (*byte)(unsafe
.Pointer(&rawsa
[0])), 0, uint32(o
.rsan
), uint32(o
.rsan
), &o
.qty
, &o
.o
)
753 return "acceptex", err
756 // Inherit properties of the listening socket.
757 err
= syscall
.Setsockopt(s
, syscall
.SOL_SOCKET
, syscall
.SO_UPDATE_ACCEPT_CONTEXT
, (*byte)(unsafe
.Pointer(&fd
.Sysfd
)), int32(unsafe
.Sizeof(fd
.Sysfd
)))
760 return "setsockopt", err
766 // Accept handles accepting a socket. The sysSocket parameter is used
767 // to allocate the net socket.
768 func (fd
*FD
) Accept(sysSocket
func() (syscall
.Handle
, error
)) (syscall
.Handle
, []syscall
.RawSockaddrAny
, uint32, string, error
) {
769 if err
:= fd
.readLock(); err
!= nil {
770 return syscall
.InvalidHandle
, nil, 0, "", err
772 defer fd
.readUnlock()
775 var rawsa
[2]syscall
.RawSockaddrAny
777 s
, err
:= sysSocket()
779 return syscall
.InvalidHandle
, nil, 0, "", err
782 errcall
, err
:= fd
.acceptOne(s
, rawsa
[:], o
)
784 return s
, rawsa
[:], uint32(o
.rsan
), "", nil
787 // Sometimes WSAECONNRESET or ERROR_NETNAME_DELETED is
788 // returned here. These happen if a connection reset is received
789 // before AcceptEx could complete. These errors relate to the new
790 // connection, not to AcceptEx, so ignore the broken connection and
791 // try AcceptEx again for more connections.
792 errno
, ok
:= err
.(syscall
.Errno
)
794 return syscall
.InvalidHandle
, nil, 0, errcall
, err
797 case syscall
.ERROR_NETNAME_DELETED
, syscall
.WSAECONNRESET
:
798 // ignore these and try again
800 return syscall
.InvalidHandle
, nil, 0, errcall
, err
805 // Seek wraps syscall.Seek.
806 func (fd
*FD
) Seek(offset
int64, whence
int) (int64, error
) {
807 if err
:= fd
.incref(); err
!= nil {
815 return syscall
.Seek(fd
.Sysfd
, offset
, whence
)
818 // FindNextFile wraps syscall.FindNextFile.
819 func (fd
*FD
) FindNextFile(data
*syscall
.Win32finddata
) error
{
820 if err
:= fd
.incref(); err
!= nil {
824 return syscall
.FindNextFile(fd
.Sysfd
, data
)
827 // Fchdir wraps syscall.Fchdir.
828 func (fd
*FD
) Fchdir() error
{
829 if err
:= fd
.incref(); err
!= nil {
833 return syscall
.Fchdir(fd
.Sysfd
)
836 // GetFileType wraps syscall.GetFileType.
837 func (fd
*FD
) GetFileType() (uint32, error
) {
838 if err
:= fd
.incref(); err
!= nil {
842 return syscall
.GetFileType(fd
.Sysfd
)
845 // GetFileInformationByHandle wraps GetFileInformationByHandle.
846 func (fd
*FD
) GetFileInformationByHandle(data
*syscall
.ByHandleFileInformation
) error
{
847 if err
:= fd
.incref(); err
!= nil {
851 return syscall
.GetFileInformationByHandle(fd
.Sysfd
, data
)
854 // RawControl invokes the user-defined function f for a non-IO
856 func (fd
*FD
) RawControl(f
func(uintptr)) error
{
857 if err
:= fd
.incref(); err
!= nil {
865 // RawRead invokes the user-defined function f for a read operation.
866 func (fd
*FD
) RawRead(f
func(uintptr) bool) error
{
867 return errors
.New("not implemented")
870 // RawWrite invokes the user-defined function f for a write operation.
871 func (fd
*FD
) RawWrite(f
func(uintptr) bool) error
{
872 return errors
.New("not implemented")