Resolves 198158, by backporting fixes for largefile and sendfile bugs [branch: NSPRPUB_RELEASE_4_1_BRANCH]
author: kirk.erickson%sun.com
date: Thu, 10 Apr 2003 20:04:45 +0000
branch: NSPRPUB_RELEASE_4_1_BRANCH
changeset: 2800 4700524e4329cfe91220355ccb65c954fb18b73e
parent: 2760 42b0ae3cd9d0f110a1faa620d9de12af5b694de5
child: 2801 27f537d707ae0b3fec10dd74b9f08a7a604ad7c9
child: 2802 9acf1e63b0fe1dd6336a02a66724fa8a48b5a742
child: 2803 92095a5450c2c8a15d33ede269ddf01dd72df2db
child: 2839 88a5ca811402a042a41c6d7d918462089facf90d
push id: unknown
push user: unknown
push date: unknown
bugs: 198158, 84361, 132812, 132208, 132940
Resolves bug 198158 by backporting the fixes for the largefile and sendfile bugs (84361, 132812, 132208, 132940).
config/Linux.mk
pr/include/md/_aix.h
pr/include/md/_linux.h
pr/include/md/_solaris.h
pr/include/md/_unixos.h
pr/src/md/unix/solaris.c
pr/src/md/unix/unix_errors.c
pr/src/pthreads/ptio.c
--- a/config/Linux.mk
+++ b/config/Linux.mk
@@ -77,17 +77,17 @@ CPU_ARCH_TAG		= _$(CPU_ARCH)
 CC			= gcc
 CCC			= g++
 RANLIB			= ranlib
 
 OS_INCLUDES		=
 G++INCLUDES		= -I/usr/include/g++
 
 PLATFORM_FLAGS		= -ansi -Wall -pipe -DLINUX -Dlinux
-PORT_FLAGS		= -D_POSIX_SOURCE -D_BSD_SOURCE -D_SVID_SOURCE -DHAVE_STRERROR -DHAVE_FCNTL_FILE_LOCKING
+PORT_FLAGS		= -D_POSIX_SOURCE -D_BSD_SOURCE -D_SVID_SOURCE -DHAVE_STRERROR -DHAVE_FCNTL_FILE_LOCKING -D_LARGEFILE64_SOURCE
 
 OS_CFLAGS		= $(DSO_CFLAGS) $(PLATFORM_FLAGS) $(PORT_FLAGS)
 
 ######################################################################
 # Version-specific stuff
 ######################################################################
 
 ifeq ($(CPU_ARCH),alpha)
--- a/pr/include/md/_aix.h
+++ b/pr/include/md/_aix.h
@@ -238,9 +238,11 @@ struct _MDCPU {
 #define _MD_CLEAN_THREAD(_thread)
 #endif /* PTHREADS_USER */
 
 #ifdef AIX_RENAME_SELECT
 #define _MD_SELECT	select
 #define _MD_POLL	poll
 #endif
 
+extern void _MD_aix_map_sendfile_error(int err);
+
 #endif /* nspr_aix_defs_h___ */
--- a/pr/include/md/_linux.h
+++ b/pr/include/md/_linux.h
@@ -106,16 +106,18 @@ extern PRInt32 _PR_ia64_AtomicSet(PRInt3
 #define USE_SETJMP
 #if defined(__GLIBC__) && __GLIBC__ >= 2
 #define _PR_POLL_AVAILABLE
 #endif
 #undef _PR_USE_POLL
 #define _PR_STAT_HAS_ONLY_ST_ATIME
 #if defined(__alpha) || defined(__ia64__)
 #define _PR_HAVE_LARGE_OFF_T
+#elif (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1)
+#define _PR_HAVE_OFF64_T
 #else
 #define _PR_NO_LARGE_FILES
 #endif
 #if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1)
 #define _PR_INET6
 #define _PR_HAVE_GETHOSTBYNAME2
 #define _PR_INET6_PROBE
 #endif
@@ -453,9 +455,11 @@ extern PRIntervalTime _PR_UNIX_TicksPerS
 extern int __syscall_poll(struct pollfd *ufds, unsigned long int nfds,
 	int timeout);
 #define _MD_POLL __syscall_poll
 #endif
 
 /* For writev() */
 #include <sys/uio.h>
 
+extern void _MD_linux_map_sendfile_error(int err);
+
 #endif /* nspr_linux_defs_h___ */
--- a/pr/include/md/_solaris.h
+++ b/pr/include/md/_solaris.h
@@ -777,10 +777,12 @@ PR_BEGIN_EXTERN_C
 ** Missing function prototypes
 */
 extern int gethostname (char *name, int namelen);
 
 PR_END_EXTERN_C
 
 #endif /* _PR_GLOBAL_THREADS_ONLY */
 
+extern void _MD_solaris_map_sendfile_error(int err);
+
 #endif /* nspr_solaris_defs_h___ */
 
--- a/pr/include/md/_unixos.h
+++ b/pr/include/md/_unixos.h
@@ -140,16 +140,19 @@ extern void _PR_Unblock_IO_Wait(struct P
 
 extern fd_set _pr_md_read_set, _pr_md_write_set, _pr_md_exception_set;
 extern PRInt16 _pr_md_read_cnt[], _pr_md_write_cnt[], _pr_md_exception_cnt[];
 extern PRInt32 _pr_md_ioq_max_osfd;
 extern PRUint32 _pr_md_ioq_timeout;
 
 struct _MDFileDesc {
     int osfd;
+#if defined(LINUX) && defined(_PR_PTHREADS)
+    int tcp_nodelay;  /* TCP_NODELAY value last set through pt_SetSocketOption (copied to accepted sockets in pt_Accept); read by pt_LinuxSendFile */
+#endif
 };
 
 struct _MDDir {
 	DIR *d;
 };
 
 struct _PRCPU;
 extern void _MD_unix_init_running_cpu(struct _PRCPU *cpu);
--- a/pr/src/md/unix/solaris.c
+++ b/pr/src/md/unix/solaris.c
@@ -27,18 +27,16 @@
  * allow others to use your version of this file under the MPL,
  * indicate your decision by deleting the provisions above and
  * replace them with the notice and other provisions required by
  * the GPL.  If you do not delete the provisions above, a recipient
  * may use your version of this file under either the MPL or the
  * GPL.
  */
 
-#undef _FILE_OFFSET_BITS
-
 #include "primpl.h"
 
 
 extern PRBool suspendAllOn;
 extern PRThread *suspendAllThread;
 
 extern void _MD_SET_PRIORITY(_MDThread *md, PRThreadPriority newPri);
 
--- a/pr/src/md/unix/unix_errors.c
+++ b/pr/src/md/unix/unix_errors.c
@@ -834,8 +834,16 @@ void _MD_hpux_map_sendfile_error(int err
 #endif /* HPUX11 */
 
 #ifdef SOLARIS
 void _MD_solaris_map_sendfile_error(int err)
 {
     _MD_unix_map_default_error(err) ;
 }
 #endif /* SOLARIS */
+
+#ifdef LINUX
+void _MD_linux_map_sendfile_error(int err)  /* err: errno captured after a failed sendfile() in pt_LinuxSendFile */
+{
+    _MD_unix_map_default_error(err) ;  /* no sendfile-specific cases; defer to the generic Unix error mapping */
+}
+#endif /* LINUX */
+
--- a/pr/src/pthreads/ptio.c
+++ b/pr/src/pthreads/ptio.c
@@ -170,21 +170,31 @@ struct sf_parms {
  */
 static ssize_t (*pt_aix_sendfile_fptr)() = NULL;
 
 #define AIX_SEND_FILE(a, b, c) (*pt_aix_sendfile_fptr)(a, b, c)
 
 #endif /* HAVE_SEND_FILE */
 #endif /* AIX */
 
+#ifdef LINUX
+#include <sys/sendfile.h>
+#endif
+
 #include "primpl.h"
 
 /* On Alpha Linux, these are already defined in sys/socket.h */
 #if !(defined(LINUX) && defined(__alpha))
 #include <netinet/tcp.h>  /* TCP_NODELAY, TCP_MAXSEG */
+#ifdef LINUX
+/* TCP_CORK is not defined in <netinet/tcp.h> on Red Hat Linux 6.0 */
+#ifndef TCP_CORK
+#define TCP_CORK 3
+#endif
+#endif
 #endif
 
 #if defined(SOLARIS)
 #define _PRSockOptVal_t char *
 #elif defined(IRIX) || defined(OSF1) || defined(AIX) || defined(HPUX) \
     || defined(LINUX) || defined(FREEBSD) || defined(BSDI) || defined(VMS) \
     || defined(NTO) || defined(OPENBSD) || defined(RHAPSODY)
 #define _PRSockOptVal_t void *
@@ -321,16 +331,25 @@ struct pt_Continuation
     
 #ifdef SOLARIS
     /*
      * For sendfilev()
      */
     int nbytes_to_send;                     /* size of header and file */
 #endif  /* SOLARIS */
  
+#ifdef LINUX
+    /*
+     * For sendfile()
+     */
+    int in_fd;                              /* descriptor of file to send */
+    off_t offset;
+    size_t count;
+#endif  /* LINUX */
+ 
     PRIntervalTime timeout;                 /* client (relative) timeout */
 
     PRInt16 event;                           /* flags for poll()'s events */
 
     /*
     ** The representation and notification of the results of the operation.
     ** These function can either return an int return code or a pointer to
     ** some object.
@@ -1016,17 +1035,18 @@ static PRBool pt_solaris_sendfile_cont(p
     ssize_t count;
 
     count = SOLARIS_SENDFILEV(op->arg1.osfd, vec, op->arg3.amount, &xferred);
     PR_ASSERT((count == -1) || (count == xferred));
     PR_ASSERT(xferred <= op->nbytes_to_send);
     op->syserrno = errno;
 
     if (count == -1) {
-        if (op->syserrno != EWOULDBLOCK && op->syserrno != EAGAIN) {
+        if (op->syserrno != EWOULDBLOCK && op->syserrno != EAGAIN
+                && op->syserrno != EINTR) {
             op->result.code = -1;
             return PR_TRUE;
         }
         count = xferred;
     }
     
     op->result.code += count;
     if (count < op->nbytes_to_send) {
@@ -1046,16 +1066,42 @@ static PRBool pt_solaris_sendfile_cont(p
 
         return PR_FALSE;
     }
 
     return PR_TRUE;
 }
 #endif  /* SOLARIS */
 
+#ifdef LINUX 
+static PRBool pt_linux_sendfile_cont(pt_Continuation *op, PRInt16 revents)  /* one sendfile() retry for pt_LinuxSendFile; revents is not used */
+{
+    ssize_t rv;
+    off_t oldoffset;  /* op->offset before the call; only used by the assertion below */
+
+    oldoffset = op->offset;
+    rv = sendfile(op->arg1.osfd, op->in_fd, &op->offset, op->count);
+    op->syserrno = errno;  /* captured unconditionally, right after the system call */
+
+    if (rv == -1) {
+        if (op->syserrno != EWOULDBLOCK && op->syserrno != EAGAIN) {  /* NOTE(review): EINTR is treated as a hard error here, unlike pt_solaris_sendfile_cont - confirm intended */
+            op->result.code = -1;  /* hard error: report failure */
+            return PR_TRUE;  /* finished (with error) */
+        }
+        rv = 0;  /* would block: count this round as zero bytes sent */
+    }
+    PR_ASSERT(rv == op->offset - oldoffset);  /* the offset must have advanced by exactly the bytes sent */
+    op->result.code += rv;  /* running total of bytes sent */
+    if (rv < op->count) {
+        op->count -= rv;  /* bytes still outstanding */
+        return PR_FALSE;  /* not finished: bytes remain to be sent */
+    }
+    return PR_TRUE;  /* finished: all op->count bytes were sent */
+}
+#endif  /* LINUX */
 void _PR_InitIO()
 {
 #if defined(DEBUG)
     memset(&pt_debug, 0, sizeof(PTDebug));
     pt_debug.timeStarted = PR_Now();
 #endif
 
     _pr_flock_lock = PR_NewLock();
@@ -1571,16 +1617,24 @@ static PRFileDesc* pt_Accept(
         addr->raw.family = PR_AF_INET6;
 #endif
     newfd = pt_SetMethods(osfd, PR_DESC_SOCKET_TCP, PR_TRUE, PR_FALSE);
     if (newfd == NULL) close(osfd);  /* $$$ whoops! this doesn't work $$$ */
     else
     {
         PR_ASSERT(IsValidNetAddr(addr) == PR_TRUE);
         PR_ASSERT(IsValidNetAddrLen(addr, addr_len) == PR_TRUE);
+#ifdef LINUX
+        /*
+         * On Linux, experiments showed that the accepted sockets
+         * inherit the TCP_NODELAY socket option of the listening
+         * socket.
+         */
+        newfd->secret->md.tcp_nodelay = fd->secret->md.tcp_nodelay;
+#endif
     }
     return newfd;
 
 failed:
     pt_MapError(_PR_MD_MAP_ACCEPT_ERROR, syserrno);
     return NULL;
 }  /* pt_Accept */
 
@@ -2064,17 +2118,17 @@ static PRInt32 pt_AIXSendFile(PRFileDesc
         op.timeout = timeout;
         op.function = pt_aix_sendfile_cont;
         op.event = POLLOUT | POLLPRI;
         count = pt_Continue(&op);
         syserrno = op.syserrno;
     }
 
     if (count == -1) {
-        _MD_aix_map_sendfile_error(syserrno);
+        pt_MapError(_MD_aix_map_sendfile_error, syserrno);
         return -1;
     }
     if (flags & PR_TRANSMITFILE_CLOSE_SOCKET) {
         PR_Close(sd);
     }
 	PR_ASSERT(count == (sfd->hlen + sfd->tlen +
 						((sfd->file_nbytes ==  0) ?
 						sf_struct.file_size - sfd->file_offset :
@@ -2103,24 +2157,26 @@ static PRInt32 pt_HPUXSendFile(PRFileDes
 {
     struct stat statbuf;
     size_t nbytes_to_send, file_nbytes_to_send;
     struct iovec hdtrl[2];  /* optional header and trailer buffers */
     int send_flags;
     PRInt32 count;
     int syserrno;
 
-    /* Get file size */
-    if (fstat(sfd->fd->secret->md.osfd, &statbuf) == -1) {
-        _PR_MD_MAP_FSTAT_ERROR(errno);
-        return -1;
+    if (sfd->file_nbytes == 0) {
+        /* Get file size */
+        if (fstat(sfd->fd->secret->md.osfd, &statbuf) == -1) {
+            _PR_MD_MAP_FSTAT_ERROR(errno);
+            return -1;
+        } 		
+        file_nbytes_to_send = statbuf.st_size - sfd->file_offset;
+    } else {
+        file_nbytes_to_send = sfd->file_nbytes;
     }
-	file_nbytes_to_send = (sfd->file_nbytes ==  0) ?
-						statbuf.st_size - sfd->file_offset :
-						sfd->file_nbytes;
     nbytes_to_send = sfd->hlen + sfd->tlen + file_nbytes_to_send;
 
     hdtrl[0].iov_base = (void *) sfd->header;  /* cast away the 'const' */
     hdtrl[0].iov_len = sfd->hlen;
     hdtrl[1].iov_base = (void *) sfd->trailer;
     hdtrl[1].iov_len = sfd->tlen;
     /*
      * SF_DISCONNECT seems to close the socket even if sendfile()
@@ -2185,17 +2241,17 @@ static PRInt32 pt_HPUXSendFile(PRFileDes
         op.timeout = timeout;
         op.function = pt_hpux_sendfile_cont;
         op.event = POLLOUT | POLLPRI;
         count = pt_Continue(&op);
         syserrno = op.syserrno;
     }
 
     if (count == -1) {
-        _MD_hpux_map_sendfile_error(syserrno);
+        pt_MapError(_MD_hpux_map_sendfile_error, syserrno);
         return -1;
     }
     if (flags & PR_TRANSMITFILE_CLOSE_SOCKET) {
         PR_Close(sd);
     }
     PR_ASSERT(count == nbytes_to_send);
     return count;
 }
@@ -2315,17 +2371,17 @@ static PRInt32 pt_SolarisSendFile(PRFile
         op.function = pt_solaris_sendfile_cont;
         op.event = POLLOUT | POLLPRI;
         count = pt_Continue(&op);
         syserrno = op.syserrno;
     }
 
 done:
     if (count == -1) {
-        _MD_solaris_map_sendfile_error(syserrno);
+        pt_MapError(_MD_solaris_map_sendfile_error, syserrno);
         return -1;
     }
     if (flags & PR_TRANSMITFILE_CLOSE_SOCKET) {
         PR_Close(sd);
     }
     PR_ASSERT(count == nbytes_to_send);
     return count;
 }
@@ -2382,16 +2438,150 @@ static PRInt32 pt_SolarisDispatchSendFil
     } else {
         return PR_EmulateSendFile(sd, sfd, flags, timeout);
     }
 }
 #endif /* !HAVE_SENDFILEV */
 
 #endif  /* SOLARIS */
 
+#ifdef LINUX
+/*
+ * pt_LinuxSendFile
+ *
+ *    Send file sfd->fd across socket sd, starting at sfd->file_offset.
+ *    sfd->file_nbytes == 0 means "send to end of file" (size from fstat).
+ *    An optional header (sfd->header, sfd->hlen) is sent before the file
+ *    and an optional trailer (sfd->trailer, sfd->tlen) after it.
+ *
+ *    PR_TRANSMITFILE_CLOSE_SOCKET flag - close sd after a successful send
+ *    Returns the total number of bytes sent, or -1 on error.
+ *    Uses the sendfile() system call (Linux kernel 2.2 or higher); sets
+ *    TCP_CORK for the send if a header/trailer exists and tcp_nodelay is 0.
+ */
+
+static PRInt32 pt_LinuxSendFile(PRFileDesc *sd, PRSendFileData *sfd,
+                PRTransmitFileFlags flags, PRIntervalTime timeout)
+{
+    struct stat statbuf;
+    size_t file_nbytes_to_send;	/* file bytes to send, excluding header and trailer */
+    PRInt32 count = 0;  /* running total of bytes sent; -1 once a step has failed */
+    ssize_t rv;
+    int syserrno;
+    off_t offset;  /* file position passed to sendfile(), starts at sfd->file_offset */
+    PRBool tcp_cork_enabled = PR_FALSE;  /* PR_TRUE once TCP_CORK was set here and must be cleared on exit */
+    int tcp_cork;
+
+    if (sfd->file_nbytes == 0) {
+        /* Get file size */
+        if (fstat(sfd->fd->secret->md.osfd, &statbuf) == -1) {
+            _PR_MD_MAP_FSTAT_ERROR(errno);
+            return -1;
+        } 		
+        file_nbytes_to_send = statbuf.st_size - sfd->file_offset;  /* NOTE(review): assumes file_offset <= st_size; size_t would wrap otherwise - confirm */
+    } else {
+        file_nbytes_to_send = sfd->file_nbytes;
+    }
+
+    if ((sfd->hlen != 0 || sfd->tlen != 0)
+            && sd->secret->md.tcp_nodelay == 0) {
+        tcp_cork = 1;  /* turn TCP_CORK on for the duration of the send */
+        if (setsockopt(sd->secret->md.osfd, SOL_TCP, TCP_CORK,
+                &tcp_cork, sizeof tcp_cork) == 0) {
+            tcp_cork_enabled = PR_TRUE;
+        } else {
+            syserrno = errno;
+            if (syserrno != EINVAL) {
+                _PR_MD_MAP_SETSOCKOPT_ERROR(syserrno);
+                return -1;
+            }
+            /*
+             * The most likely reason for the EINVAL error is that
+             * TCP_NODELAY is set (with a function other than
+             * PR_SetSocketOption).  This is not fatal, so we keep
+             * on going.
+             */
+            PR_LOG(_pr_io_lm, PR_LOG_WARNING,
+                ("pt_LinuxSendFile: "
+                "setsockopt(TCP_CORK) failed with EINVAL\n"));
+        }
+    }
+
+    if (sfd->hlen != 0) {
+        count = PR_Send(sd, sfd->header, sfd->hlen, 0, timeout);  /* count takes PR_Send's result; -1 is treated as failure */
+        if (count == -1) {
+            goto failed;
+        }
+    }
+
+    if (file_nbytes_to_send != 0) {
+        offset = sfd->file_offset;
+        do {
+            rv = sendfile(sd->secret->md.osfd, sfd->fd->secret->md.osfd,
+                &offset, file_nbytes_to_send);
+        } while (rv == -1 && (syserrno = errno) == EINTR);  /* retry while the call fails with EINTR */
+        if (rv == -1) {
+            if (syserrno != EAGAIN && syserrno != EWOULDBLOCK) {
+                _MD_linux_map_sendfile_error(syserrno);
+                count = -1;
+                goto failed;
+            }
+            rv = 0;  /* would block: nothing sent yet; the continuation below sends the data */
+        }
+        PR_ASSERT(rv == offset - sfd->file_offset);  /* offset must have advanced by exactly the bytes sent */
+        count += rv;
+
+        if (rv < file_nbytes_to_send) {  /* partial (or no) send: hand the remainder to a continuation */
+            pt_Continuation op;
+
+            op.arg1.osfd = sd->secret->md.osfd;
+            op.in_fd = sfd->fd->secret->md.osfd;
+            op.offset = offset;
+            op.count = file_nbytes_to_send - rv;
+            op.result.code = count;  /* seed with the bytes already sent (header plus first sendfile) */
+            op.timeout = timeout;
+            op.function = pt_linux_sendfile_cont;
+            op.event = POLLOUT | POLLPRI;
+            count = pt_Continue(&op);  /* NOTE(review): presumably re-invokes op.function until it returns PR_TRUE and yields op.result.code - confirm */
+            syserrno = op.syserrno;
+            if (count == -1) {
+                pt_MapError(_MD_linux_map_sendfile_error, syserrno);
+                goto failed;
+            }
+        }
+    }
+
+    if (sfd->tlen != 0) {
+        rv = PR_Send(sd, sfd->trailer, sfd->tlen, 0, timeout);
+        if (rv == -1) {
+            count = -1;
+            goto failed;
+        }
+        count += rv;
+    }
+
+failed:  /* common exit path: also reached by fall-through on success */
+    if (tcp_cork_enabled) {
+        tcp_cork = 0;  /* turn TCP_CORK back off */
+        if (setsockopt(sd->secret->md.osfd, SOL_TCP, TCP_CORK,
+                &tcp_cork, sizeof tcp_cork) == -1 && count != -1) {  /* only report this failure if no earlier error is pending */
+            _PR_MD_MAP_SETSOCKOPT_ERROR(errno);
+            count = -1;
+        }
+    }
+    if (count != -1) {
+        if (flags & PR_TRANSMITFILE_CLOSE_SOCKET) {
+            PR_Close(sd);  /* the socket is closed only when the whole send succeeded */
+        }
+        PR_ASSERT(count == sfd->hlen + sfd->tlen + file_nbytes_to_send);  /* success implies header, file and trailer were all sent */
+    }
+    return count;
+}
+#endif  /* LINUX */
+
 #ifdef AIX
 extern	int _pr_aix_send_file_use_disabled;
 #endif
 
 static PRInt32 pt_SendFile(
     PRFileDesc *sd, PRSendFileData *sfd,
     PRTransmitFileFlags flags, PRIntervalTime timeout)
 {
@@ -2422,16 +2612,18 @@ static PRInt32 pt_SendFile(
     /* return(pt_AIXDispatchSendFile(sd, sfd, flags, timeout));*/
 #endif /* HAVE_SEND_FILE */
 #elif defined(SOLARIS)
 #ifdef HAVE_SENDFILEV
     	return(pt_SolarisSendFile(sd, sfd, flags, timeout));
 #else
 	return(pt_SolarisDispatchSendFile(sd, sfd, flags, timeout));
 #endif /* HAVE_SENDFILEV */
+#elif defined(LINUX)
+    	return(pt_LinuxSendFile(sd, sfd, flags, timeout));
 #else
 	return(PR_EmulateSendFile(sd, sfd, flags, timeout));
 #endif
 }
 
 static PRInt32 pt_TransmitFile(
     PRFileDesc *sd, PRFileDesc *fd, const void *headers,
     PRInt32 hlen, PRTransmitFileFlags flags, PRIntervalTime timeout)
@@ -2685,16 +2877,22 @@ static PRStatus pt_SetSocketOption(PRFil
             case PR_SockOpt_Keepalive:
             case PR_SockOpt_NoDelay:
             case PR_SockOpt_Broadcast:
             {
                 PRIntn value = (data->value.reuse_addr) ? 1 : 0;
                 rv = setsockopt(
                     fd->secret->md.osfd, level, name,
                     (char*)&value, sizeof(PRIntn));
+#ifdef LINUX
+                /* for pt_LinuxSendFile */
+                if (name == TCP_NODELAY && rv == 0) {
+                    fd->secret->md.tcp_nodelay = value;
+                }
+#endif
                 break;
             }
             case PR_SockOpt_McastLoopback:
             {
                 PRUint8 xbool = data->value.mcast_loopback ? 1 : 0;
                 rv = setsockopt(
                     fd->secret->md.osfd, level, name,
                     (char*)&xbool, sizeof(xbool));