Bug 1374012 - Update to Expat 2.2.1. Part 1: More correct calculations. r=erahm
authorPeter Van der Beken <peterv@propagandism.org>
Thu, 27 Dec 2018 15:13:10 +0000
changeset 509075 31402242ed1cd666cd3ac887f9b7fa8d0478e52d
parent 509074 01d3cd0517a13e9c783769aff6c46a3b978fc867
child 509076 e63ef8bbd6a150adaa9bf185dbd5c843c68d0af2
push id10547
push userffxbld-merge
push dateMon, 21 Jan 2019 13:03:58 +0000
treeherdermozilla-beta@24ec1916bffe [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerserahm
bugs1374012
milestone66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1374012 - Update to Expat 2.2.1. Part 1: More correct calculations. r=erahm Differential Revision: https://phabricator.services.mozilla.com/D14439
parser/expat/lib/xmlparse.c
parser/expat/lib/xmltok.c
--- a/parser/expat/lib/xmlparse.c
+++ b/parser/expat/lib/xmlparse.c
@@ -1,15 +1,16 @@
 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
    See the file COPYING for copying permission.
 */
 
 #include <stddef.h>
 #include <string.h>                     /* memset(), memcpy() */
 #include <assert.h>
+#include <limits.h>                     /* UINT_MAX */
 
 #define XML_BUILDING_EXPAT 1
 
 #ifdef COMPILED_FROM_DSP
 #include "winconfig.h"
 #elif defined(MACOS_CLASSIC)
 #include "macconfig.h"
 #elif defined(__amigaos4__)
@@ -1518,16 +1519,29 @@ XML_Parse(XML_Parser parser, const char 
   }
 #ifndef XML_CONTEXT_BYTES
   else if (bufferPtr == bufferEnd) {
     const char *end;
     int nLeftOver;
 /* BEGIN MOZILLA CHANGE (|result| has type XML_Status, not XML_Error) */
     enum XML_Status result;
 /* END MOZILLA CHANGE */
+    /* Detect overflow (a+b > MAX <==> b > MAX-a) */
+/* BEGIN MOZILLA CHANGE (len is signed, trying to compare it to an unsigned value) */
+#if 0
+    if (len > ((XML_Size)-1) / 2 - parseEndByteIndex) {
+#else
+    if ((XML_Size)len > ((XML_Size)-1) / 2 - parseEndByteIndex) {
+#endif
+/* END MOZILLA CHANGE */
+       errorCode = XML_ERROR_NO_MEMORY;
+       eventPtr = eventEndPtr = NULL;
+       processor = errorProcessor;
+       return XML_STATUS_ERROR;
+    }
     parseEndByteIndex += len;
     positionPtr = s;
     ps_finalBuffer = (XML_Bool)isFinal;
 
     errorCode = processor(parser, s, parseEndPtr = s + len, &end);
 
     if (errorCode != XML_ERROR_NONE) {
       eventEndPtr = eventPtr;
@@ -1560,59 +1574,32 @@ XML_Parse(XML_Parser parser, const char 
 /* END MOZILLA CHANGE */
       }
     }
 
     XmlUpdatePosition(encoding, positionPtr, end, &position);
     nLeftOver = s + len - end;
     if (nLeftOver) {
       if (buffer == NULL || nLeftOver > bufferLim - buffer) {
-/* BEGIN MOZILLA CHANGE (check for overflow) */
-#if 0
-        /* FIXME avoid integer overflow */
-        char *temp;
-        temp = (buffer == NULL
-                ? (char *)MALLOC(len * 2)
-                : (char *)REALLOC(buffer, len * 2));
+        /* avoid _signed_ integer overflow */
+        char *temp = NULL;
+        const int bytesToAllocate = (int)((unsigned)len * 2U);
+        if (bytesToAllocate > 0) {
+          temp = (buffer == NULL
+                ? (char *)MALLOC(bytesToAllocate)
+                : (char *)REALLOC(buffer, bytesToAllocate));
+        }
         if (temp == NULL) {
           errorCode = XML_ERROR_NO_MEMORY;
-          return XML_STATUS_ERROR;
-        }
-        buffer = temp;
-        if (!buffer) {
-          errorCode = XML_ERROR_NO_MEMORY;
           eventPtr = eventEndPtr = NULL;
           processor = errorProcessor;
           return XML_STATUS_ERROR;
         }
-        bufferLim = buffer + len * 2;
-#else
-        char *temp;
-        int newLen = len * 2;
-        if (newLen < 0) {
-          errorCode = XML_ERROR_NO_MEMORY;
-          return XML_STATUS_ERROR;
-        }
-        temp = (buffer == NULL
-                ? (char *)MALLOC(newLen)
-                : (char *)REALLOC(buffer, newLen));
-        if (temp == NULL) {
-          errorCode = XML_ERROR_NO_MEMORY;
-          return XML_STATUS_ERROR;
-        }
         buffer = temp;
-        if (!buffer) {
-          errorCode = XML_ERROR_NO_MEMORY;
-          eventPtr = eventEndPtr = NULL;
-          processor = errorProcessor;
-          return XML_STATUS_ERROR;
-        }
-        bufferLim = buffer + newLen;
-#endif
-/* END MOZILLA CHANGE */
+        bufferLim = buffer + bytesToAllocate;
       }
       memcpy(buffer, end, nLeftOver);
     }
     bufferPtr = buffer;
     bufferEnd = buffer + nLeftOver;
     positionPtr = bufferPtr;
     parseEndPtr = bufferEnd;
     eventPtr = bufferPtr;
@@ -1697,17 +1684,18 @@ XML_GetBuffer(XML_Parser parser, int len
     return NULL;
   case XML_FINISHED:
     errorCode = XML_ERROR_FINISHED;
     return NULL;
   default: ;
   }
 
   if (len > bufferLim - bufferEnd) {
-    int neededSize = len + (int)(bufferEnd - bufferPtr);
+    /* Do not invoke signed arithmetic overflow: */
+    int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
 /* BEGIN MOZILLA CHANGE (sanity check neededSize) */
     if (neededSize < 0) {
       errorCode = XML_ERROR_NO_MEMORY;
       return NULL;
     }
 /* END MOZILLA CHANGE */
 #ifdef XML_CONTEXT_BYTES
     int keep = (int)(bufferPtr - buffer);
@@ -1731,17 +1719,18 @@ XML_GetBuffer(XML_Parser parser, int len
 #endif  /* not defined XML_CONTEXT_BYTES */
     }
     else {
       char *newBuf;
       int bufferSize = (int)(bufferLim - bufferPtr);
       if (bufferSize == 0)
         bufferSize = INIT_BUFFER_SIZE;
       do {
-        bufferSize *= 2;
+        /* Do not invoke signed arithmetic overflow: */
+        bufferSize = (int) (2U * (unsigned) bufferSize);
 /* BEGIN MOZILLA CHANGE (prevent infinite loop on overflow) */
       } while (bufferSize < neededSize && bufferSize > 0);
 /* END MOZILLA CHANGE */
 /* BEGIN MOZILLA CHANGE (sanity check bufferSize) */
       if (bufferSize <= 0) {
         errorCode = XML_ERROR_NO_MEMORY;
         return NULL;
       }
@@ -6301,16 +6290,45 @@ poolStoreString(STRING_POOL *pool, const
   if (!poolAppend(pool, enc, ptr, end))
     return NULL;
   if (pool->ptr == pool->end && !poolGrow(pool))
     return NULL;
   *(pool->ptr)++ = 0;
   return pool->start;
 }
 
+static size_t
+poolBytesToAllocateFor(int blockSize)
+{
+  /* Unprotected math would be:
+  ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
+  **
+  ** Detect overflow, avoiding _signed_ overflow undefined behavior
+  ** For a + b * c we check b * c in isolation first, so that addition of a
+  ** on top has no chance of making us accept a small non-negative number
+  */
+  const size_t stretch = sizeof(XML_Char);  /* can be 4 bytes */
+
+  if (blockSize <= 0)
+    return 0;
+
+  if (blockSize > (int)(INT_MAX / stretch))
+    return 0;
+
+  {
+    const int stretchedBlockSize = blockSize * (int)stretch;
+    const int bytesToAllocate = (int)(
+        offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
+    if (bytesToAllocate < 0)
+      return 0;
+
+    return (size_t)bytesToAllocate;
+  }
+}
+
 static XML_Bool FASTCALL
 poolGrow(STRING_POOL *pool)
 {
   if (pool->freeBlocks) {
     if (pool->start == 0) {
       pool->blocks = pool->freeBlocks;
       pool->freeBlocks = pool->freeBlocks->next;
       pool->blocks->next = NULL;
@@ -6328,47 +6346,60 @@ poolGrow(STRING_POOL *pool)
              (pool->end - pool->start) * sizeof(XML_Char));
       pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
       pool->start = pool->blocks->s;
       pool->end = pool->start + pool->blocks->size;
       return XML_TRUE;
     }
   }
   if (pool->blocks && pool->start == pool->blocks->s) {
-    int blockSize = (int)(pool->end - pool->start)*2;
+    BLOCK *temp;
+    int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
+    size_t bytesToAllocate;
+
     if (blockSize < 0)
       return XML_FALSE;
 
-    pool->blocks = (BLOCK *)
-      pool->mem->realloc_fcn(pool->blocks,
-                             (offsetof(BLOCK, s)
-                              + blockSize * sizeof(XML_Char)));
-    if (pool->blocks == NULL)
+    bytesToAllocate = poolBytesToAllocateFor(blockSize);
+    if (bytesToAllocate == 0)
       return XML_FALSE;
+
+    temp = (BLOCK *)
+      pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
+    if (temp == NULL)
+      return XML_FALSE;
+    pool->blocks = temp;
     pool->blocks->size = blockSize;
     pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
     pool->start = pool->blocks->s;
     pool->end = pool->start + blockSize;
   }
   else {
     BLOCK *tem;
     int blockSize = (int)(pool->end - pool->start);
+    size_t bytesToAllocate;
+
     if (blockSize < 0)
       return XML_FALSE;
 
     if (blockSize < INIT_BLOCK_SIZE)
       blockSize = INIT_BLOCK_SIZE;
-    else
+    else {
+      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
+      if ((int)((unsigned)blockSize * 2U) < 0) {
+        return XML_FALSE;
+      }
       blockSize *= 2;
-
-    if (blockSize < 0)
+    }
+
+    bytesToAllocate = poolBytesToAllocateFor(blockSize);
+    if (bytesToAllocate == 0)
       return XML_FALSE;
 
-    tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s)
-                                        + blockSize * sizeof(XML_Char));
+    tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
     if (!tem)
       return XML_FALSE;
     tem->size = blockSize;
     tem->next = pool->blocks;
     pool->blocks = tem;
     if (pool->ptr != pool->start)
       memcpy(tem->s, pool->start,
              (pool->ptr - pool->start) * sizeof(XML_Char));
--- a/parser/expat/lib/xmltok.c
+++ b/parser/expat/lib/xmltok.c
@@ -39,40 +39,40 @@
   PREFIX(charRefNumber), \
   PREFIX(predefinedEntityName), \
   PREFIX(updatePosition), \
   PREFIX(isPublicId)
 
 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
 
 #define UCS2_GET_NAMING(pages, hi, lo) \
-   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
+   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
 
 /* A 2 byte UTF-8 representation splits the characters 11 bits between
    the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
    pages, 3 bits to add to that index and 5 bits to generate the mask.
 */
 #define UTF8_GET_NAMING2(pages, byte) \
     (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                       + ((((byte)[0]) & 3) << 1) \
                       + ((((byte)[1]) >> 5) & 1)] \
-         & (1 << (((byte)[1]) & 0x1F)))
+         & (1u << (((byte)[1]) & 0x1F)))
 
 /* A 3 byte UTF-8 representation splits the characters 16 bits between
    the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
    into pages, 3 bits to add to that index and 5 bits to generate the
    mask.
 */
 #define UTF8_GET_NAMING3(pages, byte) \
   (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                              + ((((byte)[1]) >> 2) & 0xF)] \
                        << 3) \
                       + ((((byte)[1]) & 3) << 1) \
                       + ((((byte)[2]) >> 5) & 1)] \
-         & (1 << (((byte)[2]) & 0x1F)))
+         & (1u << (((byte)[2]) & 0x1F)))
 
 #define UTF8_GET_NAMING(pages, p, n) \
   ((n) == 2 \
   ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
   : ((n) == 3 \
      ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
      : 0))