Bug 671029: Ignore Byte-Order-Mark in UTF-8 files (r=jwalden)
author Paul Biggar <pbiggar@mozilla.com>
Mon, 18 Jul 2011 14:14:33 -0700
changeset 72974 102481f5e2b9d5d253c2bec545de05d6b4407d27
parent 72973 4e34e92f1605cb0db5b6e8520f1bac4b5a320d35
child 72975 52e36db1e8c7c8faba145c6ab78a1385c02f9523
push id 20799
push user mak77@bonardo.net
push date Tue, 19 Jul 2011 12:30:13 +0000
treeherder mozilla-central@b0a58fd855e1 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jwalden
bugs 671029
milestone 8.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 671029: Ignore Byte-Order-Mark in UTF-8 files (r=jwalden). It is legitimate (though pointless) to have a BOM in a UTF-8 file, so we shouldn't throw a SyntaxError.
js/src/shell/js.cpp
--- a/js/src/shell/js.cpp
+++ b/js/src/shell/js.cpp
@@ -387,16 +387,44 @@ ShellOperationCallback(JSContext *cx)
 
 static void
 SetContextOptions(JSContext *cx)
 {
     JS_SetNativeStackQuota(cx, gMaxStackSize);
     JS_SetOperationCallback(cx, ShellOperationCallback);
 }
 
+/*
+ * Some UTF-8 files, notably those written using Notepad, begin with a useless
+ * Byte-Order-Mark (EF BB BF) that causes a syntax error unless we skip it. A
+ * complete BOM is consumed; otherwise the stream is restored. |size| is unused.
+ */
+static void
+SkipUTF8BOM(FILE* file, size_t size)
+{
+    static const int bom[] = { 0xEF, 0xBB, 0xBF };
+    if (!js_CStringsAreUTF8)
+        return;
+
+    fpos_t start;
+    bool canRewind = fgetpos(file, &start) == 0;
+    int ch = EOF;
+    size_t matched = 0;
+    while (matched < 3 && (ch = fgetc(file)) == bom[matched])
+        matched++;
+    // Done on a full BOM, or once a partial match has been rewound.
+    if (matched == 3 || (matched && canRewind && fsetpos(file, &start) == 0))
+        return;
+    // Else push back in reverse order. C guarantees only one ungetc (enough
+    // when matched == 0); ungetc(EOF) is a harmless no-op.
+    ungetc(ch, file);
+    while (matched > 0)
+        ungetc(bom[--matched], file);
+}
+
 static void
 Process(JSContext *cx, JSObject *obj, char *filename, JSBool forceTTY)
 {
     JSBool ok, hitEOF;
     JSObject *scriptObj;
     jsval result;
     JSString *str;
     char *buffer;
@@ -419,16 +447,18 @@ Process(JSContext *cx, JSObject *obj, ch
             return;
         }
     }
 
     SetContextOptions(cx);
 
     if (!forceTTY && !isatty(fileno(file)))
     {
+        SkipUTF8BOM(file, size);
+
         /*
          * It's not interactive - just execute it.
          *
          * Support the UNIX #! shell hack; gobble the first line if it starts
          * with '#'.  TODO - this isn't quite compatible with sharp variables,
          * as a legal js program (using sharp variables) might start with '#'.
          * But that would require multi-character lookahead.
          */