author | Andrea Marchesini <amarchesini@mozilla.com> |
Thu, 08 Feb 2018 11:45:06 +0100 | |
changeset 402950 | 55a9b2fe876c5f9ac8d33b861b1df4679e5a3191 |
parent 402949 | fdab702b72fd058c47dc776d8735e62b44bff202 |
child 402951 | 042c5cd8abb0dd86c7ee59abf14fbf5b3e335b5c |
push id | 33407 |
push user | cbrindusan@mozilla.com |
push date | Thu, 08 Feb 2018 19:02:31 +0000 |
treeherder | mozilla-central@c5120bcaf7bd [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | rillian |
bugs | 1434884 |
milestone | 60.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/dom/media/webvtt/moz.build +++ b/dom/media/webvtt/moz.build @@ -14,8 +14,10 @@ XPIDL_MODULE = 'webvtt' EXTRA_COMPONENTS += [ 'WebVTT.manifest', 'WebVTTParserWrapper.js', ] EXTRA_JS_MODULES += [ 'vtt.jsm', ] + +XPCSHELL_TESTS_MANIFESTS += ['tests/xpcshell.ini']
new file mode 100644 --- /dev/null +++ b/dom/media/webvtt/tests/test_parser.js @@ -0,0 +1,77 @@ +"use strict"; + +ChromeUtils.import("resource://gre/modules/vtt.jsm"); +ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm"); + +let fakeWindow = { + VTTCue: function() {}, + VTTRegion: function() {}, +}; + +// We have a better parser check in WPT. Here I want to check that incomplete +// lines are correctly parsable. +let tests = [ + // Signature + { input: [ "WEBVTT" ], cue: 0, region: 0 }, + { input: [ "", "WE", "BVT", "T" ], cue: 0, region: 0 }, + { input: [ "WEBVTT - This file has no cues." ], cue: 0, region: 0 }, + { input: [ "WEBVTT", " - ", "This file has no cues." ], cue: 0, region: 0 }, + + // Body with IDs + { input: [ "WEB", "VTT - This file has cues.\n", "\n", "14\n", + "00:01:14", ".815 --> 00:0", "1:18.114\n", "- What?\n", "- Where are we now?\n", "\n", + "15\n", "00:01:18.171 --> 00:01:20.991\n", "- T", "his is big bat country.\n", "\n", + "16\n", "00:01:21.058 --> 00:01:23.868\n", "- [ Bat", "s Screeching ]\n", + "- They won't get in your hair. They're after the bug", "s.\n", ], + cue: 3, region: 0 }, + + // Body without IDs + { input: [ "WEBVTT - This file has c", "ues.\n", "\n", + "00:01:14.815 --> 00:01:18.114\n", "- What?\n", "- Where are we now?\n", "\n", + "00:01:18.171 --> 00:01:2", "0.991\n", "- ", "This is big bat country.\n", "\n", + "00:01:21.058 --> 00:01:23.868\n", "- [ Bats S", "creeching ]\n", + "- They won't get in your hair. They're after the bugs.\n", ], + cue: 3, region: 0 }, + + // Note + { input: [ "WEBVTT - This file has no cues.\n", "\n", "NOTE what" ], + cue: 0, region: 0 }, + + // Regions - This vtt is taken from a WPT + { input: [ "WE", "BVTT\n", "\n", "REGION\n", "id:0\n", "\n", "REGION\n", "id:1\n", + "region", "an", "chor:0%,0%\n", "\n", "R", "EGION\n", "id:2\n", + "regionanchor:18446744073709552000%,18446744", "073709552000%\n", "\n", + "REGION\n", "id:3\n", "regionanchor: 100%,100%\n", "regio", "nanchor :100%,100%\n", + "regionanchor:100% ,100%\n", "regionanchor:100%, 100%\n", + "regionanchor:100 %,100%\n", "regionanchor:10", "0%,100 %\n", "\n", + "00:00:00.000 --> 00:00:01.000", " region:0\n", "text\n", "\n", + "00:00:00.000 --> 00:00:01.000 region:1\n", "text\n", "\n", + "00:00:00.000 --> 00:00:01.000 region:3\n", "text\n" ], cue: 3, region: 4 }, +]; + +function run_test() { + + tests.forEach(test => { + let parser = new WebVTT.Parser(fakeWindow, null); + ok(!!parser, "Ok... this is a good starting point"); + + let cue = 0; + parser.oncue = () => { ++cue; }; + + let region = 0; + parser.onregion = () => { ++region; }; + + parser.onparsingerror = () => { + ok(false, "No error accepted"); + } + + test.input.forEach(input => { + parser.parse(new TextEncoder().encode(input)); + }); + + parser.flush(); + + equal(cue, test.cue, "Cue value matches"); + equal(region, test.region, "Region value matches"); + }); +}
new file mode 100644 --- /dev/null +++ b/dom/media/webvtt/tests/xpcshell.ini @@ -0,0 +1,3 @@ +[DEFAULT] + +[test_parser.js]
--- a/dom/media/webvtt/vtt.jsm +++ b/dom/media/webvtt/vtt.jsm @@ -23,18 +23,17 @@ this.EXPORTED_SYMBOLS = ["WebVTT"]; * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ ChromeUtils.import('resource://gre/modules/Services.jsm'); -const { require } = ChromeUtils.import("resource://devtools/shared/Loader.jsm", {}); -const { XPCOMUtils } = require("resource://gre/modules/XPCOMUtils.jsm"); +ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm"); (function(global) { var _objCreate = Object.create || (function() { function F() {} return function(o) { if (arguments.length !== 1) { throw new Error('Object.create shim only accepts one parameter.'); @@ -270,18 +269,18 @@ const { XPCOMUtils } = require("resource skipWhitespace(); cue.endTime = consumeTimeStamp(); // (5) collect cue end time // 4.1 WebVTT cue settings list. skipWhitespace(); consumeCueSettings(input, cue); } - function onlyContainsWhiteSpaces(input) { - return /^[ \f\n\r\t]+$/.test(input); + function emptyOrOnlyContainsWhiteSpaces(input) { + return input == "" || /^[ \f\n\r\t]+$/.test(input); } function containsTimeDirectionSymbol(input) { return input.includes("-->"); } function maybeIsTimeStampFormat(input) { return /^\s*(\d+:)?(\d{2}):(\d{2})\.(\d+)\s*-->\s*(\d+:)?(\d{2}):(\d{2})\.(\d+)\s*/.test(input); @@ -1122,66 +1121,72 @@ const { XPCOMUtils } = require("resource } } })(); }; WebVTT.Parser = function(window, decoder) { this.window = window; this.state = "INITIAL"; + this.substate = ""; + this.substatebuffer = ""; this.buffer = ""; this.decoder = decoder || new TextDecoder("utf8"); this.regionList = []; }; WebVTT.Parser.prototype = { // If the error is a ParsingError then report it to the consumer if // possible. If it's not a ParsingError then throw it like normal. reportOrThrowError: function(e) { if (e instanceof ParsingError) { this.onparsingerror && this.onparsingerror(e); } else { throw e; } }, parse: function (data) { - var self = this; - // If there is no data then we won't decode it, but will just try to parse // whatever is in buffer already. This may occur in circumstances, for // example when flush() is called. if (data) { // Try to decode the data that we received. - self.buffer += self.decoder.decode(data, {stream: true}); + this.buffer += this.decoder.decode(data, {stream: true}); } - function collectNextLine() { - var buffer = self.buffer; + // This parser is line-based. Let's see if we have a line to parse. + while (/\r\n|\n|\r/.test(this.buffer)) { + var buffer = this.buffer; var pos = 0; - while (pos < buffer.length && buffer[pos] !== '\r' && buffer[pos] !== '\n') { + while (buffer[pos] !== '\r' && buffer[pos] !== '\n') { ++pos; } var line = buffer.substr(0, pos); // Advance the buffer early in case we fail below. if (buffer[pos] === '\r') { ++pos; } if (buffer[pos] === '\n') { ++pos; } - self.buffer = buffer.substr(pos); + this.buffer = buffer.substr(pos); + // Spec defined replacement. line = line.replace(/[\u0000]/g, "\uFFFD"); - if (/^NOTE($|[ \t])/.test(line)) { - line = null; + if (!/^NOTE($|[ \t])/.test(line)) { + this.parseLine(line); } - return line; } + return this; + }, + parseLine: function(line) { + var self = this; + function createCueIfNeeded() { if (!self.cue) { self.cue = new self.window.VTTCue(0, 0, ""); } } // Parsing cue identifier and the identifier should be unique. // Return true if the input is a cue identifier. @@ -1277,18 +1282,17 @@ const { XPCOMUtils } = require("resource var regionPref = Services.prefs.getBoolPref("media.webvtt.regions.enabled"); dump("regionPref " + regionPref + "\n"); } } } // Parsing the WebVTT signature, it contains parsing algo step1 to step9. // See spec, https://w3c.github.io/webvtt/#file-parsing - function parseSignatureMayThrow(input) { - let signature = collectNextLine(); + function parseSignatureMayThrow(signature) { if (!/^WEBVTT([ \t].*)?$/.test(signature)) { throw new ParsingError(ParsingError.Errors.BadSignature); } else { self.state = "HEADER"; } } function parseRegionOrStyle(input) { @@ -1305,128 +1309,127 @@ const { XPCOMUtils } = require("resource // See spec, https://w3c.github.io/webvtt/#collect-a-webvtt-block // // There are sereval things would appear in header, // 1. Region or Style setting // 2. Garbage (meaningless string) // 3. Empty line // 4. Cue's timestamp // The case 4 happens when there is no line interval between the header - // and the cue blocks. In this case, we should preserve the line and - // return it for the next phase parsing. - function parseHeader() { - let line = null; - while (self.buffer && self.state === "HEADER") { - line = collectNextLine(); - var tempStr = ""; - if (/^REGION|^STYLE/.test(line)) { - self.substate = /^REGION/.test(line) ? "REGION" : "STYLE"; + // and the cue blocks. In this case, we should preserve the line for the + // next phase parsing, returning "true". + function parseHeader(line) { + if (!self.substate && /^REGION|^STYLE/.test(line)) { + self.substate = /^REGION/.test(line) ? "REGION" : "STYLE"; + return false; + } - while (true) { - line = collectNextLine(); - if (!line || maybeIsTimeStampFormat(line) || onlyContainsWhiteSpaces(line) || containsTimeDirectionSymbol(line)) { - // parse the tempStr and break the while loop. - parseRegionOrStyle(tempStr); - break; - } else if (/^REGION|^STYLE/.test(line)) { - // The line is another REGION/STYLE, parse tempStr then reset tempStr. - // Don't break the while loop to parse the next REGION/STYLE. - parseRegionOrStyle(tempStr); - self.substate = /^REGION/.test(line) ? "REGION" : "STYLE"; - tempStr = ""; - } else { - tempStr = tempStr + " " + line; - } - } + if (self.substate === "REGION" || self.substate === "STYLE") { + if (maybeIsTimeStampFormat(line) || + emptyOrOnlyContainsWhiteSpaces(line) || + containsTimeDirectionSymbol(line)) { + parseRegionOrStyle(self.substatebuffer); + self.substatebuffer = ""; + self.substate = null; + + // This is the end of the region or style state. + return parseHeader(line); } - if (!line || onlyContainsWhiteSpaces(line)) { - // empty line, whitespaces - continue; - } else if (maybeIsTimeStampFormat(line)) { - self.state = "CUE"; - break; - } else if (containsTimeDirectionSymbol(line)) { - // string contains "-->" - break; - } else { - //It is an ID. - break; + if (/^REGION|^STYLE/.test(line)) { + // The line is another REGION/STYLE, parse and reset substatebuffer. + // Don't break the while loop to parse the next REGION/STYLE. + parseRegionOrStyle(self.substatebuffer); + self.substatebuffer = ""; + self.substate = /^REGION/.test(line) ? "REGION" : "STYLE"; + return false; } - } // self.state === "HEADER" - // End parsing header part and doesn't see the timestamp. - if (self.state === "HEADER") { - self.state = "ID"; + // We weren't able to parse the line as a header. Accumulate and + // return. + self.substatebuffer += " " + line; + return false; } - return line; - } - // 5.1 WebVTT file parsing. - try { - if (self.state === "INITIAL") { - parseSignatureMayThrow(); + if (emptyOrOnlyContainsWhiteSpaces(line)) { + // empty line, whitespaces, nothing to do. + return false; + } + + if (maybeIsTimeStampFormat(line)) { + self.state = "CUE"; + // We want to process the same line again. + return true; } - var line; - if (self.state === "HEADER") { - line = parseHeader(); + // string contains "-->" or an ID + self.state = "ID"; + return true; + } + + try { + // 5.1 WebVTT file parsing. + if (self.state === "INITIAL") { + parseSignatureMayThrow(line); + return; } - var nextIteration = false; - while (nextIteration || self.buffer) { - nextIteration = false; - if (!line) { - // Since the data receiving is async, we need to wait until the - // buffer gets the full line. - if (!/\r\n|\n|\r/.test(self.buffer)) { - return this; - } - line = collectNextLine(); + if (self.state === "HEADER") { + // parseHeader returns false if the same line doesn't need to be + // parsed again. + if (!parseHeader(line)) { + return; + } + } + + if (self.state === "ID") { + // If there is no cue identifier, read the next line. + if (line == "") { + return; } - switch (self.state) { - case "ID": - // If there is no cue identifier, keep the line and reuse this line - // in next iteration. - if (!line || !parseCueIdentifier(line)) { - nextIteration = true; - continue; + // If there is no cue identifier, parse the line again. + if (!parseCueIdentifier(line)) { + return self.parseLine(line); + } + return; + } + + if (self.state === "CUE") { + parseCueMayThrow(line); + return; + } + + if (self.state === "CUETEXT") { + // Report the cue when (1) get an empty line (2) get the "-->"" + if (emptyOrOnlyContainsWhiteSpaces(line) || + containsTimeDirectionSymbol(line)) { + // We are done parsing self cue. + self.oncue && self.oncue(self.cue); + self.cue = null; + self.state = "ID"; + + if (emptyOrOnlyContainsWhiteSpaces(line)) { + return; } - break; - case "CUE": - parseCueMayThrow(line); - break; - case "CUETEXT": - // Report the cue when (1) get an empty line (2) get the "-->"" - if (!line || containsTimeDirectionSymbol(line)) { - // We are done parsing self cue. - self.oncue && self.oncue(self.cue); - self.cue = null; - self.state = "ID"; - // Keep the line and reuse this line in next iteration. - nextIteration = true; - continue; - } - if (self.cue.text) { - self.cue.text += "\n"; - } - self.cue.text += line; - break; - case "BADCUE": // BADCUE - // 54-62 - Collect and discard the remaining cue. - self.state = "ID"; - if (line) { // keep this line to ID state. - continue; - } - break; + + // Reuse the same line. + return self.parseLine(line); + } + if (self.cue.text) { + self.cue.text += "\n"; } - // The line was already parsed, empty it to ensure we can get the - // new line in next iteration. - line = null; + self.cue.text += line; + return; + } + + if (self.state === "BADCUE") { + // 54-62 - Collect and discard the remaining cue. + self.state = "ID"; + return self.parseLine(line); } } catch (e) { self.reportOrThrowError(e); // If we are currently parsing a cue, report what we have. if (self.state === "CUETEXT" && self.cue && self.oncue) { self.oncue(self.cue); } @@ -1437,27 +1440,18 @@ const { XPCOMUtils } = require("resource } return this; }, flush: function () { var self = this; try { // Finish decoding the stream. self.buffer += self.decoder.decode(); - // Synthesize the end of the current cue or region. - if (self.cue || self.state === "HEADER") { - self.buffer += "\n\n"; - self.parse(); - } - // If we've flushed, parsed, and we're still on the INITIAL state then - // that means we don't have enough of the stream to parse the first - // line. - if (self.state === "INITIAL") { - throw new ParsingError(ParsingError.Errors.BadSignature); - } + self.buffer += "\n\n"; + self.parse(); } catch(e) { self.reportOrThrowError(e); } self.onflush && self.onflush(); return this; } };