js/src/irregexp/RegExpInterpreter.cpp
author Ehsan Akhgari <ehsan@mozilla.com>
Sun, 25 May 2014 21:46:24 -0400
changeset 185108 5021d1337fa9bb5cca83ae0800c6ca1902167d2b
parent 185105 9965b32f739bc934bae9d998bdc5ac543ef7ae42
child 189603 4df3e4664b11ff9d8f49d3443de567f815f9f6b1
permissions -rw-r--r--
Bug 1013663 - Fix some bad implicit conversion constructors in the JS engine; r=jorendorff,jandem

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99: */

// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// A simple interpreter for the Irregexp byte code.

#include "irregexp/RegExpBytecode.h"
#include "irregexp/RegExpMacroAssembler.h"
#include "vm/MatchPairs.h"

using namespace js;
using namespace js::irregexp;

static const size_t kBitsPerByte = 8;
static const size_t kBitsPerByteLog2 = 3;

class MOZ_STACK_CLASS RegExpStackCursor
{
  public:
    explicit RegExpStackCursor(JSContext *cx)
      : cx(cx), cursor(base())
    {}

    bool push(int32_t value) {
        *cursor++ = value;
        if (cursor >= stack().limit()) {
            int32_t pos = position();
            if (!stack().grow()) {
                js_ReportOverRecursed(cx);
                return false;
            }
            setPosition(pos);
        }
        return true;
    }

    int32_t pop() {
        JS_ASSERT(cursor > base());
        return *--cursor;
    }

    int32_t peek() {
        JS_ASSERT(cursor > base());
        return *(cursor - 1);
    }

    int32_t position() {
        return cursor - base();
    }

    void setPosition(int32_t position) {
        cursor = base() + position;
        JS_ASSERT(cursor < stack().limit());
    }

  private:
    JSContext *cx;

    int32_t *cursor;

    RegExpStack &stack() { return cx->runtime()->mainThread.regexpStack; }
    int32_t *base() { return (int32_t *) stack().base(); }
};

static int32_t
Load32Aligned(const uint8_t* pc)
{
    JS_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0);
    return *reinterpret_cast<const int32_t *>(pc);
}

static int32_t
Load16Aligned(const uint8_t* pc)
{
    JS_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0);
    return *reinterpret_cast<const uint16_t *>(pc);
}

#define BYTECODE(name)  case BC_##name:

RegExpRunStatus
irregexp::InterpretCode(JSContext *cx, const uint8_t *byteCode,
                        const jschar *chars, size_t current, size_t length, MatchPairs *matches)
{
    const uint8_t* pc = byteCode;

    jschar current_char = current ? chars[current - 1] : '\n';

    RegExpStackCursor stack(cx);

    int32_t numRegisters = Load32Aligned(pc);
    pc += 4;

    Vector<int32_t, 0, SystemAllocPolicy> registers;
    if (!registers.growByUninitialized(numRegisters))
        return RegExpRunStatus_Error;
    for (size_t i = 0; i < matches->length() * 2; i++)
        registers[i] = -1;

    while (true) {
        int32_t insn = Load32Aligned(pc);
        switch (insn & BYTECODE_MASK) {
          BYTECODE(BREAK)
            MOZ_ASSUME_UNREACHABLE("Bad bytecode");
            return RegExpRunStatus_Error;
          BYTECODE(PUSH_CP)
            if (!stack.push(current))
                return RegExpRunStatus_Error;
            pc += BC_PUSH_CP_LENGTH;
            break;
          BYTECODE(PUSH_BT)
            if (!stack.push(Load32Aligned(pc + 4)))
                return RegExpRunStatus_Error;
            pc += BC_PUSH_BT_LENGTH;
            break;
          BYTECODE(PUSH_REGISTER)
            if (!stack.push(registers[insn >> BYTECODE_SHIFT]))
                return RegExpRunStatus_Error;
            pc += BC_PUSH_REGISTER_LENGTH;
            break;
          BYTECODE(SET_REGISTER)
            registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
            pc += BC_SET_REGISTER_LENGTH;
            break;
          BYTECODE(ADVANCE_REGISTER)
            registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
            pc += BC_ADVANCE_REGISTER_LENGTH;
            break;
          BYTECODE(SET_REGISTER_TO_CP)
            registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
            pc += BC_SET_REGISTER_TO_CP_LENGTH;
            break;
          BYTECODE(SET_CP_TO_REGISTER)
            current = registers[insn >> BYTECODE_SHIFT];
            pc += BC_SET_CP_TO_REGISTER_LENGTH;
            break;
          BYTECODE(SET_REGISTER_TO_SP)
            registers[insn >> BYTECODE_SHIFT] = stack.position();
            pc += BC_SET_REGISTER_TO_SP_LENGTH;
            break;
          BYTECODE(SET_SP_TO_REGISTER)
            stack.setPosition(registers[insn >> BYTECODE_SHIFT]);
            pc += BC_SET_SP_TO_REGISTER_LENGTH;
            break;
          BYTECODE(POP_CP)
            current = stack.pop();
            pc += BC_POP_CP_LENGTH;
            break;
          BYTECODE(POP_BT)
            if (!CheckForInterrupt(cx))
                return RegExpRunStatus_Error;
            pc = byteCode + stack.pop();
            break;
          BYTECODE(POP_REGISTER)
            registers[insn >> BYTECODE_SHIFT] = stack.pop();
            pc += BC_POP_REGISTER_LENGTH;
            break;
          BYTECODE(FAIL)
            return RegExpRunStatus_Success_NotFound;
          BYTECODE(SUCCEED)
            memcpy(matches->pairsRaw(), registers.begin(), matches->length() * 2 * sizeof(int32_t));
            return RegExpRunStatus_Success;
          BYTECODE(ADVANCE_CP)
            current += insn >> BYTECODE_SHIFT;
            pc += BC_ADVANCE_CP_LENGTH;
            break;
          BYTECODE(GOTO)
            pc = byteCode + Load32Aligned(pc + 4);
            break;
          BYTECODE(ADVANCE_CP_AND_GOTO)
            current += insn >> BYTECODE_SHIFT;
            pc = byteCode + Load32Aligned(pc + 4);
            break;
          BYTECODE(CHECK_GREEDY)
            if ((int32_t)current == stack.peek()) {
                stack.pop();
                pc = byteCode + Load32Aligned(pc + 4);
            } else {
                pc += BC_CHECK_GREEDY_LENGTH;
            }
            break;
          BYTECODE(LOAD_CURRENT_CHAR) {
            size_t pos = current + (insn >> BYTECODE_SHIFT);
            if (pos >= length) {
                pc = byteCode + Load32Aligned(pc + 4);
            } else {
                current_char = chars[pos];
                pc += BC_LOAD_CURRENT_CHAR_LENGTH;
            }
            break;
          }
          BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
            int pos = current + (insn >> BYTECODE_SHIFT);
            current_char = chars[pos];
            pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
            break;
          }
          BYTECODE(LOAD_2_CURRENT_CHARS) {
            size_t pos = current + (insn >> BYTECODE_SHIFT);
            if (pos + 2 > length) {
                pc = byteCode + Load32Aligned(pc + 4);
            } else {
                jschar next = chars[pos + 1];
                current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(jschar))));
                pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
            }
            break;
          }
          BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
            int pos = current + (insn >> BYTECODE_SHIFT);
            jschar next = chars[pos + 1];
            current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(jschar))));
            pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
            break;
          }
          BYTECODE(LOAD_4_CURRENT_CHARS)
            MOZ_ASSUME_UNREACHABLE("Ascii handling implemented");
            break;
          BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED)
            MOZ_ASSUME_UNREACHABLE("Ascii handling implemented");
          BYTECODE(CHECK_4_CHARS) {
            uint32_t c = Load32Aligned(pc + 4);
            if (c == current_char)
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_CHECK_4_CHARS_LENGTH;
            break;
          }
          BYTECODE(CHECK_CHAR) {
            uint32_t c = (insn >> BYTECODE_SHIFT);
            if (c == current_char)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_CHAR_LENGTH;
            break;
          }
          BYTECODE(CHECK_NOT_4_CHARS) {
            uint32_t c = Load32Aligned(pc + 4);
            if (c != current_char)
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_CHECK_NOT_4_CHARS_LENGTH;
            break;
          }
          BYTECODE(CHECK_NOT_CHAR) {
            uint32_t c = (insn >> BYTECODE_SHIFT);
            if (c != current_char)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_NOT_CHAR_LENGTH;
            break;
          }
          BYTECODE(AND_CHECK_4_CHARS) {
            uint32_t c = Load32Aligned(pc + 4);
            if (c == (current_char & Load32Aligned(pc + 8)))
                pc = byteCode + Load32Aligned(pc + 12);
            else
                pc += BC_AND_CHECK_4_CHARS_LENGTH;
            break;
          }
          BYTECODE(AND_CHECK_CHAR) {
            uint32_t c = (insn >> BYTECODE_SHIFT);
            if (c == (current_char & Load32Aligned(pc + 4)))
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_AND_CHECK_CHAR_LENGTH;
            break;
          }
          BYTECODE(AND_CHECK_NOT_4_CHARS) {
            uint32_t c = Load32Aligned(pc + 4);
            if (c != (current_char & Load32Aligned(pc + 8)))
                pc = byteCode + Load32Aligned(pc + 12);
            else
                pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
            break;
          }
          BYTECODE(AND_CHECK_NOT_CHAR) {
            uint32_t c = (insn >> BYTECODE_SHIFT);
            if (c != (current_char & Load32Aligned(pc + 4)))
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
            break;
          }
          BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
            uint32_t c = (insn >> BYTECODE_SHIFT);
            uint32_t minus = Load16Aligned(pc + 4);
            uint32_t mask = Load16Aligned(pc + 6);
            if (c != ((current_char - minus) & mask))
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
            break;
          }
          BYTECODE(CHECK_CHAR_IN_RANGE) {
            uint32_t from = Load16Aligned(pc + 4);
            uint32_t to = Load16Aligned(pc + 6);
            if (from <= current_char && current_char <= to)
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
            break;
          }
          BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
            uint32_t from = Load16Aligned(pc + 4);
            uint32_t to = Load16Aligned(pc + 6);
            if (from > current_char || current_char > to)
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
            break;
          }
          BYTECODE(CHECK_BIT_IN_TABLE) {
            int mask = RegExpMacroAssembler::kTableMask;
            uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
            int bit = (current_char & (kBitsPerByte - 1));
            if ((b & (1 << bit)) != 0)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
            break;
          }
          BYTECODE(CHECK_LT) {
            uint32_t limit = (insn >> BYTECODE_SHIFT);
            if (current_char < limit)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_LT_LENGTH;
            break;
          }
          BYTECODE(CHECK_GT) {
            uint32_t limit = (insn >> BYTECODE_SHIFT);
            if (current_char > limit)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_GT_LENGTH;
            break;
          }
          BYTECODE(CHECK_REGISTER_LT)
            if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4))
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_CHECK_REGISTER_LT_LENGTH;
            break;
          BYTECODE(CHECK_REGISTER_GE)
            if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4))
                pc = byteCode + Load32Aligned(pc + 8);
            else
                pc += BC_CHECK_REGISTER_GE_LENGTH;
            break;
          BYTECODE(CHECK_REGISTER_EQ_POS)
            if (registers[insn >> BYTECODE_SHIFT] == (int32_t) current)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
            break;
          BYTECODE(CHECK_NOT_REGS_EQUAL)
            if (registers[insn >> BYTECODE_SHIFT] == registers[Load32Aligned(pc + 4)])
                pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
            else
                pc = byteCode + Load32Aligned(pc + 8);
            break;
          BYTECODE(CHECK_NOT_BACK_REF) {
            int from = registers[insn >> BYTECODE_SHIFT];
            int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
            if (from < 0 || len <= 0) {
                pc += BC_CHECK_NOT_BACK_REF_LENGTH;
                break;
            }
            if (current + len > length) {
                pc = byteCode + Load32Aligned(pc + 4);
                break;
            } else {
                int i;
                for (i = 0; i < len; i++) {
                    if (chars[from + i] != chars[current + i]) {
                        pc = byteCode + Load32Aligned(pc + 4);
                        break;
                    }
                }
                if (i < len) break;
                current += len;
            }
            pc += BC_CHECK_NOT_BACK_REF_LENGTH;
            break;
          }
          BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
            int from = registers[insn >> BYTECODE_SHIFT];
            int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
            if (from < 0 || len <= 0) {
                pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
                break;
            }
            if (current + len > length) {
                pc = byteCode + Load32Aligned(pc + 4);
                break;
            }
            if (CaseInsensitiveCompareStrings(chars + from, chars + current, len * 2)) {
                current += len;
                pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
            } else {
                pc = byteCode + Load32Aligned(pc + 4);
            }
            break;
          }
          BYTECODE(CHECK_AT_START)
            if (current == 0)
                pc = byteCode + Load32Aligned(pc + 4);
            else
                pc += BC_CHECK_AT_START_LENGTH;
            break;
          BYTECODE(CHECK_NOT_AT_START)
            if (current == 0)
                pc += BC_CHECK_NOT_AT_START_LENGTH;
            else
                pc = byteCode + Load32Aligned(pc + 4);
            break;
          BYTECODE(SET_CURRENT_POSITION_FROM_END) {
            size_t by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
            if (length - current > by) {
                current = length - by;
                current_char = chars[current - 1];
            }
            pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
            break;
          }
          default:
            MOZ_ASSUME_UNREACHABLE("Bad bytecode");
            break;
        }
    }
}