author Nicholas Nethercote <nnethercote@mozilla.com>
Tue, 09 Jul 2013 13:41:16 -0700
changeset 138441 763c1a2daaee63f011c327790fb2ff066284831b
parent 138350 b674f0e40c8eb12edb964b80aa4ca2af37fcbf4c
child 139310 8abf922fb3ea4bb8af617666c64d165b9154eedd
permissions -rw-r--r--
Bug 891215 (part 16) - Slim down RegExpObject-inl.h. r=terrence.

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef vm_RegExpObject_h
#define vm_RegExpObject_h

#include "mozilla/Attributes.h"
#include "mozilla/MemoryReporting.h"

#include <stddef.h>
#include "jscntxt.h"
#include "jsobj.h"
#include "jsproxy.h"

#include "gc/Barrier.h"
#include "gc/Marking.h"
#include "js/TemplateLib.h"
#include "vm/MatchPairs.h"
#include "vm/Runtime.h"
#include "yarr/MatchResult.h"
#include "yarr/Yarr.h"
#include "yarr/YarrJIT.h"
#include "yarr/YarrSyntaxChecker.h"

 * JavaScript Regular Expressions
 * There are several engine concepts associated with a single logical regexp:
 *   RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp"
 *   RegExpShared - The compiled representation of the regexp.
 *   RegExpCompartment - Owns all RegExpShared instances in a compartment.
 * To save memory, a RegExpShared is not created for a RegExpObject until it is
 * needed for execution. When a RegExpShared needs to be created, it is looked
 * up in a per-compartment table to allow reuse between objects. Lastly, on
 * GC, every RegExpShared (that is not active on the callstack) is discarded.
 * Because of the last point, any code using a RegExpShared (viz., by executing
 * a regexp) must indicate the RegExpShared is active via RegExpGuard.
namespace js {

/*
 * Status type returned by RegExpShared::execute() and
 * RegExpShared::executeMatchOnly().
 * NOTE(review): the enumerator list is not visible in this view of the file.
 */
enum RegExpRunStatus

/*
 * Helper that holds a context and a rooted RegExpObject pointer and exposes
 * build() and clone() entry points returning RegExpObject pointers.
 * NOTE(review): braces and access specifiers are not visible in this view.
 */
class RegExpObjectBuilder
    /* Presumably the context passed to the constructor -- confirm. */
    ExclusiveContext *cx;
    /* The RegExpObject this builder works on; exposed through reobj(). */
    Rooted<RegExpObject*> reobj_;

    /*
     * NOTE(review): presumably these make sure reobj_ refers to an object,
     * allocating one (or a clone using |proto|) when needed. The definitions
     * are not in this header, so confirm against the implementation.
     */
    bool getOrCreate();
    bool getOrCreateClone(RegExpObject *proto);

    /* |reobj| is optional and defaults to NULL. */
    RegExpObjectBuilder(ExclusiveContext *cx, RegExpObject *reobj = NULL);

    /* Accessor for the rooted object pointer. */
    RegExpObject *reobj() { return reobj_; }

    /*
     * Two overloads taking a source atom: one with explicit flags, one with
     * an existing RegExpShared.
     */
    RegExpObject *build(HandleAtom source, RegExpFlag flags);
    RegExpObject *build(HandleAtom source, RegExpShared &shared);

    /* Perform a VM-internal clone. */
    RegExpObject *clone(Handle<RegExpObject*> other, Handle<RegExpObject*> proto);

/*
 * Declaration only; the definition is not in this header.
 * NOTE(review): presumably returns a clone of |obj| whose prototype is
 * |proto| -- confirm against the implementation.
 */
JSObject *
CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *proto);

 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
 * cache so that they can be reused when compiling the same regex string.
 * During a GC, the trace hook for RegExpObject clears any pointers to
 * RegExpShareds so that there will be no dangling pointers when they are
 * deleted. However, some RegExpShareds are not deleted:
 *   1. Any RegExpShared with pointers from the C++ stack is not deleted.
 *   2. Any RegExpShared which has been embedded into jitcode is not deleted.
 *      This rarely comes into play, as jitcode is usually purged before the
 *      RegExpShareds are swept.
 *   3. Any RegExpShared that was installed in a RegExpObject during an
 *      incremental GC is not deleted. This is because the RegExpObject may have
 *      been traced through before the new RegExpShared was installed, in which
 *      case deleting the RegExpShared would turn the RegExpObject's reference
 *      into a dangling pointer.
 * The activeUseCount and gcNumberWhenUsed fields are used to track these
 * conditions.
 * There are two tables used to track RegExpShareds. map_ implements the cache
 * and is cleared on every GC. inUse_ logically owns all RegExpShareds in the
 * compartment and attempts to delete all RegExpShareds that aren't kept alive
 * by the above conditions on every GC sweep phase. It is necessary to use two
 * separate tables since map_ *must* be fully cleared on each GC since the Key
 * points to a JSAtom that can become garbage.
/*
 * Compiled form of a regexp: source atom, flags, paren count, optional Yarr
 * native code block and/or bytecode, plus the two lifetime-tracking fields.
 * NOTE(review): braces, access specifiers and some preprocessor lines are
 * not visible in this view; see the notes inside the class.
 */
class RegExpShared
    friend class RegExpCompartment;
    friend class RegExpStatics;
    friend class RegExpGuard;

    /* Local shorthands for the frontend and Yarr types used below. */
    typedef frontend::TokenStream TokenStream;
    typedef JSC::Yarr::BytecodePattern BytecodePattern;
    typedef JSC::Yarr::ErrorCode ErrorCode;
    typedef JSC::Yarr::YarrPattern YarrPattern;
    typedef JSC::Yarr::JSGlobalData JSGlobalData;
    typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock;
    typedef JSC::Yarr::YarrJITCompileMode YarrJITCompileMode;

     * Source to the RegExp, for lazy compilation.
     * The source must be rooted while activeUseCount is non-zero
     * via RegExpGuard or explicit calls to trace().
    JSAtom *           source;

    /* Flag bits; tested by ignoreCase()/global()/multiline()/sticky(). */
    RegExpFlag         flags;
    /* Returned by getParenCount(); pairCount() is this value plus one. */
    unsigned           parenCount;

    /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */
    YarrCodeBlock   codeBlock;
    /* Non-NULL exactly when hasBytecode() reports true. */
    BytecodePattern *bytecode;

    /* Lifetime-preserving variables: see class-level comment above. */
    size_t             activeUseCount;
    uint64_t           gcNumberWhenUsed;

    /* Internal functions. */
    bool compile(JSContext *cx, bool matchOnly);
    bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly);

    /*
     * NOTE(review): presumably these compile only when no suitable compiled
     * form exists yet -- definitions are not in this header; confirm.
     */
    bool compileIfNecessary(JSContext *cx);
    bool compileMatchOnlyIfNecessary(JSContext *cx);

    RegExpShared(JSAtom *source, RegExpFlag flags, uint64_t gcNumber);

    /* Explicit trace function for use by the RegExpStatics and JITs. */
    void trace(JSTracer *trc) {
        /* Marks the source atom; the unbarriered marking variant is used. */
        MarkStringUnbarriered(trc, &source, "regexpshared source");

    /* Static functions to expose some Yarr logic. */

    // This function should be deleted once bad Android platforms phase out. See bug 604774.
    /*
     * With ENABLE_YARR_JIT set: on ANDROID the answer is the negation of
     * cx->jitIsBroken, elsewhere it is true.
     * NOTE(review): the outer #if has no visible #else/#endif before the
     * trailing |return false;| -- presumably those directives (and the
     * closing brace) were lost from this view; confirm against the real file.
     */
    static bool isJITRuntimeEnabled(JSContext *cx) {
        #if ENABLE_YARR_JIT
        # if defined(ANDROID)
            return !cx->jitIsBroken;
        # else
            return true;
        # endif
            return false;
    static void reportYarrError(ExclusiveContext *cx, TokenStream *ts, ErrorCode error);
    static bool checkSyntax(ExclusiveContext *cx, TokenStream *tokenStream, JSLinearString *source);

    /* Called when a RegExpShared is installed into a RegExpObject. */
    void prepareForUse(ExclusiveContext *cx) {
        /* Record the context's current GC number. */
        gcNumberWhenUsed = cx->gcNumber();

    /* Primary interface: run this regular expression on the given string. */
    RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length,
                            size_t *lastIndex, MatchPairs &matches);

    /* Run the regular expression without collecting matches, for test(). */
    RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length,
                                     size_t *lastIndex, MatchPair &match);

    /* Accessors */

    /* Asserts isCompiled() before reading parenCount. */
    size_t getParenCount() const        { JS_ASSERT(isCompiled()); return parenCount; }
    /* Increment / decrement activeUseCount; decRef asserts it is positive. */
    void incRef()                       { activeUseCount++; }
    void decRef()                       { JS_ASSERT(activeUseCount > 0); activeUseCount--; }

    /* Accounts for the "0" (whole match) pair. */
    size_t pairCount() const            { return getParenCount() + 1; }

    RegExpFlag getFlags() const         { return flags; }
    /* Each predicate tests a single bit of |flags|. */
    bool ignoreCase() const             { return flags & IgnoreCaseFlag; }
    bool global() const                 { return flags & GlobalFlag; }
    bool multiline() const              { return flags & MultilineFlag; }
    bool sticky() const                 { return flags & StickyFlag; }

    /*
     * NOTE(review): hasCode()/hasMatchOnlyCode() appear twice below. The two
     * pairs are presumably alternatives selected by a preprocessor conditional
     * (likely ENABLE_YARR_JIT) whose directives are not visible here; confirm.
     */
    bool hasCode() const                { return codeBlock.has16BitCode(); }
    bool hasMatchOnlyCode() const       { return codeBlock.has16BitCodeMatchOnly(); }
    bool hasCode() const                { return false; }
    bool hasMatchOnlyCode() const       { return false; }
    bool hasBytecode() const            { return bytecode != NULL; }
    /* True when any of the three compiled forms is present. */
    bool isCompiled() const             { return hasBytecode() || hasCode() || hasMatchOnlyCode(); }

 * Extend the lifetime of a given RegExpShared to at least the lifetime of
 * the guard object. See Regular Expression comment at the top.
/*
 * Holds a RegExpShared pointer together with a rooted copy of its source
 * atom. Copying is disabled.
 * NOTE(review): braces, access specifiers and several one-line statements
 * (constructor and destructor bodies) are not visible in this view.
 */
class RegExpGuard
    /* The guarded RegExpShared; NULL means the guard is not initialized. */
    RegExpShared *re_;

     * Prevent the RegExp source from being collected:
     * because RegExpShared objects compile at execution time, the source
     * must remain rooted for the active lifetime of the RegExpShared.
    RootedAtom source_;

    /* Copy construction and copy assignment are deleted. */
    RegExpGuard(const RegExpGuard &) MOZ_DELETE;
    void operator=(const RegExpGuard &) MOZ_DELETE;

    /* Starts with no RegExpShared and an empty rooted atom. */
    RegExpGuard(ExclusiveContext *cx)
      : re_(NULL), source_(cx)

    /* Starts out pointing at |re| and rooting |re.source|. */
    RegExpGuard(ExclusiveContext *cx, RegExpShared &re)
      : re_(&re), source_(cx, re.source)

    /* NOTE(review): destructor body not visible; presumably calls release(). */
    ~RegExpGuard() {

    /*
     * Point the guard at |re| and root its source atom.
     * NOTE(review): no incRef() call is visible here -- confirm whether one
     * is missing from this view.
     */
    void init(RegExpShared &re) {
        re_ = &re;
        source_ = re_->source;

    /*
     * When a RegExpShared is held, clear both the pointer and the rooted atom.
     * NOTE(review): no decRef() call is visible here -- confirm whether one
     * is missing from this view.
     */
    void release() {
        if (re_) {
            re_ = NULL;
            source_ = NULL;

    /* True once re_ is non-NULL. */
    bool initialized() const { return !!re_; }
    /* Asserts initialized() before handing out the pointer. */
    RegExpShared *re() const { JS_ASSERT(initialized()); return re_; }
    /* Both operators go through re(), so they share its assertion. */
    RegExpShared *operator->() { return re(); }
    RegExpShared &operator*() { return *re(); }

/*
 * Holds two tables of RegExpShared pointers: map_, keyed by
 * (source atom, flags), and the set inUse_.
 * NOTE(review): braces and access specifiers are not visible in this view.
 */
class RegExpCompartment
    /*
     * Key for map_: a source atom plus flag bits. Key is also passed as the
     * hash-policy argument of Map below, hence the Lookup/hash/match members.
     */
    struct Key {
        JSAtom *atom;
        /* Flag bits, stored as uint16_t. */
        uint16_t flag;

        /* Default constructor: no member initializers. */
        Key() {}
        Key(JSAtom *atom, RegExpFlag flag)
          : atom(atom), flag(flag)
        { }

        typedef Key Lookup;
        /* Hash of the atom pointer XORed with the flag bits shifted left by one. */
        static HashNumber hash(const Lookup &l) {
            return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1);
        /* Keys are equal when both the atom pointer and the flags are equal. */
        static bool match(Key l, Key r) {
            return l.atom == r.atom && l.flag == r.flag;

     * Cache to reuse RegExpShareds with the same source/flags/etc. The cache
     * is entirely cleared on each GC.
    typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map;
    Map map_;

     * The set of all RegExpShareds in the compartment. On every GC, every
     * RegExpShared that is not actively being used is deleted and removed from
     * the set.
    typedef HashSet<RegExpShared *, DefaultHasher<RegExpShared*>, RuntimeAllocPolicy> PendingSet;
    PendingSet inUse_;

    RegExpCompartment(JSRuntime *rt);

    /*
     * NOTE(review): presumably init() sets up the tables and sweep() performs
     * the per-GC clearing described above; definitions are not in this
     * header, so confirm against the implementation.
     */
    bool init(JSContext *cx);
    void sweep(JSRuntime *rt);

    /*
     * NOTE(review): presumably hands the RegExpShared for (source, flags)
     * back through |g| and returns false on failure -- confirm.
     */
    bool get(ExclusiveContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g);

    /* Like 'get', but compile 'maybeOpt' (if non-null). */
    bool get(JSContext *cx, HandleAtom source, JSString *maybeOpt, RegExpGuard *g);

    /* Takes a mozilla::MallocSizeOf callback and returns a byte count. */
    size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);

/*
 * JSObject subclass for regexp objects. lastIndex, the source atom and the
 * four flag booleans live in slots; the RegExpShared pointer (if any) lives
 * in the object's private field.
 * NOTE(review): braces, access specifiers and a few one-line statements are
 * not visible in this view; see the notes inside the class.
 */
class RegExpObject : public JSObject
    /* Slot indices read by the accessors below. */
    static const unsigned LAST_INDEX_SLOT          = 0;
    static const unsigned SOURCE_SLOT              = 1;
    static const unsigned GLOBAL_FLAG_SLOT         = 2;
    static const unsigned IGNORE_CASE_FLAG_SLOT    = 3;
    static const unsigned MULTILINE_FLAG_SLOT      = 4;
    static const unsigned STICKY_FLAG_SLOT         = 5;

    /* One more than the highest slot index above. */
    static const unsigned RESERVED_SLOTS = 6;

    static Class class_;

     * Note: The regexp statics flags are OR'd into the provided flags,
     * so this function is really meant for object creation during code
     * execution, as opposed to during something like XDR.
    static RegExpObject *
    create(ExclusiveContext *cx, RegExpStatics *res, const jschar *chars, size_t length,
           RegExpFlag flags, frontend::TokenStream *ts);

    /* Same inputs as create() minus the RegExpStatics argument. */
    static RegExpObject *
    createNoStatics(ExclusiveContext *cx, const jschar *chars, size_t length, RegExpFlag flags,
                    frontend::TokenStream *ts);

    /* Overload taking an atom in place of a chars/length pair. */
    static RegExpObject *
    createNoStatics(ExclusiveContext *cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream *ts);

    /* Accessors. */

    static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; }

    /* Raw Value stored in the lastIndex slot. */
    const Value &getLastIndex() const { return getSlot(LAST_INDEX_SLOT); }
    inline void setLastIndex(double d);
    inline void zeroLastIndex();

    JSFlatString *toString(JSContext *cx) const;

    /* Reads the source slot as a string and returns it as an atom. */
    JSAtom *getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); }
    inline void setSource(JSAtom *source);

    /* Rebuilds a RegExpFlag bitmask from the four boolean flag slots. */
    RegExpFlag getFlags() const {
        unsigned flags = 0;
        flags |= global() ? GlobalFlag : 0;
        flags |= ignoreCase() ? IgnoreCaseFlag : 0;
        flags |= multiline() ? MultilineFlag : 0;
        flags |= sticky() ? StickyFlag : 0;
        return RegExpFlag(flags);

    /* Flags. */

    inline void setIgnoreCase(bool enabled);
    inline void setGlobal(bool enabled);
    inline void setMultiline(bool enabled);
    inline void setSticky(bool enabled);
    /* Each getter reads its slot as a boolean. */
    bool ignoreCase() const { return getSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
    bool global() const     { return getSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
    bool multiline() const  { return getSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
    bool sticky() const     { return getSlot(STICKY_FLAG_SLOT).toBoolean(); }

    /*
     * Requires that a RegExpShared is already attached (asserted).
     * NOTE(review): the statement that passes it to |g| is not visible here.
     */
    void shared(RegExpGuard *g) const {
        JS_ASSERT(maybeShared() != NULL);

    /*
     * Returns true straight away when a RegExpShared is already attached;
     * otherwise defers to createShared().
     * NOTE(review): the statement that hands |shared| to |g| inside the
     * if-branch is not visible in this view.
     */
    bool getShared(ExclusiveContext *cx, RegExpGuard *g) {
        if (RegExpShared *shared = maybeShared()) {
            return true;
        return createShared(cx, g);
    inline void setShared(ExclusiveContext *cx, RegExpShared &shared);

    friend class RegExpObjectBuilder;

     * Compute the initial shape to associate with fresh RegExp objects,
     * encoding their initial properties. Return the shape after
     * changing this regular expression object's last property to it.
    Shape *assignInitialShape(ExclusiveContext *cx);

    bool init(ExclusiveContext *cx, HandleAtom source, RegExpFlag flags);

     * Precondition: the syntax for |source| has already been validated.
     * Side effect: sets the private field.
    bool createShared(ExclusiveContext *cx, RegExpGuard *g);
    /* The private pointer viewed as a RegExpShared; callers test it for NULL. */
    RegExpShared *maybeShared() const {
        return static_cast<RegExpShared *>(JSObject::getPrivate());

    /* Call setShared in preference to setPrivate. */
    void setPrivate(void *priv) MOZ_DELETE;

 * Parse regexp flags. Report an error and return false if an invalid
 * sequence of flags is encountered (repeat/invalid flag).
 * N.B. flagStr must be rooted.
/*
 * Declaration only; the parsed flags are delivered through |flagsOut|.
 * NOTE(review): the return type is not visible in this view -- confirm.
 */
ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut);

 * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared.
 * Beware: this RegExpShared can be owned by a compartment other than
 * cx->compartment. A normal RegExpGuard (which is necessary anyway)
 * will protect the object but it is important not to assign the return value
 * to be the private of any RegExpObject.
/*
 * Dispatch on the kind of |obj|: a RegExpObject answers through its own
 * getShared(); anything else is routed through Proxy::regexp_toShared().
 * The callee's result is returned unchanged.
 * NOTE(review): the function's braces are not visible in this view.
 */
inline bool
RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g)
    if (obj->is<RegExpObject>())
        return obj->as<RegExpObject>().getShared(cx, g);
    return Proxy::regexp_toShared(cx, obj, g);

/*
 * Template declaration parameterized on the XDR mode; takes the XDR state
 * and a HeapPtrObject pointer.
 * NOTE(review): the return type is not visible in this view -- confirm.
 */
template<XDRMode mode>
XDRScriptRegExpObject(XDRState<mode> *xdr, HeapPtrObject *objp);

/*
 * Declaration only; takes the RegExpObject by reference and returns a
 * JSObject pointer.
 * NOTE(review): presumably returns a clone of |re| for use by scripts --
 * confirm against the implementation.
 */
extern JSObject *
CloneScriptRegExpObject(JSContext *cx, RegExpObject &re);

} /* namespace js */

#endif /* vm_RegExpObject_h */