no bug: implementation of concurrency for windows 8 (p=dtomack,r=jasowill)
author Dan Schaffer <Dan.Schaffer@adobe.com>
date Fri, 25 Jan 2013 10:08:11 -0800
changeset 7583 8d37a994952c
parent 7582 a099bc675915
child 7584 ae7348551cff
push id 4262
push user dschaffe@adobe.com
push date Wed, 30 Jan 2013 19:01:31 +0000
reviewers jasowill
bugs 1140327, 1158408
no bug: implementation of concurrency for windows 8 (p=dtomack,r=jasowill) Integrate up to @CL 1140327 CL@1158408
build/avmfeatures.py
core/ByteArrayGlue.cpp
core/CodegenLIR.cpp
core/CodegenLIR.h
core/avmfeatures.as
core/avmfeatures.cpp
core/avmfeatures.h
nanojit/Assembler.cpp
nanojit/Assembler.h
nanojit/LIR.cpp
nanojit/LIR.h
nanojit/LIRopcode.tbl
nanojit/NativeARM.cpp
nanojit/NativeARM.h
nanojit/NativeMIPS.h
nanojit/NativePPC.h
nanojit/NativeSH4.h
nanojit/NativeSparc.h
nanojit/NativeThumb2.cpp
nanojit/NativeThumb2.h
nanojit/NativeX64.cpp
nanojit/NativeX64.h
nanojit/Nativei386.cpp
nanojit/Nativei386.h
nanojit/nanojit.h
test/acceptance/failconfig.txt
--- a/build/avmfeatures.py
+++ b/build/avmfeatures.py
@@ -1,19 +1,21 @@
 #                     DO NOT EDIT THIS FILE
 #
 #  This file has been generated by the script core/avmfeatures.as,
 #  from a set of configuration parameters in that file.
 #
 #  If you feel you need to make changes below, instead edit the configuration
 #  file and rerun it to get a new version of this file.
 #
-#  This Source Code Form is subject to the terms of the Mozilla Public
-#  License, v. 2.0. If a copy of the MPL was not distributed with this
-#  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#/*
+#*  This Source Code Form is subject to the terms of the Mozilla Public
+#*  License, v. 2.0. If a copy of the MPL was not distributed with this
+#*  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#*/
 
 def featureSettings(o):
     args = ""
     arg = o.getBoolArg("debugger")
     if (arg == True):
         args += "-DAVMFEATURE_DEBUGGER=1 "
     if (arg == False):
         args += "-DAVMFEATURE_DEBUGGER=0 "
--- a/core/ByteArrayGlue.cpp
+++ b/core/ByteArrayGlue.cpp
@@ -1491,22 +1491,30 @@ namespace avmplus
 
         uint8_t *b = m_buffer->array + m_position;
         m_position += nbytes;
         return b;
     }
 
     int32_t ByteArray::CAS(uint32_t index, int32_t expected, int32_t next)
     {
-		if (m_buffer->length == 0)
-			m_toplevel->throwRangeError(kInvalidRangeError);
-        if (index > (m_buffer->length - sizeof(int32_t))) // Handle the race. 
+        if (m_buffer->length == 0) {
+            m_toplevel->throwRangeError(kInvalidRangeError);
+        }
+
+        const size_t minLength = sizeof(int32_t);
+        if ((m_buffer->length < minLength) || (index > (m_buffer->length - minLength))) {
             m_toplevel->throwRangeError(kInvalidRangeError);
-        if (index % sizeof(expected) != 0) // require word alignment
+        }
+
+        // require word alignment
+        if (index % sizeof(expected) != 0) {
             m_toplevel->throwRangeError(kInvalidRangeError);
+        }
+
         uint8_t* wordptr = &m_buffer->array[index];
         return vmbase::AtomicOps::compareAndSwap32WithBarrierPrev(expected, next, (int32_t*)wordptr);
     }
 
     bool ByteArray::isShareable () const
     {
         return m_isShareable;
     }
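
The ByteArray::CAS change above tightens the range check so that m_buffer->length - sizeof(int32_t) cannot wrap when the buffer holds fewer than four bytes, and it keeps the word-alignment requirement before handing the address to the atomic primitive. A minimal standalone sketch of the same guard-then-swap pattern, written against std::atomic rather than vmbase::AtomicOps (the buffer layout and exception type here are illustrative, not the avmplus API):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>

    // Sketch: guarded 32-bit compare-and-swap over a shared buffer.
    // 'length' is the buffer length in bytes; 'words' must be 4-byte aligned.
    int32_t cas32(std::atomic<int32_t>* words, size_t length,
                  uint32_t index, int32_t expected, int32_t next)
    {
        const size_t minLength = sizeof(int32_t);
        // Checking length < minLength first keeps 'length - minLength' from
        // wrapping when the buffer is shorter than one word.
        if (length < minLength || index > length - minLength)
            throw std::range_error("index out of range");
        if (index % sizeof(int32_t) != 0)        // require word alignment
            throw std::range_error("unaligned index");

        // compare_exchange_strong leaves the observed value in 'expected' on
        // failure, so returning it yields the previous word either way, which
        // matches the contract of compareAndSwap32WithBarrierPrev.
        words[index / sizeof(int32_t)].compare_exchange_strong(
            expected, next, std::memory_order_seq_cst);
        return expected;
    }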
--- a/core/CodegenLIR.cpp
+++ b/core/CodegenLIR.cpp
@@ -2559,30 +2559,22 @@ FLOAT_ONLY(           !(v.sst_mask == (1
         memset(jit_sst, 0, framesize * sizeof(uint16_t));
 #endif
 
         // If this is the target of a backwards branch, generate an interrupt check.
 
 #ifdef VMCFG_INTERRUPT_SAFEPOINT_POLL
         // Always poll for safepoints, regardless of config settings.
 		if (state->targetOfBackwardsBranch) {
-            Ins(LIR_savepc);
+            AvmAssert(AvmCore::NotInterrupted == 0);
             LIns* interrupted = loadIns(LIR_ldi, offsetof(AvmCore,interrupted), coreAddr, ACCSET_OTHER, LOAD_VOLATILE);
-            LIns* cond = binaryIns(LIR_eqi, interrupted, InsConst(AvmCore::NotInterrupted));
-            branchToLabel(LIR_jf, cond, interrupt_label);
-
-          /*
-            CodegenLabel not_interrupt_label;
-            branchToLabel(LIR_jt, cond, not_interrupt_label);
-            branchToLabel(LIR_j, NULL, interrupt_label);
-            emitLabel(not_interrupt_label);
-          */
-            Ins(LIR_discardpc);
-        }
-#else 
+
+            branchAndSavePC(interrupted, interrupt_label);
+        }
+#else
         if (interruptable && core->config.interrupts && state->targetOfBackwardsBranch) {
             LIns* interrupted = loadIns(LIR_ldi, offsetof(AvmCore,interrupted),
                     coreAddr, ACCSET_OTHER, LOAD_VOLATILE);
             LIns* cond = binaryIns(LIR_eqi, interrupted, InsConst(AvmCore::NotInterrupted));
             branchToLabel(LIR_jf, cond, interrupt_label);
         }
 #endif            
     }
@@ -6020,16 +6012,17 @@ FLOAT_ONLY(           !(v.sst_mask == (1
             case OP_lix16:
             case OP_li8:
             case OP_li16:
             case OP_li32:
             {
                 int32_t index = (int32_t) op1;
                 LIns* mopAddr = localGet(index);
                 const MopsInfo& mi = kMopsLoadInfo[opcode-OP_lix8];
+                Ins(LIR_memfence);
             #ifdef VMCFG_MOPS_USE_EXPANDED_LOADSTORE_INT
                 int32_t disp = 0;
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, &disp);
                 LIns* i2 = loadIns(mi.op, disp, realAddr, ACCSET_OTHER);
             #else
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, NULL);
                 LIns* i2 = callIns(mi.call, 1, realAddr);
             #endif
@@ -6038,30 +6031,32 @@ FLOAT_ONLY(           !(v.sst_mask == (1
             }
 
             case OP_lf32:
             case OP_lf64:
             {
                 int32_t index = (int32_t) op1;
                 LIns* mopAddr = localGet(index);
                 const MopsInfo& mi = kMopsLoadInfo[opcode-OP_lix8];
+                Ins(LIR_memfence);
             #ifdef VMCFG_MOPS_USE_EXPANDED_LOADSTORE_FP
                 int32_t disp = 0;
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, &disp);
                 LIns* i2 = loadIns(mi.op, disp, realAddr, ACCSET_OTHER);
             #else
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, NULL);
                 LIns* i2 = callIns(mi.call, 1, realAddr);
             #endif
                 localSet(index, i2, result);
                 break;
             }
 #ifdef VMCFG_FLOAT
             case OP_lf32x4:
             {
+                Ins(LIR_memfence);
                 // TODO: inlining.  The appropriate condition is probably *not* VMCFG_MOPS_USE_EXPANDED_LOADSTORE_FP.
                 int32_t index = (int32_t) op1;
                 LIns* mopAddr = binaryIns(LIR_andi, localGet(index), InsConst(~15U));
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, sizeof(float4_t), NULL);
                 LIns* retval = insAlloc(sizeof(float4_t));
                 callIns(FUNCTIONID(mop_lf32x4), 2, retval, realAddr);
                 localSet(index, ldf4(retval,0,ACCSET_OTHER), result);
                 break;
@@ -6071,17 +6066,18 @@ FLOAT_ONLY(           !(v.sst_mask == (1
             // stores
             case OP_si8:
             case OP_si16:
             case OP_si32:
             {
                 LIns* svalue = localGet(sp-1);
                 LIns* mopAddr = localGet(sp);
                 const MopsInfo& mi = kMopsStoreInfo[opcode-OP_si8];
-            #ifdef VMCFG_MOPS_USE_EXPANDED_LOADSTORE_INT
+                Ins(LIR_memfence);
+            #ifdef VMCFG_MOPS_USE_EXPANDED_LOADSTORE_INT
                 int32_t disp = 0;
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, &disp);
                 lirout->insStore(mi.op, svalue, realAddr, disp, ACCSET_OTHER);
             #else
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, NULL);
                 callIns(mi.call, 2, realAddr, svalue);
             #endif
                 break;
@@ -6089,30 +6085,32 @@ FLOAT_ONLY(           !(v.sst_mask == (1
 
             case OP_sf32:
             case OP_sf64:
             {
                 bool singlePrecision = IFFLOAT(state->value(sp-1).traits == FLOAT_TYPE, false);
                 LIns* svalue = singlePrecision ? localGetf(sp-1) : localGetd(sp-1);
                 LIns* mopAddr = localGet(sp);
                 const MopsInfo& mi = kMopsStoreInfo[opcode-OP_si8];
-            #ifdef VMCFG_MOPS_USE_EXPANDED_LOADSTORE_FP
+                Ins(LIR_memfence);
+            #ifdef VMCFG_MOPS_USE_EXPANDED_LOADSTORE_FP
                 int32_t disp = 0;
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, &disp);
                 lirout->insStore(mi.op, svalue, realAddr, disp, ACCSET_OTHER);
             #else
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, mi.size, NULL);
                 callIns(mi.call, 2, realAddr, svalue);
             #endif
                 break;
             }
 #ifdef VMCFG_FLOAT
             case OP_sf32x4:
             {
-                // TODO: inlining.  The appropriate condition is probably *not* VMCFG_MOPS_USE_EXPANDED_LOADSTORE_FP.
+                Ins(LIR_memfence);
+                // TODO: inlining.  The appropriate condition is probably *not* VMCFG_MOPS_USE_EXPANDED_LOADSTORE_FP.
                 LIns* svalue = localGetf4Addr(sp-1);
                 LIns* mopAddr = binaryIns(LIR_andi, localGet(sp), InsConst(~15U));
                 LIns* realAddr = mopAddrToRangeCheckedRealAddrAndDisp(mopAddr, sizeof(float4_t), NULL);
                 callIns(FUNCTIONID(mop_sf32x4), 2, realAddr, svalue);
                 break;
             }
 #endif
 
@@ -8208,16 +8206,24 @@ FLOAT_ONLY(           !(v.sst_mask == (1
                 label.unpatchedEdges = new (*alloc1) Seq<InEdge>(InEdge(br), label.unpatchedEdges);
                 varTracker->trackForwardEdge(label);
             }
         } else {
             // branch was optimized away.  do nothing.
         }
     }
 
+    void CodegenLIR::branchAndSavePC(LIns *flag, CodegenLabel& label) {
+        LIns* labelIns = label.labelIns;
+        LIns* br = lirout->insBranch(LIR_brsavpc, flag, labelIns);
+        AvmAssert(br != NULL && labelIns == NULL);
+        label.unpatchedEdges = new (*alloc1) Seq<InEdge>(InEdge(br), label.unpatchedEdges);
+        varTracker->trackForwardEdge(label);
+    }
+
     LIns* CodegenLIR::branchJovToLabel(LOpcode op, LIns *a, LIns *b, CodegenLabel& label) {
         LIns* labelIns = label.labelIns;
         LIns* result = lirout->insBranchJov(op, a, b, labelIns);
         NanoAssert(result);
         if (result->isop(op)) {
             if (labelIns != NULL) {
                 varTracker->checkBackEdge(label, state);
             } else {
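
With VMCFG_INTERRUPT_SAFEPOINT_POLL, the poll emitted at the target of a backwards branch is now a single fused branch: branchAndSavePC issues LIR_brsavpc on the loaded 'interrupted' word itself (the new assert documents that AvmCore::NotInterrupted is 0, so any non-zero value means an interrupt is pending), replacing the previous savepc / compare / branch / discardpc sequence. A rough, hand-written C++ analogue of what the generated code does at each loop back-edge (the flag and handler names below are illustrative, not VM APIs):

    #include <atomic>
    #include <cstdint>

    std::atomic<int32_t> interrupted{0};   // 0 plays the role of AvmCore::NotInterrupted

    void handle_interrupt();               // stands in for the code at interrupt_label

    long hot_loop(long n)
    {
        long sum = 0;
        for (long i = 0; i < n; ++i) {
            // Back-edge target: reload the flag every iteration and branch to
            // the handler if it is non-zero.  LIR_brsavpc additionally saves
            // the native pc so the handler can resume right after the branch.
            if (interrupted.load(std::memory_order_relaxed) != 0)
                handle_interrupt();
            sum += i;
        }
        return sum;
    }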
--- a/core/CodegenLIR.h
+++ b/core/CodegenLIR.h
@@ -353,16 +353,17 @@ namespace avmplus
         LIns* localGet(int i);
         LIns* localGetp(int i);
         LIns* localGetd(int i);
         LIns* localGetf(int i); // Aborts if float not enabled
         LIns* localGetf4(int i);  // Aborts if float not enabled
         LIns* localGetf4Addr(int i);  // Aborts if float not enabled
         LIns* localCopy(int i); // sniff's type from FrameState
         void branchToLabel(LOpcode op, LIns *cond, CodegenLabel& label);
+        void branchAndSavePC(LIns *flag, CodegenLabel& label);
         LIns* branchJovToLabel(LOpcode op, LIns *a, LIns *b, CodegenLabel& label);
         void branchToAbcPos(LOpcode op, LIns *cond, const uint8_t* target);
         LIns* mopAddrToRangeCheckedRealAddrAndDisp(LIns* mopAddr, int32_t const size, int32_t* disp);
         LIns* loadEnvScope();
         LIns* loadEnvVTable();
         LIns* loadEnvAbcEnv();
         LIns* loadEnvDomainEnv();
         LIns* loadEnvToplevel();
--- a/core/avmfeatures.as
+++ b/core/avmfeatures.as
@@ -738,20 +738,16 @@ var FEATURES =
     <defines> VMCFG_SAFEPOINTS </defines>
   </feature>
   
   <feature>
     <desc> Enables local returns and safepoint polling from interrupt checks </desc>
     <name> AVMFEATURE_INTERRUPT_SAFEPOINT_POLL </name>
     <defines> VMCFG_INTERRUPT_SAFEPOINT_POLL </defines>
     <requires>AVMFEATURE_SAFEPOINTS </requires>
-    <requires> <exactly-one> 
-       <name>AVMSYSTEM_IA32</name> 
-       <name>AVMSYSTEM_AMD64</name> 
-    </exactly-one> </requires>
   </feature>
 
   <feature>
     <desc> Enabling this will support SWF12 / ABC version 47.12 </desc>
     <name> AVMFEATURE_SWF12 </name>
     <defines> VMCFG_SWF12 </defines>
   </feature>
 
--- a/core/avmfeatures.cpp
+++ b/core/avmfeatures.cpp
@@ -1,19 +1,21 @@
 //                     DO NOT EDIT THIS FILE
 //
 //  This file has been generated by the script core/avmfeatures.as,
 //  from a set of configuration parameters in that file.
 //
 //  If you feel you need to make changes below, instead edit the configuration
 //  file and rerun it to get a new version of this file.
 //
-//  This Source Code Form is subject to the terms of the Mozilla Public
-//  License, v. 2.0. If a copy of the MPL was not distributed with this
-//  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+///*
+//*  This Source Code Form is subject to the terms of the Mozilla Public
+//*  License, v. 2.0. If a copy of the MPL was not distributed with this
+//*  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+//*/
 
 #include "avmplus.h"
 
 #ifdef AVMSHELL_BUILD
 
 // The string avmfeatures contains the names of all features that were enabled
 // when the program was compiled.  Each feature name is terminated by a semicolon.
 const char * const avmfeatures = ""
--- a/core/avmfeatures.h
+++ b/core/avmfeatures.h
@@ -1,19 +1,21 @@
 //                     DO NOT EDIT THIS FILE
 //
 //  This file has been generated by the script core/avmfeatures.as,
 //  from a set of configuration parameters in that file.
 //
 //  If you feel you need to make changes below, instead edit the configuration
 //  file and rerun it to get a new version of this file.
 //
-//  This Source Code Form is subject to the terms of the Mozilla Public
-//  License, v. 2.0. If a copy of the MPL was not distributed with this
-//  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+///*
+//*  This Source Code Form is subject to the terms of the Mozilla Public
+//*  License, v. 2.0. If a copy of the MPL was not distributed with this
+//*  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+//*/
 #undef VMCFG_32BIT
 #undef VMCFG_64BIT
 #undef MMGC_64BIT
 #undef AVMPLUS_64BIT
 #undef VMCFG_UNALIGNED_INT_ACCESS
 #undef VMCFG_UNALIGNED_FP_ACCESS
 #undef VMCFG_BIG_ENDIAN
 #undef VMCFG_LITTLE_ENDIAN
@@ -119,116 +121,181 @@
 #if !defined AVMSYSTEM_64BIT || AVMSYSTEM_64BIT != 0 && AVMSYSTEM_64BIT != 1
 #  error "AVMSYSTEM_64BIT must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_UNALIGNED_INT_ACCESS
  *
  * Selects an architecture that allows load/store of unaligned 16- and 32-bit ints.
+
+ * 
  *
  * While it's OK for an unaligned int load or store to be slower than an aligned load
+
  * or store, we require that:
+
+ * 
  *
  * - an unaligned load/store MUST NOT generate a run-time fault, and
+
  * - an unaligned load/store MUST be at least as efficient as separate instructions
+
  * to load and assemble the word one byte at a time / disassemble and store one
+
  * byte at a time.
+
+ * 
  *
  * If you cannot guarantee that the requirements are met then DO NOT enable
- * this feature.  (For example, on Palm Pre unaligned loads/stores are allowed but
+
+ * this feature.  (For example, on Palm Pre unaligned loads/stores are allowed but 
+
  * they're reportedly so slow that they're pointless.)
+
+ * 
  *
  * Code that uses this feature MUST NOT use it as a license to load/store floating-point
+
  * data using integer instructions, since in general that will not work.  In
+
  * particular this classical pattern will not work:
+
+ * 
  *
  * uint8_t* p = ...;  // possibly-unaligned address we're loading from
+
  * union (
+
  * float f;
+
  * uint32_t i;
+
  * ) u;
+
  * #ifdef VMCFG_UNALIGNED_INT_ACCESS
+
  * u.i = *(uint32_t*)p;
+
  * return u.f;
+
  * #else
+
  * ...
+
+ * 
  *
  * The reason it won't work is that some compilers (notably on ARM) will generate
+
  * code that uses a floating-point load into an FP register, so the code actually
+
  * needs unaligned floating-point loads to be supported, AVMSYSTEM_UNALIGNED_FP_ACCESS.
+
  * (Whether it is correct for the compiler to generate that code is beside the point.)
+
+ * 
  *
  * The prohibition applies to 64-bit loads/stores as well (expressed as pairs of
- * uint32_t loads/stores): ARM compilers as of October 2011 will rewrite a pair of loads
+
+ * uint32_t loads/stores): ARM compilers as of October 2011 will rewrite a pair of loads 
+
  * into a uint32_t array in a union with a return of a double from the union as a single
+
  * double load into the return register.  See comments throughout the code as well
+
  * as Bugzilla 569691 and 685441.
  */
 #if !defined AVMSYSTEM_UNALIGNED_INT_ACCESS || AVMSYSTEM_UNALIGNED_INT_ACCESS != 0 && AVMSYSTEM_UNALIGNED_INT_ACCESS != 1
 #  error "AVMSYSTEM_UNALIGNED_INT_ACCESS must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_UNALIGNED_FP_ACCESS
  *
  * Selects an architecture that allows load/store of unaligned 32- and 64-bit floats.
- *
+
+ * 
+
  * While it's OK for an unaligned floating-point load/store to be slower than an aligned
+
  * load/store, we require that:
+
+ * 
  *
  * - an unaligned load/store MUST NOT generate a run-time fault, and
+
  * - an unaligned load/store MUST be at least as efficient as separate instructions
+
  * to load and assemble the datum one byte at a time / disassemble and store one
+
  * byte at a time.
+
+ * 
  *
  * If you cannot guarantee that the requirements are met then DO NOT enable
+
  * this feature.
+
+ * 
  *
  * Note that if AVMSYSTEM_UNALIGNED_FP_ACCESS is not set then it is assumed that 64-bit
+
  * floats require 8-byte alignment.
- *
+
+ * 
+
  * Note that AVMSYSTEM_UNALIGNED_FP_ACCESS does not apply to float4 values.  Some SIMD
- * units have different instructions for aligned and unaligned access; on some
+
+ * units have different instructions for aligned and unaligned access; on some 
+
  * systems the alignment requirement is 16 bytes, on others it's 8 bytes.  But as of
+
  * November 2011 all C++ compilers we use will assume such alignment when manipulating
+
  * float4 values and will not use the instructions for unaligned access even if
+
  * they are available.  C++ code must never assume that unaligned access is OK is
+
  * appropriate for float4 data.
  */
 #if !defined AVMSYSTEM_UNALIGNED_FP_ACCESS || AVMSYSTEM_UNALIGNED_FP_ACCESS != 0 && AVMSYSTEM_UNALIGNED_FP_ACCESS != 1
 #  error "AVMSYSTEM_UNALIGNED_FP_ACCESS must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_BIG_ENDIAN
  *
  * Selects a big-endian architecture: the most significant byte of a word
+
  * is stored at the lowest byte address of the word
  */
 #if !defined AVMSYSTEM_BIG_ENDIAN || AVMSYSTEM_BIG_ENDIAN != 0 && AVMSYSTEM_BIG_ENDIAN != 1
 #  error "AVMSYSTEM_BIG_ENDIAN must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_LITTLE_ENDIAN
  *
  * Selects a little-endian architecture: the least significant byte of a word
+
  * is stored at the lowest byte address of the word
  */
 #if !defined AVMSYSTEM_LITTLE_ENDIAN || AVMSYSTEM_LITTLE_ENDIAN != 0 && AVMSYSTEM_LITTLE_ENDIAN != 1
 #  error "AVMSYSTEM_LITTLE_ENDIAN must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_DOUBLE_MSW_FIRST
  *
  * Selects a reverse floating-point layout on little-endian systems:
+
  * the most significant word (containing the sign, exponent, and most
+
  * significant bits of the significand) are at the lower word address.
+
  * Each word is stored little-endian, however.
  */
 #if !defined AVMSYSTEM_DOUBLE_MSW_FIRST || AVMSYSTEM_DOUBLE_MSW_FIRST != 0 && AVMSYSTEM_DOUBLE_MSW_FIRST != 1
 #  error "AVMSYSTEM_DOUBLE_MSW_FIRST must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_IA32
@@ -256,16 +323,17 @@
 #if !defined AVMSYSTEM_ARM || AVMSYSTEM_ARM != 0 && AVMSYSTEM_ARM != 1
 #  error "AVMSYSTEM_ARM must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_PPC
  *
  * Selects the PowerPC / Power architecture.  Whether it's the 32-bit or the
+
  * 64-bit version of the architecture is controlled independently.
  */
 #if !defined AVMSYSTEM_PPC || AVMSYSTEM_PPC != 0 && AVMSYSTEM_PPC != 1
 #  error "AVMSYSTEM_PPC must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMSYSTEM_SPARC
@@ -329,85 +397,110 @@
 #if !defined AVMSYSTEM_SYMBIAN || AVMSYSTEM_SYMBIAN != 0 && AVMSYSTEM_SYMBIAN != 1
 #  error "AVMSYSTEM_SYMBIAN must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_DEBUGGER
  *
  * Selects the AVM debugger API, including retaining debug information at
+
  * run-time and human-readable error messages for run-time errors.
+
+ * 
  *
  * There is a performance penalty to enabling this; clients that want
+
  * maximal execution performance and don't care about debugging should
+
  * disable it.
+
+ * 
  *
  * If you enable the debugger you may want to consider enabling support for
+
  * specific language strings for error messages in order to avoid getting
+
  * them all.  See the AVMPLUS_ERROR_LANG_ macros in core/ErrorConstants.h.
+
  * It's easiest to define the ones you want in core/avmbuild.h.
  */
 #if !defined AVMFEATURE_DEBUGGER || AVMFEATURE_DEBUGGER != 0 && AVMFEATURE_DEBUGGER != 1
 #  error "AVMFEATURE_DEBUGGER must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_DEBUGGER_STUB
  *
  * This is used to compile AVM with the debugger API enabled, but
+
  * certain bits of functionality reduced to no-ops.
  */
 #if !defined AVMFEATURE_DEBUGGER_STUB || AVMFEATURE_DEBUGGER_STUB != 0 && AVMFEATURE_DEBUGGER_STUB != 1
 #  error "AVMFEATURE_DEBUGGER_STUB must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_ALLOCATION_SAMPLER
  *
  * Enable the sample-based memory profiler.  This makes allocation a
+
  * little more expensive if a sampler callback is not installed, and
+
  * more expensive still if it is installed.
+
+ * 
  *
  * FIXME: more information needed.
+
+ * 
  *
  * Note that this is enabled always by AVMFEATURE_DEBUGGER.
+
+ * 
  *
  * It is known that the Flash Player wants to enable this if SCRIPT_DEBUGGER
+
  * is enabled in the Player code.
  */
 #if !defined AVMFEATURE_ALLOCATION_SAMPLER || AVMFEATURE_ALLOCATION_SAMPLER != 0 && AVMFEATURE_ALLOCATION_SAMPLER != 1
 #  error "AVMFEATURE_ALLOCATION_SAMPLER must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_VTUNE
  *
  * Selects VTune profiling of jit'd code.  Requires Windows x86,
+
  * and could support windows x64 after more testing.
+
  * turns on AVMPLUS_VERBOSE solely to get method/class names for profiling
  */
 #if !defined AVMFEATURE_VTUNE || AVMFEATURE_VTUNE != 0 && AVMFEATURE_VTUNE != 1
 #  error "AVMFEATURE_VTUNE must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_SHARK
  *
  * Selects Shark profiling of jit'd code.  MacOS 10.6.  This technique
+
  * should work for oprofile on linux and/or android, with more tweaks.
+
  * See README in utils/sharkprof.
  */
 #if !defined AVMFEATURE_SHARK || AVMFEATURE_SHARK != 0 && AVMFEATURE_SHARK != 1
 #  error "AVMFEATURE_SHARK must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_JIT
  *
  * Enables the just-in-time compiler.  This will typically increase performance
+
  * significantly but may result in significantly higher memory consumption.
  */
 #if !defined AVMFEATURE_JIT || AVMFEATURE_JIT != 0 && AVMFEATURE_JIT != 1
 #  error "AVMFEATURE_JIT must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_HALFMOON
@@ -426,32 +519,40 @@
 #if !defined AVMFEATURE_FLOAT || AVMFEATURE_FLOAT != 0 && AVMFEATURE_FLOAT != 1
 #  error "AVMFEATURE_FLOAT must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_OSR
  *
  * Enables delayed JIT-compilation with on-stack replacement, by default,
+
  * and supports runtime-disabling of OSR to get the legacy policy (OSR=0).
+
  * Without this feature, legacy policy is the default: the VM
+
  * compiles a method eagerly or interprets it always, and the OSR
+
  * invocation threshold can be enabled at runtime (OSR=K, K>0).
  */
 #if !defined AVMFEATURE_OSR || AVMFEATURE_OSR != 0 && AVMFEATURE_OSR != 1
 #  error "AVMFEATURE_OSR must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_COMPILEPOLICY
  *
  * Allows the default JIT compilation policy to be overriden with alternate rules.
+
  * In shell builds, this enables the -policy option which allows one to specify
+
  * which methods should be compiled and which should be interpreted.  There are
+
  * currently three means of identifying a method to be controlled; unique id,
+
  * exact name match, and regular expression name match.
  */
 #if !defined AVMFEATURE_COMPILEPOLICY || AVMFEATURE_COMPILEPOLICY != 0 && AVMFEATURE_COMPILEPOLICY != 1
 #  error "AVMFEATURE_COMPILEPOLICY must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_AOT
@@ -461,72 +562,84 @@
 #if !defined AVMFEATURE_AOT || AVMFEATURE_AOT != 0 && AVMFEATURE_AOT != 1
 #  error "AVMFEATURE_AOT must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_BUFFER_GUARD
  *
  * Enables the exception based caching code, right now this is used
+
  * exclusively by AOT.
  */
 #if !defined AVMFEATURE_BUFFER_GUARD || AVMFEATURE_BUFFER_GUARD != 0 && AVMFEATURE_BUFFER_GUARD != 1
 #  error "AVMFEATURE_BUFFER_GUARD must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_ABC_INTERP
  *
  * Selects the ABC interpreter.  Appropriate for platforms that run
+
  * the interpreter only for initialization code and for
+
  * platforms that are exceptionally memory-constrained.
  */
 #if !defined AVMFEATURE_ABC_INTERP || AVMFEATURE_ABC_INTERP != 0 && AVMFEATURE_ABC_INTERP != 1
 #  error "AVMFEATURE_ABC_INTERP must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_WORDCODE_INTERP
  *
  * Selects the wordcode interpreter.  Appropriate for platforms that run the
+
  * interpreter for some or all methods and are not exceptionally memory-constrained.
  */
 #if !defined AVMFEATURE_WORDCODE_INTERP || AVMFEATURE_WORDCODE_INTERP != 0 && AVMFEATURE_WORDCODE_INTERP != 1
 #  error "AVMFEATURE_WORDCODE_INTERP must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_THREADED_INTERP
  *
  * Selects the faster, direct threaded wordcode interpreter.
+
  * This is appropriate only for C++ compilers that support GCC-style computed
+
  * "goto".  It is believed that RCVT, Intel's C++ compiler, and the Sunpro
+
  * compiler all do.
  */
 #if !defined AVMFEATURE_THREADED_INTERP || AVMFEATURE_THREADED_INTERP != 0 && AVMFEATURE_THREADED_INTERP != 1
 #  error "AVMFEATURE_THREADED_INTERP must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_SELFTEST
  *
  * AVMFEATURE_SELFTEST enables the built-in selftests.  These can be run by -Dselftest
+
  * at the shell or by calling the global function avmplus::selftests(), see extensions/Selftest.h.
+
  * Mostly they are useful for AVM development, not for embedders.
+
+ * 
  *
  * Apart from code size considerations this can be enabled for release builds.
  */
 #if !defined AVMFEATURE_SELFTEST || AVMFEATURE_SELFTEST != 0 && AVMFEATURE_SELFTEST != 1
 #  error "AVMFEATURE_SELFTEST must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_EVAL
  *
  * Select support for the AS3 run-time compiler.  NOT RECOMMENDED.  The run-time compiler
+
  * is still undergoing development.
  */
 #if !defined AVMFEATURE_EVAL || AVMFEATURE_EVAL != 0 && AVMFEATURE_EVAL != 1
 #  error "AVMFEATURE_EVAL must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_TELEMETRY
@@ -536,130 +649,164 @@
 #if !defined AVMFEATURE_TELEMETRY || AVMFEATURE_TELEMETRY != 0 && AVMFEATURE_TELEMETRY != 1
 #  error "AVMFEATURE_TELEMETRY must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_TELEMETRY_SAMPLER
  *
  * Select support for Telemetry based sampler, requires a Telemetry implementation
+
  * (to be used in host)
  */
 #if !defined AVMFEATURE_TELEMETRY_SAMPLER || AVMFEATURE_TELEMETRY_SAMPLER != 0 && AVMFEATURE_TELEMETRY_SAMPLER != 1
 #  error "AVMFEATURE_TELEMETRY_SAMPLER must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_PROTECT_JITMEM
  *
  * Makes all JIT code buffers read-only whenever JIT code is executing,
+
  * to reduce the probability of heap overflow attacks.
  */
 #if !defined AVMFEATURE_PROTECT_JITMEM || AVMFEATURE_PROTECT_JITMEM != 0 && AVMFEATURE_PROTECT_JITMEM != 1
 #  error "AVMFEATURE_PROTECT_JITMEM must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_SHARED_GCHEAP
  *
  * Selects locking around calls to the memory block manager (GCHeap), allowing multiple
+
  * threads to share the block manager.  Any client with more than one thread that uses
+
  * MMgc either for garbage collected or manually managed memory wants this; the Flash
+
  * Player requires it.
  */
 #if !defined AVMFEATURE_SHARED_GCHEAP || AVMFEATURE_SHARED_GCHEAP != 0 && AVMFEATURE_SHARED_GCHEAP != 1
 #  error "AVMFEATURE_SHARED_GCHEAP must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_USE_SYSTEM_MALLOC
  *
  * Make MMgc's overridden global new and delete operators delegate allocation and
+
  * deallocation to VMPI_alloc and VMPI_free instead of going to FixedMalloc.
+
+ * 
  *
  * Whether you want this or not probably depends on the performance of the
+
  * underlying malloc and might depend on memory consumption patterns.  On desktop
+
  * systems you probably want this to be disabled.
  */
 #if !defined AVMFEATURE_USE_SYSTEM_MALLOC || AVMFEATURE_USE_SYSTEM_MALLOC != 0 && AVMFEATURE_USE_SYSTEM_MALLOC != 1
 #  error "AVMFEATURE_USE_SYSTEM_MALLOC must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_CPP_EXCEPTIONS
  *
  * Support C++ exceptions in the MMgc API.  At the time of writing (Apr 2009)
+
  * this means decorating the global new and delete operator with appropriate 'throw'
+
  * clauses.  It is unlikely to mean anything more, as AVM+ and MMgc do not use and
+
  * do not generally support C++ exceptions.
+
+ * 
  *
  * Note that even if this is enabled, the global new and delete operators may
+
  * not throw exceptions when memory can't be allocated, because the out-of-memory
+
  * handling in MMgc may take precedence.
+
+ * 
  *
  * FixedMalloc never throws an exception for a failed allocation.
  */
 #if !defined AVMFEATURE_CPP_EXCEPTIONS || AVMFEATURE_CPP_EXCEPTIONS != 0 && AVMFEATURE_CPP_EXCEPTIONS != 1
 #  error "AVMFEATURE_CPP_EXCEPTIONS must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_INTERIOR_POINTERS
  *
  * Recognize a pointer or pointer-like value into anywhere in an object as referencing
+
  * that object during marking in the garbage collector.
+
+ * 
  *
  * Enabling this tends to be increase GC cost but it can be a useful debugging aid.
  */
 #if !defined AVMFEATURE_INTERIOR_POINTERS || AVMFEATURE_INTERIOR_POINTERS != 0 && AVMFEATURE_INTERIOR_POINTERS != 1
 #  error "AVMFEATURE_INTERIOR_POINTERS must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_HEAP_ALLOCA
  *
  * If enabled then always divert avmStackAlloc() to a separately managed stack,
+
  * to avoid blowing the stack on small systems or to support systems that
+
  * don't provide alloca().  If disabled then smallish allocations are handled
+
  * by the built-in alloca() (which must be provided) and larger allocations
+
  * are handled by diverting to a separately managed stack; the latter case is
+
  * mainly a security issue, as alloca() will do strange things if given sufficiently
+
  * large requests.
  */
 #if !defined AVMFEATURE_HEAP_ALLOCA || AVMFEATURE_HEAP_ALLOCA != 0 && AVMFEATURE_HEAP_ALLOCA != 1
 #  error "AVMFEATURE_HEAP_ALLOCA must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_OVERRIDE_GLOBAL_NEW
  *
  * Enabling this will cause the mmfx_* memory macros to use global new/delete.
+
  * By default we use specialized new/delete operators and avoid global new/delete.  However
+
  * this requires some tricks to get multiple inheritance and private destructors to work
+
  * so some codebases may want to use the simpler path of overriding global new/delete.
+
  * Note that this feature works independently of AVMFEATURE_USE_SYSTEM_MALLOC.
  */
 #if !defined AVMFEATURE_OVERRIDE_GLOBAL_NEW || AVMFEATURE_OVERRIDE_GLOBAL_NEW != 0 && AVMFEATURE_OVERRIDE_GLOBAL_NEW != 1
 #  error "AVMFEATURE_OVERRIDE_GLOBAL_NEW must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_MEMORY_PROFILER
  *
  * Enabling this will compile in code to enable memory profiling. (Must still be
+
  * enabled at runtime.)
  */
 #if !defined AVMFEATURE_MEMORY_PROFILER || AVMFEATURE_MEMORY_PROFILER != 0 && AVMFEATURE_MEMORY_PROFILER != 1
 #  error "AVMFEATURE_MEMORY_PROFILER must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_CACHE_GQCN
  *
  * Enabling this will cache the result of getQualifiedClassName, making it run
+
  * much more quickly, at the expense of more memory usage.
  */
 #if !defined AVMFEATURE_CACHE_GQCN || AVMFEATURE_CACHE_GQCN != 0 && AVMFEATURE_CACHE_GQCN != 1
 #  error "AVMFEATURE_CACHE_GQCN must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMFEATURE_VALGRIND
@@ -769,29 +916,31 @@
 #  error "AVMFEATURE_SWF20 must be defined and 0 or 1 (only)."
 #endif
 
 
 
 /* AVMTWEAK_SIN_COS_NONFINITE
  *
  * Various iphone SDK versions - at least - botch sin() and cos() around NaN
+
  * and infinity.  See https://bugzilla.mozilla.org/show_bug.cgi?id=556149.
  */
 #if !defined AVMTWEAK_SIN_COS_NONFINITE
 #  define AVMTWEAK_SIN_COS_NONFINITE 0
 #endif
 #if AVMTWEAK_SIN_COS_NONFINITE != 0 && AVMTWEAK_SIN_COS_NONFINITE != 1
 #  error "AVMTWEAK_SIN_COS_NONFINITE must be defined and 0 or 1 (only)."
 #endif
 
 
 /* AVMTWEAK_EPOC_EMULATOR
  *
  * The current (June 2010) EPOC/Symbian emulator has certain limitations,
+
  * described throughout the code where this tweak is used.
  */
 #if !defined AVMTWEAK_EPOC_EMULATOR
 #  define AVMTWEAK_EPOC_EMULATOR 0
 #endif
 #if AVMTWEAK_EPOC_EMULATOR != 0 && AVMTWEAK_EPOC_EMULATOR != 1
 #  error "AVMTWEAK_EPOC_EMULATOR must be defined and 0 or 1 (only)."
 #endif
@@ -987,20 +1136,16 @@
 
 
 
 
 #if AVMFEATURE_INTERRUPT_SAFEPOINT_POLL
 #  if !AVMFEATURE_SAFEPOINTS
 #    error "AVMFEATURE_SAFEPOINTS is required for AVMFEATURE_INTERRUPT_SAFEPOINT_POLL"
 #  endif
-#if AVMSYSTEM_IA32+AVMSYSTEM_AMD64 != 1
-#  error "Exactly one of AVMSYSTEM_IA32,AVMSYSTEM_AMD64 must be defined."
-#endif
-
 #endif
 
 #if AVMFEATURE_SWF13
 #  if !AVMFEATURE_SWF12
 #    error "AVMFEATURE_SWF12 is required for AVMFEATURE_SWF13"
 #  endif
 #endif
 #if AVMFEATURE_SWF14
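
Together with the core/avmfeatures.as change above, the regenerated header no longer insists that exactly one of AVMSYSTEM_IA32 / AVMSYSTEM_AMD64 be selected, leaving only the AVMFEATURE_SAFEPOINTS dependency. That is what allows AVMFEATURE_INTERRUPT_SAFEPOINT_POLL to be turned on for the ARM/Thumb2 back end this changeset fills in, presumably the Windows 8 on ARM configuration named in the commit message.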
--- a/nanojit/Assembler.cpp
+++ b/nanojit/Assembler.cpp
@@ -1401,16 +1401,34 @@ typedef void* (*decode_instructions_ftyp
                 _patches.put(branches.branch1,to);
             }
             if (branches.branch2) {
                 _patches.put(branches.branch2,to);
             }
         }
     }
 
+#if NJ_SAFEPOINT_POLLING_SUPPORTED
+    void Assembler::asm_brsavpc(LIns* ins)
+    {
+        NanoAssert(ins->opcode() == LIR_brsavpc);
+        LIns* cond = ins->oprnd1();
+
+        countlir_jcc();
+        LIns* to = ins->getTarget();
+        LabelState *label = _labels.get(to);
+        NanoAssert(label && label->addr);
+        {
+            // Forward jump to known label.  Need to merge with label's register state.
+            unionRegisterState(label->regs);
+            asm_brsavpc_impl(cond, label->addr);
+        }
+    }
+#endif
+
     void Assembler::asm_jov(LIns* ins, InsList& pending_lives)
     {
         // The caller is responsible for countlir_* profiling, unlike
         // asm_jcc above.  The reason for this is that asm_jov may not be
         // be called if the instruction is dead, and it is our convention
         // to count such instructions anyway.
         LOpcode op = ins->opcode();
         LIns* to = ins->getTarget();
@@ -1586,33 +1604,34 @@ typedef void* (*decode_instructions_ftyp
                     } else {
                         *((NIns**)(ins->safePayload())) = _nIns;
                     }
                     break;
 
                 case LIR_regfence:
                     evictAllActiveRegs();
                     break;
-
-               case LIR_pushstate:
+#if NJ_SAFEPOINT_POLLING_SUPPORTED
+                case LIR_pushstate:
                    asm_pushstate();
                    break;
-               case LIR_popstate:
+                case LIR_popstate:
                    asm_popstate();
                    break;
-               case LIR_savepc:
-                   asm_savepc();
-                   break;
-               case LIR_restorepc:
+                case LIR_brsavpc:
+                    ins->oprnd1()->setResultLive();
+                    asm_brsavpc(ins);
+                    break;
+                case LIR_restorepc:
                    asm_restorepc();
                    break;
-               case LIR_discardpc:
-                   asm_discardpc();
-                   break;
-
+                case LIR_memfence:
+                    asm_memfence();
+                    break;
+#endif
                 case LIR_livei:
                 CASE64(LIR_liveq:)
                 case LIR_lived:
                 case LIR_livef:
                 case LIR_livef4:
                 {
                     countlir_live();
                     LIns* op1 = ins->oprnd1();
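
asm_brsavpc is only ever reached for a branch whose target label has already been assembled (hence the NanoAssert on label->addr): nanojit generates native code bottom-up, and the safepoint branch always jumps forward to the interrupt label, so the label's native address is known by the time the branch itself is assembled. The new LIR_brsavpc case in the main switch also marks the condition operand live before dispatching, matching how the other conditional branches are handled, and LIR_memfence is routed to the per-backend asm_memfence.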
--- a/nanojit/Assembler.h
+++ b/nanojit/Assembler.h
@@ -452,16 +452,19 @@ namespace nanojit
             RegAlloc    _allocator;
 
             MetaDataWriter* _mdWriter;
 
             verbose_only( void asm_inc_m32(uint32_t*); )
             void        asm_mmq(Register rd, int dd, Register rs, int ds);
             void        asm_jmp(LIns* ins, InsList& pending_lives);
             void        asm_jcc(LIns* ins, InsList& pending_lives);
+#if NJ_SAFEPOINT_POLLING_SUPPORTED
+            void        asm_brsavpc(LIns* ins);
+#endif
             void        asm_jov(LIns* ins, InsList& pending_lives);
             void        asm_x(LIns* ins);
             void        asm_xcc(LIns* ins);
             NIns*       asm_exit(LIns* guard);
             NIns*       asm_leave_trace(LIns* guard);
             void        asm_store32(LOpcode op, LIns *val, int d, LIns *base);
             void        asm_store64(LOpcode op, LIns *val, int d, LIns *base);
 
--- a/nanojit/LIR.cpp
+++ b/nanojit/LIR.cpp
@@ -651,17 +651,17 @@ namespace nanojit
     LIns* LirBufWriter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr)
     {
         return ins3(op, a, b, (LIns*)gr);
     }
 
     LIns* LirBufWriter::insBranch(LOpcode op, LIns* condition, LIns* toLabel)
     {
         NanoAssert((op == LIR_j && !condition) ||
-                   ((op == LIR_jf || op == LIR_jt) && condition));
+                   ((op == LIR_jf || op == LIR_jt || op == LIR_brsavpc) && condition));
         return ins2(op, condition, toLabel);
     }
 
     LIns* LirBufWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* toLabel)
     {
         return ins3(op, a, b, toLabel);
     }
 
@@ -1902,18 +1902,17 @@ namespace nanojit
                 case LIR_skip:
                     NanoAssertMsg(0, "Shouldn't see LIR_skip");
                     break;
 
                 case LIR_start:
                 case LIR_regfence:
                 case LIR_pushstate:
                 case LIR_popstate:
-                case LIR_savepc:
-                case LIR_discardpc:
+                case LIR_memfence:
                 case LIR_restorepc:
                 case LIR_paramp:
                 case LIR_x:
                 case LIR_xbarrier:
                 case LIR_j:
                 case LIR_label:
                 case LIR_immi:
                 CASE64(LIR_immq:)
@@ -1944,16 +1943,17 @@ namespace nanojit
                 CASE64(LIR_liveq:)
                 case LIR_lived:
                 case LIR_livef:
                 case LIR_livef4:
                 case LIR_xt:
                 case LIR_xf:
                 case LIR_jt:
                 case LIR_jf:
+                case LIR_brsavpc:
                 case LIR_jtbl:
                 case LIR_negi:
                 case LIR_noti:
                 case LIR_negd:
                 case LIR_negf:
                 case LIR_negf4:
                 case LIR_absd:
                 case LIR_absf:
@@ -2378,20 +2378,19 @@ namespace nanojit
                 break;
                 
             case LIR_allocp:
                 VMPI_snprintf(s, n, "%s = %s %d", formatRef(&b1, i), lirNames[op], i->size());
                 break;
 
             case LIR_start:
             case LIR_regfence:
-            case LIR_savepc:
 	        case LIR_pushstate:
 	        case LIR_popstate:
-            case LIR_discardpc:
+            case LIR_memfence:
             case LIR_restorepc:
                 VMPI_snprintf(s, n, "%s", lirNames[op]);
                 break;
 
             case LIR_callv:
             case LIR_calli:
             CASE64(LIR_callq:)
             case LIR_calld: 
@@ -2451,16 +2450,17 @@ namespace nanojit
             }
 
             case LIR_label:
                 VMPI_snprintf(s, n, "%s:", formatRef(&b1, i));
                 break;
 
             case LIR_jt:
             case LIR_jf:
+            case LIR_brsavpc:
                 VMPI_snprintf(s, n, "%s %s -> %s", lirNames[op], formatRef(&b1, i->oprnd1()),
                     i->oprnd2() ? formatRef(&b2, i->oprnd2()) : "unpatched");
                 break;
 
             case LIR_j:
                 VMPI_snprintf(s, n, "%s -> %s", lirNames[op],
                     i->oprnd2() ? formatRef(&b1, i->oprnd2()) : "unpatched");
                 break;
@@ -4210,19 +4210,18 @@ namespace nanojit
     LIns* ValidateWriter::ins0(LOpcode op)
     {
         switch (op) {
         case LIR_start:
         case LIR_regfence:
         case LIR_label:
         case LIR_pushstate:
         case LIR_popstate:
-        case LIR_savepc:
+        case LIR_memfence:
         case LIR_restorepc:
-        case LIR_discardpc:
             break;
         default:
             NanoAssert(0);
         }
 
         // No args to type-check.
 
         return out->ins0(op);
@@ -4685,16 +4684,22 @@ namespace nanojit
         case LIR_jt:
         case LIR_jf:
             checkLInsIsACondOrConst(op, 1, cond);
             nArgs = 1;
             formals[0] = LTy_I;
             args[0] = cond;
             break;
 
+        case LIR_brsavpc:
+            nArgs = 1;
+            formals[0] = LTy_I;
+            args[0] = cond;
+            break;
+
         default:
             NanoAssert(0);
         }
 
         // We check that target is a label in ValidateReader because it may
         // not have been set here.
 
         typeCheckArgs(op, nArgs, formals, args);
--- a/nanojit/LIR.h
+++ b/nanojit/LIR.h
@@ -1064,21 +1064,21 @@ NanoStaticAssert(LIR_start == 0 && LIR_s
         }
 
         // True if the instruction an any type of immediate.
         bool isImmAny() const {
             return isImmIorF() || isImmQorD() || isImmF4();
         }
 
         bool isConditionalBranch() const {
-            return isop(LIR_jt) || isop(LIR_jf) || isJov();
+            return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_brsavpc) || isJov();
         }
 
         bool isUnConditionalBranch() const {
-            return isop(LIR_j) || isop(LIR_jtbl);
+            return isop(LIR_j) || isop(LIR_jtbl) || isop(LIR_brsavpc);
         }
 
         bool isBranch() const {
             return isConditionalBranch() || isUnConditionalBranch();
         }
 
         LTy retType() const {
             return retTypes[opcode()];
--- a/nanojit/LIRopcode.tbl
+++ b/nanojit/LIRopcode.tbl
@@ -411,20 +411,22 @@ OP_SF(ii2d,     Op2,  D,    1)  // join 
 
 // LIR_hcalli is a hack that's only used on 32-bit platforms that use
 // SoftFloat.  Its operand is always a LIR_calli, but one that specifies a
 // function that returns a double.  It indicates that the double result is
 // returned via two 32-bit integer registers.  The result is always used as the
 // second operand of a LIR_ii2d.
 OP_SF(hcalli,   Op1,  I,    1)
 
-OP___(savepc, Op0, V, 0)
+//---------------------------------------------------------------------------
+// Safepoint Polling
+//---------------------------------------------------------------------------
+OP___(memfence, Op0, V, 0)
+OP___(brsavpc, Op2, V, 0)        // branch and save pc
 OP___(restorepc, Op0, V, 0)
-OP___(discardpc, Op0, V, 0)
-
 OP___(pushstate, Op0, V, 0)
 OP___(popstate, Op0, V, 0)
 
 #undef OP_UN
 #undef OP_32
 #undef OP_64
 #undef OP_SF
 #undef OP_86
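
The table now groups the safepoint-polling primitives: LIR_memfence (no operands; a full memory barrier on targets that need one), LIR_brsavpc (a two-operand conditional branch that also saves the pc), the retained LIR_restorepc, and the pushstate/popstate pair; savepc and discardpc are gone. A fragment sketch of how a front end emits the new opcodes, assuming an existing LirWriter* lirout plus 'interrupted' and 'interruptLabel' instructions (names illustrative):

    lirout->ins0(LIR_memfence);                   // full barrier around a MOPS access
    LIns* br = lirout->insBranch(LIR_brsavpc,     // taken when the flag is non-zero,
                                 interrupted,     // with the native pc saved so the
                                 interruptLabel); // handler can return through it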
--- a/nanojit/NativeARM.cpp
+++ b/nanojit/NativeARM.cpp
@@ -878,46 +878,16 @@ Assembler::asm_stkarg(LIns* arg, int stk
         } else {
             // EABI requires that 64-bit arguments are 64-bit aligned.
             NanoAssert((stkd % 8) == 0);
             FSTD(dt, SP, stkd);
         }
     }
 }
 
-void 
-Assembler::asm_pushstate()
-{
-    NanoAssert(false);
-}
-
-void 
-Assembler::asm_popstate()
-{
-    NanoAssert(false);
-}
-
-void 
-Assembler::asm_savepc()
-{
-    NanoAssert(false);
-}
-
-void 
-Assembler::asm_restorepc()
-{
-    NanoAssert(false);
-}
-
-void 
-Assembler::asm_discardpc()
-{
-    NanoAssert(false);
-}
-
 void
 Assembler::asm_call(LIns* ins)
 {
     bool handled = false;
     NanoAssert(ARM_VFP);
     RegisterMask nonVolatile = 0;
     if (ARM_VFP) {
         /* Because ARM actually returns the result in (R0,R1), and not in a
--- a/nanojit/NativeARM.h
+++ b/nanojit/NativeARM.h
@@ -54,16 +54,17 @@ namespace nanojit
 
 #define NJ_JTBL_SUPPORTED               1
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
 #define NJ_SOFTFLOAT_SUPPORTED          1
 #define NJ_DIVI_SUPPORTED               0
 
 #define NJ_JTBL_ALLOWED_IDX_REGS        GpRegs
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  0
 
 #define RA_REGISTERS_OVERLAP            1
 #define firstAvailableReg               getAvailableReg
 #define getFatherReg                    _allocator.getSuitableRegFor
 
 #define NJ_CONSTANT_POOLS
 const int NJ_MAX_CPOOL_OFFSET = 4096;
 const int NJ_CPOOL_SIZE = 16;
@@ -325,21 +326,16 @@ verbose_only( extern const char* shiftNa
     void        B_cond_chk(ConditionCode, NIns*, bool);                         \
     void        underrunProtect(int bytes);                                     \
     void        nativePageReset();                                              \
     void        nativePageSetup();                                              \
     bool        hardenNopInsertion(const Config& /*c*/) { return false; }       \
     void        asm_immd_nochk(Register, int32_t, int32_t);                     \
     void        asm_regarg(ArgType, LIns*, Register);                           \
     void        asm_stkarg(LIns* p, int stkd);                                  \
-    void        asm_pushstate();                                                \
-    void        asm_popstate();                                                 \
-    void        asm_savepc();                                                   \
-    void        asm_restorepc();                                                \
-    void        asm_discardpc();                                                \
     void        asm_cmpi(Register, int32_t imm);                                \
     void        asm_ldr_chk(Register d, Register b, int32_t off, bool chk);     \
     int32_t     asm_str(Register rt, Register rr, int32_t off);                 \
     void        asm_cmp(LIns *cond);                                            \
     void        asm_cmpd(LIns *cond);                                           \
     void        asm_ld_imm(Register d, int32_t imm, bool chk = true);           \
     void        asm_arg(ArgType ty, LIns* arg, ParameterRegisters& params);     \
     void        asm_arg_float(LIns* arg, ParameterRegisters& params);           \
--- a/nanojit/NativeMIPS.h
+++ b/nanojit/NativeMIPS.h
@@ -22,16 +22,17 @@
 #define count_fpu()     do { _nvprof("mips-fpu", 1); count_instr(); } while (0)
 #define count_br()      do { _nvprof("mips-br", 1); count_instr(); } while (0)
 
 namespace nanojit
 {
 #define NJ_JTBL_SUPPORTED               0
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  0
 
     // Req: NJ_MAX_STACK_ENTRY is number of instructions to hold in LIR stack
 #if 0
     // FIXME: Inconsistent use in signed/unsigned expressions makes this generate errors
     static const uint32_t NJ_MAX_STACK_ENTRY = 4096;
 #else
 #define NJ_MAX_STACK_ENTRY 4096
 #endif
--- a/nanojit/NativePPC.h
+++ b/nanojit/NativePPC.h
@@ -31,16 +31,17 @@ namespace nanojit
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
 #define NJ_F2I_SUPPORTED                0
 #else
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
 #endif
 #define NJ_SOFTFLOAT_SUPPORTED          0
 #define NJ_DIVI_SUPPORTED               0
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  0
 #define firstAvailableReg(i,c,m)   nRegisterAllocFromSet(m)
 
 #define NJ_JTBL_ALLOWED_IDX_REGS        GpRegs
 
 
     enum ConditionRegister {
         CR0 = 0,
         CR1 = 1,
--- a/nanojit/NativeSH4.h
+++ b/nanojit/NativeSH4.h
@@ -109,16 +109,17 @@ namespace nanojit
     // Maximum size in bytes of a stack entry.
 #define NJ_MAX_STACK_ENTRY 4096
 
     // Minimum alignement for the stack pointer.
 #define NJ_ALIGN_STACK 8
 
     // Support the extended load/store opcodes.
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  0
 
     // Maximum size in bytes of a FP64 load, keep in sync' with asm_immd_nochk().
 #define SH4_IMMD_NOCHK_SIZE (9 * sizeof(NIns) + 2 * sizeof(uint32_t))
 
     // Maximum size in bytes of a INT32 load, keep in sync' with asm_immi_nochk().
 #define SH4_IMMI_NOCHK_SIZE (6 * sizeof(NIns))
 
     // "static" point of the frame.
--- a/nanojit/NativeSparc.h
+++ b/nanojit/NativeSparc.h
@@ -42,16 +42,17 @@ namespace nanojit
 #define NJ_MAX_PARAMETERS               1
 
 #define NJ_JTBL_SUPPORTED               0
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
 #define NJ_SOFTFLOAT_SUPPORTED          0
 #define NJ_DIVI_SUPPORTED               0
 #define RA_PREFERS_LSREG                1
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  0
 
     const int NJ_ALIGN_STACK = 16;
 
     typedef uint32_t NIns;
 
     // Bytes of icache to flush after Assembler::patch
     const size_t LARGEST_BRANCH_PATCH = 2 * sizeof(NIns);
 
--- a/nanojit/NativeThumb2.cpp
+++ b/nanojit/NativeThumb2.cpp
@@ -1084,17 +1084,17 @@ Assembler::PUSH_mask(RegisterMask mask)
 // POP {reg}
 inline void
 Assembler::POPr(Register r)
 {
     NanoAssert(IsGpReg(r));
     NanoAssert(r != SP);
     underrunProtect(4);
     // A8.8.131 T3
-    emitT32(0xE85D0B04 | r << 12);
+    emitT32(0xF85D0B04 | r << 12);
     asm_output("pop %s", gpn(r));
 }
 
 // POP {reglist}
 inline void
 Assembler::POP_mask(RegisterMask mask)
 {
     NanoAssert(isU16(mask));
@@ -1109,17 +1109,17 @@ Assembler::POP_mask(RegisterMask mask)
     // TODO: List the set of registers by name.
     asm_output("pop %x", (uint32_t)mask);
 }
 
 // Encode a breakpoint. The ID is not important and is ignored by the
 // processor, but it can be useful as a marker when debugging emitted code.
 // There is no 32-bit Thumb2 encoding of BKPT, so we generate a 16-bit
 // encoding followed by a 16-bit NOP to keep everything 32-bit aligned.
-#define BKPT_insn(id)  ((NIns32)(0xBE00BF04 | (id & 0xff) << 16))
+#define BKPT_insn(id)  ((NIns32)(0xBE00BF00 | (id & 0xff) << 16))
 
 inline void
 Assembler::BKPT(uint32_t id)
 {
     NanoAssert((id & 0xff) == id);
     underrunProtect(4);
     emitT32(BKPT_insn(id));
     asm_output("bkpt #%d", id);
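
The two hunks above are straight encoding fixes: the 32-bit T3 form of POP {reg} is LDR.W Rt, [SP], #4, whose leading halfword is 0xF85D rather than 0xE85D, and the filler halfword packed into BKPT_insn should be the Thumb NOP 0xBF00; the old value 0xBF04 decodes as an IT instruction instead of a NOP.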
@@ -2364,44 +2364,84 @@ Assembler::asm_stkarg(LIns* arg, int stk
         } else {
             // EABI requires that 64-bit arguments are 64-bit aligned.
             NanoAssert((stkd % 8) == 0);
             FSTD(dt, SP, stkd);
         }
     }
 }
 
+const RegisterMask cPushPopStateMask = rmask(FP) | rmask(R0) |
+    rmask(R1) | rmask(R2) |
+    rmask(R3) | rmask(R4) |
+    rmask(R5) | rmask(R6) |
+    rmask(R7) | rmask(R8) |
+    rmask(R9) | rmask(R10) |
+    rmask(LR) | rmask(IP);
+
 void 
 Assembler::asm_pushstate()
 {
-    NanoAssert(false);
+    PUSH_mask(cPushPopStateMask);
 }
 
 void 
 Assembler::asm_popstate()
 {
-    NanoAssert(false);
-}
-
-void 
-Assembler::asm_savepc()
+    POP_mask(cPushPopStateMask);
+}
+
+void
+Assembler::asm_brsavpc_impl(LIns* flag, NIns* target)
 {
-    NanoAssert(false);
+    Register r = findRegFor(flag, GpRegs);
+    intptr_t offs = PC_OFFSET_FROM(target, _nIns-2);
+
+    NIns* skipTarget = _nIns;
+
+    if (isS21(offs)) {
+        underrunProtect(4*4);   // speculative size of short branch code
+        // Recalculate the offset, because underrunProtect may have
+        // moved _nIns to a new page.  Unfortunately, Bcc may no longer
+        // be applicable, so we have to check again.
+        offs = PC_OFFSET_FROM(target, _nIns - 2);
+    }
+
+    if (isS21(offs)) {
+        ReserveContiguousSpace(this, 4*4);
+        BNE(target);
+        CMPi(r, 0);
+        ADDi(IP, IP, 4*3);      // offset PC for the next 3 instructions
+        emitT16((NIns)0xbf00);  // NOP -- maintain 32 bit alignment for instructions.
+        emitT16((NIns)0x46FC);  // MOV IP, PC  ; encoding A8.8.103 T1
+    } else {
+        ReserveContiguousSpace(this, 4*4+LDR_PC_size);
+        LDR_PC_longbranch(target);
+        ADDi(IP, IP, LDR_PC_size+4);
+        emitT16((NIns)0xbf00);  // NOP -- maintain 32 bit alignment for instructions.
+        emitT16((NIns)0x46FC);  // MOV IP, PC  ; encoding A8.8.103 T1
+        BEQ(skipTarget);
+        CMPi(r, 0);
+    }
 }
 
 void 
 Assembler::asm_restorepc()
 {
-    NanoAssert(false);
-}
-
-void 
-Assembler::asm_discardpc()
+    ReserveContiguousSpace(this, 4);
+    emitT16((NIns)0xbf00);  // NOP
+    emitT16((NIns)0x46E7);  // MOV PC, IP  ; encoding A8.8.103 T1
+}
+
+void
+Assembler::asm_memfence()
 {
-    NanoAssert(false);
+    ReserveContiguousSpace(this, 4);
+    // DMB (SY)  ; encoding A8.8.43 T1
+    emitT32((NIns32)0xf3bf8f5f);
 }
 
 void
 Assembler::asm_call(LIns* ins)
 {
     bool handled = false;
     NanoAssert(ARM_VFP);
     RegisterMask nonVolatile = 0;
@@ -4268,17 +4308,17 @@ void
 Assembler::asm_load32(LIns* ins)
 {
     LOpcode op = ins->opcode();
     LIns*   base = ins->oprnd1();
     int     d = ins->disp();
 
     Register rt = prepareResultReg(ins, GpRegs);
     // Try to re-use the result register for the base pointer.
-    Register rn = base->isInReg() ? base->getReg() : rt;
+    Register rn = base->isInReg() ? base->getReg() : rt;
 
     // TODO: The x86 back-end has a special case where the base address is
     // given by LIR_addp. The same technique may be useful here to take
     // advantage of ARM's register+register addressing mode.
 
     switch (op) {
         case LIR_lduc2ui:
             if (isU8(-d) || isU12(d)) {
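
Because nanojit emits code backwards, asm_brsavpc_impl reads in reverse: in execution order the short form is roughly MOV IP, PC; NOP; ADD IP, IP, #12; CMP r, #0; BNE <interrupt label>, leaving IP pointing at the instruction just past the branch, which asm_restorepc's MOV PC, IP later jumps back to. The long form does the same around an LDR-to-PC branch, with a BEQ skipping it when the flag is clear, and asm_memfence lowers the new LIR_memfence to a full DMB SY barrier.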
--- a/nanojit/NativeThumb2.h
+++ b/nanojit/NativeThumb2.h
@@ -50,16 +50,17 @@ namespace nanojit
 #define NJ_MAX_PARAMETERS               16
 #define NJ_ALIGN_STACK                  8
 
 #define NJ_JTBL_SUPPORTED               1
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
 #define NJ_SOFTFLOAT_SUPPORTED          1
 #define NJ_DIVI_SUPPORTED               0
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  1
 
 #define NJ_JTBL_ALLOWED_IDX_REGS        GpRegs
 
 #define RA_REGISTERS_OVERLAP            1
 #define firstAvailableReg               getAvailableReg
 #define getFatherReg                    _allocator.getSuitableRegFor
 
 #define NJ_CONSTANT_POOLS
@@ -400,19 +401,19 @@ verbose_only( extern const char* shiftNa
     void        nativePageReset();                                                                                \
     void        nativePageSetup();                                                                                \
     bool        hardenNopInsertion(const Config& /*c*/) { return false; }                                         \
     void        asm_immd(Register, int32_t, int32_t);                                                             \
     void        asm_regarg(ArgType, LIns*, Register);                                                             \
     void        asm_stkarg(LIns* p, int stkd);                                                                    \
     void        asm_pushstate();                                                                                  \
     void        asm_popstate();                                                                                   \
-    void        asm_savepc();                                                                                     \
+    void        asm_memfence();                                                                                   \
+    void        asm_brsavpc_impl(LIns* flag, NIns* targ);                                                         \
     void        asm_restorepc();                                                                                  \
-    void        asm_discardpc();                                                                                  \
     void        asm_cmpi(Register, int32_t imm);                                                                  \
     void        asm_ldr(Register d, Register b, int32_t off);                                                     \
     int32_t     asm_str(Register rt, Register rr, int32_t off);                                                   \
     void        asm_cmp(LIns *cond);                                                                              \
     void        asm_cmpd(LIns *cond);                                                                             \
     void        asm_ld_imm(Register d, int32_t imm);                                                              \
     void        asm_arg(ArgType ty, LIns* arg, ParameterRegisters& params);                                       \
     void        asm_arg_float(LIns* arg, ParameterRegisters& params);                                             \
--- a/nanojit/NativeX64.cpp
+++ b/nanojit/NativeX64.cpp
@@ -1611,37 +1611,48 @@ namespace nanojit
         POPR(R10);
         POPR(R11);
         POPR(R12);
         POPR(R13);
         POPR(R14);
         POPR(R15);
         ADDQRI(RSP, 32);
     }
-    
-    void Assembler::asm_savepc()
+
+    void Assembler::asm_brsavpc_impl(LIns* flag, NIns* target)
     {
-        emit(X64_call);
-        SUBQRI(RSP, 8);
-        //0xD8F7400000000003LL
-        //emit(0xCC00000000000001LL);
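+        // Rough runtime order (nanojit emits code backwards, so read the calls
+        // below bottom-up): SUB RSP, 8; CMP flag, 0; CALL (which pushes the pc);
+        // JNE target; ADD RSP, 16 then discards the saved pc and the pad.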
+        Register r = findRegFor(flag, GpRegs);
+        underrunProtect(20);
+
+        // discard pc
+        ADDQRI(RSP, 16);
+
+        // handle interrupt call
+        JNE(0, target);
+
+        // save pc
+        emit(X64_call);
+
+        CMPQRI(r, 0);
+        SUBQRI(RSP, 8);
     }
 
     void Assembler::asm_restorepc()
     {
+        underrunProtect(9);
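+        // Sketch of intent: the pc saved by asm_brsavpc_impl's CALL points at the
+        // 6-byte JNE that follows it, so bumping it by 6 and jumping through [rsp]
+        // resumes just past that branch (the add is emitted second, so it runs first).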
         // jmp dword ptr [rsp]
-        //0xD8F7400000000003LL
-        emit(0x2424FF0000000003LL); //TODO
+        emit(0x2424FF0000000003LL);
+        // add qword ptr [rsp],6
+        emit(0x0600244483480006LL);
     }
 
-    void Assembler::asm_discardpc()
-    {
-        ADDQRI(RSP, 16);
-    }
-
+    void Assembler::asm_memfence()
+    {
+        // no fencing necessary on x64
+    }
 
     void Assembler::asm_cmp(LIns *cond) {
       if (isCmpF4Opcode(cond->opcode()))
           asm_cmpf4(cond);
       else if (isCmpFOpcode(cond->opcode()) || isCmpDOpcode(cond->opcode()))
           asm_cmpd(cond);
       else
           asm_cmpi(cond);
--- a/nanojit/NativeX64.h
+++ b/nanojit/NativeX64.h
@@ -32,16 +32,17 @@ namespace nanojit
 
 #define NJ_JTBL_SUPPORTED               1
 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
 #define NJ_SOFTFLOAT_SUPPORTED          0
 #define NJ_DIVI_SUPPORTED               1
 #define RA_PREFERS_LSREG                1
 #define NJ_USES_IMMF4_POOL              1   // Note: doesn't use IMMD pool!
+#define NJ_SAFEPOINT_POLLING_SUPPORTED  1
 
 // exclude R12 because ESP and R12 cannot be used as an index
 // (index=100 in SIB means "none")
 #define NJ_JTBL_ALLOWED_IDX_REGS        (GpRegs & ~rmask(R12))
 
     static const Register RAX = { 0 };      // 1st int return, # of sse varargs
     static const Register RCX = { 1 };      // 4th int arg
     static const Register RDX = { 2 };      // 3rd int arg 2nd return
@@ -427,19 +428,19 @@ namespace nanojit
         void beginOp1Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
         void beginOp2Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
         void endOpRegs(LIns *ins, Register rr, Register ra);\
         void beginLoadRegs(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
         void endLoadRegs(LIns *ins);\
         void dis(NIns *p, int bytes);\
         void asm_pushstate(); \
         void asm_popstate(); \
-        void asm_savepc(); \
+        void asm_memfence(); \
+        void asm_brsavpc_impl(LIns* flag, NIns* targ);\
         void asm_restorepc(); \
-        void asm_discardpc(); \
         void asm_cmp(LIns*);\
         void asm_cmpi(LIns*);\
         void asm_cmpi_imm(LIns*);\
         void asm_cmpd(LIns*);\
         void asm_cmpf4(LIns*);\
         Branches asm_branch_helper(bool, LIns*, NIns*);\
         Branches asm_branchi_helper(bool, LIns*, NIns*);\
         Branches asm_branchd_helper(bool, LIns*, NIns*);\
--- a/nanojit/Nativei386.cpp
+++ b/nanojit/Nativei386.cpp
@@ -3522,38 +3522,60 @@ namespace nanojit
     }
     
     void Assembler::asm_popstate()
     {
         underrunProtect(1);
         OPCODE(0x61);
     }
 
-    void Assembler::asm_savepc()
-    {
-        underrunProtect(5);
+    void Assembler::asm_brsavpc_impl(LIns* flag, NIns* target)
+    {
+        Register r = findRegFor(flag, GpRegs);
+        underrunProtect(20);
+
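+        // Same shape as the x64 version; read the calls below bottom-up (backwards
+        // emission): SUB ESP, 12; TEST flag, flag; CALL 0 (pushes eip); JNE target;
+        // ADD ESP, 16 then discards the saved pc and the pad.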
+        // discard pc
+        ADDi(rESP, 16);
+
+        // branch to target when the flag is non-zero
+        const intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
+
+        // JNE(tt), long form -- generates 6 bytes of instruction
+        count_jcc();
+        IMM32(tt);
+        OPCODE(0x80 | 0x05);
+        OPCODE(JCC32);
+
+        // save pc
         IMM32(0);
-        OPCODE(0xE8);
-        SUBi(rESP, 12);
-    }
+        OPCODE(0xE8);       // CALL 0
+
+        TEST(r, r);
+
+        SUBi(rESP, 12);     // maintain 16 byte alignment for ABI
+    }
 
     void Assembler::asm_restorepc()
     {
-        underrunProtect(3);
-	// TODO probably use MODRM or something
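+        // Mirrors the x64 version: bump the saved pc past the 6-byte JNE emitted by
+        // asm_brsavpc_impl, then jump through it to resume after the safepoint poll.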
+        underrunProtect(7);
 	// jmp dword ptr [esp]
         OPCODE(0x24);
         OPCODE(0x24);
         OPCODE(0xff);
+	// add dword ptr [esp],6
+        OPCODE(0x06);
+        OPCODE(0x24);
+        OPCODE(0x04);
+        OPCODE(0x83);
     }
 
-    void Assembler::asm_discardpc()
-    {
-        ADDi(rESP, 16);
-    }
+    void Assembler::asm_memfence()
+    {
+        // no fencing necessary on i386
+    }
 
     // WARNING: This function cannot generate any code that will affect the
     // condition codes prior to the generation of the
     // ucomisd/fcompp/fcmop/fcom.  See asm_cmpi() for more details.
     void Assembler::asm_cmpf4(LIns *cond) {
         NanoAssert((cond->opcode()==LIR_eqf4) );
 
         LIns* a = cond->oprnd1();
--- a/nanojit/Nativei386.h
+++ b/nanojit/Nativei386.h
@@ -75,16 +75,17 @@ namespace nanojit
 
     #define NJ_JTBL_SUPPORTED               1
     #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
     #define NJ_F2I_SUPPORTED                1
     #define NJ_SOFTFLOAT_SUPPORTED          0
     #define NJ_DIVI_SUPPORTED               1
     #define RA_PREFERS_LSREG                1
     #define NJ_JTBL_ALLOWED_IDX_REGS        GpRegs
+    #define NJ_SAFEPOINT_POLLING_SUPPORTED  1
 
         // Preserve a 16-byte stack alignment, to support the use of
         // SSE instructions like MOVDQA (if not by Tamarin itself,
         // then by the C functions it calls).
     const int NJ_ALIGN_STACK = 16;
 
     const int32_t LARGEST_UNDERRUN_PROT = 32;  // largest value passed to underrunProtect
 
@@ -171,19 +172,19 @@ namespace nanojit
         debug_only( int32_t _fpuStkDepth; ) \
         debug_only( int32_t _sv_fpuStkDepth; ) \
         void nativePageReset();\
         void nativePageSetup();\
         void underrunProtect(int);\
         bool hardenNopInsertion(const Config& c) { return c.harden_nop_insertion; } \
         void asm_pushstate();                                           \
         void asm_popstate();                                            \
-        void asm_savepc();                                              \
+        void asm_memfence();                                            \
+        void asm_brsavpc_impl(LIns* flag, NIns* targ);                  \
         void asm_restorepc();                                           \
-        void asm_discardpc();                                           \
         void asm_cmpf4(LIns *cond);  \
         void asm_immf(Register r, int32_t i, float f, bool canClobberCCs); \
         void asm_immf4(Register r, const float4_t& f4, bool canClobberCCs);\
         void asm_immi(Register r, int32_t val, bool canClobberCCs);\
         void asm_stkarg(LIns* p, int32_t& stkd);\
         void asm_farg(LIns*, int32_t& stkd);\
         void asm_arg(ArgType ty, LIns* p, Register r, int32_t& stkd);\
         void asm_pusharg(LIns*);\
--- a/nanojit/nanojit.h
+++ b/nanojit/nanojit.h
@@ -11,17 +11,17 @@
 #include "njcpudetect.h"
 
 #ifdef FEATURE_NANOJIT
 
 #if defined AVMPLUS_IA32
     #define NANOJIT_IA32
 #elif defined AVMPLUS_ARM
     #if defined(TARGET_THUMB2) || defined(UNDER_RT)
-         #define NANOJIT_THUMB2
+         #define NANOJIT_THUMB2 1
     #else
          #define NANOJIT_ARM
     #endif
 #elif defined AVMPLUS_PPC
     #define NANOJIT_PPC
 #elif defined AVMPLUS_SPARC
     #define NANOJIT_SPARC
 #elif defined AVMPLUS_AMD64
--- a/test/acceptance/failconfig.txt
+++ b/test/acceptance/failconfig.txt
@@ -359,16 +359,17 @@ abcasm/funcRedef,                       
 abcasm/semanticErrorDuplicateSlotName,          AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3141012
 abcasm/traitsSparseSlots,                       AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3141016
 regress/security/bug_556543,                    AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3141018
 regress/security/bug_555059b,                   AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3141020
 as3/ByteArray/ByteArray:ByteArray readFloat_2 #1, AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3153294
 as3/ByteArray/ByteArrayLzma:ByteArray readFloat_2 #1, AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3153294
 as3/ShellClasses/toplevel:queue a gc collection,      AOT,, skip, http://watsonexp.corp.adobe.com/#bug=3156533
 regress/bug_515935,                             arm-AOT,, skip , bug https://bugzilla.mozilla.org/show_bug.cgi?id=547282
+regress/bug_557275,                             AOT.*-asc2,, skip , http://watsonexp.corp.adobe.com/#bug=3364513
 ecma3/Statements/edowhile_00(6|7),              AOT.*-asc2,, skip , http://watsonexp.corp.adobe.com/#bug=3348356
 
 ####################
 # Float
 ####################
 as3/Types/Float/round: sign check , .*, , expectedfail, https://bugzilla.mozilla.org/show_bug.cgi?id=686708
 as3/Types/Float/nonstrict/op_greaterthan:Order\ of\ evaluation, .*,, expectedfail, https://bugzilla.mozilla.org/show_bug.cgi?id=640052
 as3/Types/Float/nonstrict/op_lessthanorequal:Order\ of\ evaluation, .*,, expectedfail, https://bugzilla.mozilla.org/show_bug.cgi?id=640052