bug 695061: integrate concurrency changes shareable ByteArray and alchemy fixes (p=jasowill,r=dtomack)
authorDan Schaffer <Dan.Schaffer@adobe.com>
Thu, 02 Aug 2012 10:24:51 -0700
changeset 7515 7c1dc225585efffb07c324ddb5e8a25e8a9c5b10
parent 7514 5e07a25d4308d08f228359683f2b6150322244d5
child 7516 b4e4ed919a2d71223d1f2d545c5ce70db532ff24
push id4254
push userdschaffe@adobe.com
push dateFri, 03 Aug 2012 15:34:49 +0000
reviewersdtomack
bugs695061, 1093380, 1095848
bug 695061: integrate concurrency changes shareable ByteArray and alchemy fixes (p=jasowill,r=dtomack) more details on fixes: ~SafepointManager destructor Assertion fix shareable ByteArray compress and uncompress should not block in background workers Alchemy pthread_test_optimized no longer works integration of CL: 1093380 CL@1095848
VMPI/ThreadsWin.cpp
core/ByteArray.as
core/ByteArrayGlue.cpp
core/ByteArrayGlue.h
core/ConcurrencyGlue.cpp
core/ConcurrencyGlue.h
core/Isolate-inlines.h
core/Isolate.cpp
core/Isolate.h
core/concurrent.as
test/acceptance/as3/Workers/ByteArrayWorkersLzma.as
vmbase/Safepoint.cpp
vmbase/VMThread.h
--- a/VMPI/ThreadsWin.cpp
+++ b/VMPI/ThreadsWin.cpp
@@ -228,18 +228,21 @@ void VMPI_condVarSignal(vmpi_condvar_t* 
     WaitingThread* waiter = condvar->head;
     if (waiter != NULL) {
         waiter->notified = true;
         condvar->head = waiter->next;
         if (condvar->head == NULL) {
             condvar->tail = NULL;
         }
         HANDLE threadHandle = OpenThread(STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE | 0x3FF, false, waiter->threadID);
+        assert(threadHandle != NULL);
         // Signal the thread by queuing a dummy APC.
-        QueueUserAPC(dummyAPC, threadHandle, NULL);
+        DWORD result = QueueUserAPC(dummyAPC, threadHandle, NULL);
+        (void)result;
+        assert(result > 0);
         CloseHandle(threadHandle);
     }
 }
 
 #endif // UNDER_CE
 
 bool VMPI_threadAttrInit(vmpi_thread_attr_t* attr)
 {
--- a/core/ByteArray.as
+++ b/core/ByteArray.as
@@ -1020,50 +1020,53 @@ public class ByteArray implements IDataI
     _dontEnumPrototype(prototype);
 
 
     /**
      * Atomic compare and swap of integer values in adjacent bytes in this byte array.
      *
      * Compares an expected value with the actual value in the byte array location
      * addressed by a start index measured in bytes.
-     * Iff these two values are same,
-     * 'newValue' is placed into the location
-     * and the 'expectedValue' is returned.
+     *
+     * If the specified values at the given location are the same, the value given
+     * by the newValue parameter is written at the location specified by the byteIndex 
+     * parameter and the previous value of that location is returned.
+     *
      * Otherwise, the actual value is returned.
      * All of the above is performed in one atomic hardware transaction.
      * byteIndex must be a multiple of 4.
      *
-     * @param byteIndex the (low) index at which the actual value in the byte array begins
-     * @param expectedValue if this value is currently in the addressed location, perform the swap
-     * @param newValue the new value to put into the addressed location
+     * @param byteIndex int containing the desired index at which the expectedValue 
+     *        parameter should be compared.
+     * @param expectedValue int containing the expected value of the integer to be
+     *        replaced by the newValue parameter.
+     * @param newValue int containing the new value to put into the location specified 
+     *         by the byteIndex parameter.
      * @throws ArgumentError if byteIndex is not a multiple of 4 or negative
-     * @return either 'expectedValue' or the actual value
+     * @return int containing the previous value at the specified location
      */
     [API(CONFIG::SWF_17)]
     public native function atomicCompareAndSwapIntAt(byteIndex :int, expectedValue: int, newValue :int) :int;
 
     /**
      * Atomic compare and change the length of this byte array.
      *
      * Compares an expected length with the actual length of this the byte array.
-     * Iff these two values are same,
-     * the array length is changed to 'newLength',
-     * allocating a new underlying data buffer
-     * and copying existing data into it if necessary.
-     * In this case 'true' is returned.
-     * Otherwise, 'false' is returned.
+     * If the expected length argument and current ByteArray.length property are equal,
+     * the length is changed to the value specified in the newLength parameter.
+     *
+     * The compare of the parameter and the update of the length all occur in a single atomic 
+     * transaction.
      *
-     * Changing the array's data buffer as needed and assigning the new length to the array and
-     * determining the return value of this call is all done in one atomic action
-     * wrt. all competing calls that may affect the byte array's length.
-     *
-     * @param expectedLength if this value is currently the byte array's length, perform the length change
-     * @param newLength the intended new length of this byte array
-     * @return whether the array length has been changed
+     * @param expectedLength int containing the expected value of the ByteArray's length. If this value is 
+     *        equal to the ByteArray.length property, the length will be changed to the value
+     *        specified by the newLength argument.
+     * @param newLength int containing the length the ByteArray should be after the
+     *        operation succeeds.
+     * @return int containing the previous value of ByteArray.length
      */
     [API(CONFIG::SWF_17)]
     public native function atomicCompareAndSwapLength(expectedLength: int, newLength :int) :int;    
 
 	
 	/**
 	 * Every byte array can either be "non-shareable" or "shareable".
 	 * This flag indicates which is the case for this byte array.
@@ -1083,16 +1086,20 @@ public class ByteArray implements IDataI
 	 * Then both the local and the remote byte array share their content.
 	 *
 	 * Concurrent access to a shared byte array from multiple workers is subject to races.
 	 * For concurrency control, you can use the compare-and-swap facility in this class
 	 * and/or the dedicated mechanisms in package 'flash.concurrent'.
 	 *
 	 * @returns whether this byte array is backed by storage only accessible
 	 * in this worker and whether passing it on to another worker is done by copying
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
 	 */
 	[API(CONFIG::SWF_17)]
 	public native function get shareable() :Boolean;
 	
 	/**
 	 * Sets the value of the 'shareable' flag described above.
 	 *
 	 * When setting the flag to 'true',
@@ -1102,16 +1109,20 @@ public class ByteArray implements IDataI
 	 *
 	 * When setting the flag to 'false', if its previous value was 'true',
 	 * the byte array's contents is copied into a new backing storage buffer
 	 * and this buffer is immediately assigned to the byte array
 	 * for use by any subsequent operations.
 	 * Thus the sharing of contents with other workers is terminated and
 	 * subsequent uses of this byte array as MessageChannel call argument lead
 	 * to backing storage buffer copying.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
 	 */
 	[API(CONFIG::SWF_17)]
 	public native function set shareable(newValue :Boolean) :void;
 };
 
 
 
 /*
--- a/core/ByteArrayGlue.cpp
+++ b/core/ByteArrayGlue.cpp
@@ -122,30 +122,45 @@ namespace avmplus
     // requires: T is integral type.
     // returns r, where r is least integral multiple of b such that r >= a.
     //
     // Bugzilla 699176: lift to shared header or specialize to uint64_t.
     template <typename T> REALLY_INLINE T roundUpTo(T a, T b) {
         return ((a + (b - 1)) / b) * b;
     }
 
+    // When we might be reading or writing to ourself, use this function
+    // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions in CPP files
+    /*static*/
+    REALLY_INLINE void move_or_copy(void* dst, const void* src, uint32_t count)
+    {
+        if ((uintptr_t(dst) - uintptr_t(src)) >= uintptr_t(count))
+        {
+            VMPI_memcpy(dst, src, count);
+        }
+        else
+        {
+            VMPI_memmove(dst, src, count);
+        }
+    }
+
     //
     // ByteArray
     //
     
     ByteArray::ByteArray(Toplevel* toplevel)
         : DataIOBase()
         , DataInput()
         , DataOutput()
         , m_toplevel(toplevel)
         , m_gc(toplevel->core()->GetGC())
         , m_subscribers(m_gc, 0)
         , m_copyOnWriteOwner(NULL)
         , m_position(0)
-        , m_buffer(mmfx_new(Buffer))
+        , m_buffer(mmfx_new(Buffer()))
         , m_workerLocal(true)
         , m_isLinkWrapper(false)
     {
         static_assert(uint64_t(MAX_BYTEARRAY_STORE_LENGTH) < 0x100000000ULL, "Constraint on MAX_BYTEARRAY_STORE_LENGTH");
         static_assert(MAX_BYTEARRAY_SHORT_ACCESS_LENGTH >= 4095, "Constraint on MAX_BYTEARRAY_SHORT_ACCESS_LENGTH");
         static_assert(uint64_t(MAX_BYTEARRAY_STORE_LENGTH) + uint64_t(MAX_BYTEARRAY_SHORT_ACCESS_LENGTH) < 0x100000000ULL, "Constraints on internal ByteArray constants");
         AvmAssert(m_gc != NULL);
 
@@ -278,22 +293,32 @@ namespace avmplus
         // we must have a non-null value for m_copyOnWriteOwner, as we
         // use it as an implicit boolean as well, so if none is provided,
         // cheat and use m_gc->emptyWeakRef
         if (owner == NULL)
             owner = (MMgc::GCObject*)m_gc->emptyWeakRef;
         SetCopyOnWriteOwner(owner);
     }
         
+    REALLY_INLINE bool ByteArray::Grower::RequestWillReallocBackingStore() const
+    {
+        return m_minimumCapacity > m_owner->m_buffer->capacity || m_owner->IsCopyOnWrite();
+    }
+
+    REALLY_INLINE bool ByteArray::Grower::RequestExceedsMemoryAvailable() const
+    {
+        return m_minimumCapacity > (MMgc::GCHeap::kMaxObjectSize - MMgc::GCHeap::kBlockSize*2);
+    }
+
     void FASTCALL ByteArray::Grower::EnsureWritableCapacity()
     {
-        if (m_minimumCapacity > (MMgc::GCHeap::kMaxObjectSize - MMgc::GCHeap::kBlockSize*2))
+        if (RequestExceedsMemoryAvailable())
             m_owner->ThrowMemoryError();
 
-        if (m_minimumCapacity > m_owner->m_buffer->capacity || m_owner->IsCopyOnWrite())
+        if (RequestWillReallocBackingStore())
         {
             uint32_t newCapacity = m_owner->m_buffer->capacity << 1;
             if (newCapacity < m_minimumCapacity)
                 newCapacity = m_minimumCapacity;
             if (newCapacity < kGrowthIncr)
                 newCapacity = kGrowthIncr;
 
             ReallocBackingStore(newCapacity);
@@ -338,23 +363,16 @@ namespace avmplus
             newCapacity = m_minimumCapacity;
         if (newCapacity < kGrowthIncr)
             newCapacity = kGrowthIncr;
         
         m_oldArray = m_owner->m_buffer->array;
         m_oldLength = m_owner->m_buffer->length;
         m_oldCapacity = m_owner->m_buffer->capacity;
         
-        if (m_onlyIfExpected) {
-            if (m_oldLength != m_expectedLength) {
-                m_succeeded = false;
-                return;
-            }
-        }
-
         uint8_t* newArray = mmfx_new_array_opt(uint8_t, newCapacity, MMgc::kCanFail);
         if (!newArray)
             m_owner->ThrowMemoryError();
 
         // Note that TellGcXXX always expects capacity, not (logical) length.
         m_owner->TellGcNewBufferMemory(newArray, newCapacity);
         if (m_oldArray)
         {
@@ -370,45 +388,23 @@ namespace avmplus
         m_owner->m_buffer->array = newArray;
         m_owner->m_buffer->capacity = newCapacity;
         if (m_owner->m_copyOnWriteOwner != NULL)
         {
             m_owner->m_copyOnWriteOwner = NULL;
             // Set this to NULL so we don't attempt to delete it in our dtor.
             m_oldArray = NULL;
         }
-        m_succeeded = true;
 
-    }
-
-    class ByteArrayTask: public vmbase::SafepointTask
-    {
-    public:
-        ByteArrayTask(ByteArray* ba)
-            : m_byteArray(ba)
-        {}
-
-        void exec() 
-        {
-            if (m_byteArray->isWorkerLocal()) {
-                run();
-            } 
-            else {
-                Isolate* isolate = m_byteArray->toplevel()->core()->getIsolate();
-                if (isolate) {
-                    isolate->getAggregate()->runHoldingIsolateMapLock(this);
-                }
-                else {
-                    run();
-                }
+        if (vmbase::SafepointRecord::hasCurrent()) {
+            if (vmbase::SafepointRecord::current()->manager()->inSafepointTask()) {
+                AvmCore::getActiveCore()->getIsolate()->getAggregate()->reloadGlobalMemories();
             }
         }
-
-        ByteArray* m_byteArray;
-    };
+    }
 
     /*
         Why the "Grower" class?
         
         (1) It provides a clean way to defer discarding the old buffer until the
             end of the calling function; this matters in the case of Write(),
             as it's legal to call Write() on your own buffer, and so if growth
             occurs, you must not discard the old buffer until copying takes place.
@@ -433,39 +429,193 @@ namespace avmplus
         if (m_oldArray != NULL && m_oldArray != m_owner->m_buffer->array)
         {
             // Note that TellGcXXX always expects capacity, not (logical) length.
             m_owner->TellGcDeleteBufferMemory(m_oldArray, m_oldCapacity);
             mmfx_delete_array(m_oldArray);
         }
     }
 
-    class ByteArrayGetWritableBufferTask: public ByteArrayTask
+    //
+    // The following code is added to support sharing byte array data between
+    // two Isolates. Domain memory is affected by changes to shared byte arrays
+    // and must be notified within a safepoint when a backing store has been 
+    // changed. Every attempt was made to avoid a safepoint when not 
+    // needed.
+    //
+    // Assumptions: 
+    //    - Acquiring a safepoint is more expensive than 2-3 function calls and
+    //      several if statements.
+    //    - A byte array is only considered shared when its buffer has more than 
+    //      a single reference to it and the only way to get an additional reference
+    //      is to share a byte array between isolates, using setSharedProperty or 
+    //      a similar mechanism.
+    //
+    //  Invariants:
+    //    - all safepoint operations must happen using a SafepointTask object
+    //    - a safepoint cannot be recursively entered
+    //    - a safepoint cannot be long jumped out of; any exception must be
+    //      caught and not allowed to escape the safepoint.
+    //    - if the current operation is not in a safepoint no operations can be
+    //      in a safepoint task.
+    //    - only operations that change the size of a byte array's backing
+    //      store may require a safepoint
+    //    - only backing stores that are shared between isolates may require
+    //      a safepoint.
+    // 
+
+    //
+    // This is the base class for all ByteArray tasks that *may* need to run
+    // in a safepoint.  the exec method makes the determination of requiring
+    // safepoint based on if the associated byte array is local or shared.
+    // 
+    class ByteArrayTask: public vmbase::SafepointTask
     {
     public:
-        ByteArrayGetWritableBufferTask(ByteArray* byteArray)
-            : ByteArrayTask(byteArray)
-            , result(NULL)
+        ByteArrayTask(ByteArray* ba)
+            : m_byteArray(ba)
+            , m_core(ba->m_toplevel->core())
+            , m_exception(NULL)
         {}
 
-        void run() 
+        void exec()
         {
-            ByteArray::Grower grower(m_byteArray, m_byteArray->getUnderlyingBuffer()->capacity);
-            grower.EnsureWritableCapacity();
-            result = m_byteArray->getUnderlyingBuffer()->array;
+            Isolate* isolate = m_core->getIsolate();
+            isolate->getAggregate()->runSafepointTaskHoldingIsolateMapLock(this);
+            if (m_exception) {
+                m_core->throwException(m_exception);
+            }
+        }
+
+    protected:
+        ByteArray* m_byteArray;
+        AvmCore* m_core;
+        Exception* m_exception;
+    };
+
+    //
+    // ensures that we change the length of the byte array within a safepoint
+    // when required. if the new length will not cause a resize or if the
+    // byte array isn't shared then no safepoint 
+    // 
+    class ByteArraySetLengthTask: public ByteArrayTask
+    {
+    public:
+        ByteArraySetLengthTask(ByteArray* ba, uint32_t newLength, bool calledFromLengthSetter)
+            : ByteArrayTask(ba)
+            , m_newLength(newLength)
+            , m_calledFromLengthSetter(calledFromLengthSetter)
+        {
         }
 
-        uint8_t* result;
+        void run()  
+        {
+            // safepoints cannot survive exceptions
+            TRY(m_core, kCatchAction_Rethrow)
+            {
+                m_byteArray->UnprotectedSetLengthCommon(m_newLength, m_calledFromLengthSetter);
+            }
+            CATCH(Exception* e)
+            {
+                m_exception = e;
+            }
+            END_CATCH;
+            END_TRY;
+        }
+        
+    private:
+        uint32_t m_newLength;
+        bool m_calledFromLengthSetter;
+    };
+
+    //
+    // this task ensures that the compare and swap operation is atomic
+    // if the backing store will be updated when the length is set this
+    // task ensures for shared byte arrays that it will be performed
+    // in a safepoint.
+    // 
+    class ByteArrayCompareAndSwapLengthTask: public ByteArrayTask
+    {
+    public:
+        ByteArrayCompareAndSwapLengthTask(ByteArray* ba, uint32_t expectedLength, uint32_t newLength)
+            : ByteArrayTask(ba) 
+            , result(0)
+            , m_expectedLength(expectedLength)
+            , m_newLength(newLength)
+        {
+        }
+
+        void run()
+        {
+            // safepoints cannot survive exceptions
+            TRY(m_core, kCatchAction_Rethrow)
+            {
+                // we are in a safepoint or no other threads can possibly
+                // be referencing this byte array
+                result = m_byteArray->UnprotectedAtomicCompareAndSwapLength(m_expectedLength, m_newLength);
+            }
+            CATCH(Exception* e)
+            {
+                m_exception = e;
+            }
+            END_CATCH;
+            END_TRY;
+        }
+
+        uint32_t result;
+
+    private:
+        uint32_t m_expectedLength;
+        uint32_t m_newLength;
+    };
+
+    //
+    // this task is used for lzma compression operations on 
+    // shared byte arrays *only*.
+    // it allows the very expensive lzma compression algorithm
+    // to run on a temporary buffer outside of a safepoint
+    // and then to swap the temp/scratch buffer in a safepoint
+    //
+    class ByteArraySwapBufferTask: public ByteArrayTask
+    {
+    public:
+        ByteArraySwapBufferTask(ByteArray* ba, ByteArray::Buffer* dest)
+            : ByteArrayTask(ba)
+            , m_destination(dest)
+        {}
+
+        void run()
+        {
+            ByteArray::Buffer* src = m_byteArray->getUnderlyingBuffer();
+
+            AvmAssert((src != NULL) && (m_destination != NULL));
+
+            m_destination->array = src->array;
+            m_destination->capacity = src->capacity;
+            m_destination->length = src->length;
+
+            // we don't want to swap the sources array
+            // value we just want to set it to null
+            // as the compression method will notify
+            // the GC and delete it after the safepoint
+            // has completed
+            src->array = NULL;
+
+            m_byteArray->m_buffer = m_destination;
+        }
+
+    private:
+        FixedHeapRef<ByteArray::Buffer> m_destination;
     };
 
     uint8_t* FASTCALL ByteArray::GetWritableBuffer()
     {
-        ByteArrayGetWritableBufferTask task(this);
-        task.exec();
-        return task.result;
+        // setlength is always called before using this
+        // we are asserting that to remain the case
+        return m_buffer->array;
     }
 
     uint8_t& ByteArray::operator[](uint32_t index)
     {
         if (index >= m_buffer->length)
             SetLength(index + 1);
         return m_buffer->array[index];
     }
@@ -494,67 +644,48 @@ namespace avmplus
     // suggested by Scott Petersen based on experiments with policy in
     // Alchemy; the chosen threshold (also 24 MB) was assumed to be an
     // appropriate value, but it would be worthwhile to check that
     // assumption when time permits.
 
     static const uint32_t kHugeGrowthThreshold = 24*1024*1024;
     static const uint32_t kHugeGrowthIncr = 24*1024*1024;
 
-    class ByteArraySetLengthTask: public ByteArrayTask
+    void ByteArray::SetLengthCommon(uint32_t newLength, bool calledFromLengthSetter)
     {
-    public:
-        ByteArraySetLengthTask(ByteArray* ba, uint32_t newLength, bool calledFromLengthSetter, bool onlyIfExpected, uint32_t expectedLength)
-            : ByteArrayTask(ba)
-            , m_newLength(newLength)
-            , m_onlyIfExpected(onlyIfExpected)
-            , m_calledFromLengthSetter(calledFromLengthSetter)
-            , m_expectedLength(expectedLength)
-        {}
-
-        void run()  //inherited
-        {
-            ByteArray::Grower grower(m_byteArray, m_newLength, m_onlyIfExpected, m_expectedLength);
-            result = grower.SetLengthCommon(m_newLength, m_calledFromLengthSetter);
+        if (IsShared()) {
+            ByteArraySetLengthTask task(this, newLength, calledFromLengthSetter);
+            task.exec();
+        }
+        else {
+            UnprotectedSetLengthCommon(newLength, calledFromLengthSetter);
         }
-        
-        uint32_t result;
+    }
 
-    private:
-        uint32_t m_newLength;
-        bool m_onlyIfExpected;
-        bool m_calledFromLengthSetter;
-        uint32_t m_expectedLength;
-    };
+    void ByteArray::UnprotectedSetLengthCommon(uint32_t newLength, bool calledFromLengthSetter)
+    {
+        if (m_subscribers.length() > 0 && m_buffer->length < DomainEnv::GLOBAL_MEMORY_MIN_SIZE) {
+            m_toplevel->throwRangeError(kInvalidRangeError);
+        }
 
-    uint32_t ByteArray::SetLengthCommon(uint32_t newLength, bool calledFromLengthSetter, bool onlyIfExpected, uint32_t expectedLength)
-    {
-        if (m_subscribers.length() > 0 && m_buffer->length < DomainEnv::GLOBAL_MEMORY_MIN_SIZE)
-            m_toplevel->throwRangeError(kInvalidRangeError);
-        
-        ByteArraySetLengthTask task(this, newLength, calledFromLengthSetter, onlyIfExpected, expectedLength);
-        task.exec();
-        return task.result;
+        Grower grower(this, newLength);
+        grower.SetLengthCommon(newLength, calledFromLengthSetter);
     }
 
 
-    uint32_t ByteArray::Grower::SetLengthCommon(uint32_t newLength, bool calledFromLengthSetter)
+    void ByteArray::Grower::SetLengthCommon(uint32_t newLength, bool calledFromLengthSetter)
     {
         if (!calledFromLengthSetter ||
             (newLength < kHugeGrowthThreshold &&
              m_owner->m_buffer->length < kHugeGrowthThreshold))
         {
             if (newLength > m_owner->m_buffer->capacity)
             {
                 EnsureWritableCapacity();
             } 
-            else
-            {
-                m_succeeded = m_onlyIfExpected ? m_expectedLength == m_oldLength : true;
-            }
         }
         else
         {
             // Bugzilla 699176: Setting the length is different than other
             // expanding/contracting operations; it represents the client
             // saying exactly how much (or how little) storage they
             // anticipate using.
             //
@@ -570,189 +701,61 @@ namespace avmplus
             uint32_t newCap = ((newCapRoundedUp <= MAX_BYTEARRAY_STORE_LENGTH)
                                ? uint32_t(newCapRoundedUp) : newLength);
 
             AvmAssert(newCap >= newLength);
 
             if (newCap != m_owner->m_buffer->capacity)
             {
                 ReallocBackingStore(newCap);
-            } else {
-                m_succeeded = m_onlyIfExpected ? m_expectedLength == m_oldLength : true;
-
-            }
+            } 
         }
         
-        if (m_succeeded) {
-            if (false) fprintf(stderr, "from %u expected: %u to %u (eq?%s) return %d in %d\n", m_owner->m_buffer->length, 
-                               m_expectedLength,
-                               newLength, 
-                               (m_owner->m_buffer->length == newLength) ? "true" : "false",
-                               m_oldLength,
-                               AvmCore::getActiveCore()->getIsolate()->desc);
-            m_owner->m_buffer->length = newLength;
-            
-            if (m_owner->m_position > newLength)
-                m_owner->m_position = newLength;
-
-            if (vmbase::SafepointRecord::hasCurrent()) {
-                if (vmbase::SafepointRecord::current()->manager()->inSafepointTask()) {
-                    AvmCore::getActiveCore()->getIsolate()->getAggregate()->reloadGlobalMemories();
-                }
-            }
+        m_owner->m_buffer->length = newLength;
+        
+        if (m_owner->m_position > newLength) {
+            m_owner->m_position = newLength;
         }
-        return m_oldLength;
     }
 
 
     void FASTCALL ByteArray::SetLength(uint32_t newLength)
     {
         SetLengthCommon(newLength, false);
     }
 
     void FASTCALL ByteArray::SetLengthFromAS3(uint32_t newLength)
     {
         SetLengthCommon(newLength, true);
     }
 
-    // When we might be reading or writing to ourself, use this function
-    // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions in CPP files
-    /*static*/
-    REALLY_INLINE void move_or_copy(void* dst, const void* src, uint32_t count)
-    {
-        if ((uintptr_t(dst) - uintptr_t(src)) >= uintptr_t(count))
-        {
-            VMPI_memcpy(dst, src, count);
-        }
-        else
-        {
-            VMPI_memmove(dst, src, count);
-        }
-    }
-
     void ByteArray::Read(void* buffer, uint32_t count)
     {
         CheckEOF(count);
         move_or_copy(buffer, m_buffer->array + m_position, count);
         m_position += count;
     }
 
-    class ByteArrayWriteTask: public ByteArrayTask
-    {
-    public:
-        ByteArrayWriteTask(ByteArray* ba, const void* buffer, uint32_t count, uint32_t writeEnd)
-            : ByteArrayTask(ba)
-            , m_buffer(buffer)
-            , m_count(count)
-            , m_writeEnd(writeEnd)
-        {}
-
-        void run() {
-            ByteArray::Grower grower(m_byteArray, m_writeEnd);
-            grower.EnsureWritableCapacity();
-            
-            ByteArray::Buffer* buffer = m_byteArray->getUnderlyingBuffer();
-            uint8_t* dest = buffer->array + m_byteArray->GetPosition();
-            if (dest) {
-                move_or_copy(dest, m_buffer, m_count);
-                m_byteArray->SetPosition(m_byteArray->GetPosition() + m_count);
-                if (buffer->length < m_byteArray->GetPosition()) {
-                    buffer->length = m_byteArray->GetPosition();
-                }
-            }
-        }
-    private:
-        const void* m_buffer;
-        uint32_t m_count;
-        uint32_t m_writeEnd;
-
-    };
-
-    class ByteArrayWriteBytesTask: public ByteArrayTask
-    {
-    public:
-        ByteArrayWriteBytesTask(ByteArray* dest, ByteArray* source, uint32_t offset, uint32_t count)
-            : ByteArrayTask(dest)
-            , rangeError(false)
-            , m_bytes(source)
-            , m_count(count)
-            , m_offset(offset)
-        {
-        }
-
-        void run()
-        {
-            if (m_offset > m_bytes->GetLength())
-                m_offset = m_bytes->GetLength();
-
-            if (m_count == 0) {
-                m_count = m_bytes->GetLength()-m_offset;
-            }
-
-            if (m_count > m_bytes->GetLength()-m_offset) {
-                rangeError = true;
-                return;
-            }
-
-            if (m_count > 0) {
-                uint32_t writeEnd = m_byteArray->GetPosition()+m_count;
-                ByteArray::Grower grower(m_byteArray, writeEnd);
-                grower.EnsureWritableCapacity();
-                
-                ByteArray::Buffer* buffer = m_byteArray->getUnderlyingBuffer();
-                uint8_t* dest = buffer->array + m_byteArray->GetPosition();
-                if (dest) {
-                    const void* source = m_bytes->GetReadableBuffer()+m_offset;
-                    move_or_copy(dest, source, m_count);
-                    m_byteArray->SetPosition(m_byteArray->GetPosition()+m_count);
-                    if (buffer->length < m_byteArray->GetPosition()) {
-                        buffer->length = m_byteArray->GetPosition();
-                    }
-                }
-            }
-        }
-
-        bool rangeError;
-
-    private:
-        ByteArray* m_bytes;
-        uint32_t m_count;
-        uint32_t m_offset;
-    };
-
     void ByteArray::Write(const void* buffer, uint32_t count)
     {
         uint32_t writeEnd = m_position + count;
-        ByteArrayWriteTask task(this, buffer, count, writeEnd);
-        task.exec();
+        
+        Grower grower(this, writeEnd);
+        grower.EnsureWritableCapacity();
+        
+        move_or_copy(m_buffer->array + m_position, buffer, count);
+        m_position += count;
+        if (m_buffer->length < m_position)
+            m_buffer->length = m_position;
     }
-    
-    class ByteArrayEnsureCapacityTask: public ByteArrayTask
-    {
-    public:
-        ByteArrayEnsureCapacityTask(ByteArray* byteArray, uint32_t capacity)
-            : ByteArrayTask(byteArray)
-            , m_capacity(capacity)
-        {}
-
-        void run()
-        {
-            ByteArray::Grower grower(m_byteArray, m_capacity);
-            grower.EnsureWritableCapacity();
-        }
-
-    private:
-        uint32_t m_capacity;
-
-    };
 
     void ByteArray::EnsureCapacity(uint32_t capacity)
     {
-        ByteArrayEnsureCapacityTask task(this, capacity);
-        task.exec();
+        Grower grower(this, capacity);
+        grower.EnsureWritableCapacity();
     }
 
     NO_INLINE void ByteArray::EnsureCapacityNoInline(uint32_t capacity)
     {
         this->EnsureCapacity(capacity);
     }
 
     bool ByteArray::EnsureCapacityOrFail(uint32_t newCap,
@@ -888,517 +891,479 @@ namespace avmplus
         // two (and leaving the high bits at zero)
         uint8_t unpackedSize[4];
         uint8_t unpackedSizeHighBits[4]; // (not uint32_t; that injects padding)
         uint8_t compressedPayload[1];    // payload is variable sized.
     };
 
     REALLY_INLINE uint32_t umax(uint32_t a, uint32_t b) { return a > b ? a : b; }
 
-    //
-    // executes the CompressViaLzma logic within a safepoint
-    //
-    class ByteArrayCompressViaLzmaTask: public ByteArrayTask
+    void ByteArray::CompressViaLzma()
     {
-    public:
-        ByteArrayCompressViaLzmaTask(ByteArray* ba)
-            : ByteArrayTask(ba)
+        // Snarf the data and give ourselves some empty data
+        // (remember, existing data might be copy-on-write so don't dance on it)
+        uint8_t* origData                       = m_buffer->array;
+        uint32_t origLen                        = m_buffer->length;
+        uint32_t origCap                        = m_buffer->capacity;
+        uint32_t origPos                        = m_position;
+        MMgc::GCObject* origCopyOnWriteOwner    = m_copyOnWriteOwner;
+        if (!origLen) // empty buffer should give empty result
+            return;
+
+        // we need to create a new scratch buffer that we will
+        // swap with previous one when the compression operation is
+        // completed, so that we don't disturb any other workers
+        // that are referencing this one.
+        // This is done to avoid a long safepoint task as all other
+        // workers must be halted during a safepoint.
+        bool shared = IsShared();
+        FixedHeapRef<Buffer> origBuffer = m_buffer;
+        if (shared) {
+            m_buffer = mmfx_new(Buffer());
+        }
+
+        m_buffer->array    = NULL;
+        m_buffer->length   = 0;
+        m_buffer->capacity = 0;
+        m_position         = 0;
+        m_copyOnWriteOwner = NULL;
+
+        // Unlike zlib, lzma does not provide a method for computing
+        // any upper bound on "compressed" size.  So we guess, and
+        // retry if the guess was too aggressive.  The retry is only
+        // needed if the data was incompressible; we compress anyhow
+        // for compatibility with the lzma v1 format (the v1 headers
+        // do not have a way to flag an uncompressed payload).
+
+        uint32_t newCap = umax(origCap, lzmaHeaderSize);
+        size_t lzmaPropsSize = LZMA_PROPS_SIZE;
+        int retcode;
+        struct lzma_compressed *destOverlay;
+        size_t destLen;
+
+    retry_compress:
+        Exception *exn;
+        bool ensured = EnsureCapacityOrFail(newCap, kCatchAction_Rethrow, &exn);
+        if (!ensured)
         {
+            if (shared) {
+                m_buffer = origBuffer;
+            }
+            // clean up when the EnsureCapacity call fails.
+            m_buffer->array    = origData;
+            m_buffer->length   = origLen;
+            m_buffer->capacity = origCap;
+            m_position         = origPos;
+            SetCopyOnWriteOwner(origCopyOnWriteOwner);
+
+            m_toplevel->core()->throwException(exn);
         }
 
-        void run()
-        {
-            ByteArray::Buffer* buffer = m_byteArray->getUnderlyingBuffer();
-            // Snarf the data and give ourself some empty data
-            // (remember, existing data might be copy-on-write so don't dance on it)
-            uint8_t* origData                       = buffer->array;
-            uint32_t origLen                        = buffer->length;
-            uint32_t origCap                        = buffer->capacity;
-            uint32_t origPos                        = m_byteArray->GetPosition();
-            MMgc::GCObject* origCopyOnWriteOwner    = m_byteArray->m_copyOnWriteOwner;
-            if (!origLen) // empty buffer should give empty result
-                return;
-
-            buffer->array             = NULL;
-            buffer->length            = 0;
-            buffer->capacity          = 0;
-            m_byteArray->SetPosition(0);
-            m_byteArray->m_copyOnWriteOwner = NULL;
-
-            // Unlike zlib, lzma does not provide a method for computing
-            // any upper bound on "compressed" size.  So we guess, and
-            // retry if the guess was too aggressive.  The retry is only
-            // needed if the data was incompressible; we compress anyhow
-            // for compatibility with the lzma v1 format (the v1 headers
-            // do not have a way to flag an uncompressed payload).
-
-            uint32_t newCap = umax(origCap, lzmaHeaderSize);
-            size_t lzmaPropsSize = LZMA_PROPS_SIZE;
-            int retcode;
-            struct lzma_compressed *destOverlay;
-            size_t destLen;
-
-        retry_compress:
-            TRY(m_byteArray->m_toplevel->core(), kCatchAction_Rethrow)
-            {
-                ByteArray::Grower grower(m_byteArray, newCap);
-                grower.EnsureWritableCapacity();
-            }
-            CATCH(Exception *exn)
-            {
-                // clean up when the EnsureCapacity call fails.
-                buffer->array = origData;
-                buffer->length = origLen;
-                buffer->capacity = origCap;
-                m_byteArray->SetPosition(origPos);
-                m_byteArray->SetCopyOnWriteOwner(origCopyOnWriteOwner);
-                m_byteArray->m_toplevel->core()->throwException(exn);
-            }
-            END_CATCH
-            END_TRY
-
-            destOverlay = (struct lzma_compressed*) buffer->array;
-            destLen = buffer->capacity - lzmaHeaderSize;
+        destOverlay = (struct lzma_compressed*) m_buffer->array;
+        destLen = m_buffer->capacity - lzmaHeaderSize;
 
-            retcode = LzmaCompress(destOverlay->compressedPayload, &destLen,
-                                   origData, origLen,
-                                   destOverlay->lzmaProps, &lzmaPropsSize,
-                                   9, // -1 would yield default level (5),
-                                   1<<20, // 0 would yield default dictSize (1<<24)
-                                   -1,  // default lc (3),
-                                   -1,  // default lp (0),
-                                   -1,  // default pb (2),
-                                   -1,  // default fb (32),
-                                   1); // -1 would yield default numThreads (2)
-
-            switch (retcode) {
-            case SZ_OK:
-                if (destLen > (buffer->capacity - lzmaHeaderSize)) {
-                    AvmAssertMsg(false, "LZMA broke its contract.");
-
-                    // Belt-and-suspenders: If control gets here,
-                    // something is terribly wrong, and LZMA is lying to
-                    // us.  Rather than risk establishing a bogus structure,
-                    // fail as if lzma returned an error code.
-                    goto error_cases;
-                }
-
-                destOverlay->unpackedSize[0] = (uint8_t)((origLen)       & 0xFF);
-                destOverlay->unpackedSize[1] = (uint8_t)((origLen >> 8)  & 0xFF);
-                destOverlay->unpackedSize[2] = (uint8_t)((origLen >> 16) & 0xFF);
-                destOverlay->unpackedSize[3] = (uint8_t)((origLen >> 24) & 0xFF);
+        retcode = LzmaCompress(destOverlay->compressedPayload, &destLen,
+                               origData, origLen,
+                               destOverlay->lzmaProps, &lzmaPropsSize,
+                               9, // -1 would yield default level (5),
+                               1<<20, // 0 would yield default dictSize (1<<24)
+                               -1,  // default lc (3),
+                               -1,  // default lp (0),
+                               -1,  // default pb (2),
+                               -1,  // default fb (32),
+                               1); // -1 would yield default numThreads (2)
 
-                AvmAssert(destOverlay->unpackedSizeHighBits[0] == 0
-                          && destOverlay->unpackedSizeHighBits[1] == 0
-                          && destOverlay->unpackedSizeHighBits[2] == 0
-                          && destOverlay->unpackedSizeHighBits[3] == 0);
-
-                buffer->length = uint32_t(lzmaHeaderSize + destLen);
-                break;
-            case SZ_ERROR_OUTPUT_EOF:
-                // Our guessed target length was not conservative enough.
-                // Since this is a compression algorithm, go with linear
-                // growth on failure (rather than e.g. exponential).
-                newCap += origCap;
+        switch (retcode) {
+        case SZ_OK:
+            if (destLen > (m_buffer->capacity - lzmaHeaderSize)) {
+                AvmAssertMsg(false, "LZMA broke its contract.");
 
-                goto retry_compress;
-            case SZ_ERROR_MEM:
-            case SZ_ERROR_PARAM:
-            case SZ_ERROR_THREAD:
-            default:
-            error_cases:
-                // On other failures, just give up.
-
-                // Even though we set length to 0 (effectively clearing
-                // the state), we set array back to origData so that its
-                // memory will be properly managed.
-                buffer->array = origData;
-                buffer->length = 0;
-                buffer->capacity = origCap;
-                break;
+                // Belt-and-suspenders: If control gets here,
+                // something is terribly wrong, and LZMA is lying to
+                // us.  Rather than risk establishing a bogus structure,
+                // fail as if lzma returned an error code.
+                goto error_cases;
             }
 
-            // Analogous to zlib, maintain policy that Compress() sets
-            // position == length (while Uncompress() sets position == 0).
-            m_byteArray->SetPosition(buffer->length);
-
-            if (origData && origData != buffer->array && origCopyOnWriteOwner == NULL)
-            {
-                // Note that TellGcXXX always expects capacity, not (logical) length.
-                m_byteArray->TellGcDeleteBufferMemory(origData, origCap);
-                mmfx_delete_array(origData);
-            }
-        }
-    };
-
-    void ByteArray::CompressViaLzma()
-    {
-        ByteArrayCompressViaLzmaTask task(this);
-        task.exec();
-    }
-
+            destOverlay->unpackedSize[0] = (uint8_t)((origLen)       & 0xFF);
+            destOverlay->unpackedSize[1] = (uint8_t)((origLen >> 8)  & 0xFF);
+            destOverlay->unpackedSize[2] = (uint8_t)((origLen >> 16) & 0xFF);
+            destOverlay->unpackedSize[3] = (uint8_t)((origLen >> 24) & 0xFF);
 
-    //
-    // executes the CompressViaZlibVariant logic within a safepoint
-    //
-    class ByteArrayCompressViaZlibVariantTask: public ByteArrayTask
-    {
-    public:
-        ByteArrayCompressViaZlibVariantTask(ByteArray* ba, ByteArray::CompressionAlgorithm algorithm)
-            : ByteArrayTask(ba)
-            , m_algorithm(algorithm)
-        {}
+            AvmAssert(destOverlay->unpackedSizeHighBits[0] == 0
+                      && destOverlay->unpackedSizeHighBits[1] == 0
+                      && destOverlay->unpackedSizeHighBits[2] == 0
+                      && destOverlay->unpackedSizeHighBits[3] == 0);
 
-        void run()
-        {
-            ByteArray::Buffer* buffer = m_byteArray->getUnderlyingBuffer();
-            // Snarf the data and give ourself some empty data
-            // (remember, existing data might be copy-on-write so don't dance on it)
-            uint8_t* origData                       = buffer->array;
-            uint32_t origLen                        = buffer->length;
-            uint32_t origCap                        = buffer->capacity;
-            MMgc::GCObject* origCopyOnWriteOwner    = m_byteArray->m_copyOnWriteOwner;
-            if (!origLen) // empty buffer should give empty result
-                return;
-
-            buffer->array             = NULL;
-            buffer->length            = 0;
-            buffer->capacity          = 0;
-            m_byteArray->SetPosition(0);
-            m_byteArray->m_copyOnWriteOwner  = NULL;
+            m_buffer->length = uint32_t(lzmaHeaderSize + destLen);
+            break;
+        case SZ_ERROR_OUTPUT_EOF:
+            // Our guessed target length was not conservative enough.
+            // Since this is a compression algorithm, go with linear
+            // growth on failure (rather than e.g. exponential).
+            newCap += origCap;
 
-            int error = Z_OK;
-            
-            // Use zlib to compress the data. This next block is essentially the
-            // implementation of the compress2() method, but modified to pass a
-            // negative window value (-15) to deflateInit2() for k_deflate mode
-            // in order to obtain deflate-only compression (no ZLib headers).
-
-            const int MAX_WINDOW_RAW_DEFLATE = -15;
-            const int DEFAULT_MEMORY_USE = 8;
-
-            z_stream stream;
-            VMPI_memset(&stream, 0, sizeof(stream));
-            error = deflateInit2(&stream,
-                                    Z_BEST_COMPRESSION,
-                                    Z_DEFLATED,
-                                    m_algorithm == ByteArray::k_zlib ? MAX_WBITS : MAX_WINDOW_RAW_DEFLATE,
-                                    DEFAULT_MEMORY_USE,
-                                    Z_DEFAULT_STRATEGY);
-            AvmAssert(error == Z_OK);
-
-            uint32_t newCap = deflateBound(&stream, origLen);
-
-            ByteArray::Grower grower(m_byteArray, newCap);
-            grower.EnsureWritableCapacity();
+            goto retry_compress;
+        case SZ_ERROR_MEM:
+        case SZ_ERROR_PARAM:
+        case SZ_ERROR_THREAD:
+        default:
+        error_cases:
+            if (shared) {
+                m_buffer = origBuffer;
+            }
+            // On other failures, just give up.
 
-            stream.next_in = origData;
-            stream.avail_in = origLen;
-            stream.next_out = buffer->array;
-            stream.avail_out = buffer->capacity;
-
-            error = deflate(&stream, Z_FINISH);
-            AvmAssert(error == Z_STREAM_END);
-
-            buffer->length = stream.total_out;
-            AvmAssert(buffer->length <= buffer->capacity);
-
-            // Note that Compress() has always ended with position == length,
-            // but Uncompress() has always ended with position == 0.
-            // Weird, but we must maintain it.
-            m_byteArray->SetPosition(buffer->length);
-
-            deflateEnd(&stream);
-
-            // Note: the Compress() method has never reported an error for corrupted data,
-            // so we won't start now. (Doing so would probably require a version check,
-            // to avoid breaking content that relies on misbehavior.)
-            if (origData && origData != buffer->array && origCopyOnWriteOwner == NULL)
-            {
-                // Note that TellGcXXX always expects capacity, not (logical) length.
-                m_byteArray->TellGcDeleteBufferMemory(origData, origCap);
-                mmfx_delete_array(origData);
-            }
+            // Even though we set length to 0 (effectively clearing
+            // the state), we set array back to origData so that its
+            // memory will be properly managed.
+            m_buffer->array    = origData;
+            m_buffer->length   = 0;
+            m_buffer->capacity = origCap;
+            break;
         }
 
-    private:
-        ByteArray::CompressionAlgorithm m_algorithm;
-    };
+        // Analogous to zlib, maintain policy that Compress() sets
+        // position == length (while Uncompress() sets position == 0).
+        m_position = m_buffer->length;
+
+        if (shared) {
+            ByteArraySwapBufferTask task(this, origBuffer);
+            task.exec();
+        }
+
+        if (origData && origData != m_buffer->array && origCopyOnWriteOwner == NULL)
+        {
+            // Note that TellGcXXX always expects capacity, not (logical) length.
+            TellGcDeleteBufferMemory(origData, origCap);
+            mmfx_delete_array(origData);
+        }
+    }
 
     void ByteArray::CompressViaZlibVariant(CompressionAlgorithm algorithm)
     {
-        ByteArrayCompressViaZlibVariantTask task(this, algorithm);
-        task.exec();
+        // Snarf the data and give ourselves some empty data
+        // (remember, existing data might be copy-on-write so don't dance on it)
+        uint8_t* origData                       = m_buffer->array;
+        uint32_t origLen                        = m_buffer->length;
+        uint32_t origCap                        = m_buffer->capacity;
+        MMgc::GCObject* origCopyOnWriteOwner    = m_copyOnWriteOwner;
+        if (!origLen) // empty buffer should give empty result
+            return;
+
+        bool shared = IsShared();
+        FixedHeapRef<Buffer> origBuffer = m_buffer;
+        if (shared) {
+            m_buffer = mmfx_new(Buffer());
+        }
+
+        m_buffer->array    = NULL;
+        m_buffer->length   = 0;
+        m_buffer->capacity = 0;
+        m_position         = 0;
+        m_copyOnWriteOwner = NULL;
+
+        int error = Z_OK;
+        
+        // Use zlib to compress the data. This next block is essentially the
+        // implementation of the compress2() method, but modified to pass a
+        // negative window value (-15) to deflateInit2() for k_deflate mode
+        // in order to obtain deflate-only compression (no ZLib headers).
+
+        const int MAX_WINDOW_RAW_DEFLATE = -15;
+        const int DEFAULT_MEMORY_USE = 8;
+
+        z_stream stream;
+        VMPI_memset(&stream, 0, sizeof(stream));
+        error = deflateInit2(&stream,
+                                Z_BEST_COMPRESSION,
+                                Z_DEFLATED,
+                                algorithm == k_zlib ? MAX_WBITS : MAX_WINDOW_RAW_DEFLATE,
+                                DEFAULT_MEMORY_USE,
+                                Z_DEFAULT_STRATEGY);
+        AvmAssert(error == Z_OK);
+
+        uint32_t newCap = deflateBound(&stream, origLen);
+        EnsureCapacity(newCap);
+
+        stream.next_in = origData;
+        stream.avail_in = origLen;
+        stream.next_out = m_buffer->array;
+        stream.avail_out = m_buffer->capacity;
+
+        error = deflate(&stream, Z_FINISH);
+        AvmAssert(error == Z_STREAM_END);
+
+        m_buffer->length = stream.total_out;
+        AvmAssert(m_buffer->length <= m_buffer->capacity);
+
+        // Note that Compress() has always ended with position == length,
+        // but Uncompress() has always ended with position == 0.
+        // Weird, but we must maintain it.
+        m_position = m_buffer->length;
+
+        deflateEnd(&stream);
+
+        if (shared)
+        {
+            ByteArraySwapBufferTask task(this, origBuffer);
+            task.exec();
+        }
+        // Note: the Compress() method has never reported an error for corrupted data,
+        // so we won't start now. (Doing so would probably require a version check,
+        // to avoid breaking content that relies on misbehavior.)
+        if (origData && origData != m_buffer->array && origCopyOnWriteOwner == NULL)
+        {
+            // Note that TellGcXXX always expects capacity, not (logical) length.
+            TellGcDeleteBufferMemory(origData, origCap);
+            mmfx_delete_array(origData);
+        }
     }
 
     void ByteArray::Uncompress(CompressionAlgorithm algorithm)
     {
         switch (algorithm) {
         case k_lzma:
             UncompressViaLzma();
             break;
         case k_zlib:
         default:
             UncompressViaZlibVariant(algorithm);
             break;
         }
     }
 
-    //
-    // executes the UncompressViaLzma logic within a safepoint
-    //
-    class ByteArrayUncompressViaLzmaTask: public ByteArrayTask
+    void ByteArray::UncompressViaLzma()
     {
-    public:
-        ByteArrayUncompressViaLzmaTask(ByteArray* ba)
-            : ByteArrayTask(ba)
-        {}
+        // Snarf the data and give ourselves some empty data
+        // (remember, existing data might be copy-on-write so don't dance on it)
+        uint8_t* origData                       = m_buffer->array;
+        uint32_t origCap                        = m_buffer->capacity;
+        uint32_t origLen                        = m_buffer->length;
+        uint32_t origPos                        = m_position;
+        MMgc::GCObject* origCopyOnWriteOwner    = m_copyOnWriteOwner;
+
+        if (!origLen) // empty buffer should give empty result
+            return;
+
+        if (!m_buffer->array || m_buffer->length < lzmaHeaderSize)
+            return;
+
+        struct lzma_compressed *srcOverlay;
+        srcOverlay = (struct lzma_compressed*)origData;
 
-        void run()
+        uint32_t unpackedLen;
+        unpackedLen  =  (uint32_t)srcOverlay->unpackedSize[0];
+        unpackedLen +=  (uint32_t)srcOverlay->unpackedSize[1] << 8;
+        unpackedLen +=  (uint32_t)srcOverlay->unpackedSize[2] << 16;
+        unpackedLen +=  (uint32_t)srcOverlay->unpackedSize[3] << 24;
+
+        // check that size is reasonable before modifying internal structure.
+        if (srcOverlay->unpackedSizeHighBits[0] != 0 ||
+            srcOverlay->unpackedSizeHighBits[1] != 0 ||
+            srcOverlay->unpackedSizeHighBits[2] != 0 ||
+            srcOverlay->unpackedSizeHighBits[3] != 0)
         {
-            ByteArray::Buffer* buffer = m_byteArray->getUnderlyingBuffer();
-            // Snarf the data and give ourself some empty data
-            // (remember, existing data might be copy-on-write so don't dance on it)
-            uint8_t* origData                       = buffer->array;
-            uint32_t origCap                        = buffer->capacity;
-            uint32_t origLen                        = buffer->length;
-            uint32_t origPos                        = m_byteArray->GetPosition();
-            MMgc::GCObject* origCopyOnWriteOwner    = m_byteArray->m_copyOnWriteOwner;
+            // We can't allocate a byte array of such large size.
+            ThrowMemoryError();
+        }
+
+        size_t srcLen = (origLen - lzmaHeaderSize);
+
+        bool shared = IsShared();
+        FixedHeapRef<Buffer> origBuffer = m_buffer;
+        if (shared) {
+            m_buffer = mmfx_new(Buffer());
+        }
 
-            if (!origLen) // empty buffer should give empty result
-                return;
+        m_buffer->array    = NULL;
+        m_buffer->length   = 0;
+        m_buffer->capacity = 0;
+        m_position         = 0;
+        m_copyOnWriteOwner = NULL;
 
-            if (!buffer->array || buffer->length < lzmaHeaderSize)
-                return;
-
-            struct lzma_compressed *srcOverlay;
-            srcOverlay = (struct lzma_compressed*)origData;
+        // Since we rely on unpackedLen being correct, we do not need
+        // to loop with different trial lengths; either it works on
+        // first try, or it will always fail.
+        Exception *exn;
+        bool ensured =
+            EnsureCapacityOrFail(unpackedLen, kCatchAction_Rethrow, &exn);
+        if (!ensured)
+        {
+            if (shared) {
+                m_buffer = origBuffer;
+            }
+            // clean up when the EnsureCapacity call fails.
 
-            uint32_t unpackedLen;
-            unpackedLen  =  (uint32_t)srcOverlay->unpackedSize[0];
-            unpackedLen +=  (uint32_t)srcOverlay->unpackedSize[1] << 8;
-            unpackedLen +=  (uint32_t)srcOverlay->unpackedSize[2] << 16;
-            unpackedLen +=  (uint32_t)srcOverlay->unpackedSize[3] << 24;
+            // (keep in sync with state restoration in error_cases: labelled below)
+            m_buffer->array    = origData;
+            m_buffer->length   = origLen;
+            m_buffer->capacity = origCap;
+            m_position         = origPos;
+            SetCopyOnWriteOwner(origCopyOnWriteOwner);
+
+            m_toplevel->core()->throwException(exn);
+        }
+
+        int retcode;
+        size_t destLen = unpackedLen;
 
-            // check that size is reasonable before modifying internal structure.
-            if (srcOverlay->unpackedSizeHighBits[0] != 0 ||
-                srcOverlay->unpackedSizeHighBits[1] != 0 ||
-                srcOverlay->unpackedSizeHighBits[2] != 0 ||
-                srcOverlay->unpackedSizeHighBits[3] != 0)
-            {
-                // We can't allocate a byte array of such large size.
-                m_byteArray->m_toplevel->throwMemoryError(kOutOfMemoryError);
+        retcode = LzmaUncompress(m_buffer->array, &destLen,
+                                 srcOverlay->compressedPayload, &srcLen,
+                                 srcOverlay->lzmaProps, LZMA_PROPS_SIZE);
+        switch (retcode) {
+        case SZ_OK:                // - OK
+            if (destLen != unpackedLen) {
+                // Belt-and-suspenders: If control gets here,
+                // something is terribly wrong, and either LZMA is
+                // lying, or the lzma header in source byte array got
+                // garbled.  Rather than risk establishing a bogus
+                // structure, fail as if lzma returned an error code.
+                goto error_cases;
             }
 
-            size_t srcLen = (origLen - lzmaHeaderSize);
-
-            buffer->array             = NULL;
-            buffer->length            = 0;
-            buffer->capacity          = 0;
-            m_byteArray->SetPosition(0);
-            m_byteArray->m_copyOnWriteOwner  = NULL;
-
-            // Since we rely on unpackedLen being correct, we do not need
-            // to loop with different trial lengths; either it works on
-            // first try, or it will always fail.
-            TRY(m_byteArray->m_toplevel->core(), kCatchAction_Rethrow)
-            {
-                ByteArray::Grower grower(m_byteArray, unpackedLen);
-                grower.EnsureWritableCapacity();
-            }
-            CATCH(Exception *exn)
-            {
-                // clean up when the EnsureCapacity call fails.
-                // (keep in sync with state restoration in error_cases: labelled below)
-                buffer->array = origData;
-                buffer->length = origLen;
-                buffer->capacity = origCap;
-                m_byteArray->SetPosition(origPos);
-                m_byteArray->SetCopyOnWriteOwner(origCopyOnWriteOwner);
-                m_byteArray->m_toplevel->core()->throwException(exn);
-            }
-            END_CATCH
-            END_TRY
-
-            int retcode;
-            size_t destLen = unpackedLen;
+            m_buffer->length = uint32_t(destLen);
 
-            retcode = LzmaUncompress(buffer->array, &destLen,
-                                     srcOverlay->compressedPayload, &srcLen,
-                                     srcOverlay->lzmaProps, LZMA_PROPS_SIZE);
-            switch (retcode) {
-            case SZ_OK:                // - OK
-                if (destLen != unpackedLen) {
-                    // Belt-and-suspenders: If control gets here,
-                    // something is terribly wrong, and either LZMA is
-                    // lying, or the lzma header in source byte array got
-                    // garbled.  Rather than risk establishing a bogus
-                    // structure, fail as if lzma returned an error code.
-                    goto error_cases;
-                }
-
-                buffer->length = uint32_t(destLen);
-
-                // Analogous to zlib, maintain policy that Uncompress() sets
-                // position == 0 (while Compress() sets position == length).
-                // (it was set above)
-
+            // Analogous to zlib, maintain policy that Uncompress() sets
+            // position == 0 (while Compress() sets position == length).
+            // (it was set above)
+            if (shared) {
+                ByteArraySwapBufferTask task(this, origBuffer);
+                task.exec();
+            }
 
-                if (origData && origData != buffer->array && origCopyOnWriteOwner == NULL)
-                {
-                    // Note that TellGcXXX always expects capacity, not (logical) length.
-                    m_byteArray->TellGcDeleteBufferMemory(origData, origCap);
-                    mmfx_delete_array(origData);
-                }
-
-                break;
-
-            case SZ_ERROR_DATA:        // - Data error
-            case SZ_ERROR_MEM:         // - Memory allocation arror
-            case SZ_ERROR_UNSUPPORTED: // - Unsupported properties
-            case SZ_ERROR_INPUT_EOF:   // - it needs more bytes in input buffer (src)
-            default:
-            error_cases:
-                // In error cases:
-
-                // 1) free the new buffer
-                m_byteArray->TellGcDeleteBufferMemory(buffer->array, buffer->capacity);
-                mmfx_delete_array(buffer->array);
-
-                // 2) put the original data back.
-                // (keep in sync with state restoration above)
-                buffer->array = origData;
-                buffer->length = origLen;
-                buffer->capacity = origCap;
-                m_byteArray->SetPosition(origPos);
-                m_byteArray->SetCopyOnWriteOwner(origCopyOnWriteOwner);
-                m_byteArray->m_toplevel->throwIOError(kCompressedDataError);
-
-                break;
+            if (origData && origData != m_buffer->array && origCopyOnWriteOwner == NULL)
+            {
+                // Note that TellGcXXX always expects capacity, not (logical) length.
+                TellGcDeleteBufferMemory(origData, origCap);
+                mmfx_delete_array(origData);
             }
 
-        }
-    };
-
-    void ByteArray::UncompressViaLzma()
-    {
-        ByteArrayUncompressViaLzmaTask task(this);
-        task.exec();
-    }
-
-    class ByteArrayUncompressViaZlibVariantTask: public  ByteArrayTask
-    {
-    public:
-        ByteArrayUncompressViaZlibVariantTask(ByteArray* ba, ByteArray::CompressionAlgorithm algorithm)
-            : ByteArrayTask(ba)
-            , m_algorithm(algorithm)
-        {}
-
-        void run()
-        {
-            ByteArray::Buffer* buffer = m_byteArray->getUnderlyingBuffer();
-            // Snarf the data and give ourself some empty data
-            // (remember, existing data might be copy-on-write so don't dance on it)
-            uint8_t* origData                       = buffer->array;
-            uint32_t origCap                        = buffer->capacity;
-            uint32_t origLen                        = buffer->length;
-            uint32_t origPos                        = m_byteArray->GetPosition();
-            MMgc::GCObject* origCopyOnWriteOwner    = m_byteArray->m_copyOnWriteOwner;
-            if (!origLen) // empty buffer should give empty result
-                return;
+            break;
 
-            buffer->array             = NULL;
-            buffer->length            = 0;
-            buffer->capacity          = 0;
-            m_byteArray->SetPosition(0);
-            m_byteArray->m_copyOnWriteOwner  = NULL;
-            // we know that the uncompressed data will be at least as
-            // large as the compressed data, so let's start there,
-            // rather than at zero.
-            {
-                ByteArray::Grower grower(m_byteArray, origCap);
-                grower.EnsureWritableCapacity();
-                // be sure the grower's dtor is called before moving on...
-            }
-            const uint32_t kScratchSize = 8192;
-            uint8_t* scratch = mmfx_new_array(uint8_t, kScratchSize);
-
-            int error = Z_OK;
-            
-            z_stream stream;
-            VMPI_memset(&stream, 0, sizeof(stream));
-            error = inflateInit2(&stream, m_algorithm == ByteArray::k_zlib ? 15 : -15);
-            AvmAssert(error == Z_OK);
-
-            stream.next_in = origData;
-            stream.avail_in = origLen;
-            while (error == Z_OK)
-            {
-                stream.next_out = scratch;
-                stream.avail_out = kScratchSize;
-                error = inflate(&stream, Z_NO_FLUSH);
-                uint32_t count = kScratchSize - stream.avail_out;
-                uint32_t pos = m_byteArray->GetPosition();
-                ByteArray::Grower grower(m_byteArray, count+pos);
-                grower.EnsureWritableCapacity();
-                uint8_t* dest = buffer->array + pos;
-                if (dest) {
-                    move_or_copy(dest, scratch, count);
-                    m_byteArray->SetPosition(pos + count);
-                    if (buffer->length < m_byteArray->GetPosition()) {
-                        buffer->length = m_byteArray->GetPosition();
-                    }
-                }
+        case SZ_ERROR_DATA:        // - Data error
+        case SZ_ERROR_MEM:         // - Memory allocation error
+        case SZ_ERROR_UNSUPPORTED: // - Unsupported properties
+        case SZ_ERROR_INPUT_EOF:   // - it needs more bytes in input buffer (src)
+        default:
+        error_cases:
+            // In error cases:
+            if (shared) {
+                m_buffer = origBuffer;
             }
 
-            inflateEnd(&stream);
-
-            mmfx_delete_array(scratch);
-
-            if (error == Z_STREAM_END)
-            {
-                // everything is cool
-                if (origData && origData != buffer->array && origCopyOnWriteOwner == NULL)
-                {
-                    // Note that TellGcXXX always expects capacity, not (logical) length.
-                    m_byteArray->TellGcDeleteBufferMemory(origData, origCap);
-                    mmfx_delete_array(origData);
-                }
+            // 1) free the new buffer
+            TellGcDeleteBufferMemory(m_buffer->array, m_buffer->capacity);
+            mmfx_delete_array(m_buffer->array);
 
-                // Note that Compress() has always ended with position == length,
-                // but Uncompress() has always ended with position == 0.
-                // Weird, but we must maintain it.
-                m_byteArray->SetPosition(0);
-            }
-            else
-            {
-                // When we error:
+            // 2) put the original data back.
+            // (keep in sync with state restoration above)
+            m_buffer->array    = origData;
+            m_buffer->length   = origLen;
+            m_buffer->capacity = origCap;
+            m_position         = origPos;
+            SetCopyOnWriteOwner(origCopyOnWriteOwner);
+            toplevel()->throwIOError(kCompressedDataError);
 
-                // 1) free the new buffer
-                m_byteArray->TellGcDeleteBufferMemory(buffer->array, buffer->capacity);
-                mmfx_delete_array(buffer->array);
-
-                // 2) put the original data back.
-                buffer->array = origData;
-                buffer->length = origLen;
-                buffer->capacity = origCap;
-                m_byteArray->SetPosition(origPos);
-                m_byteArray->SetCopyOnWriteOwner(origCopyOnWriteOwner);
-                m_byteArray->m_toplevel->throwIOError(kCompressedDataError);
-            }
+            break;
         }
 
-    private:
-        ByteArray::CompressionAlgorithm m_algorithm;
-    };
+    }
 
     void ByteArray::UncompressViaZlibVariant(CompressionAlgorithm algorithm)
     {
-        ByteArrayUncompressViaZlibVariantTask task(this, algorithm);
-        task.exec();
+        // Snarf the data and give ourself some empty data
+        // (remember, existing data might be copy-on-write so don't dance on it)
+        uint8_t* origData                       = m_buffer->array;
+        uint32_t origCap                        = m_buffer->capacity;
+        uint32_t origLen                        = m_buffer->length;
+        uint32_t origPos                        = m_position;
+        MMgc::GCObject* origCopyOnWriteOwner    = m_copyOnWriteOwner;
+        if (!origLen) // empty buffer should give empty result
+            return;
+
+        bool shared = IsShared();
+        FixedHeapRef<Buffer> origBuffer = m_buffer;
+        if (shared) {
+            m_buffer = mmfx_new(Buffer());
+        }
+
+        m_buffer->array    = NULL;
+        m_buffer->length   = 0;
+        m_buffer->capacity = 0;
+        m_position         = 0;
+        m_copyOnWriteOwner = NULL;
+        // we know that the uncompressed data will be at least as
+        // large as the compressed data, so let's start there,
+        // rather than at zero.
+        EnsureCapacity(origCap);
+
+        const uint32_t kScratchSize = 8192;
+        uint8_t* scratch = mmfx_new_array(uint8_t, kScratchSize);
+
+        int error = Z_OK;
+        
+        z_stream stream;
+        VMPI_memset(&stream, 0, sizeof(stream));
+        error = inflateInit2(&stream, algorithm == k_zlib ? 15 : -15);
+        AvmAssert(error == Z_OK);
+
+        stream.next_in = origData;
+        stream.avail_in = origLen;
+        while (error == Z_OK)
+        {
+            stream.next_out = scratch;
+            stream.avail_out = kScratchSize;
+            error = inflate(&stream, Z_NO_FLUSH);
+            Write(scratch, kScratchSize - stream.avail_out);
+        }
+
+        inflateEnd(&stream);
+
+        mmfx_delete_array(scratch);
+
+        if (error == Z_STREAM_END)
+        {
+            if (shared) {
+                ByteArraySwapBufferTask task(this, origBuffer);
+                task.exec();
+            }
+            // everything is cool
+            if (origData && origData != m_buffer->array && origCopyOnWriteOwner == NULL)
+            {
+                // Note that TellGcXXX always expects capacity, not (logical) length.
+                TellGcDeleteBufferMemory(origData, origCap);
+                mmfx_delete_array(origData);
+            }
+
+            // Note that Compress() has always ended with position == length,
+            // but Uncompress() has always ended with position == 0.
+            // Weird, but we must maintain it.
+            m_position = 0;
+        }
+        else
+        {
+            // When we error:
+
+            // 1) free the new buffer
+            TellGcDeleteBufferMemory(m_buffer->array, m_buffer->capacity);
+            mmfx_delete_array(m_buffer->array);
+
+            if (shared) {
+                m_buffer = origBuffer;
+            }
+
+            // 2) put the original data back.
+            m_buffer->array    = origData;
+            m_buffer->length   = origLen;
+            m_buffer->capacity = origCap;
+            m_position         = origPos;
+            SetCopyOnWriteOwner(origCopyOnWriteOwner);
+            toplevel()->throwIOError(kCompressedDataError);
+        }
     }
 
     // For requestBytesForShortRead() there is no limit on m_position, but m_length 
     // is limited to MAX_BYTEARRAY_STORE_LENGTH, which is well below 2^32.  We limit 
     // nbytes to MAX_BYTEARRAY_SHORT_ACCESS_LENGTH, which is less than
     // 2^32-MAX_BYTEARRAY_STORE_LENGTH but at least 4095.  Callers that might have a
     // larger value for nbytes should not use this API.  The purpose of all these limits
     // is to make the range check tractably small for inlining in jitted code without
@@ -2146,22 +2111,20 @@ namespace avmplus
                                      uint32_t offset,
                                      uint32_t length)
     {
         toplevel()->checkNull(bytes, "bytes");
 
         if (length == 0) {
             length = bytes->get_length() - offset;
         }
-
-        ByteArrayWriteBytesTask task(&m_byteArray, &bytes->GetByteArray(), offset, length);
-        task.exec();
-        if (task.rangeError) {
-            toplevel()->throwRangeError(kParamRangeError);
-        }
+        
+        m_byteArray.WriteByteArray(bytes->GetByteArray(), 
+                                   offset, 
+                                   length);
     }
 
     void ByteArrayObject::readBytes(ByteArrayObject *bytes,
                                     uint32_t offset,
                                     uint32_t length)
     {
         toplevel()->checkNull(bytes, "bytes");
 
@@ -2292,17 +2255,39 @@ namespace avmplus
 
     int32_t ByteArrayObject::atomicCompareAndSwapIntAt(int32_t byteIndex , int32_t expectedValue, int32_t newValue )
     {
         return m_byteArray.CAS(byteIndex, expectedValue, newValue);
     }
     
     int32_t ByteArrayObject::atomicCompareAndSwapLength(int32_t expectedLength, int32_t newLength)
     {
-        return m_byteArray.SetLengthCommon(newLength, true, true, expectedLength);
+        if (m_byteArray.IsShared()) {
+            ByteArrayCompareAndSwapLengthTask task(&m_byteArray, expectedLength, newLength);
+            task.exec();
+            return task.result;
+        }
+        else {
+            return m_byteArray.UnprotectedAtomicCompareAndSwapLength(expectedLength, newLength);
+        }
+    }
+
+    int32_t ByteArray::UnprotectedAtomicCompareAndSwapLength(int32_t expectedLength, int32_t newLength)
+    {
+        int32_t result = GetLength();
+        if (expectedLength == result) {
+            const bool CalledFromAS3Setter = true;
+            Grower grower(this, newLength);
+            bool reloadGlobals = !grower.RequestWillReallocBackingStore();
+            grower.SetLengthCommon(newLength, CalledFromAS3Setter);
+            if (reloadGlobals  && IsShared()) {
+                AvmCore::getActiveCore()->getIsolate()->getAggregate()->reloadGlobalMemories();
+            }
+        }
+        return result;
     }
 
     void ByteArrayObject::set_shareable(bool val)
     {
         m_byteArray.setWorkerLocal(!val);
     }
     
     bool ByteArrayObject::get_shareable()
--- a/core/ByteArrayGlue.h
+++ b/core/ByteArrayGlue.h
@@ -43,21 +43,22 @@
 
 namespace avmplus
 {
     class ByteArray : public DataInput,
                       public DataOutput
     {
         
     public:
-        friend class ByteArrayCompressViaLzmaTask;
-        friend class ByteArrayCompressViaZlibVariantTask;
-        friend class ByteArrayUncompressViaLzmaTask;
-        friend class ByteArrayUncompressViaZlibVariantTask;
+
+        friend class ByteArrayTask;
         friend class ByteArrayObject;
+        friend class ByteArraySetLengthTask;
+        friend class ByteArraySwapBufferTask;
+        friend class ByteArrayCompareAndSwapLengthTask;
         class Buffer : public FixedHeapRCObject
         {
         public:
             virtual void destroy();
             virtual ~Buffer();
             uint8_t* array;
             uint32_t capacity;
             uint32_t length;
@@ -71,16 +72,17 @@ namespace avmplus
         ~ByteArray();
 
         void Clear();
         
         REALLY_INLINE uint8_t operator[](uint32_t index) const { AvmAssert(index < m_buffer->length); return (index < m_buffer->length) ? m_buffer->array[index] : 0; }
         uint8_t& operator[](uint32_t index);
 
         REALLY_INLINE uint32_t GetLength() const { return m_buffer->length; }
+        REALLY_INLINE bool IsShared() const { return (m_workerLocal == false) && (m_buffer->RefCount() > 1); }
 
         // Ensure that the capacity of the ByteArray is at least 'newLength',
         // and set length = max(GetLength(), newLength),
         // and set position = min(GetPosition(), newLength)
         //
         // Note that SetLength(0) empties the (logical) contents of the ByteArray,
         // but doesn't reduce the underlying capacity allocated.
         // Use Clear() to eliminate existing memory allocations.
@@ -90,17 +92,19 @@ namespace avmplus
         // Ensure that the capacity of the ByteArray is at least 'newLength',
         // and set length = max(GetLength(), newLength),
         // and set position = min(GetPosition(), newLength)
         //
         // This is alternative entry point that *only* the length setter
         // uses; it serves as a hint from client that newLength is
         // expected maximum length for immediate future.
         void FASTCALL SetLengthFromAS3(uint32_t newLength);
-        uint32_t SetLengthCommon(uint32_t newLength, bool calledFromSetter, bool onlyIfExpected = false, uint32_t expectedLength = 0);
+        void SetLengthCommon(uint32_t newLength, bool calledFromSetter);
+        void UnprotectedSetLengthCommon(uint32_t newLength, bool calledFromSetter);
+        int32_t UnprotectedAtomicCompareAndSwapLength(int32_t expectedLength, int32_t newLength);
 
     public:
         // Set the length to x+y, with overflow check.  If x+y overflows a uint32_t then
         // throw a MemoryError (same error that the one-argument variety will throw if
         // trying to create a buffer larger than the buffer limit, which is less than 2^32-1).
         void FASTCALL SetLength(uint32_t x, uint32_t y);
         
         // You can use this call to get a READ_ONLY pointer into the ByteArray.
@@ -214,42 +218,38 @@ namespace avmplus
         void CompressViaLzma();
         void UncompressViaLzma();
 
     public: // Tasks need it
         class Grower
         {
             friend class ByteArray;
         public:
-            Grower(ByteArray* owner, uint32_t minimumCapacity, bool onlyIfExpected = false, uint32_t expectedLength = 0)
+            Grower(ByteArray* owner, uint32_t minimumCapacity)
                 : m_owner(owner)
                 , m_oldArray(owner->m_buffer->array)
                 , m_oldLength(owner->m_buffer->length)
                 , m_oldCapacity(owner->m_buffer->capacity)
                 , m_minimumCapacity(minimumCapacity)
-                , m_expectedLength(expectedLength)
-                , m_onlyIfExpected(onlyIfExpected)
-                , m_succeeded(true)
             {
             }
             void FASTCALL ReallocBackingStore(uint32_t newCapacity);
             void FASTCALL EnsureWritableCapacity();
-            uint32_t SetLengthCommon(uint32_t newLength, bool calledFromLengthSetter);
+            REALLY_INLINE bool RequestWillReallocBackingStore() const;
+            REALLY_INLINE bool RequestExceedsMemoryAvailable() const;
+            void SetLengthCommon(uint32_t newLength, bool calledFromLengthSetter);
 
             void run(); // from SafepointTask
             virtual ~Grower();
         private:
             ByteArray*  m_owner;
             uint8_t*    m_oldArray;
             uint32_t    m_oldLength;
             uint32_t    m_oldCapacity;
             uint32_t    m_minimumCapacity;
-            uint32_t    m_expectedLength;
-            bool        m_onlyIfExpected;
-            bool        m_succeeded;
         };
     public:
         
         Buffer* getUnderlyingBuffer() { return m_buffer; }
         
 
     private:
         enum { kGrowthIncr = 4096 };
--- a/core/ConcurrencyGlue.cpp
+++ b/core/ConcurrencyGlue.cpp
@@ -39,33 +39,18 @@
 
 #include "avmplus.h"
 
 namespace avmplus {
 
 #define DEBUG_CONDITION_MUTEX
 #if defined(DEBUG) && defined(DEBUG_CONDITION_MUTEX)
     #define DEBUG_STATE(_x_) do { AvmLog _x_; } while(0)
-    void _dump_waitlist_(Isolate::WaitRecord* head, int32_t gid) 
-    {
-        char buf[200]; 
-        Isolate::WaitRecord* cur = head;           
-        int loc = VMPI_snprintf(buf, 200, "(%d) wait list [", gid);
-        while(cur) {
-            loc += VMPI_snprintf(buf+loc, 200-loc, "%p ", (void *)cur->threadID);
-            cur = cur->next;
-        };
-        VMPI_snprintf(buf+loc, 200-loc, "]\n");
-        AvmLog(buf);
-    }
-
-    #define DUMP_WAITLIST(_x_, _y_) do { _dump_waitlist_(_x_, _y_); } while (0)
 #else
     #define DEBUG_STATE(_x_) do { } while(0)
-    #define DUMP_WAITLIST(_x_, _y_) do { } while(0)
 #endif
 
     class SafepointHelper_VMPIMutex
     {
     private:
         static void lockInSafepointGate(void* stackPointer, void* mutex)
         {
             vmbase::SafepointGate gate(stackPointer);
@@ -105,210 +90,41 @@ namespace avmplus {
             vmbase::SafepointGate gate(stackPointer);
             WaitRecord* const record = (WaitRecord*)arg;
             if (record->timeout_millis != -1) {
                 record->returnVal = VMPI_condVarTimedWait(record->condVar, record->mutex, record->timeout_millis);
             } else {
                 VMPI_condVarWait(record->condVar, record->mutex);
                 record->returnVal = false;
             }
+            DEBUG_STATE(("thread %d is trying to wake from safepoint\n", VMPI_currentThread()));
 
         }
     public:
 
         static bool wait(vmpi_condvar_t* condVar, vmpi_mutex_t* mutex, int32_t timeout_millis)
         {
             WaitRecord record(condVar, mutex, timeout_millis);
             VMPI_callWithRegistersSaved(timedWaitInSafepointGate, (void*) &record);
             return record.returnVal;
         }
     };
 
-    //
-    // InterruptableState 
-    //
-#ifdef DEBUG
-    int InterruptableState::globalId = 0;
-#endif // DEBUG
-
-
-    InterruptableState::EnterWait::EnterWait(Isolate* isolate, InterruptableState* state, int32_t timeout, bool ignoreAnyPendingNotify)
-        : interrupted(false)
-    {
-        Isolate::WaitRecord record;
-        failed = !record.isValid;
-        state->addWaitRecord(record, isolate, ignoreAnyPendingNotify);
-
-        if (record.isValid) {
-            result = state->wait(timeout, record, isolate);
-        }
-
-        state->removeWaitRecord(record, isolate);
-        interrupted = isolate ? isolate->isInterrupted() : false;
-    }
-
-    InterruptableState::InterruptableState()
-        : m_waitListHead(NULL)
-        , m_waitListTail(NULL)
-    {
-#ifdef DEBUG
-        gid = ++globalId;
-#endif // DEBUG
-        notified = VMPI_nullThread();
-    }
-
-    REALLY_INLINE void InterruptableState::addWaitRecord(Isolate::WaitRecord& record, Isolate* isolate, bool ignorePendingNotify)
-    {
-        SCOPE_LOCK(m_lock) {
-            if (!ignorePendingNotify && m_waitListHead == NULL && (notified != VMPI_nullThread())) {
-                // we don't want to notify ourself, if we called notify and then 
-                // called wait() we don't want to immediately wake up.
-                record.notified = VMPI_currentThread() != notified;
-                notified = VMPI_nullThread();
-            }
-
-            AvmAssert(record.next == NULL);
-            if (m_waitListHead == NULL) {
-                m_waitListHead = &record;
-            }
-
-            if (m_waitListTail) {
-                m_waitListTail->next = &record;
-            }
-            m_waitListTail = &record;
-
-            DUMP_WAITLIST(m_waitListHead, gid);
-            if (record.isValid && isolate) {
-                isolate->setActiveWaitRecord(&record);
-            }
-        }
-    }
-
-    REALLY_INLINE void InterruptableState::removeWaitRecord(Isolate::WaitRecord& record, Isolate* isolate)
-    {
-        SCOPE_LOCK(m_lock) {
-            Isolate::WaitRecord* prev = NULL;
-            Isolate::WaitRecord* cur = m_waitListHead;
-            while(cur) {
-                if (&record == cur) {
-                    if (prev == NULL) {
-                        m_waitListHead = cur->next;
-                    }
-                    else {
-                        prev->next = cur->next;
-                    }
-
-                    if (cur->next == NULL) {
-                        m_waitListTail = prev;
-                    }
-
-                    break;
-                }
-                prev = cur;
-                cur = cur->next;
-            } 
-
-            DUMP_WAITLIST(m_waitListHead, gid);
-            if (isolate) {
-                isolate->setActiveWaitRecord(NULL);
-            }
-        }
-    }
-
-    void InterruptableState::destroy()
-    {
-        // no one could be waiting on any record in the list or we wouldn't
-        // be in the destroy method (everyone's ref to this state is NULL).
-        AvmAssert(m_waitListHead == NULL);
-        AvmAssert(m_waitListTail == NULL);
-    }
-
-    void InterruptableState::notify()
-    {
-        // get the first WaitRecord in the list and notify it
-        // the list is currently protected by the associated mutex 
-        // it is possible that no other thread is waiting on this condition
-        // as seen with the following example:
-        //    mutex.lock();
-        //    condition.notify();
-        SCOPE_LOCK(m_lock) {
-            DEBUG_STATE(("thread %d (%d).notify() with %s waiting threads\n", VMPI_currentThread(), gid, (m_waitListHead?"":"NO")));
-            if (m_waitListHead) {
-                AvmAssert(m_waitListHead->isValid == true);
-                m_waitListHead->notified = true;
-                DEBUG_STATE(("thread %d is notifying thread %d\n", VMPI_currentThread(), m_waitListHead->threadID));
-                VMPI_condVarSignal(&m_waitListHead->condVar);
-            }
-            else {
-                notified = VMPI_currentThread();
-            }
-        }
-    }
-
-    void InterruptableState::notifyAll()
-    {
-        // the list is currently protected by the associated mutex
-        SCOPE_LOCK(m_lock) {
-            DEBUG_STATE(("thread %d (%d).notifyAll() with %s waiting threads\n", VMPI_currentThread(), gid, (m_waitListHead?"":"NO")));
-            if (m_waitListHead) {
-                // notify all waiting records in the list
-                Isolate::WaitRecord* record = m_waitListHead;
-                while(record) {
-                    AvmAssert(record->isValid == true);
-                    record->notified = true;
-                    DEBUG_STATE(("thread %d is notifying thread %d\n", VMPI_currentThread(), record->threadID));
-                    VMPI_condVarSignal(&record->condVar);
-                    record = record->next;
-                }
-            }
-            else {
-                notified = VMPI_currentThread();
-            }
-        }
-    }
-
-    REALLY_INLINE bool InterruptableState::wait(int32_t timeout, Isolate::WaitRecord& record, Isolate* isolate)
-    {
-        bool result = false;
-        // we loop to allow the debugger to wake us up and get a 
-        // call stack then go back to a waiting state
-        do {
-            // make sure before we sleep that we haven't been notified.
-            // this can happen in two cases 
-            //  (1) another thread has the list lock and has traversed the list 
-            //      setting notified and calling signal for each record already
-            //  (2) the debugger has woken this thread to request a call stack and 
-            //      during that stack collection time situation (1) happened.
-            bool notified = false;
-            SCOPE_LOCK(m_lock) {
-                notified = record.notified;
-            }
-            if (!notified) {
-                DEBUG_STATE(("thread %d sleeping\n", VMPI_currentThread(), gid));
-                VMPI_recursiveMutexLock(&record.privateMutex);
-                result = avmplus::SafepointHelper_VMPIWait::wait(&record.condVar, &record.privateMutex, timeout);
-                VMPI_recursiveMutexUnlock(&record.privateMutex);
-                DEBUG_STATE(("thread %d now awake\n", VMPI_currentThread()));
-            }
-        }
-        while(isolate && isolate->retryActiveWaitRecord());
-        return result;
-    }
-
     MutexObject::State::State()
         : InterruptableState()
         , m_recursion_count(0)
         , m_ownerThreadID(VMPI_nullThread())
+        , m_lockWaitListHead(NULL)
+        , m_lockWaitListTail(NULL)
     {
         m_isValid = VMPI_recursiveMutexInit(&m_mutex);
     }
 
     void MutexObject::State::destroy()
     {
-        InterruptableState::destroy();
         if (m_isValid) {
             bool success = VMPI_recursiveMutexTryLock(&m_mutex);
             if (success) {
                 // Only access m_recursion_count if we know it's locked.
                 int64_t saved_recursion_count = m_recursion_count;
                 VMPI_recursiveMutexUnlock(&m_mutex);
                 while (saved_recursion_count > 0) {
                     saved_recursion_count = --m_recursion_count;
@@ -319,46 +135,83 @@ namespace avmplus {
             } else {
                 // It's locked by a thread but not reachable through GC objecs.
                 // The native mutex will be orphaned, but otherwise the memory will be released.
             }
         }
         mmfx_delete(this);
     }
 
+    void MutexObject::State::lock(AvmCore* core) {
+        Isolate* isolate = core->getIsolate();
+        SCOPE_LOCK_NAMED(cond, m_condition) {
+            while(tryLock() == false) {
+                // put us at the end of the list of waiting threads
+                LockWaitRecord record;
+                if (m_lockWaitListHead == NULL) {
+                    m_lockWaitListHead = &record;
+                }
+                else {
+                    m_lockWaitListTail->next = &record;
+                }
+                m_lockWaitListTail = &record;
+
+                // we loop here on each wake if we are 
+                // not the first thread waiting on the lock
+                do {
+                    EnterWait wait(isolate, cond);
+                    // if we are terminated then unset the active wait record and exit
+                    // when the state is destroyed it will clean up all current records.
+                    if (wait.interrupted || core->interruptCheckReason(AvmCore::ExternalInterrupt)) {
+                        return;
+                    }
+                } while(m_lockWaitListHead != &record);
+                
+                m_lockWaitListHead = record.next;
+                if (m_lockWaitListHead == NULL) {
+                    m_lockWaitListTail = NULL;
+                }
+            }
+        }
+        
+        if (isolate) {
+            isolate->removeWaitRecord(&m_condition);
+        }
+    }
+
     bool MutexObject::State::tryLock()
     {
         bool result = VMPI_recursiveMutexTryLock(&m_mutex);
-        if (result == true) {
+        if (result) {
             DEBUG_STATE(("thread %d acquired Mutex(%d)\n", VMPI_currentThread(), gid));
             if (m_recursion_count == 0) {
                 AvmAssert(m_ownerThreadID == VMPI_nullThread());
                 m_ownerThreadID = VMPI_currentThread(); 
             } else {
                 AvmAssert(m_ownerThreadID == VMPI_currentThread());
             }
             m_recursion_count ++;
         }
         return result;
     }
 
     bool MutexObject::State::unlock()
     {
-        DEBUG_STATE(("thread %d unlocking Mutex(%d)\n", VMPI_currentThread(), gid));
         if (m_ownerThreadID != VMPI_currentThread())
         {
             // Non-fenced read of the m_ownerThreadID field possibly outside of a critical section.
             // Writes to m_ownerThreadID occur only in a critical section.
             // If the current thread really holds the lock, then m_ownerThreadID is set accurately.
             // If the current thread doesn't hold the lock, it can't see its own thread id in m_ownerThreadID, because
             // either it never held the lock and never wrote it, or it had held the lock, set 
             // the thread id to null and unlocked (fenced). Only the current thread writes the current
             // thread id to m_ownerThreadID.
             return false;
         }
+        DEBUG_STATE(("thread %d unlocking Mutex(%d)\n", VMPI_currentThread(), gid));
         // Ok so we own the lock.
         AvmAssert(m_recursion_count > 0);
         m_recursion_count --;
 
         if (m_recursion_count == 0) {
             AvmAssert(m_ownerThreadID == VMPI_currentThread());
             m_ownerThreadID = VMPI_nullThread(); 
         } 
@@ -401,43 +254,31 @@ namespace avmplus {
         m_state = NULL;
     }
   
     void MutexObject::lock()
     {
         DEBUG_STATE(("thread %d calling Mutex(%d).lock()\n", VMPI_currentThread(), m_state->gid));
         // we continue to try and get the lock until
         // we are terminated or acquire it
-        while(!m_state->tryLock()) {
-            DEBUG_STATE(("thread %d Mutex(%d) not acquired\n", VMPI_currentThread(), m_state->gid));
-            // if we don't get the lock then we have to wait
-            InterruptableState::EnterWait wait(core()->getIsolate(), m_state);
-            if (wait.failed) {
-                toplevel()->throwError(kMutexCannotBeInitialized);
-            }
-            // if we are terminated then unset the active wait record and exit
-            // when the state is destroyed it will clean up all current records.
-            if (wait.interrupted || core()->interruptCheckReason(AvmCore::ExternalInterrupt)) {
-                break;
-            }
-            DEBUG_STATE(("thread %d attempting Mutex(%d) again\n", VMPI_currentThread(), m_state->gid));
-        }
+        m_state->lock(core());
     	TELEMETRY_METHOD_NO_THRESHOLD(core()->getTelemetry(),".player.mutex.lock");
     }
     
     void MutexObject::unlock()
     {
         DEBUG_STATE(("thread %d calling Mutex(%d).unlock()\n", VMPI_currentThread(), m_state->gid));
         if (!m_state->unlock()) {
             toplevel()->illegalOperationErrorClass()->throwError(kMutextNotLocked);
         }
     }
 
     bool MutexObject::tryLock()
     {
+    	TELEMETRY_METHOD_NO_THRESHOLD(core()->getTelemetry(),".player.mutex.trylock");
         return m_state->tryLock(); 
     }
 	
 	ChannelItem* MutexObject::makeChannelItem()
 	{
         class MutexChannelItem: public ChannelItem
         {
         public:
@@ -484,61 +325,69 @@ namespace avmplus {
     ConditionObject::State::State(MutexObject::State* mutexState)
         : InterruptableState()
         , m_mutexState(mutexState)
     {
     }
 
     bool ConditionObject::State::wait(int32_t millis, Isolate* isolate, Toplevel* toplevel)
     {
-        // we own the mutex.
         m_mutexState->m_ownerThreadID = VMPI_nullThread();
         int64_t saved_recursion_count = m_mutexState->m_recursion_count;
         m_mutexState->m_recursion_count = 0;
 
-        AvmAssert(vmbase::SafepointRecord::hasCurrent());
-        
-        DEBUG_STATE(("thread %d releasing Mutex(%d)\n", VMPI_currentThread(), m_mutexState->gid));
-        // unlock the mutex before we sleep and notify the first thread waiting
-        VMPI_recursiveMutexUnlock(&m_mutexState->m_mutex);
-        m_mutexState->notifyAll();
-        EnterWait wait(isolate, this, millis);
+        bool result = false;
+        SCOPE_LOCK_NAMED(cond, m_condition) {
+            // we own the mutex.
+            AvmAssert(vmbase::SafepointRecord::hasCurrent());
+            
+            DEBUG_STATE(("thread %d releasing Mutex(%d)\n", VMPI_currentThread(), m_mutexState->gid));
+            // unlock the mutex before we sleep and notify the first thread waiting
+            VMPI_recursiveMutexUnlock(&m_mutexState->m_mutex);
+            m_mutexState->notifyAll();
+            
+            // we could already be interrupted so before we block return
+            if ((isolate && isolate->isInterrupted()) || 
+                toplevel->core()->interruptCheckReason(AvmCore::ExternalInterrupt)) {
+                DEBUG_STATE(("thread %d Condition(%d).wait was interrupted!\n", VMPI_currentThread(), gid));
+                return false;
+            }
 
-        if (wait.failed) {
-            // restore state before throwing
-            SafepointHelper_VMPIMutex::lock(&m_mutexState->m_mutex);
-            m_mutexState->m_ownerThreadID = VMPI_currentThread();
-            m_mutexState->m_recursion_count = saved_recursion_count;
-            toplevel->throwError(kConditionCannotBeInitialized);
-        }
+            EnterWait wait(isolate, cond, millis);
 
-        // if we have been interrupted do not re-acquire the public lock, just bail
-        if (wait.interrupted || 
-           (isolate && isolate->targetCore()->interruptCheckReason(AvmCore::ExternalInterrupt))) 
-        {
-            DEBUG_STATE(("thread %d Condition(%d).wait was interrupted!\n", VMPI_currentThread(), gid));
-            return false;
+            result = wait.result;
+            // if we have been interrupted do not re-acquire the public lock, just bail
+            if (wait.interrupted || 
+               (isolate && isolate->targetCore()->interruptCheckReason(AvmCore::ExternalInterrupt))) 
+            {
+                DEBUG_STATE(("thread %d Condition(%d).wait was interrupted!\n", VMPI_currentThread(), gid));
+                return false;
+            }
         }
-
+        
+        if (isolate) {
+            isolate->removeWaitRecord(&m_condition);
+        }
+        
         // re-acquire the public mutex in a safepoint
         SafepointHelper_VMPIMutex::lock(&m_mutexState->m_mutex);
         DEBUG_STATE(("thread %d Condition(%d) re-acquired Mutex(%d)\n", VMPI_currentThread(), gid, m_mutexState->gid));
         m_mutexState->m_ownerThreadID = VMPI_currentThread();
         m_mutexState->m_recursion_count = saved_recursion_count;
 
         if (isolate) {
             // if we are terminating then we should release the public mutex
             if (isolate->targetCore()->interruptCheckReason(AvmCore::ExternalInterrupt) || 
                 isolate->getAggregate()->queryState(isolate) == Isolate::TERMINATED) 
             {
                 VMPI_recursiveMutexUnlock(&m_mutexState->m_mutex);
                 DEBUG_STATE(("thread %d Condition(%2).wait was terminated!\n", VMPI_currentThread(), gid));
             }
         }
-        return wait.result;
+        return result;
     }
 
     void ConditionObject::ctor(GCRef<MutexObject> mutex)
     {
         AvmAssert(mutex != NULL);
         m_state = mmfx_new(ConditionObject::State(mutex->m_state));
 
         m_mutex = mutex;
@@ -605,21 +454,16 @@ namespace avmplus {
 
         ConditionChannelItem* item = mmfx_new(ConditionChannelItem(m_state));
         return item;
 	}
 	
     bool ConditionObject::waitImpl(double timeout)
     {
         Isolate* isolate = core()->getIsolate();
-        // this is a blocking call and we could already be interrupted return early!
-        if ((isolate && isolate->isInterrupted()) || 
-            core()->interruptCheckReason(AvmCore::ExternalInterrupt)) {
-            return false;
-        }
         // See comments in unlockImpl for correctness of reading m_ownerThreadID
         if (m_state->m_mutexState->m_ownerThreadID != VMPI_currentThread())
         {
             toplevel()->throwIllegalOperationError(kConditionCannotWait);
         }
 		TELEMETRY_METHOD_NO_THRESHOLD(core()->getTelemetry(),".player.condition.wait");
  		
         AvmAssert(timeout == -1 || timeout >= 0);
--- a/core/ConcurrencyGlue.h
+++ b/core/ConcurrencyGlue.h
@@ -121,29 +121,52 @@ namespace avmplus {
     // OS level Mutex for coordination.
     // 
     // InterruptableState manages the list of WaitRecords
     // for this Mutex allowing blocking operations like lock()
     // to be interrupted for termination, debugging, or script timeout
     //
     class MutexObject::State: public InterruptableState
     {
-        friend class MutexObject;
-        friend class ConditionObject;
-        friend class ConditionObject::State;
-        vmpi_mutex_t m_mutex;
-        int64_t m_recursion_count; // generous to avoid wraparound.
-        vmpi_thread_t volatile m_ownerThreadID;
-        bool m_isValid;
-
     public:
         State();
         virtual void destroy();
         bool tryLock();
+        void lock(AvmCore* core);
         bool unlock();
+
+    private:
+        friend class MutexObject;
+        friend class ConditionObject;
+        friend class ConditionObject::State;
+        // Manages the list of threads waiting for
+        // the lock. This is a FIFO list for acquisition:
+        // the first one waiting on the lock gets it when
+        // it is unlocked.
+        struct LockWaitRecord
+        {
+            LockWaitRecord() 
+                : next(NULL)
+#ifdef DEBUG
+                , threadID(VMPI_currentThread())
+#endif // DEBUG
+            {}
+
+            LockWaitRecord* next;
+#ifdef DEBUG
+            vmpi_thread_t threadID;
+#endif // DEBUG
+        };
+
+        vmpi_mutex_t m_mutex;
+        int64_t m_recursion_count; // generous to avoid wraparound.
+        vmpi_thread_t volatile m_ownerThreadID;
+        LockWaitRecord* m_lockWaitListHead;
+        LockWaitRecord* m_lockWaitListTail;
+        bool m_isValid;
     };
     
     //
     // this stores the state of the ActionScript Condition 
     // object with a reference count.  this is done to allow
     // ActionScript Condition objects to be passed between
     // isolates allowing multiple isolates to use the same
     // OS level condition for coordination.
--- a/core/Isolate-inlines.h
+++ b/core/Isolate-inlines.h
@@ -191,17 +191,17 @@ namespace avmplus
         return self()->core()->workerStates[code];
      }
     
     template <class T>
     bool WorkerObjectBase<T>::stopInternal(bool shouldWait) 
     {
         Aggregate* aggregate = m_isolate->getAggregate();
         if (aggregate->isPrimordial(giid))  {
-            aggregate->selfExit(self()->toplevel());
+            aggregate->throwWorkerTerminatedException(self()->toplevel());
             return true; // not reached
         } else {
             return aggregate->requestExit(shouldWait, descriptor(), self()->toplevel());
         }
     }
     
 
     template <class T>
--- a/core/Isolate.cpp
+++ b/core/Isolate.cpp
@@ -40,16 +40,24 @@
 #include "avmplus.h"
 
 #include "FixedHeapUtils.cpp"
 #include "Channels.cpp"
 
 namespace avmplus
 {
 
+#define DEBUG_INTERRUPTABLE_STATE
+#if defined(DEBUG) && defined(DEBUG_INTERRUPTABLE_STATE)
+    #define DEBUG_STATE(_x_) do { AvmLog _x_; } while(0)
+#else
+    #define DEBUG_STATE(_x_) do { } while(0)
+#endif
+
+
     Isolate* Aggregate::Globals::at(int32_t giid)
     {
         AvmAssert(m_lock.isLockedByCurrentThread());
         SCOPE_LOCK(m_isolateMap.m_lock) {
             FixedHeapRef<Isolate> isolateRef(NULL);
             if (m_isolateMap.LookupItem(giid, &isolateRef)) {
                 AvmAssert(isolateRef->RefCount() > 0);
                 return isolateRef;
@@ -144,17 +152,17 @@ namespace avmplus
                 }
             }
 
             return result;
             // We are relying on the isolate to clean itself up.
         scope_end: // unlock before throwing
             ;
         }
-        this->selfExit(currentToplevel);
+        this->throwWorkerTerminatedException(currentToplevel);
         return false;
     }
 
 
     void Aggregate::requestAggregateExit()
     {
         SCOPE_LOCK(m_globals->m_lock) {
             m_inShutdown = true;
@@ -229,65 +237,42 @@ namespace avmplus
                 // Note: waitForAnySend() removed, remove notification?
                 lk.notifyAll();
             }
 
         }
     }
 
     /* virtual */
-    void Aggregate::selfExit(Toplevel* currentToplevel)
+    void Aggregate::throwWorkerTerminatedException(Toplevel* currentToplevel)
     {
         AvmCore* core = currentToplevel->core();
         AvmAssert(core->getIsolate()->getAggregate() == this);
         Stringp errorMessage = core->getErrorMessage(kWorkerTerminated);
         GCRef<ErrorObject> error = currentToplevel->errorClass()->constructObject(errorMessage->atom(), core->intToAtom(0));
         Exception *exception = new (core->GetGC()) Exception(core, error->atom());
         exception->flags |= Exception::EXIT_EXCEPTION;
         exception->flags |= Exception::SUPPRESS_ERROR_REPORT;
         core->throwException(exception);
     }
 
     Isolate::Isolate(int32_t desc, int32_t parentDesc, Aggregate* aggregate)
         : desc(desc)
         , parentDesc(parentDesc)
         , m_core(NULL)
+        , m_waitRecordValid(false)
         , m_activeWaitRecord(NULL)
         , m_aggregate(aggregate)
         , m_thread(NULL)
         , m_state(Isolate::NEW)
         , m_failed(false)
         , m_interrupted(false)
     {
     }
 
-    Isolate::WaitRecord::WaitRecord()
-    {
-        isValid = VMPI_condVarInit(&condVar) && VMPI_recursiveMutexInit(&privateMutex);
-        notified = false;
-#ifdef DEBUG
-        threadID = VMPI_currentThread();
-#endif // DEBUG
-        next = NULL;
-    }
-
-    Isolate::WaitRecord::~WaitRecord()
-    {
-        if (isValid) {
-            VMPI_condVarDestroy(&condVar);
-            VMPI_recursiveMutexDestroy(&privateMutex);
-        }
-#ifdef DEBUG
-        threadID = VMPI_nullThread();
-#endif // DEBUG
-        next = NULL;
-        notified = false;
-        isValid = false;
-    }
-
     void Aggregate::stateTransition(Isolate* isolate, Isolate::State to)
     {
         AvmAssert(!m_commlock.isLockedByCurrentThread());
         SCOPE_LOCK(m_globals->m_lock) {
             enum Isolate::State from = isolate->m_state;
             bool verbose = false;
             if (verbose) {
                 
@@ -525,57 +510,86 @@ namespace avmplus
         } else if (AvmCore::istype(atom, toplevel->builtinClasses()->get_ConditionClass()->ivtable()->traits)) {
             ConditionObject* conditionObj = static_cast<ConditionObject*>(toplevel->core()->atomToScriptObject(atom));
 			item = conditionObj->makeChannelItem();
         } else {
         	item = mmfx_new(ScriptObjectChannelItem(toplevel, atom));
         }
         return item;
     }
-
-    void Isolate::setActiveWaitRecord(WaitRecord* record)
+    
+    void Isolate::removeWaitRecord(vmbase::WaitNotifyMonitor* record)
+    {
+        AvmAssert(record != NULL);
+        // this will only be called outside of any lock on the 
+        // specified monitor
+        SCOPE_LOCK(m_activeRecordLock) {
+            if (m_activeWaitRecord == record) {
+                m_activeWaitRecord = NULL;
+            }
+        }
+    }
+    
+    void Isolate::setActiveWaitRecord(vmbase::WaitNotifyMonitor* record)
     {
-        SCOPE_LOCK(m_activeRecordLock) {
-            m_activeWaitRecord = record;
+        if (!isInterrupted()) {
+            // any calls to this must already hold the monitor lock
+            AvmAssert(record->isLockedByCurrentThread());
+            AvmAssert(m_waitRecordValid == false);
+            SCOPE_LOCK(m_activeRecordLock) {
+                m_activeWaitRecord = record;
+            }
+            // protected by the condition (record)
+            m_waitRecordValid = true;
         }
     }
+    
+    void Isolate::invalidateActiveWaitRecord(vmbase::WaitNotifyMonitor* record)
+    {
+        AvmAssert(record->isLockedByCurrentThread());
+        (void)record;
+        // protected by the condition (record)
+        m_waitRecordValid = false;
+    }
 
     // this method is used to determine if a currently active
     // wait record should be reactivated. reactivating a waiting
     // record should only happen if the signal occured because 
     // the debugger needs to get a call stack.
     bool Isolate::retryActiveWaitRecord()
     {
         return false;
     }
 
     bool Isolate::signalActiveWaitRecord()
     {
         SCOPE_LOCK(m_activeRecordLock) {
             if (m_activeWaitRecord) {
-                VMPI_condVarSignal(&m_activeWaitRecord->condVar);
+                SCOPE_LOCK_NAMED(cond, *m_activeWaitRecord) {
+                    // Only notify the current condition if
+                    // it is still considered valid.
+                    // When the current waiting condition is
+                    // notified outside of this method, it
+                    // needs to "unset" the active wait record,
+                    // but going through setActiveWaitRecord
+                    // requires the m_activeRecordLock to be held
+                    // and that would create a deadlock.
+                    // Access to this value is protected by the condition
+                    // itself.
+                    if (m_waitRecordValid) {
+                        cond.notifyAll();
+                    }
+                }
+                m_activeWaitRecord = NULL;
                 return true;
             }
         }
         return false;
     }
 
-    // this method should be used when interrupting a blocked
-    // isolate that will throw on the next execution of 
-    // ActionScript byte code.
-    void Isolate::abortActiveWaitRecord()
-    {
-        SCOPE_LOCK(m_activeRecordLock) {
-            if (m_activeWaitRecord) {
-                targetCore()->raiseInterrupt(AvmCore::ExternalInterrupt);
-                signalActiveWaitRecord();
-            }
-        }
-    }
-
     void Isolate::initialize(AvmCore* core) 
     {
         AvmAssert(AvmCore::getActiveCore() == core);
         this->m_core = core; 
         core->setIsolate(this);
     }
     
     bool Isolate::copyByteCode(ByteArrayObject* byteCode)
@@ -679,16 +693,73 @@ namespace avmplus
             // a leak, so we have to scavenge - call Aggregate::destroyIsolate()
             // in waitUntilNoIsolates(), put this into the cleanups list
             getAggregate()->addThreadCleanup(m_thread);
             m_thread = NULL;
         }
         mmfx_delete(this);
     }
 
+    //
+    // InterruptableState 
+    //
+#ifdef DEBUG
+    int InterruptableState::globalId = 0;
+#endif // DEBUG
+
+
+    InterruptableState::EnterWait::EnterWait(Isolate* isolate, vmbase::MonitorLocker<vmbase::IMPLICIT_SAFEPOINT>& cond, int32_t timeout)
+        : interrupted(false)
+    {
+        if (isolate) {
+            isolate->setActiveWaitRecord(cond.getMonitor());
+        }
+        DEBUG_STATE(("thread %d is sleeping\n", VMPI_currentThread()));
+        if (timeout == -1) {
+            cond.wait();
+        }
+        else {
+            result = cond.wait(timeout);
+        }
+        DEBUG_STATE(("thread %d is awake\n", VMPI_currentThread()));
+
+        if (isolate) {
+            isolate->invalidateActiveWaitRecord(cond.getMonitor());
+        }
+        
+        interrupted = isolate ? isolate->isInterrupted() : false;
+    }
+
+    InterruptableState::InterruptableState()
+    {
+#ifdef DEBUG
+        gid = ++globalId;
+#endif // DEBUG
+    }
+
+    void InterruptableState::notify()
+    {
+        SCOPE_LOCK_NAMED(cond, m_condition) {
+            DEBUG_STATE(("thread %d is calling notify on (%d)\n", VMPI_currentThread(), gid));
+            cond.notify();
+        }
+    }
+
+    void InterruptableState::notifyAll()
+    {
+        SCOPE_LOCK_NAMED(cond, m_condition) {
+            DEBUG_STATE(("thread %d is calling notifyAll on (%d)\n", VMPI_currentThread(), gid));
+            cond.notifyAll();
+        }
+    }
+
+    //
+    // Aggregate
+    //
+
     void Aggregate::destroyIsolate(Isolate* isolate) {
         if (isolate->m_thread != NULL) {
             // FIXME hold the m_globals->m_lock lock here?
             isolate->m_thread->join(); // shouldn't block
             mmfx_delete(isolate->m_thread); // can't delete current thread
             isolate->m_thread = NULL;
         }
     }
@@ -834,38 +905,44 @@ namespace avmplus
             m_activeIsolateCount ++;
             locker.notifyAll();
         }
         AvmAssert(targetCore->getIsolate() == current);
     }
 
     EnterSafepointManager::EnterSafepointManager(AvmCore* core)
     {
-        m_safepointMgr = core->getSafepointManager();
-        m_spRecord.setLocationAndDesc( (int32_t*)&core->interrupted, core->getIsolateDesc() ); 
+        Isolate* isolate = core->getIsolate();
+        // we only need to perform this operation on platforms that
+        // support workers, otherwise we can skip it
+        if (isolate) {
+		    m_aggregate = isolate->getAggregate();
+            m_safepointMgr = m_aggregate->safepointManager();
 
-        m_safepointMgr->enter(&m_spRecord);
+            m_spRecord.setLocationAndDesc( (int32_t*)&core->interrupted, core->getIsolateDesc() ); 
+
+            m_safepointMgr->enter(&m_spRecord);
+        }
     }
     
     EnterSafepointManager::~EnterSafepointManager()
     {
         cleanup();
     }
 
     void EnterSafepointManager::cleanup() 
     {
-        m_safepointMgr->leave(&m_spRecord);
-        m_spRecord.setLocationAndDesc( NULL, -1 );
+        if (m_aggregate != NULL) {
+            m_safepointMgr->leave(&m_spRecord);
+            m_spRecord.setLocationAndDesc( NULL, -1 );
+        }
     }
 
     void Aggregate::runIsolate(Isolate* isolate) 
     {
-        // FIXME try-finally?
-        //EnterSafepointManager enterSafepointManager(this);
-        
         stateTransition(isolate, Isolate::STARTING);
         // Make sure the isolate survives for the duration of the following call.
         {
             FixedHeapRef<Isolate> handle(isolate);
             isolate->doRun();
         }
     }
 
@@ -951,17 +1028,17 @@ namespace avmplus
         SCOPE_LOCK(m_globals->m_isolateMap.m_lock) {
             m_globals->m_isolateMap.ForEach(lister);
         }
         
         return workerVector;
     }
 
 
-    void Aggregate::runHoldingIsolateMapLock(vmbase::SafepointTask* task)
+    void Aggregate::runSafepointTaskHoldingIsolateMapLock(vmbase::SafepointTask* task)
     {
         SCOPE_LOCK(m_globals->m_isolateMap.m_lock) {
             safepointManager()->requestSafepointTask(*task);
         }
     }
     
 
     void Aggregate::reloadGlobalMemories()
--- a/core/Isolate.h
+++ b/core/Isolate.h
@@ -168,60 +168,46 @@ namespace avmplus
     public:
         int32_t desc;
         int32_t parentDesc;
 
     protected:
         FixedHeapArray< FixedHeapArray<uint8_t> > m_code;
         vmbase::RecursiveMutex m_sharedPropertyLock; 
         
-    public: 
-        // 
-        // this structure is used to block threads
-        // that call wait on the AS condition object
-        // we keep this data here to allow us to 
-        // interrupt any thread waiting on a condition
-        // 
-        struct WaitRecord
-        {
-            WaitRecord();
-            virtual ~WaitRecord();
-            vmpi_condvar_t condVar;
-            vmpi_mutex_t privateMutex;
-            bool notified;
-#ifdef DEBUG
-            vmpi_thread_t threadID;
-#endif // DEBUG
-            bool isValid;
-            WaitRecord* next;
-        };
-
-
     private:
         virtual void releaseActiveResources();
         SharedPropertyMap m_properties;
 
     public:
         void setSharedProperty(const char* utf8String, int32_t len, ChannelItem* item);
         bool getSharedProperty(const char* utf8String, int32_t len, ChannelItem** outItem);
         virtual ChannelItem* makeChannelItem(Toplevel* toplevel, Atom atom);
-        void setActiveWaitRecord(WaitRecord* record);
+        void invalidateActiveWaitRecord(vmbase::WaitNotifyMonitor* record);
+        void removeWaitRecord(vmbase::WaitNotifyMonitor* record);
+        void setActiveWaitRecord(vmbase::WaitNotifyMonitor* record);
         bool signalActiveWaitRecord();
         virtual bool retryActiveWaitRecord();
 
     protected:
-        void abortActiveWaitRecord();
         AvmCore* m_core;
 
     private:
         // when an isolate is blocked from ActionScript either due to a 
         // condition.wait or a mutex.lock this holds the active wait record
         // from that call. 
         vmbase::RecursiveMutex m_activeRecordLock;
-        WaitRecord* m_activeWaitRecord;
+        // When an active wait record wakes without being notified
+        // by signalActiveWaitRecord(), it needs to invalidate
+        // the record without acquiring the m_activeRecordLock.
+        // The condition will always be held when updating this
+        // value, so no explicit lock is required (see
+        // signalActiveWaitRecord() for more info).
+        bool m_waitRecordValid;
+        vmbase::WaitNotifyMonitor* m_activeWaitRecord;
 
         FixedHeapRef<Aggregate> m_aggregate;
         // VMThread objects have to be reclaimed after the threads
         // represented by them have terminated, but there's generally
         // no other thread trying to join() on the owner thread, so
         // there's no good point to delete the VMThread. As a result,
         // VMThread objects are retained and deleted
         // opportunistically.
@@ -239,62 +225,42 @@ namespace avmplus
      * need to enter a blocking state and be 
      * interruptable to support termination, debugging, 
      * and script-timeouts
      */ 
     class InterruptableState: public FixedHeapRCObject 
     {
     public:
         InterruptableState();
-        REALLY_INLINE void addWaitRecord(Isolate::WaitRecord& record, Isolate* isolate, bool ignoreAnyPendingNotify);
-        REALLY_INLINE void removeWaitRecord(Isolate::WaitRecord& record, Isolate* isolate);
-        virtual void destroy();
         void notify();
         void notifyAll();
-        REALLY_INLINE bool wait(int32_t timeout, Isolate::WaitRecord& record, Isolate* isolate);
 
         //
         // this is intended as a stack based helper for waiting
         // on an interruptable state object
         //
         class EnterWait
         {
         public:
-            EnterWait(Isolate* isolate, InterruptableState* state, int32_t timeout=-1, bool ignoreAnyPendingNotify=false);
-            bool failed;
+            EnterWait(Isolate* isolate, vmbase::MonitorLocker<vmbase::IMPLICIT_SAFEPOINT>& cond, int32_t timeout=-1);
             bool interrupted;
             bool result;
         };
 
 #ifdef DEBUG
         int32_t gid;
 #endif // DEBUG
 
+    protected:
+        vmbase::WaitNotifyMonitor m_condition;
+
     private:
-        // locks access to the wait list
-        vmbase::RecursiveMutex m_lock;
-        // 
-        // there is a race condition between notifyXXX() calls
-        // and when a wait record gets into the wait list
-        // if the wait list is empty and a notifyXXX() call is
-        // made this value will be set to the calling thread. 
-        // when there are no records in the wait list and a record is added it's 
-        // notified property will be updated to reflect this value
-        // if the thread id for the record is not the same as the
-        // notified value, this avoids a thread notifying itself. 
-        // this value is protected by the lock for the
-        // wait list.
-        // 
-        vmpi_thread_t notified;
 #ifdef DEBUG
         static int32_t globalId; // global id counter
 #endif // DEBUG
-        // list of all isolate threads currently waiting on this state
-        Isolate::WaitRecord* m_waitListHead;
-        Isolate::WaitRecord* m_waitListTail;
     };
 
 
     /* An aggregate is a collection of isolates that have been transitively created from 
      * a single isolate (the primordial isolate).
      * In the avm shell build there is only one of these but in the Flash Player there would be one
      * for each Player instance (for each <object/> or <embed/> tag).
      */
@@ -369,21 +335,21 @@ namespace avmplus
 
 
         static void initializeGlobals();
         static void reclaimGlobals();
         static void dumpGlobals(); // debugging
 
         void closeChannelsWithEndpoint(Isolate* endpoint);
 
-        virtual void selfExit(Toplevel* toplevel);
+        virtual void throwWorkerTerminatedException(Toplevel* toplevel);
 
 
         GCRef<ObjectVectorObject> listWorkers(Toplevel* toplevel);
-        void runHoldingIsolateMapLock(vmbase::SafepointTask* task);
+        void runSafepointTaskHoldingIsolateMapLock(vmbase::SafepointTask* task);
         void reloadGlobalMemories();
 
         Isolate* getIsolate(int32_t desc);
         Isolate::State queryState(Isolate* isolate);
         static bool isGlobalsLocked();
         void stateTransition(Isolate* isolate, enum Isolate::State to);
         vmbase::SafepointManager* safepointManager()
         {
@@ -413,16 +379,17 @@ namespace avmplus
     // Stack allocated, RAII pattern.
     class EnterSafepointManager
     {
     public:
         EnterSafepointManager(AvmCore* core);
         void cleanup(); // If manual cleanup needed b/c of longjmp.
         ~EnterSafepointManager();
     private:
+		FixedHeapRef<Aggregate> m_aggregate;					// to keep the safepoint mgr alive during shutdown, etc.
         vmbase::SafepointManager* m_safepointMgr;
         vmbase::SafepointRecord m_spRecord;
     };
 
     template <class T>
     class WorkerDomainObjectBase
     {
     public:
--- a/core/concurrent.as
+++ b/core/concurrent.as
@@ -67,112 +67,146 @@ include "api-versions.as"
  *
  * Note that locking and unlocking are not lexically paired.
  * Also note that these mutexes are recursive,
  * i.e. repeated locking increases an internal lock count
  * and only an equal amount of unlock calls finally releases a mutex.
  * Otherwise it stays locked.
  *
  * Any attempt to unlock a mutex that is not owned by the caller throws an error.
+ *
+ * @langversion 3.0
+ * @playerversion Flash 11.4	
+ * @playerversion AIR 3.4
  */
 [API(CONFIG::SWF_17)]
 [native(cls="MutexClass",instance="MutexObject",gc="exact")]
 final public class Mutex
 {
     /**
      * The constructor for mutexes.
      *
      * The initial internal lock count of every new mutex is zero.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
     public function Mutex()
     {
         ctor();
     }
 
     /**
      * Wait until this mutex is available and then take exclusive ownership,
      * increase the mutex's lock count by one, and proceed.
      * This is also known as "acquiring" this mutex.
      *
      * If a thread already owns a mutex when this call is made to lock it,
      * then its internal lock count is increased and no other action occurs.
      *
-     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
     public native function lock() :void
 
     /**
      * Attempt to acquire ownership of this mutex.
      *
      * If and only if the mutex is available, acquire it,
      * increase its lock count, and immediately return true.
      * Otherwise, do not acquire the mutex and immediately return false.
      * Do all of the above atomically.
      *
      * If the mutex is already owned by the current thread/worker,
      * the lock count is increased and no other action occurs.
      *
      * @return true if the lock was acquired, false otherwise.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
     public native function tryLock() :Boolean;
 
     /**
      * Release ownership of this mutex,
      * allowing any thread/worker to acquire it and proceed.
      *
      * This mutex must be locked and owned by the current thread/worker.
      * Otherwise an error is thrown.
      *
      * @throws IllegalOperationException when the current thread doesn't own the mutex.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
     native public function unlock():void;
 
     private native function ctor() :void;
 }
     
 /**
  * A Condition (aka condition variable) is a synchronization primitive that facilitates
  * making threads of execution wait until a particular condition occurs.
  *
  * A condition is always associated with a mutex.
  * It can only be manipulated in conjunction with that mutex.
  * This ensures atomic state transitions for all involved threads of execution.
+ *
+ * @langversion 3.0
+ * @playerversion Flash 11.4	
+ * @playerversion AIR 3.4
  */
 [API(CONFIG::SWF_17)]
 [native(cls="ConditionClass",instance="ConditionObject",gc="exact")]
 final public class Condition 
 {
     /**
      * The constructor for condition variables.
      *
      * @param mutex the mutex associated with the condition
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
     public function Condition(mutex:Mutex)
     {
         if (mutex == null)
         	Error.throwError(ArgumentError, kNullPointerError, "mutex");
         ctor(mutex);
     }
 
     /**
      * Provides readonly access to mutex associated with this condition
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
      public native function get mutex():Mutex;
 
 
     /**
      * Releases the condition's mutex and then suspends the current thread/worker
      * until it is awoken by 'notify()' or 'notifyAll()'. If the mutex is owned recursively,
      * the recursion count will be restored upon return from wait.
      *
      * The current thread/worker must "own" the condition's mutex when making this call.
      * Otherwise an exception is thrown and the mutex and the condition remain unaffected.
      * @throws IllegalOperationException when the mutex is not owned by the current thread.
      * @param timeout timeout in milliseconds, -1 if no timeout, fractional values will be rounded up to the nearest millisecond.
      * @return false if wait() returned due to timeout, otherwise true.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4	
+     * @playerversion AIR 3.4
      */
     public function wait(timeout:Number = -1) :Boolean
     {
         if (timeout < 0 && timeout != -1) {
             Error.throwError(ArgumentError, kConditionInvalidTimeoutError);
         }
         return waitImpl(Math.ceil(timeout));
     }
@@ -181,16 +215,20 @@ final public class Condition
      * Wakes up one of the threads/workers waiting on this condition, if any, 
      * and releases its mutex.
      * The awoken thread/worker acquires the mutex and then starts executing.
      * All of the above happens atomically.
      *
      * The current thread/worker must "own" the condition's mutex when making this call.
      * Otherwise an exception is thrown and the mutex and the condition remain unaffected.
      * @throws IllegalOperationError if the condition's mutex is not owned by the current thread.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4
+     * @playerversion AIR 3.4
      */
     public function notify() :void
     {
         if (!notifyImpl()) {
         	Error.throwError(IllegalOperationError, kConditionCannotNotifyError);
         }
     }
 
@@ -202,16 +240,20 @@ final public class Condition
      *
      * The awoken threads acquire the mutex and proceed one by one, in wait order.
      * Each thread continues to wait until its predecessor releases the mutex
      * (by calling Mutex.unlock or Condition.wait().
      *
      * The current thread/worker must "own" the condition's mutex when making this call.
      * Otherwise an exception is thrown and the mutex and the condition remain unaffected.
      * @throws IllegalOperationError if the condition's mutex is not owned by the current thread.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4
+     * @playerversion AIR 3.4
      */
     public function notifyAll() :void
     {
         if (!notifyAllImpl()) {
         	Error.throwError(IllegalOperationError, kConditionCannotNotifyAllError);
         }
     }
 
@@ -224,16 +266,31 @@ final public class Condition
 	private static const kConditionInvalidTimeoutError:uint = 1415;
 	private static const kConditionCannotNotifyError:uint = 1516;
 	private static const kConditionCannotNotifyAllError:uint = 1517;
 }
 }
 
 package avm2.intrinsics.memory
 {
+    /**
+     * A complete memory barrier for domainMemory (for both load and store instructions).
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4
+     * @playerversion AIR 3.4
+     */
 	[API(CONFIG::SWF_17)]
 	[native("ConcurrentMemory::mfence")]
 	public native function mfence():void;
+    /**
+     * A compare and swap for domainMemory.
+     * Behaves like ByteArray.atomicCompareAndSwapIntAt but operates on the current domainMemory.
+     *
+     * @langversion 3.0
+     * @playerversion Flash 11.4
+     * @playerversion AIR 3.4
+     */
 	[API(CONFIG::SWF_17)]
 	[native("ConcurrentMemory::casi32")]
 	public native function casi32(addr:int, expectedVal:int, newVal:int):int;
 }
 
new file mode 100644
--- /dev/null
+++ b/test/acceptance/as3/Workers/ByteArrayWorkersLzma.as
@@ -0,0 +1,101 @@
+/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package {
+
+    import flash.utils.ByteArray
+    import flash.system.Worker
+    import flash.system.WorkerDomain
+    import avmplus.System
+
+    var MSG_NEW=0
+    var MSG_READY=1
+    var MSG_COMPRESS=2
+    var MSG_COMPRESS_ACK=3
+    var MSG_UNCOMPRESS=4
+    var MSG_UNCOMPRESS_ACK=5
+    var MSG_DONE=7;
+
+    var data:ByteArray;
+    var message:ByteArray;
+    var done:Boolean=false;
+
+    if (Worker.current.isPrimordial) {
+        var SECTION = "Workers";
+        var VERSION = "as3";
+        var TITLE   = "test ByteArray uncompress/compress across workers";
+        startTest();
+        writeHeaderToLog(SECTION+" "+TITLE);
+
+        data=new ByteArray();
+
+    // test 1: compress shareable bytearray
+        data.shareable=true;
+        data.writeUTF("hello world");
+        AddTestCase("shareable byte array writeUTF initial value","hello world",data.toString().substring(2));
+        AddTestCase("shareable byte array writeUTF initial length",13,data.length);
+        print("initial: "+data.length+" "+data.toString().substring(2));
+        data.compress("lzma");
+        AddTestCase("shareable byte array compress alters data",true,"hello world"!=data.toString().substring(2));
+        AddTestCase("shareable byte array compress length",31,data.length);
+        print("compress: "+data.length+" "+data.toString().substring(2));
+        data.uncompress("lzma");
+        AddTestCase("shareable byte array uncompress restores initial value","hello world",data.toString().substring(2));
+        AddTestCase("shareable byte array uncompress restores initial length",13,data.length);
+
+        // test 2: compress/decompress the byte array, test the remote byte array can see the change
+        message=new ByteArray();
+        message[0]=MSG_NEW;  // status from main
+        message[1]=MSG_NEW;  // status from background
+        message.shareable=true;
+
+        var worker:Worker=WorkerDomain.current.createWorkerFromPrimordial();
+        worker.setSharedProperty("message",message);
+        worker.setSharedProperty("data",data);
+        worker.start();
+        message[0]=MSG_READY;
+    
+        while (!done) {
+            if (message[1]==MSG_READY) {
+                print("[0] ready "+data.length+" "+data.toString().substring(2));
+                AddTestCase("shareable byte array after worker start initial value","hello world",data.toString().substring(2));
+                AddTestCase("shareable byte array after worker start initial length",13,data.length);
+                message[1]=MSG_COMPRESS;
+            } else if (message[1]==MSG_COMPRESS_ACK) {
+                print("[0] compressed "+data.length+" "+data);
+                AddTestCase("shareable byte array after worker compress alters data",true,"hello world"!=data.toString().substring(2));
+                AddTestCase("shareable byte array after worker compress length",31,data.length);
+                message[1]=MSG_UNCOMPRESS;
+            } else if (message[1]==MSG_UNCOMPRESS_ACK) {
+                print("[0] uncompressed "+data.length+" "+data.toString().substring(2));
+                AddTestCase("shareable byte array after worker uncompress restores initial value","hello world",data.toString().substring(2));
+                AddTestCase("shareable byte array after worker uncompress restores initial length",13,data.length);
+                message[1]=MSG_DONE;
+                break;
+            }
+        }
+        test();
+    } else {
+        print("in background worker");
+        message=Worker.current.getSharedProperty("message");
+        data=Worker.current.getSharedProperty("data");
+        message[1]=MSG_READY;
+
+        while (true) {
+            if (message[1]==MSG_COMPRESS) {
+                data.compress("lzma");
+                message[1]=MSG_COMPRESS_ACK;
+            } else if (message[1]==MSG_UNCOMPRESS) {
+                data.uncompress("lzma");
+                message[1]=MSG_UNCOMPRESS_ACK;
+            } else if (message[1]==MSG_DONE) {
+                break;
+            }
+        }
+        print("[1] done");
+        Worker.current.terminate();
+    }
+}
--- a/vmbase/Safepoint.cpp
+++ b/vmbase/Safepoint.cpp
@@ -59,16 +59,19 @@ namespace vmbase {
         , m_requester((vmpi_thread_t) 0)
         , m_hardwareConcurrency(VMPI_processorQtyAtBoot())
     {
     }
 
     SafepointManager::~SafepointManager()
     {
         assert(m_records == NULL);
+        // OOM paths can skip SafepointRecord::leave(); clear the stale thread-local record (debug builds hit the assert above first).
+        if (m_records != NULL)
+            SafepointRecord::setCurrent(NULL);
     }
 
     void SafepointManager::requestSafepointTask(SafepointTask& task)
     {
         assert(SafepointRecord::hasCurrent());
         assert(SafepointRecord::current()->m_manager == this);
         assert(!inSafepointTask());
 
--- a/vmbase/VMThread.h
+++ b/vmbase/VMThread.h
@@ -457,16 +457,17 @@ namespace vmbase {
         ~MonitorLocker();
 
         // These functions just delegate to those of the locked WaitNotifyMonitor.
         // See WaitNotifyMonitor for their documentation.
         void wait();
         bool wait(int32_t timeout_millis);
         void notify();
         void notifyAll();
+        WaitNotifyMonitor* getMonitor() const { return &m_monitor; }
 
         operator bool () const {return false;} // For the SCOPE_LOCK* macros
 
     private:
         // No copying allowed: undefined semantics
 #ifdef VMCFG_SAFEPOINTS
         const MonitorLocker<BLOCKING_MODE>& operator=(const MonitorLocker<BLOCKING_MODE>& locker);
 #else