Bug 951827 - Support absolute jumps in hooks (rollup patch). r=ehsan, a=sledru
☠☠ backed out by cf50055d30af ☠ ☠
author David Major <dmajor@mozilla.com>
Mon, 24 Feb 2014 10:32:36 -0500
changeset 182970 638b67508a3e70764c4afc2e1c366ce6cf54729f
parent 182969 70aded42520e99b33c9539b376ac330fb7939657
child 182971 1ed9ed99b7635bd7bbbc6f24a60f409b72868d06
push id 3343
push user ffxbld
push date Mon, 17 Mar 2014 21:55:32 +0000
treeherder mozilla-beta@2f7d3415f79f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ehsan, sledru
bugs 951827
milestone 29.0a2
Bug 951827 - Support absolute jumps in hooks (rollup patch). r=ehsan, a=sledru
mozglue/build/WindowsDllBlocklist.cpp
toolkit/xre/nsWindowsDllInterceptor.h
toolkit/xre/test/win/TestDllInterceptor.cpp
--- a/mozglue/build/WindowsDllBlocklist.cpp
+++ b/mozglue/build/WindowsDllBlocklist.cpp
@@ -609,17 +609,18 @@ DllBlocklist_Initialize()
   if (GetModuleHandleA("user32.dll")) {
     sUser32BeforeBlocklist = true;
   }
 
   NtDllIntercept.Init("ntdll.dll");
 
   ReentrancySentinel::InitializeStatics();
 
-  bool ok = NtDllIntercept.AddHook("LdrLoadDll", reinterpret_cast<intptr_t>(patched_LdrLoadDll), (void**) &stub_LdrLoadDll);
+  // Use a shared hook since other software may also hook this API (bug 951827)
+  bool ok = NtDllIntercept.AddSharedHook("LdrLoadDll", reinterpret_cast<intptr_t>(patched_LdrLoadDll), (void**) &stub_LdrLoadDll);
 
   if (!ok) {
     sBlocklistInitFailed = true;
 #ifdef DEBUG
     printf_stderr ("LdrLoadDll hook failed, no dll blocklisting active\n");
 #endif
   }
 }
--- a/toolkit/xre/nsWindowsDllInterceptor.h
+++ b/toolkit/xre/nsWindowsDllInterceptor.h
@@ -36,18 +36,23 @@
  * Unfortunately nop space patching doesn't work on functions which don't have
  * this magic prelude (and in particular, x86-64 never has the prelude).  So
  * when we can't use the built-in nop space, we fall back to using a detour,
  * which works as follows:
  *
  * 1. Save first N bytes of OrigFunction to trampoline, where N is a
  *    number of bytes >= 5 that are instruction aligned.
  *
- * 2. Replace first 5 bytes of OrigFunction with a jump to the Hook
+ * 2. (Usually) Replace first 5 bytes of OrigFunction with a jump to the hook
  *    function.
+ *    (Special "shared" mode) Replace first 6 bytes of OrigFunction with an
+ *    indirect jump to the hook function. "Shared" means that other software
+ *    also tries to hook the same function. The indirect jump uses an absolute
+ *    address, which allows us to coexist with other hooks that don't know how
+ *    to relocate our 5-byte PC-relative jump.
  *
  * 3. After N bytes of the trampoline, add a jump to OrigFunction+N to
  *    continue original program flow.
  *
  * 4. Hook function needs to call the trampoline during its execution,
  *    to invoke the original function (so address of trampoline is
  *    returned).
  *
@@ -71,29 +76,36 @@ class WindowsDllNopSpacePatcher
   HMODULE mModule;
 
   // Dumb array for remembering the addresses of functions we've patched.
   // (This should be nsTArray, but non-XPCOM code uses this class.)
   static const size_t maxPatchedFns = 128;
   byteptr_t mPatchedFns[maxPatchedFns];
   int mPatchedFnsLen;
 
+  static const uint16_t opTrampolineShortJump = 0xf9eb;
+
 public:
   WindowsDllNopSpacePatcher()
     : mModule(0)
     , mPatchedFnsLen(0)
   {}
 
   ~WindowsDllNopSpacePatcher()
   {
     // Restore the mov edi, edi to the beginning of each function we patched.
 
     for (int i = 0; i < mPatchedFnsLen; i++) {
       byteptr_t fn = mPatchedFns[i];
 
+      // If other code has changed this function, it is not safe to modify.
+      if (*((uint16_t*)fn) != opTrampolineShortJump) {
+        continue;
+      }
+
       // Ensure we can write to the code.
       DWORD op;
       if (!VirtualProtectEx(GetCurrentProcess(), fn, 2, PAGE_EXECUTE_READWRITE, &op)) {
         // printf("VirtualProtectEx failed! %d\n", GetLastError());
         continue;
       }
 
       // mov edi, edi
@@ -185,17 +197,17 @@ public:
     fn[-5] = 0xe9; // jmp
     *((intptr_t*)(fn - 4)) = hookDest - (uintptr_t)(fn); // target displacement
 
     // Set origFunc here, because after this point, hookDest might be called,
     // and hookDest might use the origFunc pointer.
     *origFunc = fn + 2;
 
     // Short jump up into our long jump.
-    *((uint16_t*)(fn)) = 0xf9eb; // jmp $-5
+    *((uint16_t*)(fn)) = opTrampolineShortJump; // jmp $-5
 
     // I think this routine is safe without this, but it can't hurt.
     FlushInstructionCache(GetCurrentProcess(),
                           /* ignored */ nullptr,
                           /* ignored */ 0);
 
     return true;
   }
@@ -207,16 +219,22 @@ public:
   }
 #endif
 };
 
 class WindowsDllDetourPatcher
 {
   typedef unsigned char *byteptr_t;
 public:
+  enum JumpType
+  {
+    JUMP_DONTCARE,
+    JUMP_ABSOLUTE
+  };
+
   WindowsDllDetourPatcher() 
     : mModule(0), mHookPage(0), mMaxHooks(0), mCurHooks(0)
   {
   }
 
   ~WindowsDllDetourPatcher()
   {
     int i;
@@ -224,28 +242,54 @@ public:
     for (i = 0, p = mHookPage; i < mCurHooks; i++, p += kHookSize) {
 #if defined(_M_IX86)
       size_t nBytes = 1 + sizeof(intptr_t);
 #elif defined(_M_X64)
       size_t nBytes = 2 + sizeof(intptr_t);
 #else
 #error "Unknown processor type"
 #endif
-      byteptr_t origBytes = *((byteptr_t *)p);
+      Trampoline *tramp = (Trampoline*)p;
+      byteptr_t origBytes = (byteptr_t)tramp->origFunction;
+
+      // If CreateTrampoline failed, we may have an empty trampoline.
+      if (!origBytes) {
+        continue;
+      }
+
+      // If other code has changed this function, it is not safe to modify.
+#if defined(_M_IX86)
+      if (tramp->jumpType != JUMP_ABSOLUTE &&
+          *origBytes != opTrampolineRelativeJump) {
+        continue;
+      }
+#elif defined(_M_X64)
+      if (*((uint16_t*)origBytes) != opTrampolineRegLoad) {
+        continue;
+      }
+#else
+#error "Unknown processor type"
+#endif
+
       // ensure we can modify the original code
       DWORD op;
       if (!VirtualProtectEx(GetCurrentProcess(), origBytes, nBytes, PAGE_EXECUTE_READWRITE, &op)) {
         //printf ("VirtualProtectEx failed! %d\n", GetLastError());
         continue;
       }
       // Remove the hook by making the original function jump directly
       // in the trampoline.
-      intptr_t dest = (intptr_t)(p + sizeof(void *));
+      intptr_t dest = (intptr_t)(&tramp->code[0]);
 #if defined(_M_IX86)
-      *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
+      if (tramp->jumpType == JUMP_ABSOLUTE) {
+        // Absolute jumps on x86 are done indirectly via tramp->jumpTarget
+        tramp->jumpTarget = dest;
+      } else {
+        *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
+      }
 #elif defined(_M_X64)
       *((intptr_t*)(origBytes+2)) = dest;
 #else
 #error "Unknown processor type"
 #endif
       // restore protection; if this fails we can't really do anything about it
       VirtualProtectEx(GetCurrentProcess(), origBytes, nBytes, op, &op);
     }
@@ -290,62 +334,80 @@ public:
       return;
 
     DWORD op;
     VirtualProtectEx(GetCurrentProcess(), mHookPage, mMaxHooks * kHookSize, PAGE_EXECUTE_READ, &op);
 
     mModule = 0;
   }
 
-  bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
+  bool AddHook(const char *pname, intptr_t hookDest, JumpType jumpType, void **origFunc)
   {
     if (!mModule)
       return false;
 
     void *pAddr = (void *) GetProcAddress(mModule, pname);
     if (!pAddr) {
       //printf ("GetProcAddress failed\n");
       return false;
     }
 
-    CreateTrampoline(pAddr, hookDest, origFunc);
+    CreateTrampoline(pAddr, hookDest, jumpType, origFunc);
     if (!*origFunc) {
       //printf ("CreateTrampoline failed\n");
       return false;
     }
 
     return true;
   }
 
 protected:
   const static int kPageSize = 4096;
   const static int kHookSize = 128;
+  const static int kCodeSize = 100;
+
+  const static uint8_t opTrampolineRelativeJump = 0xe9;
+  const static uint16_t opTrampolineIndirectJump = 0x25ff;
+  const static uint16_t opTrampolineRegLoad = 0xbb49;
 
   HMODULE mModule;
   byteptr_t mHookPage;
   int mMaxHooks;
   int mCurHooks;
 
+  struct Trampoline
+  {
+    void *origFunction;
+    JumpType jumpType;
+    intptr_t jumpTarget;
+    uint8_t code[kCodeSize];
+  };
+
+  static_assert(sizeof(Trampoline) <= kHookSize, "Trampolines too big");
+
   void CreateTrampoline(void *origFunction,
                         intptr_t dest,
+                        JumpType jumpType,
                         void **outTramp)
   {
     *outTramp = nullptr;
 
-    byteptr_t tramp = FindTrampolineSpace();
+    Trampoline *tramp = FindTrampolineSpace();
     if (!tramp)
       return;
 
     byteptr_t origBytes = (byteptr_t) origFunction;
 
     int nBytes = 0;
     int pJmp32 = -1;
 
 #if defined(_M_IX86)
-    while (nBytes < 5) {
+    const int bytesNeeded = (jumpType == JUMP_ABSOLUTE) ? 6 : 5;
+
+    while (nBytes < bytesNeeded) {
       // Understand some simple instructions that might be found in a
       // prologue; we might need to extend this as necessary.
       //
       // Note!  If we ever need to understand jump instructions, we'll
       // need to rewrite the displacement argument.
       if (origBytes[nBytes] >= 0x88 && origBytes[nBytes] <= 0x8B) {
         // various MOVs
         unsigned char b = origBytes[nBytes+1];
@@ -379,20 +441,32 @@ protected:
         // PUSH with 4-byte operand
         nBytes += 5;
       } else if ((origBytes[nBytes] & 0xf0) == 0x50) {
         // 1-byte PUSH/POP
         nBytes++;
       } else if (origBytes[nBytes] == 0x6A) {
         // PUSH imm8
         nBytes += 2;
+      } else if (origBytes[nBytes] == 0xa1) {
+        // MOV EAX, dword ptr [m32]
+        nBytes += 5;
       } else if (origBytes[nBytes] == 0xe9) {
         pJmp32 = nBytes;
         // jmp 32bit offset
         nBytes += 5;
+      } else if (origBytes[nBytes] == 0xf6 &&
+                 origBytes[nBytes+1] == 0x05) {
+        // TEST byte ptr [m32], imm8
+        nBytes += 7;
+      } else if (origBytes[nBytes] == 0xff &&
+                 origBytes[nBytes+1] == 0x25) {
+        // JMP dword ptr [m32]
+        // This is an indirect absolute jump; don't set pJmp32
+        nBytes += 6;
       } else {
         //printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n", origBytes[nBytes]);
         return;
       }
     }
 #elif defined(_M_X64)
     byteptr_t directJmpAddr;
 
@@ -534,116 +608,121 @@ protected:
       } else {
         return;
       }
     }
 #else
 #error "Unknown processor type"
 #endif
 
-    if (nBytes > 100) {
+    if (nBytes > kCodeSize) {
       //printf ("Too big!");
       return;
     }
 
     // We keep the address of the original function in the first bytes of
     // the trampoline buffer
-    *((void **)tramp) = origFunction;
-    tramp += sizeof(void *);
+    tramp->origFunction = origFunction;
+    tramp->jumpType = jumpType;
+    tramp->jumpTarget = dest;
 
-    memcpy(tramp, origFunction, nBytes);
+    memcpy(&tramp->code[0], origFunction, nBytes);
 
     // OrigFunction+N, the target of the trampoline
     byteptr_t trampDest = origBytes + nBytes;
 
 #if defined(_M_IX86)
     if (pJmp32 >= 0) {
       // Jump directly to the original target of the jump instead of jumping to the
       // original function.
       // Adjust jump target displacement to jump location in the trampoline.
-      *((intptr_t*)(tramp+pJmp32+1)) += origBytes + pJmp32 - tramp;
+      *((intptr_t*)(&tramp->code[pJmp32+1])) += origBytes + pJmp32 - &tramp->code[0];
     } else {
-      tramp[nBytes] = 0xE9; // jmp
-      *((intptr_t*)(tramp+nBytes+1)) = (intptr_t)trampDest - (intptr_t)(tramp+nBytes+5); // target displacement
+      tramp->code[nBytes] = opTrampolineRelativeJump; // jmp
+      *((intptr_t*)(&tramp->code[nBytes+1])) = (intptr_t)trampDest - (intptr_t)(&tramp->code[nBytes+5]); // target displacement
     }
 #elif defined(_M_X64)
     // If JMP32 opcode found, we don't insert to trampoline jump 
     if (pJmp32 >= 0) {
       // mov r11, address
-      tramp[pJmp32]   = 0x49;
-      tramp[pJmp32+1] = 0xbb;
-      *((intptr_t*)(tramp+pJmp32+2)) = (intptr_t)directJmpAddr;
+      *((uint16_t*)(&tramp->code[pJmp32])) = opTrampolineRegLoad;
+      *((intptr_t*)(&tramp->code[pJmp32+2])) = (intptr_t)directJmpAddr;
 
       // jmp r11
-      tramp[pJmp32+10] = 0x41;
-      tramp[pJmp32+11] = 0xff;
-      tramp[pJmp32+12] = 0xe3;
+      tramp->code[pJmp32+10] = 0x41;
+      tramp->code[pJmp32+11] = 0xff;
+      tramp->code[pJmp32+12] = 0xe3;
     } else {
       // mov r11, address
-      tramp[nBytes] = 0x49;
-      tramp[nBytes+1] = 0xbb;
-      *((intptr_t*)(tramp+nBytes+2)) = (intptr_t)trampDest;
+      *((uint16_t*)(&tramp->code[nBytes])) = opTrampolineRegLoad;
+      *((intptr_t*)(&tramp->code[nBytes+2])) = (intptr_t)trampDest;
 
       // jmp r11
-      tramp[nBytes+10] = 0x41;
-      tramp[nBytes+11] = 0xff;
-      tramp[nBytes+12] = 0xe3;
+      tramp->code[nBytes+10] = 0x41;
+      tramp->code[nBytes+11] = 0xff;
+      tramp->code[nBytes+12] = 0xe3;
     }
 #endif
 
     // The trampoline is now valid.
-    *outTramp = tramp;
+    *outTramp = &tramp->code[0];
 
     // ensure we can modify the original code
     DWORD op;
     if (!VirtualProtectEx(GetCurrentProcess(), origFunction, nBytes, PAGE_EXECUTE_READWRITE, &op)) {
       //printf ("VirtualProtectEx failed! %d\n", GetLastError());
       return;
     }
 
 #if defined(_M_IX86)
     // now modify the original bytes
-    origBytes[0] = 0xE9; // jmp
-    *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
+    if (jumpType == JUMP_ABSOLUTE) {
+      // Indirect jump with absolute address of pointer
+      // jmp dword ptr [&tramp->jumpTarget]
+      *((uint16_t*)(origBytes)) = opTrampolineIndirectJump;
+      *((intptr_t*)(origBytes+2)) = (intptr_t)&tramp->jumpTarget;
+    } else {
+      origBytes[0] = opTrampolineRelativeJump; // jmp rel32
+      *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
+    }
 #elif defined(_M_X64)
     // mov r11, address
-    origBytes[0] = 0x49;
-    origBytes[1] = 0xbb;
-
+    *((uint16_t*)(origBytes)) = opTrampolineRegLoad;
     *((intptr_t*)(origBytes+2)) = dest;
 
     // jmp r11
     origBytes[10] = 0x41;
     origBytes[11] = 0xff;
     origBytes[12] = 0xe3;
 #endif
 
     // restore protection; if this fails we can't really do anything about it
     VirtualProtectEx(GetCurrentProcess(), origFunction, nBytes, op, &op);
   }
 
-  byteptr_t FindTrampolineSpace()
+  Trampoline* FindTrampolineSpace()
   {
     if (mCurHooks >= mMaxHooks)
-      return 0;
+      return nullptr;
 
     byteptr_t p = mHookPage + mCurHooks*kHookSize;
 
     mCurHooks++;
 
-    return p;
+    return (Trampoline*)p;
   }
 };
 
 } // namespace internal
 
 class WindowsDllInterceptor
 {
   internal::WindowsDllNopSpacePatcher mNopSpacePatcher;
   internal::WindowsDllDetourPatcher mDetourPatcher;
+  typedef internal::WindowsDllDetourPatcher::JumpType JumpType;
 
   const char *mModuleName;
   int mNHooks;
 
 public:
   WindowsDllInterceptor()
     : mModuleName(nullptr)
     , mNHooks(0)
@@ -681,17 +760,39 @@ public:
       return true;
     }
 
     if (!mDetourPatcher.Initialized()) {
       // printf("Initializing detour patcher.\n");
       mDetourPatcher.Init(mModuleName, mNHooks);
     }
 
-    bool rv = mDetourPatcher.AddHook(pname, hookDest, origFunc);
+    bool rv = mDetourPatcher.AddHook(pname, hookDest, JumpType::JUMP_DONTCARE,
+                                     origFunc);
+
     // printf("detourPatcher returned %d\n", rv);
     return rv;
   }
+
+  bool AddSharedHook(const char *pname, intptr_t hookDest, void **origFunc)
+  {
+    if (!mModuleName) {
+      return false;
+    }
+
+    // Skip the nop-space patcher and use only the detour patcher. Nop-space
+    // patches use relative jumps, which are not safe to share.
+
+    if (!mDetourPatcher.Initialized()) {
+      mDetourPatcher.Init(mModuleName, mNHooks);
+    }
+
+    bool rv = mDetourPatcher.AddHook(pname, hookDest, JumpType::JUMP_ABSOLUTE,
+                                     origFunc);
+
+    return rv;
+  }
+
 };
 
 } // namespace mozilla
 
 #endif /* NS_WINDOWS_DLL_INTERCEPTOR_H_ */
--- a/toolkit/xre/test/win/TestDllInterceptor.cpp
+++ b/toolkit/xre/test/win/TestDllInterceptor.cpp
@@ -33,16 +33,41 @@ static payload (*orig_rotatePayload)(pay
 
 static payload
 patched_rotatePayload(payload p)
 {
   patched_func_called = true;
   return orig_rotatePayload(p);
 }
 
+__declspec(noinline) bool AlwaysTrue(int, int, int, int, int, int) {
+  // Dummy function that makes the caller recognizable by the detour patcher
+  return true;
+}
+
+extern "C" __declspec(dllexport) __declspec(noinline) uint32_t SetBits(uint32_t x)
+{
+  if (AlwaysTrue(1, 2, 3, 4, 5, 6)) {
+    return x | 0x11;
+  }
+  return 0;
+}
+
+static uint32_t (*orig_SetBits_early)(uint32_t);
+static uint32_t patched_SetBits_early(uint32_t x)
+{
+  return orig_SetBits_early(x) | 0x2200;
+}
+
+static uint32_t (*orig_SetBits_late)(uint32_t);
+static uint32_t patched_SetBits_late(uint32_t x)
+{
+  return orig_SetBits_late(x) | 0x330000;
+}
+
 bool TestHook(const char *dll, const char *func)
 {
   void *orig_func;
   bool successful = false;
   {
     WindowsDllInterceptor TestIntercept;
     TestIntercept.Init(dll);
     successful = TestIntercept.AddHook(func, 0, &orig_func);
@@ -52,16 +77,35 @@ bool TestHook(const char *dll, const cha
     printf("TEST-PASS | WindowsDllInterceptor | Could hook %s from %s\n", func, dll);
     return true;
   } else {
     printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to hook %s from %s\n", func, dll);
     return false;
   }
 }
 
+bool TestSharedHook(const char *dll, const char *func)
+{
+  void *orig_func;
+  bool successful = false;
+  {
+    WindowsDllInterceptor TestIntercept;
+    TestIntercept.Init(dll);
+    successful = TestIntercept.AddSharedHook(func, 0, &orig_func);
+  }
+
+  if (successful) {
+    printf("TEST-PASS | WindowsDllInterceptor | Could hook (shared) %s from %s\n", func, dll);
+    return true;
+  } else {
+    printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to hook (shared) %s from %s\n", func, dll);
+    return false;
+  }
+}
+
 int main()
 {
   payload initial = { 0x12345678, 0xfc4e9d31, 0x87654321 };
   payload p0, p1;
   ZeroMemory(&p0, sizeof(p0));
   ZeroMemory(&p1, sizeof(p1));
 
   p0 = rotatePayload(initial);
@@ -107,16 +151,76 @@ int main()
 
   if (p0 == p1) {
     printf("TEST-PASS | WindowsDllInterceptor | Original function worked properly\n");
   } else {
     printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Original function didn't return the right information\n");
     return 1;
   }
 
+#ifdef _M_IX86
+  // The x64 detour patcher does not understand the assembly code of SetBits.
+  // We only need these shared hook tests on x86 anyway, because shared hooks
+  // are the same as regular hooks on x64.
+
+  // Despite the noinline annotation, the compiler may try to re-use the
+  // return value of SetBits(0). Force it to call the function every time.
+  uint32_t (*volatile SetBitsVolatile)(uint32_t) = SetBits;
+
+  {
+    WindowsDllInterceptor ExeInterceptEarly;
+    ExeInterceptEarly.Init("TestDllInterceptor.exe");
+    if (ExeInterceptEarly.AddSharedHook("SetBits", reinterpret_cast<intptr_t>(patched_SetBits_early), (void**) &orig_SetBits_early)) {
+      printf("TEST-PASS | WindowsDllInterceptor | Early hook added\n");
+    } else {
+      printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to add early hook\n");
+      return 1;
+    }
+
+    if (SetBitsVolatile(0) == 0x2211) {
+      printf("TEST-PASS | WindowsDllInterceptor | Early hook was called\n");
+    } else {
+      printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Early hook was not called\n");
+      return 1;
+    }
+
+    {
+      WindowsDllInterceptor ExeInterceptLate;
+      ExeInterceptLate.Init("TestDllInterceptor.exe");
+      if (ExeInterceptLate.AddHook("SetBits", reinterpret_cast<intptr_t>(patched_SetBits_late), (void**) &orig_SetBits_late)) {
+        printf("TEST-PASS | WindowsDllInterceptor | Late hook added\n");
+      } else {
+        printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to add late hook\n");
+        return 1;
+      }
+
+      if (SetBitsVolatile(0) == 0x332211) {
+        printf("TEST-PASS | WindowsDllInterceptor | Late hook was called\n");
+      } else {
+        printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Late hook was not called\n");
+        return 1;
+      }
+    }
+
+    if (SetBitsVolatile(0) == 0x2211) {
+      printf("TEST-PASS | WindowsDllInterceptor | Late hook was unregistered\n");
+    } else {
+      printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Late hook was not unregistered\n");
+      return 1;
+    }
+  }
+
+  if (SetBitsVolatile(0) == 0x11) {
+    printf("TEST-PASS | WindowsDllInterceptor | Early hook was unregistered\n");
+  } else {
+    printf("TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Early hook was not unregistered\n");
+    return 1;
+  }
+#endif
+
   if (TestHook("user32.dll", "GetWindowInfo") &&
 #ifdef _WIN64
       TestHook("user32.dll", "SetWindowLongPtrA") &&
       TestHook("user32.dll", "SetWindowLongPtrW") &&
 #else
       TestHook("user32.dll", "SetWindowLongA") &&
       TestHook("user32.dll", "SetWindowLongW") &&
 #endif
@@ -134,15 +238,18 @@ int main()
       // Bug 733892: toolkit/crashreporter/nsExceptionHandler.cpp
       TestHook("kernel32.dll", "SetUnhandledExceptionFilter") &&
 #ifdef _M_IX86
       // Bug 670967: xpcom/base/AvailableMemoryTracker.cpp
       TestHook("kernel32.dll", "VirtualAlloc") &&
       TestHook("kernel32.dll", "MapViewOfFile") &&
       TestHook("gdi32.dll", "CreateDIBSection") &&
 #endif
+#ifdef _M_IX86 // Shared hooks are the same as regular hooks on x64
+      TestSharedHook("ntdll.dll", "LdrLoadDll") &&
+#endif
       TestHook("ntdll.dll", "LdrLoadDll")) {
     printf("TEST-PASS | WindowsDllInterceptor | all checks passed\n");
     return 0;
   }
 
   return 1;
 }