LCMS precaching patch to speed up linear interpolations - bug 444661. r=joe,sr=vlad
author Bobby Holley <bholley@mozilla.com>
Fri, 15 Aug 2008 13:59:18 -0700
changeset 16720 ba60f155cc44c4d02f91c0982be2593936206367
parent 16719 7a37832c471aeb052e8f3291c41c2c9807855321
child 16721 8410e5981323f5465fa54289788813d509eeb1dc
push id unknown
push user unknown
push date unknown
reviewers joe, vlad
bugs 444661
milestone 1.9.1a2pre
LCMS precaching patch to speed up linear interpolations - bug 444661. r=joe,sr=vlad
gfx/thebes/src/gfxPlatform.cpp
gfx/thebes/test/gfxColorManagementTest.cpp
modules/lcms/NEWS
modules/lcms/include/lcms.h
modules/lcms/src/Makefile.in
modules/lcms/src/cmsintrp.c
modules/lcms/src/cmsio1.c
modules/lcms/src/cmsmatsh.c
modules/lcms/src/cmsprecache.c
modules/lcms/src/cmsxform.c
modules/lcms/src/lcms.def
--- a/gfx/thebes/src/gfxPlatform.cpp
+++ b/gfx/thebes/src/gfxPlatform.cpp
@@ -554,26 +554,37 @@ gfxPlatform::GetCMSOutputProfile()
         if (!gCMSOutputProfile) {
             gCMSOutputProfile =
                 gfxPlatform::GetPlatform()->GetPlatformCMSOutputProfile();
         }
 
         if (!gCMSOutputProfile) {
             gCMSOutputProfile = GetCMSsRGBProfile();
         }
+
+        /* Precache the LUT16 Interpolations for the output profile. See 
+           bug 444661 for details. */
+        cmsPrecacheProfile(gCMSOutputProfile, CMS_PRECACHE_LI1616_REVERSE);
     }
 
     return gCMSOutputProfile;
 }
 
 cmsHPROFILE
 gfxPlatform::GetCMSsRGBProfile()
 {
-    if (!gCMSsRGBProfile)
+    if (!gCMSsRGBProfile) {
+
+        /* Create the profile using lcms. */
         gCMSsRGBProfile = cmsCreate_sRGBProfile();
+
+        /* Precache the Fixed-point Interpolations for sRGB as an input
+           profile. See bug 444661 for details. */
+        cmsPrecacheProfile(gCMSsRGBProfile, CMS_PRECACHE_LI16W_FORWARD);
+    }
     return gCMSsRGBProfile;
 }
 
 cmsHTRANSFORM
 gfxPlatform::GetCMSRGBTransform()
 {
     if (!gCMSRGBTransform) {
         cmsHPROFILE inProfile, outProfile;
--- a/gfx/thebes/test/gfxColorManagementTest.cpp
+++ b/gfx/thebes/test/gfxColorManagementTest.cpp
@@ -293,16 +293,20 @@ RunTest(struct TestContext *ctx, struct 
     strcpy(filePath, ctx->basePath);
     strcat(filePath, "/");
     strcat(filePath, profileDir);
     strcat(filePath, "/");
     strcat(filePath, params->oProfileName);
     outProfile = cmsOpenProfileFromFile(filePath, "r");
     CHECK(outProfile != NULL, status, -1, "unable to open input profile!\n", done);
 
+    /* Precache. */
+    cmsPrecacheProfile(inProfile, CMS_PRECACHE_LI16W_FORWARD);
+    cmsPrecacheProfile(outProfile, CMS_PRECACHE_LI1616_REVERSE);
+
     /* Create the transform. */
     transform = cmsCreateTransform(inProfile, TYPE_RGB_8, 
                                    outProfile, TYPE_RGB_8, 
                                    testedIntents[intentIndex], 0);
     CHECK(transform != NULL, status, -1, "unable to create transform!\n", done);
 
     /* Do the transform. */
     cmsDoTransform(transform, ctx->src, ctx->dst, BITMAP_PIXEL_COUNT);
--- a/modules/lcms/NEWS
+++ b/modules/lcms/NEWS
@@ -1,8 +1,16 @@
+ New in Mozilla patchset
+ =======================
+ -A precaching framework has been added, currently with support for precaching
+ linear interpolations of various formats
+ -inline assembly for linear interpolation has been removed on the grounds
+ that it is actually much slower than generated code as a result of 'div'
+ instructions
+ 
 
  New in ver 1.17
  ===============
   
 Changes in API
 ----------------------
 
 WIN64 support
--- a/modules/lcms/include/lcms.h
+++ b/modules/lcms/include/lcms.h
@@ -66,16 +66,29 @@
 // Uncomment this line on multithreading environments
 // #define USE_PTHREADS    1
 
 // Uncomment this line if you want lcms to use the black point tag in profile, 
 // if commented, lcms will compute the black point by its own. 
 // It is safer to leve it commented out
 // #define HONOR_BLACK_POINT_TAG    1
 
+// Define CMS_DEBUG before including lcms.h to turn on asserts
+#ifdef CMS_DEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#define CMSASSERT(x) \
+do { if (!(x)) { \
+        fprintf(stderr, "CMS Assertion Failed: %s:%d\n", __FILE__, __LINE__);\
+        exit(-1); \
+} } while (0)
+#else
+#define CMSASSERT(x) ((void) 0)
+#endif
+
 // ********** End of configuration toggles ******************************
 
 #define LCMS_VERSION        117
 
 // Microsoft VisualC++
 
 // Deal with Microsoft's attempt at deprecating C standard runtime functions 
 #ifdef _MSC_VER
@@ -1447,17 +1460,17 @@ typedef struct {
        } _cmsTestAlign8;
 
 #define SIZEOF_UINT8_ALIGNED (sizeof(_cmsTestAlign8) - sizeof(icS15Fixed16Number))
 
 
 // Fixed point
 
 
-typedef icInt32Number Fixed32;       // Fixed 15.16 whith sign
+typedef icInt32Number Fixed32, *LPFixed32;    // Fixed 15.16 with sign
 
 #define INT_TO_FIXED(x)         ((x)<<16)
 #define DOUBLE_TO_FIXED(x)      ((Fixed32) ((x)*65536.0+0.5))
 #define FIXED_TO_INT(x)         ((x)>>16)
 #define FIXED_REST_TO_INT(x)    ((x)& 0xFFFFU)
 #define FIXED_TO_DOUBLE(x)      (((double)x)/65536.0)
 #define ROUND_FIXED_TO_INT(x)   (((x)+0x8000)>>16)
 
@@ -1690,16 +1703,97 @@ LPGAMMATABLE   cdecl cmsConvertSampledCu
 
 void           cdecl cmsEndpointsOfSampledCurve(LPSAMPLEDCURVE p, double* Min, double* Max);
 void           cdecl cmsClampSampledCurve(LPSAMPLEDCURVE p, double Min, double Max);
 LCMSBOOL       cdecl cmsSmoothSampledCurve(LPSAMPLEDCURVE Tab, double SmoothingLambda);
 void           cdecl cmsRescaleSampledCurve(LPSAMPLEDCURVE p, double Min, double Max, int nPoints);
 
 LPSAMPLEDCURVE cdecl cmsJoinSampledCurves(LPSAMPLEDCURVE X, LPSAMPLEDCURVE Y, int nResultingPoints);
 
+
+// Precache
+
+/*
+ * Type specifier for precaches
+ * 
+ * Naming Convention: CMS_PRECACHE_{KIND}{IFORMAT}{OFORMAT}_{DIRECTION}
+ *
+ * Valid Kinds:
+ *   LI - Linear Interpolation
+ *
+ * Valid Formats:
+ *   8 - 8 bit integer
+ *   16 - 16 bit integer
+ *   W - 32 bit fixed point
+ *   F - 32 bit floating point
+ *
+ * Valid Directions:
+ *   FORWARD
+ *   REVERSE
+ */
+
+typedef enum {
+             CMS_PRECACHE_LI1616_REVERSE = 0,   
+             CMS_PRECACHE_LI16W_FORWARD = 1,
+             PRECACHE_TYPE_COUNT
+             } LCMSPRECACHETYPE;
+
+#define IS_LI_REVERSE(Type) ((Type == CMS_PRECACHE_LI1616_REVERSE))
+#define IS_LI_FORWARD(Type) ((Type == CMS_PRECACHE_LI16W_FORWARD))
+
+
+// Implementation structure for 16 bit to 16 bit linear interpolations
+typedef struct _lcms_precache_li1616_impl {
+
+               // Tables containing the precomputed values
+               LPWORD Cache[3];
+
+               } LCMSPRECACHELI1616IMPL, FAR* LPLCMSPRECACHELI1616IMPL;
+
+// Implementation structure for 16 bit to fixed-point linear interpolations
+typedef struct _lcms_precache_li16w_impl {
+
+               // Tables containing the precomputed values
+               LPFixed32 Cache[3];
+
+               } LCMSPRECACHELI16WIMPL, FAR* LPLCMSPRECACHELI16WIMPL;
+
+// This is a struct containing data related to precached linear interpolations
+// on a profile.
+typedef struct _lcms_precache_struct {
+
+               // This structure is used by transforms to precompute otherwise expensive
+               // per-pixel-channel computations. Ideally, a profile would really always
+               // be around as long as any transform using its information is around, but
+               // it's more trouble than it's worth to enforce that. Instead, we just use a
+               // simple reference counting scheme.
+               unsigned RefCount;
+
+               // Type of precache - determines the active union member below
+               LCMSPRECACHETYPE Type;
+
+               // Different types of precaches require different structures. We use a union
+               // to handle them with the same code when we can.
+               union {
+                     LCMSPRECACHELI1616IMPL LI1616_REVERSE;
+                     LCMSPRECACHELI16WIMPL  LI16W_FORWARD;
+                     } Impl;
+
+               } LCMSPRECACHE, FAR* LPLCMSPRECACHE;
+
+#define PRECACHE_ADDREF(p) do { ++(p)->RefCount; } while (0)  /* statement-safe; p is parenthesised */
+#define PRECACHE_RELEASE(p) do { if (--(p)->RefCount == 0) cmsPrecacheFree(p); } while (0)  /* frees on last ref */
+
+// Public Precache API
+LCMSAPI LCMSBOOL      LCMSEXPORT cmsPrecacheProfile(cmsHPROFILE hProfile, LCMSPRECACHETYPE Type);
+
+// Internal Precache API
+void    cdecl cmsPrecacheFree(LPLCMSPRECACHE Cache);
+
+
 // Shaper/Matrix handling
 
 #define MATSHAPER_HASMATRIX        0x0001        // Do-ops flags
 #define MATSHAPER_HASSHAPER        0x0002
 #define MATSHAPER_INPUT            0x0004        // Behaviour
 #define MATSHAPER_OUTPUT           0x0008
 #define MATSHAPER_HASINPSHAPER     0x0010
 #define MATSHAPER_ALLSMELTED       (MATSHAPER_INPUT|MATSHAPER_OUTPUT)
@@ -1707,24 +1801,27 @@ LPSAMPLEDCURVE cdecl cmsJoinSampledCurve
 
 typedef struct {
                DWORD dwFlags;
 
                WMAT3 Matrix;
 
                L16PARAMS p16;       // Primary curve
                LPWORD L[3];
+               LPLCMSPRECACHE L_Precache;
                
                L16PARAMS p2_16;     // Secondary curve (used as input in smelted ones)
                LPWORD L2[3];
+               LPLCMSPRECACHE L2_Precache;
 
                } MATSHAPER, FAR* LPMATSHAPER;
 
 LPMATSHAPER cdecl cmsAllocMatShaper(LPMAT3 matrix, LPGAMMATABLE Shaper[], DWORD Behaviour);
-LPMATSHAPER cdecl cmsAllocMatShaper2(LPMAT3 matrix, LPGAMMATABLE In[], LPGAMMATABLE Out[], DWORD Behaviour);
+LPMATSHAPER cdecl cmsAllocMatShaper2(LPMAT3 matrix, LPGAMMATABLE In[], LPLCMSPRECACHE InPrecache,
+                                     LPGAMMATABLE Out[], LPLCMSPRECACHE OutPrecache, DWORD Behavior);
 
 void        cdecl cmsFreeMatShaper(LPMATSHAPER MatShaper);
 void        cdecl cmsEvalMatShaper(LPMATSHAPER MatShaper, WORD In[], WORD Out[]);
 
 LCMSBOOL    cdecl cmsReadICCMatrixRGB2XYZ(LPMAT3 r, cmsHPROFILE hProfile);
 
 LPMATSHAPER cdecl cmsBuildInputMatrixShaper(cmsHPROFILE InputProfile);
 LPMATSHAPER cdecl cmsBuildOutputMatrixShaper(cmsHPROFILE OutputProfile);
@@ -1749,16 +1846,17 @@ void cdecl _cmsIdentifyWhitePoint(char *
 WORD cdecl _cmsQuantizeVal(double i, int MaxSamples);
 
 LPcmsNAMEDCOLORLIST  cdecl cmsAllocNamedColorList(int n);
 int                  cdecl cmsReadICCnamedColorList(cmsHTRANSFORM xform, cmsHPROFILE hProfile, icTagSignature sig);
 void                 cdecl cmsFreeNamedColorList(LPcmsNAMEDCOLORLIST List);
 LCMSBOOL             cdecl cmsAppendNamedColor(cmsHTRANSFORM xform, const char* Name, WORD PCS[3], WORD Colorant[MAXCHANNELS]);
 
 
+
 // I/O
 
 #define MAX_TABLE_TAG       100
 
 // This is the internal struct holding profile details.
 
 typedef struct _lcms_iccprofile_struct {
 
@@ -1795,16 +1893,19 @@ typedef struct _lcms_iccprofile_struct {
 
                char                    PhysicalFile[MAX_PATH];
                
                LCMSBOOL                IsWrite;
                LCMSBOOL                SaveAs8Bits;
 
                struct tm               Created;
 
+               // Precache pointers
+               LPLCMSPRECACHE Precache[PRECACHE_TYPE_COUNT];
+
                // I/O handlers
 
                size_t   (* Read)(void *buffer, size_t size, size_t count, struct _lcms_iccprofile_struct* Icc);
                
                LCMSBOOL (* Seek)(struct _lcms_iccprofile_struct* Icc, size_t offset);
                LCMSBOOL (* Close)(struct _lcms_iccprofile_struct* Icc);
                size_t   (* Tell)(struct _lcms_iccprofile_struct* Icc);
                
--- a/modules/lcms/src/Makefile.in
+++ b/modules/lcms/src/Makefile.in
@@ -66,17 +66,17 @@ endif
 MODULE_OPTIMIZE_FLAGS=-O2
 
 REQUIRES	= $(LCMS_REQUIRES) \
 		  $(NULL)
 
 CSRCS = cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio1.c \
 	cmslut.c cmsmatsh.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c \
 	cmsxform.c cmssamp.c cmscam97.c cmsnamed.c cmsps2.c cmscam02.c \
-	cmsvirt.c cmscgats.c cmsio0.c
+	cmsvirt.c cmscgats.c cmsio0.c cmsprecache.c
 
 LOCAL_INCLUDES += -I../include
 
 include $(topsrcdir)/config/rules.mk
 
 ifeq ($(OS_ARCH),OS2)
 ADD_TO_DEF_FILE = sed -e '1,/^EXPORTS$$/ d' -e 's,=.*$$,,' -e 's,\ \([_c]\),_\1,' \
 		$(srcdir)/lcms.def >> $(DEF_FILE)
--- a/modules/lcms/src/cmsintrp.c
+++ b/modules/lcms/src/cmsintrp.c
@@ -353,25 +353,22 @@ WORD cmsLinearInterpLUT16(WORD Value, WO
        y1 = LutTable[cell1] ;
 
        y = y0 + (y1 - y0) * rest;
 
 
        return (WORD) floor(y+.5);
 }
 
-#endif
-
 
 //
-//  Linear interpolation (Fixed-point optimized, but C source)
+//  Linear interpolation (Fixed-point optimized, but C source).
 //
 
-
-#ifdef USE_C
+#else
 
 WORD cmsLinearInterpLUT16(WORD Value1, WORD LutTable[], LPL16PARAMS p)
 {
        WORD y1, y0;
        WORD y;
        int dif, a1;
        int cell0, rest;
        int val3, Value;
@@ -390,106 +387,33 @@ WORD cmsLinearInterpLUT16(WORD Value1, W
 
        y0 = LutTable[cell0] ;
        y1 = LutTable[cell0+1] ;
 
        dif = (int) y1 - y0;        // dif is in domain -ffff ... ffff
 
        if (dif >= 0)
        {
-       a1 = ToFixedDomain(dif * rest);
+       a1 = ToFixedDomain(((unsigned)dif) * rest);
        a1 += 0x8000;
        }
        else
        {
-              a1 = ToFixedDomain((- dif) * rest);
+              a1 = ToFixedDomain(((unsigned)(-dif)) * rest);  // dif < 0 here: negate before the cast
               a1 -= 0x8000;
               a1 = -a1;
        }
 
        y = (WORD) (y0 + FIXED_TO_INT(a1));
 
        return y;
 }
 
 #endif
 
-// Linear interpolation (asm by hand optimized)
-
-#ifdef USE_ASSEMBLER
-
-#ifdef _MSC_VER
-#pragma warning(disable : 4033)
-#pragma warning(disable : 4035)
-#endif
-
-WORD cmsLinearInterpLUT16(WORD Value, WORD LutTable[], LPL16PARAMS p)
-{
-       int xDomain = p -> Domain;
-
-
-       if (Value == 0xffff) return LutTable[p -> Domain];
-       else
-       ASM {
-              xor       eax, eax
-              mov       ax, word ptr ss:Value
-              mov       edx, ss:xDomain
-              mul       edx                         //  val3 = p -> Domain * Value;
-              shld      edx, eax, 16                // Convert it to fixed 15.16
-              shl       eax, 16                     // * 65536 / 65535
-              mov       ebx, 0x0000ffff
-              div       ebx
-              mov       ecx, eax
-              sar       ecx, 16                        // ecx = cell0
-              mov       edx, eax                       // rest = (val2 & 0xFFFFU)
-              and       edx, 0x0000ffff                // edx = rest
-              mov       ebx, ss:LutTable
-              lea       eax, dword ptr [ebx+2*ecx]     // Ptr to LUT
-              xor       ebx, ebx
-              mov        bx, word  ptr [eax]           // EBX = y0
-              movzx     eax, word  ptr [eax+2]         // EAX = y1
-              sub       eax, ebx                       // EAX = y1-y0
-              js        IsNegative
-              mul       edx                            // EAX = EAX * rest
-              shld      edx, eax, 16                   // Pass it to fixed
-              sal       eax, 16                        // * 65536 / 65535
-              mov       ecx, 0x0000ffff
-              div       ecx
-              add       eax, 0x8000                    // Rounding
-              sar       eax, 16
-              add       eax, ebx                       // Done!
-              }
-
-              RET((WORD) _EAX);
-
-       IsNegative:
-
-              ASM {
-              neg       eax
-              mul       edx                            // EAX = EAX * rest
-              shld      edx, eax, 16                   // Pass it to fixed
-              sal       eax, 16                        // * 65536 / 65535
-              mov       ecx, 0x0000ffff
-              div       ecx
-              sub       eax, 0x8000
-              neg       eax
-              sar       eax, 16
-              add       eax, ebx                       // Done!
-              }
-
-              RET((WORD) _EAX);
-}
-
-#ifdef _MSC_VER
-#pragma warning(default : 4033)
-#pragma warning(default : 4035)
-#endif
-
-#endif
-
 Fixed32 cmsLinearInterpFixed(WORD Value1, WORD LutTable[], LPL16PARAMS p)
 {
        Fixed32 y1, y0;
        int cell0;
        int val3, Value;
 
        // if last value...
 
--- a/modules/lcms/src/cmsio1.c
+++ b/modules/lcms/src/cmsio1.c
@@ -2528,16 +2528,21 @@ cmsHPROFILE LCMSEXPORT cmsOpenProfileFro
 LCMSBOOL LCMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile)
 {
        LPLCMSICCPROFILE Icc = (LPLCMSICCPROFILE) (LPSTR) hProfile;
        LCMSBOOL rc = TRUE;
        icInt32Number i;         
 
        if (!Icc) return FALSE;
 
+       // Free any precaches
+       for (i = 0; i < PRECACHE_TYPE_COUNT; ++i)
+              if (Icc->Precache[i] != NULL)
+                     PRECACHE_RELEASE(Icc->Precache[i]);
+
        // Was open in write mode?   
        if (Icc ->IsWrite) {
 
            Icc ->IsWrite = FALSE;      // Assure no further writting
            rc = _cmsSaveProfile(hProfile, Icc ->PhysicalFile);        
        }
        
        for (i=0; i < Icc -> TagCount; i++) {
--- a/modules/lcms/src/cmsmatsh.c
+++ b/modules/lcms/src/cmsmatsh.c
@@ -82,17 +82,19 @@ int ComputeTables(LPGAMMATABLE Table[3],
 
        if (AllLinear != 3) return 1;
 
        return 0;
 
 }
 
 
-LPMATSHAPER cmsAllocMatShaper2(LPMAT3 Matrix, LPGAMMATABLE In[], LPGAMMATABLE Out[], DWORD Behaviour)
+LPMATSHAPER cmsAllocMatShaper2(LPMAT3 Matrix, LPGAMMATABLE In[], LPLCMSPRECACHE InPrecache,
+                               LPGAMMATABLE Out[], LPLCMSPRECACHE OutPrecache, 
+                               DWORD Behaviour)
 {
        LPMATSHAPER NewMatShaper;
        int rc;
 
        NewMatShaper = (LPMATSHAPER) _cmsMalloc(sizeof(MATSHAPER));
        if (NewMatShaper)
               ZeroMemory(NewMatShaper, sizeof(MATSHAPER));
 
@@ -104,38 +106,49 @@ LPMATSHAPER cmsAllocMatShaper2(LPMAT3 Ma
 
        // Reality check
 
        if (!MAT3isIdentity(&NewMatShaper -> Matrix, 0.00001))
                      NewMatShaper -> dwFlags |= MATSHAPER_HASMATRIX;
 
        // Now, on the table characteristics
 
-       if (Out) {
+       // If we have an output precache, use that
+       if (OutPrecache != NULL) {
+              PRECACHE_ADDREF(OutPrecache);
+              NewMatShaper->L_Precache = OutPrecache;
+              NewMatShaper -> dwFlags |= MATSHAPER_HASSHAPER;
+       }
 
-            rc = ComputeTables(Out, NewMatShaper ->L, &NewMatShaper ->p16);
-            if (rc < 0) {
-                 cmsFreeMatShaper(NewMatShaper);
-                 return NULL;
-            }
-            if (rc == 1) NewMatShaper -> dwFlags |= MATSHAPER_HASSHAPER;        
+       else if (Out != NULL) {   // as before the precache change, a NULL Out is skipped
+              rc = ComputeTables(Out, NewMatShaper ->L, &NewMatShaper ->p16);
+              if (rc < 0) {
+                     cmsFreeMatShaper(NewMatShaper);
+                     return NULL;
+              }
+              if (rc == 1) NewMatShaper -> dwFlags |= MATSHAPER_HASSHAPER;        
        }
 
-
-       if (In) {
-
-            rc = ComputeTables(In, NewMatShaper ->L2, &NewMatShaper ->p2_16);
-            if (rc < 0) {
-                cmsFreeMatShaper(NewMatShaper);
-                return NULL;
-            }
-            if (rc == 1) NewMatShaper -> dwFlags |= MATSHAPER_HASINPSHAPER;     
+       // If we have an input precache, use that
+       if (InPrecache != NULL) {
+              PRECACHE_ADDREF(InPrecache);
+              NewMatShaper->L2_Precache = InPrecache;
+              NewMatShaper-> dwFlags |= MATSHAPER_HASINPSHAPER;
        }
 
-       
+       else if (In != NULL) {    // as before the precache change, a NULL In is skipped
+
+              rc = ComputeTables(In, NewMatShaper ->L2, &NewMatShaper ->p2_16);
+              if (rc < 0) {
+                     cmsFreeMatShaper(NewMatShaper);
+                     return NULL;
+              }
+              if (rc == 1) NewMatShaper -> dwFlags |= MATSHAPER_HASINPSHAPER;     
+       }
+
        return NewMatShaper;
 
 }
 
 
 
 // Creation & Destruction
 
@@ -201,18 +214,26 @@ LPMATSHAPER cmsAllocMatShaper(LPMAT3 Mat
 // Free associated memory
 
 void cmsFreeMatShaper(LPMATSHAPER MatShaper)
 {
        int i;
 
        if (!MatShaper) return;
 
+       // Release references to the precaches if we have them
+       if (MatShaper->L_Precache != NULL)
+              PRECACHE_RELEASE(MatShaper->L_Precache);
+       if (MatShaper->L2_Precache != NULL)
+              PRECACHE_RELEASE(MatShaper->L2_Precache);
+
        for (i=0; i < 3; i++)
        {
+              // These are never initialized from their zeroed state if we
+              // were using a cache
               if (MatShaper -> L[i]) _cmsFree(MatShaper ->L[i]);
               if (MatShaper -> L2[i]) _cmsFree(MatShaper ->L2[i]);
        }
 
        _cmsFree(MatShaper);
 }
 
 
@@ -222,58 +243,76 @@ static
 void AllSmeltedBehaviour(LPMATSHAPER MatShaper, WORD In[], WORD Out[])
 {
 
        WORD tmp[3];
        WVEC3 InVect, OutVect;
 
        if (MatShaper -> dwFlags & MATSHAPER_HASINPSHAPER)
        {
-       InVect.n[VX] = cmsLinearInterpFixed(In[0], MatShaper -> L2[0], &MatShaper -> p2_16);
-       InVect.n[VY] = cmsLinearInterpFixed(In[1], MatShaper -> L2[1], &MatShaper -> p2_16);
-       InVect.n[VZ] = cmsLinearInterpFixed(In[2], MatShaper -> L2[2], &MatShaper -> p2_16);
+              if (MatShaper->L2_Precache != NULL) 
+              {
+              InVect.n[VX] = MatShaper->L2_Precache->Impl.LI16W_FORWARD.Cache[0][In[0]];
+              InVect.n[VY] = MatShaper->L2_Precache->Impl.LI16W_FORWARD.Cache[1][In[1]];
+              InVect.n[VZ] = MatShaper->L2_Precache->Impl.LI16W_FORWARD.Cache[2][In[2]];
+              }
+              else 
+              {
+              InVect.n[VX] = cmsLinearInterpFixed(In[0], MatShaper -> L2[0], &MatShaper -> p2_16);
+              InVect.n[VY] = cmsLinearInterpFixed(In[1], MatShaper -> L2[1], &MatShaper -> p2_16);
+              InVect.n[VZ] = cmsLinearInterpFixed(In[2], MatShaper -> L2[2], &MatShaper -> p2_16);
+              }
        }
        else
        {
-            InVect.n[VX] = ToFixedDomain(In[0]);
-            InVect.n[VY] = ToFixedDomain(In[1]);
-            InVect.n[VZ] = ToFixedDomain(In[2]);
+       InVect.n[VX] = ToFixedDomain(In[0]);
+       InVect.n[VY] = ToFixedDomain(In[1]);
+       InVect.n[VZ] = ToFixedDomain(In[2]);
        }
 
 
        if (MatShaper -> dwFlags & MATSHAPER_HASMATRIX)
        {       
                          
              MAT3evalW(&OutVect, &MatShaper -> Matrix, &InVect);
        }
-       else {
-
-           OutVect.n[VX] = InVect.n[VX];
-           OutVect.n[VY] = InVect.n[VY];
-           OutVect.n[VZ] = InVect.n[VZ];
+       else 
+       {
+       OutVect.n[VX] = InVect.n[VX];
+       OutVect.n[VY] = InVect.n[VY];
+       OutVect.n[VZ] = InVect.n[VZ];
        }
 
              
        tmp[0] = _cmsClampWord(FromFixedDomain(OutVect.n[VX]));
        tmp[1] = _cmsClampWord(FromFixedDomain(OutVect.n[VY]));
        tmp[2] = _cmsClampWord(FromFixedDomain(OutVect.n[VZ]));
 
        
            
        if (MatShaper -> dwFlags & MATSHAPER_HASSHAPER)
        {
-       Out[0] = cmsLinearInterpLUT16(tmp[0], MatShaper -> L[0], &MatShaper -> p16);
-       Out[1] = cmsLinearInterpLUT16(tmp[1], MatShaper -> L[1], &MatShaper -> p16);
-       Out[2] = cmsLinearInterpLUT16(tmp[2], MatShaper -> L[2], &MatShaper -> p16);
+              if (MatShaper->L_Precache != NULL) 
+              {
+              Out[0] = MatShaper->L_Precache->Impl.LI1616_REVERSE.Cache[0][tmp[0]];
+              Out[1] = MatShaper->L_Precache->Impl.LI1616_REVERSE.Cache[1][tmp[1]];
+              Out[2] = MatShaper->L_Precache->Impl.LI1616_REVERSE.Cache[2][tmp[2]];
+              }
+              else 
+              {
+              Out[0] = cmsLinearInterpLUT16(tmp[0], MatShaper -> L[0], &MatShaper -> p16);
+              Out[1] = cmsLinearInterpLUT16(tmp[1], MatShaper -> L[1], &MatShaper -> p16);
+              Out[2] = cmsLinearInterpLUT16(tmp[2], MatShaper -> L[2], &MatShaper -> p16);
+              }
        }
        else
        {
-           Out[0] = tmp[0];
-           Out[1] = tmp[1];
-           Out[2] = tmp[2];
+       Out[0] = tmp[0];
+       Out[1] = tmp[1];
+       Out[2] = tmp[2];
        }
         
 }
 
 
 static
 void InputBehaviour(LPMATSHAPER MatShaper, WORD In[], WORD Out[])
 {
new file mode 100644
--- /dev/null
+++ b/modules/lcms/src/cmsprecache.c
@@ -0,0 +1,202 @@
+//  Little cms
+//  Copyright (C) 2008 Mozilla Foundation
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "lcms.h"
+
+/*
+ * Helper macro: allocates nTables tables for aProf->Precache[cacheType]. On
+ * failure it frees what it allocated plus the precache, NULLs the slot and
+ * makes the CALLER 'return FALSE'. A macro lets us name the union member.
+ */
+#define cmsAllocPrecacheTables(aProf, cacheType, unionMemb, nTables, elemSize, nElems) \
+{\
+       unsigned i, j; \
+       for (i = 0; i < nTables; ++i) { \
+              aProf->Precache[cacheType]->Impl.unionMemb.Cache[i] = \
+                _cmsMalloc(elemSize * nElems); \
+              if (aProf->Precache[cacheType]->Impl.unionMemb.Cache[i] == NULL) { \
+                     for (j = 0; j < i; ++j) \
+                            _cmsFree(aProf->Precache[cacheType]->Impl.unionMemb.Cache[j]); \
+                     _cmsFree(aProf->Precache[cacheType]); \
+                     aProf->Precache[cacheType] = NULL; \
+                     return FALSE; \
+              } \
+       } \
+}
+
+/*
+ * Precaches the results specified in Type in a reference-counted table.
+ *
+ * hProfile is the profile to precache and Type selects the interpolation
+ * to cache (see LCMSPRECACHETYPE).
+ *
+ * Returns TRUE if the precache succeeded (or if the information was already
+ * precached), FALSE otherwise (including cases where the profile was not
+ * precacheable because it lacks a red, green or blue TRC tag). The gamma
+ * curves read from the profile are only needed while the cache is filled,
+ * so they are freed on every path before returning.
+ */
+LCMSBOOL LCMSEXPORT cmsPrecacheProfile(cmsHPROFILE hProfile,
+                                       LCMSPRECACHETYPE Type) {
+
+       // Locals.
+       LPGAMMATABLE GTables[3];
+       LCMSBOOL hasGammaTables;
+       LPLCMSICCPROFILE Icc = (LPLCMSICCPROFILE) (LPSTR) hProfile;
+       L16PARAMS p16;
+       unsigned i, j;
+
+       // Input Validation
+       CMSASSERT(Type < PRECACHE_TYPE_COUNT);
+
+       /* Do we already have what we need? */
+       if (Icc->Precache[Type] != NULL)
+              return TRUE;
+
+       /* Determine if we have gamma tables in the profile. */
+       hasGammaTables = cmsIsTag(hProfile, icSigRedTRCTag) &&
+                        cmsIsTag(hProfile, icSigGreenTRCTag) &&
+                        cmsIsTag(hProfile, icSigBlueTRCTag);
+
+       /* Without all three curves there is nothing to precache. */
+       if (!hasGammaTables)
+              return FALSE;
+
+       /* Zero Out the Gamma Table Pointers. */
+       ZeroMemory(GTables, sizeof(GTables));
+
+       // Create and zero a precache structure
+       Icc->Precache[Type] = _cmsMalloc(sizeof(LCMSPRECACHE));
+       if (Icc->Precache[Type] == NULL)
+              return FALSE;
+       ZeroMemory(Icc->Precache[Type], sizeof(LCMSPRECACHE));
+
+       // Grab a Reference to the precache
+       PRECACHE_ADDREF(Icc->Precache[Type]);
+
+       // Tag the precache with its type (necessary for freeing)
+       Icc->Precache[Type]->Type = Type;
+
+       // Allocate the precache tables, then read the gamma curves. The order
+       // matters: cmsAllocPrecacheTables returns FALSE from this function when
+       // an allocation fails, so no gamma table may be live at that point.
+       switch(Type) {
+
+              case CMS_PRECACHE_LI1616_REVERSE:
+                     cmsAllocPrecacheTables(Icc, Type, LI1616_REVERSE, 3, sizeof(WORD), (1 << 16));
+                     GTables[0] = cmsReadICCGammaReversed(hProfile, icSigRedTRCTag);
+                     GTables[1] = cmsReadICCGammaReversed(hProfile, icSigGreenTRCTag);
+                     GTables[2] = cmsReadICCGammaReversed(hProfile, icSigBlueTRCTag);
+                     break;
+
+              case CMS_PRECACHE_LI16W_FORWARD:
+                     cmsAllocPrecacheTables(Icc, Type, LI16W_FORWARD, 3, sizeof(Fixed32), (1 << 16));
+                     GTables[0] = cmsReadICCGamma(hProfile, icSigRedTRCTag);
+                     GTables[1] = cmsReadICCGamma(hProfile, icSigGreenTRCTag);
+                     GTables[2] = cmsReadICCGamma(hProfile, icSigBlueTRCTag);
+                     break;
+
+              default:
+                     CMSASSERT(0); // Not implemented
+                     break;
+       }
+
+       // Check tables. On failure, free whichever curves were read and drop
+       // the only reference to the precache, which frees its tables and itself.
+       if (!GTables[0] || !GTables[1] || !GTables[2]) {
+              cmsFreeGammaTriple(GTables);
+              PRECACHE_RELEASE(Icc->Precache[Type]);
+              Icc->Precache[Type] = NULL;
+              return FALSE;
+       }
+
+       // Compute the cache. The interpolation parameters are calculated per
+       // channel so that a curve is never indexed using another curve's size.
+       switch(Type) {
+
+              case CMS_PRECACHE_LI1616_REVERSE:
+                     for (i = 0; i < 3; ++i) {
+                            cmsCalcL16Params(GTables[i]->nEntries, &p16);
+                            for (j = 0; j < (1 << 16); ++j)
+                                   Icc->Precache[Type]->Impl.LI1616_REVERSE.Cache[i][j] =
+                                          cmsLinearInterpLUT16((WORD)j, GTables[i]->GammaTable, &p16);
+                     }
+                     break;
+
+              case CMS_PRECACHE_LI16W_FORWARD:
+                     for (i = 0; i < 3; ++i) {
+                            cmsCalcL16Params(GTables[i]->nEntries, &p16);
+                            for (j = 0; j < (1 << 16); ++j)
+                                   Icc->Precache[Type]->Impl.LI16W_FORWARD.Cache[i][j] =
+                                          cmsLinearInterpFixed((WORD)j, GTables[i]->GammaTable, &p16);
+                     }
+                     break;
+
+              default:
+                     CMSASSERT(0); // Not implemented
+                     break;
+       }
+
+       // The curves are baked into the cache now, so release them
+       cmsFreeGammaTriple(GTables);
+
+       // Success
+       return TRUE;
+}
+
+/*
+ * Frees a Precache structure: the tables of the union member selected by
+ * Cache->Type, then the structure itself. Invoked by PRECACHE_RELEASE when
+ * the refcount on the precache object drops to zero; it should never be
+ * invoked manually.
+ */
+void cmsPrecacheFree(LPLCMSPRECACHE Cache) {
+
+       // Locals
+       unsigned i;
+
+       // Validate Input/State
+       CMSASSERT(Cache != NULL);
+       CMSASSERT(Cache->RefCount == 0);
+
+       // Type-Specific behavior
+       switch(Cache->Type) {
+
+              case CMS_PRECACHE_LI1616_REVERSE:
+                     for (i = 0; i < 3; ++i)
+                      _cmsFree(Cache->Impl.LI1616_REVERSE.Cache[i]);
+                     break;
+
+              case CMS_PRECACHE_LI16W_FORWARD:
+                     for (i = 0; i < 3; ++i)
+                      _cmsFree(Cache->Impl.LI16W_FORWARD.Cache[i]);
+                     break;
+
+              default:
+                     CMSASSERT(0); // Bad Type
+                     break;
+       }
+
+       // Free the structure itself
+       _cmsFree(Cache);
+
+}
--- a/modules/lcms/src/cmsxform.c
+++ b/modules/lcms/src/cmsxform.c
@@ -772,55 +772,71 @@ LPMATSHAPER cmsBuildOutputMatrixShaper(c
 
 // This function builds a transform matrix chaining parameters
 
 static
 LCMSBOOL cmsBuildSmeltMatShaper(_LPcmsTRANSFORM p)
 {
        MAT3 From, To, ToInv, Transfer;
        LPGAMMATABLE In[3], InverseOut[3];
+       LPLCMSPRECACHE InPrecache, OutPrecache; // Per-profile precaches; NULL when absent
        
         
        if (!cmsReadICCMatrixRGB2XYZ(&From, p -> InputProfile))
                      return FALSE;
 
 
        if (!cmsReadICCMatrixRGB2XYZ(&To, p -> OutputProfile))
                      return FALSE;
 
        // invert dest
        
        if (MAT3inverse(&To, &ToInv) < 0)
                         return FALSE;
 
-       // Multiply
+        // Combine the inverted output matrix with the input matrix into Transfer
         MAT3per(&Transfer, &ToInv, &From); 
+
+        // Look up the precaches: forward LI16W on the input profile, reverse LI1616 on the output profile
+        InPrecache = ((LPLCMSICCPROFILE)p->InputProfile)->Precache[CMS_PRECACHE_LI16W_FORWARD];
+        OutPrecache = ((LPLCMSICCPROFILE)p->OutputProfile)->Precache[CMS_PRECACHE_LI1616_REVERSE];
     
-            
-        // Read gamma curves
-
-        In[0] = cmsReadICCGamma(p -> InputProfile, icSigRedTRCTag);
-        In[1] = cmsReadICCGamma(p -> InputProfile, icSigGreenTRCTag);
-        In[2] = cmsReadICCGamma(p -> InputProfile, icSigBlueTRCTag);
-
-        if (!In[0] || !In[1] || !In[2])
+        // If the input interpolations aren't already cached, read the R/G/B gamma curves
+        if (InPrecache == NULL) {
+               In[0] = cmsReadICCGamma(p -> InputProfile, icSigRedTRCTag);
+               In[1] = cmsReadICCGamma(p -> InputProfile, icSigGreenTRCTag);
+               In[2] = cmsReadICCGamma(p -> InputProfile, icSigBlueTRCTag);
+
+              if (!In[0] || !In[1] || !In[2]) // NOTE(review): curves already read are not freed on this return - confirm
                      return FALSE;
+        }
+        else
+              In[0] = In[1] = In[2] = NULL; // Input precache present: no curves are read
             
 
-        InverseOut[0] = cmsReadICCGammaReversed(p -> OutputProfile, icSigRedTRCTag);
-        InverseOut[1] = cmsReadICCGammaReversed(p -> OutputProfile, icSigGreenTRCTag);
-        InverseOut[2] = cmsReadICCGammaReversed(p -> OutputProfile, icSigBlueTRCTag);
-
-		if (!InverseOut[0] || !InverseOut[1] || !InverseOut[2]) {
-				     cmsFreeGammaTriple(In); 
+        // If the output interpolations aren't already cached, read reverse
+        // gamma curves
+        if (OutPrecache == NULL) {
+              InverseOut[0] = cmsReadICCGammaReversed(p -> OutputProfile, icSigRedTRCTag);
+              InverseOut[1] = cmsReadICCGammaReversed(p -> OutputProfile, icSigGreenTRCTag);
+              InverseOut[2] = cmsReadICCGammaReversed(p -> OutputProfile, icSigBlueTRCTag);
+
+              if (!InverseOut[0] || !InverseOut[1] || !InverseOut[2]) {
+                     cmsFreeGammaTriple(In); // NOTE(review): InverseOut entries already read are not freed here - confirm
                      return FALSE;
-		}
-
-        p -> SmeltMatShaper = cmsAllocMatShaper2(&Transfer, In, InverseOut, MATSHAPER_ALLSMELTED);
-
+              }
+        }
+        else
+              InverseOut[0] = InverseOut[1] = InverseOut[2] = NULL; // Output precache present: no curves are read
+
+        p -> SmeltMatShaper = cmsAllocMatShaper2(&Transfer, In, InPrecache,
+                                                 InverseOut, OutPrecache,
+                                                 MATSHAPER_ALLSMELTED);
+
+        // Release the temporary curves; relies on cmsFreeGammaTriple not freeing NULL entries (precached case)
         cmsFreeGammaTriple(In);
         cmsFreeGammaTriple(InverseOut);
         
        return (p -> SmeltMatShaper != NULL);
 }
 
 
 
--- a/modules/lcms/src/lcms.def
+++ b/modules/lcms/src/lcms.def
@@ -4,16 +4,17 @@ EXPORTS
     _cmsICCcolorSpace            = _cmsICCcolorSpace
     _cmsSaveProfile              = _cmsSaveProfile
     _cmsSaveProfileToMem         = _cmsSaveProfileToMem
     cmsAdaptToIlluminant         = cmsAdaptToIlluminant
     cmsAllocGamma                = cmsAllocGamma
     cmsBuildGamma                = cmsBuildGamma
     cmsSmoothGamma               = cmsSmoothGamma
     cmsBuildRGB2XYZtransferMatrix= cmsBuildRGB2XYZtransferMatrix
+    cmsPrecacheProfile           = cmsPrecacheProfile
     cmsCloseProfile              = cmsCloseProfile
     cmsCreateProofingTransform   = cmsCreateProofingTransform
     cmsCreateRGBProfile          = cmsCreateRGBProfile
     cmsCreateTransform           = cmsCreateTransform
     cmsDeleteTransform           = cmsDeleteTransform
     cmsDoTransform               = cmsDoTransform
     cmsErrorAction               = cmsErrorAction
     cmsFreeGamma                 = cmsFreeGamma