intl/icu/source/common/rbbidata.h
author Jeff Walden <jwalden@mit.edu>
Tue, 12 Nov 2013 16:23:48 -0800
changeset 183153 6e956de758ce84dfb882c82c988626b41e390630
parent 134860 e17bedcbeb7c6707c73a0a41f9c3a2c2e74eca4c
child 183163 604818812338693e46868c0d1a098eb2e9bd3ffa
permissions -rw-r--r--
Bug 924839 - Update our embedded ICU to 52.1, plus a very few local patches. r=lots of people, see subsequent lines in this commit message for the original subcomponents (merged together for landing), and the original bug for the original patch divisions Bug 924839 - Remove a patch already part of ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10283 but also note the relevant code was removed completely upstream. r=glandium * * * Bug 924839 - Remove another patch already part of ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10290 for that. r=gaston * * * Bug 924839 - Remove another patch already in ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10045 for more. r=Norbert * * * Bug 924839 - Remove another patch already applied upstream. See http://bugs.icu-project.org/trac/changeset/32937 for more. r=gaston * * * Bug 924839 - Update the ICU update script to update to 52.1, *without* applying any of our local patches. r=glandium * * * Bug 924839 - Make the ICU update script only do updating within intl/icu/source and nowhere else. r=glandium * * * Bug 924839 - Implement the changes that would be made by |cd intl/; ./update-icu.sh http://source.icu-project.org/repos/icu/icu/tags/release-52-1/;|, run with the prior changesets' changes made (thus not applying any of our local patches). These changes don't actually work without subsequent adjustments, but this provides a codebase upon which those adjustments can be made, for the purpose of generating local patches to be kept in intl/icu-patches/. rs=the-usual-suspects * * * Bug 924839 - Update the bug 899722 local patch to make runConfigureICU not override CC/CXX on BSD systems. r=gaston * * * Bug 924839 - Update the bug 724533 patch that makes ICU builds with MozillaBuild on Windows. r=glandium * * * Bug 924839 - Import an upstream patch fixing the genrb tool to properly handle the -R (--omitCollationRules) option. 
See http://bugs.icu-project.org/trac/ticket/10043 for the original bug report and a link to the ultimate upstream landing. r=Norbert * * * Bug 924839 - Import the upstream fix for http://bugs.icu-project.org/trac/ticket/10486 so that ICU with -DU_USING_ICU_NAMESPACE=0 will compile on Windows. r=Norbert * * * Bug 924839 - Adjust the update script to update ICU, then to apply all local patches (rather than skipping the second step). Thus if the update script is properly run, now, the final result should be no changes at all to the tree. NOT REVIEWED YET * * * Bug 924839 - Update jstests that depend on CLDR locale data to match CLDR 24. r=Norbert

/*
*******************************************************************************
*
*   Copyright (C) 1999-2013 International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  rbbidata.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   RBBI data formats  Includes
*
*                          Structs that describe the format of the binary RBBI data,
*                          as it is stored in ICU's data file.
*
*      RBBIDataWrapper  -  Instances of this class sit between the
*                          raw data structs and the RulesBasedBreakIterator objects
*                          that are created by applications.  The wrapper class
*                          provides reference counting for the underlying data,
*                          and direct pointers to data that would not otherwise
*                          be accessible without ugly pointer arithmetic.  The
*                          wrapper does not attempt to provide any higher level
*                          abstractions for the data itself.
*
*                          There will be only one instance of RBBIDataWrapper for any
*                          set of RBBI run time data being shared by instances
*                          (clones) of RulesBasedBreakIterator.
*/

#ifndef __RBBIDATA_H__
#define __RBBIDATA_H__

#include "unicode/utypes.h"
#include "unicode/udata.h"
#include "udataswp.h"

/**
 * Swap RBBI data. See udataswp.h.
 *
 * NOTE(review): the contract (meaning of a negative length, whether in-place
 * swapping is permitted, and what the return value counts) is not stated in
 * this header; it presumably follows the generic data-swapping function
 * contract documented in udataswp.h - confirm there before relying on it.
 *
 * @param ds          the swapper object that drives the conversion
 * @param inData      the RBBI data to be swapped (not modified through this pointer)
 * @param length      length associated with inData (units/semantics: see udataswp.h)
 * @param outData     destination for the swapped data
 * @param pErrorCode  ICU error code, passed by pointer
 * @return            an int32_t result; see udataswp.h for its meaning
 * @internal
 */
U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode);

#ifdef __cplusplus

#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "umutex.h"
#include "utrie.h"

U_NAMESPACE_BEGIN

/*  
 *   The following structs map exactly onto the raw data from ICU common data file. 
 */
/*
 *   RBBIDataHeader - header of a block of binary RBBI data.  It identifies the
 *                    data (magic number, format version), records the total
 *                    length, and locates each subsection with a pair of
 *                    fields: a byte offset followed by a byte length.
 */
struct RBBIDataHeader {
    uint32_t         fMagic;           /*  == 0xb1a0                                               */
    uint8_t          fFormatVersion[4]; /* Data Format.  Same as the value in struct UDataInfo      */
                                       /*   if there is one associated with this data.             */
                                       /*     (version originates in rbbi, is copied to UDataInfo) */
                                       /*   For ICU 3.2 and earlier, this field was                */
                                       /*       uint32_t  fVersion                                 */
                                       /*   with a value of 1.                                     */
    uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,                */
                                       /*      including all sections, not just the header.        */
    uint32_t         fCatCount;        /*  Number of character categories.                         */

    /*                                                                        */
    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
    /*  All sizes are in bytes.                                               */
    /*                                                                        */
    uint32_t         fFTable;         /*  Offset to the forward state transition table. */
    uint32_t         fFTableLen;      /*    Length of the forward table. */
    uint32_t         fRTable;         /*  Offset to the reverse state transition table. */
    uint32_t         fRTableLen;      /*    Length of the reverse table. */
    uint32_t         fSFTable;        /*  Offset to the safe point forward transition table. */
    uint32_t         fSFTableLen;     /*    Length of the safe point forward table. */
    uint32_t         fSRTable;        /*  Offset to the safe point reverse transition table. */
    uint32_t         fSRTableLen;     /*    Length of the safe point reverse table. */
    uint32_t         fTrie;           /*  Offset to Trie data for character categories. */
    uint32_t         fTrieLen;        /*    Length of the Trie data. */
    uint32_t         fRuleSource;     /*  Offset to the source text of the break rules, */
                                      /*    stored as UChar. */
    uint32_t         fRuleSourceLen;  /*    Length of the rule source. */
    uint32_t         fStatusTable;    /*  Offset to the table of rule status values. */
    uint32_t         fStatusTableLen; /*    Length of the rule status table. */

    uint32_t         fReserved[6];    /*  Reserved for expansion */

};



/*
 *   RBBIStateTableRow - one row (one state) of a state transition table.
 *                       Only the fixed leading fields and a two-element
 *                       placeholder for fNextState are declared here; the
 *                       real row is longer (see the fNextState comment).
 */
struct  RBBIStateTableRow {
    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
                                    /*  Value 0: not an accepting state.                  */
                                    /*       -1: Unconditional Accepting state.           */
                                    /*    positive:  Look-ahead match has completed.      */
                                    /*           Actual boundary position happened earlier */
                                    /*           Value here == fLookAhead in earlier      */
                                    /*              state, at actual boundary pos.        */
    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
                                    /*    corresponds to a '/' in the rule source.        */
                                    /*    Value is the same as the fAccepting             */
                                    /*      value for the rule (which will appear         */
                                    /*      in a different state).                        */
    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
                                    /*     from a rule.  Value is the index in the        */
                                    /*     StatusTable of the set of matching             */
                                    /*     tags (rule status values)                      */
    int16_t          fReserved;     /*  Reserved; no use is documented in this header.    */
    uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
                                    /*  The declared size of 2 is only a placeholder:     */
                                    /*    this array does not have two elements.          */
                                    /*    Array Size is actually fData->fHeader->fCatCount         */
                                    /*    Consequently sizeof(RBBIStateTableRow) is not   */
                                    /*    the size of a real row.                         */
                                    /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
                                    /*              before changing anything here.        */
};


/*
 *   RBBIStateTable - a complete state transition table: a small fixed header
 *                    followed by fNumStates rows of fRowLen bytes each.
 *                    NOTE(review): a row is presumably addressed as
 *                    fTableData + fRowLen * rowIndex and then viewed as an
 *                    RBBIStateTableRow - confirm against the code that
 *                    walks the table.
 */
struct RBBIStateTable {
    uint32_t         fNumStates;    /*  Number of states.                                 */
    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;        /*  Option Flags for this state table                 */
                                    /*    (presumably RBBIStateTableFlags values).        */
    uint32_t         fReserved;     /*  reserved                                          */
    char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
                                    /*    The declared size of 4 is a placeholder; the    */
                                    /*    row data continues past the end of the struct.  */
                                    /*    (making it char[] simplifies ugly address       */
                                    /*     arithmetic for indexing variable length rows.) */
};

/*
 *   Option flags for a state table.  The values are distinct bits (1 and 2),
 *   so they can be combined in a single integer.
 *   NOTE(review): these are presumably the values stored in
 *   RBBIStateTable::fFlags; the exact meaning of each flag is not documented
 *   in this header - see the rule builder / break iterator sources.
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1,
    RBBI_BOF_REQUIRED = 2
} RBBIStateTableFlags;


/*                                        */
/*   The reference counting wrapper class */
/*                                        */
/*
 *   RBBIDataWrapper sits between the raw binary RBBI data structs and the
 *   break iterator objects that use them.  It carries a reference count and
 *   exposes direct, typed pointers to the sections of the data so that
 *   callers do not need to do offset arithmetic on the header themselves.
 *   Copying is forbidden (copy constructor and assignment are private).
 */
class RBBIDataWrapper : public UMemory {
public:
    /* Tag type used only to select the constructor overload below that takes */
    /* an EDontAdopt argument.                                                 */
    /* NOTE(review): presumably that overload leaves ownership of the data     */
    /*   with the caller (compare fDontFreeData) - confirm in rbbidata.cpp.    */
    enum EDontAdopt {
        kDontAdopt
    };

    /* Construct from a pointer to raw RBBI data; errors are reported via status. */
    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
    /* As above, selected by passing kDontAdopt (see EDontAdopt).                 */
    RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
    /* Construct from a UDataMemory handle; errors are reported via status.       */
    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
    ~RBBIDataWrapper();

    /* Set up this wrapper for the given raw data; errors are reported via status. */
    void                  init(const RBBIDataHeader *data, UErrorCode &status);

    /* Reference counting interface (see fRefCount).  addReference() returns a    */
    /* wrapper pointer, so the call can be used directly in an assignment.        */
    /* NOTE(review): presumably removeReference() destroys the wrapper when the   */
    /*   count reaches zero - confirm in rbbidata.cpp.                            */
    RBBIDataWrapper      *addReference();
    void                  removeReference();

    UBool                 operator ==(const RBBIDataWrapper &other) const;
    int32_t               hashCode();
    const UnicodeString  &getRuleSourceString() const;

    /* Debug dump helpers.  They exist as member functions only when RBBI_DEBUG   */
    /* is defined.  Otherwise printData and printTable are defined as function-   */
    /* like macros that expand to nothing, so calls to them compile away.         */
    /* CAUTION: macros are not scoped to this class; after this header is         */
    /* included, any later printData(...) / printTable(...) invocation in the     */
    /* translation unit is also replaced by nothing.                              */
#ifdef RBBI_DEBUG
    void                  printData();
    void                  printTable(const char *heading, const RBBIStateTable *table);
#else
    #define printData()
    #define printTable(heading, table)
#endif

    /*                                     */
    /*   Pointers to items within the data */
    /*                                     */
    const RBBIDataHeader     *fHeader;
    const RBBIStateTable     *fForwardTable;
    const RBBIStateTable     *fReverseTable;
    const RBBIStateTable     *fSafeFwdTable;
    const RBBIStateTable     *fSafeRevTable;
    const UChar              *fRuleSource;
    const int32_t            *fRuleStatusTable; /* table of rule status values */

    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
    int32_t             fStatusMaxIdx;

    /* NOTE(review): presumably the character category trie located by           */
    /*   RBBIDataHeader::fTrie - confirm in rbbidata.cpp.                        */
    UTrie               fTrie;

private:
    u_atomic_int32_t    fRefCount;      /* reference count, stored in an atomic integer type */
    UDataMemory        *fUDataMem;      /* UDataMemory handle associated with the data, if any */
    UnicodeString       fRuleString;    /* NOTE(review): presumably what getRuleSourceString() returns */
    UBool               fDontFreeData;  /* NOTE(review): presumably set for the kDontAdopt constructor */

    /* Declared private to make the class non-copyable. */
    RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
};



U_NAMESPACE_END

#endif /* C++ */

#endif