ICU 4.6  4.6
rbbi.h
Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 *   Copyright (C) 1999-2008 International Business Machines Corporation   *
00004 *   and others. All rights reserved.                                      *
00005 ***************************************************************************
00006 
00007 **********************************************************************
00008 *   Date        Name        Description
00009 *   10/22/99    alan        Creation.
00010 *   11/11/99    rgillam     Complete port from Java.
00011 **********************************************************************
00012 */
00013 
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016 
00017 #include "unicode/utypes.h"
00018 
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025 
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031 
00032 
00033 struct UTrie;  // global-namespace forward declaration; not referenced anywhere else in this listing
00034 
00035 U_NAMESPACE_BEGIN
00036 
00038 struct RBBIDataHeader;  // parameter type of the protected constructors
00039 class  RuleBasedBreakIteratorTables;  // not referenced anywhere else in this listing
00040 class  BreakIterator;  // also used as the base class below, so its full definition must already be visible (presumably from unicode/brkiter.h)
00041 class  RBBIDataWrapper;  // pointee type of fData
00042 class  UStack;  // pointee type of fLanguageBreakEngines
00043 class  LanguageBreakEngine;  // pointee type returned by getLanguageBreakEngine()
00044 class  UnhandledEngine;  // pointee type of fUnhandledBreakEngine
00045 struct RBBIStateTable;  // parameter type of handleNext() / handlePrevious()
00046 
00047 
00048 
00049 
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00066 // BreakIterator subclass exported via U_COMMON_API; constructible from a rules string, a UDataMemory image, or by copy (see the public constructors).
00067 protected:  // data members; gaps in the embedded line numbers mark where the original doc comments were dropped from this listing
00072     UText  *fText;  // NOTE(review): presumably the text being analyzed (cf. setText(UText*, ...) / getUText()) -- confirm
00073 
00079     CharacterIterator  *fCharIter;  // NOTE(review): presumably what getText() returns a reference to -- confirm
00080 
00086     StringCharacterIterator *fSCharIter;
00087 
00093     UCharCharacterIterator *fDCharIter;
00094 
00099     RBBIDataWrapper    *fData;  // RBBIDataWrapper is only forward-declared in this header
00100 
00104     int32_t             fLastRuleStatusIndex;
00105 
00112     UBool               fLastStatusIndexValid;  // NOTE(review): presumably tells whether fLastRuleStatusIndex is current; see makeRuleStatusValid() -- confirm
00113 
00119     uint32_t            fDictionaryCharCount;
00120 
00128     int32_t*            fCachedBreakPositions;  // NOTE(review): presumably an array of fNumCachedBreakPositions entries, walked with fPositionInCache -- confirm
00129 
00134     int32_t             fNumCachedBreakPositions;
00135 
00141     int32_t             fPositionInCache;
00142     
00150     UStack              *fLanguageBreakEngines;
00151     
00159     UnhandledEngine     *fUnhandledBreakEngine;
00160     
00166     int32_t             fBreakType;  // same type as the argument of setBreakType(); the getBreakType() declaration further down sits inside #if 0
00167     
00168 protected:  // redundant: access is already protected from the specifier above
00169     //=======================================================================
00170     // constructors
00171     //=======================================================================
00172 
00181     enum EDontAdopt {  // single-enumerator tag type; selects the const RBBIDataHeader* constructor overload below
00182         kDontAdopt
00183     };
00184 
00195     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);  // NOTE(review): presumably adopts 'data', unlike the EDontAdopt overload -- confirm
00196 
00205     RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
00206 
00207 
00208     friend class RBBIRuleBuilder;  // friendship gives access to the protected constructors and the other non-public members
00210     friend class BreakIterator;
00211 
00212 
00213 
00214 public:
00215 
00220     RuleBasedBreakIterator();
00221 
00228     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00229 
00238     RuleBasedBreakIterator( const UnicodeString    &rules,
00239                              UParseError           &parseError,
00240                              UErrorCode            &status);
00241 
00242 
00255     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00256 
00261     virtual ~RuleBasedBreakIterator();
00262 
00270     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00271 
00280     virtual UBool operator==(const BreakIterator& that) const;
00281 
00289     UBool operator!=(const BreakIterator& that) const;  // non-virtual; defined inline after the class as the negation of operator==
00290 
00301     virtual BreakIterator* clone() const;
00302 
00308     virtual int32_t hashCode(void) const;
00309 
00315     virtual const UnicodeString& getRules(void) const;
00316 
00317     //=======================================================================
00318     // BreakIterator overrides
00319     //=======================================================================
00320 
00346     virtual  CharacterIterator& getText(void) const;
00347 
00348 
00363      virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00364 
00372     virtual void adoptText(CharacterIterator* newText);
00373 
00380     virtual void setText(const UnicodeString& newText);
00381 
00395     virtual void  setText(UText *text, UErrorCode &status);
00396 
00402     virtual int32_t first(void);
00403 
00409     virtual int32_t last(void);
00410 
00421     virtual int32_t next(int32_t n);  // overload taking a count; the no-argument overload follows
00422 
00428     virtual int32_t next(void);
00429 
00435     virtual int32_t previous(void);
00436 
00444     virtual int32_t following(int32_t offset);
00445 
00453     virtual int32_t preceding(int32_t offset);
00454 
00463     virtual UBool isBoundary(int32_t offset);
00464 
00470     virtual int32_t current(void) const;
00471 
00472 
00505     virtual int32_t getRuleStatus() const;
00506 
00530     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00531 
00543     virtual UClassID getDynamicClassID(void) const;
00544 
00556     static UClassID U_EXPORT2 getStaticClassID(void);
00557 
00558     /*
00559      * Create a clone (copy) of this break iterator in memory provided
00560      *  by the caller.  The idea is to increase performance by avoiding
00561      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
00562      *  Performance gains are minimal, and correct buffer management is
00563      *  tricky.  Use clone() instead.
00564      *
00565      * @param stackBuffer  The pointer to the memory into which the cloned object
00566      *                     should be placed.  If NULL,  allocate heap memory
00567      *                     for the cloned object.
00568      * @param BufferSize   The size of the buffer.  If zero, return the required
00569      *                     buffer size, but do not clone the object.  If the
00570      *                     size was too small (but not zero), allocate heap
00571      *                     storage for the cloned object.
00572      *
00573      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
00574      *                     returned if the provided buffer was too small, and
00575      *                     the clone was therefore put on the heap.
00576      *
00577      * @return  Pointer to the clone object.  This may differ from the stackBuffer
00578      *          address if the byte alignment of the stack buffer was not suitable
00579      *          or if the stackBuffer was too small to hold the clone.
00580      * @stable ICU 2.0
00581      */
00582     virtual BreakIterator *  createBufferClone(void *stackBuffer,
00583                                                int32_t &BufferSize,
00584                                                UErrorCode &status);
00585 
00586 
00604     virtual const uint8_t *getBinaryRules(uint32_t &length);  // 'length' is a non-const reference; NOTE(review): presumably receives the byte count of the returned data -- confirm
00605 
00606 
00607 protected:
00608     //=======================================================================
00609     // implementation
00610     //=======================================================================
00616     virtual void reset(void);
00617 
00618 #if 0
00619 // Compiled out up to the matching #endif: isDictionaryChar() and getBreakType() are not declared in builds of this header.
00627     virtual UBool isDictionaryChar(UChar32);
00628 
00633     virtual int32_t getBreakType() const;
00634 #endif
00635 
00640     virtual void setBreakType(int32_t type);
00641 
00647     void init();  // non-virtual, protected
00648 
00649 private:
00650 
00660     int32_t handlePrevious(const RBBIStateTable *statetable);
00661 
00671     int32_t handleNext(const RBBIStateTable *statetable);
00672 
00673 protected:  // access switches back to protected for checkDictionary() only
00674 
00689     int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00690 
00691 private:
00692 
00699     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00700 
00704     void makeRuleStatusValid();
00705 
00706 };
00707 
00708 //------------------------------------------------------------------------------
00709 //
00710 //   Inline Function Definitions ...
00711 //
00712 //------------------------------------------------------------------------------
00713 
00714 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {  // inequality is defined purely in terms of operator==
00715     return !(this->operator==(that));  // member call through 'this', so the virtual operator== is dispatched on the dynamic type
00716 }
00717 
00718 U_NAMESPACE_END
00719 
00720 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
00721 
00722 #endif
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines