// ICU 4.6
00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2008 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 00024 #if !UCONFIG_NO_BREAK_ITERATION 00025 00026 #include "unicode/brkiter.h" 00027 #include "unicode/udata.h" 00028 #include "unicode/parseerr.h" 00029 #include "unicode/schriter.h" 00030 #include "unicode/uchriter.h" 00031 00032 00033 struct UTrie; 00034 00035 U_NAMESPACE_BEGIN 00036 00038 struct RBBIDataHeader; 00039 class RuleBasedBreakIteratorTables; 00040 class BreakIterator; 00041 class RBBIDataWrapper; 00042 class UStack; 00043 class LanguageBreakEngine; 00044 class UnhandledEngine; 00045 struct RBBIStateTable; 00046 00047 00048 00049 00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00066 00067 protected: 00072 UText *fText; 00073 00079 CharacterIterator *fCharIter; 00080 00086 StringCharacterIterator *fSCharIter; 00087 00093 UCharCharacterIterator *fDCharIter; 00094 00099 RBBIDataWrapper *fData; 00100 00104 int32_t fLastRuleStatusIndex; 00105 00112 UBool fLastStatusIndexValid; 00113 00119 uint32_t fDictionaryCharCount; 00120 00128 int32_t* fCachedBreakPositions; 00129 00134 int32_t fNumCachedBreakPositions; 00135 00141 int32_t fPositionInCache; 00142 00150 UStack *fLanguageBreakEngines; 00151 00159 UnhandledEngine *fUnhandledBreakEngine; 00160 00166 int32_t fBreakType; 00167 00168 protected: 00169 //======================================================================= 00170 
// constructors 00171 //======================================================================= 00172 00181 enum EDontAdopt { 00182 kDontAdopt 00183 }; 00184 00195 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00196 00205 RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status); 00206 00207 00208 friend class RBBIRuleBuilder; 00210 friend class BreakIterator; 00211 00212 00213 00214 public: 00215 00220 RuleBasedBreakIterator(); 00221 00228 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00229 00238 RuleBasedBreakIterator( const UnicodeString &rules, 00239 UParseError &parseError, 00240 UErrorCode &status); 00241 00242 00255 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00256 00261 virtual ~RuleBasedBreakIterator(); 00262 00270 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00271 00280 virtual UBool operator==(const BreakIterator& that) const; 00281 00289 UBool operator!=(const BreakIterator& that) const; 00290 00301 virtual BreakIterator* clone() const; 00302 00308 virtual int32_t hashCode(void) const; 00309 00315 virtual const UnicodeString& getRules(void) const; 00316 00317 //======================================================================= 00318 // BreakIterator overrides 00319 //======================================================================= 00320 00346 virtual CharacterIterator& getText(void) const; 00347 00348 00363 virtual UText *getUText(UText *fillIn, UErrorCode &status) const; 00364 00372 virtual void adoptText(CharacterIterator* newText); 00373 00380 virtual void setText(const UnicodeString& newText); 00381 00395 virtual void setText(UText *text, UErrorCode &status); 00396 00402 virtual int32_t first(void); 00403 00409 virtual int32_t last(void); 00410 00421 virtual int32_t next(int32_t n); 00422 00428 virtual int32_t next(void); 00429 00435 virtual int32_t previous(void); 00436 00444 virtual int32_t following(int32_t offset); 
00445 00453 virtual int32_t preceding(int32_t offset); 00454 00463 virtual UBool isBoundary(int32_t offset); 00464 00470 virtual int32_t current(void) const; 00471 00472 00505 virtual int32_t getRuleStatus() const; 00506 00530 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 00531 00543 virtual UClassID getDynamicClassID(void) const; 00544 00556 static UClassID U_EXPORT2 getStaticClassID(void); 00557 00558 /* 00559 * Create a clone (copy) of this break iterator in memory provided 00560 * by the caller. The idea is to increase performance by avoiding 00561 * a storage allocation. Use of this functoin is NOT RECOMMENDED. 00562 * Performance gains are minimal, and correct buffer management is 00563 * tricky. Use clone() instead. 00564 * 00565 * @param stackBuffer The pointer to the memory into which the cloned object 00566 * should be placed. If NULL, allocate heap memory 00567 * for the cloned object. 00568 * @param BufferSize The size of the buffer. If zero, return the required 00569 * buffer size, but do not clone the object. If the 00570 * size was too small (but not zero), allocate heap 00571 * storage for the cloned object. 00572 * 00573 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00574 * returned if the the provided buffer was too small, and 00575 * the clone was therefore put on the heap. 00576 * 00577 * @return Pointer to the clone object. This may differ from the stackBuffer 00578 * address if the byte alignment of the stack buffer was not suitable 00579 * or if the stackBuffer was too small to hold the clone. 
00580 * @stable ICU 2.0 00581 */ 00582 virtual BreakIterator * createBufferClone(void *stackBuffer, 00583 int32_t &BufferSize, 00584 UErrorCode &status); 00585 00586 00604 virtual const uint8_t *getBinaryRules(uint32_t &length); 00605 00606 00607 protected: 00608 //======================================================================= 00609 // implementation 00610 //======================================================================= 00616 virtual void reset(void); 00617 00618 #if 0 00619 00627 virtual UBool isDictionaryChar(UChar32); 00628 00633 virtual int32_t getBreakType() const; 00634 #endif 00635 00640 virtual void setBreakType(int32_t type); 00641 00647 void init(); 00648 00649 private: 00650 00660 int32_t handlePrevious(const RBBIStateTable *statetable); 00661 00671 int32_t handleNext(const RBBIStateTable *statetable); 00672 00673 protected: 00674 00689 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse); 00690 00691 private: 00692 00699 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); 00700 00704 void makeRuleStatusValid(); 00705 00706 }; 00707 00708 //------------------------------------------------------------------------------ 00709 // 00710 // Inline Functions Definitions ... 00711 // 00712 //------------------------------------------------------------------------------ 00713 00714 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00715 return !operator==(that); 00716 } 00717 00718 U_NAMESPACE_END 00719 00720 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 00721 00722 #endif