00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef __STRINGTRIEBUILDER_H__
00016 #define __STRINGTRIEBUILDER_H__
00017
00018 #include "unicode/utypes.h"
00019 #include "unicode/uobject.h"
00020
00026
00027 struct UHashtable;
00028 typedef struct UHashtable UHashtable;
00029
00034 enum UStringTrieBuildOption {
00039 USTRINGTRIE_BUILD_FAST,
00050 USTRINGTRIE_BUILD_SMALL
00051 };
00052
00053 U_NAMESPACE_BEGIN
00054
00061 class U_COMMON_API StringTrieBuilder : public UObject {
00062 public:
00063 #ifndef U_HIDE_INTERNAL_API
00064
00065 static UBool hashNode(const void *node);
00067 static UBool equalNodes(const void *left, const void *right);
00068 #endif
00069
00070 protected:
00071
00072
00074 StringTrieBuilder();
00076 virtual ~StringTrieBuilder();
00077
00078 #ifndef U_HIDE_INTERNAL_API
00079
00080 void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
00082 void deleteCompactBuilder();
00083
00085 void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
00086
00088 int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
00090 int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
00091 #endif
00092
00093 class Node;
00094
00095 #ifndef U_HIDE_INTERNAL_API
00096
00097 Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
00099 Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
00100 int32_t length, UErrorCode &errorCode);
00101 #endif
00102
00104 virtual int32_t getElementStringLength(int32_t i) const = 0;
00106 virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
00108 virtual int32_t getElementValue(int32_t i) const = 0;
00109
00110
00111
00113 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
00114
00115
00117 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
00119 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
00121 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
00122
00124 virtual UBool matchNodesCanHaveValues() const = 0;
00125
00127 virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
00129 virtual int32_t getMinLinearMatch() const = 0;
00131 virtual int32_t getMaxLinearMatchLength() const = 0;
00132
00133 #ifndef U_HIDE_INTERNAL_API
00134
00136 static const int32_t kMaxBranchLinearSubNodeLength=5;
00137
00138
00139
00141 static const int32_t kMaxSplitBranchLevels=14;
00142
00153 Node *registerNode(Node *newNode, UErrorCode &errorCode);
00164 Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00184 UHashtable *nodes;
00185
00187 class Node : public UObject {
00188 public:
00189 Node(int32_t initialHash) : hash(initialHash), offset(0) {}
00190 inline int32_t hashCode() const { return hash; }
00191
00192 static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
00193
00194 virtual UBool operator==(const Node &other) const;
00195 inline UBool operator!=(const Node &other) const { return !operator==(other); }
00223 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00224
00225 virtual void write(StringTrieBuilder &builder) = 0;
00226
00227 inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
00228 StringTrieBuilder &builder) {
00229
00230
00231
00232
00233
00234 if(offset<0 && (offset<lastRight || firstRight<offset)) {
00235 write(builder);
00236 }
00237 }
00238 inline int32_t getOffset() const { return offset; }
00239 protected:
00240 int32_t hash;
00241 int32_t offset;
00242 private:
00243
00244 virtual UClassID getDynamicClassID() const;
00245 };
00246
00247
00248
00249
00250
00251
00252
00254 class FinalValueNode : public Node {
00255 public:
00256 FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
00257 virtual UBool operator==(const Node &other) const;
00258 virtual void write(StringTrieBuilder &builder);
00259 protected:
00260 int32_t value;
00261 };
00262
00266 class ValueNode : public Node {
00267 public:
00268 ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
00269 virtual UBool operator==(const Node &other) const;
00270 void setValue(int32_t v) {
00271 hasValue=TRUE;
00272 value=v;
00273 hash=hash*37+v;
00274 }
00275 protected:
00276 UBool hasValue;
00277 int32_t value;
00278 };
00279
00283 class IntermediateValueNode : public ValueNode {
00284 public:
00285 IntermediateValueNode(int32_t v, Node *nextNode)
00286 : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
00287 virtual UBool operator==(const Node &other) const;
00288 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00289 virtual void write(StringTrieBuilder &builder);
00290 protected:
00291 Node *next;
00292 };
00293
00297 class LinearMatchNode : public ValueNode {
00298 public:
00299 LinearMatchNode(int32_t len, Node *nextNode)
00300 : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
00301 length(len), next(nextNode) {}
00302 virtual UBool operator==(const Node &other) const;
00303 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00304 protected:
00305 int32_t length;
00306 Node *next;
00307 };
00308
00312 class BranchNode : public Node {
00313 public:
00314 BranchNode(int32_t initialHash) : Node(initialHash) {}
00315 protected:
00316 int32_t firstEdgeNumber;
00317 };
00318
00322 class ListBranchNode : public BranchNode {
00323 public:
00324 ListBranchNode() : BranchNode(0x444444), length(0) {}
00325 virtual UBool operator==(const Node &other) const;
00326 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00327 virtual void write(StringTrieBuilder &builder);
00328
00329 void add(int32_t c, int32_t value) {
00330 units[length]=(UChar)c;
00331 equal[length]=NULL;
00332 values[length]=value;
00333 ++length;
00334 hash=(hash*37+c)*37+value;
00335 }
00336
00337 void add(int32_t c, Node *node) {
00338 units[length]=(UChar)c;
00339 equal[length]=node;
00340 values[length]=0;
00341 ++length;
00342 hash=(hash*37+c)*37+hashCode(node);
00343 }
00344 protected:
00345 Node *equal[kMaxBranchLinearSubNodeLength];
00346 int32_t length;
00347 int32_t values[kMaxBranchLinearSubNodeLength];
00348 UChar units[kMaxBranchLinearSubNodeLength];
00349 };
00350
00354 class SplitBranchNode : public BranchNode {
00355 public:
00356 SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
00357 : BranchNode(((0x555555*37+middleUnit)*37+
00358 hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
00359 unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
00360 virtual UBool operator==(const Node &other) const;
00361 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00362 virtual void write(StringTrieBuilder &builder);
00363 protected:
00364 UChar unit;
00365 Node *lessThan;
00366 Node *greaterOrEqual;
00367 };
00368
00369
00371 class BranchHeadNode : public ValueNode {
00372 public:
00373 BranchHeadNode(int32_t len, Node *subNode)
00374 : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
00375 length(len), next(subNode) {}
00376 virtual UBool operator==(const Node &other) const;
00377 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00378 virtual void write(StringTrieBuilder &builder);
00379 protected:
00380 int32_t length;
00381 Node *next;
00382 };
00383 #endif
00384
00386 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
00387 Node *nextNode) const = 0;
00388
00390 virtual int32_t write(int32_t unit) = 0;
00392 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
00394 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
00396 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
00398 virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
00399
00400 private:
00401
00402 virtual UClassID getDynamicClassID() const;
00403 };
00404
00405 U_NAMESPACE_END
00406
00407 #endif // __STRINGTRIEBUILDER_H__