1244 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			1244 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			C++
		
	
	
	
| // © 2016 and later: Unicode, Inc. and others.
 | ||
| // License & terms of use: http://www.unicode.org/copyright.html
 | ||
| /*
 | ||
| *******************************************************************************
 | ||
| * Copyright (C) 2009-2014, International Business Machines Corporation and
 | ||
| * others. All Rights Reserved.
 | ||
| *******************************************************************************
 | ||
| */
 | ||
| 
 | ||
| #include "unicode/utypes.h"
 | ||
| 
 | ||
| #if !UCONFIG_NO_COLLATION
 | ||
| 
 | ||
| #include "unicode/alphaindex.h"
 | ||
| #include "unicode/coll.h"
 | ||
| #include "unicode/localpointer.h"
 | ||
| #include "unicode/normalizer2.h"
 | ||
| #include "unicode/tblcoll.h"
 | ||
| #include "unicode/uchar.h"
 | ||
| #include "unicode/ulocdata.h"
 | ||
| #include "unicode/uniset.h"
 | ||
| #include "unicode/uobject.h"
 | ||
| #include "unicode/usetiter.h"
 | ||
| #include "unicode/utf16.h"
 | ||
| 
 | ||
| #include "cmemory.h"
 | ||
| #include "cstring.h"
 | ||
| #include "uassert.h"
 | ||
| #include "uvector.h"
 | ||
| #include "uvectr64.h"
 | ||
| 
 | ||
| //#include <string>
 | ||
| //#include <iostream>
 | ||
| 
 | ||
| U_NAMESPACE_BEGIN
 | ||
| 
 | ||
| namespace {
 | ||
| 
 | ||
| /**
 | ||
|  * Prefix string for Chinese index buckets.
 | ||
|  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
 | ||
|  */
 | ||
| const UChar BASE[1] = { 0xFDD0 };
 | ||
| const int32_t BASE_LENGTH = 1;
 | ||
| 
 | ||
| UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
 | ||
|                                 const UnicodeString &one, const UnicodeString &other);
 | ||
| 
 | ||
| }  // namespace
 | ||
| 
 | ||
| static int32_t U_CALLCONV
 | ||
| collatorComparator(const void *context, const void *left, const void *right);
 | ||
| 
 | ||
| static int32_t U_CALLCONV
 | ||
| recordCompareFn(const void *context, const void *left, const void *right);
 | ||
| 
 | ||
| //  UVector<Record *> support function, delete a Record.
 | ||
| static void U_CALLCONV
 | ||
| alphaIndex_deleteRecord(void *obj) {
 | ||
|     delete static_cast<AlphabeticIndex::Record *>(obj);
 | ||
| }
 | ||
| 
 | ||
| namespace {
 | ||
| 
 | ||
| UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
 | ||
|                            UErrorCode &errorCode) {
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
|     if (owned.isValid()) {
 | ||
|         return owned.orphan();
 | ||
|     }
 | ||
|     UnicodeString *p = new UnicodeString(s);
 | ||
|     if (p == NULL) {
 | ||
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|     }
 | ||
|     return p;
 | ||
| }
 | ||
| 
 | ||
| inline UnicodeString *getString(const UVector &list, int32_t i) {
 | ||
|     return static_cast<UnicodeString *>(list[i]);
 | ||
| }
 | ||
| 
 | ||
| inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
 | ||
|     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
 | ||
| }
 | ||
| 
 | ||
| inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
 | ||
|     return static_cast<AlphabeticIndex::Record *>(list[i]);
 | ||
| }
 | ||
| 
 | ||
| /**
 | ||
|  * Like Java Collections.binarySearch(List, String, Comparator).
 | ||
|  *
 | ||
|  * @return the index>=0 where the item was found,
 | ||
|  *         or the index<0 for inserting the string at ~index in sorted order
 | ||
|  */
 | ||
| int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
 | ||
|     if (list.size() == 0) { return ~0; }
 | ||
|     int32_t start = 0;
 | ||
|     int32_t limit = list.size();
 | ||
|     for (;;) {
 | ||
|         int32_t i = (start + limit) / 2;
 | ||
|         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
 | ||
|         UErrorCode errorCode = U_ZERO_ERROR;
 | ||
|         UCollationResult cmp = coll.compare(s, *si, errorCode);
 | ||
|         if (cmp == UCOL_EQUAL) {
 | ||
|             return i;
 | ||
|         } else if (cmp < 0) {
 | ||
|             if (i == start) {
 | ||
|                 return ~start;  // insert s before *si
 | ||
|             }
 | ||
|             limit = i;
 | ||
|         } else {
 | ||
|             if (i == start) {
 | ||
|                 return ~(start + 1);  // insert s after *si
 | ||
|             }
 | ||
|             start = i;
 | ||
|         }
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| }  // namespace
 | ||
| 
 | ||
| // The BucketList is not in the anonymous namespace because only Clang
 | ||
| // seems to support its use in other classes from there.
 | ||
| // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
 | ||
| class BucketList : public UObject {
 | ||
| public:
 | ||
|     BucketList(UVector *bucketList, UVector *publicBucketList)
 | ||
|             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
 | ||
|         int32_t displayIndex = 0;
 | ||
|         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
 | ||
|             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
 | ||
|         }
 | ||
|     }
 | ||
| 
 | ||
|     // The virtual destructor must not be inline.
 | ||
|     // See ticket #8454 for details.
 | ||
|     virtual ~BucketList();
 | ||
| 
 | ||
|     int32_t getBucketCount() const {
 | ||
|         return immutableVisibleList_->size();
 | ||
|     }
 | ||
| 
 | ||
|     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
 | ||
|                            UErrorCode &errorCode) {
 | ||
|         // binary search
 | ||
|         int32_t start = 0;
 | ||
|         int32_t limit = bucketList_->size();
 | ||
|         while ((start + 1) < limit) {
 | ||
|             int32_t i = (start + limit) / 2;
 | ||
|             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
 | ||
|             UCollationResult nameVsBucket =
 | ||
|                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
 | ||
|             if (nameVsBucket < 0) {
 | ||
|                 limit = i;
 | ||
|             } else {
 | ||
|                 start = i;
 | ||
|             }
 | ||
|         }
 | ||
|         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
 | ||
|         if (bucket->displayBucket_ != NULL) {
 | ||
|             bucket = bucket->displayBucket_;
 | ||
|         }
 | ||
|         return bucket->displayIndex_;
 | ||
|     }
 | ||
| 
 | ||
|     /** All of the buckets, visible and invisible. */
 | ||
|     UVector *bucketList_;
 | ||
|     /** Just the visible buckets. */
 | ||
|     UVector *immutableVisibleList_;
 | ||
| };
 | ||
| 
 | ||
| BucketList::~BucketList() {
 | ||
|     delete bucketList_;
 | ||
|     if (immutableVisibleList_ != bucketList_) {
 | ||
|         delete immutableVisibleList_;
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
 | ||
|     delete buckets_;
 | ||
|     delete collatorPrimaryOnly_;
 | ||
| }
 | ||
| 
 | ||
| int32_t
 | ||
| AlphabeticIndex::ImmutableIndex::getBucketCount() const {
 | ||
|     return buckets_->getBucketCount();
 | ||
| }
 | ||
| 
 | ||
| int32_t
 | ||
| AlphabeticIndex::ImmutableIndex::getBucketIndex(
 | ||
|         const UnicodeString &name, UErrorCode &errorCode) const {
 | ||
|     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
 | ||
| }
 | ||
| 
 | ||
| const AlphabeticIndex::Bucket *
 | ||
| AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
 | ||
|     if (0 <= index && index < buckets_->getBucketCount()) {
 | ||
|         return icu::getBucket(*buckets_->immutableVisibleList_, index);
 | ||
|     } else {
 | ||
|         return NULL;
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
 | ||
|         : inputList_(NULL),
 | ||
|           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
 | ||
|           maxLabelCount_(99),
 | ||
|           initialLabels_(NULL), firstCharsInScripts_(NULL),
 | ||
|           collator_(NULL), collatorPrimaryOnly_(NULL),
 | ||
|           buckets_(NULL) {
 | ||
|     init(&locale, status);
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
 | ||
|         : inputList_(NULL),
 | ||
|           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
 | ||
|           maxLabelCount_(99),
 | ||
|           initialLabels_(NULL), firstCharsInScripts_(NULL),
 | ||
|           collator_(collator), collatorPrimaryOnly_(NULL),
 | ||
|           buckets_(NULL) {
 | ||
|     init(NULL, status);
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex::~AlphabeticIndex() {
 | ||
|     delete collator_;
 | ||
|     delete collatorPrimaryOnly_;
 | ||
|     delete firstCharsInScripts_;
 | ||
|     delete buckets_;
 | ||
|     delete inputList_;
 | ||
|     delete initialLabels_;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return *this;
 | ||
|     }
 | ||
|     initialLabels_->addAll(additions);
 | ||
|     clearBuckets();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
 | ||
|     addIndexExemplars(locale, status);
 | ||
|     clearBuckets();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
|     // In C++, the ImmutableIndex must own its copy of the BucketList,
 | ||
|     // even if it contains no records, for proper memory management.
 | ||
|     // We could clone the buckets_ if they are not NULL,
 | ||
|     // but that would be worth it only if this method is called multiple times,
 | ||
|     // or called after using the old-style bucket iterator API.
 | ||
|     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
 | ||
|     LocalPointer<RuleBasedCollator> coll(collatorPrimaryOnly_->clone());
 | ||
|     if (immutableBucketList.isNull() || coll.isNull()) {
 | ||
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
 | ||
|     if (immIndex == NULL) {
 | ||
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     // The ImmutableIndex adopted its parameter objects.
 | ||
|     immutableBucketList.orphan();
 | ||
|     coll.orphan();
 | ||
|     return immIndex;
 | ||
| }
 | ||
| 
 | ||
| int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
 | ||
|     initBuckets(status);
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return 0;
 | ||
|     }
 | ||
|     return buckets_->getBucketCount();
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
 | ||
|     if (U_FAILURE(status) || inputList_ == NULL) {
 | ||
|         return 0;
 | ||
|     }
 | ||
|     return inputList_->size();
 | ||
| }
 | ||
| 
 | ||
| void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
 | ||
|     U_ASSERT(indexCharacters.hasDeleter());
 | ||
|     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
 | ||
|     if (U_FAILURE(errorCode)) { return; }
 | ||
| 
 | ||
|     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
 | ||
|     const UnicodeString &overflowBoundary =
 | ||
|         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
 | ||
| 
 | ||
|     // We make a sorted array of elements.
 | ||
|     // Some of the input may be redundant.
 | ||
|     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
 | ||
|     // We filter out those cases.
 | ||
|     UnicodeSetIterator iter(*initialLabels_);
 | ||
|     while (U_SUCCESS(errorCode) && iter.next()) {
 | ||
|         const UnicodeString *item = &iter.getString();
 | ||
|         LocalPointer<UnicodeString> ownedItem;
 | ||
|         UBool checkDistinct;
 | ||
|         int32_t itemLength = item->length();
 | ||
|         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
 | ||
|             checkDistinct = FALSE;
 | ||
|         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
 | ||
|                 item->charAt(itemLength - 2) != 0x2a) {
 | ||
|             // Use a label if it is marked with one trailing star,
 | ||
|             // even if the label string sorts the same when all contractions are suppressed.
 | ||
|             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
 | ||
|             item = ownedItem.getAlias();
 | ||
|             if (item == NULL) {
 | ||
|                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|                 return;
 | ||
|             }
 | ||
|             checkDistinct = FALSE;
 | ||
|         } else {
 | ||
|             checkDistinct = TRUE;
 | ||
|         }
 | ||
|         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
 | ||
|             // Ignore a primary-ignorable or non-alphabetic index character.
 | ||
|         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
 | ||
|             // Ignore an index character that will land in the overflow bucket.
 | ||
|         } else if (checkDistinct &&
 | ||
|                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
 | ||
|             // Ignore a multi-code point index character that does not sort distinctly
 | ||
|             // from the sequence of its separate characters.
 | ||
|         } else {
 | ||
|             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
 | ||
|             if (insertionPoint < 0) {
 | ||
|                 indexCharacters.insertElementAt(
 | ||
|                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
 | ||
|             } else {
 | ||
|                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
 | ||
|                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
 | ||
|                     indexCharacters.setElementAt(
 | ||
|                         ownedString(*item, ownedItem, errorCode), insertionPoint);
 | ||
|                 }
 | ||
|             }
 | ||
|         }
 | ||
|     }
 | ||
|     if (U_FAILURE(errorCode)) { return; }
 | ||
| 
 | ||
|     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
 | ||
| 
 | ||
|     int32_t size = indexCharacters.size() - 1;
 | ||
|     if (size > maxLabelCount_) {
 | ||
|         int32_t count = 0;
 | ||
|         int32_t old = -1;
 | ||
|         for (int32_t i = 0; i < indexCharacters.size();) {
 | ||
|             ++count;
 | ||
|             int32_t bump = count * maxLabelCount_ / size;
 | ||
|             if (bump == old) {
 | ||
|                 indexCharacters.removeElementAt(i);
 | ||
|             } else {
 | ||
|                 old = bump;
 | ||
|                 ++i;
 | ||
|             }
 | ||
|         }
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| namespace {
 | ||
| 
 | ||
| const UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) {
 | ||
|     if (!current.startsWith(BASE, BASE_LENGTH)) {
 | ||
|         return current;
 | ||
|     }
 | ||
|     UChar rest = current.charAt(BASE_LENGTH);
 | ||
|     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
 | ||
|         int32_t count = rest-0x2800;
 | ||
|         temp.setTo((UChar)(0x30 + count % 10));
 | ||
|         if (count >= 10) {
 | ||
|             count /= 10;
 | ||
|             temp.insert(0, (UChar)(0x30 + count % 10));
 | ||
|             if (count >= 10) {
 | ||
|                 count /= 10;
 | ||
|                 temp.insert(0, (UChar)(0x30 + count));
 | ||
|             }
 | ||
|         }
 | ||
|         return temp.append((UChar)0x5283);
 | ||
|     }
 | ||
|     return temp.setTo(current, BASE_LENGTH);
 | ||
| }
 | ||
| 
 | ||
| UBool hasMultiplePrimaryWeights(
 | ||
|         const RuleBasedCollator &coll, uint32_t variableTop,
 | ||
|         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
 | ||
|     ces.removeAllElements();
 | ||
|     coll.internalGetCEs(s, ces, errorCode);
 | ||
|     if (U_FAILURE(errorCode)) { return FALSE; }
 | ||
|     UBool seenPrimary = FALSE;
 | ||
|     for (int32_t i = 0; i < ces.size(); ++i) {
 | ||
|         int64_t ce = ces.elementAti(i);
 | ||
|         uint32_t p = (uint32_t)(ce >> 32);
 | ||
|         if (p > variableTop) {
 | ||
|             // not primary ignorable
 | ||
|             if (seenPrimary) {
 | ||
|                 return TRUE;
 | ||
|             }
 | ||
|             seenPrimary = TRUE;
 | ||
|         }
 | ||
|     }
 | ||
|     return FALSE;
 | ||
| }
 | ||
| 
 | ||
| }  // namespace
 | ||
| 
 | ||
| BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
 | ||
|     // Initialize indexCharacters.
 | ||
|     UVector indexCharacters(errorCode);
 | ||
|     indexCharacters.setDeleter(uprv_deleteUObject);
 | ||
|     initLabels(indexCharacters, errorCode);
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
| 
 | ||
|     // Variables for hasMultiplePrimaryWeights().
 | ||
|     UVector64 ces(errorCode);
 | ||
|     uint32_t variableTop;
 | ||
|     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
 | ||
|         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
 | ||
|     } else {
 | ||
|         variableTop = 0;
 | ||
|     }
 | ||
|     UBool hasInvisibleBuckets = FALSE;
 | ||
| 
 | ||
|     // Helper arrays for Chinese Pinyin collation.
 | ||
|     Bucket *asciiBuckets[26] = {
 | ||
|         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 | ||
|         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
 | ||
|     };
 | ||
|     Bucket *pinyinBuckets[26] = {
 | ||
|         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 | ||
|         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
 | ||
|     };
 | ||
|     UBool hasPinyin = FALSE;
 | ||
| 
 | ||
|     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
 | ||
|     if (U_FAILURE(errorCode)) {
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     bucketList->setDeleter(uprv_deleteUObject);
 | ||
| 
 | ||
|     // underflow bucket
 | ||
|     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
 | ||
|     if (bucket == NULL) {
 | ||
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     bucketList->addElementX(bucket, errorCode);
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
| 
 | ||
|     UnicodeString temp;
 | ||
| 
 | ||
|     // fix up the list, adding underflow, additions, overflow
 | ||
|     // Insert inflow labels as needed.
 | ||
|     int32_t scriptIndex = -1;
 | ||
|     const UnicodeString *scriptUpperBoundary = &emptyString_;
 | ||
|     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
 | ||
|         UnicodeString ¤t = *getString(indexCharacters, i);
 | ||
|         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
 | ||
|             // We crossed the script boundary into a new script.
 | ||
|             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
 | ||
|             UBool skippedScript = FALSE;
 | ||
|             for (;;) {
 | ||
|                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
 | ||
|                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
 | ||
|                     break;
 | ||
|                 }
 | ||
|                 skippedScript = TRUE;
 | ||
|             }
 | ||
|             if (skippedScript && bucketList->size() > 1) {
 | ||
|                 // We are skipping one or more scripts,
 | ||
|                 // and we are not just getting out of the underflow label.
 | ||
|                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
 | ||
|                 if (bucket == NULL) {
 | ||
|                     errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|                     return NULL;
 | ||
|                 }
 | ||
|                 bucketList->addElementX(bucket, errorCode);
 | ||
|             }
 | ||
|         }
 | ||
|         // Add a bucket with the current label.
 | ||
|         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
 | ||
|         if (bucket == NULL) {
 | ||
|             errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|             return NULL;
 | ||
|         }
 | ||
|         bucketList->addElementX(bucket, errorCode);
 | ||
|         // Remember ASCII and Pinyin buckets for Pinyin redirects.
 | ||
|         UChar c;
 | ||
|         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
 | ||
|             asciiBuckets[c - 0x41] = bucket;
 | ||
|         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
 | ||
|                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
 | ||
|             pinyinBuckets[c - 0x41] = bucket;
 | ||
|             hasPinyin = TRUE;
 | ||
|         }
 | ||
|         // Check for multiple primary weights.
 | ||
|         if (!current.startsWith(BASE, BASE_LENGTH) &&
 | ||
|                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
 | ||
|                                           ces, errorCode) &&
 | ||
|                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
 | ||
|             // "AE-ligature" or "Sch" etc.
 | ||
|             for (int32_t j = bucketList->size() - 2;; --j) {
 | ||
|                 Bucket *singleBucket = getBucket(*bucketList, j);
 | ||
|                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
 | ||
|                     // There is no single-character bucket since the last
 | ||
|                     // underflow or inflow label.
 | ||
|                     break;
 | ||
|                 }
 | ||
|                 if (singleBucket->displayBucket_ == NULL &&
 | ||
|                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
 | ||
|                                                    singleBucket->lowerBoundary_,
 | ||
|                                                    ces, errorCode)) {
 | ||
|                     // Add an invisible bucket that redirects strings greater than the expansion
 | ||
|                     // to the previous single-character bucket.
 | ||
|                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
 | ||
|                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
 | ||
|                     bucket = new Bucket(emptyString_,
 | ||
|                         UnicodeString(current).append((UChar)0xFFFF),
 | ||
|                         U_ALPHAINDEX_NORMAL);
 | ||
|                     if (bucket == NULL) {
 | ||
|                         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|                         return NULL;
 | ||
|                     }
 | ||
|                     bucket->displayBucket_ = singleBucket;
 | ||
|                     bucketList->addElementX(bucket, errorCode);
 | ||
|                     hasInvisibleBuckets = TRUE;
 | ||
|                     break;
 | ||
|                 }
 | ||
|             }
 | ||
|         }
 | ||
|     }
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
|     if (bucketList->size() == 1) {
 | ||
|         // No real labels, show only the underflow label.
 | ||
|         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
 | ||
|         if (bl == NULL) {
 | ||
|             errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|             return NULL;
 | ||
|         }
 | ||
|         bucketList.orphan();
 | ||
|         return bl;
 | ||
|     }
 | ||
|     // overflow bucket
 | ||
|     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
 | ||
|     if (bucket == NULL) {
 | ||
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     bucketList->addElementX(bucket, errorCode); // final
 | ||
| 
 | ||
|     if (hasPinyin) {
 | ||
|         // Redirect Pinyin buckets.
 | ||
|         Bucket *asciiBucket = NULL;
 | ||
|         for (int32_t i = 0; i < 26; ++i) {
 | ||
|             if (asciiBuckets[i] != NULL) {
 | ||
|                 asciiBucket = asciiBuckets[i];
 | ||
|             }
 | ||
|             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
 | ||
|                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
 | ||
|                 hasInvisibleBuckets = TRUE;
 | ||
|             }
 | ||
|         }
 | ||
|     }
 | ||
| 
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
|     if (!hasInvisibleBuckets) {
 | ||
|         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
 | ||
|         if (bl == NULL) {
 | ||
|             errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|             return NULL;
 | ||
|         }
 | ||
|         bucketList.orphan();
 | ||
|         return bl;
 | ||
|     }
 | ||
|     // Merge inflow buckets that are visually adjacent.
 | ||
|     // Iterate backwards: Merge inflow into overflow rather than the other way around.
 | ||
|     int32_t i = bucketList->size() - 1;
 | ||
|     Bucket *nextBucket = getBucket(*bucketList, i);
 | ||
|     while (--i > 0) {
 | ||
|         bucket = getBucket(*bucketList, i);
 | ||
|         if (bucket->displayBucket_ != NULL) {
 | ||
|             continue;  // skip invisible buckets
 | ||
|         }
 | ||
|         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
 | ||
|             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
 | ||
|                 bucket->displayBucket_ = nextBucket;
 | ||
|                 continue;
 | ||
|             }
 | ||
|         }
 | ||
|         nextBucket = bucket;
 | ||
|     }
 | ||
| 
 | ||
|     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
 | ||
|     if (U_FAILURE(errorCode)) {
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     // Do not call publicBucketList->setDeleter():
 | ||
|     // This vector shares its objects with the bucketList.
 | ||
|     for (int32_t j = 0; j < bucketList->size(); ++j) {
 | ||
|         bucket = getBucket(*bucketList, j);
 | ||
|         if (bucket->displayBucket_ == NULL) {
 | ||
|             publicBucketList->addElementX(bucket, errorCode);
 | ||
|         }
 | ||
|     }
 | ||
|     if (U_FAILURE(errorCode)) { return NULL; }
 | ||
|     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
 | ||
|     if (bl == NULL) {
 | ||
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     bucketList.orphan();
 | ||
|     publicBucketList.orphan();
 | ||
|     return bl;
 | ||
| }
 | ||
| 
 | ||
| /**
 | ||
|  * Creates an index, and buckets and sorts the list of records into the index.
 | ||
|  */
 | ||
| void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
 | ||
|     if (U_FAILURE(errorCode) || buckets_ != NULL) {
 | ||
|         return;
 | ||
|     }
 | ||
|     buckets_ = createBucketList(errorCode);
 | ||
|     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
 | ||
|         return;
 | ||
|     }
 | ||
| 
 | ||
|     // Sort the records by name.
 | ||
|     // Stable sort preserves input order of collation duplicates.
 | ||
|     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
 | ||
| 
 | ||
|     // Now, we traverse all of the input, which is now sorted.
 | ||
|     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
 | ||
|     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
 | ||
|     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
 | ||
|     // we need to improve it for that case.
 | ||
| 
 | ||
|     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
 | ||
|     int32_t bucketIndex = 1;
 | ||
|     Bucket *nextBucket;
 | ||
|     const UnicodeString *upperBoundary;
 | ||
|     if (bucketIndex < buckets_->bucketList_->size()) {
 | ||
|         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
 | ||
|         upperBoundary = &nextBucket->lowerBoundary_;
 | ||
|     } else {
 | ||
|         nextBucket = NULL;
 | ||
|         upperBoundary = NULL;
 | ||
|     }
 | ||
|     for (int32_t i = 0; i < inputList_->size(); ++i) {
 | ||
|         Record *r = getRecord(*inputList_, i);
 | ||
|         // if the current bucket isn't the right one, find the one that is
 | ||
|         // We have a special flag for the last bucket so that we don't look any further
 | ||
|         while (upperBoundary != NULL &&
 | ||
|                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
 | ||
|             currentBucket = nextBucket;
 | ||
|             // now reset the boundary that we compare against
 | ||
|             if (bucketIndex < buckets_->bucketList_->size()) {
 | ||
|                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
 | ||
|                 upperBoundary = &nextBucket->lowerBoundary_;
 | ||
|             } else {
 | ||
|                 upperBoundary = NULL;
 | ||
|             }
 | ||
|         }
 | ||
|         // now put the record into the bucket.
 | ||
|         Bucket *bucket = currentBucket;
 | ||
|         if (bucket->displayBucket_ != NULL) {
 | ||
|             bucket = bucket->displayBucket_;
 | ||
|         }
 | ||
|         if (bucket->records_ == NULL) {
 | ||
|             bucket->records_ = new UVector(errorCode);
 | ||
|             if (bucket->records_ == NULL) {
 | ||
|                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 | ||
|                 return;
 | ||
|             }
 | ||
|         }
 | ||
|         bucket->records_->addElementX(r, errorCode);
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| void AlphabeticIndex::clearBuckets() {
 | ||
|     if (buckets_ != NULL) {
 | ||
|         delete buckets_;
 | ||
|         buckets_ = NULL;
 | ||
|         internalResetBucketIterator();
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| void AlphabeticIndex::internalResetBucketIterator() {
 | ||
|     labelsIterIndex_ = -1;
 | ||
|     currentBucket_ = NULL;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
 | ||
|     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return;
 | ||
|     }
 | ||
| 
 | ||
|     UnicodeSet exemplars;
 | ||
|     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
 | ||
|     if (U_SUCCESS(status)) {
 | ||
|         initialLabels_->addAll(exemplars);
 | ||
|         return;
 | ||
|     }
 | ||
|     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
 | ||
| 
 | ||
|     // The locale data did not include explicit Index characters.
 | ||
|     // Synthesize a set of them from the locale's standard exemplar characters.
 | ||
|     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return;
 | ||
|     }
 | ||
| 
 | ||
|     // question: should we add auxiliary exemplars?
 | ||
|     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) {
 | ||
|         exemplars.add(0x61, 0x7A);
 | ||
|     }
 | ||
|     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
 | ||
|         // cut down to small list
 | ||
|         exemplars.remove(0xAC00, 0xD7A3).
 | ||
|             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
 | ||
|             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
 | ||
|             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
 | ||
|             add(0xD30C).add(0xD558);
 | ||
|     }
 | ||
|     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
 | ||
|         // cut down to small list
 | ||
|         // make use of the fact that Ethiopic is allocated in 8's, where
 | ||
|         // the base is 0 mod 8.
 | ||
|         UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
 | ||
|         ethiopic.retainAll(exemplars);
 | ||
|         exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
 | ||
|     }
 | ||
| 
 | ||
|     // Upper-case any that aren't already so.
 | ||
|     //   (We only do this for synthesized index characters.)
 | ||
|     UnicodeSetIterator it(exemplars);
 | ||
|     UnicodeString upperC;
 | ||
|     while (it.next()) {
 | ||
|         const UnicodeString &exemplarC = it.getString();
 | ||
|         upperC = exemplarC;
 | ||
|         upperC.toUpper(locale);
 | ||
|         initialLabels_->add(upperC);
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
 | ||
|     UnicodeSet contractions;
 | ||
|     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
 | ||
|     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
 | ||
|     initialLabels_->addAll(contractions);
 | ||
|     UnicodeSetIterator iter(contractions);
 | ||
|     while (iter.next()) {
 | ||
|         const UnicodeString &s = iter.getString();
 | ||
|         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
 | ||
|         UChar c = s.charAt(s.length() - 1);
 | ||
|         if (0x41 <= c && c <= 0x5A) {  // A-Z
 | ||
|             // There are Pinyin labels, add ASCII A-Z labels as well.
 | ||
|             initialLabels_->add(0x41, 0x5A);  // A-Z
 | ||
|             break;
 | ||
|         }
 | ||
|     }
 | ||
|     return TRUE;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| /*
 | ||
|  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
 | ||
|  */
 | ||
| static const UChar CGJ = 0x034F;
 | ||
| UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
 | ||
|     UnicodeString result;
 | ||
|     if (item.length() == 0) {
 | ||
|         return result;
 | ||
|     }
 | ||
|     int32_t i = 0;
 | ||
|     for (;;) {
 | ||
|         UChar32  cp = item.char32At(i);
 | ||
|         result.append(cp);
 | ||
|         i = item.moveIndex32(i, 1);
 | ||
|         if (i >= item.length()) {
 | ||
|             break;
 | ||
|         }
 | ||
|         result.append(CGJ);
 | ||
|     }
 | ||
|     return result;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| bool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
 | ||
|     return false;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| bool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
 | ||
|     return false;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| const RuleBasedCollator &AlphabeticIndex::getCollator() const {
 | ||
|     return *collator_;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| const UnicodeString &AlphabeticIndex::getInflowLabel() const {
 | ||
|     return inflowLabel_;
 | ||
| }
 | ||
| 
 | ||
| const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
 | ||
|     return overflowLabel_;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
 | ||
|     return underflowLabel_;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 | ||
|     inflowLabel_ = label;
 | ||
|     clearBuckets();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 | ||
|     overflowLabel_ = label;
 | ||
|     clearBuckets();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 | ||
|     underflowLabel_ = label;
 | ||
|     clearBuckets();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| int32_t AlphabeticIndex::getMaxLabelCount() const {
 | ||
|     return maxLabelCount_;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return *this;
 | ||
|     }
 | ||
|     if (maxLabelCount <= 0) {
 | ||
|         status = U_ILLEGAL_ARGUMENT_ERROR;
 | ||
|         return *this;
 | ||
|     }
 | ||
|     maxLabelCount_ = maxLabelCount;
 | ||
|     clearBuckets();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| //
 | ||
| //  init() - Common code for constructors.
 | ||
| //
 | ||
| 
 | ||
| void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) { return; }
 | ||
|     if (locale == NULL && collator_ == NULL) {
 | ||
|         status = U_ILLEGAL_ARGUMENT_ERROR;
 | ||
|         return;
 | ||
|     }
 | ||
| 
 | ||
|     initialLabels_         = new UnicodeSet();
 | ||
|     if (initialLabels_ == NULL) {
 | ||
|         status = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return;
 | ||
|     }
 | ||
| 
 | ||
|     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
 | ||
|     overflowLabel_ = inflowLabel_;
 | ||
|     underflowLabel_ = inflowLabel_;
 | ||
| 
 | ||
|     if (collator_ == NULL) {
 | ||
|         Collator *coll = Collator::createInstance(*locale, status);
 | ||
|         if (U_FAILURE(status)) {
 | ||
|             delete coll;
 | ||
|             return;
 | ||
|         }
 | ||
|         if (coll == NULL) {
 | ||
|             status = U_MEMORY_ALLOCATION_ERROR;
 | ||
|             return;
 | ||
|         }
 | ||
|         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
 | ||
|         if (collator_ == NULL) {
 | ||
|             delete coll;
 | ||
|             status = U_UNSUPPORTED_ERROR;
 | ||
|             return;
 | ||
|         }
 | ||
|     }
 | ||
|     collatorPrimaryOnly_ = collator_->clone();
 | ||
|     if (collatorPrimaryOnly_ == NULL) {
 | ||
|         status = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return;
 | ||
|     }
 | ||
|     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
 | ||
|     firstCharsInScripts_ = firstStringsInScript(status);
 | ||
|     if (U_FAILURE(status)) { return; }
 | ||
|     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
 | ||
|     // Guard against a degenerate collator where
 | ||
|     // some script boundary strings are primary ignorable.
 | ||
|     for (;;) {
 | ||
|         if (U_FAILURE(status)) { return; }
 | ||
|         if (firstCharsInScripts_->isEmpty()) {
 | ||
|             // AlphabeticIndex requires some non-ignorable script boundary strings.
 | ||
|             status = U_ILLEGAL_ARGUMENT_ERROR;
 | ||
|             return;
 | ||
|         }
 | ||
|         if (collatorPrimaryOnly_->compare(
 | ||
|                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
 | ||
|                 emptyString_, status) == UCOL_EQUAL) {
 | ||
|             firstCharsInScripts_->removeElementAt(0);
 | ||
|         } else {
 | ||
|             break;
 | ||
|         }
 | ||
|     }
 | ||
| 
 | ||
|     // Chinese index characters, which are specific to each of the several Chinese tailorings,
 | ||
|     // take precedence over the single locale data exemplar set per language.
 | ||
|     if (!addChineseIndexCharacters(status) && locale != NULL) {
 | ||
|         addIndexExemplars(*locale, status);
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| //
 | ||
| //  Comparison function for UVector<UnicodeString *> sorting with a collator.
 | ||
| //
 | ||
| static int32_t U_CALLCONV
 | ||
| collatorComparator(const void *context, const void *left, const void *right) {
 | ||
|     const UElement *leftElement = static_cast<const UElement *>(left);
 | ||
|     const UElement *rightElement = static_cast<const UElement *>(right);
 | ||
|     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
 | ||
|     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
 | ||
| 
 | ||
|     if (leftString == rightString) {
 | ||
|         // Catches case where both are NULL
 | ||
|         return 0;
 | ||
|     }
 | ||
|     if (leftString == NULL) {
 | ||
|         return 1;
 | ||
|     }
 | ||
|     if (rightString == NULL) {
 | ||
|         return -1;
 | ||
|     }
 | ||
|     const Collator *col = static_cast<const Collator *>(context);
 | ||
|     UErrorCode errorCode = U_ZERO_ERROR;
 | ||
|     return col->compare(*leftString, *rightString, errorCode);
 | ||
| }
 | ||
| 
 | ||
| //
 | ||
| //  Comparison function for UVector<Record *> sorting with a collator.
 | ||
| //
 | ||
| static int32_t U_CALLCONV
 | ||
| recordCompareFn(const void *context, const void *left, const void *right) {
 | ||
|     const UElement *leftElement = static_cast<const UElement *>(left);
 | ||
|     const UElement *rightElement = static_cast<const UElement *>(right);
 | ||
|     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
 | ||
|     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
 | ||
|     const Collator *col = static_cast<const Collator *>(context);
 | ||
|     UErrorCode errorCode = U_ZERO_ERROR;
 | ||
|     return col->compare(leftRec->name_, rightRec->name_, errorCode);
 | ||
| }
 | ||
| 
 | ||
| UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     LocalPointer<UVector> dest(new UVector(status), status);
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     dest->setDeleter(uprv_deleteUObject);
 | ||
|     // Fetch the script-first-primary contractions which are defined in the root collator.
 | ||
|     // They all start with U+FDD1.
 | ||
|     UnicodeSet set;
 | ||
|     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     if (set.isEmpty()) {
 | ||
|         status = U_UNSUPPORTED_ERROR;
 | ||
|         return NULL;
 | ||
|     }
 | ||
|     UnicodeSetIterator iter(set);
 | ||
|     while (iter.next()) {
 | ||
|         const UnicodeString &boundary = iter.getString();
 | ||
|         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
 | ||
|         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
 | ||
|             // Ignore boundaries for the special reordering groups.
 | ||
|             // Take only those for "real scripts" (where the sample character is a Letter,
 | ||
|             // and the one for unassigned implicit weights (Cn).
 | ||
|             continue;
 | ||
|         }
 | ||
|         UnicodeString *s = new UnicodeString(boundary);
 | ||
|         if (s == NULL) {
 | ||
|             status = U_MEMORY_ALLOCATION_ERROR;
 | ||
|             return NULL;
 | ||
|         }
 | ||
|         dest->addElementX(s, status);
 | ||
|     }
 | ||
|     return dest.orphan();
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| namespace {
 | ||
| 
 | ||
| /**
 | ||
|  * Returns true if one index character string is "better" than the other.
 | ||
|  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
 | ||
|  * better, and otherwise binary-less-than is better.
 | ||
|  */
 | ||
| UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
 | ||
|                                 const UnicodeString &one, const UnicodeString &other) {
 | ||
|     // This is called with primary-equal strings, but never with one.equals(other).
 | ||
|     UErrorCode status = U_ZERO_ERROR;
 | ||
|     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
 | ||
|     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
 | ||
|     if (U_FAILURE(status)) { return FALSE; }
 | ||
|     int32_t result = n1.countChar32() - n2.countChar32();
 | ||
|     if (result != 0) {
 | ||
|         return result < 0;
 | ||
|     }
 | ||
|     result = n1.compareCodePointOrder(n2);
 | ||
|     if (result != 0) {
 | ||
|         return result < 0;
 | ||
|     }
 | ||
|     return one.compareCodePointOrder(other) < 0;
 | ||
| }
 | ||
| 
 | ||
| }  // namespace
 | ||
| 
 | ||
| //
 | ||
| //  Constructor & Destructor for AlphabeticIndex::Record
 | ||
| //
 | ||
| //     Records are internal only, instances are not directly surfaced in the public API.
 | ||
| //     This class is mostly struct-like, with all public fields.
 | ||
| 
 | ||
| AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
 | ||
|         : name_(name), data_(data) {}
 | ||
| 
 | ||
| AlphabeticIndex::Record::~Record() {
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return *this;
 | ||
|     }
 | ||
|     if (inputList_ == NULL) {
 | ||
|         inputList_ = new UVector(status);
 | ||
|         if (inputList_ == NULL) {
 | ||
|             status = U_MEMORY_ALLOCATION_ERROR;
 | ||
|             return *this;
 | ||
|         }
 | ||
|         inputList_->setDeleter(alphaIndex_deleteRecord);
 | ||
|     }
 | ||
|     Record *r = new Record(name, data);
 | ||
|     if (r == NULL) {
 | ||
|         status = U_MEMORY_ALLOCATION_ERROR;
 | ||
|         return *this;
 | ||
|     }
 | ||
|     inputList_->addElementX(r, status);
 | ||
|     clearBuckets();
 | ||
|     //std::string ss;
 | ||
|     //std::string ss2;
 | ||
|     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 
 | ||
|     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
 | ||
|     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
 | ||
|         inputList_->removeAllElements();
 | ||
|         clearBuckets();
 | ||
|     }
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
 | ||
|     initBuckets(status);
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return 0;
 | ||
|     }
 | ||
|     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| int32_t AlphabeticIndex::getBucketIndex() const {
 | ||
|     return labelsIterIndex_;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     if (buckets_ == NULL && currentBucket_ != NULL) {
 | ||
|         status = U_ENUM_OUT_OF_SYNC_ERROR;
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     initBuckets(status);
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     ++labelsIterIndex_;
 | ||
|     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
 | ||
|         labelsIterIndex_ = buckets_->getBucketCount();
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
 | ||
|     resetRecordIterator();
 | ||
|     return TRUE;
 | ||
| }
 | ||
| 
 | ||
| const UnicodeString &AlphabeticIndex::getBucketLabel() const {
 | ||
|     if (currentBucket_ != NULL) {
 | ||
|         return currentBucket_->label_;
 | ||
|     } else {
 | ||
|         return emptyString_;
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
 | ||
|     if (currentBucket_ != NULL) {
 | ||
|         return currentBucket_->labelType_;
 | ||
|     } else {
 | ||
|         return U_ALPHAINDEX_NORMAL;
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| int32_t AlphabeticIndex::getBucketRecordCount() const {
 | ||
|     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
 | ||
|         return currentBucket_->records_->size();
 | ||
|     } else {
 | ||
|         return 0;
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return *this;
 | ||
|     }
 | ||
|     internalResetBucketIterator();
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
 | ||
|     if (U_FAILURE(status)) {
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     if (currentBucket_ == NULL) {
 | ||
|         // We are trying to iterate over the items in a bucket, but there is no
 | ||
|         // current bucket from the enumeration of buckets.
 | ||
|         status = U_INVALID_STATE_ERROR;
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     if (buckets_ == NULL) {
 | ||
|         status = U_ENUM_OUT_OF_SYNC_ERROR;
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     if (currentBucket_->records_ == NULL) {
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     ++itemsIterIndex_;
 | ||
|     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
 | ||
|         itemsIterIndex_  = currentBucket_->records_->size();
 | ||
|         return FALSE;
 | ||
|     }
 | ||
|     return TRUE;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| const UnicodeString &AlphabeticIndex::getRecordName() const {
 | ||
|     const UnicodeString *retStr = &emptyString_;
 | ||
|     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
 | ||
|         itemsIterIndex_ >= 0 &&
 | ||
|         itemsIterIndex_ < currentBucket_->records_->size()) {
 | ||
|             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
 | ||
|             retStr = &item->name_;
 | ||
|     }
 | ||
|     return *retStr;
 | ||
| }
 | ||
| 
 | ||
| const void *AlphabeticIndex::getRecordData() const {
 | ||
|     const void *retPtr = NULL;
 | ||
|     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
 | ||
|         itemsIterIndex_ >= 0 &&
 | ||
|         itemsIterIndex_ < currentBucket_->records_->size()) {
 | ||
|             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
 | ||
|             retPtr = item->data_;
 | ||
|     }
 | ||
|     return retPtr;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
 | ||
|     itemsIterIndex_ = -1;
 | ||
|     return *this;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
 | ||
|                                 const UnicodeString &lowerBoundary,
 | ||
|                                 UAlphabeticIndexLabelType type)
 | ||
|         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
 | ||
|           displayBucket_(NULL), displayIndex_(-1),
 | ||
|           records_(NULL) {
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| AlphabeticIndex::Bucket::~Bucket() {
 | ||
|     delete records_;
 | ||
| }
 | ||
| 
 | ||
| U_NAMESPACE_END
 | ||
| 
 | ||
| #endif  // !UCONFIG_NO_COLLATION
 |