1656 lines
		
	
	
		
			60 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			1656 lines
		
	
	
		
			60 KiB
		
	
	
	
		
			C++
		
	
	
	
| // © 2016 and later: Unicode, Inc. and others.
 | |
| // License & terms of use: http://www.unicode.org/copyright.html
 | |
| /*
 | |
| *******************************************************************************
 | |
| * Copyright (C) 1996-2015, International Business Machines
 | |
| * Corporation and others.  All Rights Reserved.
 | |
| *******************************************************************************
 | |
| * rulebasedcollator.cpp
 | |
| *
 | |
| * (replaced the former tblcoll.cpp)
 | |
| *
 | |
| * created on: 2012feb14 with new and old collation code
 | |
| * created by: Markus W. Scherer
 | |
| */
 | |
| 
 | |
| #include "unicode/utypes.h"
 | |
| 
 | |
| #if !UCONFIG_NO_COLLATION
 | |
| 
 | |
| #include "unicode/coll.h"
 | |
| #include "unicode/coleitr.h"
 | |
| #include "unicode/localpointer.h"
 | |
| #include "unicode/locid.h"
 | |
| #include "unicode/sortkey.h"
 | |
| #include "unicode/tblcoll.h"
 | |
| #include "unicode/ucol.h"
 | |
| #include "unicode/uiter.h"
 | |
| #include "unicode/uloc.h"
 | |
| #include "unicode/uniset.h"
 | |
| #include "unicode/unistr.h"
 | |
| #include "unicode/usetiter.h"
 | |
| #include "unicode/utf8.h"
 | |
| #include "unicode/uversion.h"
 | |
| #include "bocsu.h"
 | |
| #include "charstr.h"
 | |
| #include "cmemory.h"
 | |
| #include "collation.h"
 | |
| #include "collationcompare.h"
 | |
| #include "collationdata.h"
 | |
| #include "collationdatareader.h"
 | |
| #include "collationfastlatin.h"
 | |
| #include "collationiterator.h"
 | |
| #include "collationkeys.h"
 | |
| #include "collationroot.h"
 | |
| #include "collationsets.h"
 | |
| #include "collationsettings.h"
 | |
| #include "collationtailoring.h"
 | |
| #include "cstring.h"
 | |
| #include "uassert.h"
 | |
| #include "ucol_imp.h"
 | |
| #include "uhash.h"
 | |
| #include "uitercollationiterator.h"
 | |
| #include "ustr_imp.h"
 | |
| #include "utf16collationiterator.h"
 | |
| #include "utf8collationiterator.h"
 | |
| #include "uvectr64.h"
 | |
| 
 | |
| U_NAMESPACE_BEGIN
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| class FixedSortKeyByteSink : public SortKeyByteSink {
 | |
| public:
 | |
|     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
 | |
|             : SortKeyByteSink(dest, destCapacity) {}
 | |
|     virtual ~FixedSortKeyByteSink();
 | |
| 
 | |
| private:
 | |
|     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
 | |
|     virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
 | |
| };
 | |
| 
 | |
| FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
 | |
| 
 | |
| void
 | |
| FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
 | |
|     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
 | |
|     // Fill the buffer completely.
 | |
|     int32_t available = capacity_ - length;
 | |
|     if (available > 0) {
 | |
|         uprv_memcpy(buffer_ + length, bytes, available);
 | |
|     }
 | |
| }
 | |
| 
 | |
| UBool
 | |
| FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
 | |
|     return FALSE;
 | |
| }
 | |
| 
 | |
| }  // namespace
 | |
| 
 | |
| // Not in an anonymous namespace, so that it can be a friend of CollationKey.
 | |
| class CollationKeyByteSink : public SortKeyByteSink {
 | |
| public:
 | |
|     CollationKeyByteSink(CollationKey &key)
 | |
|             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
 | |
|               key_(key) {}
 | |
|     virtual ~CollationKeyByteSink();
 | |
| 
 | |
| private:
 | |
|     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
 | |
|     virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
 | |
| 
 | |
|     CollationKey &key_;
 | |
| };
 | |
| 
 | |
| CollationKeyByteSink::~CollationKeyByteSink() {}
 | |
| 
 | |
| void
 | |
| CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
 | |
|     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
 | |
|     if (Resize(n, length)) {
 | |
|         uprv_memcpy(buffer_ + length, bytes, n);
 | |
|     }
 | |
| }
 | |
| 
 | |
| UBool
 | |
| CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
 | |
|     if (buffer_ == NULL) {
 | |
|         return FALSE;  // allocation failed before already
 | |
|     }
 | |
|     int32_t newCapacity = 2 * capacity_;
 | |
|     int32_t altCapacity = length + 2 * appendCapacity;
 | |
|     if (newCapacity < altCapacity) {
 | |
|         newCapacity = altCapacity;
 | |
|     }
 | |
|     if (newCapacity < 200) {
 | |
|         newCapacity = 200;
 | |
|     }
 | |
|     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
 | |
|     if (newBuffer == NULL) {
 | |
|         SetNotOk();
 | |
|         return FALSE;
 | |
|     }
 | |
|     buffer_ = reinterpret_cast<char *>(newBuffer);
 | |
|     capacity_ = newCapacity;
 | |
|     return TRUE;
 | |
| }
 | |
| 
 | |
| RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
 | |
|         : Collator(other),
 | |
|           data(other.data),
 | |
|           settings(other.settings),
 | |
|           tailoring(other.tailoring),
 | |
|           cacheEntry(other.cacheEntry),
 | |
|           validLocale(other.validLocale),
 | |
|           explicitlySetAttributes(other.explicitlySetAttributes),
 | |
|           actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
 | |
|     settings->addRef();
 | |
|     cacheEntry->addRef();
 | |
| }
 | |
| 
 | |
| RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
 | |
|                                      const RuleBasedCollator *base, UErrorCode &errorCode)
 | |
|         : data(NULL),
 | |
|           settings(NULL),
 | |
|           tailoring(NULL),
 | |
|           cacheEntry(NULL),
 | |
|           validLocale(""),
 | |
|           explicitlySetAttributes(0),
 | |
|           actualLocaleIsSameAsValid(FALSE) {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(bin == NULL || length == 0 || base == NULL) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return;
 | |
|     }
 | |
|     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(base->tailoring != root) {
 | |
|         errorCode = U_UNSUPPORTED_ERROR;
 | |
|         return;
 | |
|     }
 | |
|     LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
 | |
|     if(t.isNull() || t->isBogus()) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|         return;
 | |
|     }
 | |
|     CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     t->actualLocale.setToBogus();
 | |
|     adoptTailoring(t.orphan(), errorCode);
 | |
| }
 | |
| 
 | |
| RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
 | |
|         : data(entry->tailoring->data),
 | |
|           settings(entry->tailoring->settings),
 | |
|           tailoring(entry->tailoring),
 | |
|           cacheEntry(entry),
 | |
|           validLocale(entry->validLocale),
 | |
|           explicitlySetAttributes(0),
 | |
|           actualLocaleIsSameAsValid(FALSE) {
 | |
|     settings->addRef();
 | |
|     cacheEntry->addRef();
 | |
| }
 | |
| 
 | |
| RuleBasedCollator::~RuleBasedCollator() {
 | |
|     SharedObject::clearPtr(settings);
 | |
|     SharedObject::clearPtr(cacheEntry);
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         t->deleteIfZeroRefCount();
 | |
|         return;
 | |
|     }
 | |
|     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
 | |
|     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
 | |
|     if(cacheEntry == NULL) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|         t->deleteIfZeroRefCount();
 | |
|         return;
 | |
|     }
 | |
|     data = t->data;
 | |
|     settings = t->settings;
 | |
|     settings->addRef();
 | |
|     tailoring = t;
 | |
|     cacheEntry->addRef();
 | |
|     validLocale = t->actualLocale;
 | |
|     actualLocaleIsSameAsValid = FALSE;
 | |
| }
 | |
| 
 | |
| RuleBasedCollator *
 | |
| RuleBasedCollator::clone() const {
 | |
|     return new RuleBasedCollator(*this);
 | |
| }
 | |
| 
 | |
| RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
 | |
|     if(this == &other) { return *this; }
 | |
|     SharedObject::copyPtr(other.settings, settings);
 | |
|     tailoring = other.tailoring;
 | |
|     SharedObject::copyPtr(other.cacheEntry, cacheEntry);
 | |
|     data = tailoring->data;
 | |
|     validLocale = other.validLocale;
 | |
|     explicitlySetAttributes = other.explicitlySetAttributes;
 | |
|     actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
 | |
|     return *this;
 | |
| }
 | |
| 
 | |
| UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
 | |
| 
 | |
| bool
 | |
| RuleBasedCollator::operator==(const Collator& other) const {
 | |
|     if(this == &other) { return true; }
 | |
|     if(!Collator::operator==(other)) { return false; }
 | |
|     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
 | |
|     if(*settings != *o.settings) { return false; }
 | |
|     if(data == o.data) { return true; }
 | |
|     UBool thisIsRoot = data->base == NULL;
 | |
|     UBool otherIsRoot = o.data->base == NULL;
 | |
|     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
 | |
|     if(thisIsRoot != otherIsRoot) { return false; }
 | |
|     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
 | |
|             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
 | |
|         // Shortcut: If both collators have valid rule strings, then compare those.
 | |
|         if(tailoring->rules == o.tailoring->rules) { return true; }
 | |
|     }
 | |
|     // Different rule strings can result in the same or equivalent tailoring.
 | |
|     // The rule strings are optional in ICU resource bundles, although included by default.
 | |
|     // cloneBinary() drops the rule string.
 | |
|     UErrorCode errorCode = U_ZERO_ERROR;
 | |
|     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
 | |
|     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
 | |
|     if(U_FAILURE(errorCode)) { return false; }
 | |
|     if(*thisTailored != *otherTailored) { return false; }
 | |
|     // For completeness, we should compare all of the mappings;
 | |
|     // or we should create a list of strings, sort it with one collator,
 | |
|     // and check if both collators compare adjacent strings the same
 | |
|     // (order & strength, down to quaternary); or similar.
 | |
|     // Testing equality of collators seems unusual.
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::hashCode() const {
 | |
|     int32_t h = settings->hashCode();
 | |
|     if(data->base == NULL) { return h; }  // root collator
 | |
|     // Do not rely on the rule string, see comments in operator==().
 | |
|     UErrorCode errorCode = U_ZERO_ERROR;
 | |
|     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     UnicodeSetIterator iter(*set);
 | |
|     while(iter.next() && !iter.isString()) {
 | |
|         h ^= data->getCE32(iter.getCodepoint());
 | |
|     }
 | |
|     return h;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
 | |
|                               const Locale &actual) {
 | |
|     if(actual == tailoring->actualLocale) {
 | |
|         actualLocaleIsSameAsValid = FALSE;
 | |
|     } else {
 | |
|         U_ASSERT(actual == valid);
 | |
|         actualLocaleIsSameAsValid = TRUE;
 | |
|     }
 | |
|     // Do not modify tailoring.actualLocale:
 | |
|     // We cannot be sure that that would be thread-safe.
 | |
|     validLocale = valid;
 | |
|     (void)requested;  // Ignore, see also ticket #10477.
 | |
| }
 | |
| 
 | |
| Locale
 | |
| RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         return Locale::getRoot();
 | |
|     }
 | |
|     switch(type) {
 | |
|     case ULOC_ACTUAL_LOCALE:
 | |
|         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
 | |
|     case ULOC_VALID_LOCALE:
 | |
|         return validLocale;
 | |
|     case ULOC_REQUESTED_LOCALE:
 | |
|     default:
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return Locale::getRoot();
 | |
|     }
 | |
| }
 | |
| 
 | |
| const char *
 | |
| RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         return NULL;
 | |
|     }
 | |
|     const Locale *result;
 | |
|     switch(type) {
 | |
|     case ULOC_ACTUAL_LOCALE:
 | |
|         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
 | |
|         break;
 | |
|     case ULOC_VALID_LOCALE:
 | |
|         result = &validLocale;
 | |
|         break;
 | |
|     case ULOC_REQUESTED_LOCALE:
 | |
|     default:
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return NULL;
 | |
|     }
 | |
|     if(result->isBogus()) { return NULL; }
 | |
|     const char *id = result->getName();
 | |
|     return id[0] == 0 ? "root" : id;
 | |
| }
 | |
| 
 | |
| const UnicodeString&
 | |
| RuleBasedCollator::getRules() const {
 | |
|     return tailoring->rules;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
 | |
|     if(delta == UCOL_TAILORING_ONLY) {
 | |
|         buffer = tailoring->rules;
 | |
|         return;
 | |
|     }
 | |
|     // UCOL_FULL_RULES
 | |
|     buffer.remove();
 | |
|     CollationLoader::appendRootRules(buffer);
 | |
|     buffer.append(tailoring->rules).getTerminatedBuffer();
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::getVersion(UVersionInfo version) const {
 | |
|     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
 | |
|     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
 | |
| }
 | |
| 
 | |
| UnicodeSet *
 | |
| RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return NULL; }
 | |
|     UnicodeSet *tailored = new UnicodeSet();
 | |
|     if(tailored == NULL) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|         return NULL;
 | |
|     }
 | |
|     if(data->base != NULL) {
 | |
|         TailoredSet(tailored).forData(data, errorCode);
 | |
|         if(U_FAILURE(errorCode)) {
 | |
|             delete tailored;
 | |
|             return NULL;
 | |
|         }
 | |
|     }
 | |
|     return tailored;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::internalGetContractionsAndExpansions(
 | |
|         UnicodeSet *contractions, UnicodeSet *expansions,
 | |
|         UBool addPrefixes, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(contractions != NULL) {
 | |
|         contractions->clear();
 | |
|     }
 | |
|     if(expansions != NULL) {
 | |
|         expansions->clear();
 | |
|     }
 | |
|     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
 | |
| }
 | |
| 
 | |
| const CollationSettings &
 | |
| RuleBasedCollator::getDefaultSettings() const {
 | |
|     return *tailoring->settings;
 | |
| }
 | |
| 
 | |
| UColAttributeValue
 | |
| RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
 | |
|     int32_t option;
 | |
|     switch(attr) {
 | |
|     case UCOL_FRENCH_COLLATION:
 | |
|         option = CollationSettings::BACKWARD_SECONDARY;
 | |
|         break;
 | |
|     case UCOL_ALTERNATE_HANDLING:
 | |
|         return settings->getAlternateHandling();
 | |
|     case UCOL_CASE_FIRST:
 | |
|         return settings->getCaseFirst();
 | |
|     case UCOL_CASE_LEVEL:
 | |
|         option = CollationSettings::CASE_LEVEL;
 | |
|         break;
 | |
|     case UCOL_NORMALIZATION_MODE:
 | |
|         option = CollationSettings::CHECK_FCD;
 | |
|         break;
 | |
|     case UCOL_STRENGTH:
 | |
|         return (UColAttributeValue)settings->getStrength();
 | |
|     case UCOL_HIRAGANA_QUATERNARY_MODE:
 | |
|         // Deprecated attribute, unsettable.
 | |
|         return UCOL_OFF;
 | |
|     case UCOL_NUMERIC_COLLATION:
 | |
|         option = CollationSettings::NUMERIC;
 | |
|         break;
 | |
|     default:
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return UCOL_DEFAULT;
 | |
|     }
 | |
|     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
 | |
|                                 UErrorCode &errorCode) {
 | |
|     UColAttributeValue oldValue = getAttribute(attr, errorCode);
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(value == oldValue) {
 | |
|         setAttributeExplicitly(attr);
 | |
|         return;
 | |
|     }
 | |
|     const CollationSettings &defaultSettings = getDefaultSettings();
 | |
|     if(settings == &defaultSettings) {
 | |
|         if(value == UCOL_DEFAULT) {
 | |
|             setAttributeDefault(attr);
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
|     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
 | |
|     if(ownedSettings == NULL) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     switch(attr) {
 | |
|     case UCOL_FRENCH_COLLATION:
 | |
|         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
 | |
|                                defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     case UCOL_ALTERNATE_HANDLING:
 | |
|         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     case UCOL_CASE_FIRST:
 | |
|         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     case UCOL_CASE_LEVEL:
 | |
|         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
 | |
|                                defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     case UCOL_NORMALIZATION_MODE:
 | |
|         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
 | |
|                                defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     case UCOL_STRENGTH:
 | |
|         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     case UCOL_HIRAGANA_QUATERNARY_MODE:
 | |
|         // Deprecated attribute. Check for valid values but do not change anything.
 | |
|         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
 | |
|             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         }
 | |
|         break;
 | |
|     case UCOL_NUMERIC_COLLATION:
 | |
|         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
 | |
|         break;
 | |
|     default:
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         break;
 | |
|     }
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     setFastLatinOptions(*ownedSettings);
 | |
|     if(value == UCOL_DEFAULT) {
 | |
|         setAttributeDefault(attr);
 | |
|     } else {
 | |
|         setAttributeExplicitly(attr);
 | |
|     }
 | |
| }
 | |
| 
 | |
| Collator &
 | |
| RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode)) { return *this; }
 | |
|     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
 | |
|     int32_t value;
 | |
|     if(group == UCOL_REORDER_CODE_DEFAULT) {
 | |
|         value = UCOL_DEFAULT;
 | |
|     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
 | |
|         value = group - UCOL_REORDER_CODE_FIRST;
 | |
|     } else {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return *this;
 | |
|     }
 | |
|     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
 | |
|     if(value == oldValue) {
 | |
|         setAttributeExplicitly(ATTR_VARIABLE_TOP);
 | |
|         return *this;
 | |
|     }
 | |
|     const CollationSettings &defaultSettings = getDefaultSettings();
 | |
|     if(settings == &defaultSettings) {
 | |
|         if(value == UCOL_DEFAULT) {
 | |
|             setAttributeDefault(ATTR_VARIABLE_TOP);
 | |
|             return *this;
 | |
|         }
 | |
|     }
 | |
|     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
 | |
|     if(ownedSettings == NULL) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|         return *this;
 | |
|     }
 | |
| 
 | |
|     if(group == UCOL_REORDER_CODE_DEFAULT) {
 | |
|         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
 | |
|     }
 | |
|     uint32_t varTop = data->getLastPrimaryForGroup(group);
 | |
|     U_ASSERT(varTop != 0);
 | |
|     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
 | |
|     if(U_FAILURE(errorCode)) { return *this; }
 | |
|     ownedSettings->variableTop = varTop;
 | |
|     setFastLatinOptions(*ownedSettings);
 | |
|     if(value == UCOL_DEFAULT) {
 | |
|         setAttributeDefault(ATTR_VARIABLE_TOP);
 | |
|     } else {
 | |
|         setAttributeExplicitly(ATTR_VARIABLE_TOP);
 | |
|     }
 | |
|     return *this;
 | |
| }
 | |
| 
 | |
| UColReorderCode
 | |
| RuleBasedCollator::getMaxVariable() const {
 | |
|     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
 | |
| }
 | |
| 
 | |
| uint32_t
 | |
| RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
 | |
|     return settings->variableTop;
 | |
| }
 | |
| 
 | |
| uint32_t
 | |
| RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     if(varTop == NULL && len !=0) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return 0;
 | |
|     }
 | |
|     if(len < 0) { len = u_strlen(varTop); }
 | |
|     if(len == 0) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return 0;
 | |
|     }
 | |
|     UBool numeric = settings->isNumeric();
 | |
|     int64_t ce1, ce2;
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
 | |
|         ce1 = ci.nextCE(errorCode);
 | |
|         ce2 = ci.nextCE(errorCode);
 | |
|     } else {
 | |
|         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
 | |
|         ce1 = ci.nextCE(errorCode);
 | |
|         ce2 = ci.nextCE(errorCode);
 | |
|     }
 | |
|     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
 | |
|         errorCode = U_CE_NOT_FOUND_ERROR;
 | |
|         return 0;
 | |
|     }
 | |
|     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
 | |
|     return settings->variableTop;
 | |
| }
 | |
| 
 | |
| uint32_t
 | |
| RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
 | |
|     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(varTop != settings->variableTop) {
 | |
|         // Pin the variable top to the end of the reordering group which contains it.
 | |
|         // Only a few special groups are supported.
 | |
|         int32_t group = data->getGroupForPrimary(varTop);
 | |
|         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
 | |
|             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|             return;
 | |
|         }
 | |
|         uint32_t v = data->getLastPrimaryForGroup(group);
 | |
|         U_ASSERT(v != 0 && v >= varTop);
 | |
|         varTop = v;
 | |
|         if(varTop != settings->variableTop) {
 | |
|             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
 | |
|             if(ownedSettings == NULL) {
 | |
|                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|                 return;
 | |
|             }
 | |
|             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
 | |
|                                           getDefaultSettings().options, errorCode);
 | |
|             if(U_FAILURE(errorCode)) { return; }
 | |
|             ownedSettings->variableTop = varTop;
 | |
|             setFastLatinOptions(*ownedSettings);
 | |
|         }
 | |
|     }
 | |
|     if(varTop == getDefaultSettings().variableTop) {
 | |
|         setAttributeDefault(ATTR_VARIABLE_TOP);
 | |
|     } else {
 | |
|         setAttributeExplicitly(ATTR_VARIABLE_TOP);
 | |
|     }
 | |
| }
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
 | |
|                                    UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     if(capacity < 0 || (dest == NULL && capacity > 0)) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return 0;
 | |
|     }
 | |
|     int32_t length = settings->reorderCodesLength;
 | |
|     if(length == 0) { return 0; }
 | |
|     if(length > capacity) {
 | |
|         errorCode = U_BUFFER_OVERFLOW_ERROR;
 | |
|         return length;
 | |
|     }
 | |
|     uprv_memcpy(dest, settings->reorderCodes, length * 4);
 | |
|     return length;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
 | |
|                                    UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(length < 0 || (reorderCodes == NULL && length > 0)) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return;
 | |
|     }
 | |
|     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
 | |
|         length = 0;
 | |
|     }
 | |
|     if(length == settings->reorderCodesLength &&
 | |
|             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
 | |
|         return;
 | |
|     }
 | |
|     const CollationSettings &defaultSettings = getDefaultSettings();
 | |
|     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
 | |
|         if(settings != &defaultSettings) {
 | |
|             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
 | |
|             if(ownedSettings == NULL) {
 | |
|                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|                 return;
 | |
|             }
 | |
|             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
 | |
|             setFastLatinOptions(*ownedSettings);
 | |
|         }
 | |
|         return;
 | |
|     }
 | |
|     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
 | |
|     if(ownedSettings == NULL) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|         return;
 | |
|     }
 | |
|     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
 | |
|     setFastLatinOptions(*ownedSettings);
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
 | |
|     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
 | |
|             data, ownedSettings,
 | |
|             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
 | |
|                            UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
 | |
|     return doCompare(left.getBuffer(), left.length(),
 | |
|                      right.getBuffer(), right.length(), errorCode);
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
 | |
|                            int32_t length, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
 | |
|     if(length < 0) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return UCOL_EQUAL;
 | |
|     }
 | |
|     int32_t leftLength = left.length();
 | |
|     int32_t rightLength = right.length();
 | |
|     if(leftLength > length) { leftLength = length; }
 | |
|     if(rightLength > length) { rightLength = length; }
 | |
|     return doCompare(left.getBuffer(), leftLength,
 | |
|                      right.getBuffer(), rightLength, errorCode);
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
 | |
|                            const UChar *right, int32_t rightLength,
 | |
|                            UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
 | |
|     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return UCOL_EQUAL;
 | |
|     }
 | |
|     // Make sure both or neither strings have a known length.
 | |
|     // We do not optimize for mixed length/termination.
 | |
|     if(leftLength >= 0) {
 | |
|         if(rightLength < 0) { rightLength = u_strlen(right); }
 | |
|     } else {
 | |
|         if(rightLength >= 0) { leftLength = u_strlen(left); }
 | |
|     }
 | |
|     return doCompare(left, leftLength, right, rightLength, errorCode);
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
 | |
|                                UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
 | |
|     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
 | |
|     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
 | |
|     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return UCOL_EQUAL;
 | |
|     }
 | |
|     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
 | |
|                                        const char *right, int32_t rightLength,
 | |
|                                        UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
 | |
|     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return UCOL_EQUAL;
 | |
|     }
 | |
|     // Make sure both or neither strings have a known length.
 | |
|     // We do not optimize for mixed length/termination.
 | |
|     if(leftLength >= 0) {
 | |
|         if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); }
 | |
|     } else {
 | |
|         if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); }
 | |
|     }
 | |
|     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
 | |
|                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| /**
 | |
|  * Abstract iterator for identical-level string comparisons.
 | |
|  * Returns FCD code points and handles temporary switching to NFD.
 | |
|  */
 | |
| class NFDIterator : public UObject {
 | |
| public:
 | |
|     NFDIterator() : index(-1), length(0) {}
 | |
|     virtual ~NFDIterator() {}
 | |
|     /**
 | |
|      * Returns the next code point from the internal normalization buffer,
 | |
|      * or else the next text code point.
 | |
|      * Returns -1 at the end of the text.
 | |
|      */
 | |
|     UChar32 nextCodePoint() {
 | |
|         if(index >= 0) {
 | |
|             if(index == length) {
 | |
|                 index = -1;
 | |
|             } else {
 | |
|                 UChar32 c;
 | |
|                 U16_NEXT_UNSAFE(decomp, index, c);
 | |
|                 return c;
 | |
|             }
 | |
|         }
 | |
|         return nextRawCodePoint();
 | |
|     }
 | |
|     /**
 | |
|      * @param nfcImpl
 | |
|      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
 | |
|      * @return the first code point in c's decomposition,
 | |
|      *         or c itself if it was decomposed already or if it does not decompose
 | |
|      */
 | |
|     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
 | |
|         if(index >= 0) { return c; }
 | |
|         decomp = nfcImpl.getDecomposition(c, buffer, length);
 | |
|         if(decomp == NULL) { return c; }
 | |
|         index = 0;
 | |
|         U16_NEXT_UNSAFE(decomp, index, c);
 | |
|         return c;
 | |
|     }
 | |
| protected:
 | |
|     /**
 | |
|      * Returns the next text code point in FCD order.
 | |
|      * Returns -1 at the end of the text.
 | |
|      */
 | |
|     virtual UChar32 nextRawCodePoint() = 0;
 | |
| private:
 | |
|     const UChar *decomp;
 | |
|     UChar buffer[4];
 | |
|     int32_t index;
 | |
|     int32_t length;
 | |
| };
 | |
| 
 | |
| class UTF16NFDIterator : public NFDIterator {
 | |
| public:
 | |
|     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
 | |
| protected:
 | |
|     virtual UChar32 nextRawCodePoint() override {
 | |
|         if(s == limit) { return U_SENTINEL; }
 | |
|         UChar32 c = *s++;
 | |
|         if(limit == NULL && c == 0) {
 | |
|             s = NULL;
 | |
|             return U_SENTINEL;
 | |
|         }
 | |
|         UChar trail;
 | |
|         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
 | |
|             ++s;
 | |
|             c = U16_GET_SUPPLEMENTARY(c, trail);
 | |
|         }
 | |
|         return c;
 | |
|     }
 | |
| 
 | |
|     const UChar *s;
 | |
|     const UChar *limit;
 | |
| };
 | |
| 
 | |
| class FCDUTF16NFDIterator : public UTF16NFDIterator {
 | |
| public:
 | |
|     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
 | |
|             : UTF16NFDIterator(NULL, NULL) {
 | |
|         UErrorCode errorCode = U_ZERO_ERROR;
 | |
|         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
 | |
|         if(U_FAILURE(errorCode)) { return; }
 | |
|         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
 | |
|             s = text;
 | |
|             limit = spanLimit;
 | |
|         } else {
 | |
|             str.setTo(text, (int32_t)(spanLimit - text));
 | |
|             {
 | |
|                 ReorderingBuffer r_buffer(nfcImpl, str);
 | |
|                 if(r_buffer.init(str.length(), errorCode)) {
 | |
|                     nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
 | |
|                 }
 | |
|             }
 | |
|             if(U_SUCCESS(errorCode)) {
 | |
|                 s = str.getBuffer();
 | |
|                 limit = s + str.length();
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| private:
 | |
|     UnicodeString str;
 | |
| };
 | |
| 
 | |
| class UTF8NFDIterator : public NFDIterator {
 | |
| public:
 | |
|     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
 | |
|         : s(text), pos(0), length(textLength) {}
 | |
| protected:
 | |
|     virtual UChar32 nextRawCodePoint() override {
 | |
|         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
 | |
|         UChar32 c;
 | |
|         U8_NEXT_OR_FFFD(s, pos, length, c);
 | |
|         return c;
 | |
|     }
 | |
| 
 | |
|     const uint8_t *s;
 | |
|     int32_t pos;
 | |
|     int32_t length;
 | |
| };
 | |
| 
 | |
| class FCDUTF8NFDIterator : public NFDIterator {
 | |
| public:
 | |
|     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
 | |
|             : u8ci(data, FALSE, text, 0, textLength) {}
 | |
| protected:
 | |
|     virtual UChar32 nextRawCodePoint() override {
 | |
|         UErrorCode errorCode = U_ZERO_ERROR;
 | |
|         return u8ci.nextCodePoint(errorCode);
 | |
|     }
 | |
| private:
 | |
|     FCDUTF8CollationIterator u8ci;
 | |
| };
 | |
| 
 | |
| class UIterNFDIterator : public NFDIterator {
 | |
| public:
 | |
|     UIterNFDIterator(UCharIterator &it) : iter(it) {}
 | |
| protected:
 | |
|     virtual UChar32 nextRawCodePoint() override {
 | |
|         return uiter_next32(&iter);
 | |
|     }
 | |
| private:
 | |
|     UCharIterator &iter;
 | |
| };
 | |
| 
 | |
| class FCDUIterNFDIterator : public NFDIterator {
 | |
| public:
 | |
|     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
 | |
|             : uici(data, FALSE, it, startIndex) {}
 | |
| protected:
 | |
|     virtual UChar32 nextRawCodePoint() override {
 | |
|         UErrorCode errorCode = U_ZERO_ERROR;
 | |
|         return uici.nextCodePoint(errorCode);
 | |
|     }
 | |
| private:
 | |
|     FCDUIterCollationIterator uici;
 | |
| };
 | |
| 
 | |
| UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
 | |
|                                 NFDIterator &left, NFDIterator &right) {
 | |
|     for(;;) {
 | |
|         // Fetch the next FCD code point from each string.
 | |
|         UChar32 leftCp = left.nextCodePoint();
 | |
|         UChar32 rightCp = right.nextCodePoint();
 | |
|         if(leftCp == rightCp) {
 | |
|             if(leftCp < 0) { break; }
 | |
|             continue;
 | |
|         }
 | |
|         // If they are different, then decompose each and compare again.
 | |
|         if(leftCp < 0) {
 | |
|             leftCp = -2;  // end of string
 | |
|         } else if(leftCp == 0xfffe) {
 | |
|             leftCp = -1;  // U+FFFE: merge separator
 | |
|         } else {
 | |
|             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
 | |
|         }
 | |
|         if(rightCp < 0) {
 | |
|             rightCp = -2;  // end of string
 | |
|         } else if(rightCp == 0xfffe) {
 | |
|             rightCp = -1;  // U+FFFE: merge separator
 | |
|         } else {
 | |
|             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
 | |
|         }
 | |
|         if(leftCp < rightCp) { return UCOL_LESS; }
 | |
|         if(leftCp > rightCp) { return UCOL_GREATER; }
 | |
|     }
 | |
|     return UCOL_EQUAL;
 | |
| }
 | |
| 
 | |
| }  // namespace
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
 | |
|                              const UChar *right, int32_t rightLength,
 | |
|                              UErrorCode &errorCode) const {
 | |
|     // U_FAILURE(errorCode) checked by caller.
 | |
|     if(left == right && leftLength == rightLength) {
 | |
|         return UCOL_EQUAL;
 | |
|     }
 | |
| 
 | |
|     // Identical-prefix test.
 | |
|     const UChar *leftLimit;
 | |
|     const UChar *rightLimit;
 | |
|     int32_t equalPrefixLength = 0;
 | |
|     if(leftLength < 0) {
 | |
|         leftLimit = NULL;
 | |
|         rightLimit = NULL;
 | |
|         UChar c;
 | |
|         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
 | |
|             if(c == 0) { return UCOL_EQUAL; }
 | |
|             ++equalPrefixLength;
 | |
|         }
 | |
|     } else {
 | |
|         leftLimit = left + leftLength;
 | |
|         rightLimit = right + rightLength;
 | |
|         for(;;) {
 | |
|             if(equalPrefixLength == leftLength) {
 | |
|                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
 | |
|                 break;
 | |
|             } else if(equalPrefixLength == rightLength ||
 | |
|                       left[equalPrefixLength] != right[equalPrefixLength]) {
 | |
|                 break;
 | |
|             }
 | |
|             ++equalPrefixLength;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     UBool numeric = settings->isNumeric();
 | |
|     if(equalPrefixLength > 0) {
 | |
|         if((equalPrefixLength != leftLength &&
 | |
|                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
 | |
|                 (equalPrefixLength != rightLength &&
 | |
|                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
 | |
|             // Identical prefix: Back up to the start of a contraction or reordering sequence.
 | |
|             while(--equalPrefixLength > 0 &&
 | |
|                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
 | |
|         }
 | |
|         // Notes:
 | |
|         // - A longer string can compare equal to a prefix of it if only ignorables follow.
 | |
|         // - With a backward level, a longer string can compare less-than a prefix of it.
 | |
| 
 | |
|         // Pass the actual start of each string into the CollationIterators,
 | |
|         // plus the equalPrefixLength position,
 | |
|         // so that prefix matches back into the equal prefix work.
 | |
|     }
 | |
| 
 | |
|     int32_t result;
 | |
|     int32_t fastLatinOptions = settings->fastLatinOptions;
 | |
|     if(fastLatinOptions >= 0 &&
 | |
|             (equalPrefixLength == leftLength ||
 | |
|                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
 | |
|             (equalPrefixLength == rightLength ||
 | |
|                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
 | |
|         if(leftLength >= 0) {
 | |
|             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
 | |
|                                                       settings->fastLatinPrimaries,
 | |
|                                                       fastLatinOptions,
 | |
|                                                       left + equalPrefixLength,
 | |
|                                                       leftLength - equalPrefixLength,
 | |
|                                                       right + equalPrefixLength,
 | |
|                                                       rightLength - equalPrefixLength);
 | |
|         } else {
 | |
|             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
 | |
|                                                       settings->fastLatinPrimaries,
 | |
|                                                       fastLatinOptions,
 | |
|                                                       left + equalPrefixLength, -1,
 | |
|                                                       right + equalPrefixLength, -1);
 | |
|         }
 | |
|     } else {
 | |
|         result = CollationFastLatin::BAIL_OUT_RESULT;
 | |
|     }
 | |
| 
 | |
|     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
 | |
|         if(settings->dontCheckFCD()) {
 | |
|             UTF16CollationIterator leftIter(data, numeric,
 | |
|                                             left, left + equalPrefixLength, leftLimit);
 | |
|             UTF16CollationIterator rightIter(data, numeric,
 | |
|                                             right, right + equalPrefixLength, rightLimit);
 | |
|             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
 | |
|         } else {
 | |
|             FCDUTF16CollationIterator leftIter(data, numeric,
 | |
|                                               left, left + equalPrefixLength, leftLimit);
 | |
|             FCDUTF16CollationIterator rightIter(data, numeric,
 | |
|                                                 right, right + equalPrefixLength, rightLimit);
 | |
|             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
 | |
|         }
 | |
|     }
 | |
|     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
 | |
|         return (UCollationResult)result;
 | |
|     }
 | |
| 
 | |
|     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
 | |
|     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
 | |
|     // and the benefit seems unlikely to be measurable.
 | |
| 
 | |
|     // Compare identical level.
 | |
|     const Normalizer2Impl &nfcImpl = data->nfcImpl;
 | |
|     left += equalPrefixLength;
 | |
|     right += equalPrefixLength;
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UTF16NFDIterator leftIter(left, leftLimit);
 | |
|         UTF16NFDIterator rightIter(right, rightLimit);
 | |
|         return compareNFDIter(nfcImpl, leftIter, rightIter);
 | |
|     } else {
 | |
|         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
 | |
|         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
 | |
|         return compareNFDIter(nfcImpl, leftIter, rightIter);
 | |
|     }
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
 | |
|                              const uint8_t *right, int32_t rightLength,
 | |
|                              UErrorCode &errorCode) const {
 | |
|     // U_FAILURE(errorCode) checked by caller.
 | |
|     if(left == right && leftLength == rightLength) {
 | |
|         return UCOL_EQUAL;
 | |
|     }
 | |
| 
 | |
|     // Identical-prefix test.
 | |
|     int32_t equalPrefixLength = 0;
 | |
|     if(leftLength < 0) {
 | |
|         uint8_t c;
 | |
|         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
 | |
|             if(c == 0) { return UCOL_EQUAL; }
 | |
|             ++equalPrefixLength;
 | |
|         }
 | |
|     } else {
 | |
|         for(;;) {
 | |
|             if(equalPrefixLength == leftLength) {
 | |
|                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
 | |
|                 break;
 | |
|             } else if(equalPrefixLength == rightLength ||
 | |
|                       left[equalPrefixLength] != right[equalPrefixLength]) {
 | |
|                 break;
 | |
|             }
 | |
|             ++equalPrefixLength;
 | |
|         }
 | |
|     }
 | |
|     // Back up to the start of a partially-equal code point.
 | |
|     if(equalPrefixLength > 0 &&
 | |
|             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
 | |
|             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
 | |
|         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
 | |
|     }
 | |
| 
 | |
|     UBool numeric = settings->isNumeric();
 | |
|     if(equalPrefixLength > 0) {
 | |
|         UBool unsafe = FALSE;
 | |
|         if(equalPrefixLength != leftLength) {
 | |
|             int32_t i = equalPrefixLength;
 | |
|             UChar32 c;
 | |
|             U8_NEXT_OR_FFFD(left, i, leftLength, c);
 | |
|             unsafe = data->isUnsafeBackward(c, numeric);
 | |
|         }
 | |
|         if(!unsafe && equalPrefixLength != rightLength) {
 | |
|             int32_t i = equalPrefixLength;
 | |
|             UChar32 c;
 | |
|             U8_NEXT_OR_FFFD(right, i, rightLength, c);
 | |
|             unsafe = data->isUnsafeBackward(c, numeric);
 | |
|         }
 | |
|         if(unsafe) {
 | |
|             // Identical prefix: Back up to the start of a contraction or reordering sequence.
 | |
|             UChar32 c;
 | |
|             do {
 | |
|                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
 | |
|             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
 | |
|         }
 | |
|         // See the notes in the UTF-16 version.
 | |
| 
 | |
|         // Pass the actual start of each string into the CollationIterators,
 | |
|         // plus the equalPrefixLength position,
 | |
|         // so that prefix matches back into the equal prefix work.
 | |
|     }
 | |
| 
 | |
|     int32_t result;
 | |
|     int32_t fastLatinOptions = settings->fastLatinOptions;
 | |
|     if(fastLatinOptions >= 0 &&
 | |
|             (equalPrefixLength == leftLength ||
 | |
|                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
 | |
|             (equalPrefixLength == rightLength ||
 | |
|                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
 | |
|         if(leftLength >= 0) {
 | |
|             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
 | |
|                                                      settings->fastLatinPrimaries,
 | |
|                                                      fastLatinOptions,
 | |
|                                                      left + equalPrefixLength,
 | |
|                                                      leftLength - equalPrefixLength,
 | |
|                                                      right + equalPrefixLength,
 | |
|                                                      rightLength - equalPrefixLength);
 | |
|         } else {
 | |
|             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
 | |
|                                                      settings->fastLatinPrimaries,
 | |
|                                                      fastLatinOptions,
 | |
|                                                      left + equalPrefixLength, -1,
 | |
|                                                      right + equalPrefixLength, -1);
 | |
|         }
 | |
|     } else {
 | |
|         result = CollationFastLatin::BAIL_OUT_RESULT;
 | |
|     }
 | |
| 
 | |
|     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
 | |
|         if(settings->dontCheckFCD()) {
 | |
|             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
 | |
|             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
 | |
|             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
 | |
|         } else {
 | |
|             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
 | |
|             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
 | |
|             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
 | |
|         }
 | |
|     }
 | |
|     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
 | |
|         return (UCollationResult)result;
 | |
|     }
 | |
| 
 | |
|     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
 | |
|     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
 | |
|     // and the benefit seems unlikely to be measurable.
 | |
| 
 | |
|     // Compare identical level.
 | |
|     const Normalizer2Impl &nfcImpl = data->nfcImpl;
 | |
|     left += equalPrefixLength;
 | |
|     right += equalPrefixLength;
 | |
|     if(leftLength > 0) {
 | |
|         leftLength -= equalPrefixLength;
 | |
|         rightLength -= equalPrefixLength;
 | |
|     }
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UTF8NFDIterator leftIter(left, leftLength);
 | |
|         UTF8NFDIterator rightIter(right, rightLength);
 | |
|         return compareNFDIter(nfcImpl, leftIter, rightIter);
 | |
|     } else {
 | |
|         FCDUTF8NFDIterator leftIter(data, left, leftLength);
 | |
|         FCDUTF8NFDIterator rightIter(data, right, rightLength);
 | |
|         return compareNFDIter(nfcImpl, leftIter, rightIter);
 | |
|     }
 | |
| }
 | |
| 
 | |
| UCollationResult
 | |
| RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
 | |
|                            UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
 | |
|     UBool numeric = settings->isNumeric();
 | |
| 
 | |
|     // Identical-prefix test.
 | |
|     int32_t equalPrefixLength = 0;
 | |
|     {
 | |
|         UChar32 leftUnit;
 | |
|         UChar32 rightUnit;
 | |
|         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
 | |
|             if(leftUnit < 0) { return UCOL_EQUAL; }
 | |
|             ++equalPrefixLength;
 | |
|         }
 | |
| 
 | |
|         // Back out the code units that differed, for the real collation comparison.
 | |
|         if(leftUnit >= 0) { left.previous(&left); }
 | |
|         if(rightUnit >= 0) { right.previous(&right); }
 | |
| 
 | |
|         if(equalPrefixLength > 0) {
 | |
|             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
 | |
|                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
 | |
|                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
 | |
|                 do {
 | |
|                     --equalPrefixLength;
 | |
|                     leftUnit = left.previous(&left);
 | |
|                     right.previous(&right);
 | |
|                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
 | |
|             }
 | |
|             // See the notes in the UTF-16 version.
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     UCollationResult result;
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UIterCollationIterator leftIter(data, numeric, left);
 | |
|         UIterCollationIterator rightIter(data, numeric, right);
 | |
|         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
 | |
|     } else {
 | |
|         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
 | |
|         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
 | |
|         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
 | |
|     }
 | |
|     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
 | |
|         return result;
 | |
|     }
 | |
| 
 | |
|     // Compare identical level.
 | |
|     left.move(&left, equalPrefixLength, UITER_ZERO);
 | |
|     right.move(&right, equalPrefixLength, UITER_ZERO);
 | |
|     const Normalizer2Impl &nfcImpl = data->nfcImpl;
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UIterNFDIterator leftIter(left);
 | |
|         UIterNFDIterator rightIter(right);
 | |
|         return compareNFDIter(nfcImpl, leftIter, rightIter);
 | |
|     } else {
 | |
|         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
 | |
|         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
 | |
|         return compareNFDIter(nfcImpl, leftIter, rightIter);
 | |
|     }
 | |
| }
 | |
| 
 | |
| CollationKey &
 | |
| RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
 | |
|                                    UErrorCode &errorCode) const {
 | |
|     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
 | |
| }
 | |
| 
 | |
| CollationKey &
 | |
| RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
 | |
|                                    UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         return key.setToBogus();
 | |
|     }
 | |
|     if(s == NULL && length != 0) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return key.setToBogus();
 | |
|     }
 | |
|     key.reset();  // resets the "bogus" state
 | |
|     CollationKeyByteSink sink(key);
 | |
|     writeSortKey(s, length, sink, errorCode);
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         key.setToBogus();
 | |
|     } else if(key.isBogus()) {
 | |
|         errorCode = U_MEMORY_ALLOCATION_ERROR;
 | |
|     } else {
 | |
|         key.setLength(sink.NumberOfBytesAppended());
 | |
|     }
 | |
|     return key;
 | |
| }
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::getSortKey(const UnicodeString &s,
 | |
|                               uint8_t *dest, int32_t capacity) const {
 | |
|     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
 | |
| }
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
 | |
|                               uint8_t *dest, int32_t capacity) const {
 | |
|     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
 | |
|         return 0;
 | |
|     }
 | |
|     uint8_t noDest[1] = { 0 };
 | |
|     if(dest == NULL) {
 | |
|         // Distinguish pure preflighting from an allocation error.
 | |
|         dest = noDest;
 | |
|         capacity = 0;
 | |
|     }
 | |
|     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
 | |
|     UErrorCode errorCode = U_ZERO_ERROR;
 | |
|     writeSortKey(s, length, sink, errorCode);
 | |
|     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
 | |
|                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     const UChar *limit = (length >= 0) ? s + length : NULL;
 | |
|     UBool numeric = settings->isNumeric();
 | |
|     CollationKeys::LevelCallback callback;
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UTF16CollationIterator iter(data, numeric, s, s, limit);
 | |
|         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
 | |
|                                                   sink, Collation::PRIMARY_LEVEL,
 | |
|                                                   callback, TRUE, errorCode);
 | |
|     } else {
 | |
|         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
 | |
|         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
 | |
|                                                   sink, Collation::PRIMARY_LEVEL,
 | |
|                                                   callback, TRUE, errorCode);
 | |
|     }
 | |
|     if(settings->getStrength() == UCOL_IDENTICAL) {
 | |
|         writeIdenticalLevel(s, limit, sink, errorCode);
 | |
|     }
 | |
|     static const char terminator = 0;  // TERMINATOR_BYTE
 | |
|     sink.Append(&terminator, 1);
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
 | |
|                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
 | |
|     // NFD quick check
 | |
|     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
 | |
|     UChar32 prev = 0;
 | |
|     if(nfdQCYesLimit != s) {
 | |
|         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
 | |
|     }
 | |
|     // Is there non-NFD text?
 | |
|     int32_t destLengthEstimate;
 | |
|     if(limit != NULL) {
 | |
|         if(nfdQCYesLimit == limit) { return; }
 | |
|         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
 | |
|     } else {
 | |
|         // s is NUL-terminated
 | |
|         if(*nfdQCYesLimit == 0) { return; }
 | |
|         destLengthEstimate = -1;
 | |
|     }
 | |
|     UnicodeString nfd;
 | |
|     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
 | |
|     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| /**
 | |
|  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
 | |
|  * with an instance of this callback class.
 | |
|  * When another level is about to be written, the callback
 | |
|  * records the level and the number of bytes that will be written until
 | |
|  * the sink (which is actually a FixedSortKeyByteSink) fills up.
 | |
|  *
 | |
|  * When internalNextSortKeyPart() is called again, it restarts with the last level
 | |
|  * and ignores as many bytes as were written previously for that level.
 | |
|  */
 | |
| class PartLevelCallback : public CollationKeys::LevelCallback {
 | |
| public:
 | |
|     PartLevelCallback(const SortKeyByteSink &s)
 | |
|             : sink(s), level(Collation::PRIMARY_LEVEL) {
 | |
|         levelCapacity = sink.GetRemainingCapacity();
 | |
|     }
 | |
|     virtual ~PartLevelCallback() {}
 | |
|     virtual UBool needToWrite(Collation::Level l) override {
 | |
|         if(!sink.Overflowed()) {
 | |
|             // Remember a level that will be at least partially written.
 | |
|             level = l;
 | |
|             levelCapacity = sink.GetRemainingCapacity();
 | |
|             return TRUE;
 | |
|         } else {
 | |
|             return FALSE;
 | |
|         }
 | |
|     }
 | |
|     Collation::Level getLevel() const { return level; }
 | |
|     int32_t getLevelCapacity() const { return levelCapacity; }
 | |
| 
 | |
| private:
 | |
|     const SortKeyByteSink &sink;
 | |
|     Collation::Level level;
 | |
|     int32_t levelCapacity;
 | |
| };
 | |
| 
 | |
| }  // namespace
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
 | |
|                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return 0;
 | |
|     }
 | |
|     if(count == 0) { return 0; }
 | |
| 
 | |
|     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
 | |
|     sink.IgnoreBytes((int32_t)state[1]);
 | |
|     iter->move(iter, 0, UITER_START);
 | |
| 
 | |
|     Collation::Level level = (Collation::Level)state[0];
 | |
|     if(level <= Collation::QUATERNARY_LEVEL) {
 | |
|         UBool numeric = settings->isNumeric();
 | |
|         PartLevelCallback callback(sink);
 | |
|         if(settings->dontCheckFCD()) {
 | |
|             UIterCollationIterator ci(data, numeric, *iter);
 | |
|             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
 | |
|                                                       sink, level, callback, FALSE, errorCode);
 | |
|         } else {
 | |
|             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
 | |
|             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
 | |
|                                                       sink, level, callback, FALSE, errorCode);
 | |
|         }
 | |
|         if(U_FAILURE(errorCode)) { return 0; }
 | |
|         if(sink.NumberOfBytesAppended() > count) {
 | |
|             state[0] = (uint32_t)callback.getLevel();
 | |
|             state[1] = (uint32_t)callback.getLevelCapacity();
 | |
|             return count;
 | |
|         }
 | |
|         // All of the normal levels are done.
 | |
|         if(settings->getStrength() == UCOL_IDENTICAL) {
 | |
|             level = Collation::IDENTICAL_LEVEL;
 | |
|             iter->move(iter, 0, UITER_START);
 | |
|         }
 | |
|         // else fall through to setting ZERO_LEVEL
 | |
|     }
 | |
| 
 | |
|     if(level == Collation::IDENTICAL_LEVEL) {
 | |
|         int32_t levelCapacity = sink.GetRemainingCapacity();
 | |
|         UnicodeString s;
 | |
|         for(;;) {
 | |
|             UChar32 c = iter->next(iter);
 | |
|             if(c < 0) { break; }
 | |
|             s.append((UChar)c);
 | |
|         }
 | |
|         const UChar *sArray = s.getBuffer();
 | |
|         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
 | |
|         if(U_FAILURE(errorCode)) { return 0; }
 | |
|         if(sink.NumberOfBytesAppended() > count) {
 | |
|             state[0] = (uint32_t)level;
 | |
|             state[1] = (uint32_t)levelCapacity;
 | |
|             return count;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
 | |
|     state[0] = (uint32_t)Collation::ZERO_LEVEL;
 | |
|     state[1] = 0;
 | |
|     int32_t length = sink.NumberOfBytesAppended();
 | |
|     int32_t i = length;
 | |
|     while(i < count) { dest[i++] = 0; }
 | |
|     return length;
 | |
| }
 | |
| 
 | |
| void
 | |
| RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
 | |
|                                   UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     const UChar *s = str.getBuffer();
 | |
|     const UChar *limit = s + str.length();
 | |
|     UBool numeric = settings->isNumeric();
 | |
|     if(settings->dontCheckFCD()) {
 | |
|         UTF16CollationIterator iter(data, numeric, s, s, limit);
 | |
|         int64_t ce;
 | |
|         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
 | |
|             ces.addElement(ce, errorCode);
 | |
|         }
 | |
|     } else {
 | |
|         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
 | |
|         int64_t ce;
 | |
|         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
 | |
|             ces.addElement(ce, errorCode);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
 | |
|                   UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode) || length == 0) { return; }
 | |
|     if(!s.isEmpty()) {
 | |
|         s.append('_', errorCode);
 | |
|     }
 | |
|     s.append(letter, errorCode);
 | |
|     for(int32_t i = 0; i < length; ++i) {
 | |
|         s.append(uprv_toupper(subtag[i]), errorCode);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void appendAttribute(CharString &s, char letter, UColAttributeValue value,
 | |
|                      UErrorCode &errorCode) {
 | |
|     if(U_FAILURE(errorCode)) { return; }
 | |
|     if(!s.isEmpty()) {
 | |
|         s.append('_', errorCode);
 | |
|     }
 | |
|     static const char *valueChars = "1234...........IXO..SN..LU......";
 | |
|     s.append(letter, errorCode);
 | |
|     s.append(valueChars[value], errorCode);
 | |
| }
 | |
| 
 | |
| }  // namespace
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
 | |
|                                                     char *buffer, int32_t capacity,
 | |
|                                                     UErrorCode &errorCode) const {
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     if(buffer == NULL ? capacity != 0 : capacity < 0) {
 | |
|         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return 0;
 | |
|     }
 | |
|     if(locale == NULL) {
 | |
|         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
 | |
|     }
 | |
| 
 | |
|     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
 | |
|     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
 | |
|                                                   "collation", locale,
 | |
|                                                   NULL, &errorCode);
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     resultLocale[length] = 0;
 | |
| 
 | |
|     // Append items in alphabetic order of their short definition letters.
 | |
|     CharString result;
 | |
|     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
 | |
| 
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
 | |
|         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
 | |
|     }
 | |
|     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
 | |
|     // See ICU tickets #10372 and #10386.
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
 | |
|         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
 | |
|     }
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
 | |
|         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
 | |
|     }
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
 | |
|         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
 | |
|     }
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
 | |
|         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
 | |
|     }
 | |
|     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
 | |
|     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
 | |
|     appendSubtag(result, 'K', subtag, length, errorCode);
 | |
|     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
 | |
|     if (length == 0) {
 | |
|         appendSubtag(result, 'L', "root", 4, errorCode);
 | |
|     } else {
 | |
|         appendSubtag(result, 'L', subtag, length, errorCode);
 | |
|     }
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
 | |
|         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
 | |
|     }
 | |
|     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
 | |
|     appendSubtag(result, 'R', subtag, length, errorCode);
 | |
|     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
 | |
|         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
 | |
|     }
 | |
|     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
 | |
|     appendSubtag(result, 'V', subtag, length, errorCode);
 | |
|     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
 | |
|     appendSubtag(result, 'Z', subtag, length, errorCode);
 | |
| 
 | |
|     if(U_FAILURE(errorCode)) { return 0; }
 | |
|     return result.extract(buffer, capacity, errorCode);
 | |
| }
 | |
| 
 | |
| UBool
 | |
| RuleBasedCollator::isUnsafe(UChar32 c) const {
 | |
|     return data->isUnsafeBackward(c, settings->isNumeric());
 | |
| }
 | |
| 
 | |
| void U_CALLCONV
 | |
| RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
 | |
|     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
 | |
| }
 | |
| 
 | |
| UBool
 | |
| RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
 | |
|     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
 | |
|     return U_SUCCESS(errorCode);
 | |
| }
 | |
| 
 | |
| CollationElementIterator *
 | |
| RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
 | |
|     UErrorCode errorCode = U_ZERO_ERROR;
 | |
|     if(!initMaxExpansions(errorCode)) { return NULL; }
 | |
|     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         delete cei;
 | |
|         return NULL;
 | |
|     }
 | |
|     return cei;
 | |
| }
 | |
| 
 | |
| CollationElementIterator *
 | |
| RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
 | |
|     UErrorCode errorCode = U_ZERO_ERROR;
 | |
|     if(!initMaxExpansions(errorCode)) { return NULL; }
 | |
|     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
 | |
|     if(U_FAILURE(errorCode)) {
 | |
|         delete cei;
 | |
|         return NULL;
 | |
|     }
 | |
|     return cei;
 | |
| }
 | |
| 
 | |
| int32_t
 | |
| RuleBasedCollator::getMaxExpansion(int32_t order) const {
 | |
|     UErrorCode errorCode = U_ZERO_ERROR;
 | |
|     (void)initMaxExpansions(errorCode);
 | |
|     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
 | |
| }
 | |
| 
 | |
| U_NAMESPACE_END
 | |
| 
 | |
| #endif  // !UCONFIG_NO_COLLATION
 |