414 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			414 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C++
		
	
	
	
| // © 2020 and later: Unicode, Inc. and others.
 | |
| // License & terms of use: http://www.unicode.org/copyright.html
 | |
| 
 | |
| #include "unicode/utypes.h"
 | |
| 
 | |
| #if !UCONFIG_NO_FORMATTING
 | |
| 
 | |
| #include "cstring.h"
 | |
| #include "number_decimalquantity.h"
 | |
| #include "resource.h"
 | |
| #include "uassert.h"
 | |
| #include "unicode/unistr.h"
 | |
| #include "unicode/ures.h"
 | |
| #include "units_data.h"
 | |
| #include "uresimp.h"
 | |
| #include "util.h"
 | |
| #include <utility>
 | |
| 
 | |
| U_NAMESPACE_BEGIN
 | |
| namespace units {
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| using icu::number::impl::DecimalQuantity;
 | |
| 
 | |
| void trimSpaces(CharString& factor, UErrorCode& status){
 | |
|    CharString trimmed;
 | |
|    for (int i = 0 ; i < factor.length(); i++) {
 | |
|        if (factor[i] == ' ') continue;
 | |
| 
 | |
|        trimmed.append(factor[i], status);
 | |
|    }
 | |
| 
 | |
|    factor = std::move(trimmed);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * A ResourceSink that collects conversion rate information.
 | |
|  *
 | |
|  * This class is for use by ures_getAllItemsWithFallback.
 | |
|  */
 | |
| class ConversionRateDataSink : public ResourceSink {
 | |
|   public:
 | |
|     /**
 | |
|      * Constructor.
 | |
|      * @param out The vector to which ConversionRateInfo instances are to be
 | |
|      * added. This vector must outlive the use of the ResourceSink.
 | |
|      */
 | |
|     explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
 | |
| 
 | |
|     /**
 | |
|      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
 | |
|      * conversion rates that are found in `value` to the output vector.
 | |
|      *
 | |
|      * @param source This string must be "convertUnits": the resource that this
 | |
|      * class supports reading.
 | |
|      * @param value The "convertUnits" resource, containing unit conversion rate
 | |
|      * information.
 | |
|      * @param noFallback Ignored.
 | |
|      * @param status The standard ICU error code output parameter.
 | |
|      */
 | |
|     void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
 | |
|         if (U_FAILURE(status)) { return; }
 | |
|         if (uprv_strcmp(source, "convertUnits") != 0) {
 | |
|             // This is very strict, however it is the cheapest way to be sure
 | |
|             // that with `value`, we're looking at the convertUnits table.
 | |
|             status = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|             return;
 | |
|         }
 | |
|         ResourceTable conversionRateTable = value.getTable(status);
 | |
|         const char *srcUnit;
 | |
|         // We're reusing `value`, which seems to be a common pattern:
 | |
|         for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
 | |
|             ResourceTable unitTable = value.getTable(status);
 | |
|             const char *key;
 | |
|             UnicodeString baseUnit = ICU_Utility::makeBogusString();
 | |
|             UnicodeString factor = ICU_Utility::makeBogusString();
 | |
|             UnicodeString offset = ICU_Utility::makeBogusString();
 | |
|             for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
 | |
|                 if (uprv_strcmp(key, "target") == 0) {
 | |
|                     baseUnit = value.getUnicodeString(status);
 | |
|                 } else if (uprv_strcmp(key, "factor") == 0) {
 | |
|                     factor = value.getUnicodeString(status);
 | |
|                 } else if (uprv_strcmp(key, "offset") == 0) {
 | |
|                     offset = value.getUnicodeString(status);
 | |
|                 }
 | |
|             }
 | |
|             if (U_FAILURE(status)) { return; }
 | |
|             if (baseUnit.isBogus() || factor.isBogus()) {
 | |
|                 // We could not find a usable conversion rate: bad resource.
 | |
|                 status = U_MISSING_RESOURCE_ERROR;
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // We don't have this ConversionRateInfo yet: add it.
 | |
|             ConversionRateInfo *cr = outVector->emplaceBack();
 | |
|             if (!cr) {
 | |
|                 status = U_MEMORY_ALLOCATION_ERROR;
 | |
|                 return;
 | |
|             } else {
 | |
|                 cr->sourceUnit.append(srcUnit, status);
 | |
|                 cr->baseUnit.appendInvariantChars(baseUnit, status);
 | |
|                 cr->factor.appendInvariantChars(factor, status);
 | |
|                 trimSpaces(cr->factor, status);
 | |
|                 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
 | |
|             }
 | |
|         }
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|   private:
 | |
|     MaybeStackVector<ConversionRateInfo> *outVector;
 | |
| };
 | |
| 
 | |
| bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
 | |
|     return a.compareTo(b) < 0;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * A ResourceSink that collects unit preferences information.
 | |
|  *
 | |
|  * This class is for use by ures_getAllItemsWithFallback.
 | |
|  */
 | |
| class UnitPreferencesSink : public ResourceSink {
 | |
|   public:
 | |
|     /**
 | |
|      * Constructor.
 | |
|      * @param outPrefs The vector to which UnitPreference instances are to be
 | |
|      * added. This vector must outlive the use of the ResourceSink.
 | |
|      * @param outMetadata  The vector to which UnitPreferenceMetadata instances
 | |
|      * are to be added. This vector must outlive the use of the ResourceSink.
 | |
|      */
 | |
|     explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
 | |
|                                  MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
 | |
|         : preferences(outPrefs), metadata(outMetadata) {}
 | |
| 
 | |
|     /**
 | |
|      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
 | |
|      * preferences info that are found in `value` to the output vector.
 | |
|      *
 | |
|      * @param source This string must be "unitPreferenceData": the resource that
 | |
|      * this class supports reading.
 | |
|      * @param value The "unitPreferenceData" resource, containing unit
 | |
|      * preferences data.
 | |
|      * @param noFallback Ignored.
 | |
|      * @param status The standard ICU error code output parameter. Note: if an
 | |
|      * error is returned, outPrefs and outMetadata may be inconsistent.
 | |
|      */
 | |
|     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
 | |
|         if (U_FAILURE(status)) { return; }
 | |
|         if (uprv_strcmp(key, "unitPreferenceData") != 0) {
 | |
|             // This is very strict, however it is the cheapest way to be sure
 | |
|             // that with `value`, we're looking at the convertUnits table.
 | |
|             status = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|             return;
 | |
|         }
 | |
|         // The unitPreferenceData structure (see data/misc/units.txt) contains a
 | |
|         // hierarchy of category/usage/region, within which are a set of
 | |
|         // preferences. Hence three for-loops and another loop for the
 | |
|         // preferences themselves:
 | |
|         ResourceTable unitPreferenceDataTable = value.getTable(status);
 | |
|         const char *category;
 | |
|         for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
 | |
|             ResourceTable categoryTable = value.getTable(status);
 | |
|             const char *usage;
 | |
|             for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
 | |
|                 ResourceTable regionTable = value.getTable(status);
 | |
|                 const char *region;
 | |
|                 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
 | |
|                     // `value` now contains the set of preferences for
 | |
|                     // category/usage/region.
 | |
|                     ResourceArray unitPrefs = value.getArray(status);
 | |
|                     if (U_FAILURE(status)) { return; }
 | |
|                     int32_t prefLen = unitPrefs.getSize();
 | |
| 
 | |
|                     // Update metadata for this set of preferences.
 | |
|                     UnitPreferenceMetadata *meta = metadata->emplaceBack(
 | |
|                         category, usage, region, preferences->length(), prefLen, status);
 | |
|                     if (!meta) {
 | |
|                         status = U_MEMORY_ALLOCATION_ERROR;
 | |
|                         return;
 | |
|                     }
 | |
|                     if (U_FAILURE(status)) { return; }
 | |
|                     if (metadata->length() > 1) {
 | |
|                         // Verify that unit preferences are sorted and
 | |
|                         // without duplicates.
 | |
|                         if (!(*(*metadata)[metadata->length() - 2] <
 | |
|                               *(*metadata)[metadata->length() - 1])) {
 | |
|                             status = U_INVALID_FORMAT_ERROR;
 | |
|                             return;
 | |
|                         }
 | |
|                     }
 | |
| 
 | |
|                     // Collect the individual preferences.
 | |
|                     for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
 | |
|                         UnitPreference *up = preferences->emplaceBack();
 | |
|                         if (!up) {
 | |
|                             status = U_MEMORY_ALLOCATION_ERROR;
 | |
|                             return;
 | |
|                         }
 | |
|                         ResourceTable unitPref = value.getTable(status);
 | |
|                         if (U_FAILURE(status)) { return; }
 | |
|                         for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
 | |
|                             if (uprv_strcmp(key, "unit") == 0) {
 | |
|                                 int32_t length;
 | |
|                                 const UChar *u = value.getString(length, status);
 | |
|                                 up->unit.appendInvariantChars(u, length, status);
 | |
|                             } else if (uprv_strcmp(key, "geq") == 0) {
 | |
|                                 int32_t length;
 | |
|                                 const UChar *g = value.getString(length, status);
 | |
|                                 CharString geq;
 | |
|                                 geq.appendInvariantChars(g, length, status);
 | |
|                                 DecimalQuantity dq;
 | |
|                                 dq.setToDecNumber(geq.data(), status);
 | |
|                                 up->geq = dq.toDouble();
 | |
|                             } else if (uprv_strcmp(key, "skeleton") == 0) {
 | |
|                                 up->skeleton = value.getUnicodeString(status);
 | |
|                             }
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   private:
 | |
|     MaybeStackVector<UnitPreference> *preferences;
 | |
|     MaybeStackVector<UnitPreferenceMetadata> *metadata;
 | |
| };
 | |
| 
 | |
| int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
 | |
|                      const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
 | |
|                      bool *foundRegion, UErrorCode &status) {
 | |
|     if (U_FAILURE(status)) { return -1; }
 | |
|     int32_t start = 0;
 | |
|     int32_t end = metadata->length();
 | |
|     *foundCategory = false;
 | |
|     *foundUsage = false;
 | |
|     *foundRegion = false;
 | |
|     while (start < end) {
 | |
|         int32_t mid = (start + end) / 2;
 | |
|         int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
 | |
|         if (cmp < 0) {
 | |
|             start = mid + 1;
 | |
|         } else if (cmp > 0) {
 | |
|             end = mid;
 | |
|         } else {
 | |
|             return mid;
 | |
|         }
 | |
|     }
 | |
|     return -1;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Finds the UnitPreferenceMetadata instance that matches the given category,
 | |
|  * usage and region: if missing, region falls back to "001", and usage
 | |
|  * repeatedly drops tailing components, eventually trying "default"
 | |
|  * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
 | |
|  *
 | |
|  * @param metadata The full list of UnitPreferenceMetadata instances.
 | |
|  * @param category The category to search for. See getUnitCategory().
 | |
|  * @param usage The usage for which formatting preferences is needed. If the
 | |
|  * given usage is not known, automatic fallback occurs, see function description
 | |
|  * above.
 | |
|  * @param region The region for which preferences are needed. If there are no
 | |
|  * region-specific preferences, this function automatically falls back to the
 | |
|  * "001" region (global).
 | |
|  * @param status The standard ICU error code output parameter.
 | |
|  *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
 | |
|  *   * If fallback to "default" or "001" didn't resolve, status will be
 | |
|  *     U_MISSING_RESOURCE.
 | |
|  * @return The index into the metadata vector which represents the appropriate
 | |
|  * preferences. If appropriate preferences are not found, -1 is returned.
 | |
|  */
 | |
| int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
 | |
|                                    StringPiece category, StringPiece usage, StringPiece region,
 | |
|                                    UErrorCode &status) {
 | |
|     if (U_FAILURE(status)) { return -1; }
 | |
|     bool foundCategory, foundUsage, foundRegion;
 | |
|     UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
 | |
|     int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
 | |
|     if (U_FAILURE(status)) { return -1; }
 | |
|     if (idx >= 0) { return idx; }
 | |
|     if (!foundCategory) {
 | |
|         // TODO: failures can happen if units::getUnitCategory returns a category
 | |
|         // that does not appear in unitPreferenceData. Do we want a unit test that
 | |
|         // checks unitPreferenceData has full coverage of categories? Or just trust
 | |
|         // CLDR?
 | |
|         status = U_ILLEGAL_ARGUMENT_ERROR;
 | |
|         return -1;
 | |
|     }
 | |
|     U_ASSERT(foundCategory);
 | |
|     while (!foundUsage) {
 | |
|         int32_t lastDashIdx = desired.usage.lastIndexOf('-');
 | |
|         if (lastDashIdx > 0) {
 | |
|             desired.usage.truncate(lastDashIdx);
 | |
|         } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
 | |
|             desired.usage.truncate(0).append("default", status);
 | |
|         } else {
 | |
|             // "default" is not supposed to be missing for any valid category.
 | |
|             status = U_MISSING_RESOURCE_ERROR;
 | |
|             return -1;
 | |
|         }
 | |
|         idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
 | |
|         if (U_FAILURE(status)) { return -1; }
 | |
|     }
 | |
|     U_ASSERT(foundCategory);
 | |
|     U_ASSERT(foundUsage);
 | |
|     if (!foundRegion) {
 | |
|         if (uprv_strcmp(desired.region.data(), "001") != 0) {
 | |
|             desired.region.truncate(0).append("001", status);
 | |
|             idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
 | |
|         }
 | |
|         if (!foundRegion) {
 | |
|             // "001" is not supposed to be missing for any valid usage.
 | |
|             status = U_MISSING_RESOURCE_ERROR;
 | |
|             return -1;
 | |
|         }
 | |
|     }
 | |
|     U_ASSERT(foundCategory);
 | |
|     U_ASSERT(foundUsage);
 | |
|     U_ASSERT(foundRegion);
 | |
|     U_ASSERT(idx >= 0);
 | |
|     return idx;
 | |
| }
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
 | |
|                                                StringPiece region, int32_t prefsOffset,
 | |
|                                                int32_t prefsCount, UErrorCode &status) {
 | |
|     this->category.append(category, status);
 | |
|     this->usage.append(usage, status);
 | |
|     this->region.append(region, status);
 | |
|     this->prefsOffset = prefsOffset;
 | |
|     this->prefsCount = prefsCount;
 | |
| }
 | |
| 
 | |
| int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
 | |
|     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
 | |
|     if (cmp == 0) {
 | |
|         cmp = uprv_strcmp(usage.data(), other.usage.data());
 | |
|     }
 | |
|     if (cmp == 0) {
 | |
|         cmp = uprv_strcmp(region.data(), other.region.data());
 | |
|     }
 | |
|     return cmp;
 | |
| }
 | |
| 
 | |
| int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
 | |
|                                           bool *foundUsage, bool *foundRegion) const {
 | |
|     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
 | |
|     if (cmp == 0) {
 | |
|         *foundCategory = true;
 | |
|         cmp = uprv_strcmp(usage.data(), other.usage.data());
 | |
|     }
 | |
|     if (cmp == 0) {
 | |
|         *foundUsage = true;
 | |
|         cmp = uprv_strcmp(region.data(), other.region.data());
 | |
|     }
 | |
|     if (cmp == 0) {
 | |
|         *foundRegion = true;
 | |
|     }
 | |
|     return cmp;
 | |
| }
 | |
| 
 | |
| // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
 | |
| void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
 | |
|     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
 | |
|     ConversionRateDataSink sink(&result);
 | |
|     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
 | |
| }
 | |
| 
 | |
| const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
 | |
|                                                                  UErrorCode &status) const {
 | |
|     for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
 | |
|         if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
 | |
|     }
 | |
| 
 | |
|     status = U_INTERNAL_PROGRAM_ERROR;
 | |
|     return nullptr;
 | |
| }
 | |
| 
 | |
| U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
 | |
|     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
 | |
|     UnitPreferencesSink sink(&unitPrefs_, &metadata_);
 | |
|     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
 | |
| }
 | |
| 
 | |
| // TODO: make outPreferences const?
 | |
| //
 | |
| // TODO: consider replacing `UnitPreference **&outPreferences` with slice class
 | |
| // of some kind.
 | |
| void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
 | |
|                                                    StringPiece region,
 | |
|                                                    const UnitPreference *const *&outPreferences,
 | |
|                                                    int32_t &preferenceCount, UErrorCode &status) const {
 | |
|     int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
 | |
|     if (U_FAILURE(status)) {
 | |
|         outPreferences = nullptr;
 | |
|         preferenceCount = 0;
 | |
|         return;
 | |
|     }
 | |
|     U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
 | |
|     const UnitPreferenceMetadata *m = metadata_[idx];
 | |
|     outPreferences = unitPrefs_.getAlias() + m->prefsOffset;
 | |
|     preferenceCount = m->prefsCount;
 | |
| }
 | |
| 
 | |
| } // namespace units
 | |
| U_NAMESPACE_END
 | |
| 
 | |
| #endif /* #if !UCONFIG_NO_FORMATTING */
 |