// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   08/10/2001  aliu        Creation.
**********************************************************************
*/
| #ifndef _TRANSREG_H
 | |
| #define _TRANSREG_H
 | |
| 
 | |
| #include "unicode/utypes.h"
 | |
| 
 | |
| #if !UCONFIG_NO_TRANSLITERATION
 | |
| 
 | |
| #include "unicode/uobject.h"
 | |
| #include "unicode/translit.h"
 | |
| #include "hash.h"
 | |
| #include "uvector.h"
 | |
| 
 | |
| U_NAMESPACE_BEGIN
 | |
| 
 | |
| class TransliteratorEntry;
 | |
| class TransliteratorSpec;
 | |
| class UnicodeString;
 | |
| 
 | |
| //------------------------------------------------------------------
 | |
| // TransliteratorAlias
 | |
| //------------------------------------------------------------------
 | |
| 
 | |
| /**
 | |
|  * A TransliteratorAlias object is returned by get() if the given ID
 | |
|  * actually translates into something else.  The caller then invokes
 | |
|  * the create() method on the alias to create the actual
 | |
|  * transliterator, and deletes the alias.
 | |
|  *
 | |
|  * Why all the shenanigans?  To prevent circular calls between
 | |
|  * the registry code and the transliterator code that deadlocks.
 | |
|  */
 | |
| class TransliteratorAlias : public UMemory {
 | |
|  public:
 | |
|     /**
 | |
|      * Construct a simple alias (type == SIMPLE)
 | |
|      * @param aliasID the given id.
 | |
|      */
 | |
|     TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
 | |
| 
 | |
|     /**
 | |
|      * Construct a compound RBT alias (type == COMPOUND)
 | |
|      */
 | |
|     TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
 | |
|                         UVector* adoptedTransliterators,
 | |
|                         const UnicodeSet* compoundFilter);
 | |
| 
 | |
|     /**
 | |
|      * Construct a rules alias (type = RULES)
 | |
|      */
 | |
|     TransliteratorAlias(const UnicodeString& theID,
 | |
|                         const UnicodeString& rules,
 | |
|                         UTransDirection dir);
 | |
| 
 | |
|     ~TransliteratorAlias();
 | |
| 
 | |
|     /**
 | |
|      * The whole point of create() is that the caller must invoke
 | |
|      * it when the registry mutex is NOT held, to prevent deadlock.
 | |
|      * It may only be called once.
 | |
|      *
 | |
|      * Note: Only call create() if isRuleBased() returns false.
 | |
|      *
 | |
|      * This method must be called *outside* of the TransliteratorRegistry
 | |
|      * mutex.
 | |
|      */
 | |
|     Transliterator* create(UParseError&, UErrorCode&);
 | |
| 
 | |
|     /**
 | |
|      * Return true if this alias is rule-based.  If so, the caller
 | |
|      * must call parse() on it, then call TransliteratorRegistry::reget().
 | |
|      */
 | |
|     UBool isRuleBased() const;
 | |
| 
 | |
|     /**
 | |
|      * If isRuleBased() returns true, then the caller must call this
 | |
|      * method, followed by TransliteratorRegistry::reget().  The latter
 | |
|      * method must be called inside the TransliteratorRegistry mutex.
 | |
|      *
 | |
|      * Note: Only call parse() if isRuleBased() returns true.
 | |
|      *
 | |
|      * This method must be called *outside* of the TransliteratorRegistry
 | |
|      * mutex, because it can instantiate Transliterators embedded in
 | |
|      * the rules via the "&Latin-Arabic()" syntax.
 | |
|      */
 | |
|     void parse(TransliteratorParser& parser,
 | |
|                UParseError& pe, UErrorCode& ec) const;
 | |
| 
 | |
|  private:
 | |
|     // We actually come in three flavors:
 | |
|     // 1. Simple alias
 | |
|     //    Here aliasID is the alias string.  Everything else is
 | |
|     //    null, zero, empty.
 | |
|     // 2. CompoundRBT
 | |
|     //    Here ID is the ID, aliasID is the idBlock, trans is the
 | |
|     //    contained RBT, and idSplitPoint is the offset in aliasID
 | |
|     //    where the contained RBT goes.  compoundFilter is the
 | |
|     //    compound filter, and it is _not_ owned.
 | |
|     // 3. Rules
 | |
|     //    Here ID is the ID, aliasID is the rules string.
 | |
|     //    idSplitPoint is the UTransDirection.
 | |
|     UnicodeString ID;
 | |
|     UnicodeString aliasesOrRules;
 | |
|     UVector* transes; // owned
 | |
|     const UnicodeSet* compoundFilter; // alias
 | |
|     UTransDirection direction;
 | |
|     enum { SIMPLE, COMPOUND, RULES } type;
 | |
| 
 | |
|     TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
 | |
|     TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
 | |
| };
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * A registry of system transliterators.  This is the data structure
 | |
|  * that implements the mapping between transliterator IDs and the data
 | |
|  * or function pointers used to create the corresponding
 | |
|  * transliterators.  There is one instance of the registry that is
 | |
|  * created statically.
 | |
|  *
 | |
|  * The registry consists of a dynamic component -- a hashtable -- and
 | |
|  * a static component -- locale resource bundles.  The dynamic store
 | |
|  * is semantically overlaid on the static store, so the static mapping
 | |
|  * can be dynamically overridden.
 | |
|  *
 | |
|  * This is an internal class that is only used by Transliterator.
 | |
|  * Transliterator maintains one static instance of this class and
 | |
|  * delegates all registry-related operations to it.
 | |
|  *
 | |
|  * @author Alan Liu
 | |
|  */
 | |
| class TransliteratorRegistry : public UMemory {
 | |
| 
 | |
|  public:
 | |
| 
 | |
|     /**
 | |
|      * Constructor
 | |
|      * @param status Output param set to success/failure code.
 | |
|      */
 | |
|     TransliteratorRegistry(UErrorCode& status);
 | |
| 
 | |
|     /**
 | |
|      * Nonvirtual destructor -- this class is not subclassable.
 | |
|      */
 | |
|     ~TransliteratorRegistry();
 | |
| 
 | |
|     //------------------------------------------------------------------
 | |
|     // Basic public API
 | |
|     //------------------------------------------------------------------
 | |
| 
 | |
|     /**
 | |
|      * Given a simple ID (forward direction, no inline filter, not
 | |
|      * compound) attempt to instantiate it from the registry.  Return
 | |
|      * 0 on failure.
 | |
|      *
 | |
|      * Return a non-NULL aliasReturn value if the ID points to an alias.
 | |
|      * We cannot instantiate it ourselves because the alias may contain
 | |
|      * filters or compounds, which we do not understand.  Caller should
 | |
|      * make aliasReturn NULL before calling.
 | |
|      * @param ID          the given ID
 | |
|      * @param aliasReturn output param to receive TransliteratorAlias;
 | |
|      *                    should be NULL on entry
 | |
|      * @param parseError  Struct to receive information on position
 | |
|      *                    of error if an error is encountered
 | |
|      * @param status      Output param set to success/failure code.
 | |
|      */
 | |
|     Transliterator* get(const UnicodeString& ID,
 | |
|                         TransliteratorAlias*& aliasReturn,
 | |
|                         UErrorCode& status);
 | |
| 
 | |
|     /**
 | |
|      * The caller must call this after calling get(), if [a] calling get()
 | |
|      * returns an alias, and [b] the alias is rule based.  In that
 | |
|      * situation the caller must call alias->parse() to do the parsing
 | |
|      * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
 | |
|      * instantiating the transliterator.
 | |
|      *
 | |
|      * Note: Another alias might be returned by this method.
 | |
|      *
 | |
|      * This method (like all public methods of this class) must be called
 | |
|      * from within the TransliteratorRegistry mutex.
 | |
|      *
 | |
|      * @param aliasReturn output param to receive TransliteratorAlias;
 | |
|      *                    should be NULL on entry
 | |
|      */
 | |
|     Transliterator* reget(const UnicodeString& ID,
 | |
|                           TransliteratorParser& parser,
 | |
|                           TransliteratorAlias*& aliasReturn,
 | |
|                           UErrorCode& status);
 | |
| 
 | |
|     /**
 | |
|      * Register a prototype (adopted).  This adds an entry to the
 | |
|      * dynamic store, or replaces an existing entry.  Any entry in the
 | |
|      * underlying static locale resource store is masked.
 | |
|      */
 | |
|     void put(Transliterator* adoptedProto,
 | |
|              UBool visible,
 | |
|              UErrorCode& ec);
 | |
| 
 | |
|     /**
 | |
|      * Register an ID and a factory function pointer.  This adds an
 | |
|      * entry to the dynamic store, or replaces an existing entry.  Any
 | |
|      * entry in the underlying static locale resource store is masked.
 | |
|      */
 | |
|     void put(const UnicodeString& ID,
 | |
|              Transliterator::Factory factory,
 | |
|              Transliterator::Token context,
 | |
|              UBool visible,
 | |
|              UErrorCode& ec);
 | |
| 
 | |
|     /**
 | |
|      * Register an ID and a resource name.  This adds an entry to the
 | |
|      * dynamic store, or replaces an existing entry.  Any entry in the
 | |
|      * underlying static locale resource store is masked.
 | |
|      */
 | |
|     void put(const UnicodeString& ID,
 | |
|              const UnicodeString& resourceName,
 | |
|              UTransDirection dir,
 | |
|              UBool readonlyResourceAlias,
 | |
|              UBool visible,
 | |
|              UErrorCode& ec);
 | |
| 
 | |
|     /**
 | |
|      * Register an ID and an alias ID.  This adds an entry to the
 | |
|      * dynamic store, or replaces an existing entry.  Any entry in the
 | |
|      * underlying static locale resource store is masked.
 | |
|      */
 | |
|     void put(const UnicodeString& ID,
 | |
|              const UnicodeString& alias,
 | |
|              UBool readonlyAliasAlias,
 | |
|              UBool visible,
 | |
|              UErrorCode& ec);
 | |
| 
 | |
|     /**
 | |
|      * Unregister an ID.  This removes an entry from the dynamic store
 | |
|      * if there is one.  The static locale resource store is
 | |
|      * unaffected.
 | |
|      * @param ID    the given ID.
 | |
|      */
 | |
|     void remove(const UnicodeString& ID);
 | |
| 
 | |
|     //------------------------------------------------------------------
 | |
|     // Public ID and spec management
 | |
|     //------------------------------------------------------------------
 | |
| 
 | |
|     /**
 | |
|      * Return a StringEnumeration over the IDs currently registered
 | |
|      * with the system.
 | |
|      * @internal
 | |
|      */
 | |
|     StringEnumeration* getAvailableIDs() const;
 | |
| 
 | |
|     /**
 | |
|      * == OBSOLETE - remove in ICU 3.4 ==
 | |
|      * Return the number of IDs currently registered with the system.
 | |
|      * To retrieve the actual IDs, call getAvailableID(i) with
 | |
|      * i from 0 to countAvailableIDs() - 1.
 | |
|      * @return the number of IDs currently registered with the system.
 | |
|      * @internal
 | |
|      */
 | |
|     int32_t countAvailableIDs(void) const;
 | |
| 
 | |
|     /**
 | |
|      * == OBSOLETE - remove in ICU 3.4 ==
 | |
|      * Return the index-th available ID.  index must be between 0
 | |
|      * and countAvailableIDs() - 1, inclusive.  If index is out of
 | |
|      * range, the result of getAvailableID(0) is returned.
 | |
|      * @param index the given index.
 | |
|      * @return the index-th available ID.  index must be between 0
 | |
|      *         and countAvailableIDs() - 1, inclusive.  If index is out of
 | |
|      *         range, the result of getAvailableID(0) is returned.
 | |
|      * @internal
 | |
|      */
 | |
|     const UnicodeString& getAvailableID(int32_t index) const;
 | |
| 
 | |
|     /**
 | |
|      * Return the number of registered source specifiers.
 | |
|      * @return the number of registered source specifiers.
 | |
|      */
 | |
|     int32_t countAvailableSources(void) const;
 | |
| 
 | |
|     /**
 | |
|      * Return a registered source specifier.
 | |
|      * @param index which specifier to return, from 0 to n-1, where
 | |
|      * n = countAvailableSources()
 | |
|      * @param result fill-in parameter to receive the source specifier.
 | |
|      * If index is out of range, result will be empty.
 | |
|      * @return reference to result
 | |
|      */
 | |
|     UnicodeString& getAvailableSource(int32_t index,
 | |
|                                       UnicodeString& result) const;
 | |
| 
 | |
|     /**
 | |
|      * Return the number of registered target specifiers for a given
 | |
|      * source specifier.
 | |
|      * @param source the given source specifier.
 | |
|      * @return the number of registered target specifiers for a given
 | |
|      *         source specifier.
 | |
|      */
 | |
|     int32_t countAvailableTargets(const UnicodeString& source) const;
 | |
| 
 | |
|     /**
 | |
|      * Return a registered target specifier for a given source.
 | |
|      * @param index which specifier to return, from 0 to n-1, where
 | |
|      * n = countAvailableTargets(source)
 | |
|      * @param source the source specifier
 | |
|      * @param result fill-in parameter to receive the target specifier.
 | |
|      * If source is invalid or if index is out of range, result will
 | |
|      * be empty.
 | |
|      * @return reference to result
 | |
|      */
 | |
|     UnicodeString& getAvailableTarget(int32_t index,
 | |
|                                       const UnicodeString& source,
 | |
|                                       UnicodeString& result) const;
 | |
| 
 | |
|     /**
 | |
|      * Return the number of registered variant specifiers for a given
 | |
|      * source-target pair.  There is always at least one variant: If
 | |
|      * just source-target is registered, then the single variant
 | |
|      * NO_VARIANT is returned.  If source-target/variant is registered
 | |
|      * then that variant is returned.
 | |
|      * @param source the source specifiers
 | |
|      * @param target the target specifiers
 | |
|      * @return the number of registered variant specifiers for a given
 | |
|      *         source-target pair.
 | |
|      */
 | |
|     int32_t countAvailableVariants(const UnicodeString& source,
 | |
|                                    const UnicodeString& target) const;
 | |
| 
 | |
|     /**
 | |
|      * Return a registered variant specifier for a given source-target
 | |
|      * pair.  If NO_VARIANT is one of the variants, then it will be
 | |
|      * at index 0.
 | |
|      * @param index which specifier to return, from 0 to n-1, where
 | |
|      * n = countAvailableVariants(source, target)
 | |
|      * @param source the source specifier
 | |
|      * @param target the target specifier
 | |
|      * @param result fill-in parameter to receive the variant
 | |
|      * specifier.  If source is invalid or if target is invalid or if
 | |
|      * index is out of range, result will be empty.
 | |
|      * @return reference to result
 | |
|      */
 | |
|     UnicodeString& getAvailableVariant(int32_t index,
 | |
|                                        const UnicodeString& source,
 | |
|                                        const UnicodeString& target,
 | |
|                                        UnicodeString& result) const;
 | |
| 
 | |
|  private:
 | |
| 
 | |
|     //----------------------------------------------------------------
 | |
|     // Private implementation
 | |
|     //----------------------------------------------------------------
 | |
| 
 | |
|     TransliteratorEntry* find(const UnicodeString& ID);
 | |
| 
 | |
|     TransliteratorEntry* find(UnicodeString& source,
 | |
|                 UnicodeString& target,
 | |
|                 UnicodeString& variant);
 | |
| 
 | |
|     TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
 | |
|                               const TransliteratorSpec& trg,
 | |
|                               const UnicodeString& variant) const;
 | |
| 
 | |
|     TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
 | |
|                              const TransliteratorSpec& trg,
 | |
|                              const UnicodeString& variant);
 | |
| 
 | |
|     static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
 | |
|                                const TransliteratorSpec& specToFind,
 | |
|                                const UnicodeString& variant,
 | |
|                                UTransDirection direction);
 | |
| 
 | |
|     void registerEntry(const UnicodeString& source,
 | |
|                        const UnicodeString& target,
 | |
|                        const UnicodeString& variant,
 | |
|                        TransliteratorEntry* adopted,
 | |
|                        UBool visible);
 | |
| 
 | |
|     void registerEntry(const UnicodeString& ID,
 | |
|                        TransliteratorEntry* adopted,
 | |
|                        UBool visible);
 | |
| 
 | |
|     void registerEntry(const UnicodeString& ID,
 | |
|                        const UnicodeString& source,
 | |
|                        const UnicodeString& target,
 | |
|                        const UnicodeString& variant,
 | |
|                        TransliteratorEntry* adopted,
 | |
|                        UBool visible);
 | |
| 
 | |
|     void registerSTV(const UnicodeString& source,
 | |
|                      const UnicodeString& target,
 | |
|                      const UnicodeString& variant);
 | |
| 
 | |
|     void removeSTV(const UnicodeString& source,
 | |
|                    const UnicodeString& target,
 | |
|                    const UnicodeString& variant);
 | |
| 
 | |
|     Transliterator* instantiateEntry(const UnicodeString& ID,
 | |
|                                      TransliteratorEntry *entry,
 | |
|                                      TransliteratorAlias*& aliasReturn,
 | |
|                                      UErrorCode& status);
 | |
| 
 | |
|     /**
 | |
|      * A StringEnumeration over the registered IDs in this object.
 | |
|      */
 | |
|     class Enumeration : public StringEnumeration {
 | |
|     public:
 | |
|         Enumeration(const TransliteratorRegistry& reg);
 | |
|         virtual ~Enumeration();
 | |
|         virtual int32_t count(UErrorCode& status) const override;
 | |
|         virtual const UnicodeString* snext(UErrorCode& status) override;
 | |
|         virtual void reset(UErrorCode& status) override;
 | |
|         static UClassID U_EXPORT2 getStaticClassID();
 | |
|         virtual UClassID getDynamicClassID() const override;
 | |
|     private:
 | |
|         int32_t index;
 | |
|         const TransliteratorRegistry& reg;
 | |
|     };
 | |
|     friend class Enumeration;
 | |
| 
 | |
|  private:
 | |
| 
 | |
|     /**
 | |
|      * Dynamic registry mapping full IDs to Entry objects.  This
 | |
|      * contains both public and internal entities.  The visibility is
 | |
|      * controlled by whether an entry is listed in availableIDs and
 | |
|      * specDAG or not.
 | |
|      */
 | |
|     Hashtable registry;
 | |
| 
 | |
|     /**
 | |
|      * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
 | |
|      * target => variant bitmask)
 | |
|      */
 | |
|     Hashtable specDAG;
 | |
| 
 | |
|     /**
 | |
|      * Vector of all variant names
 | |
|      */
 | |
|     UVector variantList;
 | |
| 
 | |
|     /**
 | |
|      * Vector of public full IDs.
 | |
|      */
 | |
|     UVector availableIDs;
 | |
| 
 | |
|     TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
 | |
|     TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
 | |
| };
 | |
| 
 | |
| U_NAMESPACE_END
 | |
| 
 | |
| U_CFUNC UBool utrans_transliterator_cleanup(void);
 | |
| 
 | |
| #endif /* #if !UCONFIG_NO_TRANSLITERATION */
 | |
| 
 | |
| #endif
 | |
| //eof
 |