1466 lines
40 KiB
C
1466 lines
40 KiB
C
/** \file
|
|
* Implementation of the ANTLR3 string and string factory classes
|
|
*/
|
|
|
|
// [The "BSD licence"]
|
|
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
|
|
// http://www.temporal-wave.com
|
|
// http://www.linkedin.com/in/jimidle
|
|
//
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
// 3. The name of the author may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include <antlr3string.h>
|
|
|
|
/* Factory API
|
|
*/
|
|
static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
|
|
static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory);
|
|
static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
|
|
static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
|
|
static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
|
|
static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
|
|
static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
|
|
static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
|
|
static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
|
|
static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
|
|
static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
|
|
static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
|
|
static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
|
|
static void closeFactory(pANTLR3_STRING_FACTORY factory);
|
|
|
|
/* String API
|
|
*/
|
|
static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
|
|
static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars);
|
|
static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars);
|
|
static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
|
|
static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit);
|
|
static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit);
|
|
static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
|
|
static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
|
|
static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
|
|
|
|
static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
|
|
static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
|
|
static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
|
|
|
|
static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
|
|
static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
|
|
static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
|
|
static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i);
|
|
static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
|
|
static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
|
|
|
|
static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
|
|
static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr);
|
|
static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
|
|
static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
|
|
static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
|
|
static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
|
|
static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
|
|
static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
|
|
static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
|
|
static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string);
|
|
static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
|
|
static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string);
|
|
static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
|
|
static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string);
|
|
|
|
/* Local helpers
|
|
*/
|
|
static void stringInit8 (pANTLR3_STRING string);
|
|
static void stringInitUTF16 (pANTLR3_STRING string);
|
|
static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
|
|
|
|
ANTLR3_API pANTLR3_STRING_FACTORY
|
|
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
|
|
{
|
|
pANTLR3_STRING_FACTORY factory;
|
|
|
|
/* Allocate memory
|
|
*/
|
|
factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
|
|
|
|
if (factory == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/* Now we make a new list to track the strings.
|
|
*/
|
|
factory->strings = antlr3VectorNew(0);
|
|
factory->index = 0;
|
|
|
|
if (factory->strings == NULL)
|
|
{
|
|
ANTLR3_FREE(factory);
|
|
return NULL;
|
|
}
|
|
|
|
// Install the API
|
|
//
|
|
// TODO: These encodings need equivalent functions to
|
|
// UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
|
|
// The STRING stuff was intended as a quick and dirty hack for people that did not
|
|
// want to worry about memory and performance very much, but nobody ever reads the
|
|
// notes or comments or uses the email list search. I want to discourage using these
|
|
// interfaces as it is much more efficient to use the pointers within the tokens
|
|
// directly, so I am not implementing the string stuff for the newer encodings.
|
|
// We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
|
|
// will not be useful beyond returning the text.
|
|
//
|
|
switch(encoding)
|
|
{
|
|
case ANTLR3_ENC_UTF32:
|
|
break;
|
|
|
|
case ANTLR3_ENC_UTF32BE:
|
|
break;
|
|
|
|
case ANTLR3_ENC_UTF32LE:
|
|
break;
|
|
|
|
case ANTLR3_ENC_UTF16BE:
|
|
case ANTLR3_ENC_UTF16LE:
|
|
case ANTLR3_ENC_UTF16:
|
|
|
|
factory->newRaw = newRawUTF16;
|
|
factory->newSize = newSizeUTF16;
|
|
factory->newPtr = newPtrUTF16_UTF16;
|
|
factory->newPtr8 = newPtrUTF16_8;
|
|
factory->newStr = newStrUTF16_UTF16;
|
|
factory->newStr8 = newStrUTF16_8;
|
|
factory->printable = printableUTF16;
|
|
factory->destroy = destroy;
|
|
factory->close = closeFactory;
|
|
break;
|
|
|
|
case ANTLR3_ENC_UTF8:
|
|
case ANTLR3_ENC_EBCDIC:
|
|
case ANTLR3_ENC_8BIT:
|
|
default:
|
|
|
|
factory->newRaw = newRaw8;
|
|
factory->newSize = newSize8;
|
|
factory->newPtr = newPtr8;
|
|
factory->newPtr8 = newPtr8;
|
|
factory->newStr = newStr8;
|
|
factory->newStr8 = newStr8;
|
|
factory->printable = printable8;
|
|
factory->destroy = destroy;
|
|
factory->close = closeFactory;
|
|
break;
|
|
}
|
|
return factory;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* \param factory
|
|
* \return
|
|
*/
|
|
static pANTLR3_STRING
|
|
newRaw8 (pANTLR3_STRING_FACTORY factory)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
|
|
|
|
if (string == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/* Structure is allocated, now fill in the API etc.
|
|
*/
|
|
stringInit8(string);
|
|
string->factory = factory;
|
|
|
|
/* Add the string into the allocated list
|
|
*/
|
|
factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
|
|
string->index = factory->index++;
|
|
|
|
return string;
|
|
}
|
|
/**
|
|
*
|
|
* \param factory
|
|
* \return
|
|
*/
|
|
static pANTLR3_STRING
|
|
newRawUTF16 (pANTLR3_STRING_FACTORY factory)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
|
|
|
|
if (string == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/* Structure is allocated, now fill in the API etc.
|
|
*/
|
|
stringInitUTF16(string);
|
|
string->factory = factory;
|
|
|
|
/* Add the string into the allocated list
|
|
*/
|
|
factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
|
|
string->index = factory->index++;
|
|
|
|
return string;
|
|
}
|
|
static
|
|
void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
|
|
{
|
|
/* First free the string itself if there was anything in it
|
|
*/
|
|
if (string->chars)
|
|
{
|
|
ANTLR3_FREE(string->chars);
|
|
}
|
|
|
|
/* Now free the space for this string
|
|
*/
|
|
ANTLR3_FREE(string);
|
|
|
|
return;
|
|
}
|
|
/**
|
|
*
|
|
* \param string
|
|
* \return
|
|
*/
|
|
static void
|
|
stringInit8 (pANTLR3_STRING string)
|
|
{
|
|
string->len = 0;
|
|
string->size = 0;
|
|
string->chars = NULL;
|
|
string->encoding = ANTLR3_ENC_8BIT ;
|
|
|
|
/* API for 8 bit strings*/
|
|
|
|
string->set = set8;
|
|
string->set8 = set8;
|
|
string->append = append8;
|
|
string->append8 = append8;
|
|
string->insert = insert8;
|
|
string->insert8 = insert8;
|
|
string->addi = addi8;
|
|
string->inserti = inserti8;
|
|
string->addc = addc8;
|
|
string->charAt = charAt8;
|
|
string->compare = compare8;
|
|
string->compare8 = compare8;
|
|
string->subString = subString8;
|
|
string->toInt32 = toInt32_8;
|
|
string->to8 = to8_8;
|
|
string->toUTF8 = toUTF8_8;
|
|
string->compareS = compareS;
|
|
string->setS = setS;
|
|
string->appendS = appendS;
|
|
string->insertS = insertS;
|
|
|
|
}
|
|
/**
|
|
*
|
|
* \param string
|
|
* \return
|
|
*/
|
|
static void
|
|
stringInitUTF16 (pANTLR3_STRING string)
|
|
{
|
|
string->len = 0;
|
|
string->size = 0;
|
|
string->chars = NULL;
|
|
string->encoding = ANTLR3_ENC_8BIT;
|
|
|
|
/* API for UTF16 strings */
|
|
|
|
string->set = setUTF16_UTF16;
|
|
string->set8 = setUTF16_8;
|
|
string->append = appendUTF16_UTF16;
|
|
string->append8 = appendUTF16_8;
|
|
string->insert = insertUTF16_UTF16;
|
|
string->insert8 = insertUTF16_8;
|
|
string->addi = addiUTF16;
|
|
string->inserti = insertiUTF16;
|
|
string->addc = addcUTF16;
|
|
string->charAt = charAtUTF16;
|
|
string->compare = compareUTF16_UTF16;
|
|
string->compare8 = compareUTF16_8;
|
|
string->subString = subStringUTF16;
|
|
string->toInt32 = toInt32_UTF16;
|
|
string->to8 = to8_UTF16;
|
|
string->toUTF8 = toUTF8_UTF16;
|
|
|
|
string->compareS = compareS;
|
|
string->setS = setS;
|
|
string->appendS = appendS;
|
|
string->insertS = insertS;
|
|
}
|
|
/**
|
|
*
|
|
* \param string
|
|
* \return
|
|
* TODO: Implement UTF-8
|
|
*/
|
|
static void
|
|
stringInitUTF8 (pANTLR3_STRING string)
|
|
{
|
|
string->len = 0;
|
|
string->size = 0;
|
|
string->chars = NULL;
|
|
|
|
/* API */
|
|
|
|
}
|
|
|
|
// Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
|
|
// a memcpy as we make no assumptions about the 8 bit encoding.
|
|
//
|
|
static pANTLR3_STRING
|
|
toUTF8_8 (pANTLR3_STRING string)
|
|
{
|
|
return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
|
|
}
|
|
|
|
// Convert a UTF16 string into a UTF8 representation using the Unicode.org
|
|
// supplied C algorithms, which are now contained within the ANTLR3 C runtime
|
|
// as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
|
|
// UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
|
|
//
|
|
static pANTLR3_STRING
|
|
toUTF8_UTF16 (pANTLR3_STRING string)
|
|
{
|
|
|
|
UTF8 * outputEnd;
|
|
UTF16 * inputEnd;
|
|
pANTLR3_STRING utf8String;
|
|
|
|
ConversionResult cResult;
|
|
|
|
// Allocate the output buffer, which needs to accommodate potentially
|
|
// 3X (in bytes) the input size (in chars).
|
|
//
|
|
utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
|
|
|
|
if (utf8String != NULL)
|
|
{
|
|
// Free existing allocation
|
|
//
|
|
ANTLR3_FREE(utf8String->chars);
|
|
|
|
// Reallocate according to maximum expected size
|
|
//
|
|
utf8String->size = string->len *3;
|
|
utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
|
|
|
|
if (utf8String->chars != NULL)
|
|
{
|
|
inputEnd = (UTF16 *) (string->chars);
|
|
outputEnd = (UTF8 *) (utf8String->chars);
|
|
|
|
// Call the Unicode converter
|
|
//
|
|
cResult = ConvertUTF16toUTF8
|
|
(
|
|
(const UTF16**)&inputEnd,
|
|
((const UTF16 *)(string->chars)) + string->len,
|
|
&outputEnd,
|
|
outputEnd + utf8String->size - 1,
|
|
lenientConversion
|
|
);
|
|
|
|
// We don't really care if things failed or not here, we just converted
|
|
// everything that was vaguely possible and stopped when it wasn't. It is
|
|
// up to the grammar programmer to verify that the input is sensible.
|
|
//
|
|
utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
|
|
|
|
*(outputEnd+1) = '\0'; // Always null terminate
|
|
}
|
|
}
|
|
return utf8String;
|
|
}
|
|
|
|
/**
|
|
* Creates a new string with enough capacity for size 8 bit characters plus a terminator.
|
|
*
|
|
* \param[in] factory - Pointer to the string factory that owns strings
|
|
* \param[in] size - In characters
|
|
* \return pointer to the new string.
|
|
*/
|
|
static pANTLR3_STRING
|
|
newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
string = factory->newRaw(factory);
|
|
|
|
if (string == NULL)
|
|
{
|
|
return string;
|
|
}
|
|
|
|
/* Always add one more byte for a terminator ;-)
|
|
*/
|
|
string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
|
|
if (string->chars != NULL)
|
|
{
|
|
*(string->chars) = '\0';
|
|
string->size = size + 1;
|
|
}
|
|
|
|
return string;
|
|
}
|
|
/**
|
|
* Creates a new string with enough capacity for size UTF16 characters plus a terminator.
|
|
*
|
|
* \param[in] factory - Pointer to the string factory that owns strings
|
|
* \param[in] size - In characters (count double for surrogate pairs!!!)
|
|
* \return pointer to the new string.
|
|
*/
|
|
static pANTLR3_STRING
|
|
newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
string = factory->newRaw(factory);
|
|
|
|
if (string == NULL)
|
|
{
|
|
return string;
|
|
}
|
|
|
|
/* Always add one more byte for a terminator ;-)
|
|
*/
|
|
string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
|
|
if (string->chars != NULL)
|
|
{
|
|
*(string->chars) = '\0';
|
|
string->size = size+1; /* Size is always in characters, as is len */
|
|
}
|
|
|
|
return string;
|
|
}
|
|
|
|
/** Creates a new 8 bit string initialized with the 8 bit characters at the
|
|
* supplied ptr, of pre-determined size.
|
|
* \param[in] factory - Pointer to the string factory that owns the strings
|
|
* \param[in] ptr - Pointer to 8 bit encoded characters
|
|
* \return pointer to the new string
|
|
*/
|
|
static pANTLR3_STRING
|
|
newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
string = factory->newSize(factory, size);
|
|
|
|
if (string == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
if (size <= 0)
|
|
{
|
|
return string;
|
|
}
|
|
|
|
if (ptr != NULL)
|
|
{
|
|
ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
|
|
*(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */
|
|
string->len = size;
|
|
}
|
|
|
|
return string;
|
|
}
|
|
|
|
/** Creates a new UTF16 string initialized with the 8 bit characters at the
|
|
* supplied 8 bit character ptr, of pre-determined size.
|
|
* \param[in] factory - Pointer to the string factory that owns the strings
|
|
* \param[in] ptr - Pointer to 8 bit encoded characters
|
|
* \return pointer to the new string
|
|
*/
|
|
static pANTLR3_STRING
|
|
newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
/* newSize accepts size in characters, not bytes
|
|
*/
|
|
string = factory->newSize(factory, size);
|
|
|
|
if (string == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
if (size <= 0)
|
|
{
|
|
return string;
|
|
}
|
|
|
|
if (ptr != NULL)
|
|
{
|
|
pANTLR3_UINT16 out;
|
|
ANTLR3_INT32 inSize;
|
|
|
|
out = (pANTLR3_UINT16)(string->chars);
|
|
inSize = size;
|
|
|
|
while (inSize-- > 0)
|
|
{
|
|
*out++ = (ANTLR3_UINT16)(*ptr++);
|
|
}
|
|
|
|
/* Terminate, these strings are usually used for Token streams and printing etc.
|
|
*/
|
|
*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
|
|
|
|
string->len = size;
|
|
}
|
|
|
|
return string;
|
|
}
|
|
|
|
/** Creates a new UTF16 string initialized with the UTF16 characters at the
|
|
* supplied ptr, of pre-determined size.
|
|
* \param[in] factory - Pointer to the string factory that owns the strings
|
|
* \param[in] ptr - Pointer to UTF16 encoded characters
|
|
* \return pointer to the new string
|
|
*/
|
|
static pANTLR3_STRING
|
|
newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
string = factory->newSize(factory, size);
|
|
|
|
if (string == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
if (size <= 0)
|
|
{
|
|
return string;
|
|
}
|
|
|
|
if (ptr != NULL)
|
|
{
|
|
ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
|
|
|
|
/* Terminate, these strings are usually used for Token streams and printing etc.
|
|
*/
|
|
*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
|
|
string->len = size;
|
|
}
|
|
|
|
return string;
|
|
}
|
|
|
|
/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
|
|
* \param[in] factory - Pointer to the string factory that owns strings.
|
|
* \param[in] ptr - Pointer to the 8 bit encoded string
|
|
* \return Pointer to the newly initialized string
|
|
*/
|
|
static pANTLR3_STRING
|
|
newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
|
|
{
|
|
return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
|
|
}
|
|
|
|
/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
|
|
* \param[in] factory - Pointer to the string factory that owns strings.
|
|
* \param[in] ptr - Pointer to the 8 bit encoded string
|
|
* \return POinter to the newly initialized string
|
|
*/
|
|
static pANTLR3_STRING
|
|
newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
|
|
{
|
|
return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
|
|
}
|
|
|
|
/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
|
|
* \param[in] factory - Pointer to the string factory that owns strings.
|
|
* \param[in] ptr - Pointer to the UTF16 encoded string
|
|
* \return Pointer to the newly initialized string
|
|
*/
|
|
static pANTLR3_STRING
|
|
newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
|
|
{
|
|
pANTLR3_UINT16 in;
|
|
ANTLR3_UINT32 count;
|
|
|
|
/** First, determine the length of the input string
|
|
*/
|
|
in = (pANTLR3_UINT16)ptr;
|
|
count = 0;
|
|
|
|
while (*in++ != '\0')
|
|
{
|
|
count++;
|
|
}
|
|
return factory->newPtr(factory, ptr, count);
|
|
}
|
|
|
|
static void
|
|
destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
|
|
{
|
|
// Record which string we are deleting
|
|
//
|
|
ANTLR3_UINT32 strIndex = string->index;
|
|
|
|
// Ensure that the string was not factory made, or we would try
|
|
// to delete memory that wasn't allocated outside the factory
|
|
// block.
|
|
// Remove the specific indexed string from the vector
|
|
//
|
|
factory->strings->del(factory->strings, strIndex);
|
|
|
|
// One less string in the vector, so decrement the factory index
|
|
// so that the next string allocated is indexed correctly with
|
|
// respect to the vector.
|
|
//
|
|
factory->index--;
|
|
|
|
// Now we have to reindex the strings in the vector that followed
|
|
// the one we just deleted. We only do this if the one we just deleted
|
|
// was not the last one.
|
|
//
|
|
if (strIndex< factory->index)
|
|
{
|
|
// We must reindex the strings after the one we just deleted.
|
|
// The one that follows the one we just deleted is also out
|
|
// of whack, so we start there.
|
|
//
|
|
ANTLR3_UINT32 i;
|
|
|
|
for (i = strIndex; i < factory->index; i++)
|
|
{
|
|
// Renumber the entry
|
|
//
|
|
((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
|
|
}
|
|
}
|
|
|
|
// The string has been destroyed and the elements of the factory are reindexed.
|
|
//
|
|
|
|
}
|
|
|
|
static pANTLR3_STRING
|
|
printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
/* We don't need to be too efficient here, this is mostly for error messages and so on.
|
|
*/
|
|
pANTLR3_UINT8 scannedText;
|
|
ANTLR3_UINT32 i;
|
|
|
|
/* Assume we need as much as twice as much space to parse out the control characters
|
|
*/
|
|
string = factory->newSize(factory, instr->len *2 + 1);
|
|
|
|
/* Scan through and replace unprintable (in terms of this routine)
|
|
* characters
|
|
*/
|
|
scannedText = string->chars;
|
|
|
|
for (i = 0; i < instr->len; i++)
|
|
{
|
|
if (*(instr->chars + i) == '\n')
|
|
{
|
|
*scannedText++ = '\\';
|
|
*scannedText++ = 'n';
|
|
}
|
|
else if (*(instr->chars + i) == '\r')
|
|
{
|
|
*scannedText++ = '\\';
|
|
*scannedText++ = 'r';
|
|
}
|
|
else if (!isprint(*(instr->chars +i)))
|
|
{
|
|
*scannedText++ = '?';
|
|
}
|
|
else
|
|
{
|
|
*scannedText++ = *(instr->chars + i);
|
|
}
|
|
}
|
|
*scannedText = '\0';
|
|
|
|
string->len = (ANTLR3_UINT32)(scannedText - string->chars);
|
|
|
|
return string;
|
|
}
|
|
|
|
static pANTLR3_STRING
|
|
printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
|
|
{
|
|
pANTLR3_STRING string;
|
|
|
|
/* We don't need to be too efficient here, this is mostly for error messages and so on.
|
|
*/
|
|
pANTLR3_UINT16 scannedText;
|
|
pANTLR3_UINT16 inText;
|
|
ANTLR3_UINT32 i;
|
|
ANTLR3_UINT32 outLen;
|
|
|
|
/* Assume we need as much as twice as much space to parse out the control characters
|
|
*/
|
|
string = factory->newSize(factory, instr->len *2 + 1);
|
|
|
|
/* Scan through and replace unprintable (in terms of this routine)
|
|
* characters
|
|
*/
|
|
scannedText = (pANTLR3_UINT16)(string->chars);
|
|
inText = (pANTLR3_UINT16)(instr->chars);
|
|
outLen = 0;
|
|
|
|
for (i = 0; i < instr->len; i++)
|
|
{
|
|
if (*(inText + i) == '\n')
|
|
{
|
|
*scannedText++ = '\\';
|
|
*scannedText++ = 'n';
|
|
outLen += 2;
|
|
}
|
|
else if (*(inText + i) == '\r')
|
|
{
|
|
*scannedText++ = '\\';
|
|
*scannedText++ = 'r';
|
|
outLen += 2;
|
|
}
|
|
else if (!isprint(*(inText +i)))
|
|
{
|
|
*scannedText++ = '?';
|
|
outLen++;
|
|
}
|
|
else
|
|
{
|
|
*scannedText++ = *(inText + i);
|
|
outLen++;
|
|
}
|
|
}
|
|
*scannedText = '\0';
|
|
|
|
string->len = outLen;
|
|
|
|
return string;
|
|
}
|
|
|
|
/** Fascist Capitalist Pig function created
|
|
* to oppress the workers comrade.
|
|
*/
|
|
static void
|
|
closeFactory (pANTLR3_STRING_FACTORY factory)
|
|
{
|
|
/* Delete the vector we were tracking the strings with, this will
|
|
* causes all the allocated strings to be deallocated too
|
|
*/
|
|
factory->strings->free(factory->strings);
|
|
|
|
/* Delete the space for the factory itself
|
|
*/
|
|
ANTLR3_FREE((void *)factory);
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
append8 (pANTLR3_STRING string, const char * newbit)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
|
|
len = (ANTLR3_UINT32)strlen(newbit);
|
|
|
|
if (string->size < (string->len + len + 1))
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + len + 1;
|
|
}
|
|
|
|
/* Note we copy one more byte than the strlen in order to get the trailing
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
|
|
string->len += len;
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
appendUTF16_8 (pANTLR3_STRING string, const char * newbit)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
pANTLR3_UINT16 apPoint;
|
|
ANTLR3_UINT32 count;
|
|
|
|
len = (ANTLR3_UINT32)strlen(newbit);
|
|
|
|
if (string->size < (string->len + len + 1))
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + len + 1;
|
|
}
|
|
|
|
apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
|
|
string->len += len;
|
|
|
|
for (count = 0; count < len; count++)
|
|
{
|
|
*apPoint++ = *(newbit + count);
|
|
}
|
|
*apPoint = '\0';
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
pANTLR3_UINT16 in;
|
|
|
|
/** First, determine the length of the input string
|
|
*/
|
|
in = (pANTLR3_UINT16)newbit;
|
|
len = 0;
|
|
|
|
while (*in++ != '\0')
|
|
{
|
|
len++;
|
|
}
|
|
|
|
if (string->size < (string->len + len + 1))
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + len + 1;
|
|
}
|
|
|
|
/* Note we copy one more byte than the strlen in order to get the trailing delimiter
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
|
|
string->len += len;
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
set8 (pANTLR3_STRING string, const char * chars)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
|
|
len = (ANTLR3_UINT32)strlen(chars);
|
|
if (string->size < len + 1)
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = len + 1;
|
|
}
|
|
|
|
/* Note we copy one more byte than the strlen in order to get the trailing '\0'
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
|
|
string->len = len;
|
|
|
|
return string->chars;
|
|
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
setUTF16_8 (pANTLR3_STRING string, const char * chars)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
ANTLR3_UINT32 count;
|
|
pANTLR3_UINT16 apPoint;
|
|
|
|
len = (ANTLR3_UINT32)strlen(chars);
|
|
if (string->size < len + 1)
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = len + 1;
|
|
}
|
|
apPoint = ((pANTLR3_UINT16)string->chars);
|
|
string->len = len;
|
|
|
|
for (count = 0; count < string->len; count++)
|
|
{
|
|
*apPoint++ = *(chars + count);
|
|
}
|
|
*apPoint = '\0';
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
setUTF16_UTF16 (pANTLR3_STRING string, const char * chars)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
pANTLR3_UINT16 in;
|
|
|
|
/** First, determine the length of the input string
|
|
*/
|
|
in = (pANTLR3_UINT16)chars;
|
|
len = 0;
|
|
|
|
while (*in++ != '\0')
|
|
{
|
|
len++;
|
|
}
|
|
|
|
if (string->size < len + 1)
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = len + 1;
|
|
}
|
|
|
|
/* Note we copy one more byte than the strlen in order to get the trailing '\0'
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
|
|
string->len = len;
|
|
|
|
return string->chars;
|
|
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
|
|
{
|
|
if (string->size < string->len + 2)
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + 2;
|
|
}
|
|
*(string->chars + string->len) = (ANTLR3_UINT8)c;
|
|
*(string->chars + string->len + 1) = '\0';
|
|
string->len++;
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
|
|
{
|
|
pANTLR3_UINT16 ptr;
|
|
|
|
if (string->size < string->len + 2)
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + 2;
|
|
}
|
|
ptr = (pANTLR3_UINT16)(string->chars);
|
|
|
|
*(ptr + string->len) = (ANTLR3_UINT16)c;
|
|
*(ptr + string->len + 1) = '\0';
|
|
string->len++;
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
|
|
{
|
|
ANTLR3_UINT8 newbit[32];
|
|
|
|
sprintf((char *)newbit, "%d", i);
|
|
|
|
return string->append8(string, (const char *)newbit);
|
|
}
|
|
static pANTLR3_UINT8
|
|
addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i)
|
|
{
|
|
ANTLR3_UINT8 newbit[32];
|
|
|
|
sprintf((char *)newbit, "%d", i);
|
|
|
|
return string->append8(string, (const char *)newbit);
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
|
|
{
|
|
ANTLR3_UINT8 newbit[32];
|
|
|
|
sprintf((char *)newbit, "%d", i);
|
|
return string->insert8(string, point, (const char *)newbit);
|
|
}
|
|
static pANTLR3_UINT8
|
|
insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
|
|
{
|
|
ANTLR3_UINT8 newbit[32];
|
|
|
|
sprintf((char *)newbit, "%d", i);
|
|
return string->insert8(string, point, (const char *)newbit);
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
|
|
if (point >= string->len)
|
|
{
|
|
return string->append(string, newbit);
|
|
}
|
|
|
|
len = (ANTLR3_UINT32)strlen(newbit);
|
|
|
|
if (len == 0)
|
|
{
|
|
return string->chars;
|
|
}
|
|
|
|
if (string->size < (string->len + len + 1))
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + len + 1;
|
|
}
|
|
|
|
/* Move the characters we are inserting before, including the delimiter
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
|
|
|
|
/* Note we copy the exact number of bytes
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
|
|
|
|
string->len += len;
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
ANTLR3_UINT32 count;
|
|
pANTLR3_UINT16 inPoint;
|
|
|
|
if (point >= string->len)
|
|
{
|
|
return string->append8(string, newbit);
|
|
}
|
|
|
|
len = (ANTLR3_UINT32)strlen(newbit);
|
|
|
|
if (len == 0)
|
|
{
|
|
return string->chars;
|
|
}
|
|
|
|
if (string->size < (string->len + len + 1))
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + len + 1;
|
|
}
|
|
|
|
/* Move the characters we are inserting before, including the delimiter
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
|
|
|
|
string->len += len;
|
|
|
|
inPoint = ((pANTLR3_UINT16)(string->chars))+point;
|
|
for (count = 0; count<len; count++)
|
|
{
|
|
*(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
|
|
}
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
static pANTLR3_UINT8
|
|
insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
|
|
{
|
|
ANTLR3_UINT32 len;
|
|
pANTLR3_UINT16 in;
|
|
|
|
if (point >= string->len)
|
|
{
|
|
return string->append(string, newbit);
|
|
}
|
|
|
|
/** First, determine the length of the input string
|
|
*/
|
|
in = (pANTLR3_UINT16)newbit;
|
|
len = 0;
|
|
|
|
while (*in++ != '\0')
|
|
{
|
|
len++;
|
|
}
|
|
|
|
if (len == 0)
|
|
{
|
|
return string->chars;
|
|
}
|
|
|
|
if (string->size < (string->len + len + 1))
|
|
{
|
|
pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
|
|
if (newAlloc == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
string->chars = newAlloc;
|
|
string->size = string->len + len + 1;
|
|
}
|
|
|
|
/* Move the characters we are inserting before, including the delimiter
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
|
|
|
|
|
|
/* Note we copy the exact number of characters
|
|
*/
|
|
ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
|
|
|
|
string->len += len;
|
|
|
|
return string->chars;
|
|
}
|
|
|
|
/* Passes the character buffer of another string object to this string's
 * set() entry and returns whatever set() returns. chars must not be NULL.
 */
static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
{
    const char * source;

    source = (const char *)(chars->chars);

    return string->set(string, source);
}
|
|
|
|
/* Appends the contents of another string object to this string.
 *
 * When newbit is NULL, has no characters, has no allocated size or has no
 * buffer, there is nothing to append and the current buffer is returned
 * as is. Otherwise newbit's buffer is passed to the string's append() entry.
 */
static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
{
    if (newbit != NULL && newbit->len != 0 && newbit->size != 0 && newbit->chars != NULL)
    {
        return string->append(string, (const char *)(newbit->chars));
    }

    /* Nothing usable was supplied
     */
    return string->chars;
}
|
|
|
|
/* Inserts the contents of another string object into this string at
 * character offset point, by passing newbit's buffer to the string's
 * insert() entry.
 *
 * A NULL newbit, or one that has no character buffer, is treated as nothing
 * to insert: the current buffer is returned unchanged rather than
 * dereferencing a NULL pointer. This mirrors the tolerance appendS() shows
 * for the same inputs.
 */
static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
{
    if (newbit == NULL || newbit->chars == NULL)
    {
        return string->chars;
    }

    return string->insert(string, point, (const char *)(newbit->chars));
}
|
|
|
|
/* Function that compares the text of a string to the supplied
|
|
* 8 bit character string and returns a result a la strcmp()
|
|
*/
|
|
static ANTLR3_UINT32
|
|
compare8 (pANTLR3_STRING string, const char * compStr)
|
|
{
|
|
return strcmp((const char *)(string->chars), compStr);
|
|
}
|
|
|
|
/* Function that compares the text of a string with the supplied character string
|
|
* (which is assumed to be in the same encoding as the string itself) and returns a result
|
|
* a la strcmp()
|
|
*/
|
|
static ANTLR3_UINT32
|
|
compareUTF16_8 (pANTLR3_STRING string, const char * compStr)
|
|
{
|
|
pANTLR3_UINT16 ourString;
|
|
ANTLR3_UINT32 charDiff;
|
|
|
|
ourString = (pANTLR3_UINT16)(string->chars);
|
|
|
|
while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
|
|
{
|
|
charDiff = *ourString - *compStr;
|
|
if (charDiff != 0)
|
|
{
|
|
return charDiff;
|
|
}
|
|
ourString++;
|
|
compStr++;
|
|
}
|
|
|
|
/* At this point, one of the strings was terminated
|
|
*/
|
|
return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
|
|
|
|
}
|
|
|
|
/* Function that compares the text of a string with the supplied character string
|
|
* (which is assumed to be in the same encoding as the string itself) and returns a result
|
|
* a la strcmp()
|
|
*/
|
|
static ANTLR3_UINT32
|
|
compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8)
|
|
{
|
|
pANTLR3_UINT16 ourString;
|
|
pANTLR3_UINT16 compStr;
|
|
ANTLR3_UINT32 charDiff;
|
|
|
|
ourString = (pANTLR3_UINT16)(string->chars);
|
|
compStr = (pANTLR3_UINT16)(compStr8);
|
|
|
|
while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
|
|
{
|
|
charDiff = *ourString - *compStr;
|
|
if (charDiff != 0)
|
|
{
|
|
return charDiff;
|
|
}
|
|
ourString++;
|
|
compStr++;
|
|
}
|
|
|
|
/* At this point, one of the strings was terminated
|
|
*/
|
|
return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
|
|
}
|
|
|
|
/* Function that compares the text of a string with the supplied string
|
|
* (which is assumed to be in the same encoding as the string itself) and returns a result
|
|
* a la strcmp()
|
|
*/
|
|
static ANTLR3_UINT32
|
|
compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
|
|
{
|
|
return string->compare(string, (const char *)compStr->chars);
|
|
}
|
|
|
|
|
|
/* Function that returns the character indexed at the supplied
|
|
* offset as a 32 bit character.
|
|
*/
|
|
static ANTLR3_UCHAR
|
|
charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
|
|
{
|
|
if (offset > string->len)
|
|
{
|
|
return (ANTLR3_UCHAR)'\0';
|
|
}
|
|
else
|
|
{
|
|
return (ANTLR3_UCHAR)(*(string->chars + offset));
|
|
}
|
|
}
|
|
|
|
/* Function that returns the character indexed at the supplied
|
|
* offset as a 32 bit character.
|
|
*/
|
|
static ANTLR3_UCHAR
|
|
charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
|
|
{
|
|
if (offset > string->len)
|
|
{
|
|
return (ANTLR3_UCHAR)'\0';
|
|
}
|
|
else
|
|
{
|
|
return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
|
|
}
|
|
}
|
|
|
|
/* Function that returns a substring of the supplied string a la .subString(s,e)
|
|
* in java runtimes.
|
|
*/
|
|
static pANTLR3_STRING
|
|
subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
|
|
{
|
|
pANTLR3_STRING newStr;
|
|
|
|
if (endIndex > string->len)
|
|
{
|
|
endIndex = string->len + 1;
|
|
}
|
|
newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
|
|
|
|
return newStr;
|
|
}
|
|
|
|
/* Returns a substring of the supplied string a la .subString(s,e)
|
|
* in java runtimes.
|
|
*/
|
|
static pANTLR3_STRING
|
|
subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
|
|
{
|
|
pANTLR3_STRING newStr;
|
|
|
|
if (endIndex > string->len)
|
|
{
|
|
endIndex = string->len + 1;
|
|
}
|
|
newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
|
|
|
|
return newStr;
|
|
}
|
|
|
|
/* Function that can convert the characters in the string to an integer
|
|
*/
|
|
static ANTLR3_INT32
|
|
toInt32_8 (struct ANTLR3_STRING_struct * string)
|
|
{
|
|
return atoi((const char *)(string->chars));
|
|
}
|
|
|
|
/* Function that can convert the characters in the string to an integer
|
|
*/
|
|
static ANTLR3_INT32
|
|
toInt32_UTF16 (struct ANTLR3_STRING_struct * string)
|
|
{
|
|
pANTLR3_UINT16 input;
|
|
ANTLR3_INT32 value;
|
|
ANTLR3_BOOLEAN negate;
|
|
|
|
value = 0;
|
|
input = (pANTLR3_UINT16)(string->chars);
|
|
negate = ANTLR3_FALSE;
|
|
|
|
if (*input == (ANTLR3_UCHAR)'-')
|
|
{
|
|
negate = ANTLR3_TRUE;
|
|
input++;
|
|
}
|
|
else if (*input == (ANTLR3_UCHAR)'+')
|
|
{
|
|
input++;
|
|
}
|
|
|
|
while (*input != '\0' && isdigit(*input))
|
|
{
|
|
value = value * 10;
|
|
value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
|
|
input++;
|
|
}
|
|
|
|
return negate ? -value : value;
|
|
}
|
|
|
|
/* Function that returns a pointer to an 8 bit version of the string,
|
|
* which in this case is just the string as this is
|
|
* 8 bit encodiing anyway.
|
|
*/
|
|
static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
|
|
{
|
|
return string;
|
|
}
|
|
|
|
/* Function that returns an 8 bit version of the string,
|
|
* which in this case is returning all the UTF16 characters
|
|
* narrowed back into 8 bits, with characters that are too large
|
|
* replaced with '_'
|
|
*/
|
|
static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string)
|
|
{
|
|
pANTLR3_STRING newStr;
|
|
ANTLR3_UINT32 i;
|
|
|
|
/* Create a new 8 bit string
|
|
*/
|
|
newStr = newRaw8(string->factory);
|
|
|
|
if (newStr == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/* Always add one more byte for a terminator
|
|
*/
|
|
newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
|
|
if (newStr->chars != NULL)
|
|
{
|
|
newStr->size = string->len + 1;
|
|
newStr->len = string->len;
|
|
|
|
/* Now copy each UTF16 charActer , making it an 8 bit character of
|
|
* some sort.
|
|
*/
|
|
for (i=0; i<string->len; i++)
|
|
{
|
|
ANTLR3_UCHAR c;
|
|
|
|
c = *(((pANTLR3_UINT16)(string->chars)) + i);
|
|
|
|
*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
|
|
}
|
|
|
|
/* Terminate
|
|
*/
|
|
*(newStr->chars + newStr->len) = '\0';
|
|
}
|
|
|
|
return newStr;
|
|
}
|