498 lines
15 KiB
C++
498 lines
15 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2005-2012, International Business Machines
*   Corporation and others. All Rights Reserved.
*
*******************************************************************************
*   file name: writesrc.c
*   encoding: UTF-8
*   tab size: 8 (not used)
*   indentation:4
*
*   created on: 2005apr23
*   created by: Markus W. Scherer
*
*   Helper functions for writing source code for data.
*/
|
|
|
|
#include <stdio.h>
|
|
#include <time.h>
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/putil.h"
|
|
#include "unicode/ucptrie.h"
|
|
#include "unicode/errorcode.h"
|
|
#include "unicode/uniset.h"
|
|
#include "unicode/usetiter.h"
|
|
#include "unicode/utf16.h"
|
|
#include "utrie2.h"
|
|
#include "cstring.h"
|
|
#include "writesrc.h"
|
|
#include "util.h"
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
ValueNameGetter::~ValueNameGetter() {}
|
|
|
|
U_NAMESPACE_END
|
|
|
|
U_NAMESPACE_USE
|
|
|
|
static FILE *
|
|
usrc_createWithoutHeader(const char *path, const char *filename) {
|
|
char buffer[1024];
|
|
const char *p;
|
|
char *q;
|
|
FILE *f;
|
|
char c;
|
|
|
|
if(path==NULL) {
|
|
p=filename;
|
|
} else {
|
|
/* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
|
|
uprv_strcpy(buffer, path);
|
|
q=buffer+uprv_strlen(buffer);
|
|
if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
|
|
*q++=U_FILE_SEP_CHAR;
|
|
}
|
|
uprv_strcpy(q, filename);
|
|
p=buffer;
|
|
}
|
|
|
|
f=fopen(p, "w");
|
|
if (f==NULL) {
|
|
fprintf(
|
|
stderr,
|
|
"usrc_create(%s, %s): unable to create file\n",
|
|
path!=NULL ? path : "", filename);
|
|
}
|
|
return f;
|
|
}
|
|
|
|
/**
 * Creates path/filename for writing and emits the copyright header followed
 * by the file-name/generator header, each line commented with "//".
 *
 * @return The opened FILE, or NULL if the file could not be created
 *         (in which case nothing is written).
 */
U_CAPI FILE * U_EXPORT2
usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
    FILE *out = usrc_createWithoutHeader(path, filename);
    if (out != NULL) {
        usrc_writeCopyrightHeader(out, "//", copyrightYear);
        usrc_writeFileNameGeneratedBy(out, "//", filename, generator);
    }
    return out;
}
|
|
|
|
/**
 * Creates path/filename for writing and emits the copyright header followed
 * by the file-name/generator header, each line commented with "#".
 *
 * @return The opened FILE, or NULL if the file could not be created
 *         (in which case nothing is written).
 */
U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
    FILE *out = usrc_createWithoutHeader(path, filename);
    if (out != NULL) {
        usrc_writeCopyrightHeader(out, "#", copyrightYear);
        usrc_writeFileNameGeneratedBy(out, "#", filename, generator);
    }
    return out;
}
|
|
|
|
/**
 * Writes the copyright notice to f, each line starting with prefix.
 *
 * The Unicode notice (with copyrightYear) and the license line are always
 * written. The two IBM notice lines are added only when copyrightYear is
 * 2016 or earlier.
 */
U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
    fprintf(f,
        "%s Copyright (C) %d and later: Unicode, Inc. and others.\n",
        prefix, copyrightYear);
    fprintf(f,
        "%s License & terms of use: http://www.unicode.org/copyright.html\n",
        prefix);
    if (copyrightYear > 2016) {
        return;
    }
    fprintf(f,
        "%s Copyright (C) 1999-2016, International Business Machines\n",
        prefix);
    fprintf(f,
        "%s Corporation and others. All Rights Reserved.\n",
        prefix);
}
|
|
|
|
/**
 * Writes the "file name" / "machine-generated by" comment block to f.
 *
 * @param f         Output file.
 * @param prefix    Comment marker placed at the start of each line.
 * @param filename  Name printed on the "file name:" line.
 * @param generator Text printed after "machine-generated by:". If NULL, the
 *                  current local date (YYYY-MM-DD) is printed instead; if the
 *                  date cannot be determined, "(unknown date)" is printed.
 */
U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(
        FILE *f,
        const char *prefix,
        const char *filename,
        const char *generator) {
    char dateBuffer[64];
    const char *generatedBy = generator;

    const char *pattern =
        "%s\n"
        "%s file name: %s\n"
        "%s\n"
        "%s machine-generated by: %s\n"
        "\n";

    if(generatedBy==NULL) {
        /* Only consult the clock when the date is actually needed. */
        time_t now;
        const struct tm *local;

        time(&now);
        local=localtime(&now);
        /*
         * localtime() may return NULL and strftime() returns 0 on failure,
         * leaving the buffer contents unspecified; never print it in that case.
         */
        if(local!=NULL && strftime(dateBuffer, sizeof(dateBuffer), "%Y-%m-%d", local)!=0) {
            generatedBy=dateBuffer;
        } else {
            generatedBy="(unknown date)";
        }
    }
    fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generatedBy);
}
|
|
|
|
/**
 * Writes an array of integers as comma-separated literals.
 *
 * Values 0..9 are written in decimal, larger values as 0x-prefixed hex.
 * After every 16 values a line break is emitted, followed by indent.
 *
 * @param f       Output file.
 * @param prefix  If not NULL, used as a printf-style format string that
 *                receives length as a long (e.g. containing "%ld");
 *                written before the values.
 * @param p       Pointer to the array data.
 * @param width   Element width in bits: 8, 16 or 32. Any other value prints
 *                an error to stderr and writes nothing to f.
 * @param length  Number of elements in the array.
 * @param indent  String written at the start of each continuation line;
 *                NULL is treated like an empty string.
 * @param postfix If not NULL, written verbatim after the values.
 */
U_CAPI void U_EXPORT2
usrc_writeArray(FILE *f,
                const char *prefix,
                const void *p, int32_t width, int32_t length,
                const char *indent,
                const char *postfix) {
    const uint8_t *p8=NULL;
    const uint16_t *p16=NULL;
    const uint32_t *p32=NULL;
    /* prefix and postfix tolerate NULL; give indent the same treatment. */
    const char *lineIndent= indent!=NULL ? indent : "";
    uint32_t value;
    int32_t i, col;

    switch(width) {
    case 8:
        p8=(const uint8_t *)p;
        break;
    case 16:
        p16=(const uint16_t *)p;
        break;
    case 32:
        p32=(const uint32_t *)p;
        break;
    default:
        fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
        return;
    }
    if(prefix!=NULL) {
        fprintf(f, prefix, (long)length);
    }
    for(i=col=0; i<length; ++i, ++col) {
        if(i>0) {
            if(col<16) {
                fputc(',', f);
            } else {
                /* Start a new output line after 16 values. */
                fputs(",\n", f);
                fputs(lineIndent, f);
                col=0;
            }
        }
        switch(width) {
        case 8:
            value=p8[i];
            break;
        case 16:
            value=p16[i];
            break;
        case 32:
            value=p32[i];
            break;
        default:
            value=0; /* unreachable */
            break;
        }
        fprintf(f, value<=9 ? "%lu" : "0x%lx", (unsigned long)value);
    }
    if(postfix!=NULL) {
        fputs(postfix, f);
    }
}
|
|
|
|
/**
 * Writes the array(s) of a UTrie2 via usrc_writeArray().
 *
 * If pTrie->data32 is NULL (16-bit trie), a single 16-bit array of
 * indexLength+dataLength units starting at pTrie->index is written using
 * indexPrefix. Otherwise (32-bit trie) the 16-bit index array is written
 * with indexPrefix and the 32-bit data array with data32Prefix.
 * postfix is passed along for every array written.
 */
U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE *f,
                       const char *indexPrefix, const char *data32Prefix,
                       const UTrie2 *pTrie,
                       const char *postfix) {
    if(pTrie->data32!=NULL) {
        /* 32-bit trie: separate index and data arrays */
        usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
        usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
        return;
    }
    /* 16-bit trie: index and data are written as one combined array */
    usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
}
|
|
|
|
/**
 * Writes the field initializers for a UTrie2 struct.
 *
 * Output order: prefix (if not NULL), the index/data16/data32 pointers,
 * indexLength, dataLength, index2NullOffset, dataNullOffset, initialValue,
 * errorValue, highStart, highValueIndex, a fixed final line of constants,
 * and postfix (if not NULL).
 *
 * For a 16-bit trie (pTrie->data32 is NULL) the data16 pointer is written as
 * indexName+indexLength and data32 as NULL; for a 32-bit trie data16 is NULL
 * and data32 is data32Name.
 */
U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE *f,
                       const char *prefix,
                       const UTrie2 *pTrie,
                       const char *indexName, const char *data32Name,
                       const char *postfix) {
    const long indexLength=(long)pTrie->indexLength;

    if(prefix!=NULL) {
        fputs(prefix, f);
    }

    /* index, data16, data32 pointers */
    if(pTrie->data32!=NULL) {
        /* 32-bit trie */
        fprintf(f, " %s,\n", indexName);
        fputs(" NULL,\n", f);
        fprintf(f, " %s,\n", data32Name);
    } else {
        /* 16-bit trie */
        fprintf(f, " %s,\n", indexName);
        fprintf(f, " %s+%ld,\n", indexName, indexLength);
        fputs(" NULL,\n", f);
    }

    /* indexLength, dataLength */
    fprintf(f, " %ld,\n", indexLength);
    fprintf(f, " %ld,\n", (long)pTrie->dataLength);
    /* index2NullOffset, dataNullOffset */
    fprintf(f, " 0x%hx,\n", (short)pTrie->index2NullOffset);
    fprintf(f, " 0x%hx,\n", (short)pTrie->dataNullOffset);
    /* initialValue, errorValue */
    fprintf(f, " 0x%lx,\n", (long)pTrie->initialValue);
    fprintf(f, " 0x%lx,\n", (long)pTrie->errorValue);
    /* highStart, highValueIndex */
    fprintf(f, " 0x%lx,\n", (long)pTrie->highStart);
    fprintf(f, " 0x%lx,\n", (long)pTrie->highValueIndex);
    /* remaining fields are written as fixed constants */
    fputs(" NULL, 0, FALSE, FALSE, 0, NULL\n", f);

    if(postfix!=NULL) {
        fputs(postfix, f);
    }
}
|
|
|
|
/**
 * Writes the index and data arrays of a UCPTrie via usrc_writeArray().
 *
 * The index is written as 16-bit units with indexPrefix. The data is written
 * with dataPrefix at a width of 16, 32 or 8 bits according to
 * pTrie->valueWidth; any other valueWidth results in width 0 being passed on.
 * Continuation lines are indented with " " for TOML and not at all otherwise.
 */
U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE *f,
                        const char *indexPrefix, const char *dataPrefix,
                        const UCPTrie *pTrie,
                        const char *postfix,
                        UTargetSyntax syntax) {
    const char *indent = "";
    if (syntax == UPRV_TARGET_SYNTAX_TOML) {
        indent = " ";
    }

    int32_t dataWidth;
    if (pTrie->valueWidth == UCPTRIE_VALUE_BITS_16) {
        dataWidth = 16;
    } else if (pTrie->valueWidth == UCPTRIE_VALUE_BITS_32) {
        dataWidth = 32;
    } else if (pTrie->valueWidth == UCPTRIE_VALUE_BITS_8) {
        dataWidth = 8;
    } else {
        dataWidth = 0;
    }

    usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
    usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, dataWidth, pTrie->dataLength, indent, postfix);
}
|
|
|
|
/**
 * Writes the scalar fields of a UCPTrie.
 *
 * For UPRV_TARGET_SYNTAX_CCODE the output is a C struct initializer body:
 * the index and data array names first, then the numeric fields. For any
 * other syntax the numeric fields are written as "key = value" lines and
 * indexName/dataName are not used.
 *
 * prefix and postfix, when not NULL, are written verbatim before and after.
 */
U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE *f,
                        const char *prefix,
                        const UCPTrie *pTrie,
                        const char *indexName, const char *dataName,
                        const char *postfix,
                        UTargetSyntax syntax) {
    const char *fieldsPattern;

    if(prefix!=NULL) {
        fputs(prefix, f);
    }

    if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
        fprintf(
            f,
            " %s,\n"       // index
            " { %s },\n",  // data (union)
            indexName,
            dataName);
        fieldsPattern =
            " %ld, %ld,\n"     // indexLength, dataLength
            " 0x%lx, 0x%x,\n"  // highStart, shifted12HighStart
            " %d, %d,\n"       // type, valueWidth
            " 0, 0,\n"         // reserved32, reserved16
            " 0x%x, 0x%lx,\n"  // index3NullOffset, dataNullOffset
            " 0x%lx,\n";       // nullValue
    } else {
        fieldsPattern =
            "indexLength = %ld\n"
            "dataLength = %ld\n"
            "highStart = 0x%lx\n"
            "shifted12HighStart = 0x%x\n"
            "type = %d\n"
            "valueWidth = %d\n"
            "index3NullOffset = 0x%x\n"
            "dataNullOffset = 0x%lx\n"
            "nullValue = 0x%lx\n";
    }

    // Both patterns consume the same arguments in the same order.
    fprintf(
        f,
        fieldsPattern,
        (long)pTrie->indexLength, (long)pTrie->dataLength,
        (long)pTrie->highStart, pTrie->shifted12HighStart,
        pTrie->type, pTrie->valueWidth,
        pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
        (long)pTrie->nullValue);

    if(postfix!=NULL) {
        fputs(postfix, f);
    }
}
|
|
|
|
/**
 * Writes a complete UCPTrie: first its index and data arrays, then its
 * scalar fields.
 *
 * For UPRV_TARGET_SYNTAX_CCODE the arrays are declared as
 * <name>_trieIndex / <name>_trieData and the struct as <name>_trie.
 * For UPRV_TARGET_SYNTAX_TOML the arrays are written as "index" and
 * "data_<width>" and name is not used.
 *
 * The declaration lines are built in fixed 100-byte buffers with snprintf(),
 * so an overly long name is truncated instead of overflowing the stack.
 *
 * @param f      Output file.
 * @param name   Identifier prefix for the generated C declarations.
 * @param pTrie  The trie to write.
 * @param syntax Output syntax; values other than CCODE and TOML are not
 *               supported (UPRV_UNREACHABLE_EXIT).
 */
U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
    int32_t width=
        pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
        pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
        pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
    char line[100], line2[100], line3[100], line4[100];

    // Array prefixes are printf patterns: "%%ld" leaves a "%ld" in the buffer
    // for usrc_writeArray() to fill in with the array length.
    switch (syntax) {
    case UPRV_TARGET_SYNTAX_CCODE:
        snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name);
        snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
        snprintf(line3, sizeof(line3), "\n};\n\n");
        break;
    case UPRV_TARGET_SYNTAX_TOML:
        snprintf(line, sizeof(line), "index = [\n ");
        snprintf(line2, sizeof(line2), "data_%d = [\n ", (int)width);
        snprintf(line3, sizeof(line3), "\n]\n");
        break;
    default:
        UPRV_UNREACHABLE_EXIT;
    }
    usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);

    switch (syntax) {
    case UPRV_TARGET_SYNTAX_CCODE:
        snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name);
        snprintf(line2, sizeof(line2), "%s_trieIndex", name);
        snprintf(line3, sizeof(line3), "%s_trieData", name);
        snprintf(line4, sizeof(line4), "};\n\n");
        break;
    case UPRV_TARGET_SYNTAX_TOML:
        line[0] = 0;
        line2[0] = 0;
        line3[0] = 0;
        line4[0] = 0;
        break;
    default:
        UPRV_UNREACHABLE_EXIT;
    }
    usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
}
|
|
|
|
/**
 * Writes the contents of a USet: a "ranges" list of inclusive code point
 * ranges and, if the set contains strings, a following "strings" list.
 *
 * Only UPRV_TARGET_SYNTAX_TOML is supported (asserted). The iterator is
 * expected to deliver all ranges before any string (asserted); the
 * "strings" list is opened when the first string is seen.
 */
U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(
        FILE *f,
        const USet *pSet,
        UTargetSyntax syntax) {
    // ccode is not yet supported
    U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);

    const UnicodeSet &uniset = *UnicodeSet::fromUSet(pSet);
    UnicodeSetIterator iter(uniset);

    fputs("# Inclusive ranges of the code points in the set.\n", f);
    fputs("ranges = [\n", f);

    bool inStringsList = false;
    while (iter.nextRange()) {
        if (!iter.isString()) {
            // A code point range; must not appear after the strings list started.
            U_ASSERT(!inStringsList);
            UChar32 first = iter.getCodepoint();
            UChar32 last = iter.getCodepointEnd();
            fprintf(f, " [0x%x, 0x%x],\n", first, last);
            continue;
        }

        if (!inStringsList) {
            // First string: close the ranges list and open the strings list.
            inStringsList = true;
            fputs("]\nstrings = [\n", f);
        }
        const UnicodeString &text = iter.getString();
        fputs(" ", f);
        usrc_writeStringAsASCII(f, text.getBuffer(), text.length(), syntax);
        fputs(",\n", f);
    }

    // Closes whichever list is currently open.
    fputs("]\n", f);
}
|
|
|
|
/**
 * Writes all ranges of a UCPMap as a "ranges" list, one entry per range
 * with start (a), end (b) and value (v).
 *
 * Ranges are obtained by calling ucpmap_getRange() repeatedly, starting at
 * code point 0 and continuing after each returned range end, until it
 * returns a negative value. If valueNameGetter is not nullptr, each entry
 * also carries the name it returns for the range value.
 *
 * Only UPRV_TARGET_SYNTAX_TOML is supported (asserted).
 */
U_CAPI void U_EXPORT2
usrc_writeUCPMap(
        FILE *f,
        const UCPMap *pMap,
        icu::ValueNameGetter *valueNameGetter,
        UTargetSyntax syntax) {
    // ccode is not yet supported
    U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
    (void) syntax; // silence unused variable errors

    fputs("# Code points `a` through `b` have value `v`, corresponding to `name`.\n", f);
    fputs("ranges = [\n", f);

    UChar32 rangeEnd;
    uint32_t rangeValue;
    for (UChar32 rangeStart = 0;
            (rangeEnd = ucpmap_getRange(pMap, rangeStart, UCPMAP_RANGE_NORMAL, 0,
                                        nullptr, nullptr, &rangeValue)) >= 0;
            rangeStart = rangeEnd + 1) {
        if (valueNameGetter == nullptr) {
            fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", rangeStart, rangeEnd, rangeValue);
        } else {
            const char *valueName = valueNameGetter->getName(rangeValue);
            fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n",
                    rangeStart, rangeEnd, rangeValue, valueName);
        }
    }

    fputs("]\n", f);
}
|
|
|
|
/**
 * Writes a byte array as comma-separated C literals: bytes below 0x20 as
 * decimal numbers, all other bytes as character literals.
 *
 * The apostrophe and the backslash are written escaped ('\'' and '\\')
 * so that the generated character literal is valid.
 *
 * Lines are broken at "interesting" places (see the conditions in the loop)
 * to keep diffs between revisions of the generated file small.
 *
 * @param f       Output file.
 * @param prefix  If not NULL, used as a printf-style format string that
 *                receives length as a long; written before the values.
 * @param p       The bytes to write.
 * @param length  Number of bytes in p.
 * @param postfix If not NULL, written verbatim after the values.
 */
U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE *f,
                                const char *prefix,
                                const char *p, int32_t length,
                                const char *postfix) {
    int32_t i, col;
    int prev2, prev, c;

    if(prefix!=NULL) {
        fprintf(f, prefix, (long)length);
    }
    prev2=prev=-1;
    for(i=col=0; i<length; ++i, ++col) {
        c=(uint8_t)p[i];
        if(i>0) {
            /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
            if(
                /* Very long line. */
                col>=32 ||
                /* Long line, break after terminating NUL. */
                (col>=24 && prev2>=0x20 && prev==0) ||
                /* Medium-long line, break before non-NUL, non-character byte. */
                (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
            ) {
                fputs(",\n", f);
                col=0;
            } else {
                fputc(',', f);
            }
        }
        if(c<0x20) {
            fprintf(f, "%u", (unsigned)c);
        } else if(c=='\'' || c=='\\') {
            /* These two need a backslash to form a valid character literal. */
            fprintf(f, "'\\%c'", c);
        } else {
            fprintf(f, "'%c'", c);
        }
        prev2=prev;
        prev=c;
    }
    if(postfix!=NULL) {
        fputs(postfix, f);
    }
}
|
|
|
|
/**
 * Writes a UTF-16 string as a double-quoted, ASCII-only string literal.
 *
 * - '"' and '\' are written with a preceding backslash. The backslash must
 *   be escaped because it introduces the escapes produced below.
 * - Code points for which ICU_Utility::isUnprintable() is true are written
 *   in the form produced by ICU_Utility::escapeUnprintable().
 * - Everything else is expected to be ASCII (asserted) and written as is.
 *
 * @param f      Output file.
 * @param ptr    The UTF-16 text.
 * @param length Number of UTF-16 code units in ptr.
 * The UTargetSyntax argument is currently ignored.
 */
U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE *f,
        const UChar* ptr, int32_t length,
        UTargetSyntax) {
    // For now, assume all UTargetSyntax values are valid here.
    fputs("\"", f);
    int32_t i = 0;
    UChar32 cp;
    while (i < length) {
        U16_NEXT(ptr, i, length, cp);
        if (cp == u'"') {
            fputs("\\\"", f);
        } else if (cp == u'\\') {
            // A raw backslash would be read back as the start of an escape.
            fputs("\\\\", f);
        } else if (ICU_Utility::isUnprintable(cp)) {
            UnicodeString u16result;
            ICU_Utility::escapeUnprintable(u16result, cp);
            std::string u8result;
            u16result.toUTF8String(u8result);
            fputs(u8result.c_str(), f);
        } else {
            U_ASSERT(cp < 0x80);
            char s[2] = {static_cast<char>(cp), 0};
            fputs(s, f);
        }
    }
    fputs("\"", f);
}
|