1715 lines
62 KiB
C++
1715 lines
62 KiB
C++
//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Support/ConvertUTF.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/Support/Format.h"
|
|
#include "gtest/gtest.h"
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
using namespace llvm;
|
|
|
|
TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
  // Input: the look of disapproval as UTF-16 bytes, preceded by the
  // "\xff\xfe" byte order mark.
  static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  // The same text as UTF-8.
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Result;
  // sizeof(Src) - 1 leaves out the NUL that terminates the string literal.
  const bool Success =
      convertUTF16ToUTF8String(ArrayRef<char>(Src, sizeof(Src) - 1), Result);

  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
|
|
|
|
TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
  // Input: the look of disapproval as UTF-16 bytes, preceded by the
  // "\xfe\xff" byte order mark.
  static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
  // The same text as UTF-8.
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Result;
  // sizeof(Src) - 1 leaves out the NUL that terminates the string literal.
  const bool Success =
      convertUTF16ToUTF8String(ArrayRef<char>(Src, sizeof(Src) - 1), Result);

  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
|
|
|
|
TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
  // Input: the look of disapproval as UTF-8.
  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  // Only the first three entries are compared below; the trailing 0 is not
  // part of the checked output.
  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};

  SmallVector<UTF16, 5> Result;
  // sizeof(Src) - 1 leaves out the NUL that terminates the string literal.
  const bool Success =
      convertUTF8ToUTF16String(StringRef(Src, sizeof(Src) - 1), Result);
  EXPECT_TRUE(Success);

  // Stop here on a size mismatch so the element loop never reads past the
  // end of Result.
  ASSERT_EQ(3u, Result.size());
  for (int I = 0; I != 3; ++I)
    EXPECT_EQ(Expected[I], Result[I]);
}
|
|
|
|
TEST(ConvertUTFTest, OddLengthInput) {
  // Five bytes cannot be split into whole 16-bit code units, so the
  // conversion is expected to report failure.
  const ArrayRef<char> FiveBytes = makeArrayRef("xxxxx", 5);
  std::string Result;
  const bool Success = convertUTF16ToUTF8String(FiveBytes, Result);
  EXPECT_FALSE(Success);
}
|
|
|
|
TEST(ConvertUTFTest, Empty) {
  // An empty byte range is expected to convert successfully to an empty
  // string.
  const llvm::ArrayRef<char> NoBytes(None);
  std::string Result;
  const bool Success = convertUTF16ToUTF8String(NoBytes, Result);
  EXPECT_TRUE(Success);
  EXPECT_TRUE(Result.empty());
}
|
|
|
|
TEST(ConvertUTFTest, HasUTF16BOM) {
  // Inputs that begin with either two-byte ordering of the mark.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2)));
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2)));
  // Don't care about odd lengths.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3)));
  // Bytes after the mark do not matter.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6)));

  // Inputs shorter than two bytes.
  EXPECT_FALSE(hasUTF16ByteOrderMark(None));
  EXPECT_FALSE(hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1)));
}
|
|
|
|
TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
  // Src is the look of disapproval.
  //
  // The bytes are read back through a `const UTF16 *` below. A plain char
  // array is only guaranteed byte alignment, so request UTF16 alignment
  // explicitly to keep those reads well-defined on strict-alignment targets.
  alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";

  // Four 16-bit units: the byte order mark plus three characters, i.e. the
  // eight payload bytes of Src without its terminating NUL.
  ArrayRef<UTF16> SrcRef =
      makeArrayRef(reinterpret_cast<const UTF16 *>(Src), 4);

  std::string Result;
  bool Success = convertUTF16ToUTF8String(SrcRef, Result);
  EXPECT_TRUE(Success);

  std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
  EXPECT_EQ(Expected, Result);
}
|
|
|
|
TEST(ConvertUTFTest, ConvertUTF8toWide) {
  // Input: the look of disapproval as UTF-8, and the same text as a wide
  // string.
  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  const std::wstring Expected(L"\x0ca0_\x0ca0");

  std::wstring Result;

  // First pass: hand the input over as a `const char *`.
  bool Success = ConvertUTF8toWide(static_cast<const char *>(Src), Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);

  // Second pass: hand the same seven bytes over as a StringRef, starting
  // from an empty output string.
  Result.clear();
  Success = ConvertUTF8toWide(StringRef(Src, 7), Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
|
|
|
|
TEST(ConvertUTFTest, convertWideToUTF8) {
  // Input: the look of disapproval as a wide string, and the same text as
  // UTF-8.
  static const wchar_t Src[] = L"\x0ca0_\x0ca0";
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Result;
  const bool Success = convertWideToUTF8(Src, Result);

  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
|
|
|
|
struct ConvertUTFResultContainer {
|
|
ConversionResult ErrorCode;
|
|
std::vector<unsigned> UnicodeScalars;
|
|
|
|
ConvertUTFResultContainer(ConversionResult ErrorCode)
|
|
: ErrorCode(ErrorCode) {}
|
|
|
|
ConvertUTFResultContainer
|
|
withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
|
|
unsigned US2 = 0x110000, unsigned US3 = 0x110000,
|
|
unsigned US4 = 0x110000, unsigned US5 = 0x110000,
|
|
unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
|
|
ConvertUTFResultContainer Result(*this);
|
|
if (US0 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US0);
|
|
if (US1 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US1);
|
|
if (US2 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US2);
|
|
if (US3 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US3);
|
|
if (US4 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US4);
|
|
if (US5 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US5);
|
|
if (US6 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US6);
|
|
if (US7 != 0x110000)
|
|
Result.UnicodeScalars.push_back(US7);
|
|
return Result;
|
|
}
|
|
};
|
|
|
|
std::pair<ConversionResult, std::vector<unsigned>>
|
|
ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
|
|
const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
|
|
|
|
const UTF8 *SourceNext = SourceStart;
|
|
std::vector<UTF32> Decoded(S.size(), 0);
|
|
UTF32 *TargetStart = Decoded.data();
|
|
|
|
auto ErrorCode =
|
|
ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
|
|
Decoded.data() + Decoded.size(), lenientConversion);
|
|
|
|
Decoded.resize(TargetStart - Decoded.data());
|
|
|
|
return std::make_pair(ErrorCode, Decoded);
|
|
}
|
|
|
|
std::pair<ConversionResult, std::vector<unsigned>>
|
|
ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
|
|
const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
|
|
|
|
const UTF8 *SourceNext = SourceStart;
|
|
std::vector<UTF32> Decoded(S.size(), 0);
|
|
UTF32 *TargetStart = Decoded.data();
|
|
|
|
auto ErrorCode = ConvertUTF8toUTF32Partial(
|
|
&SourceNext, SourceStart + S.size(), &TargetStart,
|
|
Decoded.data() + Decoded.size(), lenientConversion);
|
|
|
|
Decoded.resize(TargetStart - Decoded.data());
|
|
|
|
return std::make_pair(ErrorCode, Decoded);
|
|
}
|
|
|
|
// Decodes S leniently and compares the outcome with Expected.
//
// Expected: the result code and scalar sequence the decode should produce.
// S:        the UTF-8 bytes to decode.
// Partial:  when true, decode via ConvertUTF8ToUnicodeScalarsPartialLenient;
//           otherwise via ConvertUTF8ToUnicodeScalarsLenient.
//
// Returns AssertionSuccess when both the result code and the scalars match.
// Otherwise returns an AssertionFailure describing the first mismatch; the
// result code is checked before the scalars.
::testing::AssertionResult
CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
                                 StringRef S, bool Partial = false) {
  // Bind the returned pair directly rather than unpacking it with std::tie:
  // this needs only <utility> and leaves no local uninitialised.
  const std::pair<ConversionResult, std::vector<unsigned>> Actual =
      Partial ? ConvertUTF8ToUnicodeScalarsPartialLenient(S)
              : ConvertUTF8ToUnicodeScalarsLenient(S);
  const ConversionResult ErrorCode = Actual.first;
  const std::vector<unsigned> &Decoded = Actual.second;

  if (Expected.ErrorCode != ErrorCode)
    return ::testing::AssertionFailure() << "Expected error code "
                                         << Expected.ErrorCode << ", actual "
                                         << ErrorCode;

  if (Expected.UnicodeScalars != Decoded)
    return ::testing::AssertionFailure()
           << "Expected lenient decoded result:\n"
           << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
           << "Actual result:\n" << ::testing::PrintToString(Decoded);

  return ::testing::AssertionSuccess();
}
|
|
|
|
TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
|
|
|
|
//
|
|
// 1-byte sequences
|
|
//
|
|
|
|
// U+0041 LATIN CAPITAL LETTER A
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
|
|
|
|
//
|
|
// 2-byte sequences
|
|
//
|
|
|
|
// U+0283 LATIN SMALL LETTER ESH
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
|
|
"\xca\x83"));
|
|
|
|
// U+03BA GREEK SMALL LETTER KAPPA
|
|
// U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
|
|
// U+03C3 GREEK SMALL LETTER SIGMA
|
|
// U+03BC GREEK SMALL LETTER MU
|
|
// U+03B5 GREEK SMALL LETTER EPSILON
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK)
|
|
.withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
|
|
"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
|
|
|
|
//
|
|
// 3-byte sequences
|
|
//
|
|
|
|
// U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
|
|
// U+6587 CJK UNIFIED IDEOGRAPH-6587
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
|
|
"\xe4\xbe\x8b\xe6\x96\x87"));
|
|
|
|
// U+D55C HANGUL SYLLABLE HAN
|
|
// U+AE00 HANGUL SYLLABLE GEUL
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
|
|
"\xed\x95\x9c\xea\xb8\x80"));
|
|
|
|
// U+1112 HANGUL CHOSEONG HIEUH
|
|
// U+1161 HANGUL JUNGSEONG A
|
|
// U+11AB HANGUL JONGSEONG NIEUN
|
|
// U+1100 HANGUL CHOSEONG KIYEOK
|
|
// U+1173 HANGUL JUNGSEONG EU
|
|
// U+11AF HANGUL JONGSEONG RIEUL
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK)
|
|
.withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
|
|
"\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
|
|
"\xe1\x86\xaf"));
|
|
|
|
//
|
|
// 4-byte sequences
|
|
//
|
|
|
|
// U+E0100 VARIATION SELECTOR-17
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
|
|
"\xf3\xa0\x84\x80"));
|
|
|
|
//
|
|
// First possible sequence of a certain length
|
|
//
|
|
|
|
// U+0000 NULL
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
|
|
StringRef("\x00", 1)));
|
|
|
|
// U+0080 PADDING CHARACTER
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
|
|
"\xc2\x80"));
|
|
|
|
// U+0800 SAMARITAN LETTER ALAF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
|
|
"\xe0\xa0\x80"));
|
|
|
|
// U+10000 LINEAR B SYLLABLE B008 A
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
|
|
"\xf0\x90\x80\x80"));
|
|
|
|
// U+200000 (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x88\x80\x80\x80"));
|
|
|
|
// U+4000000 (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x84\x80\x80\x80\x80"));
|
|
|
|
//
|
|
// Last possible sequence of a certain length
|
|
//
|
|
|
|
// U+007F DELETE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
|
|
|
|
// U+07FF (unassigned)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
|
|
"\xdf\xbf"));
|
|
|
|
// U+FFFF (noncharacter)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
|
|
"\xef\xbf\xbf"));
|
|
|
|
// U+1FFFFF (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf7\xbf\xbf\xbf"));
|
|
|
|
// U+3FFFFFF (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfb\xbf\xbf\xbf\xbf"));
|
|
|
|
// U+7FFFFFFF (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\xbf\xbf\xbf\xbf\xbf"));
|
|
|
|
//
|
|
// Other boundary conditions
|
|
//
|
|
|
|
// U+D7FF (unassigned)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
|
|
"\xed\x9f\xbf"));
|
|
|
|
// U+E000 (private use)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
|
|
"\xee\x80\x80"));
|
|
|
|
// U+FFFD REPLACEMENT CHARACTER
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
|
|
"\xef\xbf\xbd"));
|
|
|
|
// U+10FFFF (noncharacter)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
|
|
"\xf4\x8f\xbf\xbf"));
|
|
|
|
// U+110000 (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf4\x90\x80\x80"));
|
|
|
|
//
|
|
// Unexpected continuation bytes
|
|
//
|
|
|
|
// A sequence of unexpected continuation bytes that don't follow a first
|
|
// byte, every byte is a maximal subpart.
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\x80\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xbf\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\x80\xbf\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\x80\xbf\x80\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\x80\xbf\x82\xbf\xaa"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xaa\xb0\xbb\xbf\xaa\xa0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
|
|
|
|
// All continuation bytes (0x80--0xbf).
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
|
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
|
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
|
|
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
|
|
|
|
//
|
|
// Lonely start bytes
|
|
//
|
|
|
|
// Start bytes of 2-byte sequences (0xc0--0xdf).
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
|
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020),
|
|
"\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
|
|
"\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
|
|
"\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
|
|
"\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
|
|
|
|
// Start bytes of 3-byte sequences (0xe0--0xef).
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020),
|
|
"\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
|
|
"\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
|
|
|
|
// Start bytes of 4-byte sequences (0xf0--0xf7).
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
|
0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020),
|
|
"\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
|
|
|
|
// Start bytes of 5-byte sequences (0xf8--0xfb).
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\xf9\xfa\xfb"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020),
|
|
"\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
|
|
|
|
// Start bytes of 6-byte sequences (0xfc--0xfd).
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfc\xfd"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
|
|
"\xfc\x20\xfd\x20"));
|
|
|
|
//
|
|
// Other bytes (0xc0--0xc1, 0xfe--0xff).
|
|
//
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xc0\xc1\xfe\xff"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfe\xfe\xff\xff"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfe\x80\x80\x80\x80\x80"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xff\x80\x80\x80\x80\x80"));
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
|
|
0xfffd, 0x0020, 0xfffd, 0x0020),
|
|
"\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
|
|
|
|
//
|
|
// Sequences with one continuation byte missing
|
|
//
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xe0\xa0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xe0\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xe1\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xec\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xed\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xed\x9f"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xee\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xef\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf0\x90\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf0\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf1\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf3\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf4\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf4\x8f\xbf"));
|
|
|
|
// Overlong sequences with one trailing byte missing.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xc0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xc1"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xe0\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xe0\x9f"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\x8f\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x80\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x80\x80\x80\x80"));
|
|
|
|
// Sequences that represent surrogates with one trailing byte missing.
|
|
// High surrogates
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xed\xa0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xed\xac"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xed\xaf"));
|
|
// Low surrogates
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xed\xb0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xed\xb4"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xed\xbf"));
|
|
|
|
// Ill-formed 4-byte sequences.
|
|
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+1100xx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf4\x90\x80"));
|
|
// U+13FBxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf4\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf5\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf6\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf7\x80\x80"));
|
|
// U+1FFBxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf7\xbf\xbf"));
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+2000xx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x88\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\xbf\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf9\x80\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfa\x80\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfb\x80\x80\x80"));
|
|
// U+3FFFFxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfb\xbf\xbf\xbf"));
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+40000xx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x84\x80\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\xbf\xbf\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\x80\x80\x80\x80"));
|
|
// U+7FFFFFxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\xbf\xbf\xbf\xbf"));
|
|
|
|
//
|
|
// Sequences with two continuation bytes missing
|
|
//
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf0\x90"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf0\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf1\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf3\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf4\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
|
|
"\xf4\x8f"));
|
|
|
|
// Overlong sequences with two trailing bytes missing.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf0\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf0\x8f"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x80\x80\x80"));
|
|
|
|
// Sequences that represent surrogates with two trailing bytes missing.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
|
|
|
|
// Ill-formed 4-byte sequences.
|
|
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+110yxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf4\x90"));
|
|
// U+13Fyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf4\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf5\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf6\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf7\x80"));
|
|
// U+1FFyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf7\xbf"));
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+200yxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x88\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xf9\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfa\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfb\x80\x80"));
|
|
// U+3FFFyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfb\xbf\xbf"));
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+4000yxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x84\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\xbf\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\x80\x80\x80"));
|
|
// U+7FFFFyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\xbf\xbf\xbf"));
|
|
|
|
//
|
|
// Sequences with three continuation bytes missing
|
|
//
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
|
|
|
|
// Broken overlong sequences.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf8\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x80\x80"));
|
|
|
|
// Ill-formed 4-byte sequences.
|
|
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+14yyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
|
|
// U+1Cyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+20yyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf8\x88"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf8\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xf9\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfa\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfb\x80"));
|
|
// U+3FCyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfb\xbf"));
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+400yyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x84\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\x80\x80"));
|
|
// U+7FFCyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xfd\xbf\xbf"));
|
|
|
|
//
|
|
// Sequences with four continuation bytes missing
|
|
//
|
|
|
|
// Ill-formed 5-byte sequences.
|
|
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+uzyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
|
|
// U+3zyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
|
|
|
|
// Broken overlong sequences.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfc\x80"));
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+uzzyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfc\x84"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfc\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfd\x80"));
|
|
// U+7Fzzyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xfd\xbf"));
|
|
|
|
//
|
|
// Sequences with five continuation bytes missing
|
|
//
|
|
|
|
// Ill-formed 6-byte sequences.
|
|
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
|
|
// U+uzzyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
|
|
// U+uuzzyyxx (invalid)
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
|
|
|
|
//
|
|
// Consecutive sequences with trailing bytes missing
|
|
//
|
|
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xc0" "\xe0\x80" "\xf0\x80\x80"
|
|
"\xf8\x80\x80\x80"
|
|
"\xfc\x80\x80\x80\x80"
|
|
"\xdf" "\xef\xbf" "\xf7\xbf\xbf"
|
|
"\xfb\xbf\xbf\xbf"
|
|
"\xfd\xbf\xbf\xbf\xbf"));
|
|
|
|
//
|
|
// Overlong UTF-8 sequences
|
|
//
|
|
|
|
// U+002F SOLIDUS
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
|
|
|
|
// Overlong sequences of the above.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xc0\xaf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xe0\x80\xaf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\x80\x80\xaf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x80\x80\x80\xaf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x80\x80\x80\x80\xaf"));
|
|
|
|
// U+0000 NULL
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
|
|
StringRef("\x00", 1)));
|
|
|
|
// Overlong sequences of the above.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xc0\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xe0\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\x80\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x80\x80\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x80\x80\x80\x80\x80"));
|
|
|
|
// Other overlong sequences.
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xc0\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xc1\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
|
|
"\xc1\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xe0\x9f\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xa0\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\x8f\x80\x80"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf0\x8f\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xf8\x87\xbf\xbf\xbf"));
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xfc\x83\xbf\xbf\xbf\xbf"));
|
|
|
|
//
|
|
// Isolated surrogates
|
|
//
|
|
|
|
// Unicode 6.3.0:
|
|
//
|
|
// D71. High-surrogate code point: A Unicode code point in the range
|
|
// U+D800 to U+DBFF.
|
|
//
|
|
// D73. Low-surrogate code point: A Unicode code point in the range
|
|
// U+DC00 to U+DFFF.
|
|
|
|
// Note: U+E0100 is <DB40 DD00> in UTF16.
|
|
|
|
// High surrogates
|
|
|
|
// U+D800
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xa0\x80"));
|
|
|
|
// U+DB40
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xac\xa0"));
|
|
|
|
// U+DBFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xaf\xbf"));
|
|
|
|
// Low surrogates
|
|
|
|
// U+DC00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xb0\x80"));
|
|
|
|
// U+DD00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xb4\x80"));
|
|
|
|
// U+DFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xbf\xbf"));
|
|
|
|
// Surrogate pairs
|
|
|
|
// U+D800 U+DC00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xa0\x80\xed\xb0\x80"));
|
|
|
|
// U+D800 U+DD00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xa0\x80\xed\xb4\x80"));
|
|
|
|
// U+D800 U+DFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xa0\x80\xed\xbf\xbf"));
|
|
|
|
// U+DB40 U+DC00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xac\xa0\xed\xb0\x80"));
|
|
|
|
// U+DB40 U+DD00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xac\xa0\xed\xb4\x80"));
|
|
|
|
// U+DB40 U+DFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xac\xa0\xed\xbf\xbf"));
|
|
|
|
// U+DBFF U+DC00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xaf\xbf\xed\xb0\x80"));
|
|
|
|
// U+DBFF U+DD00
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xaf\xbf\xed\xb4\x80"));
|
|
|
|
// U+DBFF U+DFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(sourceIllegal)
|
|
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
|
|
"\xed\xaf\xbf\xed\xbf\xbf"));
|
|
|
|
//
|
|
// Noncharacters
|
|
//
|
|
|
|
// Unicode 6.3.0:
|
|
//
|
|
// D14. Noncharacter: A code point that is permanently reserved for
|
|
// internal use and that should never be interchanged. Noncharacters
|
|
// consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
|
|
// and the values U+FDD0..U+FDEF.
|
|
|
|
// U+FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
|
|
"\xef\xbf\xbe"));
|
|
|
|
// U+FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
|
|
"\xef\xbf\xbf"));
|
|
|
|
// U+1FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
|
|
"\xf0\x9f\xbf\xbe"));
|
|
|
|
// U+1FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
|
|
"\xf0\x9f\xbf\xbf"));
|
|
|
|
// U+2FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
|
|
"\xf0\xaf\xbf\xbe"));
|
|
|
|
// U+2FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
|
|
"\xf0\xaf\xbf\xbf"));
|
|
|
|
// U+3FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
|
|
"\xf0\xbf\xbf\xbe"));
|
|
|
|
// U+3FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
|
|
"\xf0\xbf\xbf\xbf"));
|
|
|
|
// U+4FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
|
|
"\xf1\x8f\xbf\xbe"));
|
|
|
|
// U+4FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
|
|
"\xf1\x8f\xbf\xbf"));
|
|
|
|
// U+5FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
|
|
"\xf1\x9f\xbf\xbe"));
|
|
|
|
// U+5FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
|
|
"\xf1\x9f\xbf\xbf"));
|
|
|
|
// U+6FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
|
|
"\xf1\xaf\xbf\xbe"));
|
|
|
|
// U+6FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
|
|
"\xf1\xaf\xbf\xbf"));
|
|
|
|
// U+7FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
|
|
"\xf1\xbf\xbf\xbe"));
|
|
|
|
// U+7FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
|
|
"\xf1\xbf\xbf\xbf"));
|
|
|
|
// U+8FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
|
|
"\xf2\x8f\xbf\xbe"));
|
|
|
|
// U+8FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
|
|
"\xf2\x8f\xbf\xbf"));
|
|
|
|
// U+9FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
|
|
"\xf2\x9f\xbf\xbe"));
|
|
|
|
// U+9FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
|
|
"\xf2\x9f\xbf\xbf"));
|
|
|
|
// U+AFFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
|
|
"\xf2\xaf\xbf\xbe"));
|
|
|
|
// U+AFFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
|
|
"\xf2\xaf\xbf\xbf"));
|
|
|
|
// U+BFFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
|
|
"\xf2\xbf\xbf\xbe"));
|
|
|
|
// U+BFFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
|
|
"\xf2\xbf\xbf\xbf"));
|
|
|
|
// U+CFFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
|
|
"\xf3\x8f\xbf\xbe"));
|
|
|
|
// U+CFFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
|
|
"\xf3\x8f\xbf\xbf"));
|
|
|
|
// U+DFFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
|
|
"\xf3\x9f\xbf\xbe"));
|
|
|
|
// U+DFFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
|
|
"\xf3\x9f\xbf\xbf"));
|
|
|
|
// U+EFFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
|
|
"\xf3\xaf\xbf\xbe"));
|
|
|
|
// U+EFFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
|
|
"\xf3\xaf\xbf\xbf"));
|
|
|
|
// U+FFFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
|
|
"\xf3\xbf\xbf\xbe"));
|
|
|
|
// U+FFFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
|
|
"\xf3\xbf\xbf\xbf"));
|
|
|
|
// U+10FFFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
|
|
"\xf4\x8f\xbf\xbe"));
|
|
|
|
// U+10FFFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
|
|
"\xf4\x8f\xbf\xbf"));
|
|
|
|
// U+FDD0
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
|
|
"\xef\xb7\x90"));
|
|
|
|
// U+FDD1
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
|
|
"\xef\xb7\x91"));
|
|
|
|
// U+FDD2
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
|
|
"\xef\xb7\x92"));
|
|
|
|
// U+FDD3
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
|
|
"\xef\xb7\x93"));
|
|
|
|
// U+FDD4
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
|
|
"\xef\xb7\x94"));
|
|
|
|
// U+FDD5
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
|
|
"\xef\xb7\x95"));
|
|
|
|
// U+FDD6
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
|
|
"\xef\xb7\x96"));
|
|
|
|
// U+FDD7
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
|
|
"\xef\xb7\x97"));
|
|
|
|
// U+FDD8
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
|
|
"\xef\xb7\x98"));
|
|
|
|
// U+FDD9
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
|
|
"\xef\xb7\x99"));
|
|
|
|
// U+FDDA
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
|
|
"\xef\xb7\x9a"));
|
|
|
|
// U+FDDB
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
|
|
"\xef\xb7\x9b"));
|
|
|
|
// U+FDDC
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
|
|
"\xef\xb7\x9c"));
|
|
|
|
// U+FDDD
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
|
|
"\xef\xb7\x9d"));
|
|
|
|
// U+FDDE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
|
|
"\xef\xb7\x9e"));
|
|
|
|
// U+FDDF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
|
|
"\xef\xb7\x9f"));
|
|
|
|
// U+FDE0
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
|
|
"\xef\xb7\xa0"));
|
|
|
|
// U+FDE1
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
|
|
"\xef\xb7\xa1"));
|
|
|
|
// U+FDE2
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
|
|
"\xef\xb7\xa2"));
|
|
|
|
// U+FDE3
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
|
|
"\xef\xb7\xa3"));
|
|
|
|
// U+FDE4
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
|
|
"\xef\xb7\xa4"));
|
|
|
|
// U+FDE5
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
|
|
"\xef\xb7\xa5"));
|
|
|
|
// U+FDE6
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
|
|
"\xef\xb7\xa6"));
|
|
|
|
// U+FDE7
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
|
|
"\xef\xb7\xa7"));
|
|
|
|
// U+FDE8
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
|
|
"\xef\xb7\xa8"));
|
|
|
|
// U+FDE9
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
|
|
"\xef\xb7\xa9"));
|
|
|
|
// U+FDEA
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
|
|
"\xef\xb7\xaa"));
|
|
|
|
// U+FDEB
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
|
|
"\xef\xb7\xab"));
|
|
|
|
// U+FDEC
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
|
|
"\xef\xb7\xac"));
|
|
|
|
// U+FDED
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
|
|
"\xef\xb7\xad"));
|
|
|
|
// U+FDEE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
|
|
"\xef\xb7\xae"));
|
|
|
|
// U+FDEF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
|
|
"\xef\xb7\xaf"));
|
|
|
|
// U+FDF0
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
|
|
"\xef\xb7\xb0"));
|
|
|
|
// U+FDF1
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
|
|
"\xef\xb7\xb1"));
|
|
|
|
// U+FDF2
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
|
|
"\xef\xb7\xb2"));
|
|
|
|
// U+FDF3
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
|
|
"\xef\xb7\xb3"));
|
|
|
|
// U+FDF4
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
|
|
"\xef\xb7\xb4"));
|
|
|
|
// U+FDF5
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
|
|
"\xef\xb7\xb5"));
|
|
|
|
// U+FDF6
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
|
|
"\xef\xb7\xb6"));
|
|
|
|
// U+FDF7
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
|
|
"\xef\xb7\xb7"));
|
|
|
|
// U+FDF8
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
|
|
"\xef\xb7\xb8"));
|
|
|
|
// U+FDF9
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
|
|
"\xef\xb7\xb9"));
|
|
|
|
// U+FDFA
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
|
|
"\xef\xb7\xba"));
|
|
|
|
// U+FDFB
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
|
|
"\xef\xb7\xbb"));
|
|
|
|
// U+FDFC
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
|
|
"\xef\xb7\xbc"));
|
|
|
|
// U+FDFD
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
|
|
"\xef\xb7\xbd"));
|
|
|
|
// U+FDFE
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
|
|
"\xef\xb7\xbe"));
|
|
|
|
// U+FDFF
|
|
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
|
|
ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
|
|
"\xef\xb7\xbf"));
|
|
}
|
|
|
|
TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
  // Every check in this test passes `true` as the trailing argument of
  // CheckConvertUTF8ToUnicodeScalars (presumably the "partial input" switch
  // this test is named after -- confirm against the helper's declaration).

  // Baseline: a complete one-byte sequence converts successfully.
  // U+0041 LATIN CAPITAL LETTER A
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
      "\x41", true));

  //
  // Sequences with one continuation byte missing
  //

  // Each input below stops exactly one byte short of a complete sequence.
  // The expectation is the same for all of them: sourceExhausted, with no
  // withScalars() expectation attached to the result container.
  static const char *const TruncatedInputs[] = {
      // Two-byte forms: lead byte only.
      "\xc2",
      "\xdf",
      // Three-byte forms: lead byte plus one continuation byte.
      "\xe0\xa0",
      "\xe0\xbf",
      "\xe1\x80",
      "\xec\xbf",
      "\xed\x80",
      "\xed\x9f",
      "\xee\x80",
      "\xef\xbf",
      // Four-byte forms: lead byte plus two continuation bytes.
      "\xf0\x90\x80",
      "\xf0\xbf\xbf",
      "\xf1\x80\x80",
      "\xf3\xbf\xbf",
      "\xf4\x80\x80",
      "\xf4\x8f\xbf",
  };

  unsigned CaseIndex = 0;
  for (const char *Input : TruncatedInputs) {
    // The streamed index only appears on failure; it identifies which table
    // entry was rejected, since all entries share one source line.
    EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
        ConvertUTFResultContainer(sourceExhausted), Input, true))
        << "truncated input #" << CaseIndex;
    ++CaseIndex;
  }

  // A complete scalar followed by a truncated two-byte sequence: the result
  // is still sourceExhausted, and U+0041 is expected among the scalars.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
      "\x41\xc2", true));
}
|
|
|