574 lines
12 KiB
C++
574 lines
12 KiB
C++
/*-------------------------------------------------------------------------
 * drawElements Quality Program Test Executor
 * ------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief XML Parser.
 *//*--------------------------------------------------------------------*/
|
|
|
|
#include "xeXMLParser.hpp"
|
|
#include "deInt32.h"
|
|
|
|
namespace xe
|
|
{
|
|
namespace xml
|
|
{
|
|
|
|
//! Number of elements the tokenizer buffer is constructed with (passed to m_buf in Tokenizer's constructor).
enum
{
	TOKENIZER_INITIAL_BUFFER_SIZE = 1024
};
|
|
|
|
/*--------------------------------------------------------------------*//*!
 * \brief Check whether a character may begin an identifier.
 * \param ch Character code to classify.
 * \return true if ch is an ASCII letter ('a'-'z' or 'A'-'Z'), false otherwise.
 *//*--------------------------------------------------------------------*/
static inline bool isIdentifierStartChar (int ch)
{
	const bool	isLowerCase	= ('a' <= ch) && (ch <= 'z');
	const bool	isUpperCase	= ('A' <= ch) && (ch <= 'Z');

	return isLowerCase || isUpperCase;
}
|
|
|
|
static inline bool isIdentifierChar (int ch)
|
|
{
|
|
return isIdentifierStartChar(ch) || de::inRange<int>(ch, '0', '9') || (ch == '-') || (ch == '_');
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*//*!
 * \brief Check whether a character is whitespace.
 * \param ch Character code to classify.
 * \return true if ch is space, tab, carriage return or line feed.
 *//*--------------------------------------------------------------------*/
static inline bool isWhitespaceChar (int ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;

		default:
			return false;
	}
}
|
|
|
|
static int getNextBufferSize (int curSize, int minNewSize)
|
|
{
|
|
return de::max(curSize*2, 1<<deLog2Ceil32(minNewSize));
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*//*!
 * \brief Construct a tokenizer with no token parsed yet.
 *
 * Starts in data state with an incomplete, zero-length current token and
 * a buffer constructed with TOKENIZER_INITIAL_BUFFER_SIZE.
 *//*--------------------------------------------------------------------*/
Tokenizer::Tokenizer (void)
	: m_curToken	(TOKEN_INCOMPLETE)
	, m_curTokenLen	(0)
	, m_state		(STATE_DATA)
	, m_buf			(TOKENIZER_INITIAL_BUFFER_SIZE)
{
}
|
|
|
|
//! Destructor; performs no explicit work of its own.
Tokenizer::~Tokenizer (void)
{
}
|
|
|
|
void Tokenizer::clear (void)
|
|
{
|
|
m_curToken = TOKEN_INCOMPLETE;
|
|
m_curTokenLen = 0;
|
|
m_state = STATE_DATA;
|
|
m_buf.clear();
|
|
}
|
|
|
|
void Tokenizer::error (const std::string& what)
|
|
{
|
|
throw ParseError(what);
|
|
}
|
|
|
|
void Tokenizer::feed (const deUint8* bytes, int numBytes)
|
|
{
|
|
// Grow buffer if necessary.
|
|
if (m_buf.getNumFree() < numBytes)
|
|
{
|
|
m_buf.resize(getNextBufferSize(m_buf.getSize(), m_buf.getNumElements()+numBytes));
|
|
}
|
|
|
|
// Append to front.
|
|
m_buf.pushFront(bytes, numBytes);
|
|
|
|
// If we haven't parsed complete token, re-try after data feed.
|
|
if (m_curToken == TOKEN_INCOMPLETE)
|
|
advance();
|
|
}
|
|
|
|
int Tokenizer::getChar (int offset) const
|
|
{
|
|
DE_ASSERT(de::inRange(offset, 0, m_buf.getNumElements()));
|
|
|
|
if (offset < m_buf.getNumElements())
|
|
return m_buf.peekBack(offset);
|
|
else
|
|
return END_OF_BUFFER;
|
|
}
|
|
|
|
void Tokenizer::advance (void)
|
|
{
|
|
if (m_curToken != TOKEN_INCOMPLETE)
|
|
{
|
|
// Parser should not try to advance beyond end of string.
|
|
DE_ASSERT(m_curToken != TOKEN_END_OF_STRING);
|
|
|
|
// If current token is tag end, change state to data.
|
|
if (m_curToken == TOKEN_TAG_END ||
|
|
m_curToken == TOKEN_EMPTY_ELEMENT_END ||
|
|
m_curToken == TOKEN_PROCESSING_INSTRUCTION_END ||
|
|
m_curToken == TOKEN_COMMENT ||
|
|
m_curToken == TOKEN_ENTITY)
|
|
m_state = STATE_DATA;
|
|
|
|
// Advance buffer by length of last token.
|
|
m_buf.popBack(m_curTokenLen);
|
|
|
|
// Reset state.
|
|
m_curToken = TOKEN_INCOMPLETE;
|
|
m_curTokenLen = 0;
|
|
|
|
// If we hit end of string here, report it as end of string.
|
|
if (getChar(0) == END_OF_STRING)
|
|
{
|
|
m_curToken = TOKEN_END_OF_STRING;
|
|
m_curTokenLen = 1;
|
|
return;
|
|
}
|
|
}
|
|
|
|
int curChar = getChar(m_curTokenLen);
|
|
|
|
for (;;)
|
|
{
|
|
if (m_state == STATE_DATA)
|
|
{
|
|
// Advance until we hit end of buffer or tag start and treat that as data token.
|
|
if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER || curChar == '<' || curChar == '&')
|
|
{
|
|
if (curChar == '<')
|
|
m_state = STATE_TAG;
|
|
else if (curChar == '&')
|
|
m_state = STATE_ENTITY;
|
|
|
|
if (m_curTokenLen > 0)
|
|
{
|
|
// Report data token.
|
|
m_curToken = TOKEN_DATA;
|
|
return;
|
|
}
|
|
else if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER)
|
|
{
|
|
// Just return incomplete token, no data parsed.
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
DE_ASSERT(m_state == STATE_TAG || m_state == STATE_ENTITY);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Eat all whitespace if present.
|
|
if (m_curTokenLen == 0)
|
|
{
|
|
while (isWhitespaceChar(curChar))
|
|
{
|
|
m_buf.popBack();
|
|
curChar = getChar(0);
|
|
}
|
|
}
|
|
|
|
// Handle end of string / buffer.
|
|
if (curChar == END_OF_STRING)
|
|
error("Unexpected end of string");
|
|
else if (curChar == (int)END_OF_BUFFER)
|
|
{
|
|
DE_ASSERT(m_curToken == TOKEN_INCOMPLETE);
|
|
return;
|
|
}
|
|
|
|
if (m_curTokenLen == 0)
|
|
{
|
|
// Expect start of identifier, value or special tag token.
|
|
if (curChar == '\'' || curChar == '"')
|
|
m_state = STATE_VALUE;
|
|
else if (isIdentifierStartChar(curChar))
|
|
m_state = STATE_IDENTIFIER;
|
|
else if (curChar == '<' || curChar == '?' || curChar == '/')
|
|
m_state = STATE_TAG;
|
|
else if (curChar == '&')
|
|
DE_ASSERT(m_state == STATE_ENTITY);
|
|
else if (curChar == '=')
|
|
{
|
|
m_curToken = TOKEN_EQUAL;
|
|
m_curTokenLen = 1;
|
|
return;
|
|
}
|
|
else if (curChar == '>')
|
|
{
|
|
m_curToken = TOKEN_TAG_END;
|
|
m_curTokenLen = 1;
|
|
return;
|
|
}
|
|
else
|
|
error("Unexpected character");
|
|
}
|
|
else if (m_state == STATE_IDENTIFIER)
|
|
{
|
|
if (!isIdentifierChar(curChar))
|
|
{
|
|
m_curToken = TOKEN_IDENTIFIER;
|
|
return;
|
|
}
|
|
}
|
|
else if (m_state == STATE_VALUE)
|
|
{
|
|
// \todo [2012-06-07 pyry] Escapes.
|
|
if (curChar == '\'' || curChar == '"')
|
|
{
|
|
// \todo [2012-10-17 pyry] Should we actually do the check against getChar(0)?
|
|
if (curChar != getChar(0))
|
|
error("Mismatched quote");
|
|
m_curToken = TOKEN_STRING;
|
|
m_curTokenLen += 1;
|
|
return;
|
|
}
|
|
}
|
|
else if (m_state == STATE_COMMENT)
|
|
{
|
|
DE_ASSERT(m_curTokenLen >= 2); // 2 characters have been parsed if we are in comment state.
|
|
|
|
if (m_curTokenLen <= 3)
|
|
{
|
|
if (curChar != '-')
|
|
error("Invalid comment start");
|
|
}
|
|
else
|
|
{
|
|
int prev2 = m_curTokenLen > 5 ? getChar(m_curTokenLen-2) : 0;
|
|
int prev1 = m_curTokenLen > 4 ? getChar(m_curTokenLen-1) : 0;
|
|
|
|
if (prev2 == '-' && prev1 == '-')
|
|
{
|
|
if (curChar != '>')
|
|
error("Invalid comment end");
|
|
m_curToken = TOKEN_COMMENT;
|
|
m_curTokenLen += 1;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
else if (m_state == STATE_ENTITY)
|
|
{
|
|
if (m_curTokenLen >= 1)
|
|
{
|
|
if (curChar == ';')
|
|
{
|
|
m_curToken = TOKEN_ENTITY;
|
|
m_curTokenLen += 1;
|
|
return;
|
|
}
|
|
else if (!de::inRange<int>(curChar, '0', '9') &&
|
|
!de::inRange<int>(curChar, 'a', 'z') &&
|
|
!de::inRange<int>(curChar, 'A', 'Z'))
|
|
error("Invalid entity");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Special tokens are at most 2 characters.
|
|
DE_ASSERT(m_state == STATE_TAG && m_curTokenLen == 1);
|
|
|
|
int prevChar = getChar(m_curTokenLen-1);
|
|
|
|
if (prevChar == '<')
|
|
{
|
|
// Tag start.
|
|
if (curChar == '/')
|
|
{
|
|
m_curToken = TOKEN_END_TAG_START;
|
|
m_curTokenLen = 2;
|
|
return;
|
|
}
|
|
else if (curChar == '?')
|
|
{
|
|
m_curToken = TOKEN_PROCESSING_INSTRUCTION_START;
|
|
m_curTokenLen = 2;
|
|
return;
|
|
}
|
|
else if (curChar == '!')
|
|
{
|
|
m_state = STATE_COMMENT;
|
|
}
|
|
else
|
|
{
|
|
m_curToken = TOKEN_TAG_START;
|
|
m_curTokenLen = 1;
|
|
return;
|
|
}
|
|
}
|
|
else if (prevChar == '?')
|
|
{
|
|
if (curChar != '>')
|
|
error("Invalid processing instruction end");
|
|
m_curToken = TOKEN_PROCESSING_INSTRUCTION_END;
|
|
m_curTokenLen = 2;
|
|
return;
|
|
}
|
|
else if (prevChar == '/')
|
|
{
|
|
if (curChar != '>')
|
|
error("Invalid empty element end");
|
|
m_curToken = TOKEN_EMPTY_ELEMENT_END;
|
|
m_curTokenLen = 2;
|
|
return;
|
|
}
|
|
else
|
|
error("Could not parse special token");
|
|
}
|
|
}
|
|
|
|
m_curTokenLen += 1;
|
|
curChar = getChar(m_curTokenLen);
|
|
}
|
|
}
|
|
|
|
void Tokenizer::getString (std::string& dst) const
|
|
{
|
|
DE_ASSERT(m_curToken == TOKEN_STRING);
|
|
dst.resize(m_curTokenLen-2);
|
|
for (int ndx = 0; ndx < m_curTokenLen-2; ndx++)
|
|
dst[ndx] = m_buf.peekBack(ndx+1);
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*//*!
 * \brief Construct a parser with no element parsed yet.
 *
 * Starts in data state with the current element marked incomplete.
 *//*--------------------------------------------------------------------*/
Parser::Parser (void)
	: m_element	(ELEMENT_INCOMPLETE)
	, m_state	(STATE_DATA)
{
}
|
|
|
|
//! Destructor; performs no explicit work of its own.
Parser::~Parser (void)
{
}
|
|
|
|
void Parser::clear (void)
|
|
{
|
|
m_tokenizer.clear();
|
|
m_elementName.clear();
|
|
m_attributes.clear();
|
|
m_attribName.clear();
|
|
m_entityValue.clear();
|
|
|
|
m_element = ELEMENT_INCOMPLETE;
|
|
m_state = STATE_DATA;
|
|
}
|
|
|
|
void Parser::error (const std::string& what)
|
|
{
|
|
throw ParseError(what);
|
|
}
|
|
|
|
void Parser::feed (const deUint8* bytes, int numBytes)
|
|
{
|
|
m_tokenizer.feed(bytes, numBytes);
|
|
|
|
if (m_element == ELEMENT_INCOMPLETE)
|
|
advance();
|
|
}
|
|
|
|
void Parser::advance (void)
|
|
{
|
|
if (m_element == ELEMENT_START)
|
|
m_attributes.clear();
|
|
|
|
// \note No token is advanced when element end is reported.
|
|
if (m_state == STATE_YIELD_EMPTY_ELEMENT_END)
|
|
{
|
|
DE_ASSERT(m_element == ELEMENT_START);
|
|
m_element = ELEMENT_END;
|
|
m_state = STATE_DATA;
|
|
return;
|
|
}
|
|
|
|
if (m_element != ELEMENT_INCOMPLETE)
|
|
{
|
|
m_tokenizer.advance();
|
|
m_element = ELEMENT_INCOMPLETE;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
Token curToken = m_tokenizer.getToken();
|
|
|
|
// Skip comments.
|
|
while (curToken == TOKEN_COMMENT)
|
|
{
|
|
m_tokenizer.advance();
|
|
curToken = m_tokenizer.getToken();
|
|
}
|
|
|
|
if (curToken == TOKEN_INCOMPLETE)
|
|
{
|
|
DE_ASSERT(m_element == ELEMENT_INCOMPLETE);
|
|
return;
|
|
}
|
|
|
|
switch (m_state)
|
|
{
|
|
case STATE_ENTITY:
|
|
m_state = STATE_DATA;
|
|
// Fall-through
|
|
|
|
case STATE_DATA:
|
|
switch (curToken)
|
|
{
|
|
case TOKEN_DATA:
|
|
m_element = ELEMENT_DATA;
|
|
return;
|
|
|
|
case TOKEN_END_OF_STRING:
|
|
m_element = ELEMENT_END_OF_STRING;
|
|
return;
|
|
|
|
case TOKEN_TAG_START:
|
|
m_state = STATE_START_TAG_OPEN;
|
|
break;
|
|
|
|
case TOKEN_END_TAG_START:
|
|
m_state = STATE_END_TAG_OPEN;
|
|
break;
|
|
|
|
case TOKEN_PROCESSING_INSTRUCTION_START:
|
|
m_state = STATE_IN_PROCESSING_INSTRUCTION;
|
|
break;
|
|
|
|
case TOKEN_ENTITY:
|
|
m_state = STATE_ENTITY;
|
|
m_element = ELEMENT_DATA;
|
|
parseEntityValue();
|
|
return;
|
|
|
|
default:
|
|
error("Unexpected token");
|
|
}
|
|
break;
|
|
|
|
case STATE_IN_PROCESSING_INSTRUCTION:
|
|
if (curToken == TOKEN_PROCESSING_INSTRUCTION_END)
|
|
m_state = STATE_DATA;
|
|
else
|
|
if (curToken != TOKEN_IDENTIFIER && curToken != TOKEN_EQUAL && curToken != TOKEN_STRING)
|
|
error("Unexpected token in processing instruction");
|
|
break;
|
|
|
|
case STATE_START_TAG_OPEN:
|
|
if (curToken != TOKEN_IDENTIFIER)
|
|
error("Expected identifier");
|
|
m_tokenizer.getTokenStr(m_elementName);
|
|
m_state = STATE_ATTRIBUTE_LIST;
|
|
break;
|
|
|
|
case STATE_END_TAG_OPEN:
|
|
if (curToken != TOKEN_IDENTIFIER)
|
|
error("Expected identifier");
|
|
m_tokenizer.getTokenStr(m_elementName);
|
|
m_state = STATE_EXPECTING_END_TAG_CLOSE;
|
|
break;
|
|
|
|
case STATE_EXPECTING_END_TAG_CLOSE:
|
|
if (curToken != TOKEN_TAG_END)
|
|
error("Expected tag end");
|
|
m_state = STATE_DATA;
|
|
m_element = ELEMENT_END;
|
|
return;
|
|
|
|
case STATE_ATTRIBUTE_LIST:
|
|
if (curToken == TOKEN_IDENTIFIER)
|
|
{
|
|
m_tokenizer.getTokenStr(m_attribName);
|
|
m_state = STATE_EXPECTING_ATTRIBUTE_EQ;
|
|
}
|
|
else if (curToken == TOKEN_EMPTY_ELEMENT_END)
|
|
{
|
|
m_state = STATE_YIELD_EMPTY_ELEMENT_END;
|
|
m_element = ELEMENT_START;
|
|
return;
|
|
}
|
|
else if (curToken == TOKEN_TAG_END)
|
|
{
|
|
m_state = STATE_DATA;
|
|
m_element = ELEMENT_START;
|
|
return;
|
|
}
|
|
else
|
|
error("Unexpected token");
|
|
break;
|
|
|
|
case STATE_EXPECTING_ATTRIBUTE_EQ:
|
|
if (curToken != TOKEN_EQUAL)
|
|
error("Expected '='");
|
|
m_state = STATE_EXPECTING_ATTRIBUTE_VALUE;
|
|
break;
|
|
|
|
case STATE_EXPECTING_ATTRIBUTE_VALUE:
|
|
if (curToken != TOKEN_STRING)
|
|
error("Expected value");
|
|
if (hasAttribute(m_attribName.c_str()))
|
|
error("Duplicate attribute");
|
|
|
|
m_tokenizer.getString(m_attributes[m_attribName]);
|
|
m_state = STATE_ATTRIBUTE_LIST;
|
|
break;
|
|
|
|
default:
|
|
DE_ASSERT(false);
|
|
}
|
|
|
|
m_tokenizer.advance();
|
|
}
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*//*!
 * \brief Decode a predefined XML entity reference.
 * \param entity Complete entity reference text, including the leading
 *               '&' and the trailing ';' (the tokenizer's TOKEN_ENTITY
 *               span covers both delimiters).
 * \return Character the entity stands for, or 0 if \a entity is not one
 *         of the five predefined XML entities.
 *//*--------------------------------------------------------------------*/
static char getEntityValue (const std::string& entity)
{
	static const struct
	{
		const char*	name;
		char		value;
	} s_entities[] =
	{
		{ "&lt;",	'<'		},
		{ "&gt;",	'>'		},
		{ "&amp;",	'&'		},
		{ "&apos;",	'\''	},
		{ "&quot;",	'"'		},
	};

	const int numEntities = (int)(sizeof(s_entities) / sizeof(s_entities[0]));

	for (int ndx = 0; ndx < numEntities; ndx++)
	{
		if (entity == s_entities[ndx].name)
			return s_entities[ndx].value;
	}

	// Unknown entity; callers treat 0 as "invalid".
	return 0;
}
|
|
|
|
void Parser::parseEntityValue (void)
|
|
{
|
|
DE_ASSERT(m_state == STATE_ENTITY && m_tokenizer.getToken() == TOKEN_ENTITY);
|
|
|
|
std::string entity;
|
|
m_tokenizer.getTokenStr(entity);
|
|
|
|
const char value = getEntityValue(entity);
|
|
if (value == 0)
|
|
error("Invalid entity '" + entity + "'");
|
|
|
|
m_entityValue.resize(1);
|
|
m_entityValue[0] = value;
|
|
}
|
|
|
|
} // xml
|
|
} // xe
|