// Integration tests for the XML event reader (run with `cargo test --test event_reader`).
#![forbid(unsafe_code)]
|
|
|
|
extern crate xml;
|
|
#[macro_use]
|
|
extern crate lazy_static;
|
|
|
|
use std::env;
|
|
use std::fmt;
|
|
use std::fs::File;
|
|
use std::io::{BufRead, BufReader, Write, stderr};
|
|
use std::path::Path;
|
|
|
|
use xml::name::OwnedName;
|
|
use xml::common::Position;
|
|
use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};
|
|
|
|
/// Dummy function that opens a file, parses it, and returns a `Result`.
|
|
/// There can be IO errors (from `File::open`) and XML errors (from the parser).
|
|
/// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
|
|
/// do this without defining their own error type.
|
|
#[allow(dead_code)]
|
|
fn count_event_in_file(name: &Path) -> Result<usize> {
|
|
let mut event_count = 0;
|
|
for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
|
|
try!(event);
|
|
event_count += 1;
|
|
}
|
|
Ok(event_count)
|
|
}
|
|
|
|
/// Parses `documents/sample_1.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all enabled and compares the events with
/// `documents/sample_1_short.txt`. Reader positions are not compared.
#[test]
fn sample_1_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_1.xml"),
        include_bytes!("documents/sample_1_short.txt"),
        config,
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_1.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all disabled and compares the events with
/// `documents/sample_1_full.txt`. Reader positions are not compared.
#[test]
fn sample_1_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_1.xml"),
        include_bytes!("documents/sample_1_full.txt"),
        config,
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_2.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all enabled and compares the events with
/// `documents/sample_2_short.txt`. Reader positions are not compared.
#[test]
fn sample_2_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_2.xml"),
        include_bytes!("documents/sample_2_short.txt"),
        config,
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_2.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all disabled and compares the events with
/// `documents/sample_2_full.txt`. Reader positions are not compared.
#[test]
fn sample_2_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_2.xml"),
        include_bytes!("documents/sample_2_full.txt"),
        config,
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_3.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all enabled and compares the events with
/// `documents/sample_3_short.txt`. Every compared line is prefixed with the
/// reader position.
#[test]
fn sample_3_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_3.xml"),
        include_bytes!("documents/sample_3_short.txt"),
        config,
        true,
    );
}
|
|
|
|
/// Parses `documents/sample_3.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all disabled and compares the events with
/// `documents/sample_3_full.txt`. Every compared line is prefixed with the
/// reader position.
#[test]
fn sample_3_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_3.xml"),
        include_bytes!("documents/sample_3_full.txt"),
        config,
        true,
    );
}
|
|
|
|
/// Parses `documents/sample_4.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all enabled and compares the events with
/// `documents/sample_4_short.txt`. Reader positions are not compared.
#[test]
fn sample_4_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_4.xml"),
        include_bytes!("documents/sample_4_short.txt"),
        config,
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_4.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all disabled and compares the events with
/// `documents/sample_4_full.txt`. Reader positions are not compared.
#[test]
fn sample_4_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_4.xml"),
        include_bytes!("documents/sample_4_full.txt"),
        config,
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_5.xml` with `ignore_comments`,
/// `whitespace_to_characters`, `cdata_to_characters`, `trim_whitespace` and
/// `coalesce_characters` all enabled, plus three extra entities (`nbsp`,
/// `copy`, `NotEqualTilde`) registered through `add_entity`, and compares the
/// events with `documents/sample_5_short.txt`. Reader positions are not
/// compared.
#[test]
fn sample_5_short() {
    test(
        include_bytes!("documents/sample_5.xml"),
        include_bytes!("documents/sample_5_short.txt"),
        ParserConfig::new()
            .ignore_comments(true)
            .whitespace_to_characters(true)
            .cdata_to_characters(true)
            .trim_whitespace(true)
            .coalesce_characters(true)
            // Spelled as an escape: a literal NO-BREAK SPACE is indistinguishable
            // from an ordinary space in source and is easily normalised away.
            // NOTE(review): assumes the expected file contains U+00A0 - confirm.
            .add_entity("nbsp", "\u{a0}")
            .add_entity("copy", "©")
            .add_entity("NotEqualTilde", "≂̸"),
        false,
    );
}
|
|
|
|
/// Parses `documents/sample_6.xml` with `ignore_root_level_whitespace`,
/// `ignore_comments`, `whitespace_to_characters`, `cdata_to_characters`,
/// `trim_whitespace` and `coalesce_characters` all disabled and compares the
/// events with `documents/sample_6_full.txt`. Reader positions are not
/// compared.
#[test]
fn sample_6_full() {
    let config = ParserConfig::new()
        .ignore_root_level_whitespace(false)
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_6.xml"),
        include_bytes!("documents/sample_6_full.txt"),
        config,
        false,
    );
}
|
|
|
|
/// A document that stops right after `<?xml` is expected to yield a single
/// "Unexpected end of stream" error.
#[test]
fn eof_1() {
    let document: &[u8] = br#"<?xml"#;
    let expected: &[u8] = br#"1:6 Unexpected end of stream: no root element found"#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// Garbage following `<?xml` is expected to yield a single
/// "Unexpected token" error.
#[test]
fn bad_1() {
    let document: &[u8] = br#"<?xml&.,"#;
    let expected: &[u8] = br#"1:6 Unexpected token: <?xml&"#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// A `--` inside a comment body is expected to be reported as an error, both
/// when it is followed by a space and when it is followed by another dash.
#[test]
fn dashes_in_comments() {
    // Both cases use the default configuration and ignore reader positions.
    fn check(document: &[u8], expected: &[u8]) {
        test(document, expected, ParserConfig::new(), false);
    }

    check(
        br#"<!-- comment -- --><hello/>"#,
        br#"
            |1:14 Unexpected token '--' before ' '
        "#,
    );

    check(
        br#"<!-- comment ---><hello/>"#,
        br#"
            |1:14 Unexpected token '--' before '-'
        "#,
    );
}
|
|
|
|
/// Checks the reader positions reported for a document containing tab
/// characters, with `trim_whitespace` enabled. Every expected line starts
/// with the position because position checking is switched on.
#[test]
fn tabs_1() {
    let document: &[u8] = b"\t<a>\t<b/></a>";
    let expected: &[u8] = br#"
        |1:2 StartDocument(1.0, UTF-8)
        |1:2 StartElement(a)
        |1:6 StartElement(b)
        |1:6 EndElement(b)
        |1:10 EndElement(a)
        |1:14 EndDocument
    "#;
    let config = ParserConfig::new().trim_whitespace(true);

    test(document, expected, config, true);
}
|
|
|
|
/// A bare `]]>` in element content is expected to come through as ordinary
/// character data.
#[test]
fn issue_32_unescaped_cdata_end() {
    let document: &[u8] = br#"<hello>]]></hello>"#;
    let expected: &[u8] = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |Characters("]]>")
        |EndElement(hello)
        |EndDocument
    "#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// A bare `?>` in element content is expected to come through as ordinary
/// character data.
#[test]
fn issue_unescaped_processing_instruction_end() {
    let document: &[u8] = br#"<hello>?></hello>"#;
    let expected: &[u8] = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |Characters("?>")
        |EndElement(hello)
        |EndDocument
    "#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// A bare `/>` in element content is expected to come through as ordinary
/// character data.
#[test]
fn issue_unescaped_empty_tag_end() {
    let document: &[u8] = br#"<hello>/></hello>"#;
    let expected: &[u8] = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |Characters("/>")
        |EndElement(hello)
        |EndDocument
    "#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// An element that defines the attribute `a` twice is expected to produce an
/// "is redefined" error after the enclosing element has been reported.
#[test]
fn issue_83_duplicate_attributes() {
    let document: &[u8] = br#"<hello><some-tag a='10' a="20"></hello>"#;
    let expected: &[u8] = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |1:30 Attribute 'a' is redefined
    "#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// An entity reference whose name is a single character outside the Basic
/// Multilingual Plane is expected to be rejected as an unexpected entity.
#[test]
fn issue_93_large_characters_in_entity_references() {
    let document = r#"<hello>&𤶼;</hello>"#;
    // FIXME: it shouldn't be 10, looks like indices are off slightly
    let expected = r#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |1:10 Unexpected entity: 𤶼
    "#;

    test(
        document.as_bytes(),
        expected.as_bytes(),
        ParserConfig::new(),
        false,
    )
}
|
|
|
|
/// A CDATA section whose content itself ends with `]` is expected to keep
/// that bracket in the reported data.
#[test]
fn issue_98_cdata_ending_with_right_bracket() {
    let document: &[u8] = br#"<hello><![CDATA[Foo [Bar]]]></hello>"#;
    let expected: &[u8] = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |CData("Foo [Bar]")
        |EndElement(hello)
        |EndDocument
    "#;

    test(document, expected, ParserConfig::new(), false)
}
|
|
|
|
/// A `--` outside a comment (in character data or inside CDATA) is expected
/// to be passed through unchanged instead of being reported as an error.
#[test]
fn issue_105_unexpected_double_dash() {
    // All cases use the default configuration and ignore reader positions.
    fn check(document: &[u8], expected: &[u8]) {
        test(document, expected, ParserConfig::new(), false);
    }

    check(
        br#"<hello>-- </hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-- ")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    check(
        br#"<hello>--</hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("--")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    check(
        br#"<hello>--></hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-->")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    check(
        br#"<hello><![CDATA[--]]></hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("--")
            |EndElement(hello)
            |EndDocument
        "#,
    );
}
|
|
|
|
/// With a default namespace declared on the element, the element name is
/// expected to carry that namespace while the unprefixed attribute `x` is
/// expected to be reported without one.
#[test]
fn issue_attribues_have_no_default_namespace() {
    let document: &[u8] = br#"<hello xmlns="urn:foo" x="y"/>"#;
    let expected: &[u8] = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement({urn:foo}hello [x="y"])
        |EndElement({urn:foo}hello)
        |EndDocument
    "#;

    test(document, expected, ParserConfig::new(), false);
}
|
|
|
|
/// Numeric character references that do not name a valid character.
///
/// With the default configuration the first such reference is expected to
/// abort parsing with an "Invalid ... character number" error. With
/// `replace_unknown_entity_references(true)` each reference is expected to be
/// replaced by U+FFFD instead.
///
/// The references must stay spelled out as ASCII (`&#...;`): a byte-string
/// literal cannot contain non-ASCII characters, and the expected messages and
/// the `1:13` position refer to the textual reference.
// NOTE(review): the first reference of each pair (`#55357` / `#xd83d`) is
// taken from the expected messages; the second (`#56628` / `#xdd34`) is
// assumed - confirm against the upstream test.
#[test]
fn issue_replacement_character_entity_reference() {
    test(
        br#"<doc>&#55357;&#56628;</doc>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid decimal character number in an entity: #55357
        "#,
        ParserConfig::new(),
        false,
    );

    test(
        br#"<doc>&#xd83d;&#xdd34;</doc>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid hexadecimal character number in an entity: #xd83d
        "#,
        ParserConfig::new(),
        false,
    );

    test(
        br#"<doc>&#55357;&#56628;</doc>"#,
        format!(
            r#"
                |StartDocument(1.0, UTF-8)
                |StartElement(doc)
                |Characters("{replacement_character}{replacement_character}")
                |EndElement(doc)
                |EndDocument
            "#,
            replacement_character = "\u{fffd}"
        )
        .as_bytes(),
        ParserConfig::new()
            .replace_unknown_entity_references(true),
        false,
    );

    test(
        br#"<doc>&#xd83d;&#xdd34;</doc>"#,
        format!(
            r#"
                |StartDocument(1.0, UTF-8)
                |StartElement(doc)
                |Characters("{replacement_character}{replacement_character}")
                |EndElement(doc)
                |EndDocument
            "#,
            replacement_character = "\u{fffd}"
        )
        .as_bytes(),
        ParserConfig::new()
            .replace_unknown_entity_references(true),
        false,
    );
}
|
|
|
|
lazy_static! {
    // If PRINT_SPEC env variable is set to "1", print the lines
    // to stderr instead of comparing with the output
    // it can be used like this:
    // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
    //
    // The variable is looked up directly: `env::vars()` would panic if any
    // unrelated variable in the environment were not valid Unicode. An unset
    // or non-Unicode PRINT_SPEC leaves printing disabled.
    static ref PRINT: bool = env::var("PRINT_SPEC")
        .ok()
        .map_or(false, |value| value == "1");
}
|
|
|
|
/// Strips the margin from one line of an expected-output spec.
///
/// If the line, with surrounding whitespace removed, begins with `|`, the
/// result is everything after that bar (the trailing whitespace stays
/// removed). Any other line is returned exactly as it was passed in.
// clones a lot but that's fine
fn trim_until_bar(s: String) -> String {
    // Computed in its own scope so the borrow of `s` has ended before `s`
    // is handed back unchanged.
    let after_bar = {
        let trimmed = s.trim();
        if trimmed.starts_with('|') {
            Some(trimmed[1..].to_owned())
        } else {
            None
        }
    };

    after_bar.unwrap_or(s)
}
|
|
|
|
fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
|
|
let mut reader = config.create_reader(input);
|
|
let mut spec_lines = BufReader::new(output).lines()
|
|
.map(|line| line.unwrap())
|
|
.enumerate()
|
|
.map(|(i, line)| (i, trim_until_bar(line)))
|
|
.filter(|&(_, ref line)| !line.trim().is_empty());
|
|
|
|
loop {
|
|
let e = reader.next();
|
|
let line =
|
|
if test_position {
|
|
format!("{} {}", reader.position(), Event(&e))
|
|
} else {
|
|
format!("{}", Event(&e))
|
|
};
|
|
|
|
if *PRINT {
|
|
writeln!(&mut stderr(), "{}", line).unwrap();
|
|
} else {
|
|
if let Some((n, spec)) = spec_lines.next() {
|
|
if line != spec {
|
|
const SPLITTER: &'static str = "-------------------";
|
|
panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound: {}\n{}\n",
|
|
SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap());
|
|
}
|
|
} else {
|
|
panic!("Unexpected event: {}", line);
|
|
}
|
|
}
|
|
|
|
match e {
|
|
Ok(XmlEvent::EndDocument) | Err(_) => break,
|
|
_ => {},
|
|
}
|
|
}
|
|
}
|
|
|
|
// Here we define our own string representation of events so we don't depend
|
|
// on the specifics of Display implementation for XmlEvent and OwnedName.
|
|
|
|
struct Name<'a>(&'a OwnedName);
|
|
|
|
impl <'a> fmt::Display for Name<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
if let Some(ref namespace) = self.0.namespace {
|
|
try! { write!(f, "{{{}}}", namespace) }
|
|
}
|
|
|
|
if let Some(ref prefix) = self.0.prefix {
|
|
try! { write!(f, "{}:", prefix) }
|
|
}
|
|
|
|
write!(f, "{}", self.0.local_name)
|
|
}
|
|
}
|
|
|
|
struct Event<'a>(&'a Result<XmlEvent>);
|
|
|
|
impl<'a> fmt::Display for Event<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
let empty = String::new();
|
|
match *self.0 {
|
|
Ok(ref e) => match *e {
|
|
XmlEvent::StartDocument { ref version, ref encoding, .. } =>
|
|
write!(f, "StartDocument({}, {})", version, encoding),
|
|
XmlEvent::EndDocument =>
|
|
write!(f, "EndDocument"),
|
|
XmlEvent::ProcessingInstruction { ref name, ref data } =>
|
|
write!(f, "ProcessingInstruction({}={:?})", name,
|
|
data.as_ref().unwrap_or(&empty)),
|
|
XmlEvent::StartElement { ref name, ref attributes, .. } => {
|
|
if attributes.is_empty() {
|
|
write!(f, "StartElement({})", Name(name))
|
|
}
|
|
else {
|
|
let attrs: Vec<_> = attributes.iter()
|
|
.map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
|
|
write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
|
|
}
|
|
},
|
|
XmlEvent::EndElement { ref name } =>
|
|
write!(f, "EndElement({})", Name(name)),
|
|
XmlEvent::Comment(ref data) =>
|
|
write!(f, r#"Comment("{}")"#, data.escape_debug()),
|
|
XmlEvent::CData(ref data) =>
|
|
write!(f, r#"CData("{}")"#, data.escape_debug()),
|
|
XmlEvent::Characters(ref data) =>
|
|
write!(f, r#"Characters("{}")"#, data.escape_debug()),
|
|
XmlEvent::Whitespace(ref data) =>
|
|
write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
|
|
},
|
|
Err(ref e) => e.fmt(f),
|
|
}
|
|
}
|
|
}
|