android13/external/rust/crates/xml-rs/tests/event_reader.rs

#![forbid(unsafe_code)]

extern crate xml;
#[macro_use]
extern crate lazy_static;

use std::env;
use std::fmt;
use std::fs::File;
use std::io::{BufRead, BufReader, Write, stderr};
use std::path::Path;

use xml::name::OwnedName;
use xml::common::Position;
use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};

/// Dummy function that opens a file, parses it, and returns a `Result`.
/// There can be IO errors (from `File::open`) and XML errors (from the parser).
/// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
/// do this without defining their own error type.
#[allow(dead_code)]
fn count_event_in_file(name: &Path) -> Result<usize> {
    let mut event_count = 0;
    for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
        try!(event);
        event_count += 1;
    }
    Ok(event_count)
}

/// Checks `sample_1.xml` against `sample_1_short.txt` with all five
/// comment/whitespace/CDATA options set to `true`; positions are not compared.
#[test]
fn sample_1_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_1.xml"),
        include_bytes!("documents/sample_1_short.txt"),
        config,
        false,
    );
}

/// Checks `sample_1.xml` against `sample_1_full.txt` with all five
/// comment/whitespace/CDATA options set to `false`; positions are not compared.
#[test]
fn sample_1_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_1.xml"),
        include_bytes!("documents/sample_1_full.txt"),
        config,
        false,
    );
}

/// Checks `sample_2.xml` against `sample_2_short.txt` with all five
/// comment/whitespace/CDATA options set to `true`; positions are not compared.
#[test]
fn sample_2_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_2.xml"),
        include_bytes!("documents/sample_2_short.txt"),
        config,
        false,
    );
}

/// Checks `sample_2.xml` against `sample_2_full.txt` with all five
/// comment/whitespace/CDATA options set to `false`; positions are not compared.
#[test]
fn sample_2_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_2.xml"),
        include_bytes!("documents/sample_2_full.txt"),
        config,
        false,
    );
}

/// Checks `sample_3.xml` against `sample_3_short.txt` with all five
/// comment/whitespace/CDATA options set to `true`. Unlike the other sample
/// tests, the reader position is part of every compared line.
#[test]
fn sample_3_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_3.xml"),
        include_bytes!("documents/sample_3_short.txt"),
        config,
        true,
    );
}

/// Checks `sample_3.xml` against `sample_3_full.txt` with all five
/// comment/whitespace/CDATA options set to `false`. The reader position is
/// part of every compared line.
#[test]
fn sample_3_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_3.xml"),
        include_bytes!("documents/sample_3_full.txt"),
        config,
        true,
    );
}

/// Checks `sample_4.xml` against `sample_4_short.txt` with all five
/// comment/whitespace/CDATA options set to `true`; positions are not compared.
#[test]
fn sample_4_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(
        include_bytes!("documents/sample_4.xml"),
        include_bytes!("documents/sample_4_short.txt"),
        config,
        false,
    );
}

/// Checks `sample_4.xml` against `sample_4_full.txt` with all five
/// comment/whitespace/CDATA options set to `false`; positions are not compared.
#[test]
fn sample_4_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_4.xml"),
        include_bytes!("documents/sample_4_full.txt"),
        config,
        false,
    );
}

/// Checks `sample_5.xml` against `sample_5_short.txt` with all five
/// comment/whitespace/CDATA options set to `true` and three extra named
/// entities (`nbsp`, `copy`, `NotEqualTilde`) registered on the parser
/// configuration; positions are not compared.
#[test]
fn sample_5_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true)
        .add_entity("nbsp", " ")
        .add_entity("copy", "©")
        .add_entity("NotEqualTilde", "≂̸");

    test(
        include_bytes!("documents/sample_5.xml"),
        include_bytes!("documents/sample_5_short.txt"),
        config,
        false,
    );
}

/// Checks `sample_6.xml` against `sample_6_full.txt` with
/// `ignore_root_level_whitespace` and the five comment/whitespace/CDATA
/// options all set to `false`; positions are not compared.
#[test]
fn sample_6_full() {
    let config = ParserConfig::new()
        .ignore_root_level_whitespace(false)
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(
        include_bytes!("documents/sample_6.xml"),
        include_bytes!("documents/sample_6_full.txt"),
        config,
        false,
    );
}

/// Input that stops right after `<?xml` is expected to produce a single
/// "unexpected end of stream" error line.
#[test]
fn eof_1() {
    let xml = br#"<?xml"#;
    let expected = br#"1:6 Unexpected end of stream: no root element found"#;

    test(xml, expected, ParserConfig::new(), false);
}

/// Garbage directly after `<?xml` is expected to produce a single
/// "unexpected token" error line.
#[test]
fn bad_1() {
    let xml = br#"<?xml&.,"#;
    let expected = br#"1:6 Unexpected token: <?xml&"#;

    test(xml, expected, ParserConfig::new(), false);
}

/// A `--` sequence inside a comment body is expected to be reported as an
/// unexpected token, whether it is followed by a space or by another dash.
#[test]
fn dashes_in_comments() {
    // `--` followed by a space.
    let dashes_then_space = br#"<!-- comment -- --><hello/>"#;
    test(
        dashes_then_space,
        br#"
            |1:14 Unexpected token '--' before ' '
        "#,
        ParserConfig::new(),
        false,
    );

    // `--` followed by a third dash.
    let dashes_then_dash = br#"<!-- comment ---><hello/>"#;
    test(
        dashes_then_dash,
        br#"
            |1:14 Unexpected token '--' before '-'
        "#,
        ParserConfig::new(),
        false,
    );
}

/// Checks the positions reported for a document containing tab characters.
/// The expected lines carry the reader position because position checking is
/// enabled for this test.
#[test]
fn tabs_1() {
    let xml = b"\t<a>\t<b/></a>";
    let config = ParserConfig::new().trim_whitespace(true);

    test(
        xml,
        br#"
            |1:2 StartDocument(1.0, UTF-8)
            |1:2 StartElement(a)
            |1:6 StartElement(b)
            |1:6 EndElement(b)
            |1:10 EndElement(a)
            |1:14 EndDocument
        "#,
        config,
        true,
    );
}

/// A bare `]]>` in element content is expected to come through as ordinary
/// character data.
#[test]
fn issue_32_unescaped_cdata_end() {
    let xml = br#"<hello>]]></hello>"#;

    test(
        xml,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("]]>")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );
}

/// A bare `?>` in element content is expected to come through as ordinary
/// character data.
#[test]
fn issue_unescaped_processing_instruction_end() {
    let xml = br#"<hello>?></hello>"#;

    test(
        xml,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("?>")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );
}

/// A bare `/>` in element content is expected to come through as ordinary
/// character data.
#[test]
fn issue_unescaped_empty_tag_end() {
    let xml = br#"<hello>/></hello>"#;

    test(
        xml,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("/>")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );
}

/// An element that defines the same attribute twice is expected to end the
/// event stream with an "is redefined" error.
#[test]
fn issue_83_duplicate_attributes() {
    let xml = br#"<hello><some-tag a='10' a="20"></hello>"#;

    test(
        xml,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:30 Attribute 'a' is redefined
        "#,
        ParserConfig::new(),
        false,
    );
}

/// An entity reference whose name is a character outside the Basic
/// Multilingual Plane is expected to be reported as an unexpected entity.
#[test]
fn issue_93_large_characters_in_entity_references() {
    let xml = r#"<hello>&𤶼;</hello>"#;
    // FIXME: it shouldn't be 10, looks like indices are off slightly
    let expected = r#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:10 Unexpected entity: 𤶼
        "#;

    test(xml.as_bytes(), expected.as_bytes(), ParserConfig::new(), false)
}

/// CDATA content that itself ends with `]` (so the section closes with
/// `]]]>`) is expected to keep that trailing bracket.
#[test]
fn issue_98_cdata_ending_with_right_bracket() {
    let xml = br#"<hello><![CDATA[Foo [Bar]]]></hello>"#;

    test(
        xml,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("Foo [Bar]")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    )
}

/// `--` outside of a comment is expected to be passed through untouched, both
/// in plain character data and inside a CDATA section.
#[test]
fn issue_105_unexpected_double_dash() {
    // Double dash followed by a space.
    let dash_space = br#"<hello>-- </hello>"#;
    test(
        dash_space,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-- ")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );

    // Double dash on its own.
    let dash_only = br#"<hello>--</hello>"#;
    test(
        dash_only,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("--")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );

    // Something that looks like the end of a comment.
    let comment_end = br#"<hello>--></hello>"#;
    test(
        comment_end,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-->")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );

    // Double dash inside CDATA.
    let dash_in_cdata = br#"<hello><![CDATA[--]]></hello>"#;
    test(
        dash_in_cdata,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("--")
            |EndElement(hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );
}

/// The default namespace declared with `xmlns="..."` is expected to apply to
/// the element name only: the rendered attribute `x` carries no namespace.
#[test]
fn issue_attribues_have_no_default_namespace() {
    let xml = br#"<hello xmlns="urn:foo" x="y"/>"#;

    test(
        xml,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement({urn:foo}hello [x="y"])
            |EndElement({urn:foo}hello)
            |EndDocument
        "#,
        ParserConfig::new(),
        false,
    );
}

/// Numeric character references to surrogate code points (`0xD83D`,
/// `0xDD34`): with the default configuration the first one is expected to be
/// an error; with `replace_unknown_entity_references(true)` each one is
/// expected to turn into U+FFFD. Both decimal and hexadecimal notations are
/// exercised.
#[test]
fn issue_replacement_character_entity_reference() {
    let decimal_refs = br#"<doc>&#55357;&#56628;</doc>"#;
    let hex_refs = br#"<doc>&#xd83d;&#xdd34;</doc>"#;

    // Default configuration: the reference is rejected.
    test(
        decimal_refs,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid decimal character number in an entity: #55357
        "#,
        ParserConfig::new(),
        false,
    );

    test(
        hex_refs,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid hexadecimal character number in an entity: #xd83d
        "#,
        ParserConfig::new(),
        false,
    );

    // Replacement enabled: both notations share the same expected output,
    // so the spec is built once and reused.
    let replaced_spec = format!(
        r#"
                |StartDocument(1.0, UTF-8)
                |StartElement(doc)
                |Characters("{replacement_character}{replacement_character}")
                |EndElement(doc)
                |EndDocument
            "#,
        replacement_character = "\u{fffd}"
    );

    test(
        decimal_refs,
        replaced_spec.as_bytes(),
        ParserConfig::new().replace_unknown_entity_references(true),
        false,
    );

    test(
        hex_refs,
        replaced_spec.as_bytes(),
        ParserConfig::new().replace_unknown_entity_references(true),
        false,
    );
}

lazy_static! {
    // If the PRINT_SPEC environment variable is set to "1", the test harness
    // prints each rendered event line to stderr instead of comparing it with
    // the expected output. It can be used like this to (re)generate a spec:
    // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
    //
    // The variable is looked up directly with `env::var` rather than by
    // scanning `env::vars()`: that iterator panics as soon as *any* variable
    // in the environment is not valid Unicode, which would fail every test
    // for a reason unrelated to XML parsing. A missing or non-Unicode
    // PRINT_SPEC simply means "do not print".
    static ref PRINT: bool = env::var("PRINT_SPEC")
        .map(|value| value == "1")
        .unwrap_or(false);
}

// Strips the margin from a spec line: when the line, ignoring surrounding
// whitespace, starts with `|`, everything after that bar (with the
// surrounding whitespace removed) is returned as a new string. Lines without
// a leading bar are handed back untouched. Allocates freely, which is fine
// for test code.
fn trim_until_bar(s: String) -> String {
    let after_bar = {
        let trimmed = s.trim();
        if trimmed.starts_with('|') {
            // `|` is a single byte, so slicing at 1 stays on a char boundary.
            Some(trimmed[1..].to_owned())
        } else {
            None
        }
    };
    after_bar.unwrap_or(s)
}

/// Shared driver for every test in this file.
///
/// Parses `input` with a reader created from `config`, renders each event as
/// one line of text (prefixed with the reader position when `test_position`
/// is set) and compares it with the next non-blank line of `output`, after
/// that line has gone through `trim_until_bar`. Panics on the first mismatch
/// or when the reader yields more events than `output` has lines. The loop
/// stops after `EndDocument` or the first error.
///
/// When `PRINT` is true the rendered lines are written to stderr and no
/// comparison takes place.
fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
    let mut reader = config.create_reader(input);

    // Expected lines, paired with their zero-based index in `output` so that
    // a mismatch can point at the offending spec line.
    let mut expected = BufReader::new(output)
        .lines()
        .enumerate()
        .map(|(index, raw)| (index, trim_until_bar(raw.unwrap())))
        .filter(|&(_, ref text)| !text.trim().is_empty());

    loop {
        let event = reader.next();
        let rendered = if test_position {
            format!("{} {}", reader.position(), Event(&event))
        } else {
            Event(&event).to_string()
        };

        if *PRINT {
            writeln!(&mut stderr(), "{}", rendered).unwrap();
        } else {
            match expected.next() {
                Some((n, spec)) => {
                    if rendered != spec {
                        const SPLITTER: &'static str = "-------------------";
                        panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound:    {}\n{}\n",
                               SPLITTER, n + 1, spec, rendered, std::str::from_utf8(output).unwrap());
                    }
                }
                None => panic!("Unexpected event: {}", rendered),
            }
        }

        // Nothing follows the end of the document or an error.
        let finished = match event {
            Ok(XmlEvent::EndDocument) | Err(_) => true,
            _ => false,
        };
        if finished {
            break;
        }
    }
}

// Here we define our own string representation of events so we don't depend
// on the specifics of Display implementation for XmlEvent and OwnedName.

struct Name<'a>(&'a OwnedName);

impl <'a> fmt::Display for Name<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Some(ref namespace) = self.0.namespace {
            try! { write!(f, "{{{}}}", namespace) }
        }

        if let Some(ref prefix) = self.0.prefix {
            try! { write!(f, "{}:", prefix) }
        }

        write!(f, "{}", self.0.local_name)
    }
}

struct Event<'a>(&'a Result<XmlEvent>);

impl<'a> fmt::Display for Event<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let empty = String::new();
        match *self.0 {
            Ok(ref e) => match *e {
                XmlEvent::StartDocument { ref version, ref encoding, .. } =>
                    write!(f, "StartDocument({}, {})", version, encoding),
                XmlEvent::EndDocument =>
                    write!(f, "EndDocument"),
                XmlEvent::ProcessingInstruction { ref name, ref data } =>
                    write!(f, "ProcessingInstruction({}={:?})", name,
                        data.as_ref().unwrap_or(&empty)),
                XmlEvent::StartElement { ref name, ref attributes, .. } => {
                    if attributes.is_empty() {
                        write!(f, "StartElement({})", Name(name))
                    }
                    else {
                        let attrs: Vec<_> = attributes.iter()
                            .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
                        write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
                    }
                },
                XmlEvent::EndElement { ref name } =>
                    write!(f, "EndElement({})", Name(name)),
                XmlEvent::Comment(ref data) =>
                    write!(f, r#"Comment("{}")"#, data.escape_debug()),
                XmlEvent::CData(ref data) =>
                    write!(f, r#"CData("{}")"#, data.escape_debug()),
                XmlEvent::Characters(ref data) =>
                    write!(f, r#"Characters("{}")"#, data.escape_debug()),
                XmlEvent::Whitespace(ref data) =>
                    write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
            },
            Err(ref e) => e.fmt(f),
        }
    }
}