third_party/rust/lalrpop/src/lr1/build/test.rs
author Dorel Luca <dluca@mozilla.com>
Sat, 12 Jan 2019 03:43:46 +0200
changeset 453620 def9811f0311
parent 453619 3c4b8e03e722
child 460816 c5e856d5edba
permissions -rw-r--r--
Backed out 2 changesets (bug 1516337) for build bustage. CLOSED TREE Backed out changeset 3c4b8e03e722 (bug 1516337) Backed out changeset 4fc377013db5 (bug 1516337)

use string_cache::DefaultAtom as Atom;
use generate;
use grammar::repr::*;
use test_util::{compare, expect_debug, normalized_grammar};
use lr1::core::*;
use lr1::interpret::interpret;
use lr1::lookahead::Token;
use lr1::lookahead::Token::EOF;
use lr1::lookahead::TokenSet;
use lr1::tls::Lr1Tls;
use tls::Tls;

use super::{use_lane_table, build_lr0_states, build_lr1_states, LR};

fn nt(t: &str) -> NonterminalString {
    NonterminalString(Atom::from(t))
}

const ITERATIONS: usize = 22;

fn random_test<'g>(grammar: &Grammar, states: &'g [LR1State<'g>], start_symbol: NonterminalString) {
    for i in 0..ITERATIONS {
        let input_tree = generate::random_parse_tree(grammar, start_symbol.clone());
        let output_tree = interpret(&states, input_tree.terminals()).unwrap();

        println!("test {}", i);
        println!("input_tree = {}", input_tree);
        println!("output_tree = {}", output_tree);

        compare(output_tree, input_tree);
    }
}

macro_rules! tokens {
    ($($x:expr),*) => {
        vec![$(TerminalString::quoted(Atom::from($x))),*]
    }
}

fn items<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Token) -> LR1Items<'g> {
    let set = TokenSet::from(la);
    let lr1: LR<TokenSet> = LR::new(&grammar, nt(nonterminal), set.clone());
    let items = lr1.transitive_closure(lr1.items(&nt(nonterminal), index, &set));
    items
}

#[test]
fn start_state() {
    let grammar = normalized_grammar(
        r#"
grammar;
    extern { enum Tok { "C" => .., "D" => .. } }
    A = B "C";
    B: Option<u32> = {
        "D" => Some(1),
        () => None
    };
"#,
    );
    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
    let items = items(&grammar, "A", 0, EOF);
    expect_debug(
        items.vec,
        r#"[
    A = (*) B "C" [EOF],
    B = (*) ["C"],
    B = (*) "D" ["C"]
]"#,
    );
}

#[test]
fn start_state_1() {
    let grammar = normalized_grammar(
        r#"
grammar;
extern { enum Tok { "B1" => .., "C1" => .. } }
A = B C;
B: Option<u32> = {
    "B1" => Some(1),
    () => None
};
C: Option<u32> = {
    "C1" => Some(1),
    () => None
};
"#,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());

    expect_debug(
        items(&grammar, "A", 0, EOF).vec,
        r#"[
    A = (*) B C [EOF],
    B = (*) ["C1", EOF],
    B = (*) "B1" ["C1", EOF]
]"#,
    );

    expect_debug(
        items(&grammar, "A", 1, EOF).vec,
        r#"[
    A = B (*) C [EOF],
    C = (*) [EOF],
    C = (*) "C1" [EOF]
]"#,
    );
}

#[test]
fn expr_grammar1() {
    let _tls = Tls::test();

    let grammar = normalized_grammar(
        r#"
grammar;
    extern { enum Tok { "-" => .., "N" => .., "(" => .., ")" => .. } }

    S: () =
        E => ();

    E: () = {
        E "-" T => (),
        T => ()
    };

    T: () = {
        "N" => (),
        "(" E ")" => ()
    };
"#,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());

    // for now, just test that process does not result in an error
    // and yields expected number of states.
    let states = build_lr1_states(&grammar, nt("S")).unwrap();
    println!("{:#?}", states);
    assert_eq!(states.len(), if use_lane_table() { 9 } else { 16 });

    // execute it on some sample inputs.
    let tree = interpret(&states, tokens!["N", "-", "(", "N", "-", "N", ")"]).unwrap();
    assert_eq!(
        &format!("{}", tree)[..],
        r#"[S: [E: [E: [T: "N"]], "-", [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]]]"#
    );

    // incomplete:
    assert!(interpret(&states, tokens!["N", "-", "(", "N", "-", "N"]).is_err());

    // incomplete:
    assert!(interpret(&states, tokens!["N", "-"]).is_err());

    // unexpected character:
    assert!(interpret(&states, tokens!["N", "-", ")", "N", "-", "N", "("]).is_err());

    // parens first:
    let tree = interpret(&states, tokens!["(", "N", "-", "N", ")", "-", "N"]).unwrap();
    println!("{}", tree);
    assert_eq!(
        &format!("{}", tree)[..],
        r#"[S: [E: [E: [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]], "-", [T: "N"]]]"#
    );

    // run some random tests
    random_test(&grammar, &states, nt("S"));
}

#[test]
fn shift_reduce_conflict1() {
    let _tls = Tls::test();

    // This grammar gets a shift-reduce conflict because if the input
    // is "&" (*) "L", then we see two possibilities, and we must decide
    // between them:
    //
    // "&" (*) "L" E
    //  |       |  |
    //  +-------+--|
    //          |
    //          E
    //
    // or
    //
    // "&"      (*) "L"
    //  |            |
    //  |  OPT_L     E
    //  |   |        |
    //  +---+---+----+
    //          |
    //          E
    //
    // to some extent this may be a false conflict, in that inlined
    // rules would address it, but it's an interesting one for
    // producing a useful error message.

    let grammar = normalized_grammar(
        r#"
        grammar;
        extern { enum Tok { "L" => .., "&" => .., } }
        E: () = {
            "L",
            "&" OPT_L E
        };
        OPT_L: () = {
            (),
            "L"
        };
    "#,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());

    assert!(build_lr1_states(&grammar, nt("E")).is_err());
}

/// One of the few grammars that IS LR(0).
#[test]
fn lr0_expr_grammar_with_explicit_eof() {
    let _tls = Tls::test();

    let grammar = normalized_grammar(
        r#"
grammar;

S: () = E "$";

E: () = {
    E "-" T,
    T,
};

T: () = {
    "N",
    "(" E ")",
};
"#,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());

    // for now, just test that process does not result in an error
    // and yields expected number of states.
    let states = build_lr0_states(&grammar, nt("S")).unwrap();
    assert_eq!(states.len(), 10);
}

/// Without the artifical '$', grammar is not LR(0).
#[test]
fn lr0_expr_grammar_with_implicit_eof() {
    let _tls = Tls::test();

    let grammar = normalized_grammar(
        r#"
grammar;

S: () = E;

E: () = {
    E "-" T,
    T,
};

T: () = {
    "N",
    "(" E ")",
};
"#,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());

    build_lr0_states(&grammar, nt("S")).unwrap_err();
}

/// When we moved to storing items as (lr0 -> TokenSet) pairs, a bug
/// in the transitive closure routine could cause us to have `(Foo,
/// S0)` and `(Foo, S1)` as distinct items instead of `(Foo, S0|S1)`.
#[test]
fn issue_144() {
    let _tls = Tls::test();

    let grammar = normalized_grammar(
        r##"
grammar;

pub ForeignItem: () = {
  AttrsAndVis "item_foreign_fn",
  AttrsAndVis "unsafe" "item_foreign_fn",
};

AttrsAndVis: () = {
    MaybeOuterAttrs visibility,
};

MaybeOuterAttrs: () = {
    OuterAttrs,
    (),
};

visibility: () = {
  "pub",
  (),
};

OuterAttrs: () = {
    OuterAttr,
    OuterAttrs OuterAttr,
};

OuterAttr: () = {
    "#" "[" "]",
};

Ident: () = {
    "IDENT",
};

ty: () = {
    "ty"
};
"##,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
    build_lr1_states(&grammar, nt("ForeignItem")).unwrap();
}

// Not sure if this is the right spot
#[test]
fn match_grammar() {
    let _tls = Tls::test();

    let grammar = normalized_grammar(
        r#"
grammar;

match {
    r"(?i)select" => SELECT
} else {
    _
}

pub Query = SELECT r"[a-z]+";
"#,
    );

    let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());

    let states = build_lr0_states(&grammar, nt("Query")).expect("build states");
    println!("states: {:?}", states);
}