third_party/rust/zerotrie/tests/builder_test.rs
author Duncan McIntosh <dmcintosh@mozilla.com>
Wed, 09 Jul 2025 19:42:02 +0000 (5 hours ago)
changeset 795924 9ccc6a2267cbf69c621fec973bd28573c2a45a1f
parent 792559 59c51c0d8f5da56bd8d46451ae59a9f2de0774bd
permissions -rw-r--r--
Bug 1966586 - Reuse other browser windows when opening _blank links in Taskbar Tabs windows. r=nrishel This doesn't affect other tab additions, nor does it stop the tab bar from appearing altogether. The idea is that _if_ another tab is somehow made, the user should see it; but we should not create new tabs if we can avoid it. This also adds tests for opening URIs in popups and taskbar tabs to make it less likely that this breaks in future. Differential Revision: https://phabricator.services.mozilla.com/D253726
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use litemap::LiteMap;
use zerotrie::ZeroTriePerfectHash;
use zerotrie::ZeroTrieSimpleAscii;

mod testdata {
    include!("data/data.rs");
}

use testdata::strings_to_litemap;

const NON_EXISTENT_STRINGS: &[&str] = &[
    "a9PS", "ahsY", "ahBO", "a8IN", "xk8o", "xv1l", "xI2S", "618y", "d6My", "uszy",
];

macro_rules! assert_bytes_eq {
    ($len:literal, $a:expr, $b:expr) => {
        assert_eq!($len, $a.len());
        assert_eq!($a, $b);
    };
}

fn check_simple_ascii_trie<S>(items: &LiteMap<&[u8], usize>, trie: &ZeroTrieSimpleAscii<S>)
where
    S: AsRef<[u8]> + ?Sized,
{
    // Check that each item is in the trie
    for (k, v) in items.iter() {
        assert_eq!(trie.get(k), Some(*v));
    }
    // Check that some items are not in the trie
    for s in NON_EXISTENT_STRINGS.iter() {
        assert_eq!(trie.get(s.as_bytes()), None);
    }
    // Check that the iterator returns items in the same order as the LiteMap
    assert!(items
        .iter()
        .map(|(s, v)| (String::from_utf8(s.to_vec()).unwrap(), *v))
        .eq(trie.iter()));
    // Check that the const builder works
    let const_trie = ZeroTrieSimpleAscii::try_from_litemap_with_const_builder(items).unwrap();
    assert_eq!(trie.as_bytes(), const_trie.as_bytes());
}

fn check_phf_ascii_trie<S>(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash<S>)
where
    S: AsRef<[u8]> + ?Sized,
{
    // Check that each item is in the trie
    for (k, v) in items.iter() {
        assert_eq!(trie.get(k), Some(*v));
    }
    // Check that some items are not in the trie
    for s in NON_EXISTENT_STRINGS.iter() {
        assert_eq!(trie.get(s.as_bytes()), None);
    }
    // Check that the iterator returns the contents of the LiteMap
    // Note: Since the items might not be in order, we collect them into a new LiteMap
    let recovered_items: LiteMap<_, _> = trie.iter().collect();
    assert_eq!(
        items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(),
        recovered_items.to_borrowed_keys_values()
    );
}

fn check_phf_bytes_trie<S>(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash<S>)
where
    S: AsRef<[u8]> + ?Sized,
{
    // Check that each item is in the trie
    for (k, v) in items.iter() {
        assert_eq!(trie.get(k), Some(*v), "{k:?}");
    }
    // Check that some items are not in the trie
    for s in NON_EXISTENT_STRINGS.iter() {
        assert_eq!(trie.get(s.as_bytes()), None, "{s:?}");
    }
    // Check that the iterator returns the contents of the LiteMap
    // Note: Since the items might not be in order, we collect them into a new LiteMap
    let recovered_items: LiteMap<_, _> = trie.iter().collect();
    assert_eq!(
        items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(),
        recovered_items.to_borrowed_keys_values()
    );
}

#[test]
fn test_basic() {
    let lm1a: LiteMap<&[u8], usize> = testdata::basic::DATA_ASCII.iter().copied().collect();
    let lm1b: LiteMap<&[u8], usize> = lm1a.to_borrowed_keys();
    let lm2: LiteMap<&[u8], usize> = testdata::basic::DATA_UNICODE.iter().copied().collect();
    let lm3: LiteMap<&[u8], usize> = testdata::basic::DATA_BINARY.iter().copied().collect();

    let expected_bytes = testdata::basic::TRIE_ASCII;
    let trie = ZeroTrieSimpleAscii::try_from(&lm1a).unwrap();
    assert_bytes_eq!(26, trie.as_bytes(), expected_bytes);
    check_simple_ascii_trie(&lm1a, &trie);

    let trie = ZeroTriePerfectHash::try_from(&lm1b).unwrap();
    assert_bytes_eq!(26, trie.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&lm1a, &trie);

    let expected_bytes = testdata::basic::TRIE_UNICODE;
    let trie = ZeroTriePerfectHash::try_from(&lm2).unwrap();
    assert_bytes_eq!(39, trie.as_bytes(), expected_bytes);
    check_phf_bytes_trie(&lm2, &trie);

    let expected_bytes = testdata::basic::TRIE_BINARY;
    let trie = ZeroTriePerfectHash::try_from(&lm3).unwrap();
    assert_bytes_eq!(26, trie.as_bytes(), expected_bytes);
    check_phf_bytes_trie(&lm3, &trie);
}

#[test]
fn test_empty() {
    let trie = ZeroTrieSimpleAscii::try_from(&LiteMap::<&[u8], usize>::new_vec()).unwrap();
    assert_eq!(trie.byte_len(), 0);
    assert!(trie.is_empty());
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.as_bytes(), &[]);
}

#[test]
fn test_single_empty_value() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b""[..], 10), //
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), Some(10));
    assert_eq!(trie.get(b"x"), None);
    let expected_bytes = &[0b10001010];
    assert_eq!(trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(1, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_single_byte_string() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b"x"[..], 10), //
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"xy"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[b'x', 0b10001010];
    assert_bytes_eq!(2, trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(2, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_single_string() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b"xyz"[..], 10), //
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"x"), None);
    assert_eq!(trie.get(b"xy"), None);
    assert_eq!(trie.get(b"xyzz"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[b'x', b'y', b'z', 0b10001010];
    assert_bytes_eq!(4, trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_prefix_strings() {
    let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"xy", 1)].into_iter().collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"xyz"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[b'x', 0b10000000, b'y', 0b10000001];
    assert_bytes_eq!(4, trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_single_byte_branch() {
    let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"y", 1)].into_iter().collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"xy"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[0b11000010, b'x', b'y', 1, 0b10000000, 0b10000001];
    assert_bytes_eq!(6, trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(6, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_multi_byte_branch() {
    let litemap: LiteMap<&[u8], usize> = [(&b"axb"[..], 0), (b"ayc", 1)].into_iter().collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"a"), None);
    assert_eq!(trie.get(b"ax"), None);
    assert_eq!(trie.get(b"ay"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[
        b'a', 0b11000010, b'x', b'y', 2, b'b', 0b10000000, b'c', 0b10000001,
    ];
    assert_bytes_eq!(9, trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(9, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_linear_varint_values() {
    let litemap: LiteMap<&[u8], usize> = [(&b""[..], 100), (b"x", 500), (b"xyz", 5000)]
        .into_iter()
        .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b"xy"), None);
    assert_eq!(trie.get(b"xz"), None);
    assert_eq!(trie.get(b"xyzz"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[0x90, 0x54, b'x', 0x93, 0x64, b'y', b'z', 0x90, 0x96, 0x78];
    assert_bytes_eq!(10, trie.as_bytes(), expected_bytes);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(10, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_bug() {
    let litemap: LiteMap<&[u8], usize> = [(&b"abc"[..], 100), (b"abcd", 500), (b"abcde", 5000)]
        .into_iter()
        .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b"ab"), None);
    assert_eq!(trie.get(b"abd"), None);
    assert_eq!(trie.get(b"abCD"), None);
    check_simple_ascii_trie(&litemap, &trie);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_varint_branch() {
    let chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    let litemap: LiteMap<&[u8], usize> = (0..chars.len())
        .map(|i| (chars.get(i..i + 1).unwrap().as_bytes(), i))
        .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"ax"), None);
    assert_eq!(trie.get(b"ay"), None);
    check_simple_ascii_trie(&litemap, &trie);
    #[rustfmt::skip]
    let expected_bytes = &[
        0b11100000, // branch varint lead
        0x14,       // branch varint trail
        // search array:
        b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
        b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
        b'U', b'V', b'W', b'X', b'Y', b'Z',
        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
        b'u', b'v', b'w', b'x', b'y', b'z',
        // offset array:
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20,
        22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52,
        54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84,
        86,
        // single-byte values:
        0x80, (0x80 | 1), (0x80 | 2), (0x80 | 3), (0x80 | 4),
        (0x80 | 5), (0x80 | 6), (0x80 | 7), (0x80 | 8), (0x80 | 9),
        (0x80 | 10), (0x80 | 11), (0x80 | 12), (0x80 | 13), (0x80 | 14),
        (0x80 | 15),
        // multi-byte values:
        0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5,
        0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x90, 10, 0x90, 11,
        0x90, 12, 0x90, 13, 0x90, 14, 0x90, 15, 0x90, 16, 0x90, 17,
        0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23,
        0x90, 24, 0x90, 25, 0x90, 26, 0x90, 27, 0x90, 28, 0x90, 29,
        0x90, 30, 0x90, 31, 0x90, 32, 0x90, 33, 0x90, 34, 0x90, 35,
    ];
    assert_bytes_eq!(193, trie.as_bytes(), expected_bytes);

    #[rustfmt::skip]
    let expected_bytes = &[
        0b11100000, // branch varint lead
        0x14,       // branch varint trail
        // PHF metadata:
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 10, 12, 16, 4, 4, 4, 4, 4, 4, 8,
        4, 4, 4, 16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 7,
        // search array:
        b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
        b'p', b'u', b'v', b'w', b'D', b'E', b'F', b'q',
        b'r', b'A', b'B', b'C', b'x', b'y', b'z', b's',
        b'H', b'I', b'J', b'G', b'P', b'Q', b'R', b'S',
        b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'K',
        b'L', b'M', b'N', b'O', b'g', b'a', b'b', b'c',
        b't', b'd', b'f', b'e',
        // offset array:
        2, 4, 6, 8, 10, 12, 14,
        16, 18, 20, 22, 24, 25, 26, 27,
        29, 31, 32, 33, 34, 36, 38, 40,
        42, 43, 44, 45, 46, 47, 49, 51,
        53, 55, 57, 59, 61, 63, 65, 67,
        68, 69, 70, 71, 72, 74, 76, 78,
        80, 82, 84, 86,
        // values:
        0x90, 17, 0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23,
        0x90, 24, 0x90, 25, 0x90, 30, 0x90, 31, 0x90, 32, 0x80 | 3, 0x80 | 4,
        0x80 | 5, 0x90, 26, 0x90, 27, 0x80, 0x80 | 1, 0x80 | 2, 0x90, 33,
        0x90, 34, 0x90, 35, 0x90, 28, 0x80 | 7, 0x80 | 8, 0x80 | 9, 0x80 | 6,
        0x80 | 15, 0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5,
        0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x80 | 10, 0x80 | 11, 0x80 | 12,
        0x80 | 13, 0x80 | 14, 0x90, 16, 0x90, 10, 0x90, 11, 0x90, 12, 0x90, 29,
        0x90, 13, 0x90, 15, 0x90, 14,
    ];
    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(246, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);
}

#[test]
fn test_below_wide() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b"abcdefghijklmnopqrstuvwxyz"[..], 1),
        (b"bcdefghijklmnopqrstuvwxyza", 2),
        (b"cdefghijklmnopqrstuvwxyzab", 3),
        (b"defghijklmnopqrstuvwxyzabc", 4),
        (b"efghijklmnopqrstuvwxyzabcd", 5),
        (b"fghijklmnopqrstuvwxyzabcde", 6),
        (b"ghijklmnopqrstuvwxyzabcdef", 7),
        (b"hijklmnopqrstuvwxyzabcdefg", 8),
        (b"ijklmnopqrstuvwxyzabcdefgh", 9),
        (b"jklmnopqrstuvwxyzabcd", 10),
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"abc"), None);
    check_simple_ascii_trie(&litemap, &trie);
    #[rustfmt::skip]
    let expected_bytes = &[
        0b11001010, // branch
        // search array:
        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
        // offset array:
        26, 52, 78, 104, 130, 156, 182, 208, 234,
        // offset data:
        b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
        b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
        0x81,
        b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
        b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a',
        0x82,
        b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
        b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b',
        0x83,
        b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q',
        b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c',
        0x84,
        b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r',
        b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd',
        0x85,
        b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
        b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
        0x86,
        b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
        b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
        0x87,
        b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u',
        b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
        0x88,
        b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
        b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
        0x89,
        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
        b'x', b'y', b'z', b'a', b'b', b'c', b'd',
        0x8A,
    ];
    assert_bytes_eq!(275, trie.as_bytes(), expected_bytes);
}

#[test]
fn test_at_wide() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b"abcdefghijklmnopqrstuvwxyz"[..], 1),
        (b"bcdefghijklmnopqrstuvwxyza", 2),
        (b"cdefghijklmnopqrstuvwxyzab", 3),
        (b"defghijklmnopqrstuvwxyzabc", 4),
        (b"efghijklmnopqrstuvwxyzabcd", 5),
        (b"fghijklmnopqrstuvwxyzabcde", 6),
        (b"ghijklmnopqrstuvwxyzabcdef", 7),
        (b"hijklmnopqrstuvwxyzabcdefg", 8),
        (b"ijklmnopqrstuvwxyzabcdefgh", 9),
        (b"jklmnopqrstuvwxyzabcde", 10),
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"abc"), None);
    check_simple_ascii_trie(&litemap, &trie);
    #[rustfmt::skip]
    let expected_bytes = &[
        0b11100001, // branch lead
        0x6A, // branch trail
        // search array:
        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
        // offset array (wide):
        0, 0, 0, 0, 0, 0, 0, 0, 0,
        26, 52, 78, 104, 130, 156, 182, 208, 234,
        // offset data:
        b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
        b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
        0x81,
        b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
        b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a',
        0x82,
        b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
        b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b',
        0x83,
        b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q',
        b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c',
        0x84,
        b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r',
        b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd',
        0x85,
        b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
        b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
        0x86,
        b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
        b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
        0x87,
        b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u',
        b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
        0x88,
        b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
        b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
        0x89,
        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
        b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
        0x8A,
    ];
    assert_bytes_eq!(286, trie.as_bytes(), expected_bytes);
}

#[test]
fn test_at_wide_plus() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b"abcdefghijklmnopqrstuvwxyz"[..], 1),
        (b"bcdefghijklmnopqrstuvwxyza", 2),
        (b"cdefghijklmnopqrstuvwxyzab", 3),
        (b"defghijklmnopqrstuvwxyzabc", 4),
        (b"efghijklmnopqrstuvwxyzabcd", 5),
        (b"fghijklmnopqrstuvwxyzabcde", 6),
        (b"ghijklmnopqrstuvwxyzabcdef", 7),
        (b"hijklmnopqrstuvwxyzabcdefg", 8),
        (b"ijklmnopqrstuvwxyzabcdefgh", 9),
        (b"jklmnopqrstuvwxyzabcdef", 10),
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), None);
    assert_eq!(trie.get(b"abc"), None);
    check_simple_ascii_trie(&litemap, &trie);
    #[rustfmt::skip]
    let expected_bytes = &[
        0b11100001, // branch lead
        0x6A, // branch trail
        // search array:
        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
        // offset array (wide):
        0, 0, 0, 0, 0, 0, 0, 0, 0,
        26, 52, 78, 104, 130, 156, 182, 208, 234,
        // offset data:
        b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
        b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
        0x81,
        b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
        b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a',
        0x82,
        b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
        b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b',
        0x83,
        b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q',
        b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c',
        0x84,
        b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r',
        b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd',
        0x85,
        b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
        b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
        0x86,
        b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
        b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
        0x87,
        b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u',
        b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
        0x88,
        b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
        b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
        0x89,
        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
        b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
        0x8A,
    ];
    assert_bytes_eq!(287, trie.as_bytes(), expected_bytes);
}

#[test]
fn test_everything() {
    let litemap: LiteMap<&[u8], usize> = [
        (&b""[..], 0),
        (b"axb", 100),
        (b"ayc", 2),
        (b"azd", 3),
        (b"bxe", 4),
        (b"bxefg", 500),
        (b"bxefh", 6),
        (b"bxei", 7),
        (b"bxeikl", 8),
    ]
    .into_iter()
    .collect();
    let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
    assert_eq!(trie.get(b""), Some(0));
    assert_eq!(trie.get(b"a"), None);
    assert_eq!(trie.get(b"ax"), None);
    assert_eq!(trie.get(b"ay"), None);
    check_simple_ascii_trie(&litemap, &trie);
    let expected_bytes = &[
        0b10000000, // value 0
        0b11000010, // branch of 2
        b'a',       //
        b'b',       //
        13,         //
        0b11000011, // branch of 3
        b'x',       //
        b'y',       //
        b'z',       //
        3,          //
        5,          //
        b'b',       //
        0b10010000, // value 100 (lead)
        0x54,       // value 100 (trail)
        b'c',       //
        0b10000010, // value 2
        b'd',       //
        0b10000011, // value 3
        b'x',       //
        b'e',       //
        0b10000100, // value 4
        0b11000010, // branch of 2
        b'f',       //
        b'i',       //
        7,          //
        0b11000010, // branch of 2
        b'g',       //
        b'h',       //
        2,          //
        0b10010011, // value 500 (lead)
        0x64,       // value 500 (trail)
        0b10000110, // value 6
        0b10000111, // value 7
        b'k',       //
        b'l',       //
        0b10001000, // value 8
    ];
    assert_bytes_eq!(36, trie.as_bytes(), expected_bytes);

    #[rustfmt::skip]
    let expected_bytes = &[
        0b10000000, // value 0
        0b11000010, // branch of 2
        b'a',       //
        b'b',       //
        13,         //
        0b11000011, // start of 'a' subtree: branch of 3
        b'x',       //
        b'y',       //
        b'z',       //
        3,          //
        5,          //
        b'b',       //
        0b10010000, // value 100 (lead)
        0x54,       // value 100 (trail)
        b'c',       //
        0b10000010, // value 2
        b'd',       //
        0b10000011, // value 3
        b'x',       // start of 'b' subtree
        b'e',       //
        0b10000100, // value 4
        0b11000010, // branch of 2
        b'f',       //
        b'i',       //
        7,          //
        0b11000010, // branch of 2
        b'g',       //
        b'h',       //
        2,          //
        0b10010011, // value 500 (lead)
        0x64,       // value 500 (trail)
        0b10000110, // value 6
        0b10000111, // value 7
        b'k',       //
        b'l',       //
        0b10001000, // value 8
    ];
    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_bytes_eq!(36, trie_phf.as_bytes(), expected_bytes);
    check_phf_ascii_trie(&litemap, &trie_phf);

    let zhm: zerovec::ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 88);

    let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 61);

    let zhm: zerovec::ZeroHashMap<[u8], u32> =
        litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 161);

    let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 134);
}

macro_rules! utf8_byte {
    ($ch:expr, $i:literal) => {{
        let mut utf8_encoder_buf = [0u8; 4];
        $ch.encode_utf8(&mut utf8_encoder_buf);
        utf8_encoder_buf[$i]
    }};
}

#[test]
fn test_non_ascii() {
    let litemap: LiteMap<&[u8], usize> = [
        ("".as_bytes(), 0),
        ("axb".as_bytes(), 100),
        ("ayc".as_bytes(), 2),
        ("azd".as_bytes(), 3),
        ("bxe".as_bytes(), 4),
        ("bxefg".as_bytes(), 500),
        ("bxefh".as_bytes(), 6),
        ("bxei".as_bytes(), 7),
        ("bxeikl".as_bytes(), 8),
        ("bxeiklmΚαλημέρααα".as_bytes(), 9),
        ("bxeiklmαnλo".as_bytes(), 10),
        ("bxeiklmη".as_bytes(), 11),
    ]
    .into_iter()
    .collect();

    #[rustfmt::skip]
    let expected_bytes = &[
        0b10000000, // value 0
        0b11000010, // branch of 2
        b'a',       //
        b'b',       //
        13,         //
        0b11000011, // start of 'a' subtree: branch of 3
        b'x',       //
        b'y',       //
        b'z',       //
        3,          //
        5,          //
        b'b',       //
        0b10010000, // value 100 (lead)
        0x54,       // value 100 (trail)
        b'c',       //
        0b10000010, // value 2
        b'd',       //
        0b10000011, // value 3
        b'x',       // start of 'b' subtree
        b'e',       //
        0b10000100, // value 4
        0b11000010, // branch of 2
        b'f',       //
        b'i',       //
        7,          //
        0b11000010, // branch of 2
        b'g',       //
        b'h',       //
        2,          //
        0b10010011, // value 500 (lead)
        0x64,       // value 500 (trail)
        0b10000110, // value 6
        0b10000111, // value 7
        b'k',       //
        b'l',       //
        0b10001000, // value 8
        b'm',       //
        0b10100001, // span of length 1
        utf8_byte!('Κ', 0), // NOTE: all three letters have the same lead byte
        0b11000011, // branch of 3
        utf8_byte!('Κ', 1),
        utf8_byte!('α', 1),
        utf8_byte!('η', 1),
        21,
        27,
        0b10110000, // span of length 18 (lead)
        0b00000010, // span of length 18 (trail)
        utf8_byte!('α', 0),
        utf8_byte!('α', 1),
        utf8_byte!('λ', 0),
        utf8_byte!('λ', 1),
        utf8_byte!('η', 0),
        utf8_byte!('η', 1),
        utf8_byte!('μ', 0),
        utf8_byte!('μ', 1),
        utf8_byte!('έ', 0),
        utf8_byte!('έ', 1),
        utf8_byte!('ρ', 0),
        utf8_byte!('ρ', 1),
        utf8_byte!('α', 0),
        utf8_byte!('α', 1),
        utf8_byte!('α', 0),
        utf8_byte!('α', 1),
        utf8_byte!('α', 0),
        utf8_byte!('α', 1),
        0b10001001, // value 9
        b'n',
        0b10100010, // span of length 2
        utf8_byte!('λ', 0),
        utf8_byte!('λ', 1),
        b'o',
        0b10001010, // value 10
        0b10001011, // value 11
    ];
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap();
    assert_bytes_eq!(73, trie_phf.as_bytes(), expected_bytes);
    check_phf_bytes_trie(&litemap, &trie_phf);
}

#[test]
fn test_max_branch() {
    // Evaluate a branch with all 256 possible children
    let mut litemap: LiteMap<&[u8], usize> = LiteMap::new_vec();
    let all_bytes: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    assert_eq!(all_bytes.len(), 256);
    let all_bytes_prefixed: Vec<[u8; 2]> = (u8::MIN..=u8::MAX).map(|x| [b'\0', x]).collect();
    for b in all_bytes.iter() {
        litemap.insert(core::slice::from_ref(b), *b as usize);
    }
    for s in all_bytes_prefixed.iter() {
        litemap.insert(s, s[1] as usize);
    }
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap();
    assert_eq!(trie_phf.byte_len(), 3042);
    check_phf_bytes_trie(&litemap, &trie_phf);
}

#[test]
fn test_short_subtags_10pct() {
    let litemap = strings_to_litemap(testdata::short_subtags_10pct::STRINGS);

    let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
    assert_eq!(trie.byte_len(), 1050);
    check_simple_ascii_trie(&litemap, &trie);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_eq!(trie_phf.byte_len(), 1100);
    check_phf_ascii_trie(&litemap, &trie_phf);

    let zhm: zerovec::ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 1890);

    let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 1326);

    let zhm: zerovec::ZeroHashMap<[u8], u32> =
        litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 3396);

    let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 2832);
}

#[test]
fn test_short_subtags() {
    let litemap = strings_to_litemap(testdata::short_subtags::STRINGS);

    let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
    assert_eq!(trie.byte_len(), 8793);
    check_simple_ascii_trie(&litemap, &trie);

    let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
    let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
    assert_eq!(trie_phf.byte_len(), 9400);
    check_phf_ascii_trie(&litemap, &trie_phf);

    let zm: zerovec::ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
    let zhm_buf = postcard::to_allocvec(&zm).unwrap();
    assert_eq!(zhm_buf.len(), 18931);

    let zm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect();
    let zhm_buf = postcard::to_allocvec(&zm).unwrap();
    assert_eq!(zhm_buf.len(), 13300);

    let zhm: zerovec::ZeroHashMap<[u8], u32> =
        litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 33949);

    let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect();
    let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
    assert_eq!(zhm_buf.len(), 28318);
}