157 lines
4.5 KiB
Python
Executable File
157 lines
4.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Copyright 2020 The Pigweed Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
# use this file except in compliance with the License. You may obtain a copy of
|
|
# the License at
|
|
#
|
|
# https://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations under
|
|
# the License.
|
|
"""Generates test data for hash_test.cc."""
|
|
|
|
import datetime
|
|
import os
|
|
import random
|
|
|
|
from pw_tokenizer import tokens
|
|
|
|
# Fixed hash lengths covered by the generated tests. These values select the
# macro headers to #include and the sizes of the boundary-length random
# strings.
HASH_LENGTHS = 80, 96, 128
# Name template of the C++ hash macro; formatted with a hash length.
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'
|
|
|
|
FILE_HEADER = """\
|
|
// Copyright {year} The Pigweed Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy of
|
|
// the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations under
|
|
// the License.
|
|
|
|
// AUTOGENERATED - DO NOT EDIT
|
|
//
|
|
// This file was generated by {script}.
|
|
// To make changes, update the script and run it to generate new files.
|
|
#pragma once
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <string_view>
|
|
|
|
{includes}
|
|
|
|
namespace pw::tokenizer {{
|
|
|
|
// Test a series of generated test cases.
|
|
inline constexpr struct {{
|
|
std::string_view string;
|
|
size_t hash_length;
|
|
uint32_t python_calculated_hash;
|
|
uint32_t macro_calculated_hash; // clang-format off
|
|
}} kHashTests[] = {{
|
|
|
|
"""
|
|
|
|
FILE_FOOTER = """
|
|
}; // kHashTests
|
|
|
|
// clang-format on
|
|
|
|
} // namespace pw::tokenizer
|
|
"""
|
|
|
|
_TEST_CASE = """{{
|
|
std::string_view("{str}", {string_length}u),
|
|
{hash_length}u, // fixed hash length
|
|
UINT32_C({hash}), // Python-calculated hash
|
|
{macro}("{str}"), // macro-calculated hash
|
|
}},
|
|
"""
|
|
|
|
|
|
def _include_paths(lengths):
|
|
return '\n'.join(
|
|
sorted(
|
|
'#include "pw_tokenizer/internal/'
|
|
'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
|
|
for length in lengths))
|
|
|
|
|
|
def _escape_for_cpp_string_literal(data):
    """Returns the bytes in data as the body of a C++ string literal.

    Args:
      data: bytes object to represent.

    Returns:
      A str to place between double quotes in C++ source.
    """
    if all(ord(' ') <= b <= ord('~') for b in data):
        # Printable ASCII is emitted as-is, apart from the two characters
        # that are special inside a C++ string literal. Backslashes must be
        # escaped first so that the backslashes added for the quotes are not
        # doubled as well.
        return data.decode().replace('\\', r'\\').replace('"', r'\"')

    # At least one byte is not printable ASCII: hex-escape every byte. Because
    # every character is escaped, a \x escape is never followed by a literal
    # hex digit that the C++ compiler would fold into the same escape.
    return ''.join(r'\x{:02x}'.format(b) for b in data)


def _test_case_at_length(data, hash_length):
    """Generates a test case for a particular hash length.

    Args:
      data: the string to hash, as str or bytes; a str is encoded with
        str.encode() first.
      hash_length: fixed hash length, used for the Python-calculated hash and
        to pick the C++ hash macro.

    Returns:
      The _TEST_CASE template filled in for this string and hash length.
    """
    if isinstance(data, str):
        data = data.encode()

    escaped_str = _escape_for_cpp_string_literal(data)

    return _TEST_CASE.format(str=escaped_str,
                             string_length=len(data),
                             hash_length=hash_length,
                             hash=tokens.pw_tokenizer_65599_hash(
                                 data, hash_length),
                             macro=HASH_MACRO.format(hash_length))
|
|
|
|
|
|
def test_case(data):
    """Returns the generated test case text for data at every hash length.

    Args:
      data: the string to hash, as str or bytes.

    Returns:
      The concatenated _test_case_at_length() output for each hash length in
      HASH_LENGTHS, in order.
    """
    # Iterate HASH_LENGTHS instead of repeating its values here, so the macros
    # referenced by the test cases always match the headers included via
    # _include_paths(HASH_LENGTHS).
    return ''.join(
        _test_case_at_length(data, length) for length in HASH_LENGTHS)
|
|
|
|
|
|
def generate_test_cases():
    """Yields the generated test case text for each test string.

    A fixed set of hand-picked strings comes first, followed by pseudorandom
    byte strings. The random strings come from a generator seeded with a
    constant, so every run produces the same data.

    Yields:
      The output of test_case() for each test string.
    """
    yield test_case('')
    yield test_case(b'\xa1')
    yield test_case(b'\xff')
    yield test_case('\0')
    yield test_case('\0\0')
    yield test_case('a')
    yield test_case('A')
    yield test_case('hello, "world"')
    yield test_case('YO' * 100)

    # Use a private generator with a fixed seed instead of reseeding the
    # module-level one. It produces the same sequence as random.seed(600613),
    # but other users of the random module cannot perturb it while this
    # generator is suspended between yields, and their state is not clobbered.
    rng = random.Random(600613)

    def random_string(size):
        """Returns size pseudorandom bytes."""
        return bytes(rng.randrange(256) for _ in range(size))

    # Two random strings for each short length from 1 to 15 bytes.
    for size in range(1, 16):
        yield test_case(random_string(size))
        yield test_case(random_string(size))

    # Random strings just below, at, and just above each fixed hash length.
    for length in HASH_LENGTHS:
        yield test_case(random_string(length - 1))
        yield test_case(random_string(length))
        yield test_case(random_string(length + 1))
|
|
|
|
|
|
if __name__ == '__main__':
    # The generated header goes in the pw_tokenizer_private directory that
    # sits next to this script's own directory.
    script_dir = os.path.dirname(__file__)
    path = os.path.realpath(
        os.path.join(script_dir, '..', 'pw_tokenizer_private',
                     'generated_hash_test_cases.h'))

    with open(path, 'w') as output:
        header = FILE_HEADER.format(year=datetime.date.today().year,
                                    script=os.path.basename(__file__),
                                    includes=_include_paths(HASH_LENGTHS))
        output.write(header)

        # Each generated test case is written as soon as it is produced.
        output.writelines(generate_test_cases())

        output.write(FILE_FOOTER)

    print('Wrote test data to', path)
|