// SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- // Copyright 2011-2020 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy // of the License at: // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations // under the License. // ---------------------------------------------------------------------------- /** * @brief Functions for generating partition tables on demand. */ #include "astc_codec_internals.h" /* Produce a canonicalized representation of a partition pattern The largest possible such representation is 432 bits, equal to 7 uint64_t values. */ static void gen_canonicalized_partition_table( int texel_count, const uint8_t* partition_table, uint64_t canonicalized[7] ) { int i; for (i = 0; i < 7; i++) canonicalized[i] = 0; int mapped_index[4]; int map_weight_count = 0; for (i = 0; i < 4; i++) mapped_index[i] = -1; for (i = 0; i < texel_count; i++) { int index = partition_table[i]; if (mapped_index[index] == -1) mapped_index[index] = map_weight_count++; uint64_t xlat_index = mapped_index[index]; canonicalized[i >> 5] |= xlat_index << (2 * (i & 0x1F)); } } static int compare_canonicalized_partition_tables( const uint64_t part1[7], const uint64_t part2[7] ) { if (part1[0] != part2[0]) return 0; if (part1[1] != part2[1]) return 0; if (part1[2] != part2[2]) return 0; if (part1[3] != part2[3]) return 0; if (part1[4] != part2[4]) return 0; if (part1[5] != part2[5]) return 0; if (part1[6] != part2[6]) return 0; return 1; } /* For a partition table, detect partitionss that are equivalent, then mark them as invalid. This reduces the number of partitions that the codec has to consider and thus improves encode performance. */ static void partition_table_zap_equal_elements( int texel_count, partition_info* pi ) { int partition_tables_zapped = 0; int i, j; uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7]; for (i = 0; i < PARTITION_COUNT; i++) { gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7); } for (i = 0; i < PARTITION_COUNT; i++) { for (j = 0; j < i; j++) { if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j)) { pi[i].partition_count = 0; partition_tables_zapped++; break; } } } delete[]canonicalizeds; } static uint32_t hash52(uint32_t inp) { inp ^= inp >> 15; inp *= 0xEEDE0891; // (2^4+1)*(2^7+1)*(2^17-1) inp ^= inp >> 5; inp += inp << 16; inp ^= inp >> 7; inp ^= inp >> 3; inp ^= inp << 6; inp ^= inp >> 17; return inp; } static int select_partition( int seed, int x, int y, int z, int partitioncount, int small_block ) { if (small_block) { x <<= 1; y <<= 1; z <<= 1; } seed += (partitioncount - 1) * 1024; uint32_t rnum = hash52(seed); uint8_t seed1 = rnum & 0xF; uint8_t seed2 = (rnum >> 4) & 0xF; uint8_t seed3 = (rnum >> 8) & 0xF; uint8_t seed4 = (rnum >> 12) & 0xF; uint8_t seed5 = (rnum >> 16) & 0xF; uint8_t seed6 = (rnum >> 20) & 0xF; uint8_t seed7 = (rnum >> 24) & 0xF; uint8_t seed8 = (rnum >> 28) & 0xF; uint8_t seed9 = (rnum >> 18) & 0xF; uint8_t seed10 = (rnum >> 22) & 0xF; uint8_t seed11 = (rnum >> 26) & 0xF; uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; // squaring all the seeds in order to bias their distribution // towards lower values. seed1 *= seed1; seed2 *= seed2; seed3 *= seed3; seed4 *= seed4; seed5 *= seed5; seed6 *= seed6; seed7 *= seed7; seed8 *= seed8; seed9 *= seed9; seed10 *= seed10; seed11 *= seed11; seed12 *= seed12; int sh1, sh2, sh3; if (seed & 1) { sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5); } else { sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5); } sh3 = (seed & 0x10) ? sh1 : sh2; seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); // apply the saw a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; // remove some of the components if we are to output < 4 partitions. if (partitioncount <= 3) d = 0; if (partitioncount <= 2) c = 0; if (partitioncount <= 1) b = 0; int partition; if (a >= b && a >= c && a >= d) partition = 0; else if (b >= c && b >= d) partition = 1; else if (c >= d) partition = 2; else partition = 3; return partition; } static void generate_one_partition_table( const block_size_descriptor* bsd, int partition_count, int partition_index, partition_info* pt ) { int texels_per_block = bsd->texel_count; int small_block = texels_per_block < 32; uint8_t *partition_of_texel = pt->partition_of_texel; int x, y, z, i; for (z = 0; z < bsd->zdim; z++) for (y = 0; y < bsd->ydim; y++) for (x = 0; x < bsd->xdim; x++) { uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); *partition_of_texel++ = part; } int counts[4]; for (i = 0; i < 4; i++) counts[i] = 0; for (i = 0; i < texels_per_block; i++) { int partition = pt->partition_of_texel[i]; counts[partition]++; } if (counts[0] == 0) pt->partition_count = 0; else if (counts[1] == 0) pt->partition_count = 1; else if (counts[2] == 0) pt->partition_count = 2; else if (counts[3] == 0) pt->partition_count = 3; else pt->partition_count = 4; } /* Public function, see header file for detailed documentation */ void init_partition_tables( block_size_descriptor* bsd ) { partition_info *par_tab2 = bsd->partitions; partition_info *par_tab3 = par_tab2 + PARTITION_COUNT; partition_info *par_tab4 = par_tab3 + PARTITION_COUNT; partition_info *par_tab1 = par_tab4 + PARTITION_COUNT; generate_one_partition_table(bsd, 1, 0, par_tab1); for (int i = 0; i < 1024; i++) { generate_one_partition_table(bsd, 2, i, par_tab2 + i); generate_one_partition_table(bsd, 3, i, par_tab3 + i); generate_one_partition_table(bsd, 4, i, par_tab4 + i); } partition_table_zap_equal_elements(bsd->texel_count, par_tab2); partition_table_zap_equal_elements(bsd->texel_count, par_tab3); partition_table_zap_equal_elements(bsd->texel_count, par_tab4); }