3420 lines
132 KiB
C
3420 lines
132 KiB
C
/******************************************************************************
|
|
*
|
|
* Copyright (C) 2018 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*****************************************************************************
|
|
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
*/
|
|
/*!
|
|
******************************************************************************
|
|
* \file hme_defs.h
|
|
*
|
|
* \brief
|
|
* Important definitions, enumerations, macros and structures used by ME
|
|
*
|
|
* \date
|
|
* 18/09/2012
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
|
|
#ifndef _HME_DEFS_H_
|
|
#define _HME_DEFS_H_
|
|
|
|
/*****************************************************************************/
|
|
/* Constant Macros */
|
|
/*****************************************************************************/
|
|
/**
|
|
*******************************************************************************
|
|
@brief Blk size of the CTB in the max possible case
|
|
*******************************************************************************
|
|
*/
|
|
#define CTB_BLK_SIZE 64
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Maximun number of results per partition
|
|
*******************************************************************************
|
|
*/
|
|
#define MAX_RESULTS_PER_PART 2
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Not used currently
|
|
*******************************************************************************
|
|
*/
|
|
#define MAX_NUM_UNIFIED_RESULTS 10
|
|
#define MAX_NUM_CTB_NODES 10
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief For 64x64 CTB, we have 16x16 MV grid for prediction purposes (cost calc)
|
|
This has 1 padding at boundaries for causal neighbours
|
|
*******************************************************************************
|
|
*/
|
|
#define CTB_MV_GRID_PAD 1
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief number of bits per bin
|
|
*******************************************************************************
|
|
*/
|
|
#define HME_CABAC_BITS_PER_BIN 0.5
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief bin count to bit count conversion
|
|
*******************************************************************************
|
|
*/
|
|
#define HME_GET_CAB_BIT(x) (U08(((x)*HME_CABAC_BITS_PER_BIN + 0.5)))
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Columns in the MV grid
|
|
*******************************************************************************
|
|
*/
|
|
#define NUM_COLUMNS_IN_CTB_GRID (((CTB_BLK_SIZE) >> 2) + (2 * CTB_MV_GRID_PAD))
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Rows in MV grid
|
|
*******************************************************************************
|
|
*/
|
|
#define NUM_ROWS_IN_CTB_GRID (NUM_COLUMNS_IN_CTB_GRID)
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Total number of MVs held in CTB grid for prediction pourposes
|
|
*******************************************************************************
|
|
*/
|
|
#define NUM_MVS_IN_CTB_GRID ((NUM_COLUMNS_IN_CTB_GRID) * (NUM_ROWS_IN_CTB_GRID))
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Max number of candidates used for refinement during CU merge stage
|
|
*******************************************************************************
|
|
*/
|
|
#define MAX_MERGE_CANDTS 64
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief For BIDIR refinement, we use 2I-P0 as input, done max at CTB level, so
|
|
stride for this input is 64
|
|
*******************************************************************************
|
|
*/
|
|
#define BACK_PREDICTION_INPUT_STRIDE 64
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief We basically store an impossible and unique MV to identify intra blks
|
|
or CUs
|
|
*******************************************************************************
|
|
*/
|
|
#define INTRA_MV 0x4000
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Defines the largest CTB supported by HME
|
|
*******************************************************************************
|
|
*/
|
|
#define HME_MAX_CTB_SIZE 64
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Maximum number of 16x16 blks possible in a CTB. The basic search unit
|
|
in the encode layer is 16x16
|
|
*******************************************************************************
|
|
*/
|
|
#define HME_MAX_16x16_IN_CTB ((HME_MAX_CTB_SIZE >> 4) * (HME_MAX_CTB_SIZE >> 4))
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Max number of 8x8s possible in a CTB, this in other words is also the
|
|
maximum number of CUs possible in a CTB
|
|
*******************************************************************************
|
|
*/
|
|
#define HME_MAX_8x8_IN_CTB ((HME_MAX_CTB_SIZE >> 3) * (HME_MAX_CTB_SIZE >> 3))
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Maximum number of init candts supported for refinement search.
|
|
*******************************************************************************
|
|
*/
|
|
#define MAX_INIT_CANDTS 60
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Maximum MV in X and Y directions in fullpel units allowed in any layer
|
|
Any computed range for MV hasto be within this
|
|
*******************************************************************************
|
|
*/
|
|
#define MAX_MV_X_FINEST 1024
|
|
#define MAX_MV_Y_FINEST 512
|
|
|
|
#define MAX_NUM_RESULTS 10
|
|
|
|
#define USE_MODIFIED 1
|
|
|
|
#define ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 1
|
|
|
|
#define ENABLE_EXPLICIT_SEARCH_IN_PQ 0
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Driven by reasoning that we can tolerate an error of 4 in global mv
|
|
in coarsest layer per comp, assuming we have search range of 1024x512, the mv
|
|
range in coarse layer is 128x64, total bins is then 256/4 x 128/4 or 2K bins
|
|
*******************************************************************************
|
|
*/
|
|
#define LOG_MAX_NUM_BINS 11
|
|
#define MAX_NUM_BINS (1 << LOG_MAX_NUM_BINS)
|
|
|
|
#define NEXT_BLOCK_OFFSET_IN_L0_ME 22
|
|
|
|
#define PREV_BLOCK_OFFSET_IN_L0_ME 6
|
|
|
|
#define COLOCATED_BLOCK_OFFSET 2
|
|
|
|
#define COLOCATED_4X4_NEXT_BLOCK_OFFSET 14
|
|
|
|
#define MAP_X_MAX 16
|
|
|
|
#define MAP_Y_MAX 16
|
|
|
|
#define NUM_POINTS_IN_RECTANGULAR_GRID 9
|
|
|
|
/*
|
|
******************************************************************************
|
|
@brief Maximum number of elements in the sigmaX and sigmaX-Square array
|
|
computed at 4x4 level for any CU size
|
|
******************************************************************************
|
|
*/
|
|
#define MAX_NUM_SIGMAS_4x4 256
|
|
|
|
/*****************************************************************************/
|
|
/* Function Macros */
|
|
/*****************************************************************************/
|
|
|
|
/**
|
|
*******************************************************************************
|
|
@brief Calculates number of blks in picture, given width, ht, and a variable
|
|
shift that controls basic blk size
|
|
*******************************************************************************
|
|
*/
|
|
#define GET_NUM_BLKS_IN_PIC(wd, ht, shift, num_cols, num_blks) \
|
|
{ \
|
|
S32 y, rnd; \
|
|
rnd = (1 << shift) - 1; \
|
|
num_cols = (wd + rnd) >> shift; \
|
|
y = (ht + rnd) >> shift; \
|
|
num_blks = num_cols * y; \
|
|
}
|
|
|
|
#define COUNT_CANDS(a, b) \
|
|
{ \
|
|
b = (((a) & (1))) + (((a >> 1) & (1))) + (((a >> 2) & (1))) + (((a >> 3) & (1))) + \
|
|
(((a >> 4) & (1))) + (((a >> 5) & (1))) + (((a >> 6) & (1))) + (((a >> 7) & (1))) + \
|
|
(((a >> 8) & (1))); \
|
|
}
|
|
|
|
#define COPY_MV_TO_SEARCH_NODE(node, mv, pref, refid, shift) \
|
|
{ \
|
|
(node)->s_mv.i2_mvx = (mv)->i2_mv_x; \
|
|
(node)->s_mv.i2_mvy = (mv)->i2_mv_y; \
|
|
(node)->i1_ref_idx = *pref; \
|
|
(node)->u1_is_avail = 1; \
|
|
\
|
|
/* Can set the availability flag for MV Pred purposes */ \
|
|
if(((node)->i1_ref_idx < 0) || ((node)->s_mv.i2_mvx == INTRA_MV)) \
|
|
{ \
|
|
(node)->u1_is_avail = 0; \
|
|
(node)->i1_ref_idx = refid; \
|
|
(node)->s_mv.i2_mvx = 0; \
|
|
(node)->s_mv.i2_mvy = 0; \
|
|
} \
|
|
(node)->s_mv.i2_mvx >>= (shift); \
|
|
(node)->s_mv.i2_mvy >>= (shift); \
|
|
(node)->u1_subpel_done = (shift) ? 0 : 1; \
|
|
}
|
|
|
|
#define COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance) \
|
|
{ \
|
|
S32 mvx_q8 = (ps_mv)->mvx << 8; \
|
|
S32 mvy_q8 = (ps_mv)->mvy << 8; \
|
|
S32 mvcx_q8 = (ps_data)->s_centroid.i4_pos_x_q8; \
|
|
S32 mvcy_q8 = (ps_data)->s_centroid.i4_pos_y_q8; \
|
|
\
|
|
S32 mvdx_q8 = mvx_q8 - mvcx_q8; \
|
|
S32 mvdy_q8 = mvy_q8 - mvcy_q8; \
|
|
\
|
|
S32 mvdx = (mvdx_q8 + (1 << 7)) >> 8; \
|
|
S32 mvdy = (mvdy_q8 + (1 << 7)) >> 8; \
|
|
\
|
|
S32 mvd = ABS(mvdx) + ABS(mvdy); \
|
|
\
|
|
cumulative_mv_distance += mvd; \
|
|
}
|
|
|
|
#define STATS_COLLECTOR_MV_INSERT( \
|
|
ps_mv_store, num_mvs_stored, mvx_cur, mvy_cur, stats_struct, check_for_duplicate, ref_idx) \
|
|
{ \
|
|
S32 i4_j; \
|
|
(stats_struct).f_num_cands_being_processed++; \
|
|
check_for_duplicate = 0; \
|
|
\
|
|
for(i4_j = 0; i4_j < (num_mvs_stored); i4_j++) \
|
|
{ \
|
|
if(((ps_mv_store)[i4_j].s_mv.i2_mvx == (mvx_cur)) && \
|
|
((ps_mv_store)[i4_j].s_mv.i2_mvy == (mvy_cur)) && \
|
|
((ps_mv_store)[i4_j].i1_ref_idx == ref_idx)) \
|
|
{ \
|
|
(stats_struct).f_num_duplicates_amongst_processed++; \
|
|
check_for_duplicate = 0; \
|
|
break; \
|
|
} \
|
|
} \
|
|
\
|
|
if(i4_j == (num_mvs_stored)) \
|
|
{ \
|
|
(ps_mv_store)[i4_j].s_mv.i2_mvx = (mvx_cur); \
|
|
(ps_mv_store)[i4_j].s_mv.i2_mvy = (mvy_cur); \
|
|
(ps_mv_store)[i4_j].i1_ref_idx = ref_idx; \
|
|
(num_mvs_stored)++; \
|
|
} \
|
|
}
|
|
|
|
#define UPDATE_CLUSTER_METADATA_POST_MERGE(ps_cluster) \
|
|
{ \
|
|
S32 m; \
|
|
\
|
|
S32 num_clusters_evaluated = 0; \
|
|
\
|
|
for(m = 0; num_clusters_evaluated < (ps_cluster)->num_clusters; m++) \
|
|
{ \
|
|
if(!((ps_cluster)->as_cluster_data[m].is_valid_cluster)) \
|
|
{ \
|
|
if(-1 != (ps_cluster)->as_cluster_data[m].ref_id) \
|
|
{ \
|
|
(ps_cluster)->au1_num_clusters[(ps_cluster)->as_cluster_data[m].ref_id]--; \
|
|
} \
|
|
} \
|
|
else \
|
|
{ \
|
|
num_clusters_evaluated++; \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
#define SET_VALUES_FOR_TOP_REF_IDS(ps_cluster_blk, best_uni_ref, best_alt_ref, num_ref) \
|
|
{ \
|
|
ps_cluster_blk->best_uni_ref = best_uni_ref; \
|
|
ps_cluster_blk->best_alt_ref = best_alt_ref; \
|
|
ps_cluster_blk->num_refs = num_ref; \
|
|
}
|
|
|
|
#define MAP_X_MAX 16
|
|
#define MAP_Y_MAX 16
|
|
|
|
#define CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES( \
|
|
ps_dedup_enabler, num_cands, mvx, mvy, check_for_duplicate) \
|
|
{ \
|
|
S32 center_mvx; \
|
|
S32 center_mvy; \
|
|
S32 mvdx; \
|
|
S32 mvdy; \
|
|
U32 *pu4_node_map; \
|
|
S32 columnar_presence; \
|
|
\
|
|
(check_for_duplicate) = 0; \
|
|
{ \
|
|
subpel_dedup_enabler_t *ps_dedup = &(ps_dedup_enabler)[0]; \
|
|
center_mvx = ps_dedup->i2_mv_x; \
|
|
center_mvy = ps_dedup->i2_mv_y; \
|
|
pu4_node_map = ps_dedup->au4_node_map; \
|
|
\
|
|
mvdx = (mvx)-center_mvx; \
|
|
mvdy = (mvy)-center_mvy; \
|
|
\
|
|
if(((mvdx < MAP_X_MAX) && (mvdx >= -MAP_X_MAX)) && \
|
|
((mvdy < MAP_Y_MAX) && (mvdy >= -MAP_Y_MAX))) \
|
|
{ \
|
|
columnar_presence = pu4_node_map[MAP_X_MAX + mvdx]; \
|
|
\
|
|
if(0 == (columnar_presence & (1U << (MAP_Y_MAX + mvdy)))) \
|
|
{ \
|
|
columnar_presence |= (1U << (MAP_Y_MAX + mvdy)); \
|
|
pu4_node_map[MAP_X_MAX + mvdx] = columnar_presence; \
|
|
} \
|
|
else \
|
|
{ \
|
|
(check_for_duplicate) = 1; \
|
|
} \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
#define BUMP_OUTLIER_CLUSTERS(ps_cluster_blk, sdi_threshold) \
|
|
{ \
|
|
outlier_data_t as_outliers[MAX_NUM_CLUSTERS_64x64 + 1]; \
|
|
\
|
|
S32 j, k; \
|
|
\
|
|
S32 num_clusters_evaluated = 0; \
|
|
S32 num_clusters = ps_cluster_blk->num_clusters; \
|
|
S32 num_outliers_present = 0; \
|
|
\
|
|
for(j = 0; num_clusters_evaluated < num_clusters; j++) \
|
|
{ \
|
|
cluster_data_t *ps_data = &ps_cluster_blk->as_cluster_data[j]; \
|
|
\
|
|
if(!ps_data->is_valid_cluster) \
|
|
{ \
|
|
continue; \
|
|
} \
|
|
\
|
|
num_clusters_evaluated++; \
|
|
\
|
|
if((ps_data->num_mvs == 1) && (ps_data->as_mv[0].sdi < sdi_threshold) && \
|
|
(ps_cluster_blk->au1_num_clusters[ps_data->ref_id] > \
|
|
MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \
|
|
{ \
|
|
as_outliers[num_outliers_present].cluster_id = j; \
|
|
as_outliers[num_outliers_present].ref_idx = ps_data->ref_id; \
|
|
as_outliers[num_outliers_present].sdi = ps_data->as_mv[0].sdi; \
|
|
num_outliers_present++; \
|
|
} \
|
|
} \
|
|
\
|
|
for(j = 0; j < (num_outliers_present - 1); j++) \
|
|
{ \
|
|
for(k = (j + 1); k < num_outliers_present; k++) \
|
|
{ \
|
|
if(as_outliers[j].sdi > as_outliers[k].sdi) \
|
|
{ \
|
|
as_outliers[MAX_NUM_CLUSTERS_64x64] = as_outliers[j]; \
|
|
as_outliers[j] = as_outliers[k]; \
|
|
as_outliers[k] = as_outliers[MAX_NUM_CLUSTERS_64x64]; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
for(j = 0; j < (num_outliers_present); j++) \
|
|
{ \
|
|
S32 ref_idx = as_outliers[j].ref_idx; \
|
|
\
|
|
if((ps_cluster_blk->au1_num_clusters[ref_idx] > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \
|
|
{ \
|
|
ps_cluster_blk->as_cluster_data[as_outliers[j].cluster_id].is_valid_cluster = 0; \
|
|
ps_cluster_blk->num_clusters--; \
|
|
ps_cluster_blk->au1_num_clusters[ref_idx]--; \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
#define ADD_CLUSTER_CENTROID_AS_CANDS_FOR_BLK_MERGE( \
|
|
ps_cluster_data, ps_range_prms, ps_list, ps_mv, is_ref_in_l0, ref_idx) \
|
|
{ \
|
|
ps_list = &(ps_cluster_data)->as_mv_list[!(is_ref_in_l0)][(ref_idx)]; \
|
|
ps_mv = &ps_list->as_mv[ps_list->num_mvs]; \
|
|
\
|
|
ps_mv->i2_mvx = (ps_centroid->i4_pos_x_q8 + (1 << 7)) >> 8; \
|
|
ps_mv->i2_mvy = (ps_centroid->i4_pos_y_q8 + (1 << 7)) >> 8; \
|
|
\
|
|
CLIP_MV_WITHIN_RANGE(ps_mv->i2_mvx, ps_mv->i2_mvy, (ps_range_prms), 0, 0, 0); \
|
|
\
|
|
ps_cluster_data->ai4_ref_id_valid[!(is_ref_in_l0)][(ref_idx)] = 1; \
|
|
\
|
|
ps_list->num_mvs++; \
|
|
}
|
|
|
|
#define COPY_SEARCH_CANDIDATE_DATA(node, mv, pref, refid, shift) \
|
|
{ \
|
|
(node)->ps_mv->i2_mvx = (mv)->i2_mv_x; \
|
|
(node)->ps_mv->i2_mvy = (mv)->i2_mv_y; \
|
|
(node)->i1_ref_idx = *pref; \
|
|
(node)->u1_is_avail = 1; \
|
|
\
|
|
/* Can set the availability flag for MV Pred purposes */ \
|
|
if(((node)->i1_ref_idx < 0) || ((node)->ps_mv->i2_mvx == INTRA_MV)) \
|
|
{ \
|
|
(node)->u1_is_avail = 0; \
|
|
(node)->i1_ref_idx = refid; \
|
|
(node)->ps_mv->i2_mvx = 0; \
|
|
(node)->ps_mv->i2_mvy = 0; \
|
|
} \
|
|
(node)->ps_mv->i2_mvx >>= (shift); \
|
|
(node)->ps_mv->i2_mvy >>= (shift); \
|
|
(node)->u1_subpel_done = (shift) ? 0 : 1; \
|
|
}
|
|
/**
|
|
*******************************************************************************
|
|
* @macro MIN_NODE
|
|
* @brief Returns the search node with lesser cost
|
|
*******************************************************************************
|
|
*/
|
|
#define MIN_NODE(a, b) (((a)->i4_tot_cost < (b)->i4_tot_cost) ? (a) : (b))
|
|
|
|
/**
|
|
*******************************************************************************
|
|
* @macro MAX_NODE
|
|
* @brief Returns search node with higher cost
|
|
*******************************************************************************
|
|
*/
|
|
#define MAX_NODE(a, b) (((a)->i4_tot_cost >= (b)->i4_tot_cost) ? (a) : (b))
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro HME_INV_WT_PRED
|
|
* @brief Implements inverse of wt pred formula. Actual wt pred formula is
|
|
* ((input * wt) + rnd) >> shift) + offset
|
|
******************************************************************************
|
|
*/
|
|
#define HME_INV_WT_PRED(inp, wt, off, shift) (((((inp) - (off)) << (shift)) + ((wt) >> 1)) / (wt))
|
|
#define HME_INV_WT_PRED1(inp, wt, off, shift) \
|
|
(((((inp) - (off)) << (shift)) * wt + (1 << 14)) >> 15)
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro HME_WT_PRED
|
|
* @brief Implements wt pred formula as per spec
|
|
******************************************************************************
|
|
*/
|
|
#define HME_WT_PRED(p0, p1, w0, w1, rnd, shift) \
|
|
(((((S32)w0) * ((S32)p0) + ((S32)w1) * ((S32)p1)) >> shift) + rnd)
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro PREFETCH_BLK
|
|
* @brief Prefetches a block of data into cahce before hand
|
|
******************************************************************************
|
|
*/
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro INSERT_NEW_NODE
|
|
* @brief Inserts a new search node in a list if it is unique; helps in
|
|
removing duplicate nodes/candidates
|
|
******************************************************************************
|
|
*/
|
|
#define PREFETCH_BLK(pu1_src, src_stride, lines, type) \
|
|
{ \
|
|
WORD32 ctr; \
|
|
for(ctr = 0; ctr < lines; ctr++) \
|
|
{ \
|
|
PREFETCH((char const *)pu1_src, type); \
|
|
pu1_src += src_stride; \
|
|
} \
|
|
}
|
|
|
|
#define INSERT_UNIQUE_NODE( \
|
|
as_nodes, num_nodes, new_node, au4_map, center_x, center_y, use_hashing) \
|
|
{ \
|
|
WORD32 k; \
|
|
UWORD32 map; \
|
|
WORD32 delta_x, delta_y; \
|
|
delta_x = (new_node).ps_mv->i2_mvx - (center_x); \
|
|
delta_y = (new_node).ps_mv->i2_mvy - (center_y); \
|
|
map = 0; \
|
|
\
|
|
if((use_hashing) && (delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && \
|
|
(delta_y < MAP_Y_MAX) && (delta_y >= (-MAP_Y_MAX))) \
|
|
{ \
|
|
map = (au4_map)[delta_x + MAP_X_MAX]; \
|
|
if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \
|
|
{ \
|
|
(new_node).s_mv = (new_node).ps_mv[0]; \
|
|
(as_nodes)[(num_nodes)] = (new_node); \
|
|
((num_nodes))++; \
|
|
map |= 1U << (delta_y + MAP_Y_MAX); \
|
|
(au4_map)[delta_x + MAP_X_MAX] = map; \
|
|
} \
|
|
} \
|
|
else \
|
|
{ \
|
|
for(k = 0; k < ((num_nodes)); k++) \
|
|
{ \
|
|
/* Search is this node is already present in unique list */ \
|
|
if(((as_nodes)[k].s_mv.i2_mvx == (new_node).ps_mv->i2_mvx) && \
|
|
((as_nodes)[k].s_mv.i2_mvy == (new_node).ps_mv->i2_mvy) && \
|
|
((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \
|
|
{ \
|
|
/* This is duplicate node; need not be inserted */ \
|
|
break; \
|
|
} \
|
|
} \
|
|
if(k == ((num_nodes))) \
|
|
{ \
|
|
/* Insert new node only if it is not duplicate node */ \
|
|
(new_node).s_mv = (new_node).ps_mv[0]; \
|
|
(as_nodes)[k] = (new_node); \
|
|
((num_nodes))++; \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro INSERT_NEW_NODE
|
|
* @brief Inserts a new search node in a list if it is unique; helps in
|
|
removing duplicate nodes/candidates
|
|
******************************************************************************
|
|
*/
|
|
#define INSERT_NEW_NODE_NOMAP(as_nodes, num_nodes, new_node, implicit_layer) \
|
|
{ \
|
|
WORD32 k; \
|
|
if(!implicit_layer) \
|
|
{ \
|
|
for(k = 0; k < (num_nodes); k++) \
|
|
{ \
|
|
/* Search is this node is already present in unique list */ \
|
|
if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) && \
|
|
(as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy)) \
|
|
{ \
|
|
/* This is duplicate node; need not be inserted */ \
|
|
break; \
|
|
} \
|
|
} \
|
|
} \
|
|
else \
|
|
{ \
|
|
for(k = 0; k < (num_nodes); k++) \
|
|
{ \
|
|
/* Search is this node is already present in unique list */ \
|
|
if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) && \
|
|
(as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy) && \
|
|
(as_nodes[k].i1_ref_idx == new_node.i1_ref_idx)) \
|
|
{ \
|
|
/* This is duplicate node; need not be inserted */ \
|
|
break; \
|
|
} \
|
|
} \
|
|
} \
|
|
\
|
|
if(k == (num_nodes)) \
|
|
{ \
|
|
/* Insert new node only if it is not duplicate node */ \
|
|
as_nodes[k] = new_node; \
|
|
(num_nodes)++; \
|
|
} \
|
|
}
|
|
/**
|
|
******************************************************************************
|
|
* @macro INSERT_NEW_NODE_NOMAP_ALTERNATE
|
|
* @brief Inserts a new search node in a list if it is unique; helps in
|
|
removing duplicate nodes/candidates
|
|
******************************************************************************
|
|
*/
|
|
#define INSERT_NEW_NODE_NOMAP_ALTERNATE(as_nodes, num_nodes, new_node, result_num, part_id) \
|
|
{ \
|
|
WORD32 k; \
|
|
WORD32 part_id_1 = (new_node->i4_num_valid_parts > 8) ? new_node->ai4_part_id[part_id] \
|
|
: part_id; \
|
|
for(k = 0; k < (num_nodes); k++) \
|
|
{ \
|
|
/* Search is this node is already present in unique list */ \
|
|
if((as_nodes[k].s_mv.i2_mvx == new_node->i2_mv_x[result_num][part_id_1]) && \
|
|
(as_nodes[k].s_mv.i2_mvy == new_node->i2_mv_y[result_num][part_id_1]) && \
|
|
(as_nodes[k].i1_ref_idx == new_node->i2_ref_idx[result_num][part_id_1])) \
|
|
{ \
|
|
/* This is duplicate node; need not be inserted */ \
|
|
break; \
|
|
} \
|
|
} \
|
|
\
|
|
if(k == (num_nodes)) \
|
|
{ \
|
|
/* Insert new node only if it is not duplicate node */ \
|
|
as_nodes[k].i4_tot_cost = (WORD32)new_node->i2_tot_cost[result_num][part_id_1]; \
|
|
as_nodes[k].i4_mv_cost = (WORD32)new_node->i2_mv_cost[result_num][part_id_1]; \
|
|
as_nodes[k].s_mv.i2_mvx = new_node->i2_mv_x[result_num][part_id_1]; \
|
|
as_nodes[k].s_mv.i2_mvy = new_node->i2_mv_y[result_num][part_id_1]; \
|
|
as_nodes[k].i1_ref_idx = (WORD8)new_node->i2_ref_idx[result_num][part_id_1]; \
|
|
as_nodes[k].u1_part_id = new_node->ai4_part_id[part_id]; \
|
|
(num_nodes)++; \
|
|
} \
|
|
}
|
|
|
|
#define INSERT_NEW_NODE( \
|
|
as_nodes, num_nodes, new_node, implicit_layer, au4_map, center_x, center_y, use_hashing) \
|
|
{ \
|
|
WORD32 k; \
|
|
UWORD32 map; \
|
|
WORD32 delta_x, delta_y; \
|
|
delta_x = (new_node).s_mv.i2_mvx - center_x; \
|
|
delta_y = (new_node).s_mv.i2_mvy - center_y; \
|
|
map = 0; \
|
|
if((delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && (delta_y < MAP_Y_MAX) && \
|
|
(delta_y >= (-MAP_Y_MAX)) && (use_hashing)) \
|
|
{ \
|
|
map = (au4_map)[delta_x + MAP_X_MAX]; \
|
|
if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \
|
|
{ \
|
|
(as_nodes)[(num_nodes)] = (new_node); \
|
|
(num_nodes)++; \
|
|
map |= 1U << (delta_y + MAP_Y_MAX); \
|
|
(au4_map)[delta_x + MAP_X_MAX] = map; \
|
|
} \
|
|
} \
|
|
else if(!(implicit_layer)) \
|
|
{ \
|
|
for(k = 0; k < (num_nodes); k++) \
|
|
{ \
|
|
/* Search is this node is already present in unique list */ \
|
|
if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
|
|
((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy)) \
|
|
{ \
|
|
/* This is duplicate node; need not be inserted */ \
|
|
break; \
|
|
} \
|
|
} \
|
|
if(k == (num_nodes)) \
|
|
{ \
|
|
/* Insert new node only if it is not duplicate node */ \
|
|
(as_nodes)[k] = (new_node); \
|
|
(num_nodes)++; \
|
|
} \
|
|
} \
|
|
else \
|
|
{ \
|
|
for(k = 0; k < (num_nodes); k++) \
|
|
{ \
|
|
/* Search is this node is already present in unique list */ \
|
|
if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
|
|
((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy) && \
|
|
((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \
|
|
{ \
|
|
/* This is duplicate node; need not be inserted */ \
|
|
break; \
|
|
} \
|
|
} \
|
|
if(k == (num_nodes)) \
|
|
{ \
|
|
/* Insert new node only if it is not duplicate node */ \
|
|
(as_nodes)[k] = (new_node); \
|
|
(num_nodes)++; \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
#define COMPUTE_DIFF_MV(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \
|
|
{ \
|
|
mvdx = (inp_node)->s_mv.i2_mvx << (inp_sh); \
|
|
mvdy = (inp_node)->s_mv.i2_mvy << (inp_sh); \
|
|
mvdx -= ((mv_p_x) << (pred_sh)); \
|
|
mvdy -= ((mv_p_y) << (pred_sh)); \
|
|
}
|
|
|
|
#define COMPUTE_MV_DIFFERENCE(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \
|
|
{ \
|
|
mvdx = (inp_node)->ps_mv->i2_mvx << (inp_sh); \
|
|
mvdy = (inp_node)->ps_mv->i2_mvy << (inp_sh); \
|
|
mvdx -= ((mv_p_x) << (pred_sh)); \
|
|
mvdy -= ((mv_p_y) << (pred_sh)); \
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum CU_MERGE_RESULT_T
|
|
* @brief Describes the results of merge, whether successful or not
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
CU_MERGED,
|
|
CU_SPLIT
|
|
} CU_MERGE_RESULT_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum PART_ORIENT_T
|
|
* @brief Describes the orientation of partition (vert/horz, left/rt)
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
VERT_LEFT,
|
|
VERT_RIGHT,
|
|
HORZ_TOP,
|
|
HORZ_BOT
|
|
} PART_ORIENT_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum GRID_PT_T
|
|
* @brief For a 3x3 rect grid, nubers each pt as shown
|
|
* 5 2 6
|
|
* 1 0 3
|
|
* 7 4 8
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
PT_C = 0,
|
|
PT_L = 1,
|
|
PT_T = 2,
|
|
PT_R = 3,
|
|
PT_B = 4,
|
|
PT_TL = 5,
|
|
PT_TR = 6,
|
|
PT_BL = 7,
|
|
PT_BR = 8,
|
|
NUM_GRID_PTS
|
|
} GRID_PT_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro IS_POW
|
|
* @brief Returns whwehter a number is power of 2
|
|
******************************************************************************
|
|
*/
|
|
#define IS_POW_2(x) (!((x) & ((x)-1)))
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro GRID_ALL_PTS_VALID
|
|
* @brief For a 3x3 rect grid, this can be used to enable all pts in grid
|
|
******************************************************************************
|
|
*/
|
|
#define GRID_ALL_PTS_VALID 0x1ff
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro GRID_DIAMOND_ENABLE_ALL
|
|
* @brief If we search diamond, this enables all 5 pts of diamond (including centre)
|
|
******************************************************************************
|
|
*/
|
|
#define GRID_DIAMOND_ENABLE_ALL \
|
|
(BIT_EN(PT_C) | BIT_EN(PT_L) | BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B))
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro GRID_RT_3_INVALID, GRID_LT_3_INVALID,GRID_TOP_3_INVALID,GRID_BOT_3_INVALID
|
|
* @brief For a square grid search, depending on where the best result is
|
|
* we can optimise search for next iteration by invalidating some pts
|
|
******************************************************************************
|
|
*/
|
|
#define GRID_RT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR)))
|
|
#define GRID_LT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL)))
|
|
#define GRID_TOP_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR)))
|
|
#define GRID_BOT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR)))
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum GMV_MVTYPE_T
|
|
* @brief Defines what type of GMV we need (thin lobe for a very spiky
|
|
* distribution of mv or thick lobe for a blurred distrib of mvs
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
GMV_THICK_LOBE,
|
|
GMV_THIN_LOBE,
|
|
NUM_GMV_LOBES
|
|
} GMV_MVTYPE_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum BLK_TYPE_T
|
|
* @brief Defines all possible inter blks possible
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
BLK_INVALID = -1,
|
|
BLK_4x4 = 0,
|
|
BLK_4x8,
|
|
BLK_8x4,
|
|
BLK_8x8,
|
|
BLK_4x16,
|
|
BLK_8x16,
|
|
BLK_12x16,
|
|
BLK_16x4,
|
|
BLK_16x8,
|
|
BLK_16x12,
|
|
BLK_16x16,
|
|
BLK_8x32,
|
|
BLK_16x32,
|
|
BLK_24x32,
|
|
BLK_32x8,
|
|
BLK_32x16,
|
|
BLK_32x24,
|
|
BLK_32x32,
|
|
BLK_16x64,
|
|
BLK_32x64,
|
|
BLK_48x64,
|
|
BLK_64x16,
|
|
BLK_64x32,
|
|
BLK_64x48,
|
|
BLK_64x64,
|
|
NUM_BLK_SIZES
|
|
} BLK_SIZE_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum SEARCH_COMPLEXITY_T
|
|
* @brief For refinement layer, this decides the number of refinement candts
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
SEARCH_CX_LOW = 0,
|
|
SEARCH_CX_MED = 1,
|
|
SEARCH_CX_HIGH = 2
|
|
} SEARCH_COMPLEXITY_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum CTB_BOUNDARY_TYPES_T
|
|
* @brief For pictures not a multiples of CTB horizontally or vertically, we
|
|
* define 4 unique cases, centre (full ctbs), bottom boundary (64x8k CTBs),
|
|
* right boundary (8mx64 CTBs), and bottom rt corner (8mx8k CTB)
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
CTB_CENTRE,
|
|
CTB_BOT_PIC_BOUNDARY,
|
|
CTB_RT_PIC_BOUNDARY,
|
|
CTB_BOT_RT_PIC_BOUNDARY,
|
|
NUM_CTB_BOUNDARY_TYPES,
|
|
} CTB_BOUNDARY_TYPES_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum SEARCH_CANDIDATE_TYPE_T
|
|
* @brief Monikers for all sorts of search candidates used in ME
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
ILLUSORY_CANDIDATE = -1,
|
|
ZERO_MV = 0,
|
|
ZERO_MV_ALTREF,
|
|
SPATIAL_LEFT0,
|
|
SPATIAL_TOP0,
|
|
SPATIAL_TOP_RIGHT0,
|
|
SPATIAL_TOP_LEFT0,
|
|
SPATIAL_LEFT1,
|
|
SPATIAL_TOP1,
|
|
SPATIAL_TOP_RIGHT1,
|
|
SPATIAL_TOP_LEFT1,
|
|
PROJECTED_COLOC0,
|
|
PROJECTED_COLOC1,
|
|
PROJECTED_COLOC2,
|
|
PROJECTED_COLOC3,
|
|
PROJECTED_COLOC4,
|
|
PROJECTED_COLOC5,
|
|
PROJECTED_COLOC6,
|
|
PROJECTED_COLOC7,
|
|
PROJECTED_COLOC_TR0,
|
|
PROJECTED_COLOC_TR1,
|
|
PROJECTED_COLOC_BL0,
|
|
PROJECTED_COLOC_BL1,
|
|
PROJECTED_COLOC_BR0,
|
|
PROJECTED_COLOC_BR1,
|
|
PROJECTED_TOP0,
|
|
PROJECTED_TOP1,
|
|
PROJECTED_TOP_RIGHT0,
|
|
PROJECTED_TOP_RIGHT1,
|
|
PROJECTED_TOP_LEFT0,
|
|
PROJECTED_TOP_LEFT1,
|
|
PROJECTED_RIGHT0,
|
|
PROJECTED_RIGHT1,
|
|
PROJECTED_BOTTOM0,
|
|
PROJECTED_BOTTOM1,
|
|
PROJECTED_BOTTOM_RIGHT0,
|
|
PROJECTED_BOTTOM_RIGHT1,
|
|
PROJECTED_BOTTOM_LEFT0,
|
|
PROJECTED_BOTTOM_LEFT1,
|
|
COLOCATED_GLOBAL_MV0,
|
|
COLOCATED_GLOBAL_MV1,
|
|
PROJECTED_TOP2,
|
|
PROJECTED_TOP3,
|
|
PROJECTED_TOP_RIGHT2,
|
|
PROJECTED_TOP_RIGHT3,
|
|
PROJECTED_TOP_LEFT2,
|
|
PROJECTED_TOP_LEFT3,
|
|
PROJECTED_RIGHT2,
|
|
PROJECTED_RIGHT3,
|
|
PROJECTED_BOTTOM2,
|
|
PROJECTED_BOTTOM3,
|
|
PROJECTED_BOTTOM_RIGHT2,
|
|
PROJECTED_BOTTOM_RIGHT3,
|
|
PROJECTED_BOTTOM_LEFT2,
|
|
PROJECTED_BOTTOM_LEFT3,
|
|
NUM_SEARCH_CAND_TYPES
|
|
} SEARCH_CANDIDATE_TYPE_T;
|
|
|
|
typedef enum
|
|
{
|
|
ILLUSORY_LOCATION = -1,
|
|
COLOCATED,
|
|
COLOCATED_4x4_TR,
|
|
COLOCATED_4x4_BL,
|
|
COLOCATED_4x4_BR,
|
|
LEFT,
|
|
TOPLEFT,
|
|
TOP,
|
|
TOPRIGHT,
|
|
RIGHT,
|
|
BOTTOMRIGHT,
|
|
BOTTOM,
|
|
BOTTOMLEFT,
|
|
NUM_SEARCH_CAND_LOCATIONS
|
|
} SEARCH_CAND_LOCATIONS_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macros ENABLE_mxn
|
|
* @brief Enables a type or a group of partitions. ENABLE_ALL_PARTS, enables all
|
|
* partitions, while others enable selected partitions. These can be used
|
|
* to set the mask of active partitions
|
|
******************************************************************************
|
|
*/
|
|
#define ENABLE_2Nx2N (BIT_EN(PART_ID_2Nx2N))
|
|
#define ENABLE_2NxN (BIT_EN(PART_ID_2NxN_T) | BIT_EN(PART_ID_2NxN_B))
|
|
#define ENABLE_Nx2N (BIT_EN(PART_ID_Nx2N_L) | BIT_EN(PART_ID_Nx2N_R))
|
|
#define ENABLE_NxN \
|
|
(BIT_EN(PART_ID_NxN_TL) | BIT_EN(PART_ID_NxN_TR) | BIT_EN(PART_ID_NxN_BL) | \
|
|
BIT_EN(PART_ID_NxN_BR))
|
|
#define ENABLE_2NxnU (BIT_EN(PART_ID_2NxnU_T) | BIT_EN(PART_ID_2NxnU_B))
|
|
#define ENABLE_2NxnD (BIT_EN(PART_ID_2NxnD_T) | BIT_EN(PART_ID_2NxnD_B))
|
|
#define ENABLE_nLx2N (BIT_EN(PART_ID_nLx2N_L) | BIT_EN(PART_ID_nLx2N_R))
|
|
#define ENABLE_nRx2N (BIT_EN(PART_ID_nRx2N_L) | BIT_EN(PART_ID_nRx2N_R))
|
|
#define ENABLE_AMP ((ENABLE_2NxnU) | (ENABLE_2NxnD) | (ENABLE_nLx2N) | (ENABLE_nRx2N))
|
|
#define ENABLE_SMP ((ENABLE_2NxN) | (ENABLE_Nx2N))
|
|
#define ENABLE_ALL_PARTS \
|
|
((ENABLE_2Nx2N) | (ENABLE_NxN) | (ENABLE_2NxN) | (ENABLE_Nx2N) | (ENABLE_AMP))
|
|
#define ENABLE_SQUARE_PARTS ((ENABLE_2Nx2N) | (ENABLE_NxN))
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum MV_PEL_RES_T
|
|
* @brief Resolution of MV fpel/hpel/qpel units. Useful for maintaining
|
|
* predictors. During fpel search, candts, predictors etc are in fpel units,
|
|
* in subpel search, they are in subpel units
|
|
******************************************************************************
|
|
*/
|
|
typedef enum
|
|
{
|
|
MV_RES_FPEL,
|
|
MV_RES_HPEL,
|
|
MV_RES_QPEL
|
|
} MV_PEL_RES_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum HME_SET_MVPRED_RES
|
|
* @brief Sets resolution for predictor bank (fpel/qpel/hpel units)
|
|
******************************************************************************
|
|
*/
|
|
#define HME_SET_MVPRED_RES(ps_pred_ctxt, mv_pel_res) ((ps_pred_ctxt)->mv_pel = mv_pel_res)
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @enum HME_SET_MVPRED_DIR
|
|
* @brief Sets the direction, meaning L0/L1. Since L0 and L1 use separate
|
|
* candts, the pred ctxt for them hasto be maintained separately
|
|
******************************************************************************
|
|
*/
|
|
#define HME_SET_MVPRED_DIR(ps_pred_ctxt, pred_lx) ((ps_pred_ctxt)->pred_lx = pred_lx)
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief macros to clip / check mv within specified range
|
|
******************************************************************************
|
|
*/
|
|
#define CHECK_MV_WITHIN_RANGE(x, y, range) \
|
|
(((x) > (range)->i2_min_x) && ((x) < (range)->i2_max_x) && ((y) > (range)->i2_min_y) && \
|
|
((y) < (range)->i2_max_y))
|
|
|
|
#define CONVERT_MV_LIMIT_TO_QPEL(range) \
|
|
{ \
|
|
(range)->i2_max_x <<= 2; \
|
|
(range)->i2_max_y <<= 2; \
|
|
(range)->i2_min_x <<= 2; \
|
|
(range)->i2_min_y <<= 2; \
|
|
}
|
|
|
|
#define CONVERT_MV_LIMIT_TO_FPEL(range) \
|
|
{ \
|
|
(range)->i2_max_x >>= 2; \
|
|
(range)->i2_max_y >>= 2; \
|
|
(range)->i2_min_x >>= 2; \
|
|
(range)->i2_min_y >>= 2; \
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Swicth to debug the number of subpel search nodes
|
|
******************************************************************************
|
|
*/
|
|
#define DEBUG_SUBPEL_SEARCH_NODE_HS_COUNT 0
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @typedef SAD_GRID_T
|
|
* @brief Defines a 2D array type used to store SADs across grid and across
|
|
* partition types
|
|
******************************************************************************
|
|
*/
|
|
typedef S32 SAD_GRID_T[9][MAX_NUM_PARTS];
|
|
|
|
/*****************************************************************************/
|
|
/* Structures */
|
|
/*****************************************************************************/
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct grid_node_t
|
|
* @brief stores a complete info for a candt
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
S16 i2_mv_x;
|
|
S16 i2_mv_y;
|
|
S08 i1_ref_idx;
|
|
} grid_node_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct search_node_t
|
|
* @brief Basic structure used for storage of search results, specification
|
|
* of init candidates for search etc. This structure is complete for
|
|
* specification of mv and cost for a given direction of search (L0/L1) but
|
|
* does not carry information of what type of partition it represents.
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Motion vector */
|
|
mv_t s_mv;
|
|
|
|
/** Used in the hme_mv_clipper function to reduce loads and stores */
|
|
mv_t *ps_mv;
|
|
|
|
/** Ref id, as specified in terms of Lc, unified list */
|
|
S08 i1_ref_idx;
|
|
|
|
/** Flag to indicate whether mv is in fpel or QPEL units */
|
|
U08 u1_subpel_done;
|
|
|
|
/**
|
|
* Indicates whether this node constitutes a valid predictor candt.
|
|
* Since this structure also used for predictor candts, some candts may
|
|
* not be available (anti causal or outside pic boundary). Availabilit
|
|
* can be inferred using this flag.
|
|
*/
|
|
U08 u1_is_avail;
|
|
|
|
/**
|
|
* Indicates partition Id to which this node belongs. Useful during
|
|
* subpel / fullpel refinement search to identify partition whose
|
|
* cost needs to be minimized
|
|
*/
|
|
U08 u1_part_id;
|
|
|
|
/** SAD / SATD stored here */
|
|
S32 i4_sad;
|
|
|
|
/**
|
|
* Cost related to coding MV, multiplied by lambda
|
|
* TODO : Entry may be redundant, can be removed
|
|
*/
|
|
S32 i4_mv_cost;
|
|
|
|
/** Total cost, (SAD + MV Cost) */
|
|
S32 i4_tot_cost;
|
|
|
|
/** Subpel_Dist_Improvement.
|
|
It is the reduction in distortion (SAD or SATD) achieved
|
|
from the full-pel stage to the sub-pel stage
|
|
*/
|
|
S32 i4_sdi;
|
|
|
|
} search_node_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @macro INIT_SEARCH_NODE
|
|
* @brief Initializes this search_node_t structure. Can be used to zero
|
|
* out candts, set max costs in results etc
|
|
******************************************************************************
|
|
*/
|
|
#define INIT_SEARCH_NODE(x, a) \
|
|
{ \
|
|
(x)->s_mv.i2_mvx = 0; \
|
|
(x)->s_mv.i2_mvy = 0; \
|
|
(x)->i1_ref_idx = a; \
|
|
(x)->i4_tot_cost = MAX_32BIT_VAL; \
|
|
(x)->i4_sad = MAX_32BIT_VAL; \
|
|
(x)->u1_subpel_done = 0; \
|
|
(x)->u1_is_avail = 1; \
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct part_attr_t
|
|
* @brief Geometric description of a partition w.r.t. CU start. Note that
|
|
* since this is used across various CU sizes, the inference of
|
|
* these members is to be done in the context of specific usage
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Start of partition w.r.t. CU start in x dirn */
|
|
U08 u1_x_start;
|
|
/** Size of partitino w.r.t. CU start in x dirn */
|
|
U08 u1_x_count;
|
|
/** Start of partition w.r.t. CU start in y dirn */
|
|
U08 u1_y_start;
|
|
/** Size of partitino w.r.t. CU start in y dirn */
|
|
U08 u1_y_count;
|
|
} part_attr_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct search_candt_t
|
|
* @brief Complete information for a given candt in any refinement srch
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Points to the mv, ref id info. */
|
|
search_node_t *ps_search_node;
|
|
/** Number of refinemnts to be done for this candt */
|
|
U08 u1_num_steps_refine;
|
|
} search_candt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct result_node_t
|
|
* @brief Contains complete search result for a CU for a given type of
|
|
* partition split. Holds ptrs to results for each partition, with
|
|
* information of partition type.
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* Type of partition that the CU is split into, for which this
|
|
* result is relevant
|
|
*/
|
|
PART_TYPE_T e_part_type;
|
|
|
|
/**
|
|
* Total cost of coding the CU (sum of costs of individual partitions
|
|
* plus other possible CU level overheads)
|
|
*/
|
|
S32 i4_tot_cost;
|
|
|
|
/**
|
|
* Pointer to results of each individual partitions. Note that max
|
|
* number of partitions a CU can be split into is MAX_NUM_PARTS
|
|
*/
|
|
search_node_t *ps_part_result[MAX_NUM_PARTS];
|
|
|
|
/* TU split flag : tu_split_flag[0] represents the transform splits
|
|
* for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
|
|
* to respective 32x32 */
|
|
S32 ai4_tu_split_flag[4];
|
|
|
|
} result_node_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct ctb_node_t
|
|
* @brief Finalized information for a given CU or CTB. This is a recursive
|
|
* structure and can hence start at CTB level, recursing for every
|
|
* level of split till we hit leaf CUs in the CTB. At leaf node
|
|
* it contains info for coded non split CU, with child nodes being
|
|
* set to NULL
|
|
******************************************************************************
|
|
*/
|
|
typedef struct ctb_node_t
|
|
{
|
|
/** x offset of this CU w.r.t. CTB start (0-63) */
|
|
U08 u1_x_off;
|
|
/** y offset of this C U w.r.t. CTB start (0-63) */
|
|
U08 u1_y_off;
|
|
/** Results of each partition in both directions L0,L1 */
|
|
search_node_t as_part_results[MAX_NUM_PARTS][2];
|
|
/**
|
|
* Pointers to pred buffers. Note that the buffer may be allocated
|
|
* at parent level or at this level
|
|
*/
|
|
U08 *apu1_pred[2];
|
|
/** Prediction direction for each partition: 0-L0, 1-L1, 2-BI */
|
|
U08 u1_pred_dir[MAX_NUM_PARTS];
|
|
/**
|
|
* When pred direction is decided to be BI, we still store the best
|
|
* uni pred dir (L0/L1) in this array, for RD Opt purposes
|
|
*/
|
|
U08 u1_best_uni_dir[MAX_NUM_PARTS];
|
|
/** Stride of pred buffer pointed to by apu1_pred member */
|
|
S32 i4_pred_stride;
|
|
/** Size of the CU that this node represents */
|
|
CU_SIZE_T e_cu_size;
|
|
/** For leaf CUs, this indicats type of partition (for e.g. PRT_2NxN) */
|
|
PART_TYPE_T e_part_type;
|
|
/** Below entries are for a CU level*/
|
|
S32 i4_sad;
|
|
S32 i4_satd;
|
|
S32 i4_mv_cost;
|
|
S32 i4_rate;
|
|
S32 i4_dist;
|
|
S32 i4_tot_cost;
|
|
/** Best costs of each partitions, if partition is BI, then best cost across uni/bi */
|
|
S32 ai4_part_costs[4];
|
|
|
|
/* TU split flag : tu_split_flag[0] represents the transform splits
|
|
* for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
|
|
* to respective 32x32 */
|
|
/* For a 8x8 TU - 1 bit used to indicate split */
|
|
/* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
|
|
/* For a 32x32 TU - See above */
|
|
S32 ai4_tu_split_flag[4];
|
|
|
|
/**
|
|
* pointers to child nodes. If this node is split, then the below point
|
|
* to children nodes (TL, TR, BL, BR) each of quarter size (w/2, h/2)
|
|
* If this node not split, then below point to null
|
|
*/
|
|
struct ctb_node_t *ps_tl;
|
|
struct ctb_node_t *ps_tr;
|
|
struct ctb_node_t *ps_bl;
|
|
struct ctb_node_t *ps_br;
|
|
} ctb_node_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct ctb_mem_mgr_t
|
|
* @brief Memory manager structure for CTB level memory allocations of CTB
|
|
* nodes
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Base memory ptr */
|
|
U08 *pu1_mem;
|
|
/** Amount used so far (running value) */
|
|
S32 i4_used;
|
|
/** Total memory available for this mem mgr */
|
|
S32 i4_tot;
|
|
|
|
/** Size of CTB node, and alignment requiremnts */
|
|
S32 i4_size;
|
|
S32 i4_align;
|
|
} ctb_mem_mgr_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct buf_mgr_t
|
|
* @brief Memory manager structure for CTB level buffer allocations on the
|
|
* fly, esp useful for pred bufs and working memory
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** base memory ptr */
|
|
U08 *pu1_wkg_mem;
|
|
/** total memory available */
|
|
S32 i4_total;
|
|
/** Memory used so far */
|
|
S32 i4_used;
|
|
} buf_mgr_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct pred_candt_nodes_t
|
|
* @brief For a given partition and a given CU/blk, this has pointers to
|
|
* all the neighbouring and coloc pred candts. All the pred candts
|
|
* are stored as search_node_t structures itself.
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
search_node_t *ps_tl;
|
|
search_node_t *ps_t;
|
|
search_node_t *ps_tr;
|
|
search_node_t *ps_bl;
|
|
search_node_t *ps_l;
|
|
search_node_t *ps_coloc;
|
|
search_node_t *ps_zeromv;
|
|
search_node_t **pps_proj_coloc;
|
|
|
|
search_node_t *ps_mvp_node;
|
|
} pred_candt_nodes_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct pred_ctxt_t
|
|
* @brief For a given CU/blk, has complete prediction information for all
|
|
* types of partitions. Note that the pred candts are only pointed
|
|
* to, not actually stored here. This indirection is to avoid
|
|
* copies after each partition search, this way, the result of
|
|
* a partition is updated and the causally next partition
|
|
* automatically uses this result
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
pred_candt_nodes_t as_pred_nodes[TOT_NUM_PARTS];
|
|
|
|
/**
|
|
* We use S + lambda * R to evaluate cost. Here S = SAD/SATD and lambda
|
|
* is the scaling of bits to S and R is bits of overhead (MV + mode).
|
|
* Choice of lambda depends on open loop / closed loop, Qp, temporal id
|
|
* and possibly CU depth. It is the caller's responsiblity to pass
|
|
* to this module the appropriate lambda.
|
|
*/
|
|
S32 lambda;
|
|
|
|
/** lambda is in Q format, so this is the downshift reqd */
|
|
S32 lambda_q_shift;
|
|
|
|
/** Prediction direction : PRED_L0 or PRED_L1 */
|
|
S32 pred_lx;
|
|
|
|
/** MV resolution: FPEL, HPEL or QPEL */
|
|
S32 mv_pel;
|
|
|
|
/** Points to the ref bits lookup 1 ptr for each PRED_Lx */
|
|
U08 **ppu1_ref_bits_tlu;
|
|
|
|
/**
|
|
* Points to the ref scale factor, for a given ref id k,
|
|
* to scale as per ref id m, we use entry k+MAX_NUM_REF*m
|
|
*/
|
|
S16 *pi2_ref_scf;
|
|
|
|
/**
|
|
* Flag that indicates whether T, TR and TL candidates used
|
|
* are causal or projected
|
|
*/
|
|
U08 proj_used;
|
|
|
|
} pred_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct search_results_t
|
|
* @brief For a given CU/blk, Stores all the results of ME search. Results
|
|
* are stored per partition, also the best results for CU are stored
|
|
* across partitions.
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Size of CU for which this structure used */
|
|
CU_SIZE_T e_cu_size;
|
|
|
|
/**
|
|
* X and y offsets w.r.t. CTB start in encode layers. For non encode
|
|
* layers, these may typically be 0
|
|
*/
|
|
U08 u1_x_off;
|
|
U08 u1_y_off;
|
|
|
|
/** Number of best results for this CU stored */
|
|
U08 u1_num_best_results;
|
|
|
|
/** Number of results stored per partition. */
|
|
U08 u1_num_results_per_part;
|
|
|
|
/**
|
|
* Number of result planes active. This may be different from total
|
|
* number of active references during search. For example, we may
|
|
* have 4 active ref, 2 ineach dirn, but active result planes may
|
|
* only be 2, one for L0 and 1 for L1
|
|
*/
|
|
U08 u1_num_active_ref;
|
|
/**
|
|
* mask of active partitions, Totally 17 bits. For a given partition
|
|
* id, as per PART_ID_T enum the corresponding bit position is 1/0
|
|
* indicating that partition is active or inactive
|
|
*/
|
|
S32 i4_part_mask;
|
|
|
|
/** Points to partial results for each partition id
|
|
* Temporary hack for the bug: If +1 is not kept,
|
|
* it doesn't bit match with older version
|
|
*/
|
|
search_node_t *aps_part_results[MAX_NUM_REF][TOT_NUM_PARTS];
|
|
|
|
/**
|
|
* Ptr to best results for the current CU post bi pred evaluation and
|
|
* intra mode insertions
|
|
*/
|
|
inter_cu_results_t *ps_cu_results;
|
|
|
|
/** 2 pred ctxts, one for L0 and one for L1 */
|
|
pred_ctxt_t as_pred_ctxt[2];
|
|
|
|
/**
|
|
* Pointer to a table that indicates whether the ref id
|
|
* corresponds to past or future dirn. Input is ref id Lc form
|
|
*/
|
|
|
|
U08 *pu1_is_past;
|
|
|
|
/**
|
|
* Overall best CU cost, while other entries store CU costs
|
|
* in single direction, this is best CU cost, where each
|
|
* partition cost is evaluated as best of uni/bi
|
|
*/
|
|
S32 best_cu_cost;
|
|
|
|
/**
|
|
* Split_flag which is used for deciding if 16x16 CU is split or not
|
|
*/
|
|
U08 u1_split_flag;
|
|
} search_results_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct ctb_list_t
|
|
* @brief Tree structure containing info for entire CTB. At top level
|
|
* it points to entire CTB results, with children nodes at each lvl
|
|
* being non null if split.
|
|
******************************************************************************
|
|
*/
|
|
typedef struct ctb_list_t
|
|
{
|
|
/** Indicates whether this level split further */
|
|
U08 u1_is_split;
|
|
|
|
/** Number of result candts present */
|
|
U08 u1_num_candts;
|
|
|
|
/**
|
|
* Whether this level valid. E.g. if we are at boundary, where only
|
|
* left 2 32x32 are within pic boundary, then the parent is force split
|
|
* at the children level, TR and BR are invalid.
|
|
*/
|
|
U08 u1_is_valid;
|
|
|
|
/**
|
|
* IF this level is 16x16 then this mask indicates which 8x8 blks
|
|
* are valid
|
|
*/
|
|
U08 u1_8x8_mask;
|
|
|
|
/** Search results of this CU */
|
|
search_results_t *ps_search_results;
|
|
|
|
/** Search results of this CU */
|
|
inter_cu_results_t *ps_cu_results;
|
|
|
|
/** Pointers to leaf nodes, if CU is split further, else null */
|
|
struct ctb_list_t *ps_tl;
|
|
struct ctb_list_t *ps_tr;
|
|
struct ctb_list_t *ps_bl;
|
|
struct ctb_list_t *ps_br;
|
|
} ctb_list_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct layer_mv_t
|
|
* @brief mv bank structure for a particular layer
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Number of mvs for a given ref/pred dirn */
|
|
S32 i4_num_mvs_per_ref;
|
|
/** Number of reference for which results stored */
|
|
S32 i4_num_ref;
|
|
/** Number of mvs stored per blk. Product of above two */
|
|
S32 i4_num_mvs_per_blk;
|
|
/** Block size of the unit for which MVs stored */
|
|
BLK_SIZE_T e_blk_size;
|
|
/** Number of blocks present per row */
|
|
S32 i4_num_blks_per_row;
|
|
|
|
/** Number of mvs stored every row */
|
|
S32 i4_num_mvs_per_row;
|
|
|
|
/**
|
|
* Max number of mvs allowed per row. The main purpose of this variable
|
|
* is to resolve or detect discrepanceis between allocation time mem
|
|
* and run time mem, when alloc time resolution and run time resolution
|
|
* may be different
|
|
*/
|
|
S32 max_num_mvs_per_row;
|
|
|
|
/**
|
|
* Pointer to mvs of 0, 0 blk, This is different from base since the
|
|
* mv bank is padded all sides
|
|
*/
|
|
hme_mv_t *ps_mv;
|
|
|
|
/** Pointer to base of mv bank mvs */
|
|
hme_mv_t *ps_mv_base;
|
|
|
|
/** Pointers to ref idx.One to one correspondence between this and ps_mv*/
|
|
S08 *pi1_ref_idx;
|
|
/** Base of ref ids just like in case of ps_mv */
|
|
S08 *pi1_ref_idx_base;
|
|
|
|
/** Part mask for every blk, if stored, 1 per blk */
|
|
U08 *pu1_part_mask;
|
|
} layer_mv_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct mv_hist_t
|
|
* @brief Histogram structure to calculate global mvs
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
S32 i4_num_rows;
|
|
S32 i4_num_cols;
|
|
S32 i4_shift_x;
|
|
S32 i4_shift_y;
|
|
S32 i4_lobe1_size;
|
|
S32 i4_lobe2_size;
|
|
S32 i4_min_x;
|
|
S32 i4_min_y;
|
|
S32 i4_num_bins;
|
|
S32 ai4_bin_count[MAX_NUM_BINS];
|
|
} mv_hist_t;
|
|
|
|
typedef struct
|
|
{
|
|
U08 u1_is_past;
|
|
} ref_attr_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct layer_ctxt_t
|
|
* @brief Complete information for the layer
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Display Width of this layer */
|
|
S32 i4_disp_wd;
|
|
/** Display height of this layer */
|
|
S32 i4_disp_ht;
|
|
/** Width of this layer */
|
|
S32 i4_wd;
|
|
/** height of this layer */
|
|
S32 i4_ht;
|
|
/** Amount of padding of input in x dirn */
|
|
S32 i4_pad_x_inp;
|
|
/** Amount of padding of input in y dirn */
|
|
S32 i4_pad_y_inp;
|
|
/** Padding amount of recon in x dirn */
|
|
S32 i4_pad_x_rec;
|
|
/** padding amt of recon in y dirn */
|
|
S32 i4_pad_y_rec;
|
|
|
|
/**
|
|
* Offset for recon. Since recon has padding, the 0, 0 start differs
|
|
* from base of buffer
|
|
*/
|
|
S32 i4_rec_offset;
|
|
/** Offset for input, same explanation as recon */
|
|
S32 i4_inp_offset;
|
|
/** stride of input buffer */
|
|
S32 i4_inp_stride;
|
|
/** stride of recon buffer */
|
|
S32 i4_rec_stride;
|
|
/** Pic order count */
|
|
S32 i4_poc;
|
|
/** input pointer. */
|
|
U08 *pu1_inp;
|
|
/** Base of input. Add inp_offset to go to 0, 0 locn */
|
|
U08 *pu1_inp_base;
|
|
|
|
/** Pointer to 4 hpel recon planes */
|
|
U08 *pu1_rec_fxfy;
|
|
U08 *pu1_rec_hxfy;
|
|
U08 *pu1_rec_fxhy;
|
|
U08 *pu1_rec_hxhy;
|
|
|
|
/** Global mv, one set per reference searched */
|
|
hme_mv_t s_global_mv[MAX_NUM_REF][NUM_GMV_LOBES];
|
|
|
|
/** Layer MV bank */
|
|
layer_mv_t *ps_layer_mvbank;
|
|
|
|
/** Pointer to list of recon buffers for each ref id, one ptr per plane */
|
|
U08 **ppu1_list_rec_fxfy;
|
|
U08 **ppu1_list_rec_hxfy;
|
|
U08 **ppu1_list_rec_fxhy;
|
|
U08 **ppu1_list_rec_hxhy;
|
|
|
|
void **ppv_dep_mngr_recon;
|
|
|
|
/** Pointer to list of input buffers for each ref id, one ptr per plane */
|
|
U08 **ppu1_list_inp;
|
|
|
|
/** Max MV in x and y direction supported at this layer resolution */
|
|
S16 i2_max_mv_x;
|
|
S16 i2_max_mv_y;
|
|
|
|
/** Converts ref id (as per Lc list) to POC */
|
|
S32 ai4_ref_id_to_poc_lc[MAX_NUM_REF];
|
|
|
|
S32 ai4_ref_id_to_disp_num[MAX_NUM_REF];
|
|
|
|
/** status of the buffer */
|
|
S32 i4_is_free;
|
|
|
|
/** idr gop number */
|
|
S32 i4_idr_gop_num;
|
|
|
|
/** is reference picture */
|
|
S32 i4_is_reference;
|
|
|
|
/** is non reference picture processed by me*/
|
|
S32 i4_non_ref_free;
|
|
|
|
} layer_ctxt_t;
|
|
|
|
typedef S32 (*PF_MV_COST_FXN)(search_node_t *, pred_ctxt_t *, PART_ID_T, S32);
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct refine_prms_t
|
|
* @brief All the configurable input parameters for the refinement layer
|
|
*
|
|
* @param encode: Whether this layer is encoded or not
|
|
* @param explicit_ref: If enabled, then the number of reference frames to
|
|
* be searched is a function of coarsest layer num ref
|
|
frames. Else, number of references collapsed to 1/2
|
|
* @param i4_num_fpel_results : Number of full pel results to be allowed
|
|
* @param i4_num_results_per_part: Number of results stored per partition
|
|
* @param e_search_complexity: Decides the number of initial candts, refer
|
|
* to SEARCH_COMPLEXITY_T
|
|
* @param i4_use_rec_in_fpel: Whether to use input buf or recon buf in fpel
|
|
* @param i4_enable_4x4_part : if encode is 0, we use 8x8 blks, if this param
|
|
enabled, then we do 4x4 partial sad update
|
|
* @param i4_layer_id : id of this layer (0 = finest)
|
|
* @param i4_num_32x32_merge_results: number of 32x32 merged results stored
|
|
* @param i4_num_64x64_merge_results: number of 64x64 merged results stored
|
|
* @param i4_use_satd_cu_merge: Use SATD during CU merge
|
|
* @param i4_num_steps_hpel_refine : Number of steps during hpel refinement
|
|
* @param i4_num_steps_qpel_refine : Same as above but for qpel
|
|
* @param i4_use_satd_subpel : Use of SATD or SAD for subpel
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/* This array is used to place upper bounds on the number of search candidates */
|
|
/* that can be used per 'search cand location' */
|
|
U08 au1_num_fpel_search_cands[NUM_SEARCH_CAND_LOCATIONS];
|
|
|
|
U08 u1_max_2nx2n_tu_recur_cands;
|
|
|
|
U08 u1_max_num_fpel_refine_centers;
|
|
|
|
U08 u1_max_num_subpel_refine_centers;
|
|
|
|
S32 i4_encode;
|
|
S32 explicit_ref;
|
|
S32 i4_num_ref_fpel;
|
|
S32 i4_num_fpel_results;
|
|
|
|
S32 i4_num_results_per_part;
|
|
|
|
S32 i4_num_mvbank_results;
|
|
SEARCH_COMPLEXITY_T e_search_complexity;
|
|
S32 i4_use_rec_in_fpel;
|
|
|
|
S32 i4_enable_4x4_part;
|
|
S32 i4_layer_id;
|
|
|
|
S32 i4_num_32x32_merge_results;
|
|
S32 i4_num_64x64_merge_results;
|
|
|
|
S32 i4_use_satd_cu_merge;
|
|
|
|
S32 i4_num_steps_post_refine_fpel;
|
|
S32 i4_num_steps_fpel_refine;
|
|
S32 i4_num_steps_hpel_refine;
|
|
S32 i4_num_steps_qpel_refine;
|
|
S32 i4_use_satd_subpel;
|
|
|
|
double *pd_intra_costs;
|
|
S32 bidir_enabled;
|
|
S32 lambda_inp;
|
|
S32 lambda_recon;
|
|
S32 lambda_q_shift;
|
|
|
|
S32 limit_active_partitions;
|
|
|
|
S32 sdi_threshold;
|
|
|
|
U08 u1_use_lambda_derived_from_min_8x8_act_in_ctb;
|
|
|
|
U08 u1_max_subpel_candts;
|
|
|
|
U08 u1_max_subpel_candts_2Nx2N;
|
|
U08 u1_max_subpel_candts_NxN;
|
|
|
|
U08 u1_subpel_candt_threshold;
|
|
|
|
/* Pointer to the array which has num best results for
|
|
fpel refinement */
|
|
U08 *pu1_num_best_results;
|
|
|
|
} refine_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct coarse_prms_t
|
|
* @brief All the parameters passed to coarse layer search
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** ID of this layer, typically N-1 where N is tot layers */
|
|
S32 i4_layer_id;
|
|
|
|
/** Initial step size, valid if full search disabled */
|
|
S32 i4_start_step;
|
|
|
|
/** Maximum number of iterations to consider if full search disabled */
|
|
S32 i4_max_iters;
|
|
|
|
/** Number of reference frames to search */
|
|
S32 i4_num_ref;
|
|
|
|
/** Number of best results to maintain at this layer for projection */
|
|
S32 num_results;
|
|
|
|
/**
|
|
* Enable or disable full search, if disabled then, we search around initial
|
|
* candidates with early exit
|
|
*/
|
|
S32 do_full_search;
|
|
|
|
/** Values of lambda and the Q format */
|
|
S32 lambda;
|
|
S32 lambda_q_shift;
|
|
|
|
/** Step size for full search 2/4 */
|
|
S32 full_search_step;
|
|
|
|
} coarse_prms_t;
|
|
|
|
typedef struct
|
|
{
|
|
/**
|
|
* These pointers point to modified input, one each for one ref idx.
|
|
* Instead of weighting the reference, we weight the input with inverse
|
|
* wt and offset.
|
|
* +1 for storing non weighted input
|
|
*/
|
|
U08 *apu1_wt_inp[MAX_NUM_REF + 1];
|
|
|
|
/* These are allocated once at the start of encoding */
|
|
/* These are necessary only if wt_pred is switched on */
|
|
/* Else, only a single buffer is used to store the */
|
|
/* unweighed input */
|
|
U08 *apu1_wt_inp_buf_array[MAX_NUM_REF + 1];
|
|
|
|
/** Stores the weights and offsets for each ref */
|
|
S32 a_wpred_wt[MAX_NUM_REF];
|
|
S32 a_inv_wpred_wt[MAX_NUM_REF];
|
|
S32 a_wpred_off[MAX_NUM_REF];
|
|
S32 wpred_log_wdc;
|
|
|
|
S32 ai4_shift_val[MAX_NUM_REF];
|
|
} wgt_pred_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct mv_refine_ctxt_t
|
|
* @brief This structure contains important parameters used motion vector
|
|
refinement
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/* Added +7 in the array sizes below to make every array dimension
|
|
16-byte aligned */
|
|
/** Cost of best candidate for each partition*/
|
|
MEM_ALIGN16 WORD16 i2_tot_cost[2][TOT_NUM_PARTS + 7];
|
|
|
|
MEM_ALIGN16 WORD16 i2_stim_injected_cost[2][TOT_NUM_PARTS + 7];
|
|
|
|
/** Motion vector cost for the best candidate of each partition*/
|
|
MEM_ALIGN16 WORD16 i2_mv_cost[2][TOT_NUM_PARTS + 7];
|
|
/** X component of the motion vector of the best candidate of each partition*/
|
|
MEM_ALIGN16 WORD16 i2_mv_x[2][TOT_NUM_PARTS + 7];
|
|
/** Y component of the motion vector of the best candidate of each partition*/
|
|
MEM_ALIGN16 WORD16 i2_mv_y[2][TOT_NUM_PARTS + 7];
|
|
/** Reference index of the best candidate of each partition*/
|
|
MEM_ALIGN16 WORD16 i2_ref_idx[2][TOT_NUM_PARTS + 7];
|
|
|
|
/** Partition id for the various partitions*/
|
|
WORD32 ai4_part_id[TOT_NUM_PARTS + 1];
|
|
/** Indicates the total number of valid partitions*/
|
|
WORD32 i4_num_valid_parts;
|
|
|
|
/** Number of candidates to refine through*/
|
|
WORD32 i4_num_search_nodes;
|
|
|
|
/** Stores the satd at the end of fullpel refinement*/
|
|
WORD16 ai2_fullpel_satd[2][TOT_NUM_PARTS];
|
|
} mv_refine_ctxt_t;
|
|
|
|
typedef mv_refine_ctxt_t fullpel_refine_ctxt_t;
|
|
typedef mv_refine_ctxt_t subpel_refine_ctxt_t;
|
|
/**
|
|
******************************************************************************
|
|
* @struct hme_search_prms_t
|
|
* @brief All prms going to any fpel search
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** for explicit search, indicates which ref frm to search */
|
|
/** for implicit search, indicates the prediction direction for search */
|
|
S08 i1_ref_idx;
|
|
|
|
/** Blk size used for search, and for which the search is done */
|
|
BLK_SIZE_T e_blk_size;
|
|
|
|
/** Number of init candts being searched */
|
|
S32 i4_num_init_candts;
|
|
|
|
S32 i4_num_steps_post_refine;
|
|
|
|
/**
|
|
* For coarser searches, bigger refinement is done around each candt
|
|
* in these cases, this prm has start step
|
|
*/
|
|
S32 i4_start_step;
|
|
|
|
/** whether SATD to be used for srch */
|
|
S32 i4_use_satd;
|
|
|
|
/** if 1, we use recon frm for search (closed loop ) */
|
|
S32 i4_use_rec;
|
|
|
|
/** bitmask of active partitions */
|
|
S32 i4_part_mask;
|
|
|
|
/** x and y offset of blk w.r.t. pic start */
|
|
S32 i4_x_off;
|
|
S32 i4_y_off;
|
|
|
|
/**
|
|
* max number of iterations to search if early exit not hit
|
|
* relevant only for coarser searches
|
|
*/
|
|
S32 i4_max_iters;
|
|
|
|
/** pointer to str holding all results for this blk */
|
|
search_results_t *ps_search_results;
|
|
|
|
/** pts to str having all search candt with refinement info */
|
|
search_candt_t *ps_search_candts;
|
|
/** pts to str having valid mv range info for this blk */
|
|
range_prms_t *aps_mv_range[MAX_NUM_REF];
|
|
/** cost compute fxnptr */
|
|
PF_MV_COST_FXN pf_mv_cost_compute;
|
|
|
|
/** when this str is set up for full search, indicates step size for same */
|
|
S32 full_search_step;
|
|
|
|
/** stride ofinp buffer */
|
|
S32 i4_inp_stride;
|
|
|
|
/** x and y offset of cu w.r.t. ctb start, set to 0 for non enc layer */
|
|
S32 i4_cu_x_off;
|
|
S32 i4_cu_y_off;
|
|
|
|
/** base pointer to the de-duplicated search nodes */
|
|
search_node_t *ps_search_nodes;
|
|
|
|
/** number of de-duplicated nodes to be searched */
|
|
S32 i4_num_search_nodes;
|
|
|
|
fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
|
|
|
|
U32 au4_src_variance[TOT_NUM_PARTS];
|
|
|
|
S32 i4_alpha_stim_multiplier;
|
|
|
|
U08 u1_is_cu_noisy;
|
|
|
|
ULWORD64 *pu8_part_src_sigmaX;
|
|
ULWORD64 *pu8_part_src_sigmaXSquared;
|
|
|
|
} hme_search_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct hme_err_prms_t
|
|
* @brief This is input prms struct for SAD/SATD computation
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Ptr to input blk for which err computed */
|
|
U08 *pu1_inp;
|
|
|
|
U16 *pu2_inp;
|
|
|
|
/** Ptr to ref blk after adjusting for mv and coordinates in pic */
|
|
U08 *pu1_ref;
|
|
|
|
U16 *pu2_ref;
|
|
|
|
/** Stride of input buffer */
|
|
S32 i4_inp_stride;
|
|
/** Stride of ref buffer */
|
|
S32 i4_ref_stride;
|
|
/** Mask of active partitions. */
|
|
S32 i4_part_mask;
|
|
/** Mask of active grid pts. Refer to GRID_PT_T enum for bit posns */
|
|
S32 i4_grid_mask;
|
|
/**
|
|
* Pointer to SAD Grid where SADs for each partition are stored.
|
|
* The layout is as follows: If there are M total partitions
|
|
* and N active pts in the grid, then the first N results contain
|
|
* first partition, e.g. 2Nx2N. Next N results contain 2nd partitino
|
|
* sad, e.g. 2NxN_T. Totally we have MxN results.
|
|
* Note: The active partition count may be lesser than M, still we
|
|
* have results for M partitions
|
|
*/
|
|
S32 *pi4_sad_grid;
|
|
|
|
/** Pointer to TU_SPLIT grid flags */
|
|
S32 *pi4_tu_split_flags;
|
|
|
|
/** Pointer to the Child's satd cost */
|
|
S32 *pi4_child_cost;
|
|
|
|
/** pointer to the child'd TU_split flags */
|
|
S32 *pi4_child_tu_split_flags;
|
|
|
|
/** pointer to the child'd TU_early_cbf flags */
|
|
S32 *pi4_child_tu_early_cbf;
|
|
|
|
/** Pointer to TU early CBF flags */
|
|
S32 *pi4_tu_early_cbf;
|
|
|
|
/** pointer to the early cbf thresholds */
|
|
S32 *pi4_tu_early_cbf_threshold;
|
|
|
|
/** store the DC value */
|
|
S32 i4_dc_val;
|
|
|
|
/** Block width and ht of the block being evaluated for SAD */
|
|
S32 i4_blk_wd;
|
|
S32 i4_blk_ht;
|
|
|
|
/**
|
|
* Array of valid partition ids. E.g. if 2 partitions active,
|
|
* then there will be 3 entries, 3rd entry being -1
|
|
*/
|
|
S32 *pi4_valid_part_ids;
|
|
/** Step size of the grid */
|
|
S32 i4_step;
|
|
|
|
/* Number of partitions */
|
|
S32 i4_num_partitions;
|
|
|
|
/** Store the tu_spli_flag cost */
|
|
S32 i4_tu_split_cost;
|
|
|
|
/** The max_depth for inter tu_tree */
|
|
U08 u1_max_tr_depth;
|
|
|
|
U08 u1_max_tr_size;
|
|
|
|
/** Scratch memory for Doing hadamard */
|
|
U08 *pu1_wkg_mem;
|
|
|
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
|
|
|
|
} err_prms_t;
|
|
|
|
typedef struct grid
|
|
{
|
|
WORD32 num_grids; /* Number of grid to work with */
|
|
WORD32 ref_buf_stride; /* Buffer stride of reference buffer */
|
|
WORD32
|
|
grd_sz_y_x; /* Packed 16 bits indicating grid spacing in y & x direction <--grid-size-y--><--grid-size-x--> */
|
|
UWORD8 **ppu1_ref_ptr; /* Center point for the grid search */
|
|
WORD32 *pi4_grd_mask; /* Mask indicating which grid points need to be evaluated */
|
|
hme_mv_t *p_mv; /* <--MVy--><--MVx--> */
|
|
WORD32 *p_ref_idx; /* Ref idx to which the grid is pointing */
|
|
} grid_ctxt_t;
|
|
|
|
typedef struct cand
|
|
{
|
|
hme_mv_t mv; /* MV corresponding to the candidate <--MVy--><--MVx--> */
|
|
WORD32 ref_idx; /* Ref idx corresponding to the candidate */
|
|
WORD32 grid_ix; /* Grid to which this candidate belongs */
|
|
UWORD8 *pu1_ref_ptr; /* Pointer to the candidate */
|
|
} cand_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct hme_ctb_prms_t
|
|
* @brief Parameters to create the CTB list, which is a tree structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* These parameters cover number of input 16x16, 32x32 and 64x64 results
|
|
* and the number of output results that are mix of all above CU sizes.
|
|
* i4_num_kxk_unified_out is relevant only if we are sending multiple CU
|
|
* sizes for same region for RD Opt.
|
|
*/
|
|
S32 i4_num_16x16_in;
|
|
S32 i4_num_32x32_in;
|
|
S32 i4_num_32x32_unified_out;
|
|
S32 i4_num_64x64_in;
|
|
S32 i4_num_64x64_unified_out;
|
|
|
|
/** Pointers to results at differen CU sizes */
|
|
search_results_t *ps_search_results_16x16;
|
|
search_results_t *ps_search_results_32x32;
|
|
search_results_t *ps_search_results_64x64;
|
|
|
|
S32 i4_num_part_type;
|
|
|
|
/** Indicates whether we have split at 64x64 level */
|
|
S32 i4_cu_64x64_split;
|
|
/** Indicates whether each of the 32x32 CU is split */
|
|
S32 ai4_cu_32x32_split[4];
|
|
|
|
/** X and y offset of the CTB */
|
|
S32 i4_ctb_x;
|
|
S32 i4_ctb_y;
|
|
|
|
/**
|
|
* Memory manager for the CTB that is responsible for node allocation
|
|
* at a CU level
|
|
*/
|
|
ctb_mem_mgr_t *ps_ctb_mem_mgr;
|
|
|
|
/** Buffer manager that is responsible for memory allocation (pred bufs) */
|
|
buf_mgr_t *ps_buf_mgr;
|
|
} hme_ctb_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct result_upd_prms_t
|
|
* @brief Updation of results
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Cost compuatation function ponter */
|
|
PF_MV_COST_FXN pf_mv_cost_compute;
|
|
|
|
/** Points to the SAD grid updated during SAD compute fxn */
|
|
S32 *pi4_sad_grid;
|
|
|
|
/** Points to the TU_SPLIT grid updates duting the SATD TU REC fxn */
|
|
S32 *pi4_tu_split_flags;
|
|
|
|
/**
|
|
* This is the central mv of the grid. For e.g. if we have a 3x3 grid,
|
|
* this covers the central pt's mv in the grid.
|
|
*/
|
|
const search_node_t *ps_search_node_base;
|
|
|
|
/** Search results structure updated by the result update fxn */
|
|
search_results_t *ps_search_results;
|
|
|
|
/** List of active partitions, only these are processed and updated */
|
|
S32 *pi4_valid_part_ids;
|
|
|
|
/** Reference id for this candt and grid */
|
|
S08 i1_ref_idx;
|
|
|
|
/** Mask of active pts in the grid */
|
|
S32 i4_grid_mask;
|
|
|
|
/**
|
|
* For early exit reasons we may want to know the id of the least candt
|
|
* This will correspond to id of candt with least cost for 2Nx2N part,
|
|
* if multiple partitions enabled, or if 1 part enabled, it will be for
|
|
* id of candt of that partition
|
|
*/
|
|
S32 i4_min_id;
|
|
|
|
/** Step size of the grid */
|
|
S32 i4_step;
|
|
|
|
/** Mask of active partitions */
|
|
S32 i4_part_mask;
|
|
|
|
/** Min cost corresponding to min id */
|
|
S32 i4_min_cost;
|
|
|
|
/** Store the motion vectors in qpel unit*/
|
|
S16 i2_mv_x;
|
|
|
|
S16 i2_mv_y;
|
|
|
|
U08 u1_pred_lx;
|
|
|
|
subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
|
|
|
|
/** Current candidate in the subpel refinement process*/
|
|
search_node_t *ps_search_node;
|
|
|
|
} result_upd_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct mv_grid_t
|
|
* @brief Grid of MVs storing results for a CTB and neighbours. For a CTB
|
|
* of size 64x64, we may store upto 16x16 mvs (one for each 4x4)
|
|
* along with 1 neighbour on each side. Valid only for encode layer
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** All the mvs in the grid */
|
|
search_node_t as_node[NUM_MVS_IN_CTB_GRID];
|
|
|
|
/** Stride of the grid */
|
|
S32 i4_stride;
|
|
|
|
/** Start offset of the 0,0 locn in CTB. */
|
|
S32 i4_start_offset;
|
|
} mv_grid_t;
|
|
|
|
typedef struct
|
|
{
|
|
/* centroid's (x, y) co-ordinates in Q8 format */
|
|
WORD32 i4_pos_x_q8;
|
|
|
|
WORD32 i4_pos_y_q8;
|
|
} centroid_t;
|
|
|
|
typedef struct
|
|
{
|
|
S16 min_x;
|
|
|
|
S16 min_y;
|
|
|
|
S16 max_x;
|
|
|
|
S16 max_y;
|
|
|
|
/* The cumulative sum of partition sizes of the mvs */
|
|
/* in this cluster */
|
|
S16 area_in_pixels;
|
|
|
|
S16 uni_mv_pixel_area;
|
|
|
|
S16 bi_mv_pixel_area;
|
|
|
|
mv_data_t as_mv[128];
|
|
|
|
U08 num_mvs;
|
|
|
|
/* Weighted average of all mvs in the cluster */
|
|
centroid_t s_centroid;
|
|
|
|
S08 ref_id;
|
|
|
|
S32 max_dist_from_centroid;
|
|
|
|
U08 is_valid_cluster;
|
|
|
|
} cluster_data_t;
|
|
|
|
typedef struct
|
|
{
|
|
cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_16x16];
|
|
|
|
U08 num_clusters;
|
|
|
|
U08 au1_num_clusters[MAX_NUM_REF];
|
|
|
|
S16 intra_mv_area;
|
|
|
|
S32 best_inter_cost;
|
|
|
|
} cluster_16x16_blk_t;
|
|
|
|
typedef struct
|
|
{
|
|
cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_32x32];
|
|
|
|
U08 num_clusters;
|
|
|
|
U08 au1_num_clusters[MAX_NUM_REF];
|
|
|
|
S16 intra_mv_area;
|
|
|
|
S08 best_uni_ref;
|
|
|
|
S08 best_alt_ref;
|
|
|
|
S32 best_inter_cost;
|
|
|
|
U08 num_refs;
|
|
|
|
U08 num_clusters_with_weak_sdi_density;
|
|
|
|
} cluster_32x32_blk_t;
|
|
|
|
typedef struct
|
|
{
|
|
cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_64x64];
|
|
|
|
U08 num_clusters;
|
|
|
|
U08 au1_num_clusters[MAX_NUM_REF];
|
|
|
|
S16 intra_mv_area;
|
|
|
|
S08 best_uni_ref;
|
|
|
|
S08 best_alt_ref;
|
|
|
|
S32 best_inter_cost;
|
|
|
|
U08 num_refs;
|
|
|
|
} cluster_64x64_blk_t;
|
|
|
|
typedef struct
|
|
{
|
|
cluster_16x16_blk_t *ps_16x16_blk;
|
|
|
|
cluster_32x32_blk_t *ps_32x32_blk;
|
|
|
|
cluster_64x64_blk_t *ps_64x64_blk;
|
|
|
|
cur_ctb_cu_tree_t *ps_cu_tree_root;
|
|
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
|
|
S32 nodes_created_in_cu_tree;
|
|
|
|
S32 *pi4_blk_8x8_mask;
|
|
|
|
S32 blk_32x32_mask;
|
|
|
|
S32 sdi_threshold;
|
|
|
|
S32 i4_frame_qstep;
|
|
|
|
S32 i4_frame_qstep_multiplier;
|
|
|
|
U08 au1_is_16x16_blk_split[16];
|
|
|
|
S32 ai4_part_mask[16];
|
|
|
|
} ctb_cluster_info_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct hme_merge_prms_t
|
|
* @brief All parameters related to the merge process
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* MV Range prms for the merged CU, this may have to be conservative
|
|
* in comparison to individual CUs
|
|
*/
|
|
range_prms_t *aps_mv_range[MAX_NUM_REF];
|
|
|
|
/** Pointers to search results of 4 children CUs to be merged */
|
|
search_results_t *ps_results_tl;
|
|
search_results_t *ps_results_tr;
|
|
search_results_t *ps_results_bl;
|
|
search_results_t *ps_results_br;
|
|
|
|
search_results_t *ps_results_grandchild;
|
|
|
|
/** Pointer to search results of the parent CU updated during merge */
|
|
search_results_t *ps_results_merge;
|
|
|
|
inter_cu_results_t *ps_8x8_cu_results;
|
|
|
|
/** Layer related context */
|
|
layer_ctxt_t *ps_layer_ctxt;
|
|
|
|
inter_ctb_prms_t *ps_inter_ctb_prms;
|
|
|
|
/**
|
|
* Points to an array of pointers. This array in turn points to
|
|
* the active mv grid in each direction (L0/L1)
|
|
*/
|
|
mv_grid_t **pps_mv_grid;
|
|
|
|
ctb_cluster_info_t *ps_cluster_info;
|
|
|
|
S08 *pi1_past_list;
|
|
|
|
S08 *pi1_future_list;
|
|
|
|
/** MV cost compute function */
|
|
PF_MV_COST_FXN pf_mv_cost_compute;
|
|
|
|
/** If segmentation info available for the parent block */
|
|
S32 i4_seg_info_avail;
|
|
|
|
/** Partition mask (if segmentation info available) */
|
|
S32 i4_part_mask;
|
|
|
|
/** Number of input results available for the merge proc from children*/
|
|
S32 i4_num_inp_results;
|
|
|
|
/** Whether SATD to be used for fpel searches */
|
|
S32 i4_use_satd;
|
|
|
|
/**
|
|
* Number of result planes valid for this merge process. For example,
|
|
* for fpel search in encode layer, we may have only L0 and L1
|
|
*/
|
|
S32 i4_num_ref;
|
|
|
|
/** Whether to use input or recon frm for search */
|
|
S32 i4_use_rec;
|
|
|
|
/** optimized mv grid flag : indicates if same mvgrid is used for both fpel and qpel
|
|
* This helps in copying fpel and qpel mv grid in pred context mv grid
|
|
*/
|
|
S32 i4_mv_grid_opt;
|
|
|
|
/** ctb size, typically 32 or 64 */
|
|
S32 log_ctb_size;
|
|
|
|
S32 i4_ctb_x_off;
|
|
|
|
S32 i4_ctb_y_off;
|
|
|
|
ME_QUALITY_PRESETS_T e_quality_preset;
|
|
|
|
S32 i4_num_pred_dir_actual;
|
|
|
|
U08 au1_pred_dir_searched[2];
|
|
|
|
S32 i4_alpha_stim_multiplier;
|
|
|
|
U08 u1_is_cu_noisy;
|
|
|
|
} hme_merge_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct mvbank_update_prms_t
|
|
* @brief Useful prms for updating the mv bank
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Number of references for which update to be done */
|
|
S32 i4_num_ref;
|
|
|
|
/**
|
|
* Search blk size that was used, if this is different from the blk
|
|
* size used in mv bank, then some replications or reductions may
|
|
* have to be done. E.g. if search blk size is 8x8 and result blk
|
|
* size is 4x4, then we have to update part NxN results to be
|
|
* used for update along with replication of 2Nx2N result in each
|
|
* of the 4 4x4 blk.
|
|
*/
|
|
BLK_SIZE_T e_search_blk_size;
|
|
|
|
/**
|
|
* Redundant prm as it reflects differences between search blk size
|
|
* and mv blk size if any
|
|
*/
|
|
S32 i4_shift;
|
|
|
|
S32 i4_num_active_ref_l0;
|
|
|
|
S32 i4_num_active_ref_l1;
|
|
|
|
S32 i4_num_results_to_store;
|
|
} mvbank_update_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct hme_subpel_prms_t
|
|
* @brief input and control prms for subpel refinement
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Relevant only for the case where we mix up results of diff cu sizes */
|
|
S32 i4_num_16x16_candts;
|
|
S32 i4_num_32x32_candts;
|
|
S32 i4_num_64x64_candts;
|
|
|
|
/** X and y offset of ctb w.r.t. start of pic */
|
|
S32 i4_ctb_x_off;
|
|
S32 i4_ctb_y_off;
|
|
|
|
/** Max Number of diamond steps for hpel and qpel refinement */
|
|
S32 i4_num_steps_hpel_refine;
|
|
S32 i4_num_steps_qpel_refine;
|
|
|
|
/** Whether SATD to be used or SAD to be used */
|
|
S32 i4_use_satd;
|
|
|
|
/**
|
|
* Input ptr. This is updated inside the subpel refinement by picking
|
|
* up correct adress
|
|
*/
|
|
void *pv_inp;
|
|
|
|
/**
|
|
* Pred buffer ptr, updated inside subpel refinement process. This
|
|
* location passed to the leaf fxn for copying the winner pred buf
|
|
*/
|
|
U08 *pu1_pred;
|
|
|
|
/** Interpolation fxn sent by top layer, should exact qpel be desired */
|
|
PF_INTERP_FXN_T pf_qpel_interp;
|
|
|
|
/** Working mem passed to leaf fxns */
|
|
U08 *pu1_wkg_mem;
|
|
|
|
/** prediction buffer stride fo rleaf fxns to copy the pred winner buf */
|
|
S32 i4_pred_stride;
|
|
|
|
/** Type of input ; sizeof(UWORD8) => unidir refinement, else BIDIR */
|
|
S32 i4_inp_type;
|
|
|
|
/** Stride of input buf, updated inside subpel fxn */
|
|
S32 i4_inp_stride;
|
|
|
|
/**
|
|
* Pointer to the backward input ptr. This is also updated inside
|
|
* the subpel fxn. Needed for BIDIR refinement where modified inpu
|
|
* is 2I - P0
|
|
*/
|
|
S16 *pi2_inp_bck;
|
|
|
|
/** Indicates if CU merge uses SATD / SAD */
|
|
S32 i4_use_satd_cu_merge;
|
|
|
|
/** valid MV range in hpel and qpel units */
|
|
range_prms_t *aps_mv_range_hpel[MAX_NUM_REF];
|
|
range_prms_t *aps_mv_range_qpel[MAX_NUM_REF];
|
|
/** Relevant only for mixed CU cases */
|
|
search_results_t *ps_search_results_16x16;
|
|
search_results_t *ps_search_results_32x32;
|
|
search_results_t *ps_search_results_64x64;
|
|
|
|
/** Cost computatino fxn ptr */
|
|
PF_MV_COST_FXN pf_mv_cost_compute;
|
|
|
|
/** Whether BI mode is allowed for this pic (not allowed in P) */
|
|
S32 bidir_enabled;
|
|
|
|
/**
|
|
* Total number of references of current picture which is enocded
|
|
*/
|
|
U08 u1_num_ref;
|
|
|
|
/**
|
|
* Number of candidates used for refinement
|
|
* If given 1 candidate, then 2Nx2N is chosen as the best candidate
|
|
*/
|
|
U08 u1_max_subpel_candts;
|
|
|
|
U08 u1_subpel_candt_threshold;
|
|
|
|
ME_QUALITY_PRESETS_T e_me_quality_presets;
|
|
|
|
U08 u1_max_subpel_candts_2Nx2N;
|
|
U08 u1_max_subpel_candts_NxN;
|
|
|
|
U08 u1_max_num_subpel_refine_centers;
|
|
|
|
subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
|
|
|
|
S32 i4_num_act_ref_l0;
|
|
|
|
S32 i4_num_act_ref_l1;
|
|
|
|
U08 u1_is_cu_noisy;
|
|
} hme_subpel_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct layers_descr_t
|
|
* @brief One such str exists for each ref and curr input in the me ctxt
|
|
* Has ctxt handles for all layers of a given POC
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Handles for all layers. Entry 0 is finest layer */
|
|
layer_ctxt_t *aps_layers[MAX_NUM_LAYERS];
|
|
} layers_descr_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct blk_ctb_attrs_t
|
|
* @brief The CTB is split into 16x16 blks. For each such blk, this str
|
|
* stores attributes of this blk w.r.t. ctb
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* ID of the blk in the full ctb. Assuming the full ctb were coded,
|
|
* this indicates what is the blk num of this blk (in encode order)
|
|
* within the full ctb
|
|
*/
|
|
U08 u1_blk_id_in_full_ctb;
|
|
|
|
/** x and y coordinates of this blk w.r.t. ctb base */
|
|
U08 u1_blk_x;
|
|
U08 u1_blk_y;
|
|
/**
|
|
* Mask of 8x8 blks that are active. Bits 0-3 for blks 0-3 in raster order
|
|
* within a 16x16 blk. This will be 0xf in interiors and < 0xf at rt/bot
|
|
* boundaries or at bot rt corners, where we may not have full 16x16 blk
|
|
*/
|
|
U08 u1_blk_8x8_mask;
|
|
} blk_ctb_attrs_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct ctb_boundary_attrs_t
|
|
* @brief Depending on the location of ctb (rt boundary, bot boundary,
|
|
* bot rt corner, elsewhere) this picks out the appropriate
|
|
* attributes of the ctb
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* 4 bit variable, one for each of the 4 possible 32x32s in a full ctb
|
|
* If any 32x32 is partially present / not present at boundaries, that
|
|
* bit posn will be 0
|
|
*/
|
|
U08 u1_merge_to_32x32_flag;
|
|
|
|
/**
|
|
* 1 bit flag indicating whether it is a complete ctb or not, and
|
|
* consequently whether it can be merged to a full 64x64
|
|
*/
|
|
U08 u1_merge_to_64x64_flag;
|
|
|
|
/** Number of valid 16x16 blks (includes those partially/fully present*/
|
|
U08 u1_num_blks_in_ctb;
|
|
|
|
/** 16 bit variable indicating whether the corresponding 16x16 is valid */
|
|
S32 cu_16x16_valid_flag;
|
|
|
|
/**
|
|
* For possible 16 16x16 blks in a CTB, we have one attribute str for
|
|
* every valid blk. Tightly packed structure. For example,
|
|
* 0 1 4 5
|
|
* 2 3 6 7
|
|
* 8 9 12 13
|
|
* 10 11 14 15
|
|
* Assuming the ctb width is only 48, blks 5,7,13,15 are invalid
|
|
* Then We store attributes in the order: 0,1,2,3,4,6,8,9,10,11,12,14
|
|
*/
|
|
blk_ctb_attrs_t as_blk_attrs[16];
|
|
} ctb_boundary_attrs_t;
|
|
|
|
typedef struct
|
|
{
|
|
S32 sdi;
|
|
|
|
S32 ref_idx;
|
|
|
|
S32 cluster_id;
|
|
} outlier_data_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct coarse_dyn_range_prms_t
|
|
* @brief The parameters for Dyn. Search Range in coarse ME
|
|
******************************************************************************
|
|
*/
|
|
|
|
typedef struct
|
|
{
|
|
/* TO DO : size can be reduced, as not getting used for L0 */
|
|
|
|
/** Dynamical Search Range parameters per layer & ref_pic */
|
|
dyn_range_prms_t as_dyn_range_prms[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
|
|
/** Min y value Normalized per POC distance */
|
|
WORD16 i2_dyn_min_y_per_poc[MAX_NUM_LAYERS];
|
|
/** Max y value Normalized per POC distance */
|
|
WORD16 i2_dyn_max_y_per_poc[MAX_NUM_LAYERS];
|
|
|
|
} coarse_dyn_range_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct coarse_me_ctxt_t
|
|
* @brief Handle for Coarse ME
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Init search candts, 2 sets, one for 4x8 and one for 8x4 */
|
|
search_node_t s_init_search_node[MAX_INIT_CANDTS * 2];
|
|
|
|
/** For non enc layer, we search 8x8 blks and store results here */
|
|
search_results_t s_search_results_8x8;
|
|
/**
|
|
* Below arays store input planes for each ref pic.
|
|
* These are duplications, and are present within layer ctxts, but
|
|
* kept here together for faster indexing during search
|
|
*/
|
|
U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
|
|
/** Ptr to all layer context placeholder for curr pic encoded */
|
|
layers_descr_t *ps_curr_descr;
|
|
|
|
/** Ptr to all layer ctxt place holder for all pics */
|
|
layers_descr_t as_ref_descr[MAX_NUM_REF + 1 + NUM_BUFS_DECOMP_HME];
|
|
|
|
/**
|
|
* ME uses ref id lc to search multi ref. This TLU gets POC of
|
|
* the pic w.r.t. a given ref id
|
|
*/
|
|
S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
|
|
|
|
/** use this array to get disp num from ref_idx. Used for L1 traqo **/
|
|
S32 ai4_ref_idx_to_disp_num[MAX_NUM_REF];
|
|
|
|
/** POC of pic encoded just before current */
|
|
S32 i4_prev_poc;
|
|
|
|
/** POC of curret pic being encoded */
|
|
S32 i4_curr_poc;
|
|
|
|
/** Number of HME layers encode + non encode */
|
|
S32 num_layers;
|
|
|
|
/** Alloc time parameter, max ref frms used for this session */
|
|
S32 max_num_ref;
|
|
|
|
/**
|
|
* Number of layers that use explicit search. Explicit search means
|
|
* that each ref id is searched separately
|
|
*/
|
|
S32 num_layers_explicit_search;
|
|
|
|
/**
|
|
* Maximum number of results maintained at any refinement layer
|
|
* search. Important from mem alloc perspective
|
|
*/
|
|
S32 max_num_results;
|
|
|
|
/** Same as above but for coarse layer */
|
|
S32 max_num_results_coarse;
|
|
|
|
/** Array of flags, one per layer indicating hwether layer is encoded */
|
|
U08 u1_encode[MAX_NUM_LAYERS];
|
|
|
|
/** Init prms send by encoder during create time */
|
|
hme_init_prms_t s_init_prms;
|
|
|
|
/**
|
|
* Array look up created each frm, maintaining the corresponding
|
|
* layer descr look up for each ref id
|
|
*/
|
|
S32 a_ref_to_descr_id[MAX_NUM_REF];
|
|
|
|
/**
|
|
* Array lookup created each frame that maps a given ref id
|
|
* pertaining to unified list to a L0/L1 list. Encoder searches in terms
|
|
* of LC list or in other words does not differentiate between L0
|
|
* and L1 frames for most of search. Finally to report results to
|
|
* encoder, the ref id has to be remapped to suitable list
|
|
*/
|
|
S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
|
|
S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
|
|
|
|
/** Width and ht of each layer */
|
|
S32 a_wd[MAX_NUM_LAYERS];
|
|
S32 a_ht[MAX_NUM_LAYERS];
|
|
|
|
/** Histogram, one for each ref, allocated during craete time */
|
|
mv_hist_t *aps_mv_hist[MAX_NUM_REF];
|
|
|
|
/** Whether a given ref id in Lc list is past frm or future frm */
|
|
U08 au1_is_past[MAX_NUM_REF];
|
|
|
|
/** These are L0 and L1 lists, storing ref id Lc in them */
|
|
S08 ai1_past_list[MAX_NUM_REF];
|
|
S08 ai1_future_list[MAX_NUM_REF];
|
|
|
|
/** Number of past and future ref pics sent this frm */
|
|
S32 num_ref_past;
|
|
S32 num_ref_future;
|
|
|
|
void *pv_ext_frm_prms;
|
|
|
|
hme_frm_prms_t *ps_hme_frm_prms;
|
|
|
|
hme_ref_map_t *ps_hme_ref_map;
|
|
/**
|
|
* Scale factor of any given ref lc to another ref in Q8
|
|
* First MAX_NUM_REF entries are to scale an mv of ref id k
|
|
* w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
|
|
* Next MAX_NUM_REF entreis are to scale mv of ref id 1 w.r.t. 0
|
|
* And so on
|
|
*/
|
|
S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
|
|
|
|
/** bits for a given ref id, in either list L0/L1 */
|
|
U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
|
|
|
|
/** Points to above: 1 ptr for each list */
|
|
U08 *apu1_ref_bits_tlu_lc[2];
|
|
|
|
/** number of b fraems between P, depends on number of hierarchy layers */
|
|
S32 num_b_frms;
|
|
|
|
/** Frame level qp passed every frame by ME's caller */
|
|
S32 frm_qstep;
|
|
|
|
/** Backup of frame parameters */
|
|
hme_frm_prms_t s_frm_prms;
|
|
|
|
/** Weighted prediction parameters for all references are stored
|
|
* Scratch buffers for populated widgted inputs are also stored in this
|
|
*/
|
|
wgt_pred_ctxt_t s_wt_pred;
|
|
|
|
/** Weighted pred enable flag */
|
|
S32 i4_wt_pred_enable_flag;
|
|
|
|
/* Pointer to hold 5 rows of best search node information */
|
|
search_node_t *aps_best_search_nodes_4x8_n_rows[MAX_NUM_REF];
|
|
|
|
search_node_t *aps_best_search_nodes_8x4_n_rows[MAX_NUM_REF];
|
|
|
|
/* Pointer to hold 5 rows of best search node information */
|
|
S16 *api2_sads_4x4_n_rows[MAX_NUM_REF];
|
|
|
|
/* Number of row buffers to store SADs and best search nodes */
|
|
S32 i4_num_row_bufs;
|
|
|
|
/* (HEVCE_MAX_HEIGHT>>1) assuming layer 1 is coarse layer and >>2 assuming block size is 4x4*/
|
|
S32 ai4_row_index[(HEVCE_MAX_HEIGHT >> 1) >> 2];
|
|
|
|
/* store L1 cost required for rate control for enc decision*/
|
|
S32 i4_L1_hme_best_cost;
|
|
|
|
/* store L1 cost required for modulation index calc*/
|
|
//S32 i4_L1_hme_best_cost_for_ref;
|
|
|
|
/* store L1 satd */
|
|
S32 i4_L1_hme_sad;
|
|
/* EIID: layer1 buffer to store the early inter intra costs and decisions */
|
|
/* pic_level pointer stored here */
|
|
ihevce_ed_blk_t *ps_ed_blk;
|
|
/* EIID: layer1 buffer to store the sad/cost information for rate control
|
|
or cu level qp modulation*/
|
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
|
|
/** Dynamical Search Range parameters */
|
|
coarse_dyn_range_prms_t s_coarse_dyn_range_prms;
|
|
|
|
/** Dependency manager for Row level sync in HME pass */
|
|
void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
|
|
|
|
/* pointer buffers for memory mapping */
|
|
UWORD8 *pu1_me_reverse_map_info;
|
|
|
|
/*blk count which has higher SAD*/
|
|
S32 i4_num_blks_high_sad;
|
|
|
|
/*num of 8x8 blocks in nearest poc*/
|
|
S32 i4_num_blks;
|
|
|
|
/* thread id of the current context */
|
|
WORD32 thrd_id;
|
|
|
|
/* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
|
|
void *pv_me_optimised_function_list;
|
|
|
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
|
|
|
|
} coarse_me_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct coarse_dyn_range_prms_t
|
|
* @brief The parameters for Dyn. Search Range in coarse ME
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Dynamical Search Range parameters per ref_pic */
|
|
dyn_range_prms_t as_dyn_range_prms[MAX_NUM_REF];
|
|
|
|
/** Min y value Normalized per POC distance */
|
|
WORD16 i2_dyn_min_y_per_poc;
|
|
/** Max y value Normalized per POC distance */
|
|
WORD16 i2_dyn_max_y_per_poc;
|
|
|
|
/* The number of ref. pic. actually used in L0. Used to communicate */
|
|
/* to ihevce_l0_me_frame_end and frame process */
|
|
WORD32 i4_num_act_ref_in_l0;
|
|
|
|
/*display number*/
|
|
WORD32 i4_display_num;
|
|
|
|
} l0_dyn_range_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief inter prediction (MC) context for me loop
|
|
******************************************************************************
|
|
*/
|
|
/*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
|
|
typedef struct
|
|
{
|
|
/** pointer to reference lists */
|
|
recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
|
|
|
|
/** scratch buffer for horizontal interpolation destination */
|
|
WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
|
|
|
|
/** scratch 16 bit buffer for interpolation in l0 direction */
|
|
WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
|
|
|
|
/** scratch 16 bit buffer for interpolation in l1 direction */
|
|
WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
|
|
|
|
/** Pointer to struct containing function pointers to
|
|
functions in the 'common' library' */
|
|
func_selector_t *ps_func_selector;
|
|
|
|
/** common denominator used for luma weights */
|
|
WORD32 i4_log2_luma_wght_denom;
|
|
|
|
/** common denominator used for chroma weights */
|
|
WORD32 i4_log2_chroma_wght_denom;
|
|
|
|
/** offset w.r.t frame start in horz direction (pels) */
|
|
WORD32 i4_ctb_frm_pos_x;
|
|
|
|
/** offset w.r.t frame start in vert direction (pels) */
|
|
WORD32 i4_ctb_frm_pos_y;
|
|
|
|
/* Bit Depth of Input */
|
|
WORD32 i4_bit_depth;
|
|
|
|
/* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
|
|
UWORD8 u1_chroma_array_type;
|
|
|
|
/** weighted_pred_flag */
|
|
WORD8 i1_weighted_pred_flag;
|
|
|
|
/** weighted_bipred_flag */
|
|
WORD8 i1_weighted_bipred_flag;
|
|
|
|
/** Structure to describe extra CTBs around frame due to search
|
|
range associated with distributed-mode. Entries are top, left,
|
|
right and bottom */
|
|
WORD32 ai4_tile_xtra_pel[4];
|
|
|
|
} inter_pred_me_ctxt_t;
|
|
|
|
typedef void FT_CALC_SATD_AND_RESULT(err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms);
|
|
|
|
typedef struct
|
|
{
|
|
FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1;
|
|
FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9;
|
|
FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17;
|
|
FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_eq_1;
|
|
FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_9;
|
|
FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_17;
|
|
FT_HAD_8X8_USING_4_4X4_R *pf_had_8x8_using_4_4x4_r;
|
|
FT_HAD_16X16_R *pf_had_16x16_r;
|
|
FT_HAD_32X32_USING_16X16 *pf_compute_32x32HAD_using_16x16;
|
|
} me_func_selector_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct me_frm_ctxt_t
|
|
* @brief Handle for ME
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Init search candts, 2 sets, one for 4x8 and one for 8x4 */
|
|
search_node_t s_init_search_node[MAX_INIT_CANDTS];
|
|
|
|
/** Motion Vectors array */
|
|
mv_t as_search_cand_mv[MAX_INIT_CANDTS];
|
|
|
|
/** Results of 16 16x16 blks within a CTB used in enc layer */
|
|
search_results_t as_search_results_16x16[16];
|
|
|
|
/** Results of 4 32x32 blks in a ctb for enc layer merge stage */
|
|
search_results_t as_search_results_32x32[4];
|
|
|
|
/** Same as above but fo 64x64 blk */
|
|
search_results_t s_search_results_64x64;
|
|
|
|
/**
|
|
* Below arays store input, 4 recon planes for each ref pic.
|
|
* These are duplications, and are present within layer ctxts, but
|
|
* kept here together for faster indexing during search
|
|
*/
|
|
|
|
U08 *apu1_list_rec_fxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
U08 *apu1_list_rec_hxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
U08 *apu1_list_rec_fxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
U08 *apu1_list_rec_hxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
|
|
void *apv_list_dep_mngr[MAX_NUM_LAYERS][MAX_NUM_REF];
|
|
|
|
/** Ptr to all layer context placeholder for curr pic encoded */
|
|
layers_descr_t *ps_curr_descr;
|
|
|
|
/**
|
|
* ME uses ref id lc to search multi ref. This TLU gets POC of
|
|
* the pic w.r.t. a given ref id
|
|
*/
|
|
S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
|
|
|
|
/** POC of pic encoded just before current */
|
|
S32 i4_prev_poc;
|
|
|
|
/** POC of curret pic being encoded */
|
|
S32 i4_curr_poc;
|
|
|
|
/** Buf mgr for memory allocation */
|
|
buf_mgr_t s_buf_mgr;
|
|
|
|
/** MV Grid for L0 and L1, this is active one used */
|
|
mv_grid_t as_mv_grid[2];
|
|
|
|
/**
|
|
* MV grid for FPEL and QPEL maintained separately. Depending on the
|
|
* correct prediction res. being used, copy appropriate results to
|
|
* the as_mv_Grid structure
|
|
*/
|
|
mv_grid_t as_mv_grid_fpel[2];
|
|
mv_grid_t as_mv_grid_qpel[2];
|
|
|
|
/** Number of HME layers encode + non encode */
|
|
S32 num_layers;
|
|
|
|
/** Alloc time parameter, max ref frms used for this session */
|
|
S32 max_num_ref;
|
|
|
|
/**
|
|
* Number of layers that use explicit search. Explicit search means
|
|
* that each ref id is searched separately
|
|
*/
|
|
S32 num_layers_explicit_search;
|
|
|
|
/**
|
|
* Maximum number of results maintained at any refinement layer
|
|
* search. Important from mem alloc perspective
|
|
*/
|
|
S32 max_num_results;
|
|
|
|
/** Same as above but for coarse layer */
|
|
S32 max_num_results_coarse;
|
|
|
|
/** Array of flags, one per layer indicating hwether layer is encoded */
|
|
U08 u1_encode[MAX_NUM_LAYERS];
|
|
|
|
/* Parameters used for lambda computation */
|
|
frm_lambda_ctxt_t s_frm_lambda_ctxt;
|
|
|
|
/**
|
|
* Array look up created each frm, maintaining the corresponding
|
|
* layer descr look up for each ref id
|
|
*/
|
|
S32 a_ref_to_descr_id[MAX_NUM_REF];
|
|
|
|
/**
|
|
* Array lookup created each frame that maps a given ref id
|
|
* pertaining to unified list to a L0/L1 list. Encoder searches in terms
|
|
* of LC list or in other words does not differentiate between L0
|
|
* and L1 frames for most of search. Finally to report results to
|
|
* encoder, the ref id has to be remapped to suitable list
|
|
*/
|
|
S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
|
|
S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
|
|
|
|
/** Width and ht of each layer */
|
|
S32 i4_wd;
|
|
S32 i4_ht;
|
|
|
|
/** Histogram, one for each ref, allocated during craete time */
|
|
mv_hist_t *aps_mv_hist[MAX_NUM_REF];
|
|
|
|
/**
|
|
* Back input requiring > 8 bit precision, allocated during
|
|
* create time, storing 2I-P0 for Bidir refinement
|
|
*/
|
|
S16 *pi2_inp_bck;
|
|
ctb_boundary_attrs_t as_ctb_bound_attrs[NUM_CTB_BOUNDARY_TYPES];
|
|
|
|
/** Whether a given ref id in Lc list is past frm or future frm */
|
|
U08 au1_is_past[MAX_NUM_REF];
|
|
|
|
/** These are L0 and L1 lists, storing ref id Lc in them */
|
|
S08 ai1_past_list[MAX_NUM_REF];
|
|
S08 ai1_future_list[MAX_NUM_REF];
|
|
|
|
/** Number of past and future ref pics sent this frm */
|
|
S32 num_ref_past;
|
|
S32 num_ref_future;
|
|
|
|
/**
|
|
* Passed by encoder, stored as void to avoid header file inclusion
|
|
* of encoder wks into ME, these are frm prms passed by encoder,
|
|
* pointers to ctbanalyse_t and cu_analyse_t structures and the
|
|
* corresponding running ptrs
|
|
*/
|
|
|
|
ctb_analyse_t *ps_ctb_analyse_base;
|
|
cur_ctb_cu_tree_t *ps_cu_tree_base;
|
|
me_ctb_data_t *ps_me_ctb_data_base;
|
|
|
|
ctb_analyse_t *ps_ctb_analyse_curr_row;
|
|
cu_analyse_t *ps_cu_analyse_curr_row;
|
|
cur_ctb_cu_tree_t *ps_cu_tree_curr_row;
|
|
me_ctb_data_t *ps_me_ctb_data_curr_row;
|
|
|
|
/** Log2 of ctb size e.g. for 64 size, it will be 6 */
|
|
S32 log_ctb_size;
|
|
|
|
hme_frm_prms_t *ps_hme_frm_prms;
|
|
|
|
hme_ref_map_t *ps_hme_ref_map;
|
|
|
|
/**
|
|
* Scale factor of any given ref lc to another ref in Q8
|
|
* First MAX_NUM_REF entries are to scale an mv of ref id k
|
|
* w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
|
|
* Next MAX_NUM_REF entreis are to scale mv of ref id 1 w.r.t. 0
|
|
* And so on
|
|
*/
|
|
S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
|
|
|
|
/** bits for a given ref id, in either list L0/L1 */
|
|
U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
|
|
|
|
/** Points to above: 1 ptr for each list */
|
|
U08 *apu1_ref_bits_tlu_lc[2];
|
|
|
|
/**
|
|
* Frame level base pointer to L0 IPE ctb analyze structures.
|
|
* This strucutres include the following
|
|
*
|
|
* 1. Best costs and modes at all levels of CTB (CU=8,16,32,64)
|
|
* 2. Recommended IPE intra CU sizes for this CTB size
|
|
* 3. Early intra/inter decision structures for all 8x8 blocks of CTB
|
|
* populated by L1-ME and L1-IPE
|
|
*
|
|
*/
|
|
ipe_l0_ctb_analyse_for_me_t *ps_ipe_l0_ctb_frm_base;
|
|
|
|
/** array of ptrs to intra cost per layer encoded, stored at 8x8 */
|
|
double *apd_intra_cost[MAX_NUM_LAYERS];
|
|
|
|
/** number of b fraems between P, depends on number of hierarchy layers */
|
|
S32 num_b_frms;
|
|
|
|
/** Frame level qp passed every frame by ME's caller */
|
|
S32 frm_qstep;
|
|
|
|
/** Frame level qp with higher precision : left shifted by 8 */
|
|
S32 qstep_ls8;
|
|
|
|
/** Backup of frame parameters */
|
|
hme_frm_prms_t s_frm_prms;
|
|
|
|
/** Weighted prediction parameters for all references are stored
|
|
* Scratch buffers for populated widgted inputs are also stored in this
|
|
*/
|
|
wgt_pred_ctxt_t s_wt_pred;
|
|
|
|
/** Weighted pred enable flag */
|
|
S32 i4_wt_pred_enable_flag;
|
|
|
|
/** Results of 16 16x16 blks within a CTB used in enc layer */
|
|
inter_cu_results_t as_cu16x16_results[16];
|
|
|
|
/** Results of 4 32x32 blks in a ctb for enc layer merge stage */
|
|
inter_cu_results_t as_cu32x32_results[4];
|
|
|
|
/** Same as above but fo 64x64 blk */
|
|
inter_cu_results_t s_cu64x64_results;
|
|
|
|
/** Results of 64 8x8 blks within a CTB used in enc layer */
|
|
inter_cu_results_t as_cu8x8_results[64];
|
|
|
|
WORD32 i4_is_prev_frame_reference;
|
|
|
|
rc_quant_t *ps_rc_quant_ctxt;
|
|
|
|
/** Dynamical Search Range parameters */
|
|
l0_dyn_range_prms_t as_l0_dyn_range_prms[NUM_SG_INTERLEAVED];
|
|
|
|
/** Dependency manager for Row level sync in L0 ME pass */
|
|
void *pv_dep_mngr_l0_me_sync;
|
|
|
|
/** Pointer to structure containing function pointers of encoder*/
|
|
me_func_selector_t *ps_func_selector;
|
|
|
|
cluster_16x16_blk_t *ps_blk_16x16;
|
|
|
|
cluster_32x32_blk_t *ps_blk_32x32;
|
|
|
|
cluster_64x64_blk_t *ps_blk_64x64;
|
|
|
|
ctb_cluster_info_t *ps_ctb_cluster_info;
|
|
|
|
fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
|
|
|
|
/* thread id of the current context */
|
|
WORD32 thrd_id;
|
|
|
|
/* dependency manager for froward ME sync */
|
|
void *pv_dep_mngr_encloop_dep_me;
|
|
WORD32 i4_l0me_qp_mod;
|
|
|
|
/*mc ctxt to reuse lume inter pred fucntion
|
|
for the purpose of TRAQO*/
|
|
inter_pred_me_ctxt_t s_mc_ctxt;
|
|
|
|
WORD32 i4_rc_pass;
|
|
/*pic type*/
|
|
WORD32 i4_pic_type;
|
|
|
|
WORD32 i4_temporal_layer;
|
|
|
|
WORD32 i4_count;
|
|
|
|
WORD32 i4_use_const_lamda_modifier;
|
|
|
|
double f_i_pic_lamda_modifier;
|
|
|
|
UWORD8 u1_is_curFrame_a_refFrame;
|
|
|
|
/* src_var related variables */
|
|
U32 au4_4x4_src_sigmaX[MAX_NUM_SIGMAS_4x4];
|
|
U32 au4_4x4_src_sigmaXSquared[MAX_NUM_SIGMAS_4x4];
|
|
} me_frm_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @struct me_ctxt_t
|
|
* @brief Handle for ME
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Init prms send by encoder during create time */
|
|
hme_init_prms_t s_init_prms;
|
|
|
|
/** Not used in encoder, relevant to test bench */
|
|
U08 *pu1_debug_out;
|
|
|
|
void *pv_ext_frm_prms;
|
|
|
|
/* Frame level ME ctxt */
|
|
me_frm_ctxt_t *aps_me_frm_prms[MAX_NUM_ME_PARALLEL];
|
|
|
|
/** Ptr to all layer ctxt place holder for all pics */
|
|
/** number of reference descriptors should be equal to max number of active references **/
|
|
layers_descr_t as_ref_descr[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1];
|
|
|
|
/* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
|
|
void *pv_me_optimised_function_list;
|
|
|
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
|
|
|
|
/* Pointer to Tile params base */
|
|
void *pv_tile_params_base;
|
|
|
|
} me_ctxt_t;
|
|
|
|
typedef struct
|
|
{
|
|
/** array of context for each thread */
|
|
coarse_me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
|
|
|
|
/** memtabs storage memory */
|
|
hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
|
|
|
|
/** Frame level parameters for ME */
|
|
hme_frm_prms_t s_frm_prms;
|
|
|
|
/** Holds all reference mapping */
|
|
hme_ref_map_t s_ref_map;
|
|
|
|
/** number of threads created run time */
|
|
WORD32 i4_num_proc_thrds;
|
|
|
|
/** Dependency manager for Row level sync in HME pass */
|
|
/* Note : Indexing should be like layer_id - 1 */
|
|
void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
|
|
/* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
|
|
void *pv_me_optimised_function_list;
|
|
|
|
ihevce_cmn_opt_func_t s_cmn_opt_func;
|
|
} coarse_me_master_ctxt_t;
|
|
|
|
typedef struct
|
|
{
|
|
/** array of context for each thread */
|
|
me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
|
|
|
|
/** memtabs storage memory */
|
|
hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
|
|
|
|
/** Frame level parameters for ME */
|
|
hme_frm_prms_t as_frm_prms[MAX_NUM_ME_PARALLEL];
|
|
|
|
/** Holds all reference mapping */
|
|
hme_ref_map_t as_ref_map[MAX_NUM_ME_PARALLEL];
|
|
|
|
/** number of threads created run time */
|
|
WORD32 i4_num_proc_thrds;
|
|
|
|
/** number of me frames running in parallel */
|
|
WORD32 i4_num_me_frm_pllel;
|
|
|
|
/** Pointer to structure containing function pointers of encoder*/
|
|
me_func_selector_t s_func_selector;
|
|
/* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
|
|
void *pv_me_optimised_function_list;
|
|
|
|
ihevce_cmn_opt_func_t s_cmn_opt_func;
|
|
|
|
/* Pointer to Tile params base */
|
|
void *pv_tile_params_base;
|
|
|
|
} me_master_ctxt_t;
|
|
|
|
typedef struct
|
|
{
|
|
S16 i2_mv_x;
|
|
|
|
S16 i2_mv_y;
|
|
|
|
U08 u1_ref_idx;
|
|
|
|
U32 au4_node_map[2 * MAP_Y_MAX];
|
|
|
|
} subpel_dedup_enabler_t;
|
|
|
|
typedef subpel_dedup_enabler_t hme_dedup_enabler_t;
|
|
|
|
typedef struct
|
|
{
|
|
layer_ctxt_t *ps_curr_layer;
|
|
|
|
layer_ctxt_t *ps_coarse_layer;
|
|
|
|
U08 *pu1_num_fpel_search_cands;
|
|
|
|
S32 *pi4_ref_id_lc_to_l0_map;
|
|
|
|
S32 *pi4_ref_id_lc_to_l1_map;
|
|
|
|
S32 i4_pos_x;
|
|
|
|
S32 i4_pos_y;
|
|
|
|
S32 i4_num_act_ref_l0;
|
|
|
|
S32 i4_num_act_ref_l1;
|
|
|
|
search_candt_t *ps_search_cands;
|
|
|
|
U08 u1_search_candidate_list_index;
|
|
|
|
S32 i4_max_num_init_cands;
|
|
|
|
U08 u1_pred_dir;
|
|
|
|
/* Indicates the position of the current predDir in the processing order of predDir */
|
|
U08 u1_pred_dir_ctr;
|
|
|
|
/* The following 4 flags apply exclusively to spatial candidates */
|
|
U08 u1_is_topRight_available;
|
|
|
|
U08 u1_is_topLeft_available;
|
|
|
|
U08 u1_is_top_available;
|
|
|
|
U08 u1_is_left_available;
|
|
|
|
S08 i1_default_ref_id;
|
|
|
|
S08 i1_alt_default_ref_id;
|
|
|
|
U08 u1_num_results_in_mvbank;
|
|
|
|
BLK_SIZE_T e_search_blk_size;
|
|
|
|
} fpel_srch_cand_init_data_t;
|
|
|
|
typedef struct
|
|
{
|
|
U08 *pu1_pred;
|
|
|
|
S32 i4_pred_stride;
|
|
|
|
U08 u1_pred_buf_array_id;
|
|
|
|
} hme_pred_buf_info_t;
|
|
|
|
/*****************************************************************************/
|
|
/* Typedefs */
|
|
/*****************************************************************************/
|
|
typedef void (*PF_SAD_FXN_T)(err_prms_t *);
|
|
|
|
typedef void (*PF_SAD_RESULT_FXN_T)(err_prms_t *, result_upd_prms_t *ps_result_prms);
|
|
|
|
typedef WORD32 (*PF_SAD_FXN_TU_REC)(
|
|
err_prms_t *,
|
|
WORD32 lambda,
|
|
WORD32 lamda_q_shift,
|
|
WORD32 i4_frm_qstep,
|
|
me_func_selector_t *ps_func_selector);
|
|
|
|
typedef void (*PF_RESULT_FXN_T)(result_upd_prms_t *);
|
|
|
|
typedef void (*PF_CALC_SAD_AND_RESULT)(
|
|
hme_search_prms_t *, wgt_pred_ctxt_t *, err_prms_t *, result_upd_prms_t *, U08 **, S32);
|
|
|
|
#endif /* _HME_DEFS_H_ */
|