/******************************************************************************
 *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*!
******************************************************************************
* \file ihevce_enc_loop_structs.h
*
* \brief
*    This file contains structures of enc_loop pass
*
* \date
*    18/09/2012
*
* \author
*    Ittiam
*
******************************************************************************
*/

#ifndef _IHEVCE_ENC_LOOP_STRUCTS_H_
|
|
#define _IHEVCE_ENC_LOOP_STRUCTS_H_
|
|
|
|
#include "ihevc_macros.h"
|
|
|
|
/* Table of 64 * 2 entries, defined in another translation unit.
 * COMPUTE_MERGE_IDX_COST in this header indexes it with
 * (context model ^ bin) and adds the entry to its Q12 bit count. */
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
|
|
|
|
/*****************************************************************************/
/* Constant Macros                                                           */
/*****************************************************************************/
/** \brief Number of transform types: 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
#define NUM_TRANS_TYPES 5
#define INTRA_PLANAR 0
#define INTRA_DC 1
#define NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD 2
#define MAX_TU_IN_TU_EQ_DIV_2 4
#define MAX_MVP_LIST_CAND 2
/* NOTE: seven hex digits, i.e. smaller than the largest signed 32-bit value */
#define MAX_COST 0x7ffffff
/* NOTE(review): 64-bit counterpart of MAX_COST; also written with fewer hex
 * digits than the full signed 64-bit range - value kept exactly as is */
#define MAX_COST_64 0x7ffffffffffffff
#define NUM_32CU_AND_64CU_IN_CTB 5 /* 4 - 32x32 + 1 64x64*/
#define PING_PONG 2
#define MAX_SAO_RD_CAND 10
#define SCRATCH_BUF_STRIDE 80

/*****************************************************************************/
/* Function Macros                                                           */
/*****************************************************************************/
/** @brief Expands to its (parenthesized) argument unchanged */
#define INTRA_ANGULAR(x) (x)

/** @brief max 30bit value */
#define MAX30 ((1 << 30) - 1)

/** @brief macro to clip a data to max of 30bits (assuming unsigned).
 *  Note: the argument is evaluated twice. */
#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))

/** @brief compute the (lambda * rate) with a qshift and clip result to 30bits.
 *
 *  The rate is widened to 64 bits BEFORE the multiplication so that the
 *  product cannot overflow the operands' narrower type (previously the cast
 *  was applied to the already-computed product). Operands are assumed to be
 *  non-negative.
 */
#define COMPUTE_RATE_COST_CLIP30(r, l, qshift)                                 \
    ((WORD32)CLIP30(((ULWORD64)(r) * (ULWORD64)(l)) >> (qshift)))

/** @brief Computes (((inp - off) << shift) * wt + (1 << 14)) >> 15,
 *  i.e. a multiply by wt with a rounding offset of 1 << 14 followed by a
 *  right shift of 15. Every argument is parenthesized so that expression
 *  arguments (e.g. "a + b" for wt) keep their intended precedence. */
#define IHEVCE_INV_WT_PRED(inp, wt, off, shift)                                \
    (((((inp) - (off)) << (shift)) * (wt) + (1 << 14)) >> 15)

/** @brief Fills an inter PU structure for a single-direction prediction.
 *
 *  Position is stored as offset >> 2 and size as (dim >> 2) - 1. The intra
 *  flag is cleared and b2_pred_mode is set to pred_lx. A non-zero pred_lx
 *  writes ref_idx / mv into the L1 members and sets the L0 ref idx to -1;
 *  a zero pred_lx does the converse.
 *
 *  Expands to a brace-enclosed block (kept for compatibility with existing
 *  call sites). Arguments may be evaluated more than once.
 */
#define POPULATE_PU_STRUCT(ps_pu, mvx, mvy, offset_x, offset_y, wd, ht, ref_idx, pred_lx) \
    {                                                                          \
        (ps_pu)->b4_pos_x = (offset_x) >> 2;                                   \
        (ps_pu)->b4_pos_y = (offset_y) >> 2;                                   \
        (ps_pu)->b4_wd = ((wd) >> 2) - 1;                                      \
        (ps_pu)->b4_ht = ((ht) >> 2) - 1;                                      \
        (ps_pu)->b1_intra_flag = 0;                                            \
        (ps_pu)->b2_pred_mode = (pred_lx);                                     \
        if(pred_lx)                                                            \
        {                                                                      \
            (ps_pu)->mv.i1_l0_ref_idx = -1;                                    \
            (ps_pu)->mv.i1_l1_ref_idx = (ref_idx);                             \
            (ps_pu)->mv.s_l1_mv.i2_mvx = (mvx);                                \
            (ps_pu)->mv.s_l1_mv.i2_mvy = (mvy);                                \
        }                                                                      \
        else                                                                   \
        {                                                                      \
            (ps_pu)->mv.i1_l0_ref_idx = (ref_idx);                             \
            (ps_pu)->mv.i1_l1_ref_idx = -1;                                    \
            (ps_pu)->mv.s_l0_mv.i2_mvx = (mvx);                                \
            (ps_pu)->mv.s_l0_mv.i2_mvy = (mvy);                                \
        }                                                                      \
    }

#define GET_FRAME_QSTEP_FROM_QP(frame_qp, frame_qstep) \
|
|
{ \
|
|
double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 }; \
|
|
\
|
|
frame_qstep = (WORD32)((1 << ((frame_qp) / 6)) * q_steps[(frame_qp) % 6]); \
|
|
}
|
|
|
|
/** @brief Resets the merge result bookkeeping.
 *
 *  - zeroes the per-partition result counts for L0 and L1
 *  - points s_pu_results.aps_pu_results[i][j] at pas_pu_results[i][j]
 *  - marks every result entry as unused: total cost = MAX_COST and both
 *    reference indices = -1
 *
 *  Declares locals i, j, k inside its own block; arguments must not be
 *  expressions that use those names. Expands to a brace-enclosed block.
 */
#define INITIALISE_MERGE_RESULT_STRUCT(ps_merge_data, pas_pu_results)          \
    {                                                                          \
        WORD32 i, j, k;                                                        \
                                                                               \
        for(i = 0; i < TOT_NUM_PARTS; i++)                                     \
        {                                                                      \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l0[i] = 0;   \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l1[i] = 0;   \
        }                                                                      \
        for(i = 0; i < 2; i++)                                                 \
        {                                                                      \
            for(j = 0; j < TOT_NUM_PARTS; j++)                                 \
            {                                                                  \
                (ps_merge_data)->s_pu_results.aps_pu_results[i][j] =           \
                    (pas_pu_results)[i][j];                                    \
                for(k = 0; k < MAX_NUM_RESULTS_PER_PART_LIST; k++)             \
                {                                                              \
                    (pas_pu_results)[i][j][k].i4_tot_cost = MAX_COST;          \
                    (pas_pu_results)[i][j][k].pu.mv.i1_l0_ref_idx = -1;        \
                    (pas_pu_results)[i][j][k].pu.mv.i1_l1_ref_idx = -1;        \
                }                                                              \
            }                                                                  \
        }                                                                      \
    }

/** @brief Copies CTB level parameters into the common frame params struct.
 *
 *  Fixes relative to the previous definition:
 *  - The parameter list now follows the macro name directly. With the list on
 *    a continuation line the macro was object-like and could not be invoked.
 *  - Macro parameters no longer share names with the structure members they
 *    initialise (a parameter named like a member is substituted in the
 *    member position too).
 *
 *  Both input and pred strides are set to cu_size and pu1_wkg_mem is set to
 *  NULL. Declares locals i and j. Expands to a brace-enclosed block.
 */
#define POPULATE_CTB_PARAMS(ps_common_frm_prms,                                \
                            in_wt_inp,                                         \
                            in_ctb_x_off,                                      \
                            in_ctb_y_off,                                      \
                            in_pred,                                           \
                            cu_size,                                           \
                            ref_stride,                                        \
                            bidir_enabled,                                     \
                            num_refs,                                          \
                            in_rec_list_l0,                                    \
                            in_rec_list_l1,                                    \
                            in_non_wt_inp,                                     \
                            lambda,                                            \
                            lambda_q_shift,                                    \
                            in_log_wdc)                                        \
    {                                                                          \
        WORD32 i, j;                                                           \
        (ps_common_frm_prms)->i4_bidir_enabled = (bidir_enabled);              \
        (ps_common_frm_prms)->i4_ctb_x_off = (in_ctb_x_off);                   \
        (ps_common_frm_prms)->i4_ctb_y_off = (in_ctb_y_off);                   \
        (ps_common_frm_prms)->i4_inp_stride = (cu_size);                       \
        (ps_common_frm_prms)->i4_lamda = (lambda);                             \
        (ps_common_frm_prms)->i4_pred_stride = (cu_size);                      \
        (ps_common_frm_prms)->i4_rec_stride = (ref_stride);                    \
        (ps_common_frm_prms)->pps_rec_list_l0 = (in_rec_list_l0);              \
        (ps_common_frm_prms)->pps_rec_list_l1 = (in_rec_list_l1);              \
        (ps_common_frm_prms)->ppu1_pred = (in_pred);                           \
        (ps_common_frm_prms)->pu1_non_wt_inp = (in_non_wt_inp);                \
        (ps_common_frm_prms)->pu1_wkg_mem = NULL;                              \
        (ps_common_frm_prms)->u1_lamda_qshift = (lambda_q_shift);              \
        (ps_common_frm_prms)->u1_num_ref = (num_refs);                         \
        (ps_common_frm_prms)->wpred_log_wdc = (in_log_wdc);                    \
        /* NOTE(review): every iteration writes the same lvalue, so only   */  \
        /* in_wt_inp[1][MAX_NUM_REF - 1] survives. The destination was     */  \
        /* presumably meant to be indexed by [i][j]; left unchanged since  */  \
        /* the member's type is not visible here - confirm.                */  \
        for(i = 0; i < 2; i++)                                                 \
        {                                                                      \
            for(j = 0; j < MAX_NUM_REF; j++)                                   \
            {                                                                  \
                (ps_common_frm_prms)->apu1_wt_inp = (in_wt_inp)[i][j];         \
            }                                                                  \
        }                                                                      \
    }

/** @brief Estimates the cost of signalling a merge index and stores it in
 *  'cost' (which must be an lvalue).
 *
 *  When max_merge_cand <= 1 nothing is coded and cost is 0. Otherwise the
 *  bits are: the table entry gau2_ihevce_cabac_bin_to_bits[model ^ bin] for
 *  the first (context coded) bin, plus, when max_merge_cand > 2 and
 *  merge_idx > 0, MIN(merge_idx, max_merge_cand - 2) bypass bins, each worth
 *  1 << CABAC_FRAC_BITS_Q. The bit count is then scaled by lambda through
 *  COMPUTE_RATE_COST_CLIP30 with a shift of
 *  (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q).
 *
 *  Arguments may be evaluated more than once. Expands to a brace-enclosed
 *  block.
 */
#define COMPUTE_MERGE_IDX_COST(merge_idx_0_model, merge_idx, max_merge_cand, lambda, cost) \
    {                                                                          \
        WORD32 cab_bits_q12 = 0;                                               \
                                                                               \
        /* sanity checks */                                                    \
        ASSERT(((merge_idx) >= 0) && ((merge_idx) < (max_merge_cand)));        \
                                                                               \
        /* encode the merge idx only if required */                            \
        if((max_merge_cand) > 1)                                               \
        {                                                                      \
            WORD32 bin = ((merge_idx) > 0);                                    \
                                                                               \
            /* bits for the context modelled first bin */                      \
            cab_bits_q12 +=                                                    \
                gau2_ihevce_cabac_bin_to_bits[(merge_idx_0_model) ^ bin];      \
                                                                               \
            /* bits for larger merge idx coded as bypass truncated unary */    \
            if(((max_merge_cand) > 2) && ((merge_idx) > 0))                    \
            {                                                                  \
                cab_bits_q12 += (MIN((merge_idx), ((max_merge_cand) - 2)))     \
                                << CABAC_FRAC_BITS_Q;                          \
            }                                                                  \
                                                                               \
            (cost) = COMPUTE_RATE_COST_CLIP30(                                 \
                cab_bits_q12, (lambda), (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); \
        }                                                                      \
        else                                                                   \
        {                                                                      \
            (cost) = 0;                                                        \
        }                                                                      \
    }

/*****************************************************************************/
|
|
/* Typedefs */
|
|
/*****************************************************************************/
|
|
|
|
typedef FT_CALC_HAD_SATD_8BIT *pf_res_trans_luma_had_chroma;
|
|
|
|
/** \breif function pointer prototype for residue and transform enc_loop */
|
|
typedef UWORD32 (*pf_res_trans_chroma)(
|
|
UWORD8 *pu1_src,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 *pi4_tmp,
|
|
WORD16 *pi2_dst,
|
|
WORD32 src_strd,
|
|
WORD32 pred_strd,
|
|
WORD32 dst_strd,
|
|
CHROMA_PLANE_ID_T e_chroma_plane);
|
|
|
|
/** \breif function pointer prototype for quantization and inv Quant for ssd
|
|
calc. for all transform sizes */
|
|
typedef WORD32 (*pf_quant_iquant_ssd)(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div, /* qpscaled / 6 */
|
|
WORD32 qp_rem, /* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 *pi4_quant_round_factor_0_1,
|
|
WORD32 *pi4_quant_round_factor_1_2,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
LWORD64 *pi8_cost);
|
|
|
|
/** \breif function pointer prototype for quantization and inv Quant for ssd
|
|
calc. for all transform sizes (in case of RDOQ + SBH) */
|
|
typedef WORD32 (*pf_quant_iquant_ssd_sbh)(
|
|
WORD16 *pi2_coeffs,
|
|
WORD16 *pi2_quant_coeff,
|
|
WORD16 *pi2_q_dst,
|
|
WORD16 *pi2_iq_dst,
|
|
WORD32 trans_size,
|
|
WORD32 qp_div, /* qpscaled / 6 */
|
|
WORD32 qp_rem, /* qpscaled % 6 */
|
|
WORD32 q_add,
|
|
WORD32 src_strd,
|
|
WORD32 dst_q_strd,
|
|
WORD32 dst_iq_strd,
|
|
UWORD8 *csbf,
|
|
WORD32 csbf_strd,
|
|
WORD32 *zero_col,
|
|
WORD32 *zero_row,
|
|
WORD16 *pi2_dequant_coeff,
|
|
WORD32 *pi4_cost,
|
|
WORD32 i4_scan_idx,
|
|
WORD32 i4_perform_rdoq);
|
|
|
|
/** \breif function pointer prototype for inverse transform and recon
|
|
for all transform sizes : Luma */
|
|
typedef void (*pf_it_recon)(
|
|
WORD16 *pi2_src,
|
|
WORD16 *pi2_tmp,
|
|
UWORD8 *pu1_pred,
|
|
UWORD8 *pu1_dst,
|
|
WORD32 src_strd,
|
|
WORD32 pred_strd,
|
|
WORD32 dst_strd,
|
|
WORD32 zero_cols,
|
|
WORD32 zero_rows);
|
|
|
|
/** \breif function pointer prototype for inverse transform and recon
|
|
for all transform sizes : Chroma */
|
|
typedef void (*pf_it_recon_chroma)(
|
|
WORD16 *pi2_src,
|
|
WORD16 *pi2_tmp,
|
|
UWORD8 *pu1_pred,
|
|
UWORD8 *pu1_dst,
|
|
WORD32 src_strd,
|
|
WORD32 pred_strd,
|
|
WORD32 dst_strd,
|
|
WORD32 zero_cols,
|
|
WORD32 zero_rows);
|
|
|
|
/** \breif function pointer prototype for luma sao. */
|
|
typedef void (*pf_sao_luma)(
|
|
UWORD8 *pu1_src,
|
|
WORD32 src_strd,
|
|
UWORD8 *pu1_src_left,
|
|
UWORD8 *pu1_src_top,
|
|
UWORD8 *pu1_src_top_left,
|
|
UWORD8 *pu1_src_top_right,
|
|
UWORD8 *pu1_src_bot_left,
|
|
UWORD8 *pu1_avail,
|
|
WORD8 *pi1_sao_offset,
|
|
WORD32 wd,
|
|
WORD32 ht);
|
|
|
|
/** \breif function pointer prototype for chroma sao. */
|
|
typedef void (*pf_sao_chroma)(
|
|
UWORD8 *pu1_src,
|
|
WORD32 src_strd,
|
|
UWORD8 *pu1_src_left,
|
|
UWORD8 *pu1_src_top,
|
|
UWORD8 *pu1_src_top_left,
|
|
UWORD8 *pu1_src_top_right,
|
|
UWORD8 *pu1_src_bot_left,
|
|
UWORD8 *pu1_avail,
|
|
WORD8 *pi1_sao_offset_u,
|
|
WORD8 *pi1_sao_offset_v,
|
|
WORD32 wd,
|
|
WORD32 ht);
|
|
|
|
/*****************************************************************************/
/* Enums                                                                     */
/*****************************************************************************/

/** \brief Indices that group the intra prediction modes named in each
 *  enumerator (single modes, mode ranges, or the pair 18 and 34).
 *  NUM_IP_FUNCS is the number of groups. */
typedef enum
{
    IP_FUNC_MODE_0 = 0,
    IP_FUNC_MODE_1,
    IP_FUNC_MODE_2,
    IP_FUNC_MODE_3TO9,
    IP_FUNC_MODE_10,
    IP_FUNC_MODE_11TO17,
    IP_FUNC_MODE_18_34,
    IP_FUNC_MODE_19TO25,
    IP_FUNC_MODE_26,
    IP_FUNC_MODE_27TO33,

    NUM_IP_FUNCS

} IP_FUNCS_T;

/** \brief TU size expressed relative to the CU size */
typedef enum
{
    /* currently only cu and cu/2 modes are supported */
    TU_EQ_CU = 0,
    TU_EQ_CU_DIV2,
    TU_EQ_SUBCU, /* only applicable for NXN mode at mincusize */

    /* support for below modes needs to be added */
    TU_EQ_CU_DIV4,
    TU_EQ_CU_DIV8,
    TU_EQ_CU_DIV16,

    NUM_TU_WRT_CU,

} TU_SIZE_WRT_CU_T;

/** \brief Call modes: RDOPT_MODE or RDOPT_SKIP_MODE */
typedef enum
{
    RDOPT_MODE = 0,
    RDOPT_SKIP_MODE = 1,

    NUM_CORE_CALL_MODES,

} CORE_FUNC_CALL_MODE_T;

/** \brief Identifiers of the enc_loop memory records.
 *  NUM_ENC_LOOP_MEM_RECS must stay last so that it equals the record count. */
typedef enum
{
    ENC_LOOP_CTXT = 0,
    ENC_LOOP_THRDS_CTXT,
    ENC_LOOP_SCALE_MAT,
    ENC_LOOP_RESCALE_MAT,
    ENC_LOOP_TOP_LUMA,
    ENC_LOOP_TOP_CHROMA,
    ENC_LOOP_TOP_NBR4X4,
    /* memory to dump rate control parameters by each thread for each
     * bit-rate instance */
    ENC_LOOP_RC_PARAMS,
    ENC_LOOP_QP_TOP_4X4,
    ENC_LOOP_DEBLOCKING,
    ENC_LOOP_422_CHROMA_INTRA_PRED,
    ENC_LOOP_INTER_PRED,
    ENC_LOOP_CHROMA_PRED_INTRA,
    ENC_LOOP_REF_SUB_OUT,
    ENC_LOOP_REF_FILT_OUT,
    ENC_LOOP_CU_RECUR_LUMA_RECON,
    ENC_LOOP_CU_RECUR_CHROMA_RECON,
    ENC_LOOP_CU_RECUR_LUMA_PRED,
    ENC_LOOP_CU_RECUR_CHROMA_PRED,
    ENC_LOOP_LEFT_LUMA_DATA,
    ENC_LOOP_LEFT_CHROMA_DATA,
    ENC_LOOP_SAO,
    ENC_LOOP_CU_COEFF_DATA,
    ENC_LOOP_CU_RECUR_COEFF_DATA,
    ENC_LOOP_CU_DEQUANT_DATA,
    ENC_LOOP_RECON_DATA_STORE,
    /* should always be the last entry */
    NUM_ENC_LOOP_MEM_RECS

} ENC_LOOP_MEM_TABS_T;

/** This is for assigning the pred buffers for luma (2 ping-pong) and
    chroma(1) */
typedef enum
{
    CU_ME_INTRA_PRED_LUMA_IDX0 = 0,
    CU_ME_INTRA_PRED_LUMA_IDX1,
    CU_ME_INTRA_PRED_CHROMA_IDX,

    /* should be always the last entry */
    NUM_CU_ME_INTRA_PRED_IDX

} CU_ME_INTRA_PRED_IDX_T;

/*****************************************************************************/
|
|
/* Structure */
|
|
/*****************************************************************************/
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Structure to store TU prms req. for enc_loop only
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Zero_col info. for the current TU Luma */
|
|
UWORD32 u4_luma_zero_col;
|
|
/** Zero_row info. for the current TU Luma */
|
|
UWORD32 u4_luma_zero_row;
|
|
|
|
/** Zero_col info. for the current TU Chroma Cb */
|
|
UWORD32 au4_cb_zero_col[2];
|
|
/** Zero_row info. for the current TU Chroma Cb */
|
|
UWORD32 au4_cb_zero_row[2];
|
|
/** Zero_col info. for the current TU Chroma Cr */
|
|
UWORD32 au4_cr_zero_col[2];
|
|
/** Zero_row info. for the current TU Chroma Cr */
|
|
UWORD32 au4_cr_zero_row[2];
|
|
|
|
/** bytes consumed by the luma ecd data */
|
|
WORD16 i2_luma_bytes_consumed;
|
|
/** bytes consumed by the Cb ecd data */
|
|
WORD16 ai2_cb_bytes_consumed[2];
|
|
/** bytes consumed by the Cr ecd data */
|
|
WORD16 ai2_cr_bytes_consumed[2];
|
|
|
|
/** flag to re-evaluate IQ and Coeff data of luma in the final_recon
|
|
function. If zero, uses the data from RDOPT cand. */
|
|
UWORD16 b1_eval_luma_iq_and_coeff_data : 1;
|
|
/** flag to re-evaluate IQ and Coeff data of chroma in the final_recon
|
|
function. If zero, uses the data from RDOPT cand. */
|
|
UWORD16 b1_eval_chroma_iq_and_coeff_data : 1;
|
|
|
|
/* TO DO : No support now, need to add. Always comapre ZERO_CBF cost */
|
|
/** Luma ZERO_CBF cost is compared with residue coding cost only if this
|
|
flag is enabled */
|
|
UWORD16 b1_eval_luma_zero_cbf_cost : 1;
|
|
/** Chroma ZERO_CBF cost is compared with residue coding cost only if this
|
|
flag is enabled */
|
|
UWORD16 b1_eval_chroma_zero_cbf_cost : 1;
|
|
|
|
/** Reserved to make WORD32 alignment */
|
|
UWORD16 b12_reserved : 12;
|
|
|
|
} tu_enc_loop_temp_prms_t;
|
|
|
|
typedef struct recon_datastore_t
|
|
{
|
|
/* 2 to store current and best */
|
|
void *apv_luma_recon_bufs[2];
|
|
|
|
/* 0 to store cur chroma mode recon */
|
|
/* 1 to store winning independent chroma mode with a single TU's recon */
|
|
/* 2 to store winning independent chroma mode with 4 TUs' recon */
|
|
void *apv_chroma_recon_bufs[3];
|
|
|
|
/* The following two arrays are used to store the ID's of the buffers */
|
|
/* where the winning recon is being stored */
|
|
/* For Luma buffers, the permissible values are 0, 1 and UCHAR_MAX */
|
|
/* For Chroma buffers, the permissible values are 0, 1, 2 and UCHAR_MAX */
|
|
/* The value 'UCHAR_MAX' indicates the absence of Recon for that particular TU */
|
|
UWORD8 au1_bufId_with_winning_LumaRecon[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
|
|
|
|
/* 2 - 2 Chroma planes */
|
|
/* 2 - 2 possible subTU's */
|
|
UWORD8 au1_bufId_with_winning_ChromaRecon[2][MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW][2];
|
|
|
|
WORD32 i4_lumaRecon_stride;
|
|
|
|
WORD32 i4_chromaRecon_stride;
|
|
|
|
UWORD8 au1_is_chromaRecon_available[3];
|
|
|
|
UWORD8 u1_is_lumaRecon_available;
|
|
|
|
} recon_datastore_t;
|
|
|
|
typedef struct enc_loop_cu_final_prms_t
|
|
{
|
|
recon_datastore_t s_recon_datastore;
|
|
|
|
/**
|
|
* Cu size of the current cu being processed
|
|
*/
|
|
UWORD8 u1_cu_size;
|
|
/**
|
|
* flags to indicate the final cu prediction mode
|
|
*/
|
|
UWORD8 u1_intra_flag;
|
|
|
|
/**
|
|
* flags to indicate Skip mode for CU
|
|
*/
|
|
UWORD8 u1_skip_flag;
|
|
|
|
/**
|
|
* number of tu in current cu for a given mode
|
|
* if skip then this value should be 1
|
|
*/
|
|
UWORD16 u2_num_tus_in_cu;
|
|
|
|
/**
|
|
* number of pu in current cu for a given mode
|
|
* if skip then this value should be 1
|
|
*/
|
|
UWORD16 u2_num_pus_in_cu;
|
|
|
|
/**
|
|
* total bytes produced in ECD data buffer
|
|
* if skip then this value should be 0
|
|
*/
|
|
WORD32 i4_num_bytes_ecd_data;
|
|
|
|
/**
|
|
* Partition mode of the best candidate
|
|
* if skip then this value should be SIZE_2Nx2N
|
|
* @sa PART_SIZE_E
|
|
*/
|
|
UWORD8 u1_part_mode;
|
|
|
|
/**
|
|
* indicates if inter cu has coded coeffs 1: coded, 0: not coded
|
|
* if skip then this value shoudl be ignored
|
|
*/
|
|
UWORD8 u1_is_cu_coded;
|
|
|
|
/**
|
|
* Chroma pred mode as signalled in bitstream
|
|
*/
|
|
UWORD8 u1_chroma_intra_pred_mode;
|
|
|
|
/**
|
|
* To store the best chroma mode for TU. Will be same for NxN case.
|
|
* Actual Chroma pred
|
|
*/
|
|
UWORD8 u1_chroma_intra_pred_actual_mode;
|
|
|
|
/**
|
|
* sad accumulated over all Tus of given CU
|
|
*/
|
|
UWORD32 u4_cu_sad;
|
|
|
|
/**
|
|
* sad accumulated over all Tus of given CU
|
|
*/
|
|
LWORD64 i8_cu_ssd;
|
|
|
|
/**
|
|
* open loop intra sad
|
|
*/
|
|
UWORD32 u4_cu_open_intra_sad;
|
|
|
|
/**
|
|
* header bits of cu estimated during RDO evaluation.
|
|
* Includes tu splits flags excludes cbf flags
|
|
*/
|
|
UWORD32 u4_cu_hdr_bits;
|
|
/**
|
|
* luma residual bits of a cu estimated during RDO evaluation.
|
|
*/
|
|
UWORD32 u4_cu_luma_res_bits;
|
|
|
|
/**
|
|
* chroma residual bits of a cu estimated during RDO evaluation.
|
|
*/
|
|
UWORD32 u4_cu_chroma_res_bits;
|
|
|
|
/**
|
|
* cbf bits of a cu estimated during RDO evaluation (considered as part of texture bits later)
|
|
*/
|
|
UWORD32 u4_cu_cbf_bits;
|
|
|
|
/**
|
|
* array of PU for current CU
|
|
* For Inter PUs this will contain the follwoing
|
|
* - merge flag
|
|
* - (MVD and reference indicies) or (Merge Index)
|
|
* - (if Cu is skipped then Merge index for skip
|
|
* will be in 1st PU entry in array)
|
|
* for intra PU only intra flag will be set to 1
|
|
*
|
|
*/
|
|
pu_t as_pu_enc_loop[NUM_PU_PARTS];
|
|
|
|
/**
|
|
* array of PU for chroma usage
|
|
* in case of Merge MVs and reference idx of the final candidate
|
|
* used by luma need sto be stored
|
|
* for intra PU this will not be used
|
|
*/
|
|
pu_t as_pu_chrm_proc[NUM_PU_PARTS];
|
|
|
|
/**
|
|
* array of colocated PU for current CU
|
|
* MV and Ref pic id should be stored in this
|
|
* for intra PU only intra flag will be set to 1
|
|
*/
|
|
pu_col_mv_t as_col_pu_enc_loop[NUM_INTER_PU_PARTS];
|
|
|
|
/** array to store the intra mode pred related params
|
|
* if nxn mode the all 4 lcoations will be used
|
|
*/
|
|
intra_prev_rem_flags_t as_intra_prev_rem[NUM_PU_PARTS];
|
|
|
|
/**
|
|
* array to store TU propeties of the each tu in a CU
|
|
*/
|
|
tu_enc_loop_out_t as_tu_enc_loop[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* array to store TU propeties (req. for enc_loop only and not for
|
|
* entropy) of the each tu in a CU
|
|
*/
|
|
tu_enc_loop_temp_prms_t as_tu_enc_loop_temp_prms[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* Neighbour flags stored for chroma reuse
|
|
*/
|
|
UWORD32 au4_nbr_flags[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* intra pred modes stored for chroma reuse
|
|
*/
|
|
UWORD8 au1_intra_pred_mode[4];
|
|
|
|
/**
|
|
* array for storing coeffs during RD opt stage at CU level.
|
|
* Luma and chroma together
|
|
*/
|
|
UWORD8 *pu1_cu_coeffs;
|
|
|
|
/**
|
|
* Chroma deq_coeffs start point in the ai2_cu_deq_coeffs buffer.
|
|
*/
|
|
WORD32 i4_chrm_cu_coeff_strt_idx;
|
|
|
|
/**
|
|
* array for storing dequantized vals. during RD opt stage at CU level
|
|
* Luma and chroma together.
|
|
* Stride is assumed to be cu_size
|
|
* u-v interleaved storing is at TU level
|
|
*/
|
|
WORD16 *pi2_cu_deq_coeffs;
|
|
|
|
/**
|
|
* Chroma deq_coeffs start point in the ai2_cu_deq_coeffs buffer.
|
|
*/
|
|
WORD32 i4_chrm_deq_coeff_strt_idx;
|
|
|
|
/**
|
|
* The total RDOPT cost of the CU for the best mode
|
|
*/
|
|
LWORD64 i8_best_rdopt_cost;
|
|
|
|
/**
|
|
* The current running RDOPT cost for the current mode
|
|
*/
|
|
LWORD64 i8_curr_rdopt_cost;
|
|
|
|
LWORD64 i8_best_distortion;
|
|
|
|
} enc_loop_cu_final_prms_t;
|
|
|
|
typedef struct
|
|
{
|
|
/** Current Cu chroma recon pointer in pic buffer */
|
|
UWORD8 *pu1_final_recon;
|
|
|
|
UWORD16 *pu2_final_recon;
|
|
|
|
/** Current Cu chroma source pointer in pic buffer */
|
|
UWORD8 *pu1_curr_src;
|
|
|
|
UWORD16 *pu2_curr_src;
|
|
|
|
/** Current CU chroma reocn buffer stride */
|
|
WORD32 i4_chrm_recon_stride;
|
|
|
|
/** Current CU chroma source buffer stride */
|
|
WORD32 i4_chrm_src_stride;
|
|
|
|
/** Current Cu chroma Left pointer for intra pred */
|
|
UWORD8 *pu1_cu_left;
|
|
|
|
UWORD16 *pu2_cu_left;
|
|
|
|
/** Left buffer stride */
|
|
WORD32 i4_cu_left_stride;
|
|
|
|
/** Current Cu chroma top pointer for intra pred */
|
|
UWORD8 *pu1_cu_top;
|
|
|
|
UWORD16 *pu2_cu_top;
|
|
|
|
/** Current Cu chroma top left pointer for intra pred */
|
|
UWORD8 *pu1_cu_top_left;
|
|
|
|
UWORD16 *pu2_cu_top_left;
|
|
|
|
} enc_loop_chrm_cu_buf_prms_t;
|
|
|
|
typedef struct
|
|
{
|
|
/** cost of the current satd cand */
|
|
WORD32 i4_cost;
|
|
|
|
/** tu size w.r.t to cu of the current satd cand
|
|
* @sa TU_SIZE_WRT_CU_T
|
|
*/
|
|
WORD8 i4_tu_depth;
|
|
|
|
/**
|
|
* access valid number of entries in this array based on u1_part_size
|
|
*/
|
|
UWORD8 au1_intra_luma_modes[NUM_PU_PARTS];
|
|
|
|
/** @remarks u1_part_size 2Nx2N or NxN */
|
|
UWORD8 u1_part_mode; /* @sa: PART_SIZE_E */
|
|
|
|
/** Flag to indicate whether current candidate needs to be evaluated */
|
|
UWORD8 u1_eval_flag;
|
|
|
|
} cu_intra_satd_out_t;
|
|
|
|
/** \brief cu level parameters for SATD / RDOPT function */
|
|
|
|
typedef struct
|
|
{
|
|
/** pointer to source luma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_luma_src;
|
|
|
|
UWORD16 *pu2_luma_src;
|
|
|
|
/** pointer to source chroma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_chrm_src;
|
|
|
|
UWORD16 *pu2_chrm_src;
|
|
|
|
/** pointer to recon luma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_luma_recon;
|
|
|
|
UWORD16 *pu2_luma_recon;
|
|
|
|
/** pointer to recon chroma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_chrm_recon;
|
|
|
|
UWORD16 *pu2_chrm_recon;
|
|
|
|
/*1st pass parallel dpb buffer pointers aimilar to the above*/
|
|
UWORD8 *pu1_luma_recon_src;
|
|
|
|
UWORD16 *pu2_luma_recon_src;
|
|
|
|
UWORD8 *pu1_chrm_recon_src;
|
|
|
|
UWORD16 *pu2_chrm_recon_src;
|
|
|
|
/** Pointer to Subpel Plane Buffer */
|
|
UWORD8 *pu1_sbpel_hxfy;
|
|
|
|
/** Pointer to Subpel Plane Buffer */
|
|
UWORD8 *pu1_sbpel_fxhy;
|
|
|
|
/** Pointer to Subpel Plane Buffer */
|
|
UWORD8 *pu1_sbpel_hxhy;
|
|
|
|
/** Luma source stride */
|
|
WORD32 i4_luma_src_stride;
|
|
|
|
/** chroma soruce stride */
|
|
WORD32 i4_chrm_src_stride;
|
|
|
|
/** Luma recon stride */
|
|
WORD32 i4_luma_recon_stride;
|
|
|
|
/** chroma recon stride */
|
|
WORD32 i4_chrm_recon_stride;
|
|
|
|
/** ctb size */
|
|
WORD32 i4_ctb_size;
|
|
|
|
/** current ctb postion horz */
|
|
WORD32 i4_ctb_pos;
|
|
|
|
/** number of PU finalized for curr CU */
|
|
WORD32 i4_num_pus_in_cu;
|
|
|
|
/** number of bytes consumed for current in ecd data buf */
|
|
WORD32 i4_num_bytes_cons;
|
|
|
|
UWORD8 u1_is_cu_noisy;
|
|
|
|
UWORD8 *pu1_is_8x8Blk_noisy;
|
|
|
|
} enc_loop_cu_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Pad inter pred recon context
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Pointer to Subpel Plane Buffer */
|
|
UWORD8 *pu1_sbpel_hxfy;
|
|
|
|
/** Pointer to Subpel Plane Buffer */
|
|
UWORD8 *pu1_sbpel_fxhy;
|
|
|
|
/** Pointer to Subpel Plane Buffer */
|
|
UWORD8 *pu1_sbpel_hxhy;
|
|
|
|
/** pointer to recon luma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_luma_recon;
|
|
|
|
/** pointer to recon chroma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_chrm_recon;
|
|
|
|
/*FOr recon source 1st pass starts*/
|
|
|
|
UWORD8 *pu1_luma_recon_src;
|
|
|
|
/** pointer to recon chroma pointer
|
|
* pointer will be pointing to CTB start location
|
|
* At CU level based on the CU position this pointer
|
|
* has to appropriately incremented
|
|
*/
|
|
UWORD8 *pu1_chrm_recon_src;
|
|
/*FOr recon source 1st pass ends */
|
|
/** Luma recon stride */
|
|
WORD32 i4_luma_recon_stride;
|
|
|
|
/** chroma recon stride */
|
|
WORD32 i4_chrm_recon_stride;
|
|
|
|
/** ctb size */
|
|
WORD32 i4_ctb_size;
|
|
|
|
/* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
|
|
UWORD8 u1_chroma_array_type;
|
|
|
|
} pad_interp_recon_frm_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief inter prediction (MC) context for enc loop
|
|
******************************************************************************
|
|
*/
|
|
/*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
|
|
typedef struct
|
|
{
|
|
/** pointer to reference lists */
|
|
recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
|
|
|
|
/** scratch buffer for horizontal interpolation destination */
|
|
WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
|
|
|
|
/** scratch 16 bit buffer for interpolation in l0 direction */
|
|
WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
|
|
|
|
/** scratch 16 bit buffer for interpolation in l1 direction */
|
|
WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
|
|
|
|
/** Pointer to struct containing function pointers to
|
|
functions in the 'common' library' */
|
|
func_selector_t *ps_func_selector;
|
|
|
|
/** common denominator used for luma weights */
|
|
WORD32 i4_log2_luma_wght_denom;
|
|
|
|
/** common denominator used for chroma weights */
|
|
WORD32 i4_log2_chroma_wght_denom;
|
|
|
|
/** offset w.r.t frame start in horz direction (pels) */
|
|
WORD32 i4_ctb_frm_pos_x;
|
|
|
|
/** offset w.r.t frame start in vert direction (pels) */
|
|
WORD32 i4_ctb_frm_pos_y;
|
|
|
|
/* Bit Depth of Input */
|
|
WORD32 i4_bit_depth;
|
|
|
|
/* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
|
|
UWORD8 u1_chroma_array_type;
|
|
|
|
/** weighted_pred_flag */
|
|
WORD8 i1_weighted_pred_flag;
|
|
|
|
/** weighted_bipred_flag */
|
|
WORD8 i1_weighted_bipred_flag;
|
|
|
|
/** Structure to describe extra CTBs around frame due to search
|
|
range associated with distributed-mode. Entries are top, left,
|
|
right and bottom */
|
|
WORD32 ai4_tile_xtra_pel[4];
|
|
|
|
} inter_pred_ctxt_t;
|
|
/*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
|
|
|
|
/** @brief function pointer prototype for luma inter prediction of one PU.
 *  Takes an opaque inter-pred context, the PU, an opaque destination buffer
 *  with its stride and a flag selecting the inter-pred source; returns an
 *  IV_API_CALL_STATUS_T. */
typedef IV_API_CALL_STATUS_T (*PF_LUMA_INTER_PRED_PU)(
    void *pv_inter_pred_ctxt,
    pu_t *ps_pu,
    void *pv_dst_buf,
    WORD32 dst_stride,
    WORD32 i4_flag_inter_pred_source);

/**
|
|
******************************************************************************
|
|
* @brief Motion predictor context structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** pointer to reference lists */
|
|
recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
|
|
|
|
/** pointer to the slice header */
|
|
slice_header_t *ps_slice_hdr;
|
|
|
|
/** pointer to SPS */
|
|
sps_t *ps_sps;
|
|
|
|
/** CTB x. In CTB unit*/
|
|
WORD32 i4_ctb_x;
|
|
|
|
/** CTB y. In CTB unit */
|
|
WORD32 i4_ctb_y;
|
|
|
|
/** Log2 Parallel Merge Level - 2 */
|
|
WORD32 i4_log2_parallel_merge_level_minus2;
|
|
|
|
/* Number of extra CTBs external to tile due to fetched search-range around Tile */
|
|
/* TOP, left, right and bottom */
|
|
WORD32 ai4_tile_xtra_ctb[4];
|
|
|
|
} mv_pred_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Deblocking and Boundary strength CTB level structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Array to store the packed BS values in horizontal direction */
|
|
UWORD32 au4_horz_bs[(MAX_CTB_SIZE >> 3) + 1];
|
|
|
|
/** Array to store the packed BS values in vertical direction */
|
|
UWORD32 au4_vert_bs[(MAX_CTB_SIZE >> 3) + 1];
|
|
|
|
/** CTB neighbour availability flags for deblocking */
|
|
UWORD8 u1_not_first_ctb_col_of_frame;
|
|
UWORD8 u1_not_first_ctb_row_of_frame;
|
|
|
|
} deblk_bs_ctb_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Deblocking and CTB level structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* BS of the last vertical 4x4 column of previous CTB
|
|
*/
|
|
UWORD8 au1_prev_bs[MAX_CTB_SIZE >> 3];
|
|
|
|
/**
|
|
* BS of the last vertical 4x4 column of previous CTB
|
|
*/
|
|
UWORD8 au1_prev_bs_uv[MAX_CTB_SIZE >> 3];
|
|
|
|
/** pointer to top 4x4 ctb nbr structure; for accessing qp */
|
|
nbr_4x4_t *ps_top_ctb_nbr_4x4;
|
|
|
|
/** pointer to left 4x4 ctb nbr structure; for accessing qp */
|
|
nbr_4x4_t *ps_left_ctb_nbr_4x4;
|
|
|
|
/** pointer to current 4x4 ctb nbr structure; for accessing qp */
|
|
nbr_4x4_t *ps_cur_ctb_4x4;
|
|
|
|
/** max of 8 such contiguous bs to be computed for 64x64 ctb */
|
|
UWORD32 *pu4_bs_horz;
|
|
|
|
/** max of 8 such contiguous bs to be computed for 64x64 ctb */
|
|
UWORD32 *pu4_bs_vert;
|
|
|
|
/** ptr to current ctb luma pel in frame */
|
|
UWORD8 *pu1_ctb_y;
|
|
|
|
UWORD16 *pu2_ctb_y;
|
|
|
|
/** ptr to current ctb sp interleaved chroma pel in frame */
|
|
UWORD8 *pu1_ctb_uv;
|
|
|
|
UWORD16 *pu2_ctb_uv;
|
|
|
|
func_selector_t *ps_func_selector;
|
|
|
|
/** left nbr buffer stride in terms of 4x4 units */
|
|
WORD32 i4_left_nbr_4x4_strd;
|
|
|
|
/** current buffer stride in terms of 4x4 units */
|
|
WORD32 i4_cur_4x4_strd;
|
|
|
|
/** size in pels 16 / 32 /64 */
|
|
WORD32 i4_ctb_size;
|
|
|
|
/** stride for luma */
|
|
WORD32 i4_luma_pic_stride;
|
|
|
|
/** stride for chroma */
|
|
WORD32 i4_chroma_pic_stride;
|
|
|
|
/** boolean indicating if left ctb edge is to be deblocked or not */
|
|
WORD32 i4_deblock_left_ctb_edge;
|
|
|
|
/** boolean indicating if top ctb edge is to be deblocked or not */
|
|
WORD32 i4_deblock_top_ctb_edge;
|
|
|
|
/** beta offset index */
|
|
WORD32 i4_beta_offset_div2;
|
|
|
|
/** tc offset index */
|
|
WORD32 i4_tc_offset_div2;
|
|
|
|
/** chroma cb qp offset index */
|
|
WORD32 i4_cb_qp_indx_offset;
|
|
|
|
/** chroma cr qp offset index */
|
|
WORD32 i4_cr_qp_indx_offset;
|
|
|
|
WORD32 i4_bit_depth;
|
|
|
|
/* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
|
|
UWORD8 u1_chroma_array_type;
|
|
|
|
} deblk_ctb_params_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Stores the BS and Qp of a CTB row. For CTB-row level deblocking
|
|
******************************************************************************
|
|
*/
|
|
typedef struct deblk_ctbrow_prms
|
|
{
|
|
/**
|
|
* Refer to ihevce_enc_loop_get_mem_recs() and
|
|
* ihevce_enc_loop_init()for more info
|
|
* regarding memory allocation to each one below.
|
|
*/
|
|
|
|
/**
|
|
* Stores the vertical boundary strength of a CTB row.
|
|
*/
|
|
UWORD32 *pu4_ctb_row_bs_vert;
|
|
|
|
/**
|
|
* Storage is same as above. Contains horizontal BS.
|
|
*/
|
|
UWORD32 *pu4_ctb_row_bs_horz;
|
|
|
|
/**
|
|
* Pointer to the CTB row's Qp storage
|
|
*/
|
|
WORD8 *pi1_ctb_row_qp;
|
|
|
|
/**
|
|
* Stride of the pu1_ctb_row_qp_p buffer in WORD32 unit
|
|
*/
|
|
WORD32 u4_qp_buffer_stride;
|
|
|
|
/*
|
|
* Pointer to the memory which contains the Qp of
|
|
* top4x4 neighbour blocks for each CTB row.
|
|
* This memory is at frame level.
|
|
*/
|
|
WORD8 *api1_qp_top_4x4_ctb_row[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
|
|
/*
|
|
* Stride of the above memory location.
|
|
* Values in one-stride correspondes to one CTB row.
|
|
*/
|
|
WORD32 u4_qp_top_4x4_buf_strd;
|
|
|
|
/*size of frm level qp buffer*/
|
|
WORD32 u4_qp_top_4x4_buf_size;
|
|
|
|
} deblk_ctbrow_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Entropy rd opt context for cabac bit estimation and RDO
|
|
******************************************************************************
|
|
*/
|
|
typedef struct rdopt_entropy_ctxt
|
|
{
|
|
/**
|
|
* array for entropy contexts during RD opt stage at CU level
|
|
* one best and one current is required
|
|
*/
|
|
entropy_context_t as_cu_entropy_ctxt[2];
|
|
|
|
/**
|
|
* init state of entropy context models during CU RD opt stage,
|
|
* required for saving and restoring the cabac states
|
|
*/
|
|
UWORD8 au1_init_cabac_ctxt_states[IHEVC_CAB_CTXT_END];
|
|
|
|
/*
|
|
* ptr to top row cu skip flags (1 bit per 8x8CU)
|
|
*/
|
|
UWORD8 *pu1_cu_skip_top_row;
|
|
|
|
/**
|
|
* Current entropy ctxt idx
|
|
*/
|
|
WORD32 i4_curr_buf_idx;
|
|
|
|
} rdopt_entropy_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief structure to save predicted data from Inter SATD stage to Inter RD opt stage
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/*Buffer to store the predicted data after motion compensation for merge and
|
|
* skip candidates.
|
|
* [2] Because for a given candidate we do motion compensation for 5 merge candidates.
|
|
* store the pred data after mc for the first 2 candidates and from 3rd candidate
|
|
* onwards, overwrite the data which has higher SATD cost.
|
|
*/
|
|
void *apv_pred_data[2];
|
|
|
|
/** Stride to store the predicted data
|
|
*/
|
|
WORD32 i4_pred_data_stride;
|
|
|
|
} merge_skip_pred_data_t;
|
|
/**
|
|
******************************************************************************
|
|
* @brief Structure to hold Rate control related parameters
|
|
* for each bit-rate instance and each thread
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
*frame level open loop intra sad
|
|
*
|
|
*/
|
|
LWORD64 i8_frame_open_loop_ssd;
|
|
|
|
/**
|
|
*frame level open loop intra sad
|
|
*
|
|
*/
|
|
UWORD32 u4_frame_open_loop_intra_sad;
|
|
/**
|
|
* frame level intra sad accumulator
|
|
*/
|
|
UWORD32 u4_frame_intra_sad;
|
|
|
|
/**
|
|
* frame level sad accumulator
|
|
*/
|
|
UWORD32 u4_frame_sad_acc;
|
|
|
|
/**
|
|
* frame level intra sad accumulator
|
|
*/
|
|
UWORD32 u4_frame_inter_sad_acc;
|
|
|
|
/**
|
|
* frame level inter sad accumulator
|
|
*/
|
|
UWORD32 u4_frame_intra_sad_acc;
|
|
|
|
/**
|
|
* frame level cost accumulator
|
|
*/
|
|
LWORD64 i8_frame_cost_acc;
|
|
|
|
/**
|
|
* frame level intra cost accumulator
|
|
*/
|
|
LWORD64 i8_frame_inter_cost_acc;
|
|
|
|
/**
|
|
* frame level inter cost accumulator
|
|
*/
|
|
LWORD64 i8_frame_intra_cost_acc;
|
|
|
|
/**
|
|
* frame level rdopt bits accumulator
|
|
*/
|
|
UWORD32 u4_frame_rdopt_bits;
|
|
|
|
/**
|
|
* frame level rdopt header bits accumulator
|
|
*/
|
|
UWORD32 u4_frame_rdopt_header_bits;
|
|
|
|
/* Sum the Qps of each 8*8 block in CU
|
|
* 8*8 block is considered as Min CU size possible as per standard is 8
|
|
* 0 corresponds to INTER and 1 corresponds to INTRA
|
|
*/
|
|
WORD32 i4_qp_normalized_8x8_cu_sum[2];
|
|
|
|
/* Count the number of 8x8 blocks in each CU type (INTER/INTRA)
|
|
* 0 corresponds to INTER and 1 corresponds to INTRA
|
|
*/
|
|
WORD32 i4_8x8_cu_sum[2];
|
|
|
|
/* SAD/Qscale accumulated over all CUs. CU size is inherently
|
|
* taken care in SAD
|
|
*/
|
|
LWORD64 i8_sad_by_qscale[2];
|
|
|
|
} enc_loop_rc_params_t;
|
|
/**
|
|
******************************************************************************
|
|
* @brief CU information structure. This is to store the
|
|
* CU final out after Recursion
|
|
******************************************************************************
|
|
*/
|
|
typedef struct ihevce_enc_cu_node_ctxt_t
|
|
{
|
|
/* CU params */
|
|
/** CU X position in terms of min CU (8x8) units */
|
|
UWORD8 b3_cu_pos_x : 3;
|
|
|
|
/** CU Y position in terms of min CU (8x8) units */
|
|
UWORD8 b3_cu_pos_y : 3;
|
|
|
|
/** reserved bytes */
|
|
UWORD8 b2_reserved : 2;
|
|
|
|
/** CU size 2N (width or height) in pixels */
|
|
UWORD8 u1_cu_size;
|
|
|
|
/**
|
|
* array for storing cu level final params for a given mode
|
|
* one best and one current is required
|
|
*/
|
|
enc_loop_cu_final_prms_t s_cu_prms;
|
|
|
|
/**
|
|
* array for storing cu level final params for a given mode
|
|
* one best and one current is required
|
|
*/
|
|
enc_loop_cu_final_prms_t *ps_cu_prms;
|
|
|
|
/* flag to indicate if current CU is the first
|
|
CU of the Quantisation group*/
|
|
UWORD32 b1_first_cu_in_qg : 1;
|
|
|
|
/** qp used during for CU
|
|
* @remarks :
|
|
*/
|
|
WORD8 i1_cu_qp;
|
|
|
|
} ihevce_enc_cu_node_ctxt_t;
|
|
|
|
typedef struct
|
|
{
|
|
WORD32 i4_sad;
|
|
|
|
WORD32 i4_mv_cost;
|
|
|
|
WORD32 i4_tot_cost;
|
|
|
|
WORD8 i1_ref_idx;
|
|
|
|
mv_t s_mv;
|
|
|
|
} block_merge_nodes_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief This struct is used for storing output of block merge
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
block_merge_nodes_t *aps_best_results[MAX_NUM_PARTS];
|
|
|
|
/* Contains the best uni dir for each partition type */
|
|
WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];
|
|
|
|
/* Contains the best pred dir for each partition type */
|
|
WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];
|
|
|
|
WORD32 i4_tot_cost;
|
|
|
|
PART_TYPE_T e_part_type;
|
|
} block_merge_results_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief This struct is used for storing output of block merge and also
|
|
* all of the intermediate results required
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
block_merge_results_t as_best_results[3 + 1][NUM_BEST_ME_OUTPUTS];
|
|
|
|
block_merge_nodes_t as_nodes[3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS];
|
|
|
|
WORD32 part_mask;
|
|
|
|
WORD32 num_results_per_part;
|
|
|
|
WORD32 num_best_results;
|
|
|
|
/**
|
|
* Overall best CU cost, while other entries store CU costs
|
|
* in single direction, this is best CU cost, where each
|
|
* partition cost is evaluated as best of uni/bi
|
|
*/
|
|
WORD32 best_cu_cost;
|
|
|
|
} block_merge_data_t;
|
|
/**
|
|
******************************************************************************
|
|
* @brief CU nbr information structure. This is to store the
|
|
* neighbour information for final reconstruction function
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/* Pointer to top-left nbr */
|
|
nbr_4x4_t *ps_topleft_nbr_4x4;
|
|
/* Pointer to left nbr */
|
|
nbr_4x4_t *ps_left_nbr_4x4;
|
|
/* Pointer to top nbr */
|
|
nbr_4x4_t *ps_top_nbr_4x4;
|
|
/* stride of left_nbr_4x4 */
|
|
WORD32 nbr_4x4_left_strd;
|
|
|
|
/* Pointer to CU top */
|
|
UWORD8 *pu1_cu_top;
|
|
|
|
UWORD16 *pu2_cu_top;
|
|
|
|
/* Pointer to CU top-left */
|
|
UWORD8 *pu1_cu_top_left;
|
|
|
|
UWORD16 *pu2_cu_top_left;
|
|
|
|
/* Pointer to CU left */
|
|
UWORD8 *pu1_cu_left;
|
|
|
|
UWORD16 *pu2_cu_left;
|
|
|
|
/* stride of left pointer */
|
|
WORD32 cu_left_stride;
|
|
} cu_nbr_prms_t;
|
|
|
|
/** Structure to save the flags required for Final mode Reconstruction
|
|
function. These flags are set based on quality presets and
|
|
the bit-rate we are working on */
|
|
typedef struct
|
|
{
|
|
/** Flag to indicate whether Luma pred data need to recomputed in the
|
|
final_recon function. Now disabled for all modes */
|
|
UWORD8 u1_eval_luma_pred_data;
|
|
|
|
/** Flag to indicate whether Chroma pred data need to recomputed in the
|
|
final_recon function. Now disabled for MedSpeed only */
|
|
UWORD8 u1_eval_chroma_pred_data;
|
|
|
|
/** Flag to indicate whether header data need to recomputed in the
|
|
final_recon function. Now disabled for all modes */
|
|
UWORD8 u1_eval_header_data;
|
|
|
|
UWORD8 u1_eval_recon_data;
|
|
} cu_final_recon_flags_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief structure to save pred data of ME cand. 1 ping-pong to store the
|
|
* the best and current luma cand. 1 buffer to store the best chroma pred
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Pointers to store luma pred data of me/intra cand.(2) and chroma(1) */
|
|
UWORD8 *pu1_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
|
|
|
|
UWORD16 *pu2_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
|
|
|
|
/** Stride to store the predicted data of me/intra cand.(2) and chroma(1) */
|
|
WORD32 ai4_pred_data_stride[NUM_CU_ME_INTRA_PRED_IDX];
|
|
/** Counter saying how many pointers are assigned */
|
|
WORD32 i4_pointer_count;
|
|
|
|
} cu_me_intra_pred_prms_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Chroma RDOPT context structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Storing the inverse quantized data (cb) for the special modes*/
|
|
WORD16 ai2_iq_data_cb[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
|
|
|
|
/** Storing the inverse quantized data (cr) for the special modes*/
|
|
WORD16 ai2_iq_data_cr[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
|
|
|
|
/** Storing the scan coeffs (cb) for the special modes*/
|
|
UWORD8 au1_scan_coeff_cb[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
|
|
|
|
/** Storing the scan coeffs (cb) for the special modes*/
|
|
UWORD8 au1_scan_coeff_cr[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
|
|
|
|
/** Max number of bytes filled in scan coeff data (cb) per TU*/
|
|
WORD32 ai4_num_bytes_scan_coeff_cb_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** Max number of bytes filled in scan coeff data (cr) per TU*/
|
|
WORD32 ai4_num_bytes_scan_coeff_cr_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** Stride of the iq buffer*/
|
|
WORD32 i4_iq_buff_stride;
|
|
|
|
/** Storing the pred data
|
|
The predicted data is always interleaved. Therefore the size of this array will be
|
|
((MAX_TU_SIZE * MAX_TU_SIZE) >> 2) * 2)*/
|
|
void *pv_pred_data;
|
|
|
|
/** Predicted data stride*/
|
|
WORD32 i4_pred_stride;
|
|
|
|
/** Storing the cbfs for each tu
|
|
For 1 tu case, only the 0th element will be valid*/
|
|
UWORD8 au1_cbf_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** Storing the cbfs for each tu
|
|
For 1 tu case, only the 0th element will be valid*/
|
|
UWORD8 au1_cbf_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** To store the cabac ctxt model updated by the RDOPT of best chroma mode
|
|
[0] : for 1 TU case, [1] : for 4 TU case */
|
|
UWORD8 au1_chrm_satd_updated_ctxt_models[IHEVC_CAB_CTXT_END];
|
|
|
|
/** Best SATD chroma mode, [0] : for 1 TU case (TU_EQ_CU) , [1] : for 4 TU case
|
|
Values : 0(PLANAR), 1(VERT), 2(HOR), 3(DC) chroma mode per each TU */
|
|
UWORD8 u1_best_cr_mode;
|
|
|
|
/** Best SATD chroma mode's RDOPT cost, [0] : for 1 TU case, [1] : for 4 TU case */
|
|
LWORD64 i8_chroma_best_rdopt;
|
|
|
|
/* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
|
|
/* This is done by adding the bits for signalling chroma mode (0-3) */
|
|
/* and subtracting the bits for chroma mode same as luma mode (4) */
|
|
LWORD64 i8_cost_to_encode_chroma_mode;
|
|
|
|
/** Best SATD chroma mode's tu bits, [0] : for 1 TU case, [1] : for 4 TU case */
|
|
WORD32 i4_chrm_tu_bits;
|
|
|
|
/** Storing the zero col values for each TU for cb*/
|
|
WORD32 ai4_zero_col_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** Storing the zero col values for each TU for cr*/
|
|
WORD32 ai4_zero_col_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** Storing the zero row values for each TU for cb*/
|
|
WORD32 ai4_zero_row_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
|
|
/** Storing the zero row values for each TU for cr*/
|
|
WORD32 ai4_zero_row_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
|
|
} chroma_intra_satd_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Chroma RDOPT context structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Chroma SATD context structure. It is an array of two to account for the TU_EQ_CU candidate
|
|
and the TU_EQ_CU_DIV2 candidate*/
|
|
chroma_intra_satd_ctxt_t as_chr_intra_satd_ctxt[NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD];
|
|
|
|
/** Chroma SATD has has to be evaluated only for the HIGH QUALITY */
|
|
UWORD8 u1_eval_chrm_satd;
|
|
|
|
/** Chroma RDOPT has to be evaluated only for the HIGH QUALITY / MEDIUM SPEED preset */
|
|
UWORD8 u1_eval_chrm_rdopt;
|
|
|
|
} ihevce_chroma_rdopt_ctxt_t;
|
|
|
|
typedef struct
|
|
{
|
|
inter_cu_results_t s_cu_results;
|
|
|
|
inter_pu_results_t s_pu_results;
|
|
} block_merge_output_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Structure to store the Merge/Skip Cand. for EncLoop
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** List of all merge/skip candidates to be evalauted (SATD/RDOPT) for
|
|
* this CU
|
|
*/
|
|
cu_inter_cand_t as_cu_inter_merge_skip_cand[MAX_NUM_CU_MERGE_SKIP_CAND];
|
|
|
|
/** number of merge/skip candidates
|
|
*/
|
|
UWORD8 u1_num_merge_cands;
|
|
|
|
UWORD8 u1_num_skip_cands;
|
|
|
|
UWORD8 u1_num_merge_skip_cands;
|
|
|
|
} cu_inter_merge_skip_t;
|
|
|
|
/** Structure to store the Mixed mode Cand. for EncLoop */
|
|
typedef struct
|
|
{
|
|
cu_inter_cand_t as_cu_data[MAX_NUM_MIXED_MODE_INTER_RDO_CANDS];
|
|
|
|
UWORD8 u1_num_mixed_mode_type0_cands;
|
|
|
|
UWORD8 u1_num_mixed_mode_type1_cands;
|
|
|
|
} cu_mixed_mode_inter_t;
|
|
|
|
typedef struct
|
|
{
|
|
/* +2 because an additional buffer is required for */
|
|
/* storing both cur and best during merge eval */
|
|
void *apv_inter_pred_data[MAX_NUM_INTER_RDO_CANDS + 4];
|
|
|
|
/* Bit field used to determine the indices of free bufs in 'apv_pred_data' buf array */
|
|
UWORD32 u4_is_buf_in_use;
|
|
|
|
/* Assumption is that the same stride is used for the */
|
|
/* entire set of buffers above and is equal to the */
|
|
/* CU size */
|
|
WORD32 i4_pred_stride;
|
|
|
|
} ihevce_inter_pred_buf_data_t;
|
|
/** Structure to store the Inter Cand. info in EncLoop */
|
|
typedef struct
|
|
{
|
|
cu_inter_cand_t *aps_cu_data[MAX_NUM_INTER_RDO_CANDS];
|
|
|
|
UWORD32 au4_cost[MAX_NUM_INTER_RDO_CANDS];
|
|
|
|
UWORD8 au1_pred_buf_idx[MAX_NUM_INTER_RDO_CANDS];
|
|
|
|
UWORD32 u4_src_variance;
|
|
|
|
UWORD8 u1_idx_of_worst_cost_in_cost_array;
|
|
|
|
UWORD8 u1_idx_of_worst_cost_in_pred_buf_array;
|
|
|
|
UWORD8 u1_num_inter_cands;
|
|
|
|
} inter_cu_mode_info_t;
|
|
typedef struct
|
|
{
|
|
/*Frame level base pointer of buffers for each ctb row to store the top pixels
|
|
*and top left pixel for the next ctb row.These buffers are common accross all threads
|
|
*/
|
|
UWORD8 *apu1_sao_src_frm_top_luma[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
/*Ctb level pointer to buffer to store the top pixels
|
|
*and top left pixel for the next ctb row.These buffers are common accross all threads
|
|
*/
|
|
UWORD8 *pu1_curr_sao_src_top_luma;
|
|
/*Buffer to store the left boundary before
|
|
* doing sao on current ctb for the next ctb in the current row
|
|
*/
|
|
UWORD8 au1_sao_src_left_luma[MAX_CTB_SIZE];
|
|
/*Frame level base pointer of buffers for each ctb row to store the top pixels
|
|
*and top left pixel for the next ctb row.These buffers are common accross all threads
|
|
*/
|
|
UWORD8 *apu1_sao_src_frm_top_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
|
|
WORD32 i4_frm_top_chroma_buf_stride;
|
|
|
|
/*Ctb level pointer to buffer to store the top chroma pixels
|
|
*and top left pixel for the next ctb row.These buffers are common accross all threads
|
|
*/
|
|
UWORD8 *pu1_curr_sao_src_top_chroma;
|
|
|
|
/*Scratch buffer to store the left boundary before
|
|
* doing sao on current ctb for the next ctb in the current row
|
|
*/
|
|
UWORD8 au1_sao_src_left_chroma[MAX_CTB_SIZE * 2];
|
|
|
|
/**
|
|
* Luma recon buffer
|
|
*/
|
|
UWORD8 *pu1_frm_luma_recon_buf;
|
|
/**
|
|
* Chroma recon buffer
|
|
*/
|
|
UWORD8 *pu1_frm_chroma_recon_buf;
|
|
/**
|
|
* Luma recon buffer for curr ctb
|
|
*/
|
|
UWORD8 *pu1_cur_luma_recon_buf;
|
|
/**
|
|
* Chroma recon buffer for curr ctb
|
|
*/
|
|
UWORD8 *pu1_cur_chroma_recon_buf;
|
|
/**
|
|
* Luma src buffer
|
|
*/
|
|
UWORD8 *pu1_frm_luma_src_buf;
|
|
/**
|
|
* Chroma src buffer
|
|
*/
|
|
UWORD8 *pu1_frm_chroma_src_buf;
|
|
/**
|
|
* Luma src(input yuv) buffer for curr ctb
|
|
*/
|
|
UWORD8 *pu1_cur_luma_src_buf;
|
|
/**
|
|
* Chroma src buffer for curr ctb
|
|
*/
|
|
UWORD8 *pu1_cur_chroma_src_buf;
|
|
/* Left luma scratch buffer required for sao RD optimisation*/
|
|
UWORD8 au1_left_luma_scratch[MAX_CTB_SIZE];
|
|
|
|
/* Left chroma scratch buffer required for sao RD optimisation*/
|
|
/* Min size required= MAX_CTB_SIZE/2 * 2
|
|
* Multiplied by 2 because size reuired is MAX_CTB_SIZE/2 each for U and V
|
|
*/
|
|
UWORD8 au1_left_chroma_scratch[MAX_CTB_SIZE * 2];
|
|
|
|
/* Top luma scratch buffer required for sao RD optimisation*/
|
|
UWORD8 au1_top_luma_scratch[MAX_CTB_SIZE + 2]; // +1 for top left pixel and +1 for top right
|
|
|
|
/* Top chroma scratch buffer required for sao RD optimisation*/
|
|
UWORD8 au1_top_chroma_scratch[MAX_CTB_SIZE + 4]; // +2 for top left pixel and +2 for top right
|
|
|
|
/* Scratch buffer to store the sao'ed output during sao RD optimisation*/
|
|
/* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4 ,hence
|
|
MAX_CTB _SIZE + 4*/
|
|
UWORD8 au1_sao_luma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
|
|
|
|
/* Scratch buffer to store the sao'ed output during sao RD optimisation*/
|
|
/* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4 ,hence
|
|
MAX_CTB _SIZE + 4*/
|
|
UWORD8 au1_sao_chroma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
|
|
|
|
/**
|
|
* CTB size
|
|
*/
|
|
WORD32 i4_ctb_size;
|
|
/**
|
|
* Luma recon buffer stride
|
|
*/
|
|
WORD32 i4_frm_luma_recon_stride;
|
|
/**
|
|
* Chroma recon buffer stride
|
|
*/
|
|
WORD32 i4_frm_chroma_recon_stride;
|
|
/**
|
|
* Luma recon buffer stride for curr ctb
|
|
*/
|
|
WORD32 i4_cur_luma_recon_stride;
|
|
/**
|
|
* Chroma recon buffer stride for curr ctb
|
|
*/
|
|
WORD32 i4_cur_chroma_recon_stride;
|
|
/**
|
|
* Luma src buffer stride
|
|
*/
|
|
WORD32 i4_frm_luma_src_stride;
|
|
/**
|
|
* Chroma src buffer stride
|
|
*/
|
|
WORD32 i4_frm_chroma_src_stride;
|
|
|
|
WORD32 i4_frm_top_luma_buf_stride;
|
|
/**
|
|
* Luma src buffer stride for curr ctb
|
|
*/
|
|
WORD32 i4_cur_luma_src_stride;
|
|
/**
|
|
* Chroma src buffer stride for curr ctb
|
|
*/
|
|
WORD32 i4_cur_chroma_src_stride;
|
|
|
|
/* Top luma buffer size */
|
|
WORD32 i4_top_luma_buf_size;
|
|
|
|
/* Top Chroma buffer size */
|
|
WORD32 i4_top_chroma_buf_size;
|
|
|
|
/*** Number of CTB units **/
|
|
WORD32 i4_num_ctb_units;
|
|
|
|
/**
|
|
* CTB x pos
|
|
*/
|
|
WORD32 i4_ctb_x;
|
|
/**
|
|
* CTB y pos
|
|
*/
|
|
WORD32 i4_ctb_y;
|
|
/* SAO block width*/
|
|
WORD32 i4_sao_blk_wd;
|
|
|
|
/* SAO block height*/
|
|
WORD32 i4_sao_blk_ht;
|
|
|
|
/* Last ctb row flag*/
|
|
WORD32 i4_is_last_ctb_row;
|
|
|
|
/* Last ctb col flag*/
|
|
WORD32 i4_is_last_ctb_col;
|
|
|
|
/* CTB aligned width */
|
|
UWORD32 u4_ctb_aligned_wd;
|
|
|
|
/* Number of ctbs in a row*/
|
|
UWORD32 u4_num_ctbs_horz;
|
|
|
|
UWORD32 u4_num_ctbs_vert;
|
|
/**
|
|
* Closed loop SSD Lambda
|
|
* This is multiplied with bits for RD cost computations in SSD mode
|
|
* This is represented in q format with shift of LAMBDA_Q_SHIFT
|
|
*/
|
|
LWORD64 i8_cl_ssd_lambda_qf;
|
|
|
|
/**
|
|
* Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
|
|
* This is multiplied with bits for RD cost computations in SSD mode
|
|
* This is represented in q format with shift of LAMBDA_Q_SHIFT
|
|
*/
|
|
LWORD64 i8_cl_ssd_lambda_chroma_qf;
|
|
/**
|
|
* Pointer to current PPS
|
|
*/
|
|
pps_t *ps_pps; //not used currently
|
|
/**
|
|
* Pointer to current SPS
|
|
*/
|
|
sps_t *ps_sps;
|
|
|
|
/**
|
|
* Pointer to current slice header structure
|
|
*/
|
|
slice_header_t *ps_slice_hdr;
|
|
/**
|
|
* Pointer to current frame ctb out array of structures
|
|
*/
|
|
ctb_enc_loop_out_t *ps_ctb_out;
|
|
/**
|
|
* context for cabac bit estimation used during rdopt stage
|
|
*/
|
|
rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt;
|
|
/**
|
|
* Pointer to sao_enc_t for the current ctb
|
|
*/
|
|
sao_enc_t *ps_sao;
|
|
/*
|
|
* Pointer to an array to store the sao information of the top ctb
|
|
* This is required for to decide top merge
|
|
*/
|
|
sao_enc_t *aps_frm_top_ctb_sao[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
|
|
/*
|
|
* Pointer to structure to store the sao parameters of (x,y)th ctb
|
|
* for top merge of (x,y+1)th ctb
|
|
*/
|
|
sao_enc_t *ps_top_ctb_sao;
|
|
|
|
/* structure to store the sao parameters of (x,y)th ctb for
|
|
* the left merge of (x+1,y)th ctb
|
|
*/
|
|
sao_enc_t s_left_ctb_sao;
|
|
|
|
/* Array of structures for SAO RDO candidates*/
|
|
sao_enc_t as_sao_rd_cand[MAX_SAO_RD_CAND];
|
|
|
|
/** array of function pointers for luma sao */
|
|
pf_sao_luma apf_sao_luma[4];
|
|
|
|
/** array of function pointers for chroma sao */
|
|
pf_sao_chroma apf_sao_chroma[4];
|
|
|
|
/* Flag to do SAO luma and chroma filtering*/
|
|
WORD8 i1_slice_sao_luma_flag;
|
|
|
|
WORD8 i1_slice_sao_chroma_flag;
|
|
|
|
#if DISABLE_SAO_WHEN_NOISY
|
|
ctb_analyse_t *ps_ctb_data;
|
|
|
|
WORD32 i4_ctb_data_stride;
|
|
#endif
|
|
|
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
|
|
|
|
} sao_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Encode loop module context structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
|
|
void *pv_err_func_selector;
|
|
#endif
|
|
|
|
/**
|
|
* Quality preset for comtrolling numbe of RD opt cand
|
|
* @sa : IHEVCE_QUALITY_CONFIG_T
|
|
*/
|
|
WORD32 i4_quality_preset;
|
|
/**
|
|
*
|
|
*
|
|
*/
|
|
WORD32 i4_rc_pass;
|
|
/**
|
|
* Lamda to be mulitplied with bits for SATD
|
|
* should be equal to Lamda*Qp
|
|
*/
|
|
WORD32 i4_satd_lamda;
|
|
|
|
/**
|
|
* Lamda to be mulitplied with bits for SAD
|
|
* should be equal to Lamda*Qp
|
|
*/
|
|
WORD32 i4_sad_lamda;
|
|
|
|
/**
|
|
* Closed loop SSD Lambda
|
|
* This is multiplied with bits for RD cost computations in SSD mode
|
|
* This is represented in q format with shift of LAMBDA_Q_SHIFT
|
|
*/
|
|
LWORD64 i8_cl_ssd_lambda_qf;
|
|
|
|
/**
|
|
* Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
|
|
* This is multiplied with bits for RD cost computations in SSD mode
|
|
* This is represented in q format with shift of LAMBDA_Q_SHIFT
|
|
*/
|
|
LWORD64 i8_cl_ssd_lambda_chroma_qf;
|
|
|
|
/**
|
|
* Ratio of Closed loop SSD Lambda and Closed loop SSD Lambda for chroma
|
|
* This is multiplied with (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)
|
|
* to keep the precision of the ratio
|
|
*/
|
|
UWORD32 u4_chroma_cost_weighing_factor;
|
|
/**
|
|
* Frame level QP to be used
|
|
*/
|
|
WORD32 i4_frame_qp;
|
|
|
|
WORD32 i4_frame_mod_qp;
|
|
|
|
WORD32 i4_frame_qstep;
|
|
|
|
UWORD8 u1_max_tr_depth;
|
|
|
|
/**
|
|
* CU level Qp
|
|
*/
|
|
WORD32 i4_cu_qp;
|
|
|
|
/**
|
|
* CU level Qp / 6
|
|
*/
|
|
WORD32 i4_cu_qp_div6;
|
|
|
|
/**
|
|
* CU level Qp % 6
|
|
*/
|
|
WORD32 i4_cu_qp_mod6;
|
|
|
|
/**
|
|
* CU level QP to be used
|
|
*/
|
|
WORD32 i4_chrm_cu_qp;
|
|
|
|
/**
|
|
* CU level Qp / 6
|
|
*/
|
|
WORD32 i4_chrm_cu_qp_div6;
|
|
|
|
/**
|
|
* CU level Qp % 6
|
|
*/
|
|
WORD32 i4_chrm_cu_qp_mod6;
|
|
|
|
/** previous cu qp
|
|
* @remarks : This needs to be remembered to handle skip cases in deblocking.
|
|
*/
|
|
WORD32 i4_prev_cu_qp;
|
|
|
|
/** chroma qp offset
|
|
* @remarks : Used to calculate chroma qp and other qp related parameter at CU level
|
|
*/
|
|
WORD32 i4_chroma_qp_offset;
|
|
|
|
/**
|
|
* Buffer Pointer to populate the scale matrix for all transform size
|
|
*/
|
|
WORD16 *pi2_scal_mat;
|
|
|
|
/**
|
|
* Buffer Pointer to populate the rescale matrix for all transform size
|
|
*/
|
|
WORD16 *pi2_rescal_mat;
|
|
|
|
/** array of pointer to store the scaling matrices for
|
|
* all transform sizes and qp % 6 (pre computed)
|
|
*/
|
|
WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
|
|
|
|
/** array of pointer to store the re-scaling matrices for
|
|
* all transform sizes and qp % 6 (pre computed)
|
|
*/
|
|
WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
|
|
|
|
/** array of function pointers for residual and
|
|
* forward transform for all transform sizes
|
|
*/
|
|
pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
|
|
|
|
/** array of function pointers for residual and
|
|
* forward HAD transform for all transform sizes
|
|
*/
|
|
pf_res_trans_luma_had_chroma apf_chrm_resd_trns_had[NUM_TRANS_TYPES - 2];
|
|
|
|
/** array of function pointers for residual and
|
|
* forward transform for all transform sizes
|
|
* for chroma
|
|
*/
|
|
pf_res_trans_chroma apf_chrm_resd_trns[NUM_TRANS_TYPES - 2];
|
|
|
|
/** array of function pointers for qunatization and
|
|
* inv Quant for ssd calc. for all transform sizes
|
|
*/
|
|
pf_quant_iquant_ssd apf_quant_iquant_ssd[4];
|
|
|
|
/** array of function pointers for inv.transform and
|
|
* recon for all transform sizes
|
|
*/
|
|
pf_it_recon apf_it_recon[NUM_TRANS_TYPES];
|
|
|
|
/** array of function pointers for inverse transform
|
|
* and recon for all transform sizes for chroma
|
|
*/
|
|
pf_it_recon_chroma apf_chrm_it_recon[NUM_TRANS_TYPES - 2];
|
|
|
|
/** array of luma intra prediction function pointers */
|
|
pf_intra_pred apf_lum_ip[NUM_IP_FUNCS];
|
|
|
|
/** array of chroma intra prediction function pointers */
|
|
pf_intra_pred apf_chrm_ip[NUM_IP_FUNCS];
|
|
|
|
/* - Function pointer to cu_mode_decide function */
|
|
/* - The 'void *' is used since one of the parameters of */
|
|
/* this class of functions is the current structure */
|
|
/* - This function pointer is used to choose the */
|
|
/* appropriate function depending on whether bit_depth is */
|
|
/* chosen as 8 bits or greater */
|
|
/* - This function pointer's type is defined at the end */
|
|
/* of this file */
|
|
void *pv_cu_mode_decide;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_inter_rdopt_cu_mc_mvp;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_inter_rdopt_cu_ntu;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_intra_chroma_pred_mode_selector;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_intra_rdopt_cu_ntu;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_final_rdopt_mode_prcs;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_store_cu_results;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_enc_loop_cu_bot_copy;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_final_mode_reevaluation_with_modified_cu_qp;
|
|
|
|
/* Infer from the comment for the variable 'pv_cu_mode_decide' */
|
|
void *pv_enc_loop_ctb_left_copy;
|
|
|
|
/** Qunatization rounding factor for inter and intra CUs */
|
|
WORD32 i4_quant_rnd_factor[2];
|
|
|
|
/**
|
|
* Frame Buffer Pointer to store the top row luma data.
|
|
* one pixel row in every ctb row
|
|
*/
|
|
void *apv_frm_top_row_luma[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
|
|
/**
|
|
* One CTB row size of Top row luma data buffer
|
|
*/
|
|
WORD32 i4_top_row_luma_stride;
|
|
|
|
/**
|
|
* One frm of Top row luma data buffer
|
|
*/
|
|
WORD32 i4_frm_top_row_luma_size;
|
|
|
|
/**
|
|
* Current luma row bottom data store pointer
|
|
*/
|
|
void *pv_bot_row_luma;
|
|
|
|
/**
|
|
* Top luma row top data access pointer
|
|
*/
|
|
void *pv_top_row_luma;
|
|
|
|
/**
|
|
* Frame Buffer Pointer to store the top row chroma data (Cb Cr pixel interleaved )
|
|
* one pixel row in every ctb row
|
|
*/
|
|
void *apv_frm_top_row_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
|
|
/**
|
|
* One CTB row size of Top row chroma data buffer (Cb Cr pixel interleaved )
|
|
*/
|
|
WORD32 i4_top_row_chroma_stride;
|
|
|
|
/**
|
|
* One frm size of Top row chroma data buffer (Cb Cr pixel interleaved )
|
|
*/
|
|
WORD32 i4_frm_top_row_chroma_size;
|
|
|
|
/**
|
|
* Current chroma row bottom data store pointer
|
|
*/
|
|
void *pv_bot_row_chroma;
|
|
|
|
/**
|
|
* Top chroma row top data access pointer
|
|
*/
|
|
void *pv_top_row_chroma;
|
|
|
|
/**
|
|
* Frame Buffer Pointer to store the top row neighbour modes stored at 4x4 level
|
|
* one 4x4 row in every ctb row
|
|
*/
|
|
nbr_4x4_t *aps_frm_top_row_nbr[MAX_NUM_ENC_LOOP_PARALLEL];
|
|
|
|
/**
|
|
* One CTB row size of Top row nbr 4x4 params buffer
|
|
*/
|
|
WORD32 i4_top_row_nbr_stride;
|
|
|
|
/**
|
|
* One frm size of Top row nbr 4x4 params buffer
|
|
*/
|
|
WORD32 i4_frm_top_row_nbr_size;
|
|
|
|
/**
|
|
* Current row nbr prms bottom data store pointer
|
|
*/
|
|
nbr_4x4_t *ps_bot_row_nbr;
|
|
|
|
/**
|
|
* Top row nbr prms top data access pointer
|
|
*/
|
|
nbr_4x4_t *ps_top_row_nbr;
|
|
|
|
/**
|
|
* Pointer to (1,1) location in au1_nbr_ctb_map
|
|
*/
|
|
UWORD8 *pu1_ctb_nbr_map;
|
|
|
|
/**
|
|
* neigbour map buffer stride;
|
|
*/
|
|
WORD32 i4_nbr_map_strd;
|
|
|
|
/**
|
|
* Array at ctb level to store the neighour map
|
|
* its size is 25x25 for ctb size of 64x64
|
|
*/
|
|
UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
|
|
|
|
/**
|
|
* Array to store left ctb data for luma
|
|
* some padding is added to take care of unconditional access
|
|
*/
|
|
void *pv_left_luma_data;
|
|
|
|
/**
|
|
* Array to store left ctb data for chroma (cb abd cr pixel interleaved
|
|
* some padding is added to take care of unconditional access
|
|
*/
|
|
void *pv_left_chrm_data;
|
|
|
|
/**
|
|
* Array to store the left neighbour modes at 4x4 level
|
|
*/
|
|
nbr_4x4_t as_left_col_nbr[MAX_PU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* Array to store currrent CTb pred modes at a 4x4 level
|
|
* used for prediction inside ctb
|
|
*/
|
|
nbr_4x4_t as_ctb_nbr_arr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* array for storing csbf during RD opt stage at CU level
|
|
* one best and one current is required
|
|
*/
|
|
UWORD8 au1_cu_csbf[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* Stride of csbf buffer. will be useful for scanning access
|
|
* if stored in a 2D order. right now set to max tx size >> 4;
|
|
*/
|
|
WORD32 i4_cu_csbf_strd;
|
|
|
|
/**
|
|
* Array to store pred modes during SATD and RD opt stage at CU level
|
|
* one best and one current is required
|
|
*/
|
|
nbr_4x4_t as_cu_nbr[2][MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* array to store the output of reference substitution process output
|
|
* for intra CUs
|
|
* TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
|
|
*/
|
|
void *pv_ref_sub_out;
|
|
|
|
/**
|
|
* array to store the filtered reference samples for intra CUs
|
|
* TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
|
|
*/
|
|
void *pv_ref_filt_out;
|
|
|
|
/**
|
|
* Used for 3 purposes
|
|
*
|
|
* 1. MC Intermediate buffer
|
|
* array for storing intermediate 16-bit value for hxhy subpel
|
|
* generation at CTB level (+ 16) for subpel planes boundary
|
|
* +4 is for horizontal 4pels
|
|
*
|
|
* 2. Temprory scratch buffer for transform and coeffs storage
|
|
* MAX_TRANS_SIZE *2 for trans_scratch(32bit) and MAX_TRANS_SIZE *1 for trans_values
|
|
* The first part i.e. from 0 to MAX_TRANS_SIZE is then reused for storing the quant coeffs
|
|
* Max of both are used
|
|
*
|
|
* 3. MC Intermediate buffer
|
|
* buffer for storing intermediate 16 bit values prior to conversion to 8bit in HBD
|
|
*
|
|
*/
|
|
MEM_ALIGN16 WORD16 ai2_scratch[(MAX_CTB_SIZE + 8 + 8) * (MAX_CTB_SIZE + 8 + 8 + 8) * 2];
|
|
|
|
/**
|
|
* array for storing cu level final params for a given mode
|
|
* one best and one current is required
|
|
*/
|
|
enc_loop_cu_final_prms_t as_cu_prms[2];
|
|
|
|
/**
|
|
* Scan index to be used for any gien transform
|
|
* this is a scartch variable used to communicate
|
|
* scan idx at every transform level
|
|
*/
|
|
WORD32 i4_scan_idx;
|
|
|
|
/**
|
|
* Buffer index in ping pong buffers
|
|
* to be used SATD mode evaluations
|
|
*/
|
|
WORD32 i4_satd_buf_idx;
|
|
|
|
/**
|
|
* Motion Compensation module context structre
|
|
*/
|
|
inter_pred_ctxt_t s_mc_ctxt;
|
|
|
|
/**
|
|
* MV pred module context structre
|
|
*/
|
|
mv_pred_ctxt_t s_mv_pred_ctxt;
|
|
|
|
/**
|
|
* Deblock BS ctb structure
|
|
*/
|
|
deblk_bs_ctb_ctxt_t s_deblk_bs_prms;
|
|
|
|
/**
|
|
* Deblocking ctb structure
|
|
*/
|
|
deblk_ctb_params_t s_deblk_prms;
|
|
|
|
/**
|
|
* Deblocking structure. For ctb-row level
|
|
*/
|
|
deblk_ctbrow_prms_t s_deblk_ctbrow_prms;
|
|
|
|
/**
|
|
* Deblocking enable flag
|
|
*/
|
|
WORD32 i4_deblock_type;
|
|
|
|
/**
|
|
* context for cabac bit estimation used during rdopt stage
|
|
*/
|
|
rdopt_entropy_ctxt_t s_rdopt_entropy_ctxt;
|
|
|
|
/**
|
|
* Context models stored for RDopt store and restore purpose
|
|
*/
|
|
UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
|
|
|
|
/**
|
|
* current picture slice type
|
|
*/
|
|
WORD8 i1_slice_type;
|
|
|
|
/**
|
|
* strong_intra_smoothing_enable_flag
|
|
*/
|
|
WORD8 i1_strong_intra_smoothing_enable_flag;
|
|
|
|
/** Pointer to Dep Mngr for controlling Top-Right CU dependency */
|
|
void *pv_dep_mngr_enc_loop_cu_top_right;
|
|
|
|
/** Pointer to Dep Mngr for controlling Deblocking Top dependency */
|
|
void *pv_dep_mngr_enc_loop_dblk;
|
|
|
|
/** Pointer to Dep Mngr for controlling Deblocking Top dependency */
|
|
void *pv_dep_mngr_enc_loop_sao;
|
|
|
|
/** pointer to store the cabac states at end of second CTB in current row */
|
|
UWORD8 *pu1_curr_row_cabac_state;
|
|
|
|
/** pointer to copy the cabac states at start of first CTB in current row */
|
|
UWORD8 *pu1_top_rt_cabac_state;
|
|
/** flag to indicate rate control mode.
|
|
* @remarks : To enable CU level qp modulation only when required.
|
|
*/
|
|
WORD8 i1_cu_qp_delta_enable;
|
|
|
|
/** flag to indicate rate control mode.
|
|
* @remarks : Entropy sync enable flag
|
|
*/
|
|
WORD8 i1_entropy_coding_sync_enabled_flag;
|
|
|
|
/** Use SATD or SAD for best merge candidate evaluation */
|
|
WORD32 i4_use_satd_for_merge_eval;
|
|
|
|
UWORD8 u1_use_early_cbf_data;
|
|
|
|
/** Use SATD or SAD for best CU merge candidate evaluation */
|
|
WORD32 i4_use_satd_for_cu_merge;
|
|
|
|
/** Maximum number of merge candidates to be evaluated */
|
|
WORD32 i4_max_merge_candidates;
|
|
|
|
/** Flag to indicate whether current pictute needs to be deblocked,
|
|
padded and hpel planes need to be generated.
|
|
These are turned off typically in non referecne pictures when psnr
|
|
and recon dump is disabled
|
|
*/
|
|
WORD32 i4_deblk_pad_hpel_cur_pic;
|
|
|
|
/* Array of structures for storing mc predicted data for
|
|
* merge and skip modes
|
|
*/
|
|
merge_skip_pred_data_t as_merge_skip_pred_data[MAX_NUM_CU_MERGE_SKIP_CAND];
|
|
|
|
/* Sum the Qps of each 8*8 block in CU
|
|
* 8*8 block is considered as Min CU size possible as per standard is 8
|
|
* 0 corresponds to INTER and 1 corresponds to INTRA
|
|
*/
|
|
LWORD64 i8_cl_ssd_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
|
|
UWORD32 au4_chroma_cost_weighing_factor_array[MAX_HEVC_QP_12bit + 1];
|
|
LWORD64 i8_cl_ssd_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
|
|
WORD32 i4_satd_lamda_array[MAX_HEVC_QP_12bit + 1];
|
|
WORD32 i4_sad_lamda_array[MAX_HEVC_QP_12bit + 1];
|
|
|
|
/************************************************************************/
|
|
/* The fields with the string 'type2' in their names are required */
|
|
/* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
|
|
/* to the bit_depth != internal_bit_depth are stored in these fields */
|
|
/************************************************************************/
|
|
LWORD64 i8_cl_ssd_type2_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
|
|
LWORD64 i8_cl_ssd_type2_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
|
|
WORD32 i4_satd_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
|
|
WORD32 i4_sad_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
|
|
|
|
/* Lokesh: Added to find if the CU is the first to be coded in the group */
|
|
WORD32 i4_is_first_cu_qg_coded;
|
|
|
|
/* Chroma RDOPT related parameters */
|
|
ihevce_chroma_rdopt_ctxt_t s_chroma_rdopt_ctxt;
|
|
|
|
/* Structure to save pred data of ME/Intra cand */
|
|
cu_me_intra_pred_prms_t s_cu_me_intra_pred_prms;
|
|
|
|
/* Structure to save the flags required for Final mode Reconstruction
|
|
function. These flags are set based on quality presets and bit-rate
|
|
we are working on */
|
|
cu_final_recon_flags_t s_cu_final_recon_flags;
|
|
|
|
/* Parameter to how at which level RDOQ will be implemented:
|
|
0 - RDOQ disbaled
|
|
1 - RDOQ enabled during RDOPT for all candidates
|
|
2 - RDOQ enabled only for the final candidate*/
|
|
WORD32 i4_rdoq_level;
|
|
|
|
/* Parameter to how at which level Quant rounding factors are computed:
|
|
FIXED_QUANT_ROUNDING : Fixed Quant rounding values are used
|
|
NCTB_LEVEL_QUANT_ROUNDING : NCTB level Cmputed Quant rounding values are used
|
|
CTB_LEVEL_QUANT_ROUNDING : CTB level Cmputed Quant rounding values are used
|
|
CU_LEVEL_QUANT_ROUNDING : CU level Cmputed Quant rounding values are used
|
|
TU_LEVEL_QUANT_ROUNDING : TU level Cmputed Quant rounding values are used*/
|
|
WORD32 i4_quant_rounding_level;
|
|
|
|
/* Parameter to how at which level Quant rounding factors are computed:
|
|
CHROMA_QUANT_ROUNDING : Chroma Quant rounding values are used for chroma */
|
|
WORD32 i4_chroma_quant_rounding_level;
|
|
|
|
/* Parameter to how at which level RDOQ will be implemented:
|
|
0 - SBH disbaled
|
|
1 - SBH enabled during RDOPT for all candidates
|
|
2 - SBH enabled only for the final candidate*/
|
|
WORD32 i4_sbh_level;
|
|
|
|
/* Parameter to how at which level ZERO CBF RDO will be implemented:
|
|
0 - ZCBF disbaled
|
|
1 - ZCBF enabled during RDOPT for all candidates
|
|
2 - ZCBF enabled only for the final candidate
|
|
*/
|
|
WORD32 i4_zcbf_rdo_level;
|
|
|
|
/*RDOQ-SBH context structure*/
|
|
rdoq_sbh_ctxt_t s_rdoq_sbh_ctxt;
|
|
|
|
/** Structure to store the Merge/Skip Cand. for EncLoop */
|
|
cu_inter_merge_skip_t s_cu_inter_merge_skip;
|
|
/** Structure to store the Mixed mode Cand. for EncLoop */
|
|
cu_mixed_mode_inter_t s_mixed_mode_inter_cu;
|
|
|
|
ihevce_inter_pred_buf_data_t s_pred_buf_data;
|
|
|
|
void *pv_422_chroma_intra_pred_buf;
|
|
|
|
WORD32 i4_max_num_inter_rdopt_cands;
|
|
|
|
/* Output Struct per each CU during recursions */
|
|
ihevce_enc_cu_node_ctxt_t as_enc_cu_ctxt[MAX_CU_IN_CTB + 1];
|
|
|
|
/* Used to store best inter candidate. Used only when */
|
|
/* 'CU modulated QP override' is enabled */
|
|
cu_inter_cand_t as_best_cand[MAX_CU_IN_CTB + 1];
|
|
|
|
cu_inter_cand_t *ps_best_cand;
|
|
|
|
UWORD8 au1_cu_init_cabac_state_a_priori[MAX_CU_IN_CTB + 1][IHEVC_CAB_CTXT_END];
|
|
|
|
UWORD8 (*pau1_curr_cu_a_priori_cabac_state)[IHEVC_CAB_CTXT_END];
|
|
|
|
/* Used to store pred data of each CU in the CTB. */
|
|
/* Used only when 'CU modulated QP override' is enabled */
|
|
void *pv_CTB_pred_luma;
|
|
|
|
void *pv_CTB_pred_chroma;
|
|
|
|
/**
|
|
* array for storing recon during SATD and RD opt stage at CU level
|
|
* one best and one current is required.Luma and chroma together
|
|
*/
|
|
void *pv_cu_luma_recon;
|
|
|
|
/**
|
|
* array for storing recon during SATD and RD opt stage at CU level
|
|
* one best and one current is required.Luma and chroma together
|
|
*/
|
|
void *pv_cu_chrma_recon;
|
|
|
|
/**
|
|
* Array to store pred modes during SATD and RD opt stage at CU level
|
|
* one best and one current is required
|
|
*/
|
|
nbr_4x4_t as_cu_recur_nbr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
|
|
|
|
/**
|
|
* Pointer to Array to store pred modes during SATD and RD opt stage at CU level
|
|
* one best and one current is required
|
|
*/
|
|
nbr_4x4_t *ps_cu_recur_nbr;
|
|
|
|
/**
|
|
* Context models stored for CU recursion parent evaluation
|
|
*/
|
|
UWORD8 au1_rdopt_recur_ctxt_models[4][IHEVC_CAB_CTXT_END];
|
|
|
|
ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt;
|
|
|
|
/**
|
|
* array for storing coeffs during RD opt stage at CU level
|
|
* one best and one current is required. Luma and chroma together
|
|
*/
|
|
/*UWORD8 au1_cu_recur_coeffs[MAX_LUMA_COEFFS_CTB + MAX_CHRM_COEFFS_CTB];*/
|
|
|
|
UWORD8 *pu1_cu_recur_coeffs;
|
|
|
|
UWORD8 *apu1_cu_level_pingpong_coeff_buf_addr[2];
|
|
|
|
WORD16 *api2_cu_level_pingpong_deq_buf_addr[2];
|
|
|
|
UWORD8 *pu1_ecd_data;
|
|
|
|
/* OPT: flag to skip parent CU=4TU eval during recursion */
|
|
UWORD8 is_parent_cu_rdopt;
|
|
|
|
/**
|
|
* Array of structs containing block merge data for
|
|
* 4 32x32 CU's in indices 1 - 4 and 64x64 CU at 0
|
|
*/
|
|
UWORD8 u1_cabac_states_next_row_copied_flag;
|
|
|
|
UWORD8 u1_cabac_states_first_cu_copied_flag;
|
|
|
|
UWORD32 u4_cur_ctb_wd;
|
|
|
|
UWORD32 u4_cur_ctb_ht;
|
|
|
|
/* thread id of the current context */
|
|
WORD32 thrd_id;
|
|
|
|
/** Number of processing threads created run time */
|
|
WORD32 i4_num_proc_thrds;
|
|
|
|
/* Instance number of bit-rate for multiple bit-rate encode */
|
|
WORD32 i4_bitrate_instance_num;
|
|
|
|
WORD32 i4_num_bitrates;
|
|
|
|
WORD32 i4_enc_frm_id;
|
|
|
|
/* Flag to indicate if chroma needs to be considered for cost calculation */
|
|
WORD32 i4_consider_chroma_cost;
|
|
|
|
/* Number of modes to be evaluated for intra */
|
|
WORD32 i4_num_modes_to_evaluate_intra;
|
|
|
|
/* Number of modes to be evaluated for inter */
|
|
WORD32 i4_num_modes_to_evaluate_inter;
|
|
/*pointers for struct to hold RC parameters for each bit-rate instance */
|
|
enc_loop_rc_params_t
|
|
*aaps_enc_loop_rc_params[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
|
|
|
|
/** Pointer to structure containing function pointers of common*/
|
|
func_selector_t *ps_func_selector;
|
|
|
|
/* Flag to control Top Right Sync for during Merge */
|
|
UWORD8 u1_use_top_at_ctb_boundary;
|
|
|
|
UWORD8 u1_is_input_data_hbd;
|
|
|
|
UWORD8 u1_bit_depth;
|
|
|
|
/* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
|
|
UWORD8 u1_chroma_array_type;
|
|
|
|
rc_quant_t *ps_rc_quant_ctxt;
|
|
|
|
sao_ctxt_t s_sao_ctxt_t;
|
|
|
|
/* Offset to get the Qp for the last CU of upper CTB-row.
|
|
This offset is from the current tile top row QP map start.
|
|
This will only be consumed by the first CU of current CTB-row
|
|
iff [it is skip && entropy sync is off] */
|
|
WORD32 *pi4_offset_for_last_cu_qp;
|
|
|
|
double i4_lamda_modifier;
|
|
double i4_uv_lamda_modifier;
|
|
WORD32 i4_temporal_layer_id;
|
|
|
|
UWORD8 u1_disable_intra_eval;
|
|
|
|
WORD32 i4_quant_round_tu[2][32 * 32];
|
|
|
|
WORD32 *pi4_quant_round_factor_tu_0_1[5];
|
|
WORD32 *pi4_quant_round_factor_tu_1_2[5];
|
|
|
|
WORD32 i4_quant_round_4x4[2][4 * 4];
|
|
WORD32 i4_quant_round_8x8[2][8 * 8];
|
|
WORD32 i4_quant_round_16x16[2][16 * 16];
|
|
WORD32 i4_quant_round_32x32[2][32 * 32];
|
|
|
|
WORD32 *pi4_quant_round_factor_cu_ctb_0_1[5];
|
|
WORD32 *pi4_quant_round_factor_cu_ctb_1_2[5];
|
|
|
|
WORD32 i4_quant_round_cr_4x4[2][4 * 4];
|
|
WORD32 i4_quant_round_cr_8x8[2][8 * 8];
|
|
WORD32 i4_quant_round_cr_16x16[2][16 * 16];
|
|
|
|
WORD32 *pi4_quant_round_factor_cr_cu_ctb_0_1[3];
|
|
WORD32 *pi4_quant_round_factor_cr_cu_ctb_1_2[3];
|
|
/* cost for not coding cu residue i.e forcing no residue syntax as 1 */
|
|
LWORD64 i8_cu_not_coded_cost;
|
|
|
|
/* dependency manager for forward ME sync */
|
|
void *pv_dep_mngr_encloop_dep_me;
|
|
|
|
LWORD64 ai4_source_satd_8x8[64];
|
|
|
|
LWORD64 ai4_source_chroma_satd[256];
|
|
|
|
UWORD8 u1_is_refPic;
|
|
|
|
WORD32 i4_qp_mod;
|
|
|
|
WORD32 i4_is_ref_pic;
|
|
|
|
WORD32 i4_chroma_format;
|
|
|
|
WORD32 i4_temporal_layer;
|
|
|
|
WORD32 i4_use_const_lamda_modifier;
|
|
|
|
double f_i_pic_lamda_modifier;
|
|
|
|
LWORD64 i8_distortion;
|
|
|
|
WORD32 i4_use_ctb_level_lamda;
|
|
|
|
float f_str_ratio;
|
|
|
|
/* Flag to indicate if current frame is to be shared with other clients.
|
|
Used only in distributed-encoding */
|
|
WORD32 i4_share_flag;
|
|
|
|
/* Pointer to the current recon being processed.
|
|
Needed for enabling TMVP in dist-encoding */
|
|
void *pv_frm_recon;
|
|
|
|
ihevce_cmn_opt_func_t s_cmn_opt_func;
|
|
|
|
/* The ME analogue to the struct above was not included since */
|
|
/* that would have entailed inclusion of all ME specific */
|
|
/* header files */
|
|
/*FT_SAD_EVALUATOR **/
|
|
|
|
/*FT_SAD_EVALUATOR **/
|
|
void *pv_evalsad_pt_npu_mxn_8bit;
|
|
UWORD8 u1_enable_psyRDOPT;
|
|
|
|
UWORD8 u1_is_stasino_enabled;
|
|
|
|
UWORD32 u4_psy_strength;
|
|
/*Sub PIC rc context */
|
|
|
|
WORD32 i4_sub_pic_level_rc;
|
|
WORD32 i4_num_ctb_for_out_scale;
|
|
|
|
/**
|
|
* Accumalated bits of all cu for required CTBS estimated during RDO evaluation.
|
|
* Required for sup pic level RC. Reset when required CU/CTB count is reached.
|
|
*/
|
|
UWORD32 u4_total_cu_bits;
|
|
|
|
UWORD32 u4_total_cu_bits_mul_qs;
|
|
|
|
UWORD32 u4_total_cu_hdr_bits;
|
|
|
|
UWORD32 u4_cu_tot_bits_into_qscale;
|
|
|
|
UWORD32 u4_cu_tot_bits;
|
|
|
|
/*Scale added to the current qscale, output from sub pic rc*/
|
|
WORD32 i4_cu_qp_sub_pic_rc;
|
|
|
|
/*Frame level L1 IPE sad*/
|
|
LWORD64 i8_frame_l1_ipe_sad;
|
|
|
|
/*Frame level L0 IPE satd*/
|
|
LWORD64 i8_frame_l0_ipe_satd;
|
|
|
|
/*Frame level L1 ME sad*/
|
|
LWORD64 i8_frame_l1_me_sad;
|
|
|
|
/*Frame level L1 activity factor*/
|
|
LWORD64 i8_frame_l1_activity_fact;
|
|
/*bits esimated for frame calulated for sub pic rc bit control */
|
|
WORD32 ai4_frame_bits_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
|
|
/** I Scene cut */
|
|
WORD32 i4_is_I_scenecut;
|
|
|
|
/** Non Scene cut */
|
|
WORD32 i4_is_non_I_scenecut;
|
|
|
|
/** Frames for which online/offline model is not valid */
|
|
WORD32 i4_is_model_valid;
|
|
|
|
/** Steady State Frame */
|
|
//WORD32 i4_is_steady_state;
|
|
|
|
WORD32 i4_is_first_query;
|
|
|
|
/* Pointer to Tile params base */
|
|
void *pv_tile_params_base;
|
|
|
|
/** The index of column tile for which it is working */
|
|
WORD32 i4_tile_col_idx;
|
|
|
|
WORD32 i4_max_search_range_horizontal;
|
|
|
|
WORD32 i4_max_search_range_vertical;
|
|
|
|
WORD32 i4_is_ctb_qp_modified;
|
|
|
|
WORD32 i4_display_num;
|
|
|
|
WORD32 i4_pred_qp;
|
|
|
|
/*assumption of qg size is 8x8 block size*/
|
|
WORD32 ai4_qp_qg[8 * 8];
|
|
|
|
WORD32 i4_last_cu_qp_from_prev_ctb;
|
|
|
|
WORD32 i4_prev_QP;
|
|
|
|
UWORD8 u1_max_inter_tr_depth;
|
|
|
|
UWORD8 u1_max_intra_tr_depth;
|
|
|
|
} ihevce_enc_loop_ctxt_t;
|
|
|
|
/*****************************************************************************/
|
|
/* Enums */
|
|
/*****************************************************************************/
|
|
|
|
/**
 * @brief RDOQ_LEVELS_T: This enumeration specifies the RDOQ mode of operation
 *
 * NO_RDOQ        (0) : RDOQ is not performed
 * BEST_CAND_RDOQ (1) : RDOQ for final candidate only
 * ALL_CAND_RDOQ  (2) : RDOQ for all candidates
 *
 * NOTE(review): the comment on i4_rdoq_level in ihevce_enc_loop_ctxt_t lists
 * 1 as "all candidates" and 2 as "final candidate only", which is the reverse
 * of the enumerator order here - confirm which description is intended.
 */
typedef enum
{
    NO_RDOQ = 0,
    BEST_CAND_RDOQ = 1,
    ALL_CAND_RDOQ = 2,
} RDOQ_LEVELS_T;
|
|
|
|
/**
 * @brief QUANT_ROUNDING_COEFF_LEVELS_T: This enumeration specifies the level
 *        at which coefficient-level quant rounding factors are computed
 *
 * FIXED_QUANT_ROUNDING      (0) : Fixed quant rounding values are used
 * NCTB_LEVEL_QUANT_ROUNDING (1) : NCTB level computed quant rounding values are used
 * CTB_LEVEL_QUANT_ROUNDING  (2) : CTB level computed quant rounding values are used
 * CU_LEVEL_QUANT_ROUNDING   (3) : CU level computed quant rounding values are used
 * TU_LEVEL_QUANT_ROUNDING   (4) : TU level computed quant rounding values are used
 * CHROMA_QUANT_ROUNDING     (5) : Chroma quant rounding values are used for chroma
 *
 * Default is for all candidates; based on RDOQ_LEVELS_T it may be chosen
 * for the best candidate only.
 */
typedef enum
{
    FIXED_QUANT_ROUNDING = 0,
    NCTB_LEVEL_QUANT_ROUNDING = 1,
    CTB_LEVEL_QUANT_ROUNDING = 2,
    CU_LEVEL_QUANT_ROUNDING = 3,
    TU_LEVEL_QUANT_ROUNDING = 4,
    CHROMA_QUANT_ROUNDING = 5
} QUANT_ROUNDING_COEFF_LEVELS_T;
|
|
|
|
/*****************************************************************************/
|
|
/* Enums */
|
|
/*****************************************************************************/
|
|
|
|
/**
 * @brief SBH_LEVELS_T: This enumeration specifies the SBH mode of operation
 *
 * NO_SBH        (0) : SBH is not performed
 * BEST_CAND_SBH (1) : SBH for final candidate only
 * ALL_CAND_SBH  (2) : SBH for all candidates
 *
 * NOTE(review): the comment on i4_sbh_level in ihevce_enc_loop_ctxt_t lists
 * 1 as "all candidates" and 2 as "final candidate only", which is the reverse
 * of the enumerator order here - confirm which description is intended.
 */
typedef enum
{
    NO_SBH = 0,
    BEST_CAND_SBH = 1,
    ALL_CAND_SBH = 2,
} SBH_LEVELS_T;
|
|
|
|
/**
 * @brief ZCBF_LEVELS_T: This enumeration specifies the ZeroCBF RDO mode of operation
 *
 * NO_ZCBF     (0) : ZCBF RDO is not performed
 * ZCBF_ENABLE (1) : ZCBF RDO is performed
 *
 * NOTE(review): the comment on i4_zcbf_rdo_level in ihevce_enc_loop_ctxt_t
 * describes three levels (0, 1, 2) while only two enumerators exist here -
 * confirm whether level 2 is still in use.
 */
typedef enum
{
    NO_ZCBF = 0,
    ZCBF_ENABLE = 1,
} ZCBF_LEVELS_T;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief Encode loop master context structure
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
/** Array of encode loop structure */
|
|
ihevce_enc_loop_ctxt_t *aps_enc_loop_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
|
|
|
|
/** Number of processing threads created run time */
|
|
WORD32 i4_num_proc_thrds;
|
|
|
|
/**
|
|
* Array of top row cu skip flags (1 bit per 8x8CU)
|
|
*/
|
|
UWORD8 au1_cu_skip_top_row[HEVCE_MAX_WIDTH >> 6];
|
|
|
|
/** Context models stored at the end of second CTB in a row)
|
|
* stored in packed form pState[bits6-1] | MPS[bit0]
|
|
* for each CTB row
|
|
* using entropy sync model in RD opt
|
|
*/
|
|
UWORD8 au1_ctxt_models[MAX_NUM_CTB_ROWS_FRM][IHEVC_CAB_CTXT_END];
|
|
|
|
/** Dependency manager for controlling EncLoop Top-Right CU dependency
|
|
* One per each bit-rate and one per each frame in parallel
|
|
*/
|
|
void *aapv_dep_mngr_enc_loop_cu_top_right[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
|
|
|
|
/** Dependency manager for controlling Deblocking Top dependency
|
|
* One per each bit-rate and one per each frame in parallel
|
|
*/
|
|
void *aapv_dep_mngr_enc_loop_dblk[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
|
|
|
|
/** Dependency manager for controlling Sao Top dependency
|
|
* One per each bit-rate and one per each frame in parallel
|
|
*/
|
|
void *aapv_dep_mngr_enc_loop_sao[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
|
|
|
|
/** number of bit-rate instances running */
|
|
WORD32 i4_num_bitrates;
|
|
|
|
/** number of enc frames running in parallel */
|
|
WORD32 i4_num_enc_loop_frm_pllel;
|
|
|
|
/* Pointer to Tile params base */
|
|
void *pv_tile_params_base;
|
|
/* Offset to get the Qp for the last CU of upper CTB-row.
|
|
This offset is from the current tile top row QP map start.
|
|
|
|
This will only be consumed by the first CU of current CTB-row
|
|
iff [it is skip && entropy sync is off]
|
|
There is one entry of every tile-column bcoz offset remains constant
|
|
for all tiles lying in a tile-column */
|
|
WORD32 ai4_offset_for_last_cu_qp[MAX_TILE_COLUMNS];
|
|
} ihevce_enc_loop_master_ctxt_t;
|
|
|
|
/**
|
|
******************************************************************************
|
|
* @brief This struct is used for storing data required by the block merge
|
|
* function
|
|
******************************************************************************
|
|
*/
|
|
typedef struct
|
|
{
|
|
block_data_8x8_t *ps_8x8_data;
|
|
|
|
block_data_16x16_t *ps_16x16_data;
|
|
|
|
block_data_32x32_t *ps_32x32_data;
|
|
|
|
block_data_64x64_t *ps_64x64_data;
|
|
|
|
part_type_results_t **ps_32x32_results;
|
|
|
|
cur_ctb_cu_tree_t *ps_cu_tree;
|
|
|
|
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
|
|
|
|
mv_pred_ctxt_t *ps_mv_pred_ctxt;
|
|
|
|
recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];
|
|
|
|
nbr_4x4_t *ps_top_nbr_4x4;
|
|
|
|
nbr_4x4_t *ps_left_nbr_4x4;
|
|
|
|
nbr_4x4_t *ps_curr_nbr_4x4;
|
|
|
|
UWORD8 *pu1_inp;
|
|
|
|
UWORD8 *pu1_ctb_nbr_map;
|
|
|
|
WORD32 i4_nbr_map_strd;
|
|
|
|
WORD32 inp_stride;
|
|
|
|
WORD32 i4_ctb_x_off;
|
|
|
|
WORD32 i4_ctb_y_off;
|
|
|
|
WORD32 use_satd_for_err_calc;
|
|
|
|
WORD32 lambda;
|
|
|
|
WORD32 lambda_q_shift;
|
|
|
|
WORD32 frm_qstep;
|
|
|
|
WORD32 num_4x4_in_ctb;
|
|
|
|
UWORD8 *pu1_wkg_mem;
|
|
|
|
UWORD8 **ppu1_pred;
|
|
|
|
UWORD8 u1_bidir_enabled;
|
|
|
|
UWORD8 u1_max_tr_depth;
|
|
|
|
WORD32 i4_ctb_pos;
|
|
|
|
WORD32 i4_ctb_size;
|
|
|
|
UWORD8 *apu1_wt_inp[MAX_REFS_SEARCHABLE + 1];
|
|
|
|
/** Pointer of Dep Mngr for EncLoop Top-Right CU dependency */
|
|
void *pv_dep_mngr_enc_loop_cu_top_right;
|
|
/** The current cu row no. for Dep Manager to Check */
|
|
WORD32 i4_dep_mngr_cur_cu_row_no;
|
|
/** The Top cu row no. for Dep Manager to Check */
|
|
WORD32 i4_dep_mngr_top_cu_row_no;
|
|
|
|
WORD8 i1_quality_preset;
|
|
|
|
/* Flag to control Top Right Sync for during Merge */
|
|
UWORD8 u1_use_top_at_ctb_boundary;
|
|
|
|
} block_merge_input_t;
|
|
|
|
/* Structure which stores the info regarding the TU's present in the CU*/
|
|
typedef struct tu_prms_t
|
|
{
|
|
UWORD8 u1_tu_size;
|
|
|
|
UWORD8 u1_x_off;
|
|
|
|
UWORD8 u1_y_off;
|
|
|
|
WORD32 i4_tu_cost;
|
|
|
|
WORD32 i4_early_cbf;
|
|
|
|
} tu_prms_t;
|
|
|
|
typedef struct
|
|
{
|
|
cu_enc_loop_out_t **pps_cu_final;
|
|
|
|
pu_t **pps_row_pu;
|
|
|
|
tu_enc_loop_out_t **pps_row_tu;
|
|
|
|
UWORD8 **ppu1_row_ecd_data;
|
|
|
|
WORD32 *pi4_num_pus_in_ctb;
|
|
|
|
WORD32 *pi4_last_cu_pos_in_ctb;
|
|
|
|
WORD32 *pi4_last_cu_size;
|
|
|
|
UWORD8 *pu1_num_cus_in_ctb_out;
|
|
|
|
} cu_final_update_prms;
|
|
|
|
typedef struct
|
|
{
|
|
cu_nbr_prms_t *ps_cu_nbr_prms;
|
|
|
|
cu_inter_cand_t *ps_best_inter_cand;
|
|
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms;
|
|
|
|
WORD32 packed_pred_mode;
|
|
|
|
WORD32 rd_opt_best_idx;
|
|
|
|
void *pv_src;
|
|
|
|
WORD32 src_strd;
|
|
|
|
void *pv_pred;
|
|
|
|
WORD32 pred_strd;
|
|
|
|
void *pv_pred_chrm;
|
|
|
|
WORD32 pred_chrm_strd;
|
|
|
|
UWORD8 *pu1_final_ecd_data;
|
|
|
|
UWORD8 *pu1_csbf_buf;
|
|
|
|
WORD32 csbf_strd;
|
|
|
|
void *pv_luma_recon;
|
|
|
|
WORD32 recon_luma_strd;
|
|
|
|
void *pv_chrm_recon;
|
|
|
|
WORD32 recon_chrma_strd;
|
|
|
|
UWORD8 u1_cu_pos_x;
|
|
|
|
UWORD8 u1_cu_pos_y;
|
|
|
|
UWORD8 u1_cu_size;
|
|
|
|
WORD8 i1_cu_qp;
|
|
|
|
UWORD8 u1_will_cabac_state_change;
|
|
|
|
UWORD8 u1_recompute_sbh_and_rdoq;
|
|
|
|
UWORD8 u1_is_first_pass;
|
|
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
UWORD8 u1_is_cu_noisy;
|
|
#endif
|
|
|
|
} final_mode_process_prms_t;
|
|
|
|
typedef struct
|
|
{
|
|
cu_inter_cand_t s_best_cand;
|
|
|
|
/* The size is twice of what is required to ensure availability */
|
|
/* of adequate space for 'HBD' case */
|
|
UWORD8 au1_pred_luma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
|
|
|
|
/* The size is twice of what is required to ensure availability */
|
|
/* of adequate space for 422 case */
|
|
UWORD8 au1_pred_chroma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
|
|
} final_mode_state_t;
|
|
|
|
typedef struct
|
|
{
|
|
cu_mixed_mode_inter_t *ps_mixed_modes_datastore;
|
|
|
|
cu_inter_cand_t *ps_me_cands;
|
|
|
|
cu_inter_cand_t *ps_merge_cands;
|
|
|
|
mv_pred_ctxt_t *ps_mv_pred_ctxt;
|
|
|
|
inter_pred_ctxt_t *ps_mc_ctxt;
|
|
|
|
UWORD8 *pu1_ctb_nbr_map;
|
|
|
|
void *pv_src;
|
|
|
|
nbr_4x4_t *ps_cu_nbr_buf;
|
|
|
|
nbr_4x4_t *ps_left_nbr_4x4;
|
|
|
|
nbr_4x4_t *ps_top_nbr_4x4;
|
|
|
|
nbr_4x4_t *ps_topleft_nbr_4x4;
|
|
|
|
WORD32 i4_ctb_nbr_map_stride;
|
|
|
|
WORD32 i4_src_strd;
|
|
|
|
WORD32 i4_nbr_4x4_left_strd;
|
|
|
|
UWORD8 u1_cu_size;
|
|
|
|
UWORD8 u1_cu_pos_x;
|
|
|
|
UWORD8 u1_cu_pos_y;
|
|
|
|
UWORD8 u1_num_me_cands;
|
|
|
|
UWORD8 u1_num_merge_cands;
|
|
|
|
UWORD8 u1_max_num_mixed_mode_cands_to_select;
|
|
|
|
UWORD8 u1_max_merge_candidates;
|
|
|
|
UWORD8 u1_use_satd_for_merge_eval;
|
|
|
|
} ihevce_mixed_inter_modes_selector_prms_t;
|
|
|
|
typedef struct
|
|
{
|
|
LWORD64 i8_ssd;
|
|
|
|
LWORD64 i8_cost;
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
LWORD64 i8_not_coded_cost;
|
|
#endif
|
|
|
|
UWORD32 u4_sad;
|
|
|
|
WORD32 i4_bits;
|
|
|
|
WORD32 i4_num_bytes_used_for_ecd;
|
|
|
|
WORD32 i4_zero_col;
|
|
|
|
WORD32 i4_zero_row;
|
|
|
|
UWORD8 u1_cbf;
|
|
|
|
UWORD8 u1_reconBufId;
|
|
|
|
UWORD8 u1_is_valid_node;
|
|
|
|
UWORD8 u1_size;
|
|
|
|
UWORD8 u1_posx;
|
|
|
|
UWORD8 u1_posy;
|
|
} tu_node_data_t;
|
|
|
|
typedef struct tu_tree_node_t
|
|
{
|
|
struct tu_tree_node_t *ps_child_node_tl;
|
|
|
|
struct tu_tree_node_t *ps_child_node_tr;
|
|
|
|
struct tu_tree_node_t *ps_child_node_bl;
|
|
|
|
struct tu_tree_node_t *ps_child_node_br;
|
|
|
|
tu_node_data_t s_luma_data;
|
|
|
|
/* 2 because of the 2 subTU's when input is 422 */
|
|
tu_node_data_t as_cb_data[2];
|
|
|
|
tu_node_data_t as_cr_data[2];
|
|
|
|
UWORD8 u1_is_valid_node;
|
|
|
|
} tu_tree_node_t;
|
|
|
|
/*****************************************************************************/
|
|
/* Extern Variable Declarations */
|
|
/*****************************************************************************/
|
|
|
|
/*****************************************************************************/
|
|
/* Extern Function Declarations */
|
|
/*****************************************************************************/
|
|
|
|
/*****************************************************************************/
|
|
/* Typedefs */
|
|
/*****************************************************************************/
|
|
/*
 * Function-pointer type for a CU mode decision routine.
 * Takes the enc loop context, the CU level params, the CU analyse data, the
 * final mode state, an entropy coding data buffer and collocated PU info
 * (buffer, map and start index).
 * Returns an LWORD64.
 * NOTE(review): the return value is presumably the cost of the chosen mode -
 * confirm against the implementations.
 */
typedef LWORD64 (*pf_cu_mode_decide)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    cu_analyse_t *ps_cu_analyse,
    final_mode_state_t *ps_final_mode_state,
    UWORD8 *pu1_ecd_data,
    pu_col_mv_t *ps_col_pu,
    UWORD8 *pu1_col_pu_map,
    WORD32 col_start_pu_idx);
|
|
|
|
/*
 * Function-pointer type for the inter RD-opt MC / MVP routine of a CU.
 * Takes the enc loop context, the inter candidate, the CU size and position,
 * the left / top / top-left 4x4 neighbour buffers with the left neighbour
 * stride, and the index of the current buffer.
 * Returns an LWORD64.
 * NOTE(review): meaning of the return value is not visible here - confirm
 * against the implementations.
 */
typedef LWORD64 (*pf_inter_rdopt_cu_mc_mvp)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    cu_inter_cand_t *ps_inter_cand,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    nbr_4x4_t *ps_left_nbr_4x4,
    nbr_4x4_t *ps_top_nbr_4x4,
    nbr_4x4_t *ps_topleft_nbr_4x4,
    WORD32 nbr_4x4_left_strd,
    WORD32 curr_buf_idx);
|
|
|
|
/*
 * Function-pointer type for the inter RD-opt routine of a CU with N TUs.
 * Takes the enc loop context, the CU level params, the source buffer, the CU
 * size and position, the index of the current buffer, the chroma CU buffer
 * params, the inter candidate, the CU analyse data and the alpha stim
 * multiplier.
 * Returns an LWORD64.
 * NOTE(review): the return value is presumably the RD cost - confirm against
 * the implementations.
 */
typedef LWORD64 (*pf_inter_rdopt_cu_ntu)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_src,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    WORD32 curr_buf_idx,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    cu_inter_cand_t *ps_inter_cand,
    cu_analyse_t *ps_cu_analyse,
    WORD32 i4_alpha_stim_multiplier);
|
|
|
|
typedef void (*pf_intra_chroma_pred_mode_selector)(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
|
|
cu_analyse_t *ps_cu_analyse,
|
|
WORD32 rd_opt_curr_idx,
|
|
WORD32 tu_mode,
|
|
WORD32 i4_alpha_stim_multiplier,
|
|
UWORD8 u1_is_cu_noisy);
|
|
|
|
/*
 * Function-pointer type for the intra RD-opt routine of a CU with N TUs.
 * Takes the enc loop context, the CU level params, the original prediction
 * buffer and its stride, the chroma CU buffer params, the luma mode(s), the
 * CU analyse data, the current source buffer, the left / top / top-left CU
 * neighbour buffers, the left / top 4x4 neighbour buffers with their strides,
 * the index of the current buffer, the processing mode of the function and
 * the alpha stim multiplier.
 * Returns an LWORD64.
 * NOTE(review): the return value is presumably the RD cost - confirm against
 * the implementations.
 */
typedef LWORD64 (*pf_intra_rdopt_cu_ntu)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_pred_org,
    WORD32 pred_strd_org,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    UWORD8 *pu1_luma_mode,
    cu_analyse_t *ps_cu_analyse,
    void *pv_curr_src,
    void *pv_cu_left,
    void *pv_cu_top,
    void *pv_cu_top_left,
    nbr_4x4_t *ps_left_nbr_4x4,
    nbr_4x4_t *ps_top_nbr_4x4,
    WORD32 nbr_4x4_left_strd,
    WORD32 cu_left_stride,
    WORD32 curr_buf_idx,
    WORD32 func_proc_mode,
    WORD32 i4_alpha_stim_multiplier);
|
|
|
|
typedef void (*pf_final_rdopt_mode_prcs)(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms);
|
|
|
|
typedef void (*pf_store_cu_results)(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_cu_prms_t *ps_cu_prms,
|
|
final_mode_state_t *ps_final_state);
|
|
|
|
typedef void (*pf_enc_loop_cu_bot_copy)(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_cu_prms_t *ps_cu_prms,
|
|
ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
|
|
WORD32 curr_cu_pos_in_row,
|
|
WORD32 curr_cu_pos_in_ctb);
|
|
|
|
typedef void (*pf_enc_loop_ctb_left_copy)(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms);
|
|
|
|
#endif /* _IHEVCE_ENC_LOOP_STRUCTS_H_ */
|