LookaheadTLD::calcAdaptiveQuantFrame()

    技术2022-07-12  77

    CU级码控分为两大类: 1.依据空间复杂度(方差/边缘密度)来调整; 2.依据时间(帧间)参考关系的权重来调整。最后CU的qp = 帧级码控qp + 空间复杂度码控qp_offset + 时间参考关系码控qp_offset。

    calcAdaptiveQuantFrame()是解决第一点,依据空间复杂度来定CU的QP偏移量。

    qp_adj为最后施加在CU的qp上的偏移量,一般是根据每个CU的YUV方差相对于全帧的平均方差来计算,当CU的YUV方差大于全帧的平均方差,表示该CU是一个纹理区域,则qp_adj为正,qp调高,压得更狠(纹理区域视觉不敏感),反之亦然。strength表示一个灵敏度,越大,则qp偏移量对空间复杂度/时间参考关系变化浮动越大,与param->rc.aqStrength有关。bias应用于AUTO_VARIANCE_BIASED方法,用于在qp_adj上人为施加一个偏移量。 /* aqMode有如下五种mode: X265_AQ_NONE X265_AQ_VARIANCE X265_AQ_AUTO_VARIANCE X265_AQ_AUTO_VARIANCE_BIASED X265_AQ_EDGE 函数根据不同aqMode来计算一个adaptive quantization偏移量qp_adj 用于后期CU的量化值计算 过程: 1.基于aq的最小CU大小来初始化blockCount、modeOneConst、modeTwoConst、loopIncr 2.取当前帧的量化偏移量向量quantOffsets 3.初始化当前帧的低分辨率wp_ssd和wp_sum 4.若没有bStatRead || 没有开启宏块树 || 当前帧不是参考帧,则 ·若无aqMode || param中aq强度为0 1.若有aqMode && param中aq强度为0,则初始化qpCuTreeOffset[]、qpAqOffset[]和invQscaleFactor[] ·若当前帧的量化偏移量向量quantOffsets[]有数据,则基于quantOffsets[]对它们进行初始化 ·否则qpCuTreeOffset[]、qpAqOffset[]初始化为0,invQscaleFactor[]为256 2.若开启了权重预测 || 权重双向预测,则遍历每个block,累计它们YUV的ssd和sum到低分辨率wp_ssd和wp_sum中 ·否则,既有aqMode,param中aq强度又非0 ·若开启了hevcAq方法,则执行xPreanalyze ·否则 1.申请边缘图像、高斯图像、theta图像的内存,并对它们进行初始化 2.若aqMode为EDGE,则edgeFilter 3.计算aq强度strength ·若aqMode为AUTO_VARIANCE、AUTO_VARIANCE_BIASED、EDGE,则 1.遍历每一个block,计算该block的YUV方差总和energy 2.初始化qp_adj ·若aqMode为EDGE 1.计算该block的边缘图edgeImage、theta图、边缘角度avgAngle,并得到其边缘密度edgeDensity 2.初始化qp_adj和edgeInclined[] 1.若有edgeDensity,则基于edgeDensity来初始化qp_adj,并根据边缘角度avgAngle是否在[30, 60]、[120, 150]范围内初始化edgeInclined[] 2.若无,则基于YUV方差总和energy来初始化qp_adj,并将该block的edgeInclined[]初始化为0 ·否则,基于YUV方差总和energy初始化qp_adj 3.将宏块的qp_adj存储到qpCuTreeOffset中 2.基于帧内每一块block的qp_adj,计算帧的qp_adj均值avg_adj,qp_adj平方的均值avg_adj_pow2 3.aq强度 = avg_adj * param中的aq强度 4.bias aq强度 = param中的aq强度 ·否则,aqMode为VARIANCE,aq强度简单地由param中的aq强度乘以1.0397计算而来 4.遍历每一个block 1.重新计算qp_adj ·若aqMode为AUTO_VARIANCE_BIASED,则基于aq强度、均值avg_adj、bias aq强度来重新计算block的qp_adj ·若aqMode为AUTO_VARIANCE,则基于aq强度、均值avg_adj来重新计算block的qp_adj ·若aqMode为EDGE,则基于边缘倾斜edgeInclined、aq强度、均值avg_adj来重新计算block的qp_adj ·若aqMode为VARIANCE,则重新计算block的YUV方差总和energy,并基于energy和aq强度来重新计算block的qp_adj 2.若开启了HDR/WCG的亮度/色度偏移,则微调qp_adj 3.若当前帧有量化偏移quantOffsets,则将其累加到qp_adj 
4.将最终计算得到的qp_adj存储到qpAqOffset[]、qpCuTreeOffset[],并将其转化成qscale存储到invQscaleFactor[]中 5.若aq的最小CUsize为8x8,则基于invQscaleFactor计算invQscaleFactor8x8 5.若开启了权重预测 || 权重双向预测 1.若有bStatRead && 开启cuTree && 当前帧是参考帧,则遍历每个block,计算它们YUV的sum和ssd到低分辨率wp_ssd和wp_sum中 2.遍历三个plane,最终基于wp_ssd和wp_sum重新计算wp_ssd 6.若开启了块级动态inter优化bDynamicRefine || 区域渐入bEnableFades 1.遍历每个block,计算block的YUV方差和到blockVariance中 2.基于每个block的YUV方差和,做均值计算,得到帧的方差frameVariance */
/* Computes the per-block adaptive-quantization QP offsets of one frame.
 *
 * Fills curFrame->m_lowres.qpAqOffset[], qpCuTreeOffset[] and
 * invQscaleFactor[] (plus invQscaleFactor8x8[] when param->rc.qgSize == 8)
 * according to param->rc.aqMode / aqStrength, then optionally finalises the
 * weighted-prediction statistics (wp_ssd[]) and the block/frame variance.
 *
 * curFrame  frame whose m_lowres tables are written.
 * param     encoder parameters; the fields read here are rc.qgSize, rc.aqMode,
 *           rc.aqStrength, rc.hevcAq, rc.bStatRead, rc.cuTree, bHDROpt,
 *           bEnableWeightedPred, bEnableWeightedBiPred, bDynamicRefine,
 *           bEnableFades, internalCsp and maxCUSize.
 */
void LookaheadTLD::calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param)
{
    /* Actual adaptive quantization */
    int maxCol = curFrame->m_fencPic->m_picWidth;
    int maxRow = curFrame->m_fencPic->m_picHeight;
    int blockCount, loopIncr;
    float modeOneConst, modeTwoConst;

    // Pick the block count, the two mode constants and the pixel step per block
    // from the quantization-group size (8 -> 8x8 blocks, otherwise 16x16).
    if (param->rc.qgSize == 8)
    {
        blockCount = curFrame->m_lowres.maxBlocksInRowFullRes * curFrame->m_lowres.maxBlocksInColFullRes;
        modeOneConst = 11.427f;
        modeTwoConst = 8.f;
        loopIncr = 8;
    }
    else
    {
        blockCount = widthInCU * heightInCU;
        modeOneConst = 14.427f;
        modeTwoConst = 11.f;
        loopIncr = 16;
    }

    // Per-block quant offsets attached to this frame (may be NULL).
    float* quantOffsets = curFrame->m_quantOffsets;

    // Reset the weighted-prediction accumulators for the three planes.
    for (int y = 0; y < 3; y++)
    {
        curFrame->m_lowres.wp_ssd[y] = 0;
        curFrame->m_lowres.wp_sum[y] = 0;
    }

    // Skip the offset computation only when stats are being read AND cuTree is
    // enabled AND the frame is referenced; in every other case compute it here.
    if (!(param->rc.bStatRead && param->rc.cuTree && IS_REFERENCED(curFrame)))
    {
        /* Calculate Qp offset for each 16x16 or 8x8 block in the frame */
        // AQ disabled, or AQ strength is zero.
        if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0)
        {
            // An AQ mode is selected but its strength is zero.
            if (param->rc.aqMode && param->rc.aqStrength == 0)
            {
                /* Initialise qpCuTreeOffset, qpAqOffset and invQscaleFactor */
                // The frame carries explicit quant offsets: copy them through.
                if (quantOffsets)
                {
                    for (int cuxy = 0; cuxy < blockCount; cuxy++)
                    {
                        // Both offset tables take the supplied offset; invQscaleFactor is its
                        // x265_exp2fix8() conversion (per the original annotation it is used to
                        // turn a CU SATD cost into an AQ-weighted SATD cost).
                        curFrame->m_lowres.qpCuTreeOffset[cuxy] = curFrame->m_lowres.qpAqOffset[cuxy] = quantOffsets[cuxy];
                        curFrame->m_lowres.invQscaleFactor[cuxy] = x265_exp2fix8(curFrame->m_lowres.qpCuTreeOffset[cuxy]);
                    }
                }
                // No quant offsets: zero both offset tables and set invQscaleFactor to 256.
                else
                {
                    memset(curFrame->m_lowres.qpCuTreeOffset, 0, blockCount * sizeof(double));
                    memset(curFrame->m_lowres.qpAqOffset, 0, blockCount * sizeof(double));
                    for (int cuxy = 0; cuxy < blockCount; cuxy++)
                        curFrame->m_lowres.invQscaleFactor[cuxy] = 256;
                }
            }

            /* Need variance data for weighted prediction and dynamic refinement*/
            // Weighted (bi-)prediction enabled: visit every block so that acEnergyCu can
            // accumulate the per-plane sum / ssd into wp_sum[] / wp_ssd[] (per the original
            // annotation); the returned energy itself is not needed here.
            if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
            {
                for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                    for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                        acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
            }
        }
        // An AQ mode is selected and the strength is non-zero.
        else
        {
            // hevcAq selects the alternative analysis path.
            if (param->rc.hevcAq)
            {
                // New method for calculating variance and qp offset
                xPreanalyze(curFrame);
            }
            // Regular variance / edge based AQ.
            else
            {
#define AQ_EDGE_BIAS 0.5
#define EDGE_INCLINATION 45
                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
                int maxHeight = numCuInHeight * param->maxCUSize;
                intptr_t stride = curFrame->m_fencPic->m_stride;

                // Allocate and zero the edge, gaussian and theta working pictures.
                // NOTE(review): the three pointers are passed to memset without a NULL check,
                // and the buffers are allocated for every AQ mode although only the
                // X265_AQ_EDGE branches below read them - confirm this is intended.
                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));

                // Edge mode: run the edge filter over the frame to fill the working pictures.
                if (param->rc.aqMode == X265_AQ_EDGE)
                    edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);

                int blockXY = 0, inclinedEdge = 0;
                double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
                double bias_strength = 0.f;
                double strength = 0.f;

                // Auto-variance, biased auto-variance and edge modes need a first pass that
                // gathers frame-wide averages before the final offsets can be derived.
                if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
                {
                    double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));

                    // First pass over every block, row by row.
                    for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                    {
                        for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                        {
                            uint32_t energy, edgeDensity, avgAngle;
                            // Block energy as returned by acEnergyCu (per the original
                            // annotation: the summed variance of the three planes).
                            energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                            if (param->rc.aqMode == X265_AQ_EDGE)
                            {
                                // Skip the luma margin to reach the first picture sample of the
                                // edge and theta pictures.
                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
                                // Edge density of this block; avgAngle is filled in by the call.
                                edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
                                // Non-zero density: derive the raw qp_adj from the edge density.
                                if (edgeDensity)
                                {
                                    qp_adj = pow(edgeDensity * bit_depth_correction + 1, 0.1);
                                    //Increasing the QP of a block if its edge orientation lies around the multiples of 45 degree
                                    // Flag the block as inclined when avgAngle is in [30, 60] or [120, 150].
                                    if ((avgAngle >= EDGE_INCLINATION - 15 && avgAngle <= EDGE_INCLINATION + 15) || (avgAngle >= EDGE_INCLINATION + 75 && avgAngle <= EDGE_INCLINATION + 105))
                                        curFrame->m_lowres.edgeInclined[blockXY] = 1;
                                    else
                                        curFrame->m_lowres.edgeInclined[blockXY] = 0;
                                }
                                // Zero density: fall back to the energy and clear the inclined flag.
                                else
                                {
                                    qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
                                    curFrame->m_lowres.edgeInclined[blockXY] = 0;
                                }
                            }
                            // Non-edge modes: raw qp_adj = (energy * bit_depth_correction + 1)^0.1.
                            else
                                qp_adj = pow(energy * bit_depth_correction + 1, 0.1);

                            // Park the raw value in qpCuTreeOffset; the second pass reads it back.
                            curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
                            // Accumulate the sum and the sum of squares for the frame averages.
                            avg_adj += qp_adj;
                            avg_adj_pow2 += qp_adj * qp_adj;
                            blockXY++;
                        }
                    }
                    // Turn the accumulated sums into frame averages.
                    avg_adj /= blockCount;
                    avg_adj_pow2 /= blockCount;
                    // Effective strength scales the configured strength by the mean raw qp_adj;
                    // this must use avg_adj before it is adjusted on the next line.
                    strength = param->rc.aqStrength * avg_adj;
                    avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst) / avg_adj;
                    // The bias term uses the configured strength unchanged.
                    bias_strength = param->rc.aqStrength;
                }
                // Plain variance mode: strength is the configured strength times a constant.
                else
                    strength = param->rc.aqStrength * 1.0397f;

                X265_FREE(edgePic);
                X265_FREE(gaussianPic);
                X265_FREE(thetaPic);

                /* Second pass over every block: compute the final offsets */
                blockXY = 0;
                for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                {
                    for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                    {
                        // Biased auto-variance: same as auto-variance plus a bias term.
                        if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
                        {
                            qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
                            qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
                        }
                        // Auto-variance: scaled distance of the raw value from the frame average.
                        else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
                        {
                            qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
                            qp_adj = strength * (qp_adj - avg_adj);
                        }
                        // Edge mode: like auto-variance, except that inclined blocks lying above
                        // the average get AQ_EDGE_BIAS added to the strength.
                        else if (param->rc.aqMode == X265_AQ_EDGE)
                        {
                            inclinedEdge = curFrame->m_lowres.edgeInclined[blockXY];
                            qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
                            if(inclinedEdge && (qp_adj - avg_adj > 0))
                                qp_adj = ((strength + AQ_EDGE_BIAS) * (qp_adj - avg_adj));
                            else
                                qp_adj = strength * (qp_adj - avg_adj);
                        }
                        // Plain variance mode: uses only this block's energy, with no
                        // frame-wide average involved.
                        else
                        {
                            uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                            qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8)));
                        }

                        // bHDROpt set (per the original annotation: HDR/WCG luma/chroma offsets):
                        // nudge qp_adj by the block's average luma.
                        if (param->bHDROpt)
                        {
                            uint32_t sum = lumaSumCu(curFrame, blockX, blockY, param->rc.qgSize);
                            uint32_t lumaAvg = sum / (loopIncr * loopIncr);
                            // Averages in [434, 501) match no branch and leave qp_adj unchanged.
                            if (lumaAvg < 301)
                                qp_adj += 3;
                            else if (lumaAvg >= 301 && lumaAvg < 367)
                                qp_adj += 2;
                            else if (lumaAvg >= 367 && lumaAvg < 434)
                                qp_adj += 1;
                            else if (lumaAvg >= 501 && lumaAvg < 567)
                                qp_adj -= 1;
                            else if (lumaAvg >= 567 && lumaAvg < 634)
                                qp_adj -= 2;
                            else if (lumaAvg >= 634 && lumaAvg < 701)
                                qp_adj -= 3;
                            else if (lumaAvg >= 701 && lumaAvg < 767)
                                qp_adj -= 4;
                            else if (lumaAvg >= 767 && lumaAvg < 834)
                                qp_adj -= 5;
                            else if (lumaAvg >= 834)
                                qp_adj -= 6;
                        }

                        // Add the frame's explicit per-block quant offset, if any.
                        if (quantOffsets != NULL)
                            qp_adj += quantOffsets[blockXY];

                        // Publish the final offset in both tables ...
                        curFrame->m_lowres.qpAqOffset[blockXY] = qp_adj;
                        curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
                        // ... and its x265_exp2fix8() conversion in invQscaleFactor.
                        curFrame->m_lowres.invQscaleFactor[blockXY] = x265_exp2fix8(qp_adj);
                        blockXY++;
                    }
                } // end of second pass over the blocks
            }
        } // end of "AQ mode selected and strength non-zero"

        // With 8x8 quantization groups, also build invQscaleFactor8x8 by averaging the
        // 2x2 group of invQscaleFactor entries that belongs to each CU.
        if (param->rc.qgSize == 8)
        {
            for (int cuY = 0; cuY < heightInCU; cuY++)
            {
                for (int cuX = 0; cuX < widthInCU; cuX++)
                {
                    const int cuXY = cuX + cuY * widthInCU;
                    curFrame->m_lowres.invQscaleFactor8x8[cuXY] = (curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
                        curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
                        curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
                        curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
                }
            }
        }
    } // end of "not (bStatRead && cuTree && referenced frame)"

    // Weighted prediction or weighted bi-prediction enabled.
    if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
    {
        // The offset computation above was skipped for this frame, so no block has been
        // visited yet: run acEnergyCu over every block to accumulate wp_sum[] / wp_ssd[]
        // (per the original annotation).
        if (param->rc.bStatRead && param->rc.cuTree && IS_REFERENCED(curFrame))
        {
            for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                    acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
        }

        int hShift = CHROMA_H_SHIFT(param->internalCsp);
        int vShift = CHROMA_V_SHIFT(param->internalCsp);
        // Round both dimensions to the nearest multiple of 16; note that maxCol / maxRow
        // keep these rounded values for the rest of the function.
        maxCol = ((maxCol + 8) >> 4) << 4;
        maxRow = ((maxRow + 8) >> 4) << 4;
        int width[3] = { maxCol, maxCol >> hShift, maxCol >> hShift };
        int height[3] = { maxRow, maxRow >> vShift, maxRow >> vShift };

        // For each of the three planes, replace the accumulated ssd with
        // ssd - round(sum^2 / (width * height)).
        for (int i = 0; i < 3; i++)
        {
            uint64_t sum, ssd;
            sum = curFrame->m_lowres.wp_sum[i];
            ssd = curFrame->m_lowres.wp_ssd[i];
            curFrame->m_lowres.wp_ssd[i] = ssd - (sum * sum + (width[i] * height[i]) / 2) / (width[i] * height[i]);
        }
    }

    // Dynamic refinement or fade detection enabled: record per-block and frame variance.
    if (param->bDynamicRefine || param->bEnableFades)
    {
        uint64_t blockXY = 0, rowVariance = 0;
        curFrame->m_lowres.frameVariance = 0;
        for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
        {
            for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
            {
                // Store the block's energy and add it to the running total.
                curFrame->m_lowres.blockVariance[blockXY] = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                rowVariance += curFrame->m_lowres.blockVariance[blockXY];
                blockXY++;
            }
            // NOTE(review): rowVariance is never reset between rows, so every row adds the
            // running total rather than that row's own sum; maxCol / maxRow may also have
            // been rounded by the weighted-prediction block above - confirm both are intended.
            curFrame->m_lowres.frameVariance += (rowVariance / maxCol);
        }
        curFrame->m_lowres.frameVariance /= maxRow;
    }
}
    Processed: 0.009, SQL: 9