1 /******************************************************************************
3 * Copyright (c) 2020 Intel.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 *******************************************************************************/
/**
 * @brief xRAN BFP compression/decompression U-plane implementation and interface functions
 * @file xran_compression.cpp
 * @ingroup group_source_xran
 * @author Intel Corporation
 **/
27 #include "xran_compression.hpp"
28 #include "xran_bfp_utils.hpp"
29 #include "xran_bfp_byte_packing_utils.hpp"
30 #include "xran_compression.h"
33 #include <immintrin.h>
37 namespace BFP_UPlane_9b16RB
39 /// Namespace constants
40 const int k_numREReal = 24; /// 12 IQ pairs
43 /// Compute exponent value for a set of 16 RB from the maximum absolute value.
44 /// Max Abs operates in a loop, executing 4 RB per iteration. The results are
45 /// packed into the final output register.
47 computeExponent_16RB(const BlockFloatCompander::ExpandedData& dataIn, const __m512i totShiftBits)
49 __m512i maxAbs = __m512i();
50 const __m512i* rawData = reinterpret_cast<const __m512i*>(dataIn.dataExpanded);
51 /// Max Abs loop operates on 4RB at a time
53 for (int n = 0; n < 4; ++n)
55 /// Re-order and vertical max abs
56 auto maxAbsVert = BlockFloatCompander::maxAbsVertical4RB(rawData[3 * n + 0], rawData[3 * n + 1], rawData[3 * n + 2]);
57 /// Horizontal max abs
58 auto maxAbsHorz = BlockFloatCompander::horizontalMax4x16(maxAbsVert);
59 /// Pack these 4 values into maxAbs
60 maxAbs = BlockFloatCompander::slidePermute(maxAbsHorz, maxAbs, n);
62 /// Calculate exponent
63 const auto maxAbs32 = BlockFloatCompander::maskUpperWord(maxAbs);
64 return BlockFloatCompander::expLzCnt(maxAbs32, totShiftBits);
68 /// Apply compression to 1 RB
69 template<BlockFloatCompander::PackFunction networkBytePack>
71 applyCompressionN_1RB(const BlockFloatCompander::ExpandedData& dataIn, BlockFloatCompander::CompressedData* dataOut,
72 const int numREOffset, const uint8_t thisExp, const int thisRBExpAddr, const uint16_t rbWriteMask)
74 /// Get AVX512 pointer aligned to desired RB
75 const __m512i* rawDataIn = reinterpret_cast<const __m512i*>(dataIn.dataExpanded + numREOffset);
76 /// Apply the exponent shift
77 const auto compData = _mm512_srai_epi16(*rawDataIn, thisExp);
78 /// Pack compressed data network byte order
79 const auto compDataBytePacked = networkBytePack(compData);
80 /// Store exponent first
81 dataOut->dataCompressed[thisRBExpAddr] = thisExp;
82 /// Now have 1 RB worth of bytes separated into 3 chunks (1 per lane)
83 /// Use three offset stores to join
84 _mm_mask_storeu_epi8(dataOut->dataCompressed + thisRBExpAddr + 1, rbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 0));
85 _mm_mask_storeu_epi8(dataOut->dataCompressed + thisRBExpAddr + 1 + dataIn.iqWidth, rbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 1));
86 _mm_mask_storeu_epi8(dataOut->dataCompressed + thisRBExpAddr + 1 + (2 * dataIn.iqWidth), rbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 2));
90 /// Calls compression function specific to the number of RB to be executed. For 9, 10, or 12bit iqWidth.
91 template<BlockFloatCompander::PackFunction networkBytePack>
93 compressByAllocN(const BlockFloatCompander::ExpandedData& dataIn, BlockFloatCompander::CompressedData* dataOut,
94 const __m512i totShiftBits, const int totNumBytesPerRB, const uint16_t rbWriteMask)
96 const auto exponents = computeExponent_16RB(dataIn, totShiftBits);
98 for (int n = 0; n < 16; ++n)
100 applyCompressionN_1RB<networkBytePack>(dataIn, dataOut, n * k_numREReal, ((uint8_t*)&exponents)[n * 4], n * totNumBytesPerRB, rbWriteMask);
105 /// Apply compression to 1 RB
106 template<BlockFloatCompander::UnpackFunction networkByteUnpack>
108 applyExpansionN_1RB(const BlockFloatCompander::CompressedData& dataIn, BlockFloatCompander::ExpandedData* dataOut,
109 const int expAddr, const int thisRBAddr, const int maxExpShift)
111 /// Unpack network order packed data
112 const auto dataUnpacked = networkByteUnpack(dataIn.dataCompressed + expAddr + 1);
113 /// Apply exponent scaling (by appropriate arithmetic shift right)
114 const auto dataExpanded = _mm512_srai_epi16(dataUnpacked, maxExpShift - *(dataIn.dataCompressed + expAddr));
115 /// Write expanded data to output
116 static constexpr uint32_t k_WriteMask = 0x00FFFFFF;
117 _mm512_mask_storeu_epi16(dataOut->dataExpanded + thisRBAddr, k_WriteMask, dataExpanded);
121 /// Calls compression function specific to the number of RB to be executed. For 9, 10, or 12bit iqWidth.
122 template<BlockFloatCompander::UnpackFunction networkByteUnpack>
124 expandByAllocN(const BlockFloatCompander::CompressedData& dataIn, BlockFloatCompander::ExpandedData* dataOut,
125 const int totNumBytesPerRB, const int maxExpShift)
128 for (int n = 0; n < 16; ++n)
130 applyExpansionN_1RB<networkByteUnpack>(dataIn, dataOut, n * totNumBytesPerRB, n * k_numREReal, maxExpShift);
137 /// Main kernel function for compression.
138 /// Starts by determining iqWidth specific parameters and functions.
140 BlockFloatCompander::BFPCompressUserPlaneAvx512_9b16RB(const ExpandedData& dataIn, CompressedData* dataOut)
142 /// Compensation for extra zeros in 32b leading zero count when computing exponent
143 const auto totShiftBits9 = _mm512_set1_epi32(24);
145 /// Total number of compressed bytes per RB for each iqWidth option
146 constexpr int totNumBytesPerRB9 = 28;
148 /// Compressed data write mask for each iqWidth option
149 constexpr uint16_t rbWriteMask9 = 0x01FF;
151 BFP_UPlane_9b16RB::compressByAllocN<BlockFloatCompander::networkBytePack9b>(dataIn, dataOut, totShiftBits9, totNumBytesPerRB9, rbWriteMask9);
156 /// Main kernel function for expansion.
157 /// Starts by determining iqWidth specific parameters and functions.
159 BlockFloatCompander::BFPExpandUserPlaneAvx512_9b16RB(const CompressedData& dataIn, ExpandedData* dataOut)
161 constexpr int k_totNumBytesPerRB9 = 28;
162 constexpr int k_maxExpShift9 = 7;
163 BFP_UPlane_9b16RB::expandByAllocN<BlockFloatCompander::networkByteUnpack9b>(dataIn, dataOut, k_totNumBytesPerRB9, k_maxExpShift9);