23 #include <immintrin.h> 27 static int16_t saturateAbs(int16_t inVal)
30 if (inVal == std::numeric_limits<short>::min())
32 result = std::numeric_limits<short>::max();
36 result = (int16_t)std::abs(inVal);
46 __m512i maxAbs = __m512i();
49 const __m512i* rawData =
reinterpret_cast<const __m512i*
>(dataIn.
dataExpanded);
50 constexpr
int k_numRBPerLoop = 4;
51 constexpr
int k_numInputLoopIts = BlockFloatCompander::k_numRB / k_numRBPerLoop;
53 #pragma unroll(k_numInputLoopIts) 54 for (
int n = 0; n < k_numInputLoopIts; ++n)
65 constexpr uint8_t k_msk1 = 0b11111100;
66 constexpr
int k_shuff1 = 0x41;
67 const auto z_w1 = _mm512_mask_shuffle_i64x2(rawData[3 * n + 0], k_msk1, rawData[3 * n + 1], rawData[3 * n + 2], k_shuff1);
69 constexpr uint8_t k_msk2 = 0b11000011;
70 constexpr
int k_shuff2 = 0xB1;
71 const auto z_w2 = _mm512_mask_shuffle_i64x2(rawData[3 * n + 1], k_msk2, rawData[3 * n + 0], rawData[3 * n + 2], k_shuff2);
73 constexpr uint8_t k_msk3 = 0b00111111;
74 constexpr
int k_shuff3 = 0xBE;
75 const auto z_w3 = _mm512_mask_shuffle_i64x2(rawData[3 * n + 2], k_msk3, rawData[3 * n + 0], rawData[3 * n + 1], k_shuff3);
78 const auto abs16_1 = _mm512_abs_epi16(z_w1);
79 const auto abs16_2 = _mm512_abs_epi16(z_w2);
80 const auto abs16_3 = _mm512_abs_epi16(z_w3);
81 const auto maxAbs_12 = _mm512_max_epi16(abs16_1, abs16_2);
82 const auto maxAbs_123 = _mm512_max_epi16(maxAbs_12, abs16_3);
86 const auto k_perm64b = _mm512_set_epi64(6, 7, 4, 5, 2, 3, 0, 1);
87 auto maxAbsPerm = _mm512_permutexvar_epi64(k_perm64b, maxAbs_123);
88 auto maxAbsHorz = _mm512_max_epi16(maxAbs_123, maxAbsPerm);
91 const auto k_perm32b = _mm512_set_epi32(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
92 maxAbsPerm = _mm512_permutexvar_epi32(k_perm32b, maxAbsHorz);
93 maxAbsHorz = _mm512_max_epi16(maxAbsHorz, maxAbsPerm);
96 maxAbsPerm = _mm512_rol_epi32(maxAbsHorz, BlockFloatCompander::k_numBitsIQ);
97 maxAbsHorz = _mm512_max_epi16(maxAbsHorz, maxAbsPerm);
102 const auto k_select4RB = _mm512_set_epi32(28, 24, 20, 16, 28, 24, 20, 16,
103 28, 24, 20, 16, 28, 24, 20, 16);
104 constexpr uint16_t k_expMsk[k_numInputLoopIts] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
105 maxAbs = _mm512_mask_permutex2var_epi32(maxAbs, k_expMsk[n], k_select4RB, maxAbsHorz);
109 const auto k_upperWordMask = _mm512_set_epi64(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF,
110 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF,
111 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF,
112 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
113 maxAbs = _mm512_and_epi64(maxAbs, k_upperWordMask);
116 const auto totShiftBits = _mm512_set1_epi32(32 - dataIn.
iqWidth + 1);
117 const auto lzCount = _mm512_lzcnt_epi32(maxAbs);
118 const auto exponent = _mm512_sub_epi32(totShiftBits, lzCount);
119 constexpr uint16_t k_expWriteMask = 0xFFFF;
120 _mm512_mask_cvtepi32_storeu_epi8(expStore, k_expWriteMask, exponent);
130 const __m512i k_shiftLeft = _mm512_set_epi64(0x0000000100020003, 0x0004000500060007,
131 0x0000000100020003, 0x0004000500060007,
132 0x0000000100020003, 0x0004000500060007,
133 0x0000000100020003, 0x0004000500060007);
134 auto compDataPacked = _mm512_sllv_epi16(compData, k_shiftLeft);
137 const __m512i k_byteShuffleMask1 = _mm512_set_epi64(0x0000000000000000, 0x0C0D080904050001,
138 0x0000000000000000, 0x0C0D080904050001,
139 0x0000000000000000, 0x0C0D080904050001,
140 0x0000000000000000, 0x0C0D080904050001);
141 constexpr uint64_t k_byteMask1 = 0x000000FF00FF00FF;
142 auto compDataShuff1 = _mm512_maskz_shuffle_epi8(k_byteMask1, compDataPacked, k_byteShuffleMask1);
145 const __m512i k_byteShuffleMask2 = _mm512_set_epi64(0x000000000000000E, 0x0F0A0B0607020300,
146 0x000000000000000E, 0x0F0A0B0607020300,
147 0x000000000000000E, 0x0F0A0B0607020300,
148 0x000000000000000E, 0x0F0A0B0607020300);
149 constexpr uint64_t k_byteMask2 = 0x000001FE01FE01FE;
150 auto compDataShuff2 = _mm512_maskz_shuffle_epi8(k_byteMask2, compDataPacked, k_byteShuffleMask2);
153 const __m512i k_ternLogSelect = _mm512_set_epi64(0x00000000000000FF, 0x01FC07F01FC07F00,
154 0x00000000000000FF, 0x01FC07F01FC07F00,
155 0x00000000000000FF, 0x01FC07F01FC07F00,
156 0x00000000000000FF, 0x01FC07F01FC07F00);
157 return _mm512_ternarylogic_epi64(compDataShuff1, compDataShuff2, k_ternLogSelect, 0xd8);
167 const __m512i k_shiftLeft = _mm512_set_epi64(0x0000000200040006, 0x0000000200040006,
168 0x0000000200040006, 0x0000000200040006,
169 0x0000000200040006, 0x0000000200040006,
170 0x0000000200040006, 0x0000000200040006);
171 auto compDataPacked = _mm512_sllv_epi16(compData, k_shiftLeft);
174 const __m512i k_byteShuffleMask1 = _mm512_set_epi64(0x000000000000000C, 0x0D08090004050001,
175 0x000000000000000C, 0x0D08090004050001,
176 0x000000000000000C, 0x0D08090004050001,
177 0x000000000000000C, 0x0D08090004050001);
178 constexpr uint64_t k_byteMask1 = 0x000001EF01EF01EF;
179 auto compDataShuff1 = _mm512_maskz_shuffle_epi8(k_byteMask1, compDataPacked, k_byteShuffleMask1);
182 const __m512i k_byteShuffleMask2 = _mm512_set_epi64(0x0000000000000E0F, 0x0A0B000607020300,
183 0x0000000000000E0F, 0x0A0B000607020300,
184 0x0000000000000E0F, 0x0A0B000607020300,
185 0x0000000000000E0F, 0x0A0B000607020300);
186 constexpr uint64_t k_byteMask2 = 0x000003DE03DE03DE;
187 auto compDataShuff2 = _mm512_maskz_shuffle_epi8(k_byteMask2, compDataPacked, k_byteShuffleMask2);
190 const __m512i k_ternLogSelect = _mm512_set_epi64(0x000000000000FF03, 0xF03F00FF03F03F00,
191 0x000000000000FF03, 0xF03F00FF03F03F00,
192 0x000000000000FF03, 0xF03F00FF03F03F00,
193 0x000000000000FF03, 0xF03F00FF03F03F00);
194 return _mm512_ternarylogic_epi64(compDataShuff1, compDataShuff2, k_ternLogSelect, 0xd8);
204 const __m512i k_shiftLeft = _mm512_set_epi64(0x0000000400000004, 0x0000000400000004,
205 0x0000000400000004, 0x0000000400000004,
206 0x0000000400000004, 0x0000000400000004,
207 0x0000000400000004, 0x0000000400000004);
208 auto compDataPacked = _mm512_sllv_epi16(compData, k_shiftLeft);
211 const __m512i k_byteShuffleMask1 = _mm512_set_epi64(0x00000000000C0D00, 0x0809000405000001,
212 0x00000000000C0D00, 0x0809000405000001,
213 0x00000000000C0D00, 0x0809000405000001,
214 0x00000000000C0D00, 0x0809000405000001);
215 constexpr uint64_t k_byteMask1 = 0x000006DB06DB06DB;
216 auto compDataShuff1 = _mm512_maskz_shuffle_epi8(k_byteMask1, compDataPacked, k_byteShuffleMask1);
219 const __m512i k_byteShuffleMask2 = _mm512_set_epi64(0x000000000E0F000A, 0x0B00060700020300,
220 0x000000000E0F000A, 0x0B00060700020300,
221 0x000000000E0F000A, 0x0B00060700020300,
222 0x000000000E0F000A, 0x0B00060700020300);
223 constexpr uint64_t k_byteMask2 = 0x00000DB60DB60DB6;
224 auto compDataShuff2 = _mm512_maskz_shuffle_epi8(k_byteMask2, compDataPacked, k_byteShuffleMask2);
227 const __m512i k_ternLogSelect = _mm512_set_epi64(0x00000000FF0F00FF, 0x0F00FF0F00FF0F00,
228 0x00000000FF0F00FF, 0x0F00FF0F00FF0F00,
229 0x00000000FF0F00FF, 0x0F00FF0F00FF0F00,
230 0x00000000FF0F00FF, 0x0F00FF0F00FF0F00);
231 return _mm512_ternarylogic_epi64(compDataShuff1, compDataShuff2, k_ternLogSelect, 0xd8);
241 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(inData);
242 const auto k_expPerm = _mm512_set_epi32(15, 14, 13, 12, 7, 6, 5, 4,
243 5, 4, 3, 2, 3, 2, 1, 0);
244 auto expData = _mm512_permutexvar_epi32(k_expPerm, *rawDataIn);
249 const __m512i k_byteShuffleMask = _mm512_set_epi64(0x0F0E0D0C0B0A0908, 0x0706050403020100,
250 0x090A080907080607, 0x0506040503040203,
251 0x0809070806070506, 0x0405030402030102,
252 0x0708060705060405, 0x0304020301020001);
253 expData = _mm512_shuffle_epi8(expData, k_byteShuffleMask);
256 const __m512i k_slBits = _mm512_set_epi64(0x0007000600050004, 0x0003000200010000,
257 0x0007000600050004, 0x0003000200010000,
258 0x0007000600050004, 0x0003000200010000,
259 0x0007000600050004, 0x0003000200010000);
260 expData = _mm512_sllv_epi16(expData, k_slBits);
263 const __m512i k_expMask = _mm512_set1_epi16(0xFF80);
264 return _mm512_and_epi64(expData, k_expMask);
274 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(inData);
275 const auto k_expPerm = _mm512_set_epi32(15, 14, 13, 12, 8, 7, 6, 5,
276 5, 4, 3, 2, 3, 2, 1, 0);
277 auto expData = _mm512_permutexvar_epi32(k_expPerm, *rawDataIn);
282 const __m512i k_byteShuffleMask = _mm512_set_epi64(0x0809070806070506, 0x0304020301020001,
283 0x0809070806070506, 0x0304020301020001,
284 0x0A0B090A08090708, 0x0506040503040203,
285 0x0809070806070506, 0x0304020301020001);
286 expData = _mm512_shuffle_epi8(expData, k_byteShuffleMask);
289 const __m512i k_slBits = _mm512_set_epi64(0x0006000400020000, 0x0006000400020000,
290 0x0006000400020000, 0x0006000400020000,
291 0x0006000400020000, 0x0006000400020000,
292 0x0006000400020000, 0x0006000400020000);
293 expData = _mm512_sllv_epi16(expData, k_slBits);
296 const __m512i k_expMask = _mm512_set1_epi16(0xFFC0);
297 return _mm512_and_epi64(expData, k_expMask);
307 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(inData);
308 const auto k_expPerm = _mm512_set_epi32(15, 14, 13, 12, 9, 8, 7, 6,
309 6, 5, 4, 3, 3, 2, 1, 0);
310 auto expData = _mm512_permutexvar_epi32(k_expPerm, *rawDataIn);
314 const __m512i k_byteShuffleMask = _mm512_set_epi64(0x0A0B090A07080607, 0x0405030401020001,
315 0x0A0B090A07080607, 0x0405030401020001,
316 0x0A0B090A07080607, 0x0405030401020001,
317 0x0A0B090A07080607, 0x0405030401020001);
318 expData = _mm512_shuffle_epi8(expData, k_byteShuffleMask);
321 const __m512i k_slBits = _mm512_set_epi64(0x0004000000040000, 0x0004000000040000,
322 0x0004000000040000, 0x0004000000040000,
323 0x0004000000040000, 0x0004000000040000,
324 0x0004000000040000, 0x0004000000040000);
325 expData = _mm512_sllv_epi16(expData, k_slBits);
328 const __m512i k_expMask = _mm512_set1_epi16(0xFFF0);
329 return _mm512_and_epi64(expData, k_expMask);
338 int8_t storedExp[BlockFloatCompander::k_numRB] = {};
342 #pragma unroll(BlockFloatCompander::k_numRB) 343 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
345 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(dataIn.
dataExpanded + n * BlockFloatCompander::k_numREReal);
346 auto compData = _mm512_srai_epi16(*rawDataIn, storedExp[n]);
347 auto thisRBExpAddr = n * (BlockFloatCompander::k_numREReal + 1);
351 constexpr uint32_t k_rbMask = 0x00FFFFFF;
352 _mm256_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1, k_rbMask, _mm512_cvtepi16_epi8(compData));
362 int8_t storedExp[BlockFloatCompander::k_numRB] = {};
367 #pragma unroll(BlockFloatCompander::k_numRB) 368 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
371 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(dataIn.
dataExpanded + n * BlockFloatCompander::k_numREReal);
372 auto compData = _mm512_srai_epi16(*rawDataIn, storedExp[n]);
378 constexpr
int k_totNumBytesPerRB = 28;
379 auto thisRBExpAddr = n * k_totNumBytesPerRB;
384 constexpr uint16_t k_RbWriteMask = 0x01FF;
385 constexpr
int k_numDataBytesPerLane = 9;
386 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1, k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 0));
387 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1 + k_numDataBytesPerLane, k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 1));
388 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1 + (2 * k_numDataBytesPerLane), k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 2));
398 int8_t storedExp[BlockFloatCompander::k_numRB] = {};
403 #pragma unroll(BlockFloatCompander::k_numRB) 404 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
407 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(dataIn.
dataExpanded + n * BlockFloatCompander::k_numREReal);
408 auto compData = _mm512_srai_epi16(*rawDataIn, storedExp[n]);
414 constexpr
int k_totNumBytesPerRB = 31;
415 auto thisRBExpAddr = n * k_totNumBytesPerRB;
420 constexpr uint16_t k_RbWriteMask = 0x03FF;
421 constexpr
int k_numDataBytesPerLane = 10;
422 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1, k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 0));
423 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1 + k_numDataBytesPerLane, k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 1));
424 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1 + (2 * k_numDataBytesPerLane), k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 2));
434 int8_t storedExp[BlockFloatCompander::k_numRB] = {};
439 #pragma unroll(BlockFloatCompander::k_numRB) 440 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
443 const __m512i* rawDataIn =
reinterpret_cast<const __m512i*
>(dataIn.
dataExpanded + n * BlockFloatCompander::k_numREReal);
444 auto compData = _mm512_srai_epi16(*rawDataIn, storedExp[n]);
450 constexpr
int k_totNumBytesPerRB = 37;
451 auto thisRBExpAddr = n * k_totNumBytesPerRB;
456 constexpr uint16_t k_RbWriteMask = 0x0FFF;
457 constexpr
int k_numDataBytesPerLane = 12;
458 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1, k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 0));
459 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1 + k_numDataBytesPerLane, k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 1));
460 _mm_mask_storeu_epi8(dataOut->
dataCompressed + thisRBExpAddr + 1 + (2 * k_numDataBytesPerLane), k_RbWriteMask, _mm512_extracti64x2_epi64(compDataBytePacked, 2));
469 #pragma unroll(BlockFloatCompander::k_numRB) 470 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
473 auto expAddr = n * (BlockFloatCompander::k_numREReal + 1);
474 const __m256i* rawDataIn =
reinterpret_cast<const __m256i*
>(dataIn.
dataCompressed + expAddr + 1);
475 const auto compData16 = _mm512_cvtepi8_epi16(*rawDataIn);
476 const auto expData = _mm512_slli_epi16(compData16, *(dataIn.
dataCompressed + expAddr));
478 constexpr uint8_t k_rbMask64 = 0b00111111;
479 _mm512_mask_storeu_epi64(dataOut->
dataExpanded + n * BlockFloatCompander::k_numREReal, k_rbMask64, expData);
488 #pragma unroll(BlockFloatCompander::k_numRB) 489 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
491 constexpr
int k_totNumBytesPerRB = 28;
492 auto expAddr = n * k_totNumBytesPerRB;
498 constexpr
int k_maxExpShift = 7;
499 expData = _mm512_srai_epi16(expData, k_maxExpShift - *(dataIn.
dataCompressed + expAddr));
502 static constexpr uint32_t k_WriteMask = 0x00FFFFFF;
503 _mm512_mask_storeu_epi16(dataOut->
dataExpanded + n * BlockFloatCompander::k_numREReal, k_WriteMask, expData);
512 #pragma unroll(BlockFloatCompander::k_numRB) 513 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
515 constexpr
int k_totNumBytesPerRB = 31;
516 auto expAddr = n * k_totNumBytesPerRB;
522 constexpr
int k_maxExpShift = 6;
523 expData = _mm512_srai_epi16(expData, k_maxExpShift - *(dataIn.
dataCompressed + expAddr));
526 static constexpr uint32_t k_WriteMask = 0x00FFFFFF;
527 _mm512_mask_storeu_epi16(dataOut->
dataExpanded + n * BlockFloatCompander::k_numREReal, k_WriteMask, expData);
536 #pragma unroll(BlockFloatCompander::k_numRB) 537 for (
int n = 0; n < BlockFloatCompander::k_numRB; ++n)
539 constexpr
int k_totNumBytesPerRB = 37;
540 auto expAddr = n * k_totNumBytesPerRB;
546 constexpr
int k_maxExpShift = 4;
547 expData = _mm512_srai_epi16(expData, k_maxExpShift - *(dataIn.
dataCompressed + expAddr));
550 static constexpr uint32_t k_WriteMask = 0x00FFFFFF;
551 _mm512_mask_storeu_epi16(dataOut->
dataExpanded + n * BlockFloatCompander::k_numREReal, k_WriteMask, expData);
561 int16_t iqMask = (int16_t)((1 << dataIn.
iqWidth) - 1);
562 int byteShiftUnits = dataIn.
iqWidth - 8;
564 for (
int rb = 0;
rb < BlockFloatCompander::k_numRB; ++
rb)
568 for (
int re = 0; re < BlockFloatCompander::k_numREReal; ++re)
570 auto dataIdx =
rb * BlockFloatCompander::k_numREReal + re;
571 auto dataAbs = saturateAbs(dataIn.
dataExpanded[dataIdx]);
572 maxAbs = std::max(maxAbs, dataAbs);
576 auto thisExp = (uint8_t)(std::max(0,(16 - dataIn.
iqWidth + 1 - __lzcnt16(maxAbs))));
582 static constexpr
int k_byteMask = 0xFF;
583 int byteShiftVal = -8;
584 int byteBuffer = { 0 };
585 for (
int re = 0; re < BlockFloatCompander::k_numREReal; ++re)
587 auto dataIdxIn =
rb * BlockFloatCompander::k_numREReal + re;
588 auto thisRE = dataIn.
dataExpanded[dataIdxIn] >> thisExp;
589 byteBuffer = (byteBuffer << dataIn.
iqWidth) + (
int)(thisRE & iqMask);
591 byteShiftVal += (8 + byteShiftUnits);
592 while (byteShiftVal >= 0)
594 auto thisByte = (uint8_t)((byteBuffer >> byteShiftVal) & k_byteMask);
607 uint32_t iqMask = (uint32_t)(UINT_MAX - ((1 << (32 - dataIn.
iqWidth)) - 1));
608 uint32_t byteBuffer = { 0 };
609 int numBytesPerRB = (3 * dataIn.
iqWidth) + 1;
613 for (
int rb = 0;
rb < BlockFloatCompander::k_numRB; ++
rb)
615 auto expIdx =
rb * numBytesPerRB;
618 for (
int b = 0; b < numBytesPerRB - 1; ++b)
620 auto dataIdxIn = (expIdx + 1) + b;
622 byteBuffer = (uint32_t)((byteBuffer << 8) + thisByte);
624 while (bitPointer >= dataIn.
iqWidth)
629 int32_t thisSample32 = (int32_t)((byteBuffer << (32 - bitPointer)) & iqMask);
630 int16_t thisSample = (int16_t)(thisSample32 >> signExtShift);
643 int16_t iqMask = (int16_t)((1 << dataIn.
iqWidth) - 1);
644 int byteShiftUnits = dataIn.
iqWidth - 8;
646 for (
int rb = 0;
rb < BlockFloatCompanderBFW::k_numRB; ++
rb)
650 for (
int re = 0; re < BlockFloatCompanderBFW::k_numREReal; ++re)
652 auto dataIdx =
rb * BlockFloatCompanderBFW::k_numREReal + re;
653 auto dataAbs = saturateAbs(dataIn.
dataExpanded[dataIdx]);
654 maxAbs = std::max(maxAbs, dataAbs);
658 auto thisExp = (uint8_t)(std::max(0,(16 - dataIn.
iqWidth + 1 - __lzcnt16(maxAbs))));
664 static constexpr
int k_byteMask = 0xFF;
665 int byteShiftVal = -8;
666 int byteBuffer = { 0 };
667 for (
int re = 0; re < BlockFloatCompanderBFW::k_numREReal; ++re)
669 auto dataIdxIn =
rb * BlockFloatCompanderBFW::k_numREReal + re;
670 auto thisRE = dataIn.
dataExpanded[dataIdxIn] >> thisExp;
671 byteBuffer = (byteBuffer << dataIn.
iqWidth) + (
int)(thisRE & iqMask);
673 byteShiftVal += (8 + byteShiftUnits);
674 while (byteShiftVal >= 0)
676 auto thisByte = (uint8_t)((byteBuffer >> byteShiftVal) & k_byteMask);
689 uint32_t iqMask = (uint32_t)(UINT_MAX - ((1 << (32 - dataIn.
iqWidth)) - 1));
690 uint32_t byteBuffer = { 0 };
691 int numBytesPerRB = (3 * dataIn.
iqWidth) + 1;
695 for (
int rb = 0;
rb < BlockFloatCompanderBFW::k_numRB; ++
rb)
697 auto expIdx =
rb * numBytesPerRB;
700 for (
int b = 0; b < numBytesPerRB - 1; ++b)
702 auto dataIdxIn = (expIdx + 1) + b;
704 byteBuffer = (uint32_t)((byteBuffer << 8) + thisByte);
706 while (bitPointer >= dataIn.
iqWidth)
711 int32_t thisSample32 = (int32_t)((byteBuffer << (32 - bitPointer)) & iqMask);
712 int16_t thisSample = (int16_t)(thisSample32 >> signExtShift);
720 #define RB_NUM_ROUNDUP(rb) \ 721 (BlockFloatCompander::k_numRB * ((rb + BlockFloatCompander::k_numRB - 1) / BlockFloatCompander::k_numRB)) 748 expandedDataInput.
iqWidth = 10;
752 expandedDataInput.
iqWidth = 12;
761 for (int16_t block_idx = 0;
766 &request->
data_in[block_idx*BlockFloatCompander::k_numSampsExpanded];
770 com_fn(expandedDataInput, &compressedDataOut);
771 len += ((3 * expandedDataInput.
iqWidth) + 1) * std::min((int16_t)BlockFloatCompander::k_numRB,(int16_t)numRBs);
791 for (
int m = 0; m < BlockFloatCompander::k_numRB; ++m){
792 for (
int n = 0; n < BlockFloatCompander::k_numREReal; ++n){
793 expandedDataInput.
dataExpanded[m*BlockFloatCompander::k_numREReal+n] =
794 request->
data_in[m*BlockFloatCompander::k_numREReal+n];
812 expandedDataInput.
iqWidth = 10;
815 expandedDataInput.
iqWidth = 12;
818 printf(
"bfwIqWidth is not supported %d\n", request->
iqWidth);
823 com_fn(expandedDataInput, &compressedDataKern);
824 response->
len = ((BlockFloatCompanderBFW::k_numRE/16*4*expandedDataInput.
iqWidth)+1)*BlockFloatCompanderBFW::k_numRB;
847 compressedDataInput.
iqWidth = 8;
851 compressedDataInput.
iqWidth = 9;
855 compressedDataInput.
iqWidth = 10;
859 compressedDataInput.
iqWidth = 12;
868 for (int16_t block_idx = 0;
872 compressedDataInput.
dataCompressed = (uint8_t*)&request->
data_in[block_idx*(((3 * compressedDataInput.
iqWidth ) + 1) * BlockFloatCompander::k_numRB)];
875 decom_fn(compressedDataInput, &expandedDataOut);
876 len += std::min((int16_t)BlockFloatCompander::k_numSampsExpanded, (int16_t)(numRBs*BlockFloatCompander::k_numREReal));
879 response->
len = numRBs * BlockFloatCompander::k_numREReal*
sizeof(int16_t);
-
-
CACHE_ALIGNED uint8_t * dataCompressed
-
void BlockFloatCompress_12b_AVX512(const ExpandedData &dataIn, CompressedData *dataOut)
12 bit compression
-
-
void(* xran_bfp_compress_bfw_fn)(const BlockFloatCompanderBFW::ExpandedData &dataIn, BlockFloatCompanderBFW::CompressedData *dataOut)
-
-
__m512i networkBytePack10b(const __m512i compData)
-
#define RB_NUM_ROUNDUP(rb)
-
int iqWidth
Size of mantissa including sign bit.
-
CACHE_ALIGNED int16_t * dataExpanded
-
-
int iqWidth
Size of mantissa including sign bit.
-
-
Request structure containing pointer to data and its length.
-
void BlockFloatCompress_Basic(const ExpandedData &dataIn, CompressedData *dataOut)
Reference compression.
-
External API for compading with the use BFP algorithm.
-
-
__m512i networkByteUnpack10b(const uint8_t *inData)
-
int iqWidth
Size of mantissa including sign bit.
-
void(* xran_bfp_compress_fn)(const BlockFloatCompander::ExpandedData &dataIn, BlockFloatCompander::CompressedData *dataOut)
-
void BlockFloatCompress_9b_AVX512(const ExpandedData &dataIn, CompressedData *dataOut)
9 bit compression
-
-
void BlockFloatCompress_10b_AVX512(const ExpandedData &dataIn, CompressedData *dataOut)
10 bit compression
-
void(* xran_bfp_decompress_fn)(const BlockFloatCompander::CompressedData &dataIn, BlockFloatCompander::ExpandedData *dataOut)
-
int32_t xranlib_compress_avx512(const struct xranlib_compress_request *request, struct xranlib_compress_response *response)
-
int iqWidth
Size of mantissa including sign bit.
-
void BlockFloatExpand_8b_AVX512(const CompressedData &dataIn, ExpandedData *dataOut)
8 bit expansion
-
__m512i networkByteUnpack12b(const uint8_t *inData)
-
-
Request structure containing pointer to data and its length.
-
-
int32_t xranlib_decompress_avx512(const struct xranlib_decompress_request *request, struct xranlib_decompress_response *response)
-
__m512i networkByteUnpack9b(const uint8_t *inData)
-
void BlockFloatExpand_Basic(const CompressedData &dataIn, ExpandedData *dataOut)
Reference expansion.
-
void BlockFloatExpand_9b_AVX512(const CompressedData &dataIn, ExpandedData *dataOut)
9 bit expansion
-
void computeExponent(const BlockFloatCompander::ExpandedData &dataIn, int8_t *expStore)
Compute exponent value for a set of RB from the maximum absolute value.
-
-
-
Response structure containing pointer to data and its length.
-
void BlockFloatExpand_12b_AVX512(const CompressedData &dataIn, ExpandedData *dataOut)
12 bit expansion
-
CACHE_ALIGNED uint8_t * dataCompressed
-
void BlockFloatExpand_10b_AVX512(const CompressedData &dataIn, ExpandedData *dataOut)
10 bit expansion
-
__m512i networkBytePack9b(const __m512i compData)
-
void BlockFloatCompress_8b_AVX512(const ExpandedData &dataIn, CompressedData *dataOut)
8 bit compression
-
-
CACHE_ALIGNED int16_t * dataExpanded
-
-
-
-
__m512i networkBytePack12b(const __m512i compData)
-
-
int32_t xranlib_compress_avx512_bfw(const struct xranlib_compress_request *request, struct xranlib_compress_response *response)
-
void BlockFloatCompress_Basic(const ExpandedData &dataIn, CompressedData *dataOut)
Reference compression.
-
Response structure containing pointer to data and its length.
-
-
void BlockFloatExpand_Basic(const CompressedData &dataIn, ExpandedData *dataOut)
Reference expansion.
-