- /// Convert to 32b by removing repeated values in maxAbs
- const auto k_upperWordMask = _mm512_set_epi64(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF,
- 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF,
- 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF,
- 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
- maxAbs = _mm512_and_epi64(maxAbs, k_upperWordMask);
-
- /// Compute and store exponent
- const auto totShiftBits = _mm512_set1_epi32(32 - dataIn.iqWidth + 1);
- const auto lzCount = _mm512_lzcnt_epi32(maxAbs);
- const auto exponent = _mm512_sub_epi32(totShiftBits, lzCount);
- constexpr uint16_t k_expWriteMask = 0xFFFF;
- _mm512_mask_cvtepi32_storeu_epi8(expStore, k_expWriteMask, exponent);
-}
-
-
-/// Pack 9-bit compressed samples into a contiguous bit stream in network byte order
-/// See https://soco.intel.com/docs/DOC-2665619
-///
-/// The low 9 bits of each 16-bit element of compData are used. Within each
-/// 128-bit lane the eight samples are written, most significant bit first,
-/// into lane bytes 0-8. Only lanes 0-2 are populated; every other byte of the
-/// returned vector is zero.
-__m512i
-networkBytePack9b(const __m512i compData)
-{
-  /// All constant tables repeat one 128-bit pattern in each of the four lanes
-  const auto perLane = [](const long long hi, const long long lo) {
-    return _mm512_set_epi64(hi, lo, hi, lo, hi, lo, hi, lo);
-  };
-
-  /// Shift samples 0..7 of a lane left by 7..0 bits so that, once the two bytes
-  /// of each element are swapped, the bits sit at their final stream position
-  const __m512i shiftCounts = perLane(0x0000000100020003, 0x0004000500060007);
-  const __m512i aligned = _mm512_sllv_epi16(compData, shiftCounts);
-
-  /// Even indexed samples: byte-swapped into lane bytes 0-7, rest zeroed
-  constexpr uint64_t k_evenKeep = 0x000000FF00FF00FF;
-  const __m512i evenOrder = perLane(0x0000000000000000, 0x0C0D080904050001);
-  const __m512i evenBytes = _mm512_maskz_shuffle_epi8(k_evenKeep, aligned, evenOrder);
-
-  /// Odd indexed samples: byte-swapped into lane bytes 1-8, rest zeroed
-  constexpr uint64_t k_oddKeep = 0x000001FE01FE01FE;
-  const __m512i oddOrder = perLane(0x000000000000000E, 0x0F0A0B0607020300);
-  const __m512i oddBytes = _mm512_maskz_shuffle_epi8(k_oddKeep, aligned, oddOrder);
-
-  /// Bitwise merge: truth table 0xd8 yields the odd-sample bit where the
-  /// selector bit is 1 and the even-sample bit where it is 0
-  const __m512i oddSelector = perLane(0x00000000000000FF, 0x01FC07F01FC07F00);
-  return _mm512_ternarylogic_epi64(evenBytes, oddBytes, oddSelector, 0xd8);
-}
-
-
-/// Pack 10-bit compressed samples into a contiguous bit stream in network byte order
-/// See https://soco.intel.com/docs/DOC-2665619
-///
-/// The low 10 bits of each 16-bit element of compData are used. Within each
-/// 128-bit lane the eight samples are written, most significant bit first,
-/// into lane bytes 0-9. Only lanes 0-2 are populated; every other byte of the
-/// returned vector is zero.
-__m512i
-networkBytePack10b(const __m512i compData)
-{
-  /// All constant tables repeat one 128-bit pattern in each of the four lanes
-  const auto perLane = [](const long long hi, const long long lo) {
-    return _mm512_set_epi64(hi, lo, hi, lo, hi, lo, hi, lo);
-  };
-
-  /// Shift each group of four samples left by 6, 4, 2, 0 bits so that, once the
-  /// two bytes of each element are swapped, the bits sit at their final position
-  const __m512i shiftCounts = perLane(0x0000000200040006, 0x0000000200040006);
-  const __m512i aligned = _mm512_sllv_epi16(compData, shiftCounts);
-
-  /// Even indexed samples: byte-swapped into lane bytes 0-3 and 5-8
-  constexpr uint64_t k_evenKeep = 0x000001EF01EF01EF;
-  const __m512i evenOrder = perLane(0x000000000000000C, 0x0D08090004050001);
-  const __m512i evenBytes = _mm512_maskz_shuffle_epi8(k_evenKeep, aligned, evenOrder);
-
-  /// Odd indexed samples: byte-swapped into lane bytes 1-4 and 6-9
-  constexpr uint64_t k_oddKeep = 0x000003DE03DE03DE;
-  const __m512i oddOrder = perLane(0x0000000000000E0F, 0x0A0B000607020300);
-  const __m512i oddBytes = _mm512_maskz_shuffle_epi8(k_oddKeep, aligned, oddOrder);
-
-  /// Bitwise merge: truth table 0xd8 yields the odd-sample bit where the
-  /// selector bit is 1 and the even-sample bit where it is 0
-  const __m512i oddSelector = perLane(0x000000000000FF03, 0xF03F00FF03F03F00);
-  return _mm512_ternarylogic_epi64(evenBytes, oddBytes, oddSelector, 0xd8);
-}
-
-
-/// Pack 12-bit compressed samples into a contiguous bit stream in network byte order
-/// See https://soco.intel.com/docs/DOC-2665619
-///
-/// The low 12 bits of each 16-bit element of compData are used. Within each
-/// 128-bit lane the eight samples are written, most significant bit first,
-/// into lane bytes 0-11. Only lanes 0-2 are populated; every other byte of the
-/// returned vector is zero.
-__m512i
-networkBytePack12b(const __m512i compData)
-{
-  /// All constant tables repeat one 128-bit pattern in each of the four lanes
-  const auto perLane = [](const long long hi, const long long lo) {
-    return _mm512_set_epi64(hi, lo, hi, lo, hi, lo, hi, lo);
-  };
-
-  /// Shift even indexed samples left by 4 bits and leave odd indexed samples
-  /// alone so that, once the two bytes of each element are swapped, each pair
-  /// of samples fills three consecutive bytes
-  const __m512i shiftCounts = perLane(0x0000000400000004, 0x0000000400000004);
-  const __m512i aligned = _mm512_sllv_epi16(compData, shiftCounts);
-
-  /// Even indexed samples: byte-swapped into the first two bytes of each 3-byte group
-  constexpr uint64_t k_evenKeep = 0x000006DB06DB06DB;
-  const __m512i evenOrder = perLane(0x00000000000C0D00, 0x0809000405000001);
-  const __m512i evenBytes = _mm512_maskz_shuffle_epi8(k_evenKeep, aligned, evenOrder);
-
-  /// Odd indexed samples: byte-swapped into the last two bytes of each 3-byte group
-  constexpr uint64_t k_oddKeep = 0x00000DB60DB60DB6;
-  const __m512i oddOrder = perLane(0x000000000E0F000A, 0x0B00060700020300);
-  const __m512i oddBytes = _mm512_maskz_shuffle_epi8(k_oddKeep, aligned, oddOrder);
-
-  /// Bitwise merge: truth table 0xd8 yields the odd-sample bit where the
-  /// selector bit is 1 and the even-sample bit where it is 0
-  const __m512i oddSelector = perLane(0x00000000FF0F00FF, 0x0F00FF0F00FF0F00);
-  return _mm512_ternarylogic_epi64(evenBytes, oddBytes, oddSelector, 0xd8);
-}
-
-
-/// Unpack 9-bit compressed samples stored in network byte order
-/// See https://soco.intel.com/docs/DOC-2665619
-///
-/// Loads 64 bytes starting at inData, so that many bytes must be readable;
-/// inData itself does not need any particular alignment.
-/// In the result each 16-bit element holds one sample in bits 15..7 (sign bit
-/// at bit 15) with bits 6..0 cleared. Lanes 0, 1 and 2 decode input bytes 0-8,
-/// 9-17 and 18-26 respectively. Lane 3 is not a decoded sample group: it is
-/// input bytes 48-63 passed through the same shift and mask.
-__m512i
-networkByteUnpack9b(const uint8_t* inData)
-{
-  /// Unaligned load: inData is a plain byte pointer, so dereferencing it as an
-  /// __m512i (which implies 64-byte alignment) is not safe
-  const __m512i rawData = _mm512_loadu_si512(inData);
-
-  /// Align chunks of compressed bytes into lanes to allow for expansion.
-  /// Lane 0 takes dwords 0-3, lane 1 dwords 2-5, lane 2 dwords 4-7, so each
-  /// lane contains the 9 bytes of its own group of eight samples
-  const auto k_expPerm = _mm512_set_epi32(15, 14, 13, 12, 7, 6, 5, 4,
-                                          5, 4, 3, 2, 3, 2, 1, 0);
-  auto expData = _mm512_permutexvar_epi32(k_expPerm, rawData);
-
-  /// Byte shuffle to get all bits for each sample into 16b chunks: element i
-  /// receives the two consecutive stream bytes that contain sample i, swapped
-  /// so the earlier byte is the high byte.
-  /// Due to previous permute to get chunks of bytes into each lane, there is
-  /// a different shuffle offset in each lane (0, 1, 2; lane 3 is identity)
-  const __m512i k_byteShuffleMask = _mm512_set_epi64(0x0F0E0D0C0B0A0908, 0x0706050403020100,
-                                                     0x090A080907080607, 0x0506040503040203,
-                                                     0x0809070806070506, 0x0405030402030102,
-                                                     0x0708060705060405, 0x0304020301020001);
-  expData = _mm512_shuffle_epi8(expData, k_byteShuffleMask);
-
-  /// Logical shift left to set sign bit: sample i of a lane starts i bits into
-  /// its 16b chunk, so shifting by i moves its top bit to bit 15
-  const __m512i k_slBits = _mm512_set_epi64(0x0007000600050004, 0x0003000200010000,
-                                            0x0007000600050004, 0x0003000200010000,
-                                            0x0007000600050004, 0x0003000200010000,
-                                            0x0007000600050004, 0x0003000200010000);
-  expData = _mm512_sllv_epi16(expData, k_slBits);
-
-  /// Mask to zero unwanted bits (keep only the top 9 bits of every element)
-  const __m512i k_expMask = _mm512_set1_epi16(static_cast<short>(0xFF80));
-  return _mm512_and_epi64(expData, k_expMask);
-}