/// Pack compressed 9 bit data in network byte order
/// See https://soco.intel.com/docs/DOC-2665619
///
/// Each 128-bit lane is processed independently: the low 9 bits of the lane's
/// eight 16-bit elements are concatenated most-significant-bit first (element 0
/// first) into the lane's low 9 bytes. The remaining 7 bytes of each lane are
/// zero. Bits above bit 8 of an input element do not reach the output.
inline __m512i
networkBytePack9b(const __m512i compData)
{
  /// Repeats one 128-bit pattern (upper qword, lower qword) in all four lanes
  const auto perLane = [](const long long upper, const long long lower) {
    return _mm512_set_epi64(upper, lower, upper, lower, upper, lower, upper, lower);
  };

  /// Left-shift counts 7,6,...,0 for elements 0..7 of a lane, which place each
  /// sample at the bit offset it has inside its two destination bytes
  const __m512i k_shiftCounts = perLane(0x0000000100020003, 0x0004000500060007);

  /// Byte-swapped even elements (0,2,4,6) land on output bytes 0..7
  const __m512i k_evenShuffle = perLane(0x0000000000000000, 0x0C0D080904050001);
  constexpr uint64_t k_evenKeep = 0x00FF00FF00FF00FF;

  /// Byte-swapped odd elements (1,3,5,7) land on output bytes 1..8
  const __m512i k_oddShuffle = perLane(0x000000000000000E, 0x0F0A0B0607020300);
  constexpr uint64_t k_oddKeep = 0x01FE01FE01FE01FE;

  /// Bits set here are taken from the odd-element vector, clear bits from the
  /// even-element vector
  const __m512i k_takeOdd = perLane(0x00000000000000FF, 0x01FC07F01FC07F00);

  const __m512i aligned = _mm512_sllv_epi16(compData, k_shiftCounts);
  const __m512i evenBytes = _mm512_maskz_shuffle_epi8(k_evenKeep, aligned, k_evenShuffle);
  const __m512i oddBytes = _mm512_maskz_shuffle_epi8(k_oddKeep, aligned, k_oddShuffle);

  /// Truth table 0xd8 is a bitwise select: k_takeOdd ? oddBytes : evenBytes
  return _mm512_ternarylogic_epi64(evenBytes, oddBytes, k_takeOdd, 0xd8);
}
-
-
/// Pack compressed 10 bit data in network byte order
/// See https://soco.intel.com/docs/DOC-2665619
///
/// Each 128-bit lane is processed independently: the low 10 bits of the lane's
/// eight 16-bit elements are concatenated most-significant-bit first (element 0
/// first) into the lane's low 10 bytes. The remaining 6 bytes of each lane are
/// zero. Bits above bit 9 of an input element do not reach the output.
inline __m512i
networkBytePack10b(const __m512i compData)
{
  /// Repeats one 128-bit pattern (upper qword, lower qword) in all four lanes
  const auto perLane = [](const long long upper, const long long lower) {
    return _mm512_set_epi64(upper, lower, upper, lower, upper, lower, upper, lower);
  };

  /// Left-shift counts 6,4,2,0 repeated for elements 0..3 and 4..7 of a lane,
  /// which place each sample at the bit offset it has inside its destination bytes
  const __m512i k_shiftCounts = perLane(0x0000000200040006, 0x0000000200040006);

  /// Byte-swapped even elements (0,2,4,6) land on output bytes 0-3 and 5-8
  const __m512i k_evenShuffle = perLane(0x000000000000000C, 0x0D08090004050001);
  constexpr uint64_t k_evenKeep = 0x01EF01EF01EF01EF;

  /// Byte-swapped odd elements (1,3,5,7) land on output bytes 1-4 and 6-9
  const __m512i k_oddShuffle = perLane(0x0000000000000E0F, 0x0A0B000607020300);
  constexpr uint64_t k_oddKeep = 0x03DE03DE03DE03DE;

  /// Bits set here are taken from the odd-element vector, clear bits from the
  /// even-element vector
  const __m512i k_takeOdd = perLane(0x000000000000FF03, 0xF03F00FF03F03F00);

  const __m512i aligned = _mm512_sllv_epi16(compData, k_shiftCounts);
  const __m512i evenBytes = _mm512_maskz_shuffle_epi8(k_evenKeep, aligned, k_evenShuffle);
  const __m512i oddBytes = _mm512_maskz_shuffle_epi8(k_oddKeep, aligned, k_oddShuffle);

  /// Truth table 0xd8 is a bitwise select: k_takeOdd ? oddBytes : evenBytes
  return _mm512_ternarylogic_epi64(evenBytes, oddBytes, k_takeOdd, 0xd8);
}
-
-
/// Pack compressed 12 bit data in network byte order
/// See https://soco.intel.com/docs/DOC-2665619
///
/// Each 128-bit lane is processed independently: the low 12 bits of the lane's
/// eight 16-bit elements are concatenated most-significant-bit first (element 0
/// first) into the lane's low 12 bytes. The remaining 4 bytes of each lane are
/// zero. Bits above bit 11 of an input element do not reach the output.
inline __m512i
networkBytePack12b(const __m512i compData)
{
  /// Repeats one 128-bit pattern (upper qword, lower qword) in all four lanes
  const auto perLane = [](const long long upper, const long long lower) {
    return _mm512_set_epi64(upper, lower, upper, lower, upper, lower, upper, lower);
  };

  /// Left-shift counts: 4 for even elements, 0 for odd elements, which place
  /// each sample at the bit offset it has inside its two destination bytes
  const __m512i k_shiftCounts = perLane(0x0000000400000004, 0x0000000400000004);

  /// Byte-swapped even elements (0,2,4,6) land on output bytes 0-1, 3-4, 6-7, 9-10
  const __m512i k_evenShuffle = perLane(0x00000000000C0D00, 0x0809000405000001);
  constexpr uint64_t k_evenKeep = 0x06DB06DB06DB06DB;

  /// Byte-swapped odd elements (1,3,5,7) land on output bytes 1-2, 4-5, 7-8, 10-11
  const __m512i k_oddShuffle = perLane(0x000000000E0F000A, 0x0B00060700020300);
  constexpr uint64_t k_oddKeep = 0x0DB60DB60DB60DB6;

  /// Bits set here are taken from the odd-element vector, clear bits from the
  /// even-element vector
  const __m512i k_takeOdd = perLane(0x00000000FF0F00FF, 0x0F00FF0F00FF0F00);

  const __m512i aligned = _mm512_sllv_epi16(compData, k_shiftCounts);
  const __m512i evenBytes = _mm512_maskz_shuffle_epi8(k_evenKeep, aligned, k_evenShuffle);
  const __m512i oddBytes = _mm512_maskz_shuffle_epi8(k_oddKeep, aligned, k_oddShuffle);

  /// Truth table 0xd8 is a bitwise select: k_takeOdd ? oddBytes : evenBytes
  return _mm512_ternarylogic_epi64(evenBytes, oddBytes, k_takeOdd, 0xd8);
}
-