/**
 * Overview of this translation unit (review notes).
 *
 * Purpose: pack interleaved int16_t sample pairs (even index -> "bit_i",
 * odd index -> "bit_q") into small per-component bit fields, and expand the
 * packed form back to int16_t, for XRAN_QPSK, XRAN_QAM16, XRAN_QAM64 and
 * XRAN_QAM256.
 *
 * NOTE(review): in this copy the file is collapsed onto a few very long
 * physical lines and several spans look truncated (the #include targets are
 * empty, many "for" headers stop after "iSc", some function headers are
 * missing). The text that appears to be lost always sits between a '<' and a
 * '>' character. Restore from the upstream file before building. Also note
 * that each "/// Ternary blend ..." line comment is followed by code on the
 * same physical line, so as laid out here that code is part of the comment.
 *
 * Scalar compression (only compiled when C_Module_Used is defined):
 *  - mod_compression_qpsk_c: one bit per component (1 when the sample is
 *    negative), OR-ed into *pOut at bit (7 - 2*bit_pos) for bit_i and
 *    (6 - 2*bit_pos) for bit_q; pOut advances when bit_pos == 3. Because the
 *    output is OR-ed in, the destination presumably must be zeroed by the
 *    caller -- TODO confirm.
 *  - mod_compression_16qam_c / _64qam_c / _256qam_c: derive bit_unit as
 *    unit>>1, unit>>2 and unit>>3 respectively. The visible 64QAM branches
 *    spread 3-bit bit_i/bit_q values across byte boundaries. The remaining
 *    loop bodies are not fully visible here.
 *
 * Packing helpers:
 *  - byte_pack2b: shifts the 16-bit lanes left by the per-lane counts in
 *    k_shift_left (6, 4, 2, 0 repeating), gathers bytes with four zero-masked
 *    byte shuffles, and merges them with three ternary-logic operations
 *    (imm 0xd8) under the k_ternlog_select1..3 bit masks.
 *  - byte_pack2b_snc: same sequence, followed by _mm512_permutexvar_epi8 with
 *    an index vector whose first eight bytes are 0x00,0x01,0x10,0x11,0x20,
 *    0x21,0x30,0x31 (all other index bytes are 0xFF).
 *
 * AVX-512 compression (mod_compression_*_avx512 / mod_compression_*_snc):
 *  - bit_unit is unit>>1 (16QAM), unit>>2 (64QAM) or unit>>3 (256QAM); when
 *    it is 0 a message is printed and 1 is used instead.
 *  - nSc is split into nSc0 = nSc & 0xfffffff0 (multiple of 16) and
 *    nSc1 = nSc & 0xf (remainder).
 *  - bit_unit and the constant 3 / 7 / 15 are broadcast to 16-bit lanes.
 *  - Tail handling uses masked 128-bit byte stores. Most of the loop bodies
 *    are not visible in this copy.
 *  - mod_compression_qpsk_avx512 is called by the dispatchers, but its
 *    definition is not visible here.
 *
 * Compression dispatch:
 *  - xranlib_5gnr_mod_compression: with C_Module_Used defined it calls
 *    xranlib_5gnr_mod_compression_c; otherwise it calls the _snc variant when
 *    _may_i_use_cpu_feature(_FEATURE_AVX512IFMA52) is true and the _avx512
 *    variant when it is not.
 *  - xranlib_5gnr_mod_compression_c / _avx512 / _snc: switch on
 *    request->modulation and forward request->data_in, response->data_out,
 *    request->unit and request->num_symbols. They return 0 on success; for an
 *    unknown modulation they print an error and return -1.
 *
 * Decompression (scalar only):
 *  - Each mod_decompression_*_c builds a lookup table symbol_unit[] and writes
 *    symbol_unit[bit_i] / symbol_unit[bit_q] to pOut[iSc*2] / pOut[iSc*2+1].
 *    QPSK:   {u, -u}                          with u = unit>>1
 *    16QAM:  {u, 3u, -3u, -u}                 with u = unit>>2
 *    64QAM:  {u, 3u, 5u, 7u, -7u, -5u, -3u, -u} with u = unit>>3
 *    256QAM: {u, 3u, ..., 15u, -15u, ..., -u} with u = unit>>4
 *  - mod_decompression_qpsk_c also takes re_mask; the visible code gates each
 *    output pair on a single-bit test, but the expression that selects the
 *    bit is truncated in this copy.
 *  - mod_decompression_64qam_c consumes 3 input bytes per 4 symbols (pData
 *    advances by 3 when symbol_pos == 3).
 *  - mod_decompression_256qam_c reads bit_q from the low nibble of
 *    pData[iSc]; the bit_i expression is truncated.
 *  - xranlib_5gnr_mod_decompression: switch on request->modulation, always
 *    calling the *_c routines. Returns 0, or -1 after printing an error.
 *    NOTE(review): the error text says "compression" although this is the
 *    decompression path.
 */
/****************************************************************************** * * Copyright (c) 2020 Intel. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *******************************************************************************/ #include #include #include "xran_mod_compression.h" #ifdef C_Module_Used void mod_compression_qpsk_c(int16_t *pData,int8_t *pOut,int16_t unit, int32_t nSc) { for (int32_t iSc = 0 ; iSc=0 ? 0 :1; int8_t bit_q = pData[iSc*2+1] >=0 ? 0 :1; *pOut |= bit_i<<(7-(bit_pos*2))|bit_q<<(6-(bit_pos*2)); if (3 == bit_pos) pOut++; } } void mod_compression_16qam_c(int16_t *pData,int8_t *pOut,int16_t unit, int32_t nSc) { int16_t bit_unit = unit>>1; for (int32_t iSc = 0 ; iSc>2; for (int32_t iSc = 0 ; iSc>1; pOut++; *pOut |= bit_i<<7|bit_q<<4; } else if (2 == bit_pos) { *pOut |= bit_i<<1|bit_q>>2; pOut++; *pOut |= bit_q<<6; } else if (3 == bit_pos) { *pOut |= bit_i<<3|bit_q; pOut++; } } } void mod_compression_256qam_c(int16_t *pData,int8_t *pOut,int16_t unit,int32_t nSc) { int16_t bit_unit = unit>>3; for (int32_t iSc = 0 ; iSc>2)+1);idx++) { *pOut = *(((int8_t *)&bits)+idx); pOut++; } } } inline __m512i byte_pack2b(const __m512i comp_data) { const __m512i k_shift_left = _mm512_set_epi64(0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006); const auto comp_data_packed = _mm512_sllv_epi16(comp_data, k_shift_left); const __m512i k_byte_shufflemask1 = 
_mm512_set_epi64(0x0000000000000000, 0x0000000000000800, 0x0000000000000000, 0x0000000000000800, 0x0000000000000000, 0x0000000000000800, 0x0000000000000000, 0x0000000000000800); constexpr uint64_t k_bytemask1 = 0x0003000300030003; const auto comp_data_shuff1 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask1); const __m512i k_byte_shufflemask2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000A02, 0x0000000000000000, 0x0000000000000A02, 0x0000000000000000, 0x0000000000000A02, 0x0000000000000000, 0x0000000000000A02); const auto comp_data_shuff2 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask2); const __m512i k_byte_shufflemask3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C04, 0x0000000000000000, 0x0000000000000C04, 0x0000000000000000, 0x0000000000000C04, 0x0000000000000000, 0x0000000000000C04); const auto comp_data_shuff3 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask3); const __m512i k_byte_shufflemask4 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000E06, 0x0000000000000000, 0x0000000000000E06, 0x0000000000000000, 0x0000000000000E06, 0x0000000000000000, 0x0000000000000E06); const auto comp_data_shuff4 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask4); /// Ternary blend of the two shuffled results const __m512i k_ternlog_select1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000003030, 0x0000000000000000, 0x0000000000003030, 0x0000000000000000, 0x0000000000003030, 0x0000000000000000, 0x0000000000003030); const __m512i k_ternlog_select2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C0C, 0x0000000000000000, 0x0000000000000C0C, 0x0000000000000000, 0x0000000000000C0C, 0x0000000000000000, 0x0000000000000C0C); const __m512i k_ternlog_select3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000303, 0x0000000000000000, 0x0000000000000303, 0x0000000000000000, 0x0000000000000303, 0x0000000000000000, 0x0000000000000303); auto 
comp_data_packed2 = _mm512_ternarylogic_epi64(comp_data_shuff1, comp_data_shuff2, k_ternlog_select1, 0xd8); auto comp_data_packed3 = _mm512_ternarylogic_epi64(comp_data_packed2, comp_data_shuff3, k_ternlog_select2, 0xd8); return _mm512_ternarylogic_epi64(comp_data_packed3, comp_data_shuff4, k_ternlog_select3, 0xd8); } inline __m512i byte_pack2b_snc(const __m512i comp_data) { const __m512i k_shift_left = _mm512_set_epi64(0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006, 0x0000000200040006); const auto comp_data_packed = _mm512_sllv_epi16(comp_data, k_shift_left); const __m512i k_byte_shufflemask1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000800, 0x0000000000000000, 0x0000000000000800, 0x0000000000000000, 0x0000000000000800, 0x0000000000000000, 0x0000000000000800); constexpr uint64_t k_bytemask1 = 0x0003000300030003; const auto comp_data_shuff1 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask1); const __m512i k_byte_shufflemask2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000A02, 0x0000000000000000, 0x0000000000000A02, 0x0000000000000000, 0x0000000000000A02, 0x0000000000000000, 0x0000000000000A02); const auto comp_data_shuff2 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask2); const __m512i k_byte_shufflemask3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C04, 0x0000000000000000, 0x0000000000000C04, 0x0000000000000000, 0x0000000000000C04, 0x0000000000000000, 0x0000000000000C04); const auto comp_data_shuff3 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask3); const __m512i k_byte_shufflemask4 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000E06, 0x0000000000000000, 0x0000000000000E06, 0x0000000000000000, 0x0000000000000E06, 0x0000000000000000, 0x0000000000000E06); const auto comp_data_shuff4 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, 
k_byte_shufflemask4); /// Ternary blend of the two shuffled results const __m512i k_ternlog_select1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000003030, 0x0000000000000000, 0x0000000000003030, 0x0000000000000000, 0x0000000000003030, 0x0000000000000000, 0x0000000000003030); const __m512i k_ternlog_select2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C0C, 0x0000000000000000, 0x0000000000000C0C, 0x0000000000000000, 0x0000000000000C0C, 0x0000000000000000, 0x0000000000000C0C); const __m512i k_ternlog_select3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000303, 0x0000000000000000, 0x0000000000000303, 0x0000000000000000, 0x0000000000000303, 0x0000000000000000, 0x0000000000000303); auto comp_data_packed2 = _mm512_ternarylogic_epi64(comp_data_shuff1, comp_data_shuff2, k_ternlog_select1, 0xd8); auto comp_data_packed3 = _mm512_ternarylogic_epi64(comp_data_packed2, comp_data_shuff3, k_ternlog_select2, 0xd8); auto comp_data_packed4 = _mm512_ternarylogic_epi64(comp_data_packed3, comp_data_shuff4, k_ternlog_select3, 0xd8); const auto k_byte_permute = _mm512_setr_epi32( 0x11100100, 0x31302120, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); return _mm512_permutexvar_epi8(k_byte_permute,comp_data_packed4); } void mod_compression_16qam_avx512(int16_t *pData, int8_t *pOut, int16_t unit, int32_t nSc) { int16_t bit_unit = unit>>1; if (0 == bit_unit) { printf("modulation compression unit is too low!\n "); bit_unit = 1; } __m512i symbol,symbol_unit ,bit_convert,byte_pack; __mmask32 mask32 ; __mmask16 mask_store = 0x3; int32_t nSc0,nSc1; nSc0 = nSc&0xfffffff0; nSc1 = nSc&0xf; symbol_unit = _mm512_set1_epi16(bit_unit); bit_convert = _mm512_set1_epi16(3); for (int32_t iSc = 0 ; iSc>2)&0x1)<<1); _mm_mask_storeu_epi8(pOut , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 0)); left_mask = ((k1>>4)&0x1)|(((k1>>6)&0x1)<<1); _mm_mask_storeu_epi8(pOut+2 , 
left_mask ,_mm512_extracti64x2_epi64(byte_pack, 1)); left_mask = ((k1>>8)&0x1)|(((k1>>10)&0x1)<<1); _mm_mask_storeu_epi8(pOut+4 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 2)); left_mask = ((k1>>12)&0x1)|(((k1>>14)&0x1)<<1); _mm_mask_storeu_epi8(pOut+6 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 3)); } } void mod_compression_16qam_snc(int16_t *pData, int8_t *pOut, int16_t unit, int32_t nSc) { int16_t bit_unit = unit>>1; if (0 == bit_unit) { printf("modulation compression unit is too low!\n "); bit_unit = 1; } __m512i symbol,symbol_unit ,bit_convert,byte_pack; __mmask32 mask32 ; __mmask16 mask_store = 0x3; int32_t nSc0,nSc1; nSc0 = nSc&0xfffffff0; nSc1 = nSc&0xf; symbol_unit = _mm512_set1_epi16(bit_unit); bit_convert = _mm512_set1_epi16(3); for (int32_t iSc = 0 ; iSc>1; left_mask = ((__mmask16)1<>2; if (0 == bit_unit) { printf("modulation compression unit is too low!\n "); bit_unit = 1; } __m512i symbol,symbol_unit ,bit_convert,byte_pack; __mmask32 mask32 ; __mmask16 mask_store = 0x7; int32_t nSc0,nSc1; nSc0 = nSc&0xfffffff0; nSc1 = nSc&0xf; symbol_unit = _mm512_set1_epi16(bit_unit); bit_convert = _mm512_set1_epi16(7); for (int32_t iSc = 0 ; iSc>4)&mask_store; _mm_mask_storeu_epi8(pOut+3 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 1)); left_mask = (k1>>8)&mask_store; _mm_mask_storeu_epi8(pOut+6 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 2)); left_mask = (k1>>12)&mask_store; _mm_mask_storeu_epi8(pOut+9 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 3)); } } void mod_compression_64qam_snc(int16_t *pData, int8_t *pOut, int16_t unit, int32_t nSc) { int16_t bit_unit = unit>>2; if (0 == bit_unit) { printf("modulation compression unit is too low!\n "); bit_unit = 1; } __m512i symbol,symbol_unit ,bit_convert,byte_pack; __mmask32 mask32 ; __mmask16 mask_store = 0x7; int32_t nSc0,nSc1; nSc0 = nSc&0xfffffff0; nSc1 = nSc&0xf; symbol_unit = _mm512_set1_epi16(bit_unit); bit_convert = _mm512_set1_epi16(7); for (int32_t iSc = 0 ; iSc>2; left_mask = 
((__mmask16)1<>3; if (0 == bit_unit) { printf("modulation compression unit is too low!\n "); bit_unit = 1; } __m512i symbol,symbol_unit ,bit_convert; __mmask32 mask32 ; __mmask16 mask_store =0xF; int32_t nSc0,nSc1; nSc0 = nSc&0xfffffff0; nSc1 = nSc&0xf; symbol_unit = _mm512_set1_epi16(bit_unit); bit_convert = _mm512_set1_epi16(15); for (int32_t iSc = 0 ; iSc>3; if (0 == bit_unit) { printf("modulation compression unit is too low!\n "); bit_unit = 1; } __m512i symbol,symbol_unit ,bit_convert; __mmask32 mask32 ; __mmask16 mask_store =0xF; int32_t nSc0,nSc1; nSc0 = nSc&0xfffffff0; nSc1 = nSc&0xf; symbol_unit = _mm512_set1_epi16(bit_unit); bit_convert = _mm512_set1_epi16(15); for (int32_t iSc = 0 ; iScmodulation) { case XRAN_QPSK: mod_compression_qpsk_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM16: mod_compression_16qam_snc(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM64: mod_compression_64qam_snc(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM256: mod_compression_256qam_snc(request->data_in, response->data_out, request->unit, request->num_symbols); break; default: printf("Error invalid modulation compression request\n"); return -1; } return 0; } int xranlib_5gnr_mod_compression(const struct xranlib_5gnr_mod_compression_request* request, struct xranlib_5gnr_mod_compression_response* response){ #ifdef C_Module_Used return (xranlib_5gnr_mod_compression_c(request, response)); #else if(_may_i_use_cpu_feature(_FEATURE_AVX512IFMA52)) return (xranlib_5gnr_mod_compression_snc(request, response)); else return (xranlib_5gnr_mod_compression_avx512(request, response)); #endif } #ifdef C_Module_Used int xranlib_5gnr_mod_compression_c(const struct xranlib_5gnr_mod_compression_request* request, struct xranlib_5gnr_mod_compression_response* response){ switch(request->modulation) { case XRAN_QPSK: 
mod_compression_qpsk_c(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM16: mod_compression_16qam_c(request->data_in, response->data_out, request->unit,request->num_symbols); break; case XRAN_QAM64: mod_compression_64qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM256: mod_compression_256qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); break; default: printf("Error invalid modulation compression request\n"); return -1; } return 0; } #endif int xranlib_5gnr_mod_compression_avx512(const struct xranlib_5gnr_mod_compression_request* request, struct xranlib_5gnr_mod_compression_response* response){ switch(request->modulation) { case XRAN_QPSK: mod_compression_qpsk_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM16: mod_compression_16qam_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM64: mod_compression_64qam_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM256: mod_compression_256qam_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); break; default: printf("Error invalid modulation compression request\n"); return -1; } return 0; } void mod_decompression_qpsk_c(int8_t *pData,int16_t *pOut,int16_t unit, int32_t nSc ,int16_t re_mask) { int16_t symbol_unit[2] = {0}; symbol_unit[0] = (unit>>1); symbol_unit[1] = (unit>>1)*-1; for (int32_t iSc = 0 ; iSc> mask_pos)&0x1)) { uint8_t symbol_pos= iSc &0x3; uint32_t byte_pos= iSc >>2; uint8_t bit_i = (pData[byte_pos]>>(7-(symbol_pos*2)))&0x1; pOut[iSc*2] = symbol_unit[bit_i]; uint8_t bit_q = (pData[byte_pos]>>(6-(symbol_pos*2)))&0x1; pOut[iSc*2+1] = symbol_unit[bit_q]; } } } void mod_decompression_16qam_c(int8_t *pData,int16_t *pOut,int16_t unit, int32_t nSc) { int16_t symbol_unit[4] = {0}; symbol_unit[0] = 
(unit>>2); symbol_unit[1] = (unit>>2)*3; symbol_unit[3] = (unit>>2)*-1; symbol_unit[2] = (unit>>2)*-3; for (int32_t iSc = 0 ; iSc>1; uint8_t bit_i = (pData[byte_pos]>>(6-(symbol_pos*4)))&0x3; pOut[iSc*2] = symbol_unit[bit_i]; uint8_t bit_q = (pData[byte_pos]>>(4-(symbol_pos*4)))&0x3; pOut[iSc*2+1] = symbol_unit[bit_q]; } } void mod_decompression_64qam_c(int8_t *pData,int16_t *pOut,int16_t unit, int32_t nSc) { int16_t symbol_unit[8] = {0}; symbol_unit[0] = (unit>>3); symbol_unit[1] = (unit>>3)*3; symbol_unit[2] = (unit>>3)*5; symbol_unit[3] = (unit>>3)*7; symbol_unit[7] = (unit>>3)*-1; symbol_unit[6] = (unit>>3)*-3; symbol_unit[5] = (unit>>3)*-5; symbol_unit[4] = (unit>>3)*-7; uint8_t bit_i , bit_q ; for (int32_t iSc = 0 ; iSc>5)&0x7; bit_q = (pData[0]>>2)&0x7; } else if (1 == symbol_pos) { bit_i = ((pData[0]&0x3)<<1)|((pData[1]>>7)&0x1); bit_q = (pData[1]>>4)&0x7; } else if (2 == symbol_pos) { bit_q = ((pData[1]&0x1)<<2)|((pData[2]>>6)&0x3); bit_i = (pData[1]>>1)&0x7; } else if (3 == symbol_pos) { bit_i = (pData[2]>>3)&0x7; bit_q = pData[2]&0x7; pData +=3; } pOut[iSc*2] = symbol_unit[bit_i]; pOut[iSc*2+1] = symbol_unit[bit_q]; } } void mod_decompression_256qam_c(int8_t *pData,int16_t *pOut,int16_t unit,int32_t nSc) { int16_t symbol_unit[16] = {0}; symbol_unit[0] = (unit>>4); symbol_unit[1] = (unit>>4)*3; symbol_unit[2] = (unit>>4)*5; symbol_unit[3] = (unit>>4)*7; symbol_unit[4] = (unit>>4)*9; symbol_unit[5] = (unit>>4)*11; symbol_unit[6] = (unit>>4)*13; symbol_unit[7] = (unit>>4)*15; symbol_unit[15] = (unit>>4)*-1; symbol_unit[14] = (unit>>4)*-3; symbol_unit[13] = (unit>>4)*-5; symbol_unit[12] = (unit>>4)*-7; symbol_unit[11] = (unit>>4)*-9; symbol_unit[10] = (unit>>4)*-11; symbol_unit[9] = (unit>>4)*-13; symbol_unit[8] = (unit>>4)*-15; for (int32_t iSc = 0 ; iSc>4)&0xF; uint8_t bit_q = pData[iSc]&0xF; pOut[iSc*2] = symbol_unit[bit_i]; pOut[iSc*2+1] = symbol_unit[bit_q]; } } int xranlib_5gnr_mod_decompression(const struct xranlib_5gnr_mod_decompression_request* 
request, struct xranlib_5gnr_mod_decompression_response* response){ switch(request->modulation) { case XRAN_QPSK: mod_decompression_qpsk_c(request->data_in, response->data_out, request->unit, request->num_symbols, request->re_mask); break; case XRAN_QAM16: mod_decompression_16qam_c(request->data_in, response->data_out, request->unit,request->num_symbols); break; case XRAN_QAM64: mod_decompression_64qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); break; case XRAN_QAM256: mod_decompression_256qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); break; default: printf("Error invalid modulation compression request\n"); return -1; } return 0; }