X-Git-Url: https://gerrit.o-ran-sc.org/r/gitweb?p=o-du%2Fphy.git;a=blobdiff_plain;f=fhi_lib%2Flib%2Fsrc%2Fxran_mod_compression.cpp;fp=fhi_lib%2Flib%2Fsrc%2Fxran_mod_compression.cpp;h=7d4a5d0fc1f59659a0f6f7d9946485fa20c17474;hp=0000000000000000000000000000000000000000;hb=2de97529a4c5a1922214ba0e6f0fb84cacbd0bc7;hpb=81a09690b36b3a4e89b4dae34f30933de13f7f90 diff --git a/fhi_lib/lib/src/xran_mod_compression.cpp b/fhi_lib/lib/src/xran_mod_compression.cpp new file mode 100644 index 0000000..7d4a5d0 --- /dev/null +++ b/fhi_lib/lib/src/xran_mod_compression.cpp @@ -0,0 +1,941 @@ +/****************************************************************************** +* +* Copyright (c) 2020 Intel. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +*******************************************************************************/ +#include +#include +#include "xran_mod_compression.h" + +#ifdef C_Module_Used +void +mod_compression_qpsk_c(int16_t *pData,int8_t *pOut,int16_t unit, int32_t nSc) +{ + for (int32_t iSc = 0 ; iSc=0 ? 0 :1; + int8_t bit_q = pData[iSc*2+1] >=0 ? 0 :1; + *pOut |= bit_i<<(7-(bit_pos*2))|bit_q<<(6-(bit_pos*2)); + if (3 == bit_pos) + pOut++; + } +} + +void +mod_compression_16qam_c(int16_t *pData,int8_t *pOut,int16_t unit, int32_t nSc) +{ + int16_t bit_unit = unit>>1; + for (int32_t iSc = 0 ; iSc>2; + for (int32_t iSc = 0 ; iSc>1; + pOut++; + *pOut |= bit_i<<7|bit_q<<4; + } + else if (2 == bit_pos) + { + *pOut |= bit_i<<1|bit_q>>2; + pOut++; + *pOut |= bit_q<<6; + } + else if (3 == bit_pos) + { + *pOut |= bit_i<<3|bit_q; + pOut++; + } + } +} + +void +mod_compression_256qam_c(int16_t *pData,int8_t *pOut,int16_t unit,int32_t nSc) +{ + int16_t bit_unit = unit>>3; + for (int32_t iSc = 0 ; iSc>2)+1);idx++) + { + *pOut = *(((int8_t *)&bits)+idx); + pOut++; + } + } +} + +inline __m512i +byte_pack2b(const __m512i comp_data) +{ + const __m512i k_shift_left = _mm512_set_epi64(0x0000000200040006, 0x0000000200040006, + 0x0000000200040006, 0x0000000200040006, + 0x0000000200040006, 0x0000000200040006, + 0x0000000200040006, 0x0000000200040006); + const auto comp_data_packed = _mm512_sllv_epi16(comp_data, k_shift_left); + + const __m512i k_byte_shufflemask1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000800, + 0x0000000000000000, 0x0000000000000800, + 0x0000000000000000, 0x0000000000000800, + 0x0000000000000000, 0x0000000000000800); + constexpr uint64_t k_bytemask1 = 0x0003000300030003; + const auto comp_data_shuff1 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask1); + + const __m512i k_byte_shufflemask2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000A02, + 0x0000000000000000, 0x0000000000000A02, + 0x0000000000000000, 0x0000000000000A02, + 0x0000000000000000, 0x0000000000000A02); + const auto comp_data_shuff2 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask2); + + const __m512i k_byte_shufflemask3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C04, + 0x0000000000000000, 0x0000000000000C04, + 0x0000000000000000, 0x0000000000000C04, + 0x0000000000000000, 0x0000000000000C04); + const auto comp_data_shuff3 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask3); + + const __m512i k_byte_shufflemask4 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000E06, + 0x0000000000000000, 0x0000000000000E06, + 0x0000000000000000, 0x0000000000000E06, + 0x0000000000000000, 0x0000000000000E06); + const auto comp_data_shuff4 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask4); + + /// Ternary blend of the two shuffled results + const __m512i k_ternlog_select1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000003030, + 0x0000000000000000, 0x0000000000003030, + 0x0000000000000000, 0x0000000000003030, + 0x0000000000000000, 0x0000000000003030); + + const __m512i k_ternlog_select2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C0C, + 0x0000000000000000, 0x0000000000000C0C, + 0x0000000000000000, 0x0000000000000C0C, + 0x0000000000000000, 0x0000000000000C0C); + + const __m512i k_ternlog_select3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000303, + 0x0000000000000000, 0x0000000000000303, + 0x0000000000000000, 0x0000000000000303, + 0x0000000000000000, 0x0000000000000303); + + auto comp_data_packed2 = _mm512_ternarylogic_epi64(comp_data_shuff1, comp_data_shuff2, k_ternlog_select1, 0xd8); + auto comp_data_packed3 = _mm512_ternarylogic_epi64(comp_data_packed2, comp_data_shuff3, k_ternlog_select2, 0xd8); + return _mm512_ternarylogic_epi64(comp_data_packed3, comp_data_shuff4, k_ternlog_select3, 0xd8); +} + +inline __m512i +byte_pack2b_snc(const __m512i comp_data) +{ + const __m512i k_shift_left = _mm512_set_epi64(0x0000000200040006, 0x0000000200040006, + 0x0000000200040006, 0x0000000200040006, + 0x0000000200040006, 0x0000000200040006, + 0x0000000200040006, 0x0000000200040006); + const auto comp_data_packed = _mm512_sllv_epi16(comp_data, k_shift_left); + + const __m512i k_byte_shufflemask1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000800, + 0x0000000000000000, 0x0000000000000800, + 0x0000000000000000, 0x0000000000000800, + 0x0000000000000000, 0x0000000000000800); + constexpr uint64_t k_bytemask1 = 0x0003000300030003; + const auto comp_data_shuff1 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask1); + + const __m512i k_byte_shufflemask2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000A02, + 0x0000000000000000, 0x0000000000000A02, + 0x0000000000000000, 0x0000000000000A02, + 0x0000000000000000, 0x0000000000000A02); + const auto comp_data_shuff2 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask2); + + const __m512i k_byte_shufflemask3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C04, + 0x0000000000000000, 0x0000000000000C04, + 0x0000000000000000, 0x0000000000000C04, + 0x0000000000000000, 0x0000000000000C04); + const auto comp_data_shuff3 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask3); + + const __m512i k_byte_shufflemask4 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000E06, + 0x0000000000000000, 0x0000000000000E06, + 0x0000000000000000, 0x0000000000000E06, + 0x0000000000000000, 0x0000000000000E06); + const auto comp_data_shuff4 = _mm512_maskz_shuffle_epi8(k_bytemask1, comp_data_packed, k_byte_shufflemask4); + + /// Ternary blend of the two shuffled results + const __m512i k_ternlog_select1 = _mm512_set_epi64(0x0000000000000000, 0x0000000000003030, + 0x0000000000000000, 0x0000000000003030, + 0x0000000000000000, 0x0000000000003030, + 0x0000000000000000, 0x0000000000003030); + + const __m512i k_ternlog_select2 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000C0C, + 0x0000000000000000, 0x0000000000000C0C, + 0x0000000000000000, 0x0000000000000C0C, + 0x0000000000000000, 0x0000000000000C0C); + + const __m512i k_ternlog_select3 = _mm512_set_epi64(0x0000000000000000, 0x0000000000000303, + 0x0000000000000000, 0x0000000000000303, + 0x0000000000000000, 0x0000000000000303, + 0x0000000000000000, 0x0000000000000303); + + auto comp_data_packed2 = _mm512_ternarylogic_epi64(comp_data_shuff1, comp_data_shuff2, k_ternlog_select1, 0xd8); + auto comp_data_packed3 = _mm512_ternarylogic_epi64(comp_data_packed2, comp_data_shuff3, k_ternlog_select2, 0xd8); + auto comp_data_packed4 = _mm512_ternarylogic_epi64(comp_data_packed3, comp_data_shuff4, k_ternlog_select3, 0xd8); + const auto k_byte_permute = + _mm512_setr_epi32( + 0x11100100, 0x31302120, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + + return _mm512_permutexvar_epi8(k_byte_permute,comp_data_packed4); +} + +void mod_compression_16qam_avx512(int16_t *pData, int8_t *pOut, int16_t unit, int32_t nSc) +{ + int16_t bit_unit = unit>>1; + if (0 == bit_unit) + { + printf("modulation compression unit is too low!\n "); + bit_unit = 1; + } + __m512i symbol,symbol_unit ,bit_convert,byte_pack; + __mmask32 mask32 ; + __mmask16 mask_store = 0x3; + int32_t nSc0,nSc1; + nSc0 = nSc&0xfffffff0; + nSc1 = nSc&0xf; + symbol_unit = _mm512_set1_epi16(bit_unit); + bit_convert = _mm512_set1_epi16(3); + for (int32_t iSc = 0 ; iSc>2)&0x1)<<1); + _mm_mask_storeu_epi8(pOut , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 0)); + left_mask = ((k1>>4)&0x1)|(((k1>>6)&0x1)<<1); + _mm_mask_storeu_epi8(pOut+2 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 1)); + left_mask = ((k1>>8)&0x1)|(((k1>>10)&0x1)<<1); + _mm_mask_storeu_epi8(pOut+4 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 2)); + left_mask = ((k1>>12)&0x1)|(((k1>>14)&0x1)<<1); + _mm_mask_storeu_epi8(pOut+6 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 3)); + } +} + +void mod_compression_16qam_snc(int16_t *pData, int8_t *pOut, int16_t unit, int32_t nSc) +{ + int16_t bit_unit = unit>>1; + if (0 == bit_unit) + { + printf("modulation compression unit is too low!\n "); + bit_unit = 1; + } + __m512i symbol,symbol_unit ,bit_convert,byte_pack; + __mmask32 mask32 ; + __mmask16 mask_store = 0x3; + int32_t nSc0,nSc1; + nSc0 = nSc&0xfffffff0; + nSc1 = nSc&0xf; + symbol_unit = _mm512_set1_epi16(bit_unit); + bit_convert = _mm512_set1_epi16(3); + for (int32_t iSc = 0 ; iSc>1; + left_mask = ((__mmask16)1<>2; + if (0 == bit_unit) + { + printf("modulation compression unit is too low!\n "); + bit_unit = 1; + } + __m512i symbol,symbol_unit ,bit_convert,byte_pack; + __mmask32 mask32 ; + __mmask16 mask_store = 0x7; + int32_t nSc0,nSc1; + nSc0 = nSc&0xfffffff0; + nSc1 = nSc&0xf; + symbol_unit = _mm512_set1_epi16(bit_unit); + bit_convert = _mm512_set1_epi16(7); + for (int32_t iSc = 0 ; iSc>4)&mask_store; + _mm_mask_storeu_epi8(pOut+3 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 1)); + left_mask = (k1>>8)&mask_store; + _mm_mask_storeu_epi8(pOut+6 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 2)); + left_mask = (k1>>12)&mask_store; + _mm_mask_storeu_epi8(pOut+9 , left_mask ,_mm512_extracti64x2_epi64(byte_pack, 3)); + } +} + +void mod_compression_64qam_snc(int16_t *pData, int8_t *pOut, int16_t unit, int32_t nSc) +{ + int16_t bit_unit = unit>>2; + if (0 == bit_unit) + { + printf("modulation compression unit is too low!\n "); + bit_unit = 1; + } + __m512i symbol,symbol_unit ,bit_convert,byte_pack; + __mmask32 mask32 ; + __mmask16 mask_store = 0x7; + int32_t nSc0,nSc1; + nSc0 = nSc&0xfffffff0; + nSc1 = nSc&0xf; + symbol_unit = _mm512_set1_epi16(bit_unit); + bit_convert = _mm512_set1_epi16(7); + for (int32_t iSc = 0 ; iSc>2; + left_mask = ((__mmask16)1<>3; + if (0 == bit_unit) + { + printf("modulation compression unit is too low!\n "); + bit_unit = 1; + } + __m512i symbol,symbol_unit ,bit_convert; + __mmask32 mask32 ; + __mmask16 mask_store =0xF; + int32_t nSc0,nSc1; + nSc0 = nSc&0xfffffff0; + nSc1 = nSc&0xf; + symbol_unit = _mm512_set1_epi16(bit_unit); + bit_convert = _mm512_set1_epi16(15); + for (int32_t iSc = 0 ; iSc>3; + if (0 == bit_unit) + { + printf("modulation compression unit is too low!\n "); + bit_unit = 1; + } + __m512i symbol,symbol_unit ,bit_convert; + __mmask32 mask32 ; + __mmask16 mask_store =0xF; + int32_t nSc0,nSc1; + nSc0 = nSc&0xfffffff0; + nSc1 = nSc&0xf; + symbol_unit = _mm512_set1_epi16(bit_unit); + bit_convert = _mm512_set1_epi16(15); + for (int32_t iSc = 0 ; iScmodulation) + { + case XRAN_QPSK: + mod_compression_qpsk_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM16: + mod_compression_16qam_snc(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM64: + mod_compression_64qam_snc(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM256: + mod_compression_256qam_snc(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + default: + printf("Error invalid modulation compression request\n"); + return -1; + } + return 0; +} + +int xranlib_5gnr_mod_compression(const struct xranlib_5gnr_mod_compression_request* request, + struct xranlib_5gnr_mod_compression_response* response){ +#ifdef C_Module_Used + return (xranlib_5gnr_mod_compression_c(request, response)); +#else + if(_may_i_use_cpu_feature(_FEATURE_AVX512IFMA52)) + return (xranlib_5gnr_mod_compression_snc(request, response)); + else + return (xranlib_5gnr_mod_compression_avx512(request, response)); +#endif +} + +#ifdef C_Module_Used +int xranlib_5gnr_mod_compression_c(const struct xranlib_5gnr_mod_compression_request* request, + struct xranlib_5gnr_mod_compression_response* response){ + + switch(request->modulation) + { + case XRAN_QPSK: + mod_compression_qpsk_c(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM16: + mod_compression_16qam_c(request->data_in, response->data_out, request->unit,request->num_symbols); + break; + case XRAN_QAM64: + mod_compression_64qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM256: + mod_compression_256qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + default: + printf("Error invalid modulation compression request\n"); + return -1; + } + return 0; +} +#endif +int xranlib_5gnr_mod_compression_avx512(const struct xranlib_5gnr_mod_compression_request* request, + struct xranlib_5gnr_mod_compression_response* response){ + + switch(request->modulation) + { + case XRAN_QPSK: + mod_compression_qpsk_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM16: + mod_compression_16qam_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM64: + mod_compression_64qam_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM256: + mod_compression_256qam_avx512(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + default: + printf("Error invalid modulation compression request\n"); + return -1; + } + return 0; +} + +void +mod_decompression_qpsk_c(int8_t *pData,int16_t *pOut,int16_t unit, int32_t nSc ,int16_t re_mask) +{ + int16_t symbol_unit[2] = {0}; + symbol_unit[0] = (unit>>1); + symbol_unit[1] = (unit>>1)*-1; + for (int32_t iSc = 0 ; iSc> mask_pos)&0x1)) + { + uint8_t symbol_pos= iSc &0x3; + uint32_t byte_pos= iSc >>2; + uint8_t bit_i = (pData[byte_pos]>>(7-(symbol_pos*2)))&0x1; + pOut[iSc*2] = symbol_unit[bit_i]; + uint8_t bit_q = (pData[byte_pos]>>(6-(symbol_pos*2)))&0x1; + pOut[iSc*2+1] = symbol_unit[bit_q]; + } + } +} + +void +mod_decompression_16qam_c(int8_t *pData,int16_t *pOut,int16_t unit, int32_t nSc) +{ + int16_t symbol_unit[4] = {0}; + symbol_unit[0] = (unit>>2); + symbol_unit[1] = (unit>>2)*3; + symbol_unit[3] = (unit>>2)*-1; + symbol_unit[2] = (unit>>2)*-3; + for (int32_t iSc = 0 ; iSc>1; + uint8_t bit_i = (pData[byte_pos]>>(6-(symbol_pos*4)))&0x3; + pOut[iSc*2] = symbol_unit[bit_i]; + uint8_t bit_q = (pData[byte_pos]>>(4-(symbol_pos*4)))&0x3; + pOut[iSc*2+1] = symbol_unit[bit_q]; + } +} + +void +mod_decompression_64qam_c(int8_t *pData,int16_t *pOut,int16_t unit, int32_t nSc) +{ + int16_t symbol_unit[8] = {0}; + symbol_unit[0] = (unit>>3); + symbol_unit[1] = (unit>>3)*3; + symbol_unit[2] = (unit>>3)*5; + symbol_unit[3] = (unit>>3)*7; + symbol_unit[7] = (unit>>3)*-1; + symbol_unit[6] = (unit>>3)*-3; + symbol_unit[5] = (unit>>3)*-5; + symbol_unit[4] = (unit>>3)*-7; + uint8_t bit_i , bit_q ; + for (int32_t iSc = 0 ; iSc>5)&0x7; + bit_q = (pData[0]>>2)&0x7; + } + else if (1 == symbol_pos) + { + bit_i = ((pData[0]&0x3)<<1)|((pData[1]>>7)&0x1); + bit_q = (pData[1]>>4)&0x7; + } + else if (2 == symbol_pos) + { + bit_q = ((pData[1]&0x1)<<2)|((pData[2]>>6)&0x3); + bit_i = (pData[1]>>1)&0x7; + } + else if (3 == symbol_pos) + { + bit_i = (pData[2]>>3)&0x7; + bit_q = pData[2]&0x7; + pData +=3; + } + pOut[iSc*2] = symbol_unit[bit_i]; + pOut[iSc*2+1] = symbol_unit[bit_q]; + } +} + +void +mod_decompression_256qam_c(int8_t *pData,int16_t *pOut,int16_t unit,int32_t nSc) +{ + int16_t symbol_unit[16] = {0}; + symbol_unit[0] = (unit>>4); + symbol_unit[1] = (unit>>4)*3; + symbol_unit[2] = (unit>>4)*5; + symbol_unit[3] = (unit>>4)*7; + symbol_unit[4] = (unit>>4)*9; + symbol_unit[5] = (unit>>4)*11; + symbol_unit[6] = (unit>>4)*13; + symbol_unit[7] = (unit>>4)*15; + symbol_unit[15] = (unit>>4)*-1; + symbol_unit[14] = (unit>>4)*-3; + symbol_unit[13] = (unit>>4)*-5; + symbol_unit[12] = (unit>>4)*-7; + symbol_unit[11] = (unit>>4)*-9; + symbol_unit[10] = (unit>>4)*-11; + symbol_unit[9] = (unit>>4)*-13; + symbol_unit[8] = (unit>>4)*-15; + for (int32_t iSc = 0 ; iSc>4)&0xF; + uint8_t bit_q = pData[iSc]&0xF; + pOut[iSc*2] = symbol_unit[bit_i]; + pOut[iSc*2+1] = symbol_unit[bit_q]; + } +} + +int xranlib_5gnr_mod_decompression(const struct xranlib_5gnr_mod_decompression_request* request, + struct xranlib_5gnr_mod_decompression_response* response){ + + switch(request->modulation) + { + case XRAN_QPSK: + mod_decompression_qpsk_c(request->data_in, response->data_out, request->unit, request->num_symbols, request->re_mask); + break; + case XRAN_QAM16: + mod_decompression_16qam_c(request->data_in, response->data_out, request->unit,request->num_symbols); + break; + case XRAN_QAM64: + mod_decompression_64qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + case XRAN_QAM256: + mod_decompression_256qam_c(request->data_in, response->data_out, request->unit, request->num_symbols); + break; + default: + printf("Error invalid modulation compression request\n"); + return -1; + } + return 0; +} + +