}
while(src < src_end);
115}
while(src < src_end);
130 __m128iaccA, accB, accC, accD;
171}
while(src < src_end);
180 returnmacc[0] | macc[1] | macc[2] | macc[3];
263 __m128im1A, m2A, m1B, m2B, m1C, m2C, m1D, m2D;
264 __m128iaccA, accB, accC, accD;
298}
while(src < src_end);
307 returnmacc[0] | macc[1] | macc[2] | macc[3];
316 __m128im1A, m2A, m1B, m2B, m1C, m2C, m1D, m2D;
317 __m128iaccA, accB, accC, accD;
351}
while(src < src_end);
360 returnmacc[0] | macc[1] | macc[2] | macc[3];
375 __m128im1A, m2A, m1B, m2B, m1C, m2C, m1D, m2D;
409}
while(src < src_end);
416 return(maskA == 0xFFFFu);
430 __m128im1A, m2A, m1B, m2B, m1C, m2C, m1D, m2D;
461}
while(src < src_end);
467 return(maskA == 0xFFFFu);
505src1 += 4; src2 += 4; dst += 4;
507}
while(src1 < src_end1);
513 return(maskA == 0xFFFFu);
556src1 += 4; src2 += 4; dst += 4;
558}
while(src1 < src_end1);
564 return(maskA == 0xFFFFu);
620src1 += 4; src2 += 4;
621src3 += 4; src4 += 4;
628}
while(src1 < src_end1);
634 return(maskA == 0xFFFFu);
650 __m128iaccA, accB, accC, accD;
675}
while(src < src_end);
683 returnmacc[0] | macc[1] | macc[2] | macc[3];
698 __m128iaccA, accB, accC, accD;
722src1 += 4; src2 += 4; dst += 4;
723}
while(src1 < src1_end);
731 returnmacc[0] | macc[1] | macc[2] | macc[3];
748 __m128iaccA, accB, accC, accD;
788}
while(src < src_end);
797 returnmacc[0] | macc[1] | macc[2] | macc[3];
828}
while(dst < dst_end);
841 __m128ixmm0, xmm1, xmm2, xmm3;
869}
while(src < src_end);
882 __m128ixmm0, xmm1, xmm2, xmm3;
910}
while(src < src_end);
924 __m128ixmm0, xmm1, xmm2, xmm3;
952}
while(src < src_end);
965 __m128ixmm0, xmm1, xmm2, xmm3;
993}
while(src < src_end);
1031}
while(dst < (
__m128i*)dst_end);
1073 unsignedsse_vect_waves,
1078 for(
unsigned i= 0;
i< sse_vect_waves; ++
i)
1088 unsigned short* cnt8 = (
unsigned short*)&xcnt;
1089*sum += (cnt8[0]) + (cnt8[2]) + (cnt8[4]) + (cnt8[6]);
1110 unsignedunroll_factor = 8;
1111 unsigned len= to - from + 1;
1112 unsignedlen_unr =
len- (
len% unroll_factor);
1119 __m128ivect40, vect41, norm_vect40, norm_vect41, cmp_mask_ge;
1122 for(; k < len_unr; k+=unroll_factor)
1135 returnfrom + k + (bsf / 4);
1148 return4 + from + k + (bsf / 4);
1152 for(; k <
len; ++k)
1154 if(arr_base[k] >= target)
1162 #pragma GCC diagnostic popncbi::TMaskedQueryRegions mask
SSE2 reinitialization guard class.
BMFORCEINLINE sse_empty_guard() BMNOEXCEPT
BMFORCEINLINE ~sse_empty_guard() BMNOEXCEPT
static vector< string > arr
void sse2_copy_block(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
SSE2 block copy dst = *src.
unsigned sse2_xor_block_2way(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
3 operand XOR dst = *src1 ^ src2
bool sse2_or_block_5way(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4.
void sse2_stream_block(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
SSE2 block copy dst = *src.
void sse2_stream_block_unalign(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
SSE2 block copy (unaligned src) dst = *src.
unsigned sse2_sub_block(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
AND-NOT (SUB) array elements against another array dst &= ~*src.
void sse2_xor_arr_2_mask(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT
XOR array elements to specified mask dst = *src ^ mask.
void sse2_copy_block_unalign(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
SSE2 block copy (unaligned SRC) dst = *src.
unsigned sse2_lower_bound_scan_u32(const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) BMNOEXCEPT
Lower bound (greater or equal) linear scan in an ascending-order sorted array.
unsigned sse2_xor_block(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
XOR block against another dst ^= *src.
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
OR array elements against another array (unaligned) dst |= *src.
unsigned sse2_and_block(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
AND blocks2 dst &= *src.
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
AND array elements against another array (unaligned) dst &= *src.
void sse2_andnot_arr_2_mask(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT
Inverts array elements and ANDs them with the specified mask dst = ~*src & mask.
bool sse2_or_block_3way(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
OR array elements against another 2 arrays dst |= *src1 | src2.
void sse2_set_block(__m128i *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
SSE2 block memset dst = value.
void sse2_invert_block(__m128i *BMRESTRICT dst) BMNOEXCEPT
Invert bit block dst = ~*dst (i.e. dst ^= all-ones mask).
bool sse2_or_block(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
OR array elements against another array dst |= *src.
bool sse2_or_block_2way(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
OR 2 blocks and copy result to the destination dst = *src1 | src2.
BMFORCEINLINE __m128i sse2_or(__m128i a, __m128i b) BMNOEXCEPT
BMFORCEINLINE __m128i sse2_and(__m128i a, __m128i b) BMNOEXCEPT
const unsigned set_block_size
BMFORCEINLINE __m128i sse2_sub(__m128i a, __m128i b) BMNOEXCEPT
unsigned bit_scan_forward32(unsigned w) noexcept
unsigned short gap_word_t
BMFORCEINLINE __m128i sse2_xor(__m128i a, __m128i b) BMNOEXCEPT
const bm::gap_word_t * sse2_gap_sum_arr(const bm::gap_word_t *BMRESTRICT pbuf, unsigned sse_vect_waves, unsigned *sum) BMNOEXCEPT
Gap block population count (array sum) utility.
const GenericPointer< typename T::ValueType > T2 value
static __m128i _mm_setzero_si128()
static __m128i _mm_xor_si128(__m128i a, __m128i b)
static __m128i _mm_sub_epi16(__m128i a, __m128i b)
static void _mm_stream_si128(__m128i *p, __m128i a)
static __m128i _mm_add_epi16(__m128i a, __m128i b)
#define _mm_srli_epi32(a, imm)
static int _mm_movemask_epi8(__m128i a)
static __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
static __m128i _mm_loadu_si128(const __m128i *p)
static void _mm_store_si128(__m128i *p, __m128i a)
static __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
static void _mm_prefetch(const void *p, int i)
static __m128i _mm_load_si128(const __m128i *p)
static __m128i _mm_or_si128(__m128i, __m128i)
static __m128i _mm_sub_epi32(__m128i a, __m128i b)
static __m128i _mm_set1_epi32(int)
static __m128i _mm_andnot_si128(__m128i a, __m128i b)
static __m128i _mm_and_si128(__m128i, __m128i)
static __m128i _mm_cmpeq_epi32(__m128i, __m128i)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4