const unsigned mu1 = 0x55555555;
const unsigned mu2 = 0x33333333;
const unsigned mu3 = 0x0F0F0F0F;
const unsigned mu4 = 0x0000003F;
} while (block < block_end);
return tcnt[0] + tcnt[1] + tcnt[2] + tcnt[3];
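The mu1..mu4 constants are the classic SWAR (divide-and-conquer) bit-count masks: adjacent bits are folded into 2-bit, 4-bit, then 8-bit partial sums, and the loop accumulates per-lane totals into tcnt[0..3]. As a plain scalar sketch of the same folding for one 32-bit word (illustrative, not the library's SSE2 code):

    // Scalar sketch of the SWAR popcount that the mu masks implement.
    static unsigned swar_popcount32(unsigned b)
    {
        const unsigned mu1 = 0x55555555;
        const unsigned mu2 = 0x33333333;
        const unsigned mu3 = 0x0F0F0F0F;
        const unsigned mu4 = 0x0000003F;
        b = (b & mu1) + ((b >> 1) & mu1);   // 2-bit partial sums
        b = (b & mu2) + ((b >> 2) & mu2);   // 4-bit partial sums
        b = (b + (b >> 4)) & mu3;           // 8-bit partial sums
        b += b >> 8;
        b += b >> 16;
        return b & mu4;                     // total <= 32 fits in 6 bits
    }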
template<class Func>
const unsigned mu1 = 0x55555555;
const unsigned mu2 = 0x33333333;
const unsigned mu3 = 0x0F0F0F0F;
const unsigned mu4 = 0x0000003F;
b = sse2_func(b, tmp1);
} while (block < block_end);
return tcnt[0] + tcnt[1] + tcnt[2] + tcnt[3];
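The templated variant fuses a caller-supplied SSE2 operation into the counting loop: sse2_func (sse2_and, sse2_or, sse2_xor or sse2_sub, as wired up by the VECT_BITCOUNT_* macros further down) combines each vector of the block with the corresponding vector of the mask block, and the result is bit-counted without writing out a temporary block. A scalar sketch of the same idea (hypothetical helper names, reusing swar_popcount32 from the sketch above):

    template<class Func>
    unsigned bit_count_op_sketch(const unsigned* block, const unsigned* block_end,
                                 const unsigned* mask_block, Func op)
    {
        unsigned count = 0;
        do
        {
            unsigned b = op(*block++, *mask_block++);  // AND/OR/XOR/SUB, per word
            count += swar_popcount32(b);               // count bits of the result
        } while (block < block_end);
        return count;
    }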
if (m1 != 0xFFFF || m2 != 0xFFFF)
} while (block < block_end);
if (m1 != 0xFFFF || m2 != 0xFFFF)
} while (block < block_end);
if (m1 != 0xFFFF || m2 != 0xFFFF)
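The repeated m1/m2 tests come from _mm_movemask_epi8 on byte-wise compares: for a 128-bit vector the mask is 0xFFFF only when all 16 bytes matched, so any other value means the block is not all-ones (or, in the zero checks, not all-zero). A minimal sketch of the all-ones test for a pair of vectors (assumed shape, not the exact library routine):

    #include <emmintrin.h>

    inline bool all_ones_pair(const __m128i* p)
    {
        __m128i w0   = _mm_load_si128(p);
        __m128i w1   = _mm_load_si128(p + 1);
        __m128i ones = _mm_cmpeq_epi8(w0, w0);   // produces 0xFF in every byte
        int m1 = _mm_movemask_epi8(_mm_cmpeq_epi8(w0, ones));
        int m2 = _mm_movemask_epi8(_mm_cmpeq_epi8(w1, ones));
        if (m1 != 0xFFFF || m2 != 0xFFFF)        // same test as above
            return false;
        return true;
    }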
unsigned simd_lane = 0;
unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = (off * 32) + (simd_lane * 128) + (widx * 32) + bsf;
unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = (off * 32) + ((++simd_lane) * 128) + (widx * 32) + bsf;
} while (block < block_end);
unsigned simd_lane = 0;
unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = (simd_lane * 128) + (widx * 32) + bsf;
unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = ((++simd_lane) * 128) + (widx * 32) + bsf;
block1 += 2; block2 += 2;
} while (block1 < block1_end);
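Both find-first routines use the same position arithmetic: a movemask plus bit-scan-forward yields the first non-zero byte of the 128-bit lane, bsf >> 2 converts that to the 32-bit word index inside the lane, the word itself is bit-scanned, and the absolute position is assembled as off*32 + simd_lane*128 + widx*32 + bsf. A sketch of that assembly for one lane (hypothetical helper, scalar bit scan standing in for the intrinsic sequence):

    #include <emmintrin.h>

    inline bool find_first_in_lane(const __m128i* lane, unsigned off,
                                   unsigned simd_lane, unsigned* pos)
    {
        unsigned simd_buf[4];
        _mm_storeu_si128((__m128i*)simd_buf, _mm_load_si128(lane));
        for (unsigned widx = 0; widx < 4; ++widx)
        {
            unsigned w = simd_buf[widx];
            if (!w)
                continue;
            unsigned bsf = 0;
            while (!(w & 1)) { w >>= 1; ++bsf; }   // bit-scan-forward
            *pos = (off * 32) + (simd_lane * 128) + (widx * 32) + bsf;
            return true;
        }
        return false;
    }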
for (; block < block_end; block += 2)
for (--block_end; block_end >= block; block_end -= 2)
const unsigned mu1 = 0x55555555;
const unsigned mu2 = 0x33333333;
const unsigned mu3 = 0x0F0F0F0F;
const unsigned mu4 = 0x0000003F;
int count = (int)(block_end - block) * 4;
const int w_shift = sizeof(w) * 8 - 1;
bool first_word = true;
count -= (w_prev = (w0 >> w_shift));
first_word = false;
count -= !(w_prev ^ (w0 & 1));
count -= w_prev = (w0 >> w_shift);
count -= !w_prev; w_prev ^= w_prev;
count -= !(w_prev ^ (w0 & 1));
count -= w_prev = (w0 >> w_shift);
count -= !w_prev; w_prev ^= w_prev;
count -= !(w_prev ^ (w0 & 1));
count -= w_prev = (w0 >> w_shift);
count -= !w_prev; w_prev ^= w_prev;
count -= !(w_prev ^ (w0 & 1));
count -= w_prev = (w0 >> w_shift);
count -= !w_prev; w_prev ^= w_prev;
} while (++block < block_end);
*bit_count = tcnt[0] + tcnt[1] + tcnt[2] + tcnt[3];
return unsigned(count);
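sse2_bit_block_calc_count_change walks the block once, producing the total bit count in tcnt[] (returned through *bit_count) and, in count, a measure of how many 0/1 runs the block contains: each word contributes its internal bit transitions, and w_prev carries the top bit across the word boundary so a run continuing into the next word is not counted twice. A plain scalar sketch of run counting under those assumptions (32-bit words, boundaries included, non-empty block; the library's exact bookkeeping differs in detail, e.g. the all-zero/all-one word special cases shown above):

    static unsigned count_block_changes(const unsigned* block, const unsigned* block_end)
    {
        unsigned count  = 1;              // the first bit opens the first run
        unsigned w_prev = *block & 1;     // value of the previously seen bit
        for (; block < block_end; ++block)
        {
            unsigned w = *block;
            count += ((w & 1) != w_prev);               // change at the word boundary
            unsigned d = (w ^ (w >> 1)) & 0x7FFFFFFF;   // intra-word transitions
            for (; d; d &= d - 1)                       // popcount of transition mask
                ++count;
            w_prev = w >> 31;                           // carry the MSB to the next word
        }
        return count;
    }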
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
const unsigned unroll_factor = 8;
if (pbuf[0] >= pos) { size = 0; }
else if (pbuf[1] >= pos) { size = 1; }
__m128i m1, mz, maskF, maskFL;
int shiftL = (64 - (unroll_factor - size) * 16);
return size - (unroll_factor - bsr_i);
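sse2_gap_find scans up to unroll_factor (8) gap_word_t values at once: pos is broadcast with _mm_set1_epi16, saturating subtraction (_mm_subs_epu16) turns every element >= pos into zero, a compare against zero plus _mm_movemask_epi8 yields a bitmask of candidates, and the first set bit gives the element index (maskF/maskFL and shiftL mask off lanes beyond the valid size). A trimmed sketch of the core comparison for a full 8-element load (illustrative, without the partial-load masking):

    #include <emmintrin.h>

    inline unsigned gap_find8(const unsigned short* pbuf, unsigned short pos)
    {
        __m128i vbuf = _mm_loadu_si128((const __m128i*)pbuf);
        __m128i vpos = _mm_set1_epi16((short)pos);
        __m128i mz   = _mm_setzero_si128();
        // subs_epu16(pos, buf[i]) == 0  <=>  buf[i] >= pos
        __m128i ge   = _mm_cmpeq_epi16(_mm_subs_epu16(vpos, vbuf), mz);
        int mask = _mm_movemask_epi8(ge);        // 2 mask bits per element
        if (!mask)
            return 8;                            // no element >= pos in this chunk
        unsigned bsf = 0;
        while (!(mask & 1)) { mask >>= 1; ++bsf; }
        return bsf >> 1;                         // byte index -> element index
    }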
unsigned end = 1 + ((*buf) >> 3);
const unsigned arr_end = end;
unsigned size = end - start;
for (; size >= 64; size = end - start)
    unsigned mid = (start + end) >> 1;
    if (buf[mid] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
for (; size >= 16; size = end - start)
    if (unsigned mid = (start + end) >> 1; buf[mid] < pos)
    if (unsigned mid = (start + end) >> 1; buf[mid] < pos)
size += (end != arr_end);
*is_set = ((*buf) & 1) ^ ((start - 1) & 1);
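sse2_gap_bfind is the hybrid search described in the reference below: while the range [start, end) holds at least 64 (then 16) elements it performs unrolled binary halvings, and once the range is short it falls through to the SSE2 scan in sse2_gap_find; the bit value is then recovered as the GAP header bit XOR the parity of the found index, as in the *is_set line above. A compact sketch of the hybrid idea over a sorted gap_word_t array (plain scalar tail standing in for the SSE2 scan):

    static unsigned hybrid_lower_bound(const unsigned short* buf,
                                       unsigned start, unsigned end,
                                       unsigned short pos)
    {
        while (end - start >= 16)                // binary phase on wide ranges
        {
            unsigned mid = (start + end) >> 1;
            if (buf[mid] < pos)
                start = mid + 1;
            else
                end = mid;
        }
        while (start < end && buf[start] < pos)  // short linear tail
            ++start;
        return start;
    }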
#pragma GCC diagnostic pop

#define VECT_XOR_ARR_2_MASK(dst, src, src_end, mask) \
    sse2_xor_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

#define VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask) \
    sse2_andnot_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

#define VECT_BITCOUNT(first, last) \
    sse2_bit_count((__m128i*) (first), (__m128i*) (last))

#define VECT_BITCOUNT_AND(first, last, mask) \
    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_and)

#define VECT_BITCOUNT_OR(first, last, mask) \
    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_or)

#define VECT_BITCOUNT_XOR(first, last, mask) \
    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_xor)

#define VECT_BITCOUNT_SUB(first, last, mask) \
    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_sub)

#define VECT_INVERT_BLOCK(first) \
    sse2_invert_block((__m128i*)first);

#define VECT_AND_BLOCK(dst, src) \
    sse2_and_block((__m128i*) dst, (__m128i*) (src))

#define VECT_AND_DIGEST(dst, src) \
    sse2_and_digest((__m128i*) dst, (const __m128i*) (src))

#define VECT_AND_OR_DIGEST_2WAY(dst, src1, src2) \
    sse2_and_or_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4) \
    sse2_and_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

#define VECT_AND_DIGEST_3WAY(dst, src1, src2) \
    sse2_and_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_AND_DIGEST_2WAY(dst, src1, src2) \
    sse2_and_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_OR_BLOCK(dst, src) \
    sse2_or_block((__m128i*) dst, (__m128i*) (src))

#define VECT_OR_BLOCK_2WAY(dst, src1, src2) \
    sse2_or_block_2way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))

#define VECT_OR_BLOCK_3WAY(dst, src1, src2) \
    sse2_or_block_3way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))

#define VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4) \
    sse2_or_block_5way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2), (__m128i*) (src3), (__m128i*) (src4))

#define VECT_SUB_BLOCK(dst, src) \
    sse2_sub_block((__m128i*) dst, (__m128i*) (src))

#define VECT_SUB_DIGEST(dst, src) \
    sse2_sub_digest((__m128i*) dst, (const __m128i*) (src))

#define VECT_SUB_DIGEST_2WAY(dst, src1, src2) \
    sse2_sub_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_SUB_DIGEST_5WAY(dst, src1, src2, src3, src4) \
    sse2_sub_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

#define VECT_SUB_DIGEST_3WAY(dst, src1, src2) \
    sse2_sub_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_XOR_BLOCK(dst, src) \
    sse2_xor_block((__m128i*) dst, (__m128i*) (src))

#define VECT_XOR_BLOCK_2WAY(dst, src1, src2) \
    sse2_xor_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_COPY_BLOCK(dst, src) \
    sse2_copy_block((__m128i*) dst, (__m128i*) (src))

#define VECT_COPY_BLOCK_UNALIGN(dst, src) \
    sse2_copy_block_unalign((__m128i*) dst, (__m128i*) (src))

#define VECT_STREAM_BLOCK(dst, src) \
    sse2_stream_block((__m128i*) dst, (__m128i*) (src))

#define VECT_STREAM_BLOCK_UNALIGN(dst, src) \
    sse2_stream_block_unalign((__m128i*) dst, (__m128i*) (src))

#define VECT_SET_BLOCK(dst, value) \
    sse2_set_block((__m128i*) dst, value)

#define VECT_IS_ZERO_BLOCK(dst) \
    sse2_is_all_zero((__m128i*) dst)

#define VECT_IS_ONE_BLOCK(dst) \
    sse2_is_all_one((__m128i*) dst)

#define VECT_IS_DIGEST_ZERO(start) \
    sse2_is_digest_zero((__m128i*)start)

#define VECT_BLOCK_SET_DIGEST(dst, val) \
    sse2_block_set_digest((__m128i*)dst, val)

#define VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to) \
    sse2_lower_bound_scan_u32(arr, target, from, to)

#define VECT_SHIFT_R1(b, acc, co) \
    sse2_shift_r1((__m128i*)b, acc, co)

#define VECT_BIT_FIND_FIRST(src, off, pos) \
    sse2_bit_find_first((__m128i*) src, off, pos)

#define VECT_BIT_FIND_DIFF(src1, src2, pos) \
    sse2_bit_find_first_diff((__m128i*) src1, (__m128i*) (src2), pos)

#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \
    sse2_bit_block_xor(t, src, src_xor, d)

#define VECT_BIT_BLOCK_XOR_2WAY(t, src_xor, d) \
    sse2_bit_block_xor_2way(t, src_xor, d)

#define VECT_GAP_BFIND(buf, pos, is_set) \
    sse2_gap_bfind(buf, pos, is_set)

#define VECT_GAP_TEST(buf, pos) \
    sse2_gap_test(buf, pos)

#pragma GCC diagnostic pop
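These VECT_* macros form the dispatch layer the rest of the library compiles against: each forwards a plain bm::word_t pointer call to the matching sse2_ kernel, adding only the __m128i casts. For example, counting the set bits of one full bit-block through the macro (a sketch assuming a 16-byte aligned block of bm::set_block_size words):

    inline bm::id_t block_popcount(const bm::word_t* block)
    {
        return VECT_BITCOUNT(block, block + bm::set_block_size);
        // expands to: sse2_bit_count((__m128i*) (block),
        //                            (__m128i*) (block + bm::set_block_size))
    }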
Compute functions for SSE SIMD instruction set (internal)
Bit manipulation primitives (internal)
bool sse2_bit_find_first(const __m128i *block, unsigned off, unsigned *pos) noexcept
Find first non-zero bit.
bool sse2_sub_digest_2way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2.
bm::id_t sse2_bit_count(const __m128i *block, const __m128i *block_end)
bool sse2_and_or_digest_2way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
AND-OR block digest stride dst |= *src1 & *src2.
void sse2_bit_block_xor_2way(bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
Build partial XOR product of 2 bit-blocks using digest mask.
unsigned sse2_gap_test(const unsigned short *buf, unsigned pos)
Hybrid binary search, starts as binary, then switches to scan.
bool sse2_and_digest_5way(__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
AND block digest stride.
bool sse2_and_digest_3way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
AND block digest stride.
bool sse2_and_digest_2way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
AND block digest stride dst = *src1 & *src2.
bool sse2_is_all_one(const __m128i *block) noexcept
check if block is all ONE bits
bool sse2_shift_l1(__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
block shift left by 1
bool sse2_and_digest(__m128i *dst, const __m128i *src) noexcept
AND block digest stride dst &= *src.
bool sse2_is_digest_zero(const __m128i *block) noexcept
check if digest stride is all zero bits
void sse2_bit_block_xor(bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
Build partial XOR product of 2 bit-blocks using digest mask.
bool sse2_is_all_zero(const __m128i *block) noexcept
check if block is all zero bits
unsigned sse2_gap_bfind(const unsigned short *buf, unsigned pos, unsigned *is_set)
Hybrid binary search, starts as binary, then switches to linear scan.
bool sse2_bit_find_first_diff(const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept
Find first bit which is different between two bit-blocks.
void sse2_block_set_digest(__m128i *dst, unsigned value) noexcept
set digest stride to 0xFF.. or 0x0 value
bool sse2_shift_r1(__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
block shift right by 1
bool sse2_sub_digest(__m128i *dst, const __m128i *src) noexcept
SUB (AND NOT) block digest stride dst &= ~*src.
bool sse2_sub_digest_5way(__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
SUB block digest stride.
bool sse2_sub_digest_3way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
SUB block digest stride.
unsigned word_bitcount64(bm::id64_t x) noexcept
bm::id_t word_bitcount(bm::id_t w) noexcept
const unsigned set_block_digest_wave_size
bm::id_t sse2_bit_block_calc_count_change(const __m128i *block, const __m128i *block_end, unsigned *bit_count)
bm::id_t sse2_bit_count_op(const __m128i *block, const __m128i *block_end, const __m128i *mask_block, Func sse2_func)
unsigned long long bmi_bslr_u64(unsigned long long w) noexcept
unsigned sse2_gap_find(const bm::gap_word_t *pbuf, const bm::gap_word_t pos, unsigned size)
const unsigned set_block_size
unsigned long long int id64_t
const unsigned block_waves
unsigned bit_scan_forward32(unsigned w) noexcept
T bit_scan_fwd(T v) noexcept
unsigned short gap_word_t
unsigned long long bmi_blsi_u64(unsigned long long w)
static __m128i _mm_subs_epu16(__m128i a, __m128i b)
static __m128i _mm_setzero_si128()
static __m128i _mm_xor_si128(__m128i a, __m128i b)
static int _mm_cvtsi128_si32(__m128i a)
#define _mm_srli_epi32(a, imm)
static __m128i _mm_srli_si128(__m128i a, int imm)
static __m128i _mm_slli_epi64(__m128i a, int imm)
static int _mm_movemask_epi8(__m128i a)
static __m128i _mm_slli_si128(__m128i a, int imm)
static __m128i _mm_set1_epi16(short w)
static __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
static __m128i _mm_slli_epi32(__m128i a, int imm)
static __m128i _mm_loadu_si128(const __m128i *p)
static void _mm_store_si128(__m128i *p, __m128i a)
static __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
static __m128i _mm_load_si128(const __m128i *p)
static __m128i _mm_or_si128(__m128i, __m128i)
static __m128i _mm_add_epi32(__m128i a, __m128i b)
static __m128i _mm_set_epi32(int, int, int, int)
static __m128i _mm_set1_epi32(int)
static __m128i _mm_andnot_si128(__m128i a, __m128i b)
#define _mm_shuffle_epi32(a, imm)
static __m128i _mm_and_si128(__m128i, __m128i)
static __m128i _mm_cmpeq_epi32(__m128i, __m128i)