} while (b < b_end);

const unsigned* b = (unsigned*) block;

} while (++block < block_end);

unsigned ret = (a ^ b);
template<class Func>

block += 2; mask_block += 2;
} while (block < block_end);

++block; ++mask_block;
} while (block < block_end);

} while (block < block_end);
__m128i accA, accB, accC, accD;

} while (src < src_end);
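These loop tails come from the SSE4.2 bit-count routines, which walk a block in 128-bit waves and accumulate into accA..accD. A minimal sketch of the per-wave popcount step, assuming an x86-64 target (popcnt128 is an illustrative name, not the library's):

    #include <nmmintrin.h>  // SSE4.2: _mm_popcnt_u64
    #include <cstdint>

    // Popcount of one 128-bit wave: extract both 64-bit halves and use POPCNT.
    inline uint64_t popcnt128(const __m128i* p)
    {
        __m128i w = _mm_load_si128(p);  // 16-byte aligned load
        return _mm_popcnt_u64((uint64_t)_mm_extract_epi64(w, 0))
             + _mm_popcnt_u64((uint64_t)_mm_extract_epi64(w, 1));
    }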
} while (block < block_end);

unsigned co2, co1 = 0;
for (; block < block_end; block += 2)
unsigned gap_count = 1;
unsigned bit_count = 0;
unsigned co2, co1 = 0;
for (; block < block_end; block += 2, xor_block += 2)

gap_count -= (w0 & 1u);

unsigned bit_count = 0;
unsigned gap_count = 1;
unsigned co2, co1 = 0;
for (; block < block_end; block += 2)

gap_count -= (w0 & 1u);
unsigned simd_lane = 0;

unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = (simd_lane * 128) + (widx * 32) + bsf;

unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = ((++simd_lane) * 128) + (widx * 32) + bsf;

block1 += 2; block2 += 2;
} while (block1 < block1_end);
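The (simd_lane * 128) + (widx * 32) + bsf arithmetic converts a lane-local hit back into a block-relative bit index. A self-contained sketch of the technique for a single 128-bit wave, assuming GCC/Clang builtins (find_first_diff128 is a hypothetical name):

    #include <emmintrin.h>

    // First differing bit between two 128-bit waves; returns false if equal.
    inline bool find_first_diff128(const __m128i* a, const __m128i* b,
                                   unsigned* pos)
    {
        __m128i wA = _mm_load_si128(a), wB = _mm_load_si128(b);
        __m128i eq = _mm_cmpeq_epi32(wA, wB);
        unsigned mask = ~_mm_movemask_epi8(eq) & 0xFFFFu; // 1s mark differing bytes
        if (!mask)
            return false;
        unsigned bsf  = __builtin_ctz(mask); // first differing byte
        unsigned widx = bsf >> 2;            // 32-bit word within the wave
        unsigned buf[4];
        _mm_storeu_si128((__m128i*)buf, _mm_xor_si128(wA, wB));
        *pos = (widx * 32) + __builtin_ctz(buf[widx]);
        return true;
    }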
unsigned simd_lane = 0;

unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = (off * 32) + (simd_lane * 128) + (widx * 32) + bsf;

unsigned widx = bsf >> 2;
unsigned w = simd_buf[widx];
*pos = (off * 32) + ((++simd_lane) * 128) + (widx * 32) + bsf;

} while (block < block_end);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"

const unsigned unroll_factor = 8;
__m128i m1, mz, maskF, maskFL;

int shiftL = (64 - (unroll_factor - size) * 16);

return unroll_factor - bc;
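The fragments above are from sse4_gap_find, which probes up to 8 gap words at once. SSE has no unsigned 16-bit compare, so one standard workaround is a saturating subtract: subs_epu16(pos, v) is zero exactly where v >= pos. A sketch under that assumption (gap_find8 is a hypothetical name; the real routine also masks out lanes beyond size):

    #include <emmintrin.h>
    #include <cstdint>

    // Index in [0..8] of the first element >= pos (8 if none).
    inline unsigned gap_find8(const uint16_t* pbuf, uint16_t pos)
    {
        __m128i v  = _mm_loadu_si128((const __m128i*)pbuf);
        __m128i p  = _mm_set1_epi16((short)pos);
        // subs_epu16(p, v) == 0  <=>  v >= pos (unsigned compare via saturation)
        __m128i ge = _mm_cmpeq_epi16(_mm_subs_epu16(p, v), _mm_setzero_si128());
        int mask = _mm_movemask_epi8(ge);   // 2 mask bits per 16-bit lane
        return mask ? (__builtin_ctz((unsigned)mask) >> 1) : 8u;
    }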
unsigned end = ((*buf) >> 3);
unsigned size = end - start;
for (; size >= 64; size = end - start)
    unsigned mid = (start + end) >> 1;
    if (buf[mid] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
for (; size >= 16; size = end - start)
    if (unsigned mid = (start + end) >> 1; buf[mid] < pos)
    if (unsigned mid = (start + end) >> 1; buf[mid] < pos)
if (pbuf[0] >= pos) { }
else if (pbuf[1] >= pos) { start++; }

*is_set = ((*buf) & 1) ^ ((start - 1) & 1);
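The ladder above is the hybrid scheme the documentation names: fixed binary-halving steps while the window is wide (>= 64, then >= 16), then a linear scan over the short tail. A scalar sketch of the idea; the real sse42_gap_bfind also decodes the GAP header and derives is_set from the parity of the found position:

    #include <cstdint>

    // Hybrid search sketch: binary until the window is small, then linear scan.
    inline unsigned hybrid_lower_bound(const uint16_t* buf, unsigned start,
                                       unsigned end, uint16_t pos)
    {
        while (end - start >= 16)    // binary phase
        {
            unsigned mid = (start + end) >> 1;
            if (buf[mid] < pos)
                start = mid + 1;
            else
                end = mid;
        }
        while (buf[start] < pos)     // linear tail; assumes a sentinel >= pos
            ++start;
        return start;
    }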
unsigned end = ((*buf) >> 3);
unsigned size = end - start;
for (; size >= 64; size = end - start)
    unsigned mid = (start + end) >> 1;
    if (buf[mid] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
    if (buf[mid = (start + end) >> 1] < pos)
for (; size >= 16; size = end - start)
    if (unsigned mid = (start + end) >> 1; buf[mid] < pos)
if (pbuf[0] >= pos) { }
else if (pbuf[1] >= pos) { start++; }

return ((*buf) & 1) ^ ((--start) & 1);
const unsigned unroll_factor = 8;
const unsigned len = (size - start);
const unsigned len_unr = len - (len % unroll_factor);

for (k = 0; k < len_unr; k += unroll_factor)
for (; k < len; ++k)
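For context, sse42_idx_arr_block_lookup scans a sorted index array and stops at the first entry that leaves block nb; the unrolled loop above does this comparison 8 indices at a time. A scalar sketch of the contract, assuming BitMagic's 65536-bit blocks (set_block_shift == 16):

    // Returns the index of the first element (from 'start') that falls
    // outside block 'nb'. Sketch only; the SIMD version compares 8 at a time.
    inline unsigned idx_arr_block_lookup_sketch(const unsigned* idx,
                                                unsigned size, unsigned nb,
                                                unsigned start)
    {
        const unsigned set_block_shift = 16; // assumption: 64K-bit blocks
        unsigned k = start;
        for (; k < size; ++k)
            if ((idx[k] >> set_block_shift) != nb)
                break;
        return k;
    }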
const unsigned unroll_factor = 4;
const unsigned len = (stop - start);
const unsigned len_unr = len - (len % unroll_factor);

for (; k < len_unr; k += unroll_factor)

block[nword] |= (1u << mshift_v[0]) | (1u << mshift_v[1]) |
                (1u << mshift_v[2]) | (1u << mshift_v[3]);

block[mword_v[0]] |= (1u << mshift_v[0]);
block[mword_v[1]] |= (1u << mshift_v[1]);
block[mword_v[2]] |= (1u << mshift_v[2]);
block[mword_v[3]] |= (1u << mshift_v[3]);

for (; k < len; ++k)
    unsigned n = idx[k];
    block[nword] |= (1u << nbit);
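The tail loop above is the scalar fallback of the scatter. A sketch of the whole operation in scalar form, assuming 32-bit words (set_word_shift == 5) and a 65536-bit block (set_block_mask == 0xFFFF):

    #include <cstdint>

    // Set the bits listed in idx[start..stop) within one bit-block.
    inline void set_block_bits_sketch(uint32_t* block, const unsigned* idx,
                                      unsigned start, unsigned stop)
    {
        for (unsigned k = start; k < stop; ++k)
        {
            unsigned n     = idx[k] & 0xFFFFu;  // position within the block
            unsigned nword = n >> 5;            // 32-bit word index
            unsigned nbit  = n & 31u;           // bit within the word
            block[nword] |= (1u << nbit);
        }
    }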
const unsigned unroll_factor = 4;
const unsigned len = (size - start);
const unsigned len_unr = len - (len % unroll_factor);

unsigned base = start + k;
for (; k < len_unr; k += unroll_factor)

mask_0 = _mm_set_epi32(1 << mshift_v[3], 1 << mshift_v[2],
                       1 << mshift_v[1], 1 << mshift_v[0]);

blk[mword_v[1]], blk[mword_v[0]]),

++idx_ptr; ++target_ptr;

for (; k < len; ++k)
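SSE4.2 has no gather instruction, so the code above fetches four words by index and packs them with _mm_set_epi32, then tests them against per-lane one-bit masks. A sketch of that idiom (gather_test4 is a hypothetical name):

    #include <emmintrin.h>

    // Emulated 4-way gather + per-lane bit test.
    inline __m128i gather_test4(const unsigned* blk,
                                const unsigned mword_v[4],
                                const unsigned mshift_v[4])
    {
        __m128i w = _mm_set_epi32((int)blk[mword_v[3]], (int)blk[mword_v[2]],
                                  (int)blk[mword_v[1]], (int)blk[mword_v[0]]);
        __m128i mask_0 = _mm_set_epi32((int)(1u << mshift_v[3]),
                                       (int)(1u << mshift_v[2]),
                                       (int)(1u << mshift_v[1]),
                                       (int)(1u << mshift_v[0]));
        return _mm_and_si128(w, mask_0); // non-zero lanes carry the tested bits
    }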
for (--block_end; block_end >= block; block_end -= 2)

for (; block < block_end; block += 2)
for (; di < 64; ++di)

block = (__m128i*) &wblock[d_base];
mask_block = (__m128i*) &mblock[d_base];
for (unsigned i = 0; i < 4; ++i, block += 2, mask_block += 2)

bm::id64_t w0 = wblock[d_base] = co1 & mblock[d_base];
d |= (dmask & (w0 << di));
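The d_base / dmask arithmetic addresses 64-word waves selected by set bits of a 64-bit digest. A sketch of the digest-walk idiom, assuming GCC/Clang builtins and 64 words per wave (set_block_digest_wave_size):

    #include <cstdint>

    // Visit each wave whose digest bit is set; 'op' processes one wave.
    inline void for_each_digest_wave(uint64_t d, uint32_t* wblock,
                                     void (*op)(uint32_t* wave_ptr))
    {
        const unsigned wave_size = 64; // words per digest stride (assumed)
        while (d)
        {
            uint64_t t = d & (0ull - d);                       // isolate lowest bit
            unsigned wave = (unsigned)__builtin_popcountll(t - 1); // its index
            op(&wblock[wave * wave_size]);
            d &= d - 1;                                        // clear lowest bit
        }
    }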
const __m128i* sub_block = (const __m128i*) (xor_block + off);
#define VECT_XOR_ARR_2_MASK(dst, src, src_end, mask)\
    sse2_xor_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

#define VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask)\
    sse2_andnot_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

#define VECT_BITCOUNT(first, last) \
    sse4_bit_count((__m128i*) (first), (__m128i*) (last))

#define VECT_BITCOUNT_AND(first, last, mask) \
    sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_and)

#define VECT_BITCOUNT_OR(first, last, mask) \
    sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_or)

#define VECT_BITCOUNT_XOR(first, last, mask) \
    sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_xor)

#define VECT_BITCOUNT_SUB(first, last, mask) \
    sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_sub)

#define VECT_INVERT_BLOCK(first) \
    sse2_invert_block((__m128i*)first);

#define VECT_AND_BLOCK(dst, src) \
    sse4_and_block((__m128i*) dst, (__m128i*) (src))

#define VECT_AND_DIGEST(dst, src) \
    sse4_and_digest((__m128i*) dst, (const __m128i*) (src))

#define VECT_AND_OR_DIGEST_2WAY(dst, src1, src2) \
    sse4_and_or_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4) \
    sse4_and_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

#define VECT_AND_DIGEST_3WAY(dst, src1, src2) \
    sse4_and_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_AND_DIGEST_2WAY(dst, src1, src2) \
    sse4_and_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_OR_BLOCK(dst, src) \
    sse2_or_block((__m128i*) dst, (__m128i*) (src))

#define VECT_OR_BLOCK_2WAY(dst, src1, src2) \
    sse2_or_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_OR_BLOCK_3WAY(dst, src1, src2) \
    sse2_or_block_3way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4) \
    sse2_or_block_5way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2), (__m128i*) (src3), (__m128i*) (src4))

#define VECT_SUB_BLOCK(dst, src) \
    sse2_sub_block((__m128i*) dst, (const __m128i*) (src))

#define VECT_SUB_DIGEST(dst, src) \
    sse4_sub_digest((__m128i*) dst, (const __m128i*) (src))

#define VECT_SUB_DIGEST_2WAY(dst, src1, src2) \
    sse4_sub_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_SUB_DIGEST_5WAY(dst, src1, src2, src3, src4) \
    sse4_sub_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

#define VECT_SUB_DIGEST_3WAY(dst, src1, src2) \
    sse4_sub_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_XOR_BLOCK(dst, src) \
    sse2_xor_block((__m128i*) dst, (__m128i*) (src))

#define VECT_XOR_BLOCK_2WAY(dst, src1, src2) \
    sse2_xor_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

#define VECT_COPY_BLOCK(dst, src) \
    sse2_copy_block((__m128i*) dst, (__m128i*) (src))

#define VECT_COPY_BLOCK_UNALIGN(dst, src) \
    sse2_copy_block_unalign((__m128i*) dst, (__m128i*) (src))

#define VECT_STREAM_BLOCK(dst, src) \
    sse2_stream_block((__m128i*) dst, (__m128i*) (src))

#define VECT_STREAM_BLOCK_UNALIGN(dst, src) \
    sse2_stream_block_unalign((__m128i*) dst, (__m128i*) (src))

#define VECT_SET_BLOCK(dst, value) \
    sse2_set_block((__m128i*) dst, value)

#define VECT_IS_ZERO_BLOCK(dst) \
    sse4_is_all_zero((__m128i*) dst)

#define VECT_IS_ONE_BLOCK(dst) \
    sse4_is_all_one((__m128i*) dst)

#define VECT_IS_DIGEST_ZERO(start) \
    sse4_is_digest_zero((__m128i*)start)

#define VECT_BLOCK_SET_DIGEST(dst, val) \
    sse4_block_set_digest((__m128i*)dst, val)

#define VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to) \
    sse2_lower_bound_scan_u32(arr, target, from, to)

#define VECT_SHIFT_L1(b, acc, co) \
    sse42_shift_l1((__m128i*)b, acc, co)

#define VECT_SHIFT_R1(b, acc, co) \
    sse42_shift_r1((__m128i*)b, acc, co)

#define VECT_SHIFT_R1_AND(b, co, m, digest) \
    sse42_shift_r1_and((__m128i*)b, co, (__m128i*)m, digest)

#define VECT_ARR_BLOCK_LOOKUP(idx, size, nb, start) \
    sse42_idx_arr_block_lookup(idx, size, nb, start)

#define VECT_SET_BLOCK_BITS(block, idx, start, stop) \
    sse42_set_block_bits(block, idx, start, stop)

#define VECT_BLOCK_CHANGE(block, size) \
    sse42_bit_block_calc_change((__m128i*)block, size)

#define VECT_BLOCK_XOR_CHANGE(block, xor_block, size, gc, bc) \
    sse42_bit_block_calc_xor_change((__m128i*)block, (__m128i*)xor_block, size, gc, bc)

#define VECT_BLOCK_CHANGE_BC(block, gc, bc) \
    sse42_bit_block_calc_change_bc((__m128i*)block, gc, bc)

#define VECT_BIT_FIND_FIRST(src, off, pos) \
    sse42_bit_find_first((__m128i*) src, off, pos)

#define VECT_BIT_FIND_DIFF(src1, src2, pos) \
    sse42_bit_find_first_diff((__m128i*) src1, (__m128i*) (src2), pos)

#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \
    sse42_bit_block_xor(t, src, src_xor, d)

#define VECT_BIT_BLOCK_XOR_2WAY(t, src_xor, d) \
    sse42_bit_block_xor_2way(t, src_xor, d)

#define VECT_GAP_BFIND(buf, pos, is_set) \
    sse42_gap_bfind(buf, pos, is_set)

#define VECT_GAP_TEST(buf, pos) \
    sse42_gap_test(buf, pos)

#pragma GCC diagnostic pop

#pragma warning( pop )
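These VECT_* macros are the dispatch layer between BitMagic's portable algorithms and this SIMD target; bmavx2.h and the scalar build map the same names onto their own kernels. A hedged usage sketch (assumes a translation unit built with SSE4.2 support, e.g. BMSSE42OPT):

    #include "bmsse4.h"   // defines the VECT_* dispatch macros above
    using namespace bm;   // expansions name the bm:: kernels unqualified

    // AND two 64K-bit blocks through the dispatch macro.
    alignas(16) static bm::word_t blk_a[bm::set_block_size];
    alignas(16) static bm::word_t blk_b[bm::set_block_size];

    unsigned any = VECT_AND_BLOCK(blk_a, blk_b); // expands to sse4_and_block(...)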
Compute functions for SSE SIMD instruction set (internal)
Bit manipulation primitives (internal)
bool sse42_shift_l1(__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
block shift left by 1
bool sse42_test_all_one_wave(const void *ptr) noexcept
check if SSE wave is all 0xFFFF...FFF
bool sse4_sub_digest_3way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
SUB block digest stride.
void sse4_block_set_digest(__m128i *dst, unsigned value) noexcept
set digest stride to 0xFF.. or 0x0 value
bool sse42_bit_find_first_diff(const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept
Find first bit which is different between two bit-blocks.
bool sse4_is_all_zero(const __m128i *block) noexcept
check if block is all zero bits
bm::id_t sse4_bit_count(const __m128i *block, const __m128i *block_end) noexcept
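A hedged usage sketch for the block popcount; bm::set_block_size words per bit-block and 16-byte alignment are BitMagic conventions assumed here:

    #include "bmsse4.h"

    alignas(16) static bm::word_t blk[bm::set_block_size] = { 0x5u };

    bm::id_t cnt = bm::sse4_bit_count((const __m128i*) blk,
                                      (const __m128i*) (blk + bm::set_block_size));
    // cnt == 2 for the initializer above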
bool sse4_and_or_digest_2way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
AND-OR block digest stride dst |= *src1 & src2.
unsigned sse4_gap_find(const bm::gap_word_t *pbuf, const bm::gap_word_t pos, const unsigned size) noexcept
void sse42_bit_block_xor(bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
Build partial XOR product of 2 bit-blocks using digest mask.
bool sse4_and_digest_2way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
AND block digest stride dst = *src1 & src2.
unsigned sse42_gap_bfind(const unsigned short *buf, unsigned pos, unsigned *is_set) noexcept
Hybrid binary search, starts as binary, then switches to linear scan.
bool sse4_and_digest_5way(__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
AND block digest stride.
void sse42_bit_block_calc_xor_change(const __m128i *block, const __m128i *xor_block, unsigned size, unsigned *gc, unsigned *bc) noexcept
bool sse42_test_all_zero_wave(const void *ptr) noexcept
check if wave of pointers is all NULL
unsigned sse4_and_block(__m128i *dst, const __m128i *src) noexcept
AND blocks, 2-operand: dst &= *src.
bool sse42_test_all_zero_wave2(const void *ptr0, const void *ptr1) noexcept
check if 2 waves of pointers are all NULL
bool sse4_is_all_one(const __m128i *block) noexcept
check if block is all ONE bits
bool sse4_sub_digest_5way(__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
SUB block digest stride.
bool sse42_bit_find_first(const __m128i *block, unsigned off, unsigned *pos) noexcept
Find first non-zero bit.
int sse42_cmpge_u32(__m128i vect4, unsigned value) noexcept
Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.
unsigned sse42_gap_test(const unsigned short *buf, unsigned pos) noexcept
Hybrid binary search to test GAP value, starts as binary, then switches to scan.
bool sse4_and_digest_3way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
AND block digest stride.
bool sse4_and_digest(__m128i *dst, const __m128i *src) noexcept
AND block digest stride dst &= *src.
bool sse42_shift_r1(__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
block shift right by 1
bool sse42_shift_r1_and(__m128i *block, bm::word_t co1, const __m128i *mask_block, bm::id64_t *digest) noexcept
block shift right by 1 plus AND
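In the shift kernels, co1/co2 carry the bit shifted out of one word (or wave) into the next. A scalar sketch of the same carry chain; the SIMD version applies it per 128-bit wave, and the _AND variant additionally ANDs with the mask block and updates the digest:

    #include <cstdint>

    // Shift a bit-block right by one bit position (toward higher addresses),
    // propagating the carry; returns the final carry-out.
    inline unsigned shift_r1_sketch(uint32_t* block, unsigned nwords,
                                    unsigned co1)
    {
        for (unsigned i = 0; i < nwords; ++i)
        {
            unsigned co2 = block[i] >> 31;     // bit shifted out of this word
            block[i] = (block[i] << 1) | co1;  // bit shifted in from the left
            co1 = co2;
        }
        return co1;
    }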
void sse42_bit_block_xor_2way(bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
Build partial XOR product of 2 bit-blocks using digest mask.
bool sse4_sub_digest(__m128i *dst, const __m128i *src) noexcept
SUB (AND NOT) block digest stride dst &= ~*src.
unsigned sse42_bit_block_calc_change(const __m128i *block, unsigned size) noexcept
bool sse4_sub_digest_2way(__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2
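A sketch of a 2-way SUB digest stride under stated assumptions: 16 __m128i (64 words, matching set_block_digest_wave_size) per stride and 16-byte aligned inputs; the library's version is unrolled differently:

    #include <smmintrin.h>

    // dst = src1 & ~src2 over one digest stride; true when the result is all zero.
    inline bool sub_digest_2way_sketch(__m128i* dst,
                                       const __m128i* src1, const __m128i* src2)
    {
        __m128i acc = _mm_setzero_si128();
        for (unsigned i = 0; i < 16; ++i)
        {
            __m128i w = _mm_andnot_si128(_mm_load_si128(src2 + i),   // ~*src2 ...
                                         _mm_load_si128(src1 + i));  // ... & *src1
            _mm_store_si128(dst + i, w);
            acc = _mm_or_si128(acc, w);
        }
        return _mm_testz_si128(acc, acc) != 0;  // 1 iff acc == 0
    }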
bool sse42_test_all_eq_wave2(const void *ptr0, const void *ptr1) noexcept
check if a wave of 2 pointers is the same (NULL or FULL)
bool sse4_is_digest_zero(const __m128i *block) noexcept
check if digest stride is all zero bits
unsigned sse42_idx_arr_block_lookup(const unsigned *idx, unsigned size, unsigned nb, unsigned start) noexcept
void sse42_set_block_bits(bm::word_t *block, const unsigned *idx, unsigned start, unsigned stop) noexcept
const unsigned set_block_digest_wave_size
bm::id_t sse4_bit_count_op(const __m128i *block, const __m128i *block_end, const __m128i *mask_block, Func sse2_func) noexcept
const unsigned set_block_mask
unsigned long long bmi_bslr_u64(unsigned long long w) noexcept
unsigned op_and(unsigned a, unsigned b) noexcept
const unsigned set_word_shift
unsigned op_or(unsigned a, unsigned b) noexcept
const unsigned set_block_size
unsigned long long int id64_t
const unsigned block_waves
void sse4_bit_block_gather_scatter(unsigned *arr, const unsigned *blk, const unsigned *idx, unsigned size, unsigned start, unsigned bit_idx) noexcept
unsigned short gap_word_t
unsigned op_xor(unsigned a, unsigned b) noexcept
const unsigned set_block_shift
const unsigned set_word_mask
unsigned long long bmi_blsi_u64(unsigned long long w)
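For reference, both BMI helpers reduce to classic bit identities; portable fallbacks shown below (an assumption; hardware builds would use the BLSI/BLSR instructions):

    #include <cstdint>

    inline uint64_t blsi_u64(uint64_t w) { return w & (0ull - w); } // isolate lowest set bit
    inline uint64_t bslr_u64(uint64_t w) { return w & (w - 1); }    // reset lowest set bit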
static __m128i _mm_subs_epu16(__m128i a, __m128i b)
static __m128i _mm_setzero_si128()
static int _mm_test_all_ones(__m128i a)
static __m128i _mm_xor_si128(__m128i a, __m128i b)
static int _mm_popcnt_u32(unsigned int a)
#define _mm_srli_epi32(a, imm)
static __m128i _mm_srli_si128(__m128i a, int imm)
static __m128i _mm_slli_epi64(__m128i a, int imm)
static int _mm_movemask_epi8(__m128i a)
static __m128i _mm_slli_si128(__m128i a, int imm)
static __m128i _mm_set1_epi16(short w)
#define _mm_insert_epi32(a, b, imm)
static __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
static __m128i _mm_slli_epi16(__m128i a, int imm)
static int _mm_test_all_zeros(__m128i a, __m128i mask)
static __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
static __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
static __m128i _mm_slli_epi32(__m128i a, int imm)
static __m128i _mm_loadu_si128(const __m128i *p)
static void _mm_store_si128(__m128i *p, __m128i a)
static __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
static void _mm_prefetch(const void *p, int i)
static __m128i _mm_load_si128(const __m128i *p)
static __m128i _mm_or_si128(__m128i, __m128i)
static int _mm_testz_si128(__m128i a, __m128i b)
static __m128i _mm_set_epi32(int, int, int, int)
static __m128i _mm_sub_epi32(__m128i a, __m128i b)
static __m128i _mm_set1_epi32(int)
static int64_t _mm_popcnt_u64(uint64_t a)
static __m128i _mm_andnot_si128(__m128i a, __m128i b)
static void _mm_storeu_si128(__m128i *p, __m128i a)
#define _mm_extract_epi32(a, imm)
#define _mm_shuffle_epi32(a, imm)
#define _mm_extract_epi64(a, imm)
static __m128i _mm_and_si128(__m128i, __m128i)
static __m128i _mm_cmpeq_epi32(__m128i, __m128i)