A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://clang.llvm.org/doxygen/____clang__cuda__device__functions_8h_source.html below:

clang: lib/Headers/__clang_cuda_device_functions.h Source File

10#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ 11#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__ 13#ifndef __OPENMP_NVPTX__ 14#if CUDA_VERSION < 9000 15#error This file is intended to be used with CUDA-9+ only. 22#pragma push_macro("__DEVICE__"

)

23#ifdef __OPENMP_NVPTX__ 24#define __DEVICE__ static __attribute__((always_inline, nothrow)) 26#define __DEVICE__ static __device__ __forceinline__ 36#if defined(__cplusplus) 37__DEVICE__ void

__brkpt() { __asm__ __volatile__(

"brkpt;"

); }

41

__asm__ __volatile__(

"brkpt;"

);

53 return

__nvvm_atom_add_gen_d(

__p

,

__v

);

56 return

__nvvm_atom_cta_add_gen_d(

__p

,

__v

);

59 return

__nvvm_atom_sys_add_gen_d(

__p

,

__v

);

177 return

__nvvm_atom_add_gen_f(

__p

,

__v

);

180 return

__nvvm_atom_cta_add_gen_f(

__p

,

__v

);

183 return

__nvvm_atom_sys_add_gen_f(

__p

,

__v

);

341 return

__nvvm_atom_add_gen_i(

__p

,

__v

);

344 return

__nvvm_atom_cta_add_gen_i(

__p

,

__v

);

347 return

__nvvm_atom_sys_add_gen_i(

__p

,

__v

);

350 return

__nvvm_atom_and_gen_i(

__p

,

__v

);

353 return

__nvvm_atom_cta_and_gen_i(

__p

,

__v

);

356 return

__nvvm_atom_sys_and_gen_i(

__p

,

__v

);

359 return

__nvvm_atom_cas_gen_i(

__p

, __cmp,

__v

);

362 return

__nvvm_atom_cta_cas_gen_i(

__p

, __cmp,

__v

);

365 return

__nvvm_atom_sys_cas_gen_i(

__p

, __cmp,

__v

);

368 return

__nvvm_atom_xchg_gen_i(

__p

,

__v

);

371 return

__nvvm_atom_cta_xchg_gen_i(

__p

,

__v

);

374 return

__nvvm_atom_sys_xchg_gen_i(

__p

,

__v

);

377 return

__nvvm_atom_max_gen_i(

__p

,

__v

);

380 return

__nvvm_atom_cta_max_gen_i(

__p

,

__v

);

383 return

__nvvm_atom_sys_max_gen_i(

__p

,

__v

);

386 return

__nvvm_atom_min_gen_i(

__p

,

__v

);

389 return

__nvvm_atom_cta_min_gen_i(

__p

,

__v

);

392 return

__nvvm_atom_sys_min_gen_i(

__p

,

__v

);

395 return

__nvvm_atom_or_gen_i(

__p

,

__v

);

398 return

__nvvm_atom_cta_or_gen_i(

__p

,

__v

);

401 return

__nvvm_atom_sys_or_gen_i(

__p

,

__v

);

404 return

__nvvm_atom_xor_gen_i(

__p

,

__v

);

407 return

__nvvm_atom_cta_xor_gen_i(

__p

,

__v

);

410 return

__nvvm_atom_sys_xor_gen_i(

__p

,

__v

);

413 return

__nvvm_atom_max_gen_ll(

__p

,

__v

);

416 return

__nvvm_atom_cta_max_gen_ll(

__p

,

__v

);

419 return

__nvvm_atom_sys_max_gen_ll(

__p

,

__v

);

422 return

__nvvm_atom_min_gen_ll(

__p

,

__v

);

425 return

__nvvm_atom_cta_min_gen_ll(

__p

,

__v

);

428 return

__nvvm_atom_sys_min_gen_ll(

__p

,

__v

);

464 return

__nvvm_atom_and_gen_ll(

__p

,

__v

);

467 return

__nvvm_atom_cta_and_gen_ll(

__p

,

__v

);

470 return

__nvvm_atom_sys_and_gen_ll(

__p

,

__v

);

473 return

__nvvm_atom_or_gen_ll(

__p

,

__v

);

476 return

__nvvm_atom_cta_or_gen_ll(

__p

,

__v

);

479 return

__nvvm_atom_sys_or_gen_ll(

__p

,

__v

);

482 return

__nvvm_atom_xor_gen_ll(

__p

,

__v

);

485 return

__nvvm_atom_cta_xor_gen_ll(

__p

,

__v

);

488 return

__nvvm_atom_sys_xor_gen_ll(

__p

,

__v

);

512#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;"

::"i"(__a))

534 return

__nvvm_atom_cas_gen_us(

__p

, __cmp,

__v

);

537 unsigned short

__cmp,

538 unsigned short __v

) {

539 return

__nvvm_atom_cta_cas_gen_us(

__p

, __cmp,

__v

);

542 unsigned short

__cmp,

543 unsigned short __v

) {

544 return

__nvvm_atom_sys_cas_gen_us(

__p

, __cmp,

__v

);

547 return

__nvvm_atom_add_gen_i((

int

*)

__p

,

__v

);

551 return

__nvvm_atom_cta_add_gen_i((

int

*)

__p

,

__v

);

555 return

__nvvm_atom_sys_add_gen_i((

int

*)

__p

,

__v

);

558 return

__nvvm_atom_and_gen_i((

int

*)

__p

,

__v

);

562 return

__nvvm_atom_cta_and_gen_i((

int

*)

__p

,

__v

);

566 return

__nvvm_atom_sys_and_gen_i((

int

*)

__p

,

__v

);

570 return

__nvvm_atom_cas_gen_i((

int

*)

__p

, __cmp,

__v

);

574 return

__nvvm_atom_cta_cas_gen_i((

int

*)

__p

, __cmp,

__v

);

578 return

__nvvm_atom_sys_cas_gen_i((

int

*)

__p

, __cmp,

__v

);

581 return

__nvvm_atom_dec_gen_ui(

__p

,

__v

);

585 return

__nvvm_atom_cta_dec_gen_ui(

__p

,

__v

);

589 return

__nvvm_atom_sys_dec_gen_ui(

__p

,

__v

);

592 return

__nvvm_atom_xchg_gen_i((

int

*)

__p

,

__v

);

596 return

__nvvm_atom_cta_xchg_gen_i((

int

*)

__p

,

__v

);

600 return

__nvvm_atom_sys_xchg_gen_i((

int

*)

__p

,

__v

);

603 return

__nvvm_atom_inc_gen_ui(

__p

,

__v

);

607 return

__nvvm_atom_cta_inc_gen_ui(

__p

,

__v

);

611 return

__nvvm_atom_sys_inc_gen_ui(

__p

,

__v

);

614 return

__nvvm_atom_max_gen_ui(

__p

,

__v

);

618 return

__nvvm_atom_cta_max_gen_ui(

__p

,

__v

);

622 return

__nvvm_atom_sys_max_gen_ui(

__p

,

__v

);

625 return

__nvvm_atom_min_gen_ui(

__p

,

__v

);

629 return

__nvvm_atom_cta_min_gen_ui(

__p

,

__v

);

633 return

__nvvm_atom_sys_min_gen_ui(

__p

,

__v

);

636 return

__nvvm_atom_or_gen_i((

int

*)

__p

,

__v

);

639 return

__nvvm_atom_cta_or_gen_i((

int

*)

__p

,

__v

);

643 return

__nvvm_atom_sys_or_gen_i((

int

*)

__p

,

__v

);

646 return

__nvvm_atom_xor_gen_i((

int

*)

__p

,

__v

);

650 return

__nvvm_atom_cta_xor_gen_i((

int

*)

__p

,

__v

);

654 return

__nvvm_atom_sys_xor_gen_i((

int

*)

__p

,

__v

);

702 unsigned long long __v

) {

703 return

__nvvm_atom_add_gen_ll((

long long

*)

__p

,

__v

);

706 unsigned long long __v

) {

707 return

__nvvm_atom_cta_add_gen_ll((

long long

*)

__p

,

__v

);

710 unsigned long long __v

) {

711 return

__nvvm_atom_sys_add_gen_ll((

long long

*)

__p

,

__v

);

714 unsigned long long __v

) {

715 return

__nvvm_atom_and_gen_ll((

long long

*)

__p

,

__v

);

718 unsigned long long __v

) {

719 return

__nvvm_atom_cta_and_gen_ll((

long long

*)

__p

,

__v

);

722 unsigned long long __v

) {

723 return

__nvvm_atom_sys_and_gen_ll((

long long

*)

__p

,

__v

);

726 unsigned long long

__cmp,

727 unsigned long long __v

) {

728 return

__nvvm_atom_cas_gen_ll((

long long

*)

__p

, __cmp,

__v

);

731 unsigned long long

__cmp,

732 unsigned long long __v

) {

733 return

__nvvm_atom_cta_cas_gen_ll((

long long

*)

__p

, __cmp,

__v

);

736 unsigned long long

__cmp,

737 unsigned long long __v

) {

738 return

__nvvm_atom_sys_cas_gen_ll((

long long

*)

__p

, __cmp,

__v

);

741 unsigned long long __v

) {

742 return

__nvvm_atom_xchg_gen_ll((

long long

*)

__p

,

__v

);

745 unsigned long long __v

) {

746 return

__nvvm_atom_cta_xchg_gen_ll((

long long

*)

__p

,

__v

);

749 unsigned long long __v

) {

750 return

__nvvm_atom_sys_xchg_gen_ll((

long long

*)

__p

,

__v

);

753 unsigned long long __v

) {

754 return

__nvvm_atom_max_gen_ull(

__p

,

__v

);

757 unsigned long long __v

) {

758 return

__nvvm_atom_cta_max_gen_ull(

__p

,

__v

);

761 unsigned long long __v

) {

762 return

__nvvm_atom_sys_max_gen_ull(

__p

,

__v

);

765 unsigned long long __v

) {

766 return

__nvvm_atom_min_gen_ull(

__p

,

__v

);

769 unsigned long long __v

) {

770 return

__nvvm_atom_cta_min_gen_ull(

__p

,

__v

);

773 unsigned long long __v

) {

774 return

__nvvm_atom_sys_min_gen_ull(

__p

,

__v

);

777 unsigned long long __v

) {

778 return

__nvvm_atom_or_gen_ll((

long long

*)

__p

,

__v

);

781 unsigned long long __v

) {

782 return

__nvvm_atom_cta_or_gen_ll((

long long

*)

__p

,

__v

);

785 unsigned long long __v

) {

786 return

__nvvm_atom_sys_or_gen_ll((

long long

*)

__p

,

__v

);

789 unsigned long long __v

) {

790 return

__nvvm_atom_xor_gen_ll((

long long

*)

__p

,

__v

);

793 unsigned long long __v

) {

794 return

__nvvm_atom_cta_xor_gen_ll((

long long

*)

__p

,

__v

);

797 unsigned long long __v

) {

798 return

__nvvm_atom_sys_xor_gen_ll((

long long

*)

__p

,

__v

);

804 unsigned long long __b

) {

818#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 822 return

__nv_vabsdiffs2(

__a

,

__b

);

825 return

__nv_vabsdiffs4(

__a

,

__b

);

828 return

__nv_vabsdiffu2(

__a

,

__b

);

831 return

__nv_vabsdiffu4(

__a

,

__b

);

834 return

__nv_vabsss2(

__a

);

837 return

__nv_vabsss4(

__a

);

840 return

__nv_vadd2(

__a

,

__b

);

843 return

__nv_vadd4(

__a

,

__b

);

846 return

__nv_vaddss2(

__a

,

__b

);

849 return

__nv_vaddss4(

__a

,

__b

);

852 return

__nv_vaddus2(

__a

,

__b

);

855 return

__nv_vaddus4(

__a

,

__b

);

858 return

__nv_vavgs2(

__a

,

__b

);

861 return

__nv_vavgs4(

__a

,

__b

);

864 return

__nv_vavgu2(

__a

,

__b

);

867 return

__nv_vavgu4(

__a

,

__b

);

870 return

__nv_vcmpeq2(

__a

,

__b

);

873 return

__nv_vcmpeq4(

__a

,

__b

);

876 return

__nv_vcmpges2(

__a

,

__b

);

879 return

__nv_vcmpges4(

__a

,

__b

);

882 return

__nv_vcmpgeu2(

__a

,

__b

);

885 return

__nv_vcmpgeu4(

__a

,

__b

);

888 return

__nv_vcmpgts2(

__a

,

__b

);

891 return

__nv_vcmpgts4(

__a

,

__b

);

894 return

__nv_vcmpgtu2(

__a

,

__b

);

897 return

__nv_vcmpgtu4(

__a

,

__b

);

900 return

__nv_vcmples2(

__a

,

__b

);

903 return

__nv_vcmples4(

__a

,

__b

);

906 return

__nv_vcmpleu2(

__a

,

__b

);

909 return

__nv_vcmpleu4(

__a

,

__b

);

912 return

__nv_vcmplts2(

__a

,

__b

);

915 return

__nv_vcmplts4(

__a

,

__b

);

918 return

__nv_vcmpltu2(

__a

,

__b

);

921 return

__nv_vcmpltu4(

__a

,

__b

);

924 return

__nv_vcmpne2(

__a

,

__b

);

927 return

__nv_vcmpne4(

__a

,

__b

);

930 return

__nv_vhaddu2(

__a

,

__b

);

933 return

__nv_vhaddu4(

__a

,

__b

);

936 return

__nv_vmaxs2(

__a

,

__b

);

939 return

__nv_vmaxs4(

__a

,

__b

);

942 return

__nv_vmaxu2(

__a

,

__b

);

945 return

__nv_vmaxu4(

__a

,

__b

);

948 return

__nv_vmins2(

__a

,

__b

);

951 return

__nv_vmins4(

__a

,

__b

);

954 return

__nv_vminu2(

__a

,

__b

);

957 return

__nv_vminu4(

__a

,

__b

);

962 return

__nv_vnegss2(

__a

);

965 return

__nv_vnegss4(

__a

);

968 return

__nv_vsads2(

__a

,

__b

);

971 return

__nv_vsads4(

__a

,

__b

);

974 return

__nv_vsadu2(

__a

,

__b

);

977 return

__nv_vsadu4(

__a

,

__b

);

980 return

__nv_vseteq2(

__a

,

__b

);

983 return

__nv_vseteq4(

__a

,

__b

);

986 return

__nv_vsetges2(

__a

,

__b

);

989 return

__nv_vsetges4(

__a

,

__b

);

992 return

__nv_vsetgeu2(

__a

,

__b

);

995 return

__nv_vsetgeu4(

__a

,

__b

);

998 return

__nv_vsetgts2(

__a

,

__b

);

1001 return

__nv_vsetgts4(

__a

,

__b

);

1004 return

__nv_vsetgtu2(

__a

,

__b

);

1007 return

__nv_vsetgtu4(

__a

,

__b

);

1010 return

__nv_vsetles2(

__a

,

__b

);

1013 return

__nv_vsetles4(

__a

,

__b

);

1016 return

__nv_vsetleu2(

__a

,

__b

);

1019 return

__nv_vsetleu4(

__a

,

__b

);

1022 return

__nv_vsetlts2(

__a

,

__b

);

1025 return

__nv_vsetlts4(

__a

,

__b

);

1028 return

__nv_vsetltu2(

__a

,

__b

);

1031 return

__nv_vsetltu4(

__a

,

__b

);

1034 return

__nv_vsetne2(

__a

,

__b

);

1037 return

__nv_vsetne4(

__a

,

__b

);

1040 return

__nv_vsub2(

__a

,

__b

);

1043 return

__nv_vsub4(

__a

,

__b

);

1046 return

__nv_vsubss2(

__a

,

__b

);

1049 return

__nv_vsubss4(

__a

,

__b

);

1052 return

__nv_vsubus2(

__a

,

__b

);

1055 return

__nv_vsubus4(

__a

,

__b

);

1066 return

(

__a

<< shift) -

__a

;

1070

__asm__(

"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" 1072

:

"r"

(

__a

),

"r"

(0),

"r"

(0));

1077

__asm__(

"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" 1079

:

"r"

(

__a

),

"r"

(0),

"r"

(0));

1084

__asm__(

"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" 1086

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1092

__asm__(

"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" 1094

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1099

__asm__(

"vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" 1101

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1106

__asm__(

"vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" 1108

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1113

__asm__(

"vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" 1115

:

"r"

(

__a

),

"r"

(0),

"r"

(0));

1120

__asm__(

"vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" 1122

:

"r"

(

__a

),

"r"

(0),

"r"

(0));

1127

__asm__(

"vadd2.u32.u32.u32 %0,%1,%2,%3;" 1129

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1134

__asm__(

"vadd4.u32.u32.u32 %0,%1,%2,%3;" 1136

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1141

__asm__(

"vadd2.s32.s32.s32.sat %0,%1,%2,%3;" 1143

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1148

__asm__(

"vadd4.s32.s32.s32.sat %0,%1,%2,%3;" 1150

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1155

__asm__(

"vadd2.u32.u32.u32.sat %0,%1,%2,%3;" 1157

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1162

__asm__(

"vadd4.u32.u32.u32.sat %0,%1,%2,%3;" 1164

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1169

__asm__(

"vavrg2.s32.s32.s32 %0,%1,%2,%3;" 1171

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1176

__asm__(

"vavrg4.s32.s32.s32 %0,%1,%2,%3;" 1178

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1183

__asm__(

"vavrg2.u32.u32.u32 %0,%1,%2,%3;" 1185

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1190

__asm__(

"vavrg4.u32.u32.u32 %0,%1,%2,%3;" 1192

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1197

__asm__(

"vset2.u32.u32.eq %0,%1,%2,%3;" 1199

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1207

__asm__(

"vset4.u32.u32.eq %0,%1,%2,%3;" 1209

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1217

__asm__(

"vset2.s32.s32.ge %0,%1,%2,%3;" 1219

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1227

__asm__(

"vset4.s32.s32.ge %0,%1,%2,%3;" 1229

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1237

__asm__(

"vset2.u32.u32.ge %0,%1,%2,%3;" 1239

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1247

__asm__(

"vset4.u32.u32.ge %0,%1,%2,%3;" 1249

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1257

__asm__(

"vset2.s32.s32.gt %0,%1,%2,%3;" 1259

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1267

__asm__(

"vset4.s32.s32.gt %0,%1,%2,%3;" 1269

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1277

__asm__(

"vset2.u32.u32.gt %0,%1,%2,%3;" 1279

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1287

__asm__(

"vset4.u32.u32.gt %0,%1,%2,%3;" 1289

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1297

__asm__(

"vset2.s32.s32.le %0,%1,%2,%3;" 1299

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1307

__asm__(

"vset4.s32.s32.le %0,%1,%2,%3;" 1309

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1317

__asm__(

"vset2.u32.u32.le %0,%1,%2,%3;" 1319

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1327

__asm__(

"vset4.u32.u32.le %0,%1,%2,%3;" 1329

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1337

__asm__(

"vset2.s32.s32.lt %0,%1,%2,%3;" 1339

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1347

__asm__(

"vset4.s32.s32.lt %0,%1,%2,%3;" 1349

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1357

__asm__(

"vset2.u32.u32.lt %0,%1,%2,%3;" 1359

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1367

__asm__(

"vset4.u32.u32.lt %0,%1,%2,%3;" 1369

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1377

__asm__(

"vset2.u32.u32.ne %0,%1,%2,%3;" 1379

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1387

__asm__(

"vset4.u32.u32.ne %0,%1,%2,%3;" 1389

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1402 return

(((

__a

^

__b

) >> 1) & ~0x80008000u) + (

__a

&

__b

);

1405 return

(((

__a

^

__b

) >> 1) & ~0x80808080u) + (

__a

&

__b

);

1410 if

((

__a

& 0x8000) && (

__b

& 0x8000)) {

1414

r = (

__a

& mask) | (

__b

& ~mask);

1416

__asm__(

"vmax2.s32.s32.s32 %0,%1,%2,%3;" 1418

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1424

__asm__(

"vmax4.s32.s32.s32 %0,%1,%2,%3;" 1426

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1431

__asm__(

"vmax2.u32.u32.u32 %0,%1,%2,%3;" 1433

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1438

__asm__(

"vmax4.u32.u32.u32 %0,%1,%2,%3;" 1440

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1445

__asm__(

"vmin2.s32.s32.s32 %0,%1,%2,%3;" 1447

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1452

__asm__(

"vmin4.s32.s32.s32 %0,%1,%2,%3;" 1454

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1459

__asm__(

"vmin2.u32.u32.u32 %0,%1,%2,%3;" 1461

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1466

__asm__(

"vmin4.u32.u32.u32 %0,%1,%2,%3;" 1468

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1473

__asm__(

"vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" 1475

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1480

__asm__(

"vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" 1482

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1487

__asm__(

"vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" 1489

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1494

__asm__(

"vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" 1496

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1502

__asm__(

"vsub2.u32.u32.u32 %0,%1,%2,%3;" 1504

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1511

__asm__(

"vsub4.u32.u32.u32 %0,%1,%2,%3;" 1513

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1519

__asm__(

"vsub2.s32.s32.s32.sat %0,%1,%2,%3;" 1521

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1529

__asm__(

"vsub4.s32.s32.s32.sat %0,%1,%2,%3;" 1531

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1539

__asm__(

"vsub2.u32.u32.u32.sat %0,%1,%2,%3;" 1541

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1546

__asm__(

"vsub4.u32.u32.u32.sat %0,%1,%2,%3;" 1548

:

"r"

(

__a

),

"r"

(

__b

),

"r"

(0));

1555#ifndef __OPENMP_NVPTX__ 1562#ifndef __OPENMP_NVPTX__ 1564 return

__builtin_memcpy(

__a

,

__b

,

__c

);

1567 return

__builtin_memset(

__a

,

__b

,

__c

);

1571#pragma pop_macro("__DEVICE__"

)

__DEVICE__ int __iAtomicExch_block(int *__p, int __v)

__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned long long __umul64hi(unsigned long long __a, unsigned long long __b)

__DEVICE__ unsigned short __usAtomicCAS(unsigned short *__p, unsigned short __cmp, unsigned short __v)

__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v)

__DEVICE__ float __fsqrt_rd(float __a)

__DEVICE__ float __ull2float_rz(unsigned long long __a)

__DEVICE__ float __double2float_rz(double __a)

__DEVICE__ unsigned long long __double2ull_rz(double __a)

__DEVICE__ void __threadfence(void)

__DEVICE__ unsigned int __double2uint_rd(double __a)

__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b, unsigned int __c)

__DEVICE__ unsigned short __usAtomicCAS_block(unsigned short *__p, unsigned short __cmp, unsigned short __v)

__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p, unsigned long long __v)

__DEVICE__ float __ll2float_rn(long long __a)

__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p, unsigned long long __v)

__DEVICE__ float __fdiv_rd(float __a, float __b)

__DEVICE__ void __trap(void)

__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p, unsigned int __v)

__DEVICE__ float __frcp_rn(float __a)

__DEVICE__ double __dAtomicAdd_system(double *__p, double __v)

__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b)

__DEVICE__ float __uint_as_float(unsigned int __a)

__DEVICE__ unsigned long long __float2ull_rz(float __a)

__DEVICE__ double __ll2double_rz(long long __a)

__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __float_as_uint(float __a)

__DEVICE__ long long __float2ll_ru(float __a)

__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ long long __double2ll_rn(double __a)

__DEVICE__ unsigned int __double2uint_ru(double __a)

__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p, unsigned int __v)

__DEVICE__ double __dsub_ru(double __a, double __b)

__DEVICE__ long long __float2ll_rn(float __a)

__DEVICE__ int __any(int __a)

__DEVICE__ int __isinff(float __a)

__DEVICE__ double __uint2double_rn(unsigned int __a)

__DEVICE__ float __ull2float_rd(unsigned long long __a)

__DEVICE__ long long __float2ll_rd(float __a)

__DEVICE__ int __ffsll(long long __a)

__DEVICE__ double __drcp_ru(double __a)

__DEVICE__ int __popc(unsigned int __a)

__DEVICE__ long long clock64()

__DEVICE__ long long __mul64hi(long long __a, long long __b)

__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicAdd(int *__p, int __v)

__DEVICE__ long long __double_as_longlong(double __a)

__DEVICE__ double __ddiv_rn(double __a, double __b)

__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b)

__DEVICE__ int __mul24(int __a, int __b)

__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b)

__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v)

__DEVICE__ unsigned long long __double2ull_rd(double __a)

__DEVICE__ unsigned int __vabsss2(unsigned int __a)

__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicMax(int *__p, int __v)

__DEVICE__ unsigned int __float2uint_rn(float __a)

__DEVICE__ float __uint2float_ru(unsigned int __a)

__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp, unsigned int __v)

__DEVICE__ float __uint2float_rz(unsigned int __a)

__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b)

__DEVICE__ float __frcp_rz(float __a)

__DEVICE__ int __float2int_rn(float __a)

__DEVICE__ float __fmul_ru(float __a, float __b)

__DEVICE__ double __dsub_rd(double __a, double __b)

__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p, unsigned int __v)

__DEVICE__ float __int2float_rz(int __a)

__DEVICE__ int __iAtomicExch_system(int *__p, int __v)

__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b)

__DEVICE__ int __double2int_rd(double __a)

__DEVICE__ void __threadfence_block(void)

__DEVICE__ int __isnan(double __a)

__DEVICE__ int __isinf(double __a)

__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v)

__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v)

__DEVICE__ double __ull2double_rn(unsigned long long __a)

__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b)

__DEVICE__ float __frcp_ru(float __a)

__DEVICE__ double __dsqrt_rn(double __a)

__DEVICE__ float __double2float_rn(double __a)

__DEVICE__ float __frcp_rd(float __a)

__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b)

__DEVICE__ int __clzll(long long __a)

__DEVICE__ double __dmul_ru(double __a, double __b)

__DEVICE__ float __logf(float __a)

__DEVICE__ float __fmaf_ru(float __a, float __b, float __c)

__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b)

__DEVICE__ int __rhadd(int __a, int __b)

__DEVICE__ float __saturatef(float __a)

__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p, unsigned long long __v)

__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v)

__DEVICE__ int __mulhi(int __a, int __b)

__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __pm1(void)

__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c)

__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicAnd_system(int *__p, int __v)

__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v)

__DEVICE__ double __fma_rz(double __a, double __b, double __c)

__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c)

__DEVICE__ int __double2int_rz(double __a)

__DEVICE__ float __powf(float __a, float __b)

__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b)

__DEVICE__ double __fma_rd(double __a, double __b, double __c)

__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v)

__DEVICE__ int __double2loint(double __a)

__DEVICE__ double __dmul_rd(double __a, double __b)

__DEVICE__ unsigned long long __brevll(unsigned long long __a)

__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __ballot(int __a)

__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vabs4(unsigned int __a)

__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicAdd_block(int *__p, int __v)

__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b)

__DEVICE__ float __log10f(float __a)

__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ double __ddiv_ru(double __a, double __b)

__DEVICE__ int __syncthreads_and(int __a)

__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b)

__DEVICE__ double __ddiv_rd(double __a, double __b)

__DEVICE__ int __iAtomicOr_system(int *__p, int __v)

__DEVICE__ unsigned long long __float2ull_rd(float __a)

__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b)

__DEVICE__ double __dadd_ru(double __a, double __b)

__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v)

__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b)

__DEVICE__ float __fmul_rd(float __a, float __b)

__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v)

__DEVICE__ double __dAtomicAdd(double *__p, double __v)

__DEVICE__ unsigned long long __float2ull_rn(float __a)

__DEVICE__ float __fsub_rd(float __a, float __b)

__DEVICE__ double __dsub_rn(double __a, double __b)

__DEVICE__ float __expf(float __a)

__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v)

__DEVICE__ unsigned long long __double2ull_ru(double __a)

__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b)

__DEVICE__ long long __illAtomicMax(long long *__p, long long __v)

__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b)

__DEVICE__ long long __llAtomicXor(long long *__p, long long __v)

__DEVICE__ double __int2double_rn(int __a)

__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ float __fAtomicAdd(float *__p, float __v)

__DEVICE__ double __hiloint2double(int __a, int __b)

__DEVICE__ unsigned int __pm0(void)

__DEVICE__ unsigned int __vabsss4(unsigned int __a)

__DEVICE__ unsigned int __vabs2(unsigned int __a)

__DEVICE__ unsigned int __brev(unsigned int __a)

__DEVICE__ float __cosf(float __a)

__DEVICE__ float __ull2float_ru(unsigned long long __a)

__DEVICE__ float __double2float_ru(double __a)

__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b)

__DEVICE__ double __dmul_rn(double __a, double __b)

__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p, unsigned int __v)

__DEVICE__ float __fAtomicAdd_block(float *__p, float __v)

__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ float __fsub_rz(float __a, float __b)

__DEVICE__ float __uint2float_rd(unsigned int __a)

__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v)

__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b)

__DEVICE__ double __fma_ru(double __a, double __b, double __c)

__DEVICE__ int __float_as_int(float __a)

__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b)

__DEVICE__ int __ffs(int __a)

__DEVICE__ double __dsqrt_ru(double __a)

__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v)

__DEVICE__ float __fsqrt_rz(float __a)

__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)

__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v)

__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ double __dsub_rz(double __a, double __b)

__DEVICE__ double __ull2double_ru(unsigned long long __a)

__DEVICE__ float __fAtomicExch(float *__p, float __v)

__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v)

__DEVICE__ float __uint2float_rn(unsigned int __a)

__DEVICE__ int __iAtomicAnd_block(int *__p, int __v)

__DEVICE__ void __threadfence_system(void)

__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b)

__DEVICE__ int __signbitf(float __a)

__DEVICE__ float __fadd_rd(float __a, float __b)

__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b)

__DEVICE__ float __fmul_rz(float __a, float __b)

__DEVICE__ float __fmul_rn(float __a, float __b)

__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b)

__DEVICE__ int __hadd(int __a, int __b)

__DEVICE__ float __fadd_rz(float __a, float __b)

__DEVICE__ int __finite(double __a)

__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b)

__DEVICE__ double __fma_rn(double __a, double __b, double __c)

__DEVICE__ long long __float2ll_rz(float __a)

__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b)

__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c)

__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p, unsigned int __v)

__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v)

__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b)

__DEVICE__ int __float2int_ru(float __a)

__DEVICE__ float __int2float_rd(int __a)

__DEVICE__ float __fdiv_rn(float __a, float __b)

__DEVICE__ unsigned int __double2uint_rn(double __a)

__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __float2uint_rd(float __a)

__DEVICE__ int __float2int_rz(float __a)

__DEVICE__ float __fmaf_rn(float __a, float __b, float __c)

__DEVICE__ int __iAtomicMin(int *__p, int __v)

__DEVICE__ int __iAtomicAdd_system(int *__p, int __v)

__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b)

__DEVICE__ float __ll2float_rz(long long __a)

__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vneg4(unsigned int __a)

__DEVICE__ int __float2int_rd(float __a)

__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p, unsigned int __v)

__DEVICE__ float __fsqrt_rn(float __a)

__DEVICE__ double __dsqrt_rd(double __a)

__DEVICE__ int __iAtomicMax_system(int *__p, int __v)

__DEVICE__ unsigned long long __float2ull_ru(float __a)

__DEVICE__ float __fAtomicExch_block(float *__p, float __v)

__DEVICE__ int __iAtomicExch(int *__p, int __v)

__DEVICE__ float __int2float_ru(int __a)

__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v)

__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ double __drcp_rn(double __a)

__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b)

__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v)

__DEVICE__ int __clz(int __a)

__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b)

__DEVICE__ float __fmaf_rd(float __a, float __b, float __c)

__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v)

__DEVICE__ double __dadd_rd(double __a, double __b)

__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p, unsigned long long __v)

__DEVICE__ double __dsqrt_rz(double __a)

__DEVICE__ double __dadd_rn(double __a, double __b)

__DEVICE__ unsigned int __float2uint_rz(float __a)

__DEVICE__ long long __double2ll_ru(double __a)

__DEVICE__ int __iAtomicMax_block(int *__p, int __v)

__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c)

__DEVICE__ float __ll2float_ru(long long __a)

__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicOr_block(int *__p, int __v)

__DEVICE__ float __frsqrt_rn(float __a)

__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b)

__DEVICE__ double __ll2double_ru(long long __a)

__DEVICE__ unsigned int __vneg2(unsigned int __a)

__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p, unsigned int __v)

__DEVICE__ int __iAtomicMin_system(int *__p, int __v)

__DEVICE__ double __drcp_rd(double __a)

__DEVICE__ float __exp10f(float __a)

__DEVICE__ double __longlong_as_double(long long __a)

__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v)

__DEVICE__ float __int2float_rn(int __a)

__DEVICE__ long long __double2ll_rd(double __a)

__DEVICE__ int __double2int_ru(double __a)

__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b)

__DEVICE__ int __all(int __a)

__DEVICE__ unsigned int __vnegss2(unsigned int __a)

__DEVICE__ float __tanf(float __a)

__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p, unsigned int __v)

__DEVICE__ float __double2float_rd(double __a)

__DEVICE__ unsigned long long __double2ull_rn(double __a)

__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v)

__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v)

__DEVICE__ unsigned short __usAtomicCAS_system(unsigned short *__p, unsigned short __cmp, unsigned short __v)

__DEVICE__ float __fdiv_rz(float __a, float __b)

__DEVICE__ float __fmaf_rz(float __a, float __b, float __c)

__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b)

__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v)

__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicXor_block(int *__p, int __v)

__DEVICE__ int __finitef(float __a)

__DEVICE__ unsigned int __vnegss4(unsigned int __a)

__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b)

__DEVICE__ float __ull2float_rn(unsigned long long __a)

__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p, unsigned long long __v)

__DEVICE__ double __drcp_rz(double __a)

__DEVICE__ float __sinf(float __a)

__DEVICE__ float __fsub_ru(float __a, float __b)

__DEVICE__ float __fAtomicAdd_system(float *__p, float __v)

__DEVICE__ int __double2int_rn(double __a)

__DEVICE__ int __iAtomicOr(int *__p, int __v)

__DEVICE__ void * memset(void *__a, int __b, size_t __c)

__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b)

__DEVICE__ float __fAtomicExch_system(float *__p, float __v)

__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c)

__DEVICE__ float __int_as_float(int __a)

__DEVICE__ int __syncthreads_or(int __a)

__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b)

__DEVICE__ int __iAtomicMin_block(int *__p, int __v)

__DEVICE__ double __ull2double_rd(unsigned long long __a)

__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b)

__DEVICE__ float __fdividef(float __a, float __b)

__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v)

__DEVICE__ int __syncthreads_count(int __a)

__DEVICE__ long long __llAtomicOr(long long *__p, long long __v)

__DEVICE__ float __ll2float_rd(long long __a)

__DEVICE__ double __dmul_rz(double __a, double __b)

__DEVICE__ float __log2f(float __a)

__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p, unsigned int __v)

__DEVICE__ float __fadd_rn(float __a, float __b)

__DEVICE__ double __ull2double_rz(unsigned long long __a)

__DEVICE__ long long __illAtomicMin(long long *__p, long long __v)

__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p, unsigned long long __v)

__DEVICE__ float __fsqrt_ru(float __a)

__DEVICE__ long long __double2ll_rz(double __a)

__DEVICE__ int __signbitd(double __a)

__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift)

__DEVICE__ unsigned int __double2uint_rz(double __a)

__DEVICE__ unsigned int __uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v)

__DEVICE__ double __dAtomicAdd_block(double *__p, double __v)

__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b)

__DEVICE__ int __double2hiint(double __a)

__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b)

__DEVICE__ int __isnanf(float __a)

__DEVICE__ unsigned int __pm3(void)

__DEVICE__ double __ll2double_rd(long long __a)

__DEVICE__ float __fadd_ru(float __a, float __b)

__DEVICE__ float __fsub_rn(float __a, float __b)

__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v)

__DEVICE__ int __iAtomicXor_system(int *__p, int __v)

__DEVICE__ int __iAtomicAnd(int *__p, int __v)

__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b)

__DEVICE__ float __fdiv_ru(float __a, float __b)

__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b)

__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b)

__DEVICE__ double __ddiv_rz(double __a, double __b)

__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p, unsigned long long __v)

__DEVICE__ unsigned int __float2uint_ru(float __a)

__DEVICE__ double __dadd_rz(double __a, double __b)

__DEVICE__ int __isfinited(double __a)

__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p, unsigned int __v)

__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, unsigned int __c)

__DEVICE__ void __sincosf(float __a, float *__s, float *__c)

__DEVICE__ double __ll2double_rn(long long __a)

__DEVICE__ unsigned int __pm2(void)

__DEVICE__ int __popcll(unsigned long long __a)

__DEVICE__ int __iAtomicXor(int *__p, int __v)

__DEVICE__ float __nv_fsub_ru(float __a, float __b)

__DEVICE__ unsigned int __nv_double2uint_ru(double __a)

__DEVICE__ float __nv_int2float_rd(int __a)

__DEVICE__ float __nv_frcp_rz(float __a)

__DEVICE__ float __nv_fsqrt_rd(float __a)

__DEVICE__ double __nv_dsub_ru(double __a, double __b)

__DEVICE__ float __nv_fadd_rn(float __a, float __b)

__DEVICE__ unsigned long long __nv_double2ull_rn(double __a)

__DEVICE__ double __nv_uint2double_rn(unsigned int __i)

__DEVICE__ int __nv_rhadd(int __a, int __b)

__DEVICE__ int __nv_float2int_rn(float __a)

__DEVICE__ float __nv_fast_cosf(float __a)

__DEVICE__ long long __nv_double2ll_rd(double __a)

__DEVICE__ float __nv_frcp_rn(float __a)

__DEVICE__ long long __nv_double2ll_ru(double __a)

__DEVICE__ double __nv_fma_rz(double __a, double __b, double __c)

__DEVICE__ unsigned int __nv_float2uint_rd(float __a)

__DEVICE__ double __nv_dsub_rz(double __a, double __b)

__DEVICE__ float __nv_uint2float_rz(unsigned int __a)

__DEVICE__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c)

__DEVICE__ float __nv_ull2float_rn(unsigned long long __a)

__DEVICE__ double __nv_ddiv_rz(double __a, double __b)

__DEVICE__ float __nv_fsub_rd(float __a, float __b)

__DEVICE__ float __nv_ll2float_rd(long long __a)

__DEVICE__ unsigned int __nv_float_as_uint(float __a)

__DEVICE__ int __nv_ffsll(long long __a)

__DEVICE__ long long __nv_float2ll_ru(float __a)

__DEVICE__ float __nv_fsub_rz(float __a, float __b)

__DEVICE__ double __nv_hiloint2double(int __a, int __b)

__DEVICE__ float __nv_fast_expf(float __a)

__DEVICE__ int __nv_double2int_ru(double __a)

__DEVICE__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b)

__DEVICE__ double __nv_ll2double_rn(long long __a)

__DEVICE__ double __nv_drcp_rz(double __a)

__DEVICE__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c)

__DEVICE__ double __nv_dmul_rd(double __a, double __b)

__DEVICE__ double __nv_ll2double_rd(long long __a)

__DEVICE__ float __nv_uint2float_rn(unsigned int __a)

__DEVICE__ float __nv_double2float_rz(double __a)

__DEVICE__ unsigned long long __nv_float2ull_rn(float __a)

__DEVICE__ float __nv_fdiv_ru(float __a, float __b)

__DEVICE__ long long __nv_mul64hi(long long __a, long long __b)

__DEVICE__ long long __nv_double2ll_rn(double __a)

__DEVICE__ float __nv_fdiv_rd(float __a, float __b)

__DEVICE__ double __nv_longlong_as_double(long long __a)

__DEVICE__ int __nv_isnanf(float __a)

__DEVICE__ double __nv_dmul_ru(double __a, double __b)

__DEVICE__ double __nv_ull2double_rz(unsigned long long __a)

__DEVICE__ int __nv_float_as_int(float __a)

__DEVICE__ float __nv_int2float_ru(int __a)

__DEVICE__ float __nv_fmul_ru(float __a, float __b)

__DEVICE__ double __nv_ll2double_rz(long long __a)

__DEVICE__ unsigned int __nv_double2uint_rd(double __a)

__DEVICE__ float __nv_uint2float_ru(unsigned int __a)

__DEVICE__ long long __nv_float2ll_rd(float __a)

__DEVICE__ double __nv_fma_rn(double __a, double __b, double __c)

__DEVICE__ int __nv_isnand(double __a)

__DEVICE__ int __nv_float2int_ru(float __a)

__DEVICE__ double __nv_ll2double_ru(long long __a)

__DEVICE__ float __nv_fsqrt_rz(float __a)

__DEVICE__ float __nv_fast_fdividef(float __a, float __b)

__DEVICE__ float __nv_fsqrt_ru(float __a)

__DEVICE__ int __nv_isinff(float __a)

__DEVICE__ int __nv_signbitf(float __a)

__DEVICE__ double __nv_int2double_rn(int __a)

__DEVICE__ unsigned long long __nv_float2ull_rz(float __a)

__DEVICE__ float __nv_uint_as_float(unsigned int __a)

__DEVICE__ float __nv_fmul_rn(float __a, float __b)

__DEVICE__ int __nv_double2int_rn(double __a)

__DEVICE__ float __nv_fast_tanf(float __a)

__DEVICE__ int __nv_popcll(unsigned long long __a)

__DEVICE__ unsigned int __nv_float2uint_rn(float __a)

__DEVICE__ long long __nv_float2ll_rn(float __a)

__DEVICE__ int __nv_float2int_rz(float __a)

__DEVICE__ int __nv_uhadd(unsigned int __a, unsigned int __b)

__DEVICE__ long long __nv_double2ll_rz(double __a)

__DEVICE__ double __nv_fma_ru(double __a, double __b, double __c)

__DEVICE__ float __nv_fast_log2f(float __a)

__DEVICE__ int __nv_double2hiint(double __a)

__DEVICE__ float __nv_fmaf_rz(float __a, float __b, float __c)

__DEVICE__ float __nv_int2float_rn(int __a)

__DEVICE__ float __nv_fsqrt_rn(float __a)

__DEVICE__ double __nv_ull2double_rn(unsigned long long __a)

__DEVICE__ double __nv_ddiv_rn(double __a, double __b)

__DEVICE__ float __nv_frcp_ru(float __a)

__DEVICE__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c)

__DEVICE__ float __nv_ll2float_rz(long long __a)

__DEVICE__ int __nv_ffs(int __a)

__DEVICE__ double __nv_dmul_rn(double __a, double __b)

__DEVICE__ float __nv_fdiv_rn(float __a, float __b)

__DEVICE__ int __nv_double2int_rd(double __a)

__DEVICE__ void __nv_fast_sincosf(float __a, float *__s, float *__c)

__DEVICE__ int __nv_float2int_rd(float __a)

__DEVICE__ int __nv_isinfd(double __a)

__DEVICE__ float __nv_fmaf_ru(float __a, float __b, float __c)

__DEVICE__ int __nv_brev(int __a)

__DEVICE__ int __nv_mulhi(int __a, int __b)

__DEVICE__ double __nv_drcp_rn(double __a)

__DEVICE__ unsigned long long __nv_double2ull_rd(double __a)

__DEVICE__ double __nv_dsub_rn(double __a, double __b)

__DEVICE__ unsigned int __nv_float2uint_ru(float __a)

__DEVICE__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b)

__DEVICE__ float __nv_ll2float_ru(long long __a)

__DEVICE__ double __nv_dadd_ru(double __a, double __b)

__DEVICE__ unsigned int __nv_double2uint_rz(double __a)

__DEVICE__ float __nv_fmaf_rn(float __a, float __b, float __c)

__DEVICE__ unsigned int __nv_float2uint_rz(float __a)

__DEVICE__ unsigned int __nv_double2uint_rn(double __a)

__DEVICE__ double __nv_dsqrt_ru(double __a)

__DEVICE__ double __nv_dadd_rn(double __a, double __b)

__DEVICE__ double __nv_ddiv_ru(double __a, double __b)

__DEVICE__ unsigned long long __nv_double2ull_rz(double __a)

__DEVICE__ int __nv_mul24(int __a, int __b)

__DEVICE__ double __nv_drcp_ru(double __a)

__DEVICE__ float __nv_ull2float_rd(unsigned long long __a)

__DEVICE__ int __nv_double2loint(double __a)

__DEVICE__ float __nv_fast_logf(float __a)

__DEVICE__ unsigned long long __nv_float2ull_ru(float __a)

__DEVICE__ float __nv_fmul_rz(float __a, float __b)

__DEVICE__ int __nv_clzll(long long __a)

__DEVICE__ int __nv_popc(unsigned int __a)

__DEVICE__ float __nv_int2float_rz(int __a)

__DEVICE__ unsigned long long __nv_double2ull_ru(double __a)

__DEVICE__ double __nv_dadd_rz(double __a, double __b)

__DEVICE__ int __nv_finitef(float __a)

__DEVICE__ float __nv_saturatef(float __a)

__DEVICE__ float __nv_double2float_ru(double __a)

__DEVICE__ unsigned int __nv_usad(unsigned int __a, unsigned int __b, unsigned int __c)

__DEVICE__ long long __nv_brevll(long long __a)

__DEVICE__ float __nv_fsub_rn(float __a, float __b)

__DEVICE__ int __nv_double2int_rz(double __a)

__DEVICE__ int __nv_hadd(int __a, int __b)

__DEVICE__ float __nv_double2float_rn(double __a)

__DEVICE__ float __nv_fadd_rd(float __a, float __b)

__DEVICE__ double __nv_ull2double_rd(unsigned long long __a)

__DEVICE__ double __nv_dsqrt_rn(double __a)

__DEVICE__ unsigned long long __nv_double_as_longlong(double __a)

__DEVICE__ float __nv_fast_log10f(float __a)

__DEVICE__ int __nv_isfinited(double __a)

__DEVICE__ float __nv_frsqrt_rn(float __a)

__DEVICE__ float __nv_fast_exp10f(float __a)

__DEVICE__ double __nv_dmul_rz(double __a, double __b)

__DEVICE__ double __nv_dsqrt_rd(double __a)

__DEVICE__ float __nv_frcp_rd(float __a)

__DEVICE__ float __nv_double2float_rd(double __a)

__DEVICE__ unsigned long long __nv_float2ull_rd(float __a)

__DEVICE__ float __nv_uint2float_rd(unsigned int __a)

__DEVICE__ long long __nv_float2ll_rz(float __a)

__DEVICE__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c)

__DEVICE__ float __nv_ll2float_rn(long long __a)

__DEVICE__ double __nv_fma_rd(double __a, double __b, double __c)

__DEVICE__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b)

__DEVICE__ float __nv_fmul_rd(float __a, float __b)

__DEVICE__ float __nv_fast_powf(float __a, float __b)

__DEVICE__ float __nv_ull2float_ru(unsigned long long __a)

__DEVICE__ float __nv_fadd_ru(float __a, float __b)

__DEVICE__ int __nv_sad(int __a, int __b, int __c)

__DEVICE__ int __nv_signbitd(double __a)

__DEVICE__ int __nv_clz(int __a)

__DEVICE__ int __nv_byte_perm(int __a, int __b, int __c)

__DEVICE__ double __nv_dsub_rd(double __a, double __b)

__DEVICE__ double __nv_dsqrt_rz(double __a)

__DEVICE__ unsigned long long __nv_umul64hi(unsigned long long __a, unsigned long long __b)

__DEVICE__ float __nv_fadd_rz(float __a, float __b)

__DEVICE__ double __nv_drcp_rd(double __a)

__DEVICE__ double __nv_ull2double_ru(unsigned long long __a)

__DEVICE__ float __nv_int_as_float(int __a)

__DEVICE__ float __nv_fast_sinf(float __a)

__DEVICE__ float __nv_ull2float_rz(unsigned long long __a)

__DEVICE__ float __nv_fdiv_rz(float __a, float __b)

__DEVICE__ double __nv_ddiv_rd(double __a, double __b)

__DEVICE__ double __nv_dadd_rd(double __a, double __b)

__DEVICE__ float __nv_fmaf_rd(float __a, float __b, float __c)

_Float16 __2f16 __attribute__((ext_vector_type(2)))

Zeroes the upper 128 bits (bits 255:128) of all YMM registers.

static __inline__ vector float vector float vector float __c

static __inline__ vector float vector float __b

static __inline__ uint32_t volatile uint32_t * __p

static __inline__ void int __a

struct __storeu_i16 *__P __v


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4