A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/ocaml/ocaml/issues/11934 below:

Crash on parallel usage of Weak HashSet · Issue #11934 · ocaml/ocaml · GitHub

By using the HashSet from the Weak module in parallel we are seeing a crash on OCaml 5 on both the released version and trunk. I'm testing this on Linux, but we have also seen macOS crashes via GitHub actions.

I'm aware this is playing dirty as Weak isn't safe for parallel usage as documented in the interface.
Nevertheless, we would have expected "memory-safe behaviour" (an exception, a weird OCaml return value, ...) rather than a crash.

Here's a standalone test case:

(* Weak hash set with string elements; this is the structure under test. *)
module WHS = Weak.Make (String)

(* Initial size passed to [WHS.create]. *)
let weak_size = 16

(* Create a fresh table of initial size [weak_size]. *)
let init_sut () = WHS.create weak_size

(* Apply each command of [cs] to [sut] and collect the results as a list. *)
let interp_sut_res sut cs =
  let run cmd = cmd sut in
  List.map run cs

(* One round of the reproduction scenario.

   A fresh table is created, a short sequential prefix of commands is run on
   it from the calling domain, and then two domains are spawned that both
   run their own command lists against that same table with no
   synchronization on the table itself.

   Returns the triple [(pref_obs, obs1, obs2)]: the results of the sequential
   prefix, followed by the outcome of each spawned domain. Each outcome is
   [Ok results] if the whole command list ran, or [Error exn] if any command
   raised. *)
let agree_prop_par () =
  let sut = init_sut () in
  (* Sequential prefix: add one element and remove it again before any
     domain is spawned. *)
  let pref_obs = interp_sut_res sut [(fun t -> WHS.add t "=\0059\184");
                                     (fun t -> WHS.remove t "=\0059\184")] in
  (* Start flag: dom1 spins while it is true, dom2 sets it to false. *)
  let wait = Atomic.make true in
  let dom1 =
    Domain.spawn (fun () ->
        (* Busy-wait until dom2 has started and cleared the flag, so that
           the command lists of both domains run at roughly the same time. *)
        while Atomic.get wait do Domain.cpu_relax() done;
        (* Any exception raised by a command is captured as [Error]. *)
        try
          Ok (interp_sut_res sut
                [(fun t -> WHS.mem t "" |>ignore);
                 (fun t -> WHS.merge t "K\220\181\153\"yT\137\206\165\235\198OQ\015k\214@\155X\017}\233\016\215\232\183\025\029\203B\138\167E\219P\137s\127\201\151q\184\216\131y\027\222\171\215@" |>ignore);
                 (fun t -> WHS.clear t)])
        with exn -> Error exn) in
  let dom2 =
    Domain.spawn (fun () ->
        (* Release dom1 as the first action, then run this command list. *)
        Atomic.set wait false;
        try
          Ok (interp_sut_res sut
                [(fun t -> WHS.find_all t "=\0059\184" |>ignore);
                 (fun t -> WHS.remove t "\004\219");
                 (fun t -> WHS.add t "=\0059\184");
                 (fun t -> WHS.mem t "=\0059\184"|>ignore);
                 (fun t -> WHS.find_opt t "=\0059\184" |>ignore);
                 (fun t -> WHS.find t "\148" |>ignore)]) with exn -> Error exn) in
  (* Wait for both domains to finish before returning the observations. *)
  let obs1 = Domain.join dom1 in
  let obs2 = Domain.join dom2 in
  (pref_obs,obs1,obs2)

(* Driver: run the parallel scenario for every index from 0 to 10_000
   inclusive, discarding each returned observation triple. *)
let () =
  for _round = 0 to 10_000 do
    ignore (agree_prop_par ())
  done

On my machine it sometimes aborts with an allocation failure and sometimes crashes with a segfault:

/tmp/weaktest$ ocamlopt.opt -o weaktest.exe weaktest.ml
/tmp/weaktest$ ./weaktest.exe
Fatal error: allocation failure during minor GC
Aborted (core dumped)
/tmp/weaktest$ ./weaktest.exe 
Segmentation fault (core dumped)

Below follows a representative stack trace consisting of 6 threads - 3 domains and 3 backup threads.
As far as I can see …

Stack trace
(gdb) thread apply all bt

Thread 6 (Thread 0x7f0b5bfff700 (LWP 2284399)):
#0  0x0000558b2896ca6b in pool_initialize (owner=<optimized out>, sz=4, r=0x7f0b60720000) at runtime/shared_heap.c:246
#1  pool_find (sz=4, local=0x558b28b01dd0) at runtime/shared_heap.c:352
#2  pool_allocate (sz=4, local=0x558b28b01dd0) at runtime/shared_heap.c:360
#3  caml_shared_try_alloc (local=0x558b28b01dd0, wosize=wosize@entry=4, tag=252, pinned=pinned@entry=0) at runtime/shared_heap.c:407
#4  0x0000558b2896928b in alloc_shared (wosize=wosize@entry=4, tag=<optimized out>, d=<optimized out>, d=<optimized out>) at runtime/minor_gc.c:149
#5  0x0000558b289695d2 in oldify_one (st_v=st_v@entry=0x7f0b5bffee00, v=139686860234608, p=0x558b2899bd68) at runtime/minor_gc.c:322
#6  0x0000558b28969b98 in caml_empty_minor_heap_promote (domain=domain@entry=0x558b28b01940, participating_count=participating_count@entry=3, participating=participating@entry=0x558b289b76c0 <stw_request+64>) at runtime/minor_gc.c:539
#7  0x0000558b28969fc2 in caml_stw_empty_minor_heap_no_major_slice (domain=domain@entry=0x558b28b01940, participating_count=3, participating=0x558b289b76c0 <stw_request+64>, unused=<optimized out>) at runtime/minor_gc.c:695
#8  0x0000558b2896a0b2 in caml_stw_empty_minor_heap (domain=0x558b28b01940, unused=<optimized out>, participating_count=<optimized out>, participating=<optimized out>) at runtime/minor_gc.c:735
#9  0x0000558b28958ed8 in stw_handler (domain=0x558b28b01940) at runtime/domain.c:1280
#10 handle_incoming (s=<optimized out>) at runtime/domain.c:314
#11 0x0000558b2895908a in caml_handle_incoming_interrupts () at runtime/domain.c:946
#12 backup_thread_func (v=0x558b289aea20 <all_domains>) at runtime/domain.c:946
#13 0x00007f0b71aef609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#14 0x00007f0b71a14133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Thread 5 (Thread 0x7f0b5affd700 (LWP 2284401)):
#0  __lll_lock_wait (futex=futex@entry=0x558b289aecf8 <all_domains+728>, private=0) at lowlevellock.c:52
#1  0x00007f0b71af20a3 in __GI___pthread_mutex_lock (mutex=mutex@entry=0x558b289aecf8 <all_domains+728>) at ../nptl/pthread_mutex_lock.c:80
#2  0x0000558b28958fb1 in caml_plat_lock (m=0x558b289aecf8 <all_domains+728>) at runtime/caml/platform.h:156
#3  backup_thread_func (v=0x558b289aec50 <all_domains+560>) at runtime/domain.c:965
#4  0x00007f0b71aef609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5  0x00007f0b71a14133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Thread 4 (Thread 0x7f0b60f6b700 (LWP 2284398)):
#0  __lll_lock_wait (futex=futex@entry=0x558b289aebe0 <all_domains+448>, private=0) at lowlevellock.c:52
#1  0x00007f0b71af20a3 in __GI___pthread_mutex_lock (mutex=mutex@entry=0x558b289aebe0 <all_domains+448>) at ../nptl/pthread_mutex_lock.c:80
#2  0x0000558b28958fb1 in caml_plat_lock (m=0x558b289aebe0 <all_domains+448>) at runtime/caml/platform.h:156
#3  backup_thread_func (v=0x558b289aeb38 <all_domains+280>) at runtime/domain.c:965
#4  0x00007f0b71aef609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5  0x00007f0b71a14133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Thread 3 (Thread 0x7f0b718f3b80 (LWP 2284396)):
#0  __lll_lock_wait (futex=futex@entry=0x558b28b4ca30, private=0) at lowlevellock.c:52
#1  0x00007f0b71af407b in __pthread_mutex_cond_lock (mutex=mutex@entry=0x558b28b4ca30) at ../nptl/pthread_mutex_lock.c:80
#2  0x00007f0b71af6260 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x558b28b4ca30, cond=0x558b28b4ca60) at pthread_cond_wait.c:636
#3  __pthread_cond_wait (cond=cond@entry=0x558b28b4ca60, mutex=mutex@entry=0x558b28b4ca30) at pthread_cond_wait.c:647
#4  0x0000558b2896f690 in sync_condvar_wait (m=0x558b28b4ca30, c=0x558b28b4ca60) at runtime/sync_posix.h:122
#5  caml_ml_condition_wait (wcond=<optimized out>, wmut=<optimized out>) at runtime/sync.c:172
#6  <signal handler called>
#7  0x0000558b2895272c in camlStdlib__Domain__loop_710 () at domain.ml:250
#8  0x0000558b289526cd in camlStdlib__Domain__join_705 () at domain.ml:256
#9  0x0000558b289357f0 in camlWeaktest__agree_prop_par_554 ()
#10 0x0000558b28935eae in camlWeaktest__entry ()
#11 0x0000558b28934a9b in caml_program ()
#12 <signal handler called>
#13 0x0000558b28973a30 in caml_startup_common (pooling=<optimized out>, argv=0x7ffceb64bee8) at runtime/startup_nat.c:129
#14 caml_startup_common (argv=0x7ffceb64bee8, pooling=<optimized out>) at runtime/startup_nat.c:85
#15 0x0000558b28973a7f in caml_startup_exn (argv=<optimized out>) at runtime/startup_nat.c:141
#16 caml_startup (argv=<optimized out>) at runtime/startup_nat.c:141
#17 0x0000558b28934852 in main (argc=<optimized out>, argv=<optimized out>) at runtime/main.c:37

Thread 2 (Thread 0x7f0b6176c700 (LWP 2284397)):
#0  __pthread_mutex_unlock_usercnt (decr=1, mutex=0x558b289bb460 <pool_freelist>) at pthread_mutex_unlock.c:58
#1  __GI___pthread_mutex_unlock (mutex=mutex@entry=0x558b289bb460 <pool_freelist>) at pthread_mutex_unlock.c:357
#2  0x0000558b2896c9af in caml_plat_unlock (m=0x558b289bb460 <pool_freelist>) at runtime/caml/platform.h:175
#3  pool_acquire (local=0x7f0b5c003000) at runtime/shared_heap.c:185
#4  pool_find (sz=5, local=0x7f0b5c003000) at runtime/shared_heap.c:342
#5  pool_allocate (sz=5, local=0x7f0b5c003000) at runtime/shared_heap.c:360
#6  caml_shared_try_alloc (local=0x7f0b5c003000, wosize=wosize@entry=5, tag=247, pinned=pinned@entry=0) at runtime/shared_heap.c:407
#7  0x0000558b2896928b in alloc_shared (wosize=wosize@entry=5, tag=<optimized out>, d=<optimized out>, d=<optimized out>) at runtime/minor_gc.c:149
#8  0x0000558b2896943f in oldify_one (st_v=st_v@entry=0x7f0b6176bcb0, v=139686860233280, p=p@entry=0x7f0b6187ff88) at runtime/minor_gc.c:292
#9  0x0000558b28969752 in oldify_mopup (st=st@entry=0x7f0b6176bcb0, do_ephemerons=do_ephemerons@entry=1) at runtime/minor_gc.c:406
#10 0x0000558b28969c53 in caml_empty_minor_heap_promote (domain=domain@entry=0x7f0b5c002b70, participating_count=participating_count@entry=3, participating=participating@entry=0x558b289b76c0 <stw_request+64>) at runtime/minor_gc.c:590
#11 0x0000558b28969fc2 in caml_stw_empty_minor_heap_no_major_slice (domain=domain@entry=0x7f0b5c002b70, participating_count=3, participating=participating@entry=0x558b289b76c0 <stw_request+64>, unused=0x0) at runtime/minor_gc.c:695
#12 0x0000558b2896a0b2 in caml_stw_empty_minor_heap (domain=domain@entry=0x7f0b5c002b70, unused=unused@entry=0x0, participating_count=<optimized out>, participating=participating@entry=0x558b289b76c0 <stw_request+64>) at runtime/minor_gc.c:735
#13 0x0000558b289592c4 in caml_try_run_on_all_domains_with_spin_work (handler=handler@entry=0x558b2896a0a0 <caml_stw_empty_minor_heap>, data=data@entry=0x0, leader_setup=leader_setup@entry=0x558b28969120 <caml_empty_minor_heap_setup>, enter_spin_callback=enter_spin_callback@entry=0x558b28969300 <caml_do_opportunistic_major_slice>, enter_spin_data=enter_spin_data@entry=0x0) at runtime/domain.c:1462
#14 0x0000558b2896a179 in caml_try_stw_empty_minor_heap_on_all_domains () at runtime/minor_gc.c:771
#15 0x0000558b2896a195 in caml_empty_minor_heaps_once () at runtime/minor_gc.c:791
#16 0x0000558b28958a77 in domain_terminate () at runtime/domain.c:1675
#17 domain_thread_func (v=<optimized out>) at runtime/domain.c:1094
#18 0x00007f0b71aef609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#19 0x00007f0b71a14133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Thread 1 (Thread 0x7f0b5b7fe700 (LWP 2284400)):
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007f0b71917859 in __GI_abort () at abort.c:79
#2  0x0000558b2896a692 in caml_fatal_error (msg=msg@entry=0x558b2897e848 "allocation failure during minor GC") at runtime/misc.c:122
#3  0x0000558b289692b2 in alloc_shared (wosize=wosize@entry=7533666103311081, tag=<optimized out>, d=<optimized out>, d=<optimized out>) at runtime/minor_gc.c:153
#4  0x0000558b2896943f in oldify_one (st_v=0x7f0b5b7fda00, v=139686864428416, p=0x7f0b5401b4d0) at runtime/minor_gc.c:292
#5  0x0000558b2895c29d in scan_stack_frames (fflags=(unknown: 0), stack=0x7f0b540135b0, stack=0x7f0b540135b0, gc_regs=0x0, fdata=0x7f0b5b7fda00, f=0x558b28969330 <oldify_one>) at runtime/fiber.c:266
#6  caml_scan_stack (f=f@entry=0x558b28969330 <oldify_one>, fflags=fflags@entry=(unknown: 0), fdata=fdata@entry=0x7f0b5b7fda00, stack=0x7f0b540135b0, stack@entry=0x558b28af9930, gc_regs=gc_regs@entry=0x0) at runtime/fiber.c:288
#7  0x0000558b2896b547 in caml_do_local_roots (f=f@entry=0x558b28969330 <oldify_one>, fflags=(unknown: 0), fflags@entry=SCANNING_ONLY_YOUNG_VALUES, fdata=fdata@entry=0x7f0b5b7fda00, local_roots=<optimized out>, current_stack=0x558b28af9930, v_gc_regs=0x0) at runtime/roots.c:69
#8  0x0000558b28969cd3 in caml_empty_minor_heap_promote (domain=domain@entry=0x7f0b54002b70, participating_count=participating_count@entry=3, participating=participating@entry=0x558b289b76c0 <stw_request+64>) at runtime/minor_gc.c:621
#9  0x0000558b28969fc2 in caml_stw_empty_minor_heap_no_major_slice (domain=domain@entry=0x7f0b54002b70, participating_count=3, participating=0x558b289b76c0 <stw_request+64>, unused=<optimized out>) at runtime/minor_gc.c:695
#10 0x0000558b2896a0b2 in caml_stw_empty_minor_heap (domain=0x7f0b54002b70, unused=<optimized out>, participating_count=<optimized out>, participating=<optimized out>) at runtime/minor_gc.c:735
#11 0x0000558b28958ed8 in stw_handler (domain=0x7f0b54002b70) at runtime/domain.c:1280
#12 handle_incoming (s=<optimized out>) at runtime/domain.c:314
#13 0x0000558b28959720 in caml_handle_incoming_interrupts () at runtime/domain.c:1541
#14 caml_handle_gc_interrupt () at runtime/domain.c:1541
#15 0x0000558b2896d90c in caml_do_pending_actions_exn () at runtime/signals.c:304
#16 0x0000558b2896d9dc in caml_process_pending_actions_with_root_exn (root=<optimized out>, root@entry=1) at runtime/signals.c:338
#17 0x0000558b2896da42 in caml_process_pending_actions_with_root (root=1) at runtime/signals.c:358
#18 caml_process_pending_actions () at runtime/signals.c:358
#19 0x0000558b28971b54 in ephe_get_field_copy (e=<optimized out>, offset=<optimized out>) at runtime/weak.c:333
#20 <signal handler called>
#21 0x0000558b28952ecc in camlStdlib__Weak__get_copy_397 () at weak.ml:55
#22 0x0000558b289550fc in camlStdlib__Weak__loop_825 () at weak.ml:332
#23 0x0000558b28935cec in camlWeaktest__fun_716 ()
#24 0x0000558b289410a8 in camlStdlib__List__map_483 () at list.ml:92
#25 0x0000558b289410bf in camlStdlib__List__map_483 () at list.ml:92
#26 0x0000558b289410bf in camlStdlib__List__map_483 () at list.ml:92
#27 0x0000558b289410bf in camlStdlib__List__map_483 () at list.ml:92
#28 0x0000558b28935baf in camlWeaktest__fun_684 ()
#29 0x0000558b289524d7 in camlStdlib__Domain__body_696 () at domain.ml:202
#30 <signal handler called>
#31 0x0000558b289568d0 in caml_callback_exn (closure=<optimized out>, arg=<optimized out>, arg@entry=1) at runtime/callback.c:168
#32 0x0000558b28956e1d in caml_callback (closure=<optimized out>, arg=arg@entry=1) at runtime/callback.c:256
#33 0x0000558b289589aa in domain_thread_func (v=0x7ffceb64bbc0) at runtime/domain.c:1093
#34 0x00007f0b71aef609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#35 0x00007f0b71a14133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) 

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4