You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Describe the bug
A weird assertion error occurs, but only on epoch 27 (tested 5 times now; the crash is always at the same spot, even if the batch size is changed). Using the main branch.
The error is triggered by this line:
let branches = Tensor::cat(branches,0).to_device(&self.device);
[Train - Epoch 27 - Iteration 966] Loss 0.614
thread '<unnamed>' panicked at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-22.1.0/src/id.rs:50:9:
assertion `left == right` failed
left: 0
right: 1
stack backtrace:
0: 0x6226c800fb65 - std::backtrace_rs::backtrace::libunwind::trace::h1a07e5dba0da0cd2
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/libunwind.rs:105:5
1: 0x6226c800fb65 - std::backtrace_rs::backtrace::trace_unsynchronized::h61b9b8394328c0bc
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x6226c800fb65 - std::sys_common::backtrace::_print_fmt::h1c5e18b460934cff
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys_common/backtrace.rs:68:5
3: 0x6226c800fb65 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h1e1a1972118942ad
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys_common/backtrace.rs:44:22
4: 0x6226c803de2b - core::fmt::rt::Argument::fmt::h07af2b4071d536cd
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/fmt/rt.rs:165:63
5: 0x6226c803de2b - core::fmt::write::hc090a2ffd6b28c4a
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/fmt/mod.rs:1157:21
6: 0x6226c800c00f - std::io::Write::write_fmt::h8898bac6ff039a23
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/io/mod.rs:1832:15
7: 0x6226c800f93e - std::sys_common::backtrace::_print::h4e80c5803d4ee35b
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys_common/backtrace.rs:47:5
8: 0x6226c800f93e - std::sys_common::backtrace::print::ha96650907276675e
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys_common/backtrace.rs:34:9
9: 0x6226c8010d99 - std::panicking::default_hook::{{closure}}::h215c2a0a8346e0e0
10: 0x6226c8010add - std::panicking::default_hook::h207342be97478370
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panicking.rs:298:9
11: 0x6226c80112a3 - std::panicking::rust_panic_with_hook::hac8bdceee1e4fe2c
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panicking.rs:795:13
12: 0x6226c8011184 - std::panicking::begin_panic_handler::{{closure}}::h00d785e82757ce3c
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panicking.rs:664:13
13: 0x6226c8010029 - std::sys_common::backtrace::__rust_end_short_backtrace::h1628d957bcd06996
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys_common/backtrace.rs:171:18
14: 0x6226c8010eb7 - rust_begin_unwind
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panicking.rs:652:5
15: 0x6226c803acf3 - core::panicking::panic_fmt::hdc63834ffaaefae5
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/panicking.rs:72:14
16: 0x6226c803b0ae - core::panicking::assert_failed_inner::hda4754f94c1c1cb1
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/panicking.rs:409:17
17: 0x6226c7cf845f - core::panicking::assert_failed::h649a82cc51cfb044
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/panicking.rs:364:5
18: 0x6226c7c9ff37 - wgpu_core::id::RawId::zip::hec6d2da2eb326b53
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-22.1.0/src/id.rs:50:9
19: 0x6226c7be9b10 - wgpu_core::id::Id<T>::zip::hbcca811fb4ed870d
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-22.1.0/src/id.rs:171:12
20: 0x6226c7be9b10 - wgpu_core::identity::IdentityValues::alloc::h1b97904fda39e24e
21: 0x6226c7be9b10 - wgpu_core::identity::IdentityManager<T>::process::h017551fa92e06b31
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-22.1.0/src/identity.rs:107:9
22: 0x6226c7ba56bc - wgpu_core::registry::Registry<T>::prepare::h788c73050a88a59a
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-22.1.0/src/registry.rs:99:25
23: 0x6226c7ba56bc - wgpu_core::device::queue::<impl wgpu_core::global::Global>::queue_create_staging_buffer::he2972d22a76e66d3
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-22.1.0/src/device/queue.rs:449:19
24: 0x6226c7c8879c - <wgpu::backend::wgpu_core::ContextWgpuCore as wgpu::context::Context>::queue_create_staging_buffer::h5871eafe5e3fbf41
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-22.1.0/src/backend/wgpu_core.rs:2214:15
25: 0x6226c7c8bb59 - <T as wgpu::context::DynContext>::queue_create_staging_buffer::h8a64aa15e08d06ee
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-22.1.0/src/context.rs:2994:9
26: 0x6226c7af18a8 - wgpu::Queue::write_buffer_with::hc01876246eee9544
at /home/joseph/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-22.1.0/src/lib.rs:5454:30
27: 0x6226c77a294c - <cubecl_wgpu::compute::server::WgpuServer<MM> as cubecl_runtime::server::ComputeServer>::create::hf9ef8fe387450daf
at /home/joseph/.cargo/git/checkouts/cubecl-aa41a28b39b598f9/bee7886/crates/cubecl-wgpu/src/compute/server.rs:212:13
28: 0x6226c76a25c9 - <cubecl_runtime::channel::mutex::MutexComputeChannel<Server> as cubecl_runtime::channel::base::ComputeChannel<Server>>::create::hc5cb12efe10a7882
at /home/joseph/.cargo/git/checkouts/cubecl-aa41a28b39b598f9/bee7886/crates/cubecl-runtime/src/channel/mutex.rs:53:9
29: 0x6226c76a25c9 - cubecl_runtime::client::ComputeClient<Server,Channel>::create::h3c68eb7059e2cd1c
at /home/joseph/.cargo/git/checkouts/cubecl-aa41a28b39b598f9/bee7886/crates/cubecl-runtime/src/client.rs:66:9
30: 0x6226c76a25c9 - cubecl_core::codegen::execution::execute_settings::h4acd522494b4b311
at /home/joseph/.cargo/git/checkouts/cubecl-aa41a28b39b598f9/bee7886/crates/cubecl-core/src/codegen/execution.rs:290:23
31: 0x6226c76a1db2 - cubecl_core::codegen::execution::execute_dynamic::hd392bcd47d3e989f
at /home/joseph/.cargo/git/checkouts/cubecl-aa41a28b39b598f9/bee7886/crates/cubecl-core/src/codegen/execution.rs:206:20
32: 0x6226c770c0b5 - cubecl_core::codegen::execution::Execution<K,R,(&[E],)>::execute::he44daeb471916329
at /home/joseph/.cargo/git/checkouts/cubecl-aa41a28b39b598f9/bee7886/crates/cubecl-core/src/codegen/execution.rs:112:9
33: 0x6226c770c0b5 - burn_jit::kernel::index::slice_assign::slice_assign::h286485b57eab134e
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-jit/src/kernel/index/slice_assign.rs:141:10
34: 0x6226c7819eb8 - burn_jit::ops::int_ops::<impl burn_tensor::tensor::ops::int_tensor::IntTensorOps<burn_jit::backend::JitBackend<R,F,I>> for burn_jit::backend::JitBackend<R,F,I>>::int_slice_assign::h168b198041f8de54
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-jit/src/ops/int_ops.rs:66:9
35: 0x6226c7819eb8 - <burn_tensor::tensor::api::kind::Int as burn_tensor::tensor::api::base::BasicOps<B>>::slice_assign::h4eaa0ba2665ac0b9
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-tensor/src/tensor/api/base.rs:1924:9
36: 0x6226c7819eb8 - burn_tensor::tensor::api::base::Tensor<B,_,K>::slice_assign::hb52c9bbfaf427b66
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-tensor/src/tensor/api/base.rs:679:19
37: 0x6226c7714fad - burn_tensor::tensor::ops::modules::cat::cat_with_slice_assign::h0d06cc9d4fb6c978
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-tensor/src/tensor/ops/modules/cat.rs:33:25
38: 0x6226c7831af6 - burn_tensor::tensor::ops::int_tensor::IntTensorOps::int_cat::h417db617652049ea
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-tensor/src/tensor/ops/int_tensor.rs:278:9
39: 0x6226c7831af6 - <burn_fusion::ops::int::<impl burn_tensor::tensor::ops::int_tensor::IntTensorOps<burn_fusion::backend::Fusion<B>> for burn_fusion::backend::Fusion<B>>::int_cat::CatOps<B,_> as burn_fusion::stream::execution::base::Operation<<B as burn_fusion::backend::FusionBackend>::FusionRuntime>>::execute::hf1bbbcc82dffc798
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/ops/int.rs:506:30
40: 0x6226c76f8ee6 - burn_fusion::stream::execution::base::<impl burn_fusion::stream::base::OperationQueue<R>>::execute_operations::h799e53f486001ee6
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/stream/execution/base.rs:58:13
41: 0x6226c76f8ee6 - burn_fusion::stream::execution::base::<impl burn_fusion::stream::base::OperationQueue<R>>::execute::h05d5b66d64105ede
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/stream/execution/base.rs:36:46
42: 0x6226c76f8ee6 - <burn_fusion::stream::multi::Segment<R> as burn_fusion::stream::execution::processor::StreamSegment<<R as burn_fusion::backend::FusionRuntime>::Optimization>>::execute::h04f93808bbcbdc92
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/stream/multi.rs:146:9
43: 0x6226c76f53e4 - burn_fusion::stream::execution::processor::Processor<O>::process::ha3056b7c1e2e2f5a
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/stream/execution/processor.rs:71:21
44: 0x6226c76f4019 - burn_fusion::stream::multi::MultiStream<R>::register::h9d50ee63f080a775
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/stream/multi.rs:51:9
45: 0x6226c76d170e - burn_fusion::server::FusionServer<R>::register::he6e1a1c050ff6f8b
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/server.rs:30:9
46: 0x6226c76d170e - <burn_fusion::client::mutex::MutexFusionClient<R> as burn_fusion::client::base::FusionClient<R>>::register::ha452d1ebe6b87173
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/client/mutex.rs:48:14
47: 0x6226c76d170e - burn_fusion::ops::int::<impl burn_tensor::tensor::ops::int_tensor::IntTensorOps<burn_fusion::backend::Fusion<B>> for burn_fusion::backend::Fusion<B>>::int_cat::h8e540e8505fcdda8
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-fusion/src/ops/int.rs:530:9
48: 0x6226c781da28 - burn_autodiff::ops::int_tensor::<impl burn_tensor::tensor::ops::int_tensor::IntTensorOps<burn_autodiff::backend::Autodiff<B,C>> for burn_autodiff::backend::Autodiff<B,C>>::int_cat::h1fe9094b5d526e05
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-autodiff/src/ops/int_tensor.rs:63:9
49: 0x6226c781da28 - <burn_tensor::tensor::api::kind::Int as burn_tensor::tensor::api::base::BasicOps<B>>::cat::h930f6cbeecd6bd27
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-tensor/src/tensor/api/base.rs:1969:9
50: 0x6226c781da28 - burn_tensor::tensor::api::base::Tensor<B,_,K>::cat::he3ab3b7ade020c91
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-tensor/src/tensor/api/base.rs:775:19
51: 0x6226c7789bd7 - <taxotangolib::model::TangoBatcher<B> as burn_core::data::dataloader::batcher::Batcher<taxotangolib::model::TaxaDistance<_>,taxotangolib::model::TangoBatch<B>>>::batch::h983835e16d77a25b
at /mnt/data/development/taxotango/src/model.rs:217:24
52: 0x6226c7730cc6 - <burn_core::data::dataloader::batch::BatchDataloaderIterator<I,O> as core::iter::traits::iterator::Iterator>::next::hd56063b4f6fae01b
53: 0x6226c773a1ca - <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next::hdba816b3ec983080
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/alloc/src/boxed.rs:1956:9
54: 0x6226c773a1ca - <burn_core::data::dataloader::multithread::MultiThreadDataLoader<O> as burn_core::data::dataloader::base::DataLoader<O>>::iter::{{closure}}::{{closure}}::hc0d15202e4c51e2c
at /home/joseph/.cargo/git/checkouts/burn-178c6829f420dae1/58129d1/crates/burn-core/src/data/dataloader/multithread.rs:64:53
55: 0x6226c773a1ca - std::sys_common::backtrace::__rust_begin_short_backtrace::hfa668fbfef1f2b94
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys_common/backtrace.rs:155:18
56: 0x6226c773c5a5 - std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}::hde37980062f244db
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/thread/mod.rs:542:17
57: 0x6226c773c5a5 - <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::h1a272682b6ffed44
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/panic/unwind_safe.rs:272:9
58: 0x6226c773c5a5 - std::panicking::try::do_call::ha187b82e101f9b62
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panicking.rs:559:40
59: 0x6226c773c5a5 - std::panicking::try::h0d99e57983f49568
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panicking.rs:523:19
60: 0x6226c773c5a5 - std::panic::catch_unwind::hb8fab7762983e8ef
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/panic.rs:149:14
61: 0x6226c773c5a5 - std::thread::Builder::spawn_unchecked_::{{closure}}::h1008ce57a16fa979
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/thread/mod.rs:541:30
62: 0x6226c773c5a5 - core::ops::function::FnOnce::call_once{{vtable.shim}}::h0747bd60cd55a715
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/ops/function.rs:250:5
63: 0x6226c801658b - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h09e5a4c541afa800
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/alloc/src/boxed.rs:2022:9
64: 0x6226c801658b - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h9c8b03c22f4e7026
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/alloc/src/boxed.rs:2022:9
65: 0x6226c801658b - std::sys::pal::unix::thread::Thread::new::thread_start::h522bc89a54da820a
at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/sys/pal/unix/thread.rs:108:17
66: 0x7cb5b129ca94 - start_thread
at ./nptl/pthread_create.c:447:8
67: 0x7cb5b1329c3c - __GI___clone3
at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
68: 0x0 - <unknown>
Desktop (please complete the following information):
Linux - Description: Ubuntu 24.04 LTS
rustc 1.79.0 (129f3b996 2024-06-10)
Additional context
None, but let me know what I can do to help! I will keep trying variations and posting updates. It takes ~4 hours on my 2080 to get to the problem.
The text was updated successfully, but these errors were encountered:
Describe the bug
A weird assertion error occurs, but only on epoch 27 (tested 5 times now; the crash is always at the same spot, even if the batch size is changed). Using the main branch.
The error is triggered by this line:
here: https://github.com/jguhlin/taxotango/blob/9fb413e7531a8bd9f5724d215c47eade9163489a/src/model.rs#L217
To Reproduce
The code to reproduce will be here: https://github.com/jguhlin/taxotango/tree/Inference-and-Queries
cargo run --release
Note that you also need these files, placed at the expected path: wget https://ftp.ncbi.nlm.nih.gov/blast/db/taxdb.tar.gz
Expected behavior
Screenshots
Desktop (please complete the following information):
Linux - Description: Ubuntu 24.04 LTS
rustc 1.79.0 (129f3b996 2024-06-10)
Additional context
None, but let me know what I can do to help! I will keep trying variations and posting updates. It takes ~4 hours on my 2080 to get to the problem.
The text was updated successfully, but these errors were encountered: