用户您好，请详细描述您所遇到的问题，这会帮助我们快速定位问题~
2023-08-31 14:00:56,859 ERROR [ddp_trainer.py:429] Node[0] Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/hat/engine/ddp_trainer.py", line 425, in _with_exception
fn(*args)
File "/horizon/horizon_j5_open_explorer_v1.1.62-py38_20230802/ddk/samples/ai_toolchain/horizon_model_train_sample/scripts/tools/train.py", line 186, in train_entrance
trainer.fit()
File "/usr/local/lib/python3.8/dist-packages/hat/engine/loop_base.py", line 500, in fit
_, (batch, _is_last_batch) = next(self.data_loader_pr)
File "/usr/local/lib/python3.8/dist-packages/hat/profiler/profilers.py", line 103, in profile_iterable
value = next(iterator)
File "/usr/local/lib/python3.8/dist-packages/hat/utils/generator.py", line 22, in prefetch_iterator
last = next(it)
File "/root/.local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
data = self._next_data()
File "/root/.local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1333, in _next_data
return self._process_data(data)
File "/root/.local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data
data.reraise()
File "/root/.local/lib/python3.8/site-packages/torch/_utils.py", line 543, in reraise
raise exception
KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/root/.local/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/root/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/root/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/hat/data/datasets/nuscenes_dataset.py", line 1397, in __getitem__
data = self.sampler(sample)
File "/usr/local/lib/python3.8/dist-packages/hat/data/datasets/nuscenes_dataset.py", line 1501, in __call__
self._get_map_info(sample)
File "/usr/local/lib/python3.8/dist-packages/hat/data/datasets/nuscenes_dataset.py", line 1471, in _get_map_info
sample["ego2global_translation"],
KeyError: 'ego2global_translation'
ERROR:__main__:train failed! process 0 terminated with exit code 1
Traceback (most recent call last):
File "tools/train.py", line 287, in <module>
raise e
File "tools/train.py", line 273, in <module>
train(
File "tools/train.py", line 254, in train
launch(
File "/usr/local/lib/python3.8/dist-packages/hat/engine/ddp_trainer.py", line 394, in launch
mp.spawn(
File "/root/.local/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/root/.local/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/root/.local/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 149, in join
raise ProcessExitedException(
torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
