valeriaWong commited on Apr 6

Commit

bfd1982

verified ·

1 Parent(s): 084f828

Upload folder using huggingface_hub

Browse files

Files changed (27) hide show

20250323_172321/20250323_172321.log +193 -0
20250323_172357/20250323_172357.log +303 -0
20250323_172357/vis_data/events.out.tfevents.1742721837.172-16-21-188.3426618.0 +3 -0
20250323_172626/20250323_172626.log +0 -0
20250323_172626/vis_data/events.out.tfevents.1742721987.172-16-21-188.3428486.0 +3 -0
internvl_v2_internlm2_2b_qlora_finetune_copy.py +13 -14
iter_1000.pth +2 -2
iter_10000.pth +2 -2
iter_11000.pth +2 -2
iter_12000.pth +2 -2
iter_13000.pth +2 -2
iter_14000.pth +2 -2
iter_15000.pth +2 -2
iter_16000.pth +2 -2
iter_17000.pth +2 -2
iter_18000.pth +2 -2
iter_19000.pth +2 -2
iter_19176.pth +3 -0
iter_2000.pth +2 -2
iter_3000.pth +2 -2
iter_4000.pth +2 -2
iter_5000.pth +2 -2
iter_6000.pth +2 -2
iter_7000.pth +2 -2
iter_8000.pth +2 -2
iter_9000.pth +2 -2
last_checkpoint +1 -1

20250323_172321/20250323_172321.log ADDED Viewed

	@@ -0,0 +1,193 @@

+2025/03/23 17:23:21 - mmengine - DEBUG - An `DeepSpeedStrategy` instance is built from registry, and its implementation can be found in xtuner.engine._strategy.deepspeed
+2025/03/23 17:23:21 - mmengine - INFO -
+------------------------------------------------------------
+System environment:
+    sys.platform: linux
+    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
+    CUDA available: True
+    MUSA available: False
+    numpy_random_seed: 1239231278
+    GPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 4090
+    CUDA_HOME: /usr/local/cuda-12.4
+    NVCC: Cuda compilation tools, release 12.4, V12.4.99
+    GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+    PyTorch: 2.5.1+cu124
+    PyTorch compiling details: PyTorch built with:
+  - GCC 9.3
+  - C++ Version: 201703
+  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications
+  - Intel(R) MKL-DNN v3.5.3 (Git Hash 66f0cb9eb66affd2da3bf5f8d897376f04aae6af)
+  - OpenMP 201511 (a.k.a. OpenMP 4.5)
+  - LAPACK is enabled (usually provided by MKL)
+  - NNPACK is enabled
+  - CPU capability usage: AVX2
+  - CUDA Runtime 12.4
+  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
+  - CuDNN 90.1
+  - Magma 2.6.1
+  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.4, CUDNN_VERSION=9.1.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF,
+    TorchVision: 0.20.1+cu124
+    OpenCV: 4.9.0
+    MMEngine: 0.10.7
+Runtime environment:
+    launcher: none
+    randomness: {'seed': None, 'deterministic': False}
+    cudnn_benchmark: False
+    mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
+    dist_cfg: {'backend': 'nccl'}
+    seed: None
+    deterministic: False
+    Distributed launcher: none
+    Distributed training: False
+    GPU number: 1
+------------------------------------------------------------
+2025/03/23 17:23:21 - mmengine - INFO - Config:
+accumulative_counts = 2
+batch_size = 1
+betas = (
+    0.9,
+    0.999,
+)
+custom_hooks = [
+    dict(
+        tokenizer=dict(
+            pretrained_model_name_or_path='/data/wangqun/models/InternVL2_5-2B',
+            trust_remote_code=True,
+            type='transformers.AutoTokenizer.from_pretrained'),
+        type='xtuner.engine.hooks.DatasetInfoHook'),
+]
+data_path = '/home/wangqun/data/layout_ocr_multi.json'
+dataloader_num_workers = 4
+default_hooks = dict(
+    checkpoint=dict(
+        by_epoch=False,
+        interval=1000,
+        max_keep_ckpts=-1,
+        save_optimizer=False,
+        type='mmengine.hooks.CheckpointHook'),
+    logger=dict(
+        interval=10,
+        log_metric_by_epoch=False,
+        type='mmengine.hooks.LoggerHook'),
+    param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
+    sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
+    timer=dict(type='mmengine.hooks.IterTimerHook'))
+env_cfg = dict(
+    cudnn_benchmark=False,
+    dist_cfg=dict(backend='nccl'),
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+image_folder = '/'
+launcher = 'none'
+llava_dataset = dict(
+    data_paths='/home/wangqun/data/layout_ocr_multi.json',
+    image_folders='/',
+    max_length=8192,
+    model_path='/data/wangqun/models/InternVL2_5-2B',
+    template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+    type='xtuner.dataset.InternVL_V1_5_Dataset')
+load_from = None
+log_level = 'DEBUG'
+log_processor = dict(by_epoch=False)
+lr = 2e-05
+max_epochs = 4
+max_length = 8192
+max_norm = 1
+model = dict(
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    llm_lora=dict(
+        lora_alpha=256,
+        lora_dropout=0.05,
+        r=128,
+        target_modules=None,
+        task_type='CAUSAL_LM',
+        type='peft.LoraConfig'),
+    model_path='/data/wangqun/models/InternVL2_5-2B',
+    quantization_llm=True,
+    quantization_vit=False,
+    type='xtuner.model.InternVL_V1_5')
+optim_type = 'torch.optim.AdamW'
+optim_wrapper = dict(
+    optimizer=dict(
+        betas=(
+            0.9,
+            0.999,
+        ),
+        lr=2e-05,
+        type='torch.optim.AdamW',
+        weight_decay=0.05),
+    type='DeepSpeedOptimWrapper')
+param_scheduler = [
+    dict(
+        begin=0,
+        by_epoch=True,
+        convert_to_iter_based=True,
+        end=0.12,
+        start_factor=1e-05,
+        type='mmengine.optim.LinearLR'),
+    dict(
+        begin=0.12,
+        by_epoch=True,
+        convert_to_iter_based=True,
+        end=4,
+        eta_min=0.0,
+        type='mmengine.optim.CosineAnnealingLR'),
+]
+path = '/data/wangqun/models/InternVL2_5-2B'
+prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.internlm2_chat'
+randomness = dict(deterministic=False, seed=None)
+resume = False
+runner_type = 'FlexibleRunner'
+save_steps = 1000
+save_total_limit = -1
+strategy = dict(
+    config=dict(
+        bf16=dict(enabled=True),
+        fp16=dict(enabled=False, initial_scale_power=16),
+        gradient_accumulation_steps='auto',
+        gradient_clipping='auto',
+        train_micro_batch_size_per_gpu='auto',
+        zero_allow_untested_optimizer=True,
+        zero_force_ds_cpu_optimizer=False,
+        zero_optimization=dict(overlap_comm=True, stage=2)),
+    exclude_frozen_parameters=True,
+    gradient_accumulation_steps=2,
+    gradient_clipping=1,
+    sequence_parallel_size=1,
+    train_micro_batch_size_per_gpu=1,
+    type='xtuner.engine.DeepSpeedStrategy')
+tokenizer = dict(
+    pretrained_model_name_or_path='/data/wangqun/models/InternVL2_5-2B',
+    trust_remote_code=True,
+    type='transformers.AutoTokenizer.from_pretrained')
+train_cfg = dict(max_epochs=4, type='xtuner.engine.runner.TrainLoop')
+train_dataloader = dict(
+    batch_size=1,
+    collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
+    dataset=dict(
+        data_paths='/home/wangqun/data/layout_ocr_multi.json',
+        image_folders='/',
+        max_length=8192,
+        model_path='/data/wangqun/models/InternVL2_5-2B',
+        template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+        type='xtuner.dataset.InternVL_V1_5_Dataset'),
+    num_workers=4,
+    sampler=dict(
+        length_property='modality_length',
+        per_device_batch_size=2,
+        type='xtuner.dataset.samplers.LengthGroupedSampler'))
+visualizer = dict(
+    type='mmengine.visualization.Visualizer',
+    vis_backends=[
+        dict(type='mmengine.visualization.TensorboardVisBackend'),
+    ])
+warmup_ratio = 0.03
+weight_decay = 0.05
+work_dir = '/home/wangqun/work_dirs/internvl_ft_run_14_filter'
+2025/03/23 17:23:21 - mmengine - DEBUG - An `TensorboardVisBackend` instance is built from registry, and its implementation can be found in mmengine.visualization.vis_backend
+2025/03/23 17:23:21 - mmengine - DEBUG - An `Visualizer` instance is built from registry, and its implementation can be found in mmengine.visualization.visualizer
+2025/03/23 17:23:21 - mmengine - DEBUG - Attribute `_env_initialized` is not defined in <class 'mmengine.visualization.vis_backend.TensorboardVisBackend'> or `<class 'mmengine.visualization.vis_backend.TensorboardVisBackend'>._env_initialized is False, `_init_env` will be called and <class 'mmengine.visualization.vis_backend.TensorboardVisBackend'>._env_initialized will be set to True

20250323_172357/20250323_172357.log ADDED Viewed

	@@ -0,0 +1,303 @@

+2025/03/23 17:23:57 - mmengine - DEBUG - An `DeepSpeedStrategy` instance is built from registry, and its implementation can be found in xtuner.engine._strategy.deepspeed
+2025/03/23 17:23:57 - mmengine - INFO -
+------------------------------------------------------------
+System environment:
+    sys.platform: linux
+    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
+    CUDA available: True
+    MUSA available: False
+    numpy_random_seed: 116888592
+    GPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 4090
+    CUDA_HOME: /usr/local/cuda-12.4
+    NVCC: Cuda compilation tools, release 12.4, V12.4.99
+    GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+    PyTorch: 2.5.1+cu124
+    PyTorch compiling details: PyTorch built with:
+  - GCC 9.3
+  - C++ Version: 201703
+  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications
+  - Intel(R) MKL-DNN v3.5.3 (Git Hash 66f0cb9eb66affd2da3bf5f8d897376f04aae6af)
+  - OpenMP 201511 (a.k.a. OpenMP 4.5)
+  - LAPACK is enabled (usually provided by MKL)
+  - NNPACK is enabled
+  - CPU capability usage: AVX2
+  - CUDA Runtime 12.4
+  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
+  - CuDNN 90.1
+  - Magma 2.6.1
+  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.4, CUDNN_VERSION=9.1.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF,
+    TorchVision: 0.20.1+cu124
+    OpenCV: 4.9.0
+    MMEngine: 0.10.7
+Runtime environment:
+    launcher: none
+    randomness: {'seed': None, 'deterministic': False}
+    cudnn_benchmark: False
+    mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
+    dist_cfg: {'backend': 'nccl'}
+    seed: None
+    deterministic: False
+    Distributed launcher: none
+    Distributed training: False
+    GPU number: 1
+------------------------------------------------------------
+2025/03/23 17:23:57 - mmengine - INFO - Config:
+accumulative_counts = 2
+batch_size = 1
+betas = (
+    0.9,
+    0.999,
+)
+custom_hooks = [
+    dict(
+        tokenizer=dict(
+            pretrained_model_name_or_path='/data/wangqun/models/InternVL2_5-2B',
+            trust_remote_code=True,
+            type='transformers.AutoTokenizer.from_pretrained'),
+        type='xtuner.engine.hooks.DatasetInfoHook'),
+]
+data_path = '/home/wangqun/data/layout_ocr_multi.json'
+dataloader_num_workers = 4
+default_hooks = dict(
+    checkpoint=dict(
+        by_epoch=False,
+        interval=1000,
+        max_keep_ckpts=-1,
+        save_optimizer=False,
+        type='mmengine.hooks.CheckpointHook'),
+    logger=dict(
+        interval=10,
+        log_metric_by_epoch=False,
+        type='mmengine.hooks.LoggerHook'),
+    param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
+    sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
+    timer=dict(type='mmengine.hooks.IterTimerHook'))
+env_cfg = dict(
+    cudnn_benchmark=False,
+    dist_cfg=dict(backend='nccl'),
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+image_folder = '/'
+launcher = 'none'
+llava_dataset = dict(
+    data_paths='/home/wangqun/data/layout_ocr_multi.json',
+    image_folders='/',
+    max_length=8192,
+    model_path='/data/wangqun/models/InternVL2_5-2B',
+    template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+    type='xtuner.dataset.InternVL_V1_5_Dataset')
+load_from = None
+log_level = 'DEBUG'
+log_processor = dict(by_epoch=False)
+lr = 2e-05
+max_epochs = 4
+max_length = 8192
+max_norm = 1
+model = dict(
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    llm_lora=dict(
+        lora_alpha=256,
+        lora_dropout=0.05,
+        r=128,
+        target_modules=None,
+        task_type='CAUSAL_LM',
+        type='peft.LoraConfig'),
+    model_path='/data/wangqun/models/InternVL2_5-2B',
+    quantization_llm=True,
+    quantization_vit=False,
+    type='xtuner.model.InternVL_V1_5')
+optim_type = 'torch.optim.AdamW'
+optim_wrapper = dict(
+    optimizer=dict(
+        betas=(
+            0.9,
+            0.999,
+        ),
+        lr=2e-05,
+        type='torch.optim.AdamW',
+        weight_decay=0.05),
+    type='DeepSpeedOptimWrapper')
+param_scheduler = [
+    dict(
+        begin=0,
+        by_epoch=True,
+        convert_to_iter_based=True,
+        end=0.12,
+        start_factor=1e-05,
+        type='mmengine.optim.LinearLR'),
+    dict(
+        begin=0.12,
+        by_epoch=True,
+        convert_to_iter_based=True,
+        end=4,
+        eta_min=0.0,
+        type='mmengine.optim.CosineAnnealingLR'),
+]
+path = '/data/wangqun/models/InternVL2_5-2B'
+prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.internlm2_chat'
+randomness = dict(deterministic=False, seed=None)
+resume = False
+runner_type = 'FlexibleRunner'
+save_steps = 1000
+save_total_limit = -1
+strategy = dict(
+    config=dict(
+        bf16=dict(enabled=True),
+        fp16=dict(enabled=False, initial_scale_power=16),
+        gradient_accumulation_steps='auto',
+        gradient_clipping='auto',
+        train_micro_batch_size_per_gpu='auto',
+        zero_allow_untested_optimizer=True,
+        zero_force_ds_cpu_optimizer=False,
+        zero_optimization=dict(overlap_comm=True, stage=2)),
+    exclude_frozen_parameters=True,
+    gradient_accumulation_steps=2,
+    gradient_clipping=1,
+    sequence_parallel_size=1,
+    train_micro_batch_size_per_gpu=1,
+    type='xtuner.engine.DeepSpeedStrategy')
+tokenizer = dict(
+    pretrained_model_name_or_path='/data/wangqun/models/InternVL2_5-2B',
+    trust_remote_code=True,
+    type='transformers.AutoTokenizer.from_pretrained')
+train_cfg = dict(max_epochs=4, type='xtuner.engine.runner.TrainLoop')
+train_dataloader = dict(
+    batch_size=1,
+    collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
+    dataset=dict(
+        data_paths='/home/wangqun/data/layout_ocr_multi.json',
+        image_folders='/',
+        max_length=8192,
+        model_path='/data/wangqun/models/InternVL2_5-2B',
+        template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
+        type='xtuner.dataset.InternVL_V1_5_Dataset'),
+    num_workers=4,
+    sampler=dict(
+        length_property='modality_length',
+        per_device_batch_size=2,
+        type='xtuner.dataset.samplers.LengthGroupedSampler'))
+visualizer = dict(
+    type='mmengine.visualization.Visualizer',
+    vis_backends=[
+        dict(type='mmengine.visualization.TensorboardVisBackend'),
+    ])
+warmup_ratio = 0.03
+weight_decay = 0.05
+work_dir = '/home/wangqun/work_dirs/internvl_ft_run_14_filter'
+2025/03/23 17:23:57 - mmengine - DEBUG - An `TensorboardVisBackend` instance is built from registry, and its implementation can be found in mmengine.visualization.vis_backend
+2025/03/23 17:23:57 - mmengine - DEBUG - An `Visualizer` instance is built from registry, and its implementation can be found in mmengine.visualization.visualizer
+2025/03/23 17:23:57 - mmengine - DEBUG - Attribute `_env_initialized` is not defined in <class 'mmengine.visualization.vis_backend.TensorboardVisBackend'> or `<class 'mmengine.visualization.vis_backend.TensorboardVisBackend'>._env_initialized is False, `_init_env` will be called and <class 'mmengine.visualization.vis_backend.TensorboardVisBackend'>._env_initialized will be set to True
+2025/03/23 17:23:57 - mmengine - DEBUG - Get class `RuntimeInfoHook` from "hook" registry in "mmengine"
+2025/03/23 17:23:57 - mmengine - DEBUG - An `RuntimeInfoHook` instance is built from registry, and its implementation can be found in mmengine.hooks.runtime_info_hook
+2025/03/23 17:23:57 - mmengine - DEBUG - An `IterTimerHook` instance is built from registry, and its implementation can be found in mmengine.hooks.iter_timer_hook
+2025/03/23 17:23:57 - mmengine - DEBUG - An `DistSamplerSeedHook` instance is built from registry, and its implementation can be found in mmengine.hooks.sampler_seed_hook
+2025/03/23 17:23:57 - mmengine - DEBUG - An `LoggerHook` instance is built from registry, and its implementation can be found in mmengine.hooks.logger_hook
+2025/03/23 17:23:57 - mmengine - DEBUG - An `ParamSchedulerHook` instance is built from registry, and its implementation can be found in mmengine.hooks.param_scheduler_hook
+2025/03/23 17:23:57 - mmengine - DEBUG - An `CheckpointHook` instance is built from registry, and its implementation can be found in mmengine.hooks.checkpoint_hook
+2025/03/23 17:23:57 - mmengine - WARNING - Failed to search registry with scope "mmengine" in the "builder" registry tree. As a workaround, the current "builder" registry in "xtuner" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "mmengine" is a correct scope, or whether the registry is initialized.
+2025/03/23 17:23:57 - mmengine - DEBUG - An `from_pretrained` instance is built from registry, and its implementation can be found in transformers.models.auto.tokenization_auto
+2025/03/23 17:23:57 - mmengine - DEBUG - An `DatasetInfoHook` instance is built from registry, and its implementation can be found in xtuner.engine.hooks.dataset_info_hook
+2025/03/23 17:23:57 - mmengine - INFO - Hooks will be executed in the following order:
+before_run:
+(VERY_HIGH   ) RuntimeInfoHook
+(BELOW_NORMAL) LoggerHook
+ --------------------
+before_train:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) IterTimerHook
+(NORMAL      ) DatasetInfoHook
+(VERY_LOW    ) CheckpointHook
+ --------------------
+before_train_epoch:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) IterTimerHook
+(NORMAL      ) DistSamplerSeedHook
+ --------------------
+before_train_iter:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) IterTimerHook
+ --------------------
+after_train_iter:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) IterTimerHook
+(BELOW_NORMAL) LoggerHook
+(LOW         ) ParamSchedulerHook
+(VERY_LOW    ) CheckpointHook
+ --------------------
+after_train_epoch:
+(NORMAL      ) IterTimerHook
+(LOW         ) ParamSchedulerHook
+(VERY_LOW    ) CheckpointHook
+ --------------------
+before_val:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) DatasetInfoHook
+ --------------------
+before_val_epoch:
+(NORMAL      ) IterTimerHook
+ --------------------
+before_val_iter:
+(NORMAL      ) IterTimerHook
+ --------------------
+after_val_iter:
+(NORMAL      ) IterTimerHook
+(BELOW_NORMAL) LoggerHook
+ --------------------
+after_val_epoch:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) IterTimerHook
+(BELOW_NORMAL) LoggerHook
+(LOW         ) ParamSchedulerHook
+(VERY_LOW    ) CheckpointHook
+ --------------------
+after_val:
+(VERY_HIGH   ) RuntimeInfoHook
+ --------------------
+after_train:
+(VERY_HIGH   ) RuntimeInfoHook
+(VERY_LOW    ) CheckpointHook
+ --------------------
+before_test:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) DatasetInfoHook
+ --------------------
+before_test_epoch:
+(NORMAL      ) IterTimerHook
+ --------------------
+before_test_iter:
+(NORMAL      ) IterTimerHook
+ --------------------
+after_test_iter:
+(NORMAL      ) IterTimerHook
+(BELOW_NORMAL) LoggerHook
+ --------------------
+after_test_epoch:
+(VERY_HIGH   ) RuntimeInfoHook
+(NORMAL      ) IterTimerHook
+(BELOW_NORMAL) LoggerHook
+ --------------------
+after_test:
+(VERY_HIGH   ) RuntimeInfoHook
+ --------------------
+after_run:
+(BELOW_NORMAL) LoggerHook
+ --------------------
+2025/03/23 17:23:57 - mmengine - DEBUG - An `FlexibleRunner` instance is built from registry, its implementation can be found inmmengine.runner._flexible_runner
+2025/03/23 17:23:57 - mmengine - INFO - Starting to loading data and calc length
+2025/03/23 17:23:57 - mmengine - INFO - =======Starting to process /home/wangqun/data/layout_ocr_multi.json =======
+2025/03/23 17:24:04 - mmengine - INFO - =======total 4794 samples of /home/wangqun/data/layout_ocr_multi.json=======
+2025/03/23 17:24:04 - mmengine - INFO - end loading data and calc length
+2025/03/23 17:24:04 - mmengine - INFO - =======total 4794 samples=======
+2025/03/23 17:24:04 - mmengine - DEBUG - An `InternVL_V1_5_Dataset` instance is built from registry, and its implementation can be found in xtuner.dataset.internvl_dataset
+2025/03/23 17:24:04 - mmengine - INFO - LengthGroupedSampler is used.
+2025/03/23 17:24:04 - mmengine - INFO - LengthGroupedSampler construction is complete, and the selected attribute is modality_length
+2025/03/23 17:24:04 - mmengine - DEBUG - An `LengthGroupedSampler` instance is built from registry, and its implementation can be found in xtuner.dataset.samplers.length_grouped
+2025/03/23 17:24:04 - mmengine - WARNING - Dataset InternVL_V1_5_Dataset has no metainfo. ``dataset_meta`` in visualizer will be None.
+2025/03/23 17:24:04 - mmengine - DEBUG - An `TrainLoop` instance is built from registry, and its implementation can be found in xtuner.engine.runner.loops
+2025/03/23 17:24:04 - mmengine - INFO - Start to load InternVL_V1_5 model.
+2025/03/23 17:24:04 - mmengine - DEBUG - Get class `BaseDataPreprocessor` from "model" registry in "mmengine"
+2025/03/23 17:24:04 - mmengine - DEBUG - An `BaseDataPreprocessor` instance is built from registry, and its implementation can be found in mmengine.model.base_model.data_preprocessor

20250323_172357/vis_data/events.out.tfevents.1742721837.172-16-21-188.3426618.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb646f809dd6caacb7bb21276ee0b7dcd67db0e5ff9f62f7806cc00cab7a12f
+size 4671

20250323_172626/20250323_172626.log ADDED Viewed

The diff for this file is too large to render. See raw diff

20250323_172626/vis_data/events.out.tfevents.1742721987.172-16-21-188.3428486.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf2678d6c7c2da82f71d45e4afad5337e3c47a54e858431e68139b7ebefa9639
+size 510444

internvl_v2_internlm2_2b_qlora_finetune_copy.py CHANGED Viewed

@@ -7,13 +7,12 @@ betas = (
 custom_hooks = [
     dict(
         tokenizer=dict(
-            pretrained_model_name_or_path='/data/wangqun/models/internvl2-2B',
             trust_remote_code=True,
             type='transformers.AutoTokenizer.from_pretrained'),
         type='xtuner.engine.hooks.DatasetInfoHook'),
 ]
-data_path = '/home/wangqun/data/screenshot_od/layout_ocr_multi.json'
-data_root = '/home/wangqun/data/'
 dataloader_num_workers = 4
 default_hooks = dict(
     checkpoint=dict(
@@ -33,13 +32,13 @@ env_cfg = dict(
     cudnn_benchmark=False,
     dist_cfg=dict(backend='nccl'),
     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
-image_folder = '/home/wangqun/data/llava_images'
 launcher = 'none'
 llava_dataset = dict(
-    data_paths='/home/wangqun/data/screenshot_od/layout_ocr_multi.json',
-    image_folders='/home/wangqun/data/llava_images',
     max_length=8192,
-    model_path='/data/wangqun/models/internvl2-2B',
     template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
     type='xtuner.dataset.InternVL_V1_5_Dataset')
 load_from = None
@@ -59,7 +58,7 @@ model = dict(
         target_modules=None,
         task_type='CAUSAL_LM',
         type='peft.LoraConfig'),
-    model_path='/data/wangqun/models/internvl2-2B',
     quantization_llm=True,
     quantization_vit=False,
     type='xtuner.model.InternVL_V1_5')
@@ -90,7 +89,7 @@ param_scheduler = [
         eta_min=0.0,
         type='mmengine.optim.CosineAnnealingLR'),
 ]
-path = '/data/wangqun/models/internvl2-2B'
 prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.internlm2_chat'
 randomness = dict(deterministic=False, seed=None)
 resume = False
@@ -114,7 +113,7 @@ strategy = dict(
     train_micro_batch_size_per_gpu=1,
     type='xtuner.engine.DeepSpeedStrategy')
 tokenizer = dict(
-    pretrained_model_name_or_path='/data/wangqun/models/internvl2-2B',
     trust_remote_code=True,
     type='transformers.AutoTokenizer.from_pretrained')
 train_cfg = dict(max_epochs=4, type='xtuner.engine.runner.TrainLoop')
@@ -122,10 +121,10 @@ train_dataloader = dict(
     batch_size=1,
     collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
     dataset=dict(
-        data_paths='/home/wangqun/data/screenshot_od/layout_ocr_multi.json',
-        image_folders='/home/wangqun/data/llava_images',
         max_length=8192,
-        model_path='/data/wangqun/models/internvl2-2B',
         template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
         type='xtuner.dataset.InternVL_V1_5_Dataset'),
     num_workers=4,
@@ -140,4 +139,4 @@ visualizer = dict(
     ])
 warmup_ratio = 0.03
 weight_decay = 0.05
-work_dir = '/home/wangqun/work_dirs/internvl_ft_run_6_filter'

 custom_hooks = [
     dict(
         tokenizer=dict(
+            pretrained_model_name_or_path='/data/wangqun/models/InternVL2_5-2B',
             trust_remote_code=True,
             type='transformers.AutoTokenizer.from_pretrained'),
         type='xtuner.engine.hooks.DatasetInfoHook'),
 ]
+data_path = '/home/wangqun/data/layout_ocr_multi.json'
 dataloader_num_workers = 4
 default_hooks = dict(
     checkpoint=dict(
     cudnn_benchmark=False,
     dist_cfg=dict(backend='nccl'),
     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+image_folder = '/'
 launcher = 'none'
 llava_dataset = dict(
+    data_paths='/home/wangqun/data/layout_ocr_multi.json',
+    image_folders='/',
     max_length=8192,
+    model_path='/data/wangqun/models/InternVL2_5-2B',
     template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
     type='xtuner.dataset.InternVL_V1_5_Dataset')
 load_from = None
         target_modules=None,
         task_type='CAUSAL_LM',
         type='peft.LoraConfig'),
+    model_path='/data/wangqun/models/InternVL2_5-2B',
     quantization_llm=True,
     quantization_vit=False,
     type='xtuner.model.InternVL_V1_5')
         eta_min=0.0,
         type='mmengine.optim.CosineAnnealingLR'),
 ]
+path = '/data/wangqun/models/InternVL2_5-2B'
 prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.internlm2_chat'
 randomness = dict(deterministic=False, seed=None)
 resume = False
     train_micro_batch_size_per_gpu=1,
     type='xtuner.engine.DeepSpeedStrategy')
 tokenizer = dict(
+    pretrained_model_name_or_path='/data/wangqun/models/InternVL2_5-2B',
     trust_remote_code=True,
     type='transformers.AutoTokenizer.from_pretrained')
 train_cfg = dict(max_epochs=4, type='xtuner.engine.runner.TrainLoop')
     batch_size=1,
     collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
     dataset=dict(
+        data_paths='/home/wangqun/data/layout_ocr_multi.json',
+        image_folders='/',
         max_length=8192,
+        model_path='/data/wangqun/models/InternVL2_5-2B',
         template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
         type='xtuner.dataset.InternVL_V1_5_Dataset'),
     num_workers=4,
     ])
 warmup_ratio = 0.03
 weight_decay = 0.05
+work_dir = '/home/wangqun/work_dirs/internvl_ft_run_14_filter'

iter_1000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba0e9eb945a64ae450ac8e967acbb5285f57c31cae909afe67ec235207b99855
-size 301243842

 version https://git-lfs.github.com/spec/v1
+oid sha256:481e53fb7856747b4264e40d18add7468dac96f819e28cee759eca4a5e024ace
+size 301244482

iter_10000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7769493395d9ac7f55d2ca270d26be331773800f4f9a13c1a0ca6269a2e06f5e
-size 301911874

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfa8914dd504a4f929216c6c0ebff3ca41ef7ed02f50f09c721b8b92111089f5
+size 301919426

iter_11000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdabfae07367cc98118fcf09c3310059e18f468a7c518421cb1e86ec6eb69fba
-size 301985986

 version https://git-lfs.github.com/spec/v1
+oid sha256:be8212140d3eb14e0d0601c943b7dc8695addea3b9819b7692d19d35abc5f630
+size 301994178

iter_12000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb4d3d27576ac85df0c9616fc521ffc50b4dc79aca03b17c4f92a391f1998a74
-size 302060098

 version https://git-lfs.github.com/spec/v1
+oid sha256:be457d0e6cf6e2be1e41e8c419bb13fc91a8c09feed416d317afb220737d8dd5
+size 302069186

iter_13000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e55df320dee30c9c96d43502527d0f3b71f8fd255b3187a7e5eec11ec426ca31
-size 302134274

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb8f86fabdfe2cc0e9f46aab2a186b36484bfb8d152ecb2a1886d5759aad12c2
+size 302144194

iter_14000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8cd67b12f66e5699c07c9cc800b9018c6fd02be3667a93d8ea70572e18489d7c
-size 302208450

 version https://git-lfs.github.com/spec/v1
+oid sha256:566df872765cb8ca258be1e0a4bcab172724429008d51d2e5b84d86e279864ed
+size 302219202

iter_15000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5ba8ed14ae1a4f3c4eff2da4423f4e967f7c5c71911eb2d192c668882f08875
-size 302282690

 version https://git-lfs.github.com/spec/v1
+oid sha256:50adf58892fd870d5d7f8b3682ed8687349ac1a9b515b637dc96000cd8455d0a
+size 302294146

iter_16000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a08c742bdcadda633b8aa1b396266d4bdbdc221688c1910020123c20ed8b86d
-size 302356866

 version https://git-lfs.github.com/spec/v1
+oid sha256:13265f1440c0dccd0c36aa2bbe3282a23eaa58e6e9014a2162ce9d31a4ab196e
+size 302369090

iter_17000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:663a8de1f96ddb228e7e31cb8f3db4e56c4f62bd0c9327aacf280d43b7a78716
-size 302431042

 version https://git-lfs.github.com/spec/v1
+oid sha256:462de647258ce67f55c412156b3efd8dd1eb207acac1e7c18c3a71328b736151
+size 302444098

iter_18000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4db2450244c72bacd77b0e7bd28468134d9c534adbec5c2512bcee89c9fa669f
-size 302505218

 version https://git-lfs.github.com/spec/v1
+oid sha256:954491018e38ed55a4cf8277358c960e288fb84b7228a89f44b36f80643adbce
+size 302518978

iter_19000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78a79f979503ab7cf651d48187fbfda7255c87935a673c8219af8a89174281f6
-size 302578626

 version https://git-lfs.github.com/spec/v1
+oid sha256:62abfe06034f232dbce3f5b6e1d11ae1b83262335f5bc67a764c14fbc520921f
+size 302593154

iter_19176.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4eb7af7065d00e98ceddc5ce42b5635c915a93e3852c7fe5109d9f978d7af8e
+size 302606274

iter_2000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cf4c0bbf0bfec626773e96a2bb690a2dde372d508317f6ea3fbdc9b36ebf65f
-size 301318466

 version https://git-lfs.github.com/spec/v1
+oid sha256:a35968ff67ec88471423ef474217211f9fa39addedead8a86cfd19290c6a0037
+size 301319810

iter_3000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:125ac4fd4dfbfe8fc7bd2a7b91a95599a4351c3cf4bc9d52d885052393f7319b
-size 301392578

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2496d44620dcf04afb3df31c29853532c4fef4e75a2f651b811fa696f2bf171
+size 301394626

iter_4000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66e3aa886d88d9627aabd2ceea48050fbf757a548d7c857523e26ff58a4c797f
-size 301466882

 version https://git-lfs.github.com/spec/v1
+oid sha256:a235055e07709b1acc4e022ce322f1fa278396af64ff11a47c8ccd28348fc4ca
+size 301469698

iter_5000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5708634ddeeb502bfbfdc02a2a9359e577d40b8831892d344b42110588e41c0
-size 301540994

 version https://git-lfs.github.com/spec/v1
+oid sha256:adea6aa165836ef74883dbf62b91204872f44ac4fe42e24747f99caf68a4b189
+size 301544642

iter_6000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4188a9b675959b4df3c8037ac46868b433eae25ff03a9cc00c6f0cf9be9a59c6
-size 301615106

 version https://git-lfs.github.com/spec/v1
+oid sha256:23f6f730153c5f00af9a854ab9365e664777d1187edbc0d05be7c27e9c86ace0
+size 301619650

iter_7000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aad793ebcee9c4a7a263e8bf230df4583fcc7d0cc1e2ba235d9a0ccebba46c80
-size 301689282

 version https://git-lfs.github.com/spec/v1
+oid sha256:f015f9c0c64a53ea727ac17617475fcbc86cd384b4be0819787d84155c609006
+size 301694466

iter_8000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09c0652dba256c98fb28c5cdbb52de1f3d40b86828fb8e28bd2f28f24c6d486d
-size 301763458

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec56cd5e746d31071835fc4fba317c119de176ed1366b243d3a0f0c9a7a9f7a3
+size 301769474

iter_9000.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:362e09b3dbf898e8c30dd51064c406ecd7e5b14dcd01d9fcf21b5fd1929fe4e2
-size 301837634

 version https://git-lfs.github.com/spec/v1
+oid sha256:244c6e3d325c6caaa0dd1d8e9c9f0778dba51a9cc9ceaa3cc68e5391561723eb
+size 301844354

last_checkpoint CHANGED Viewed

	@@ -1 +1 @@
1	- /~~root~~/wangqun/work_dirs/~~internvl_ft_run_6_filter~~/~~iter_19224~~.pth


1	+ /home/wangqun/work_dirs/internvl_ft_run_14_filter/iter_19176.pth