Skip to content

[Bug] Deepseek OCR finetune failed #3674

@jil-olala

Description

@jil-olala

Environment:
Ubuntu 22.04
conda

Program

# Reproduction script: load a local DeepSeek-OCR checkpoint through Unsloth's
# FastVisionModel, run a single OCR inference on one image, and print the
# result next to the first reference text of the dataset.
from unsloth import FastVisionModel # FastLanguageModel for LLMs
import torch
from transformers import AutoModel
import os
# NOTE(review): this variable looks related to Unsloth's "weights are not
# initialized" check -- confirm in the Unsloth docs which value relaxes it.
# os.environ["UNSLOTH_WARN_UNINITIALIZED"] = '1'

# Load model and tokenizer from the local checkpoint directory.
model, tokenizer = FastVisionModel.from_pretrained(
    "./deepseek_ocr", 
    load_in_4bit = False, # Use 4bit to reduce memory use. False for 16bit LoRA.
    auto_model = AutoModel,
    trust_remote_code=True,
    unsloth_force_compile=True,
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for long context
    # token = "111"
)

# First 2000 training samples; only used below to print a reference text.
from datasets import load_dataset
dataset = load_dataset("hezarai/parsynth-ocr-200k", split = "train[:2000]")

# The original paste opened this literal with three double quotes but closed it
# with one, leaving the string unterminated (SyntaxError). A regular
# single-line string carries the same content.
prompt = "<image>\n# Free OCR"

image_file = '/home/dataset/2.png'
output_path = '/output'
# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False):

# Tiny: base_size = 512, image_size = 512, crop_mode = False
# Small: base_size = 640, image_size = 640, crop_mode = False
# Base: base_size = 1024, image_size = 1024, crop_mode = False
# Large: base_size = 1280, image_size = 1280, crop_mode = False

# Gundam: base_size = 1024, image_size = 640, crop_mode = True

# Run inference with the "Gundam" preset listed above.
res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = False)

print(res)
print(dataset[0]["text"])

Log

Torch: 2.9.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
"-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.
You are using a model of type deepseek_vl_v2 to instantiate a model of type DeepseekOCR. This is not supported for all configurations of models and can yield errors.
You are using a model of type deepseek_vl_v2 to instantiate a model of type DeepseekOCR. This is not supported for all configurations of models and can yield errors.
Traceback (most recent call last):
 File "/home/ds_ocr/fintune.py", line 12, in <module>
   model, tokenizer = FastVisionModel.from_pretrained(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/unsloth/models/loader.py", line 1154, in from_pretrained
   model, tokenizer = FastBaseModel.from_pretrained(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/unsloth/models/vision.py", line 661, in from_pretrained
   model = auto_model.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py", line 597, in from_pretrained
   return model_class.from_pretrained(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
   return func(*args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/transformers/modeling_utils.py", line 5048, in from_pretrained
   ) = cls._load_pretrained_model(
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/transformers/modeling_utils.py", line 5535, in _load_pretrained_model
   logger.warning(
 File "/root/miniconda3/envs/unsloth/lib/python3.12/logging/__init__.py", line 1551, in warning
   self._log(WARNING, msg, args, **kwargs)
 File "/root/miniconda3/envs/unsloth/lib/python3.12/logging/__init__.py", line 1684, in _log
   self.handle(record)
 File "/root/miniconda3/envs/unsloth/lib/python3.12/logging/__init__.py", line 1700, in handle
   self.callHandlers(record)
 File "/root/miniconda3/envs/unsloth/lib/python3.12/logging/__init__.py", line 1762, in callHandlers
   hdlr.handle(record)
 File "/root/miniconda3/envs/unsloth/lib/python3.12/logging/__init__.py", line 1028, in handle
   self.emit(record)
 File "/root/miniconda3/envs/unsloth/lib/python3.12/site-packages/unsloth/models/_utils.py", line 434, in emit
   raise Exception(
Exception: Unsloth: Critical error since some weights are not initialized.
Please try updating Unsloth, transformers and timm via:
`pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo transformers timm`
<LogRecord: transformers.modeling_utils, 30, /root/miniconda3/envs/unsloth/lib/python3.12/site-packages/transformers/modeling_utils.py, 5535, "Some weights of DeepseekOCRForCausalLM were not initialized from the model checkpoint at ./deepseek_ocr and are newly initialized: ['model.vision_model.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.">
pip list
Package                  Version
------------------------ ------------
accelerate               1.12.0
addict                   2.4.0
aiohappyeyeballs         2.6.1
aiohttp                  3.13.2
aiosignal                1.4.0
anyio                    4.12.0
attrs                    25.4.0
bitsandbytes             0.48.2
certifi                  2025.11.12
charset-normalizer       3.4.4
click                    8.3.1
contourpy                1.3.3
cut-cross-entropy        25.1.1
cycler                   0.12.1
datasets                 4.3.0
diffusers                0.35.2
dill                     0.4.0
docstring_parser         0.17.0
easydict                 1.13
einops                   0.8.1
filelock                 3.20.0
fonttools                4.61.0
frozenlist               1.8.0
fsspec                   2025.9.0
h11                      0.16.0
hf_transfer              0.1.9
hf-xet                   1.2.0
httpcore                 1.0.9
httpx                    0.28.1
huggingface-hub          0.36.0
idna                     3.11
importlib_metadata       8.7.0
Jinja2                   3.1.6
jiwer                    4.0.0
kiwisolver               1.4.9
markdown-it-py           4.0.0
MarkupSafe               3.0.3
matplotlib               3.10.7
mdurl                    0.1.2
mpmath                   1.3.0
msgspec                  0.20.0
multidict                6.7.0
multiprocess             0.70.16
networkx                 3.6
numpy                    2.3.5
nvidia-cublas-cu12       12.8.4.1
nvidia-cuda-cupti-cu12   12.8.90
nvidia-cuda-nvrtc-cu12   12.8.93
nvidia-cuda-runtime-cu12 12.8.90
nvidia-cudnn-cu12        9.10.2.21
nvidia-cufft-cu12        11.3.3.83
nvidia-cufile-cu12       1.13.1.3
nvidia-curand-cu12       10.3.9.90
nvidia-cusolver-cu12     11.7.3.90
nvidia-cusparse-cu12     12.5.8.93
nvidia-cusparselt-cu12   0.7.1
nvidia-nccl-cu12         2.27.5
nvidia-nvjitlink-cu12    12.8.93
nvidia-nvshmem-cu12      3.3.20
nvidia-nvtx-cu12         12.8.90
packaging                25.0
pandas                   2.3.3
peft                     0.18.0
pillow                   12.0.0
pip                      25.3
propcache                0.4.1
protobuf                 6.33.1
psutil                   7.1.3
pyarrow                  22.0.0
Pygments                 2.19.2
pyparsing                3.2.5
python-dateutil          2.9.0.post0
pytz                     2025.2
PyYAML                   6.0.3
RapidFuzz                3.14.3
regex                    2025.11.3
requests                 2.32.5
rich                     14.2.0
safetensors              0.7.0
sentencepiece            0.2.1
setuptools               80.9.0
shtab                    1.8.0
six                      1.17.0
sympy                    1.14.0
timm                     1.0.22
tokenizers               0.22.1
torch                    2.9.0
torchao                  0.14.1
torchvision              0.24.0
tqdm                     4.67.1
transformers             4.57.3
triton                   3.5.0
trl                      0.22.2
typeguard                4.4.4
typing_extensions        4.15.0
tyro                     0.9.35
tzdata                   2025.2
unsloth                  2025.11.6
unsloth_zoo              2025.11.6
urllib3                  2.5.0
wheel                    0.45.1
xformers                 0.0.33.post1
xxhash                   3.6.0
yarl                     1.22.0
zipp                     3.23.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions