
[Bug] Vicuna chat template #3667

@james5635

Description

from unsloth import FastLanguageModel
import torch
from accelerate import PartialState

fourbit_models = [
    "unsloth/granite-4.0-micro",
    "unsloth/granite-4.0-h-micro",
    "unsloth/granite-4.0-h-tiny",
    "unsloth/granite-4.0-h-small",

    # Base pretrained Granite 4 models
    "unsloth/granite-4.0-micro-base",
    "unsloth/granite-4.0-h-micro-base",
    "unsloth/granite-4.0-h-tiny-base",
    "unsloth/granite-4.0-h-small-base",

    # 4bit dynamic quants for superior accuracy and low memory use
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/Phi-4",
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit" # [NEW] We support TTS models!
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    # model_name = "unsloth/Llama-3.2-3B-Instruct",
    # model_name = "unsloth/Llama-3.2-1B-Instruct",
    # model_name = "mistralai/Mistral-7B-Instruct-v0.3",
    # model_name = "unsloth/gemma-3-270m-it",
    # model_name = "unsloth/granite-4.0-h-350m",
    model_name = "unsloth/mistral-7b-instruct-v0.3",
    max_seq_length = 2048,   # Choose any for long context!
    load_in_4bit = False,    # 4-bit quantization to reduce memory use
    load_in_8bit = False,    # [NEW!] A bit more accurate, uses 2x memory
    # load_in_16bit = True,
    full_finetuning = False, # [NEW!] We have full finetuning now!
    device_map="balanced" # error when using with mamba_ssm
    # device_map={"": PartialState().local_process_index},
)

from unsloth.chat_templates import CHAT_TEMPLATES
print(list(CHAT_TEMPLATES.keys()))

from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    # chat_template = "llama-3.1",
    # chat_template = "mistral",
    # chat_template = "gemma3",
    # chat_template = "unsloth",
    # chat_template = "phi-3",
    chat_template = "vicuna",
    # chat_template = "vicuna_old",
    # chat_template = "chatml",
    # chat_template = "alpaca",
    map_eos_token = True
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }

def make_chatml(example):
    return {
        "conversations": [[
            # {"role": "system", "content": content + "\n\n" + summary + "\n\n"},
            {"role": "user", "content": qa["question"] if qa["question"] is not None else ""}, # two rows in the dataset have question=None
            {"role": "assistant", "content": qa["answer"]}
        ] for content, summary, qas in zip(example['content'], example['summary'], example['QAs']) for qa in qas]
    }
from datasets import load_dataset

dataset = load_dataset( ... , split = "train")
dataset = dataset.map(make_chatml, batched=True, remove_columns=['summary', 'qa_pairs', 'content', 'QAs'])
dataset = dataset.map(formatting_prompts_func, batched = True,)

print(dataset[0]["text"])
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.6: Fast Mistral patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
model.safetensors.index.json: 23.9kB [00:00, 96.8MB/s]
model-00001-of-00003.safetensors: 100%|█████| 4.95G/4.95G [00:13<00:00, 366MB/s]
model-00002-of-00003.safetensors: 100%|█████| 5.00G/5.00G [00:19<00:00, 251MB/s]
model-00003-of-00003.safetensors: 100%|█████| 4.55G/4.55G [00:18<00:00, 250MB/s]
Loading checkpoint shards: 100%|██████████████████| 3/3 [00:15<00:00,  5.14s/it]
generation_config.json: 100%|██████████████████| 157/157 [00:00<00:00, 1.31MB/s]
tokenizer_config.json: 141kB [00:00, 37.9MB/s]
tokenizer.model: 100%|███████████████████████| 587k/587k [00:00<00:00, 2.76MB/s]
special_tokens_map.json: 100%|█████████████████| 446/446 [00:00<00:00, 4.94MB/s]
tokenizer.json: 1.96MB [00:00, 24.9MB/s]
Unsloth: You added custom modules, but Unsloth hasn't optimized for this.
Beware - your finetuning might be noticeably slower!
Unsloth: You added custom modules, but Unsloth hasn't optimized for this.
Beware - your finetuning might be noticeably slower!
Unsloth 2025.11.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
['unsloth', 'zephyr', 'chatml', 'mistral', 'llama', 'vicuna', 'vicuna_old', 'vicuna old', 'alpaca', 'gemma', 'gemma_chatml', 'gemma2', 'gemma2_chatml', 'llama-3', 'llama3', 'phi-3', 'phi-35', 'phi-3.5', 'llama-3.1', 'llama-31', 'llama-3.2', 'llama-3.3', 'llama-32', 'llama-33', 'qwen-2.5', 'qwen-25', 'qwen25', 'qwen2.5', 'phi-4', 'gemma-3', 'gemma3', 'qwen-3', 'qwen3', 'gemma-3n', 'gemma3n', 'gpt-oss', 'gptoss', 'qwen3-instruct', 'qwen3-thinking', 'lfm-2', 'starling', 'yi-chat']
Map:   0%|                                    | 0/126235 [00:00<?, ? examples/s]
Traceback (most recent call last):
  File "/kaggle/working/load_model.py", line 108, in <module>
    dataset = dataset.map(formatting_prompts_func, batched = True,)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
                                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py", line 3332, in map
    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py", line 3688, in _map_single
    for i, batch in iter_outputs(shard_iterable):
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py", line 3638, in iter_outputs
    yield i, apply_function(example, i, offset=offset)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py", line 3561, in apply_function
    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/load_model.py", line 94, in formatting_prompts_func
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/transformers/tokenization_utils_base.py", line 1667, in apply_chat_template
    rendered_chat, generation_indices = render_jinja_template(
                                        ^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/transformers/utils/chat_template_utils.py", line 482, in render_jinja_template
    compiled_template = _compile_jinja_template(chat_template)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/transformers/utils/chat_template_utils.py", line 463, in _compile_jinja_template
    return jinja_env.from_string(chat_template)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/jinja2/environment.py", line 1111, in from_string
    return cls.from_code(self, self.compile(source), gs, None)
                               ^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/.venv/lib/python3.12/site-packages/jinja2/environment.py", line 771, in compile
    self.handle_exception(source=source_hint)
  File "/kaggle/working/.venv/lib/python3.12/site-packages/jinja2/environment.py", line 942, in handle_exception
    raise rewrite_traceback_stack(source=source)
  File "<unknown>", line 1, in template
jinja2.exceptions.TemplateSyntaxError: expected token 'end of print statement', got 's'
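
The error itself is informative: Jinja stopped at a bare `s` immediately after a string literal ended, which is exactly what happens when a single-quoted Jinja string contains an unescaped apostrophe. The standard Vicuna system prompt ends with "...polite answers to the user's questions.", so the likely culprit is the apostrophe in "user's" terminating the template's string literal early. A minimal sketch that reproduces the same TemplateSyntaxError under that assumption (the exact prompt wording inside Unsloth's "vicuna" template is inferred from the error, not confirmed):

from jinja2 import Environment

env = Environment()

# Unescaped apostrophe: the Jinja string ends at "user", leaving a stray "s".
broken = "{{ 'The assistant gives polite answers to the user's questions.' }}"
try:
    env.from_string(broken)
except Exception as e:
    print(type(e).__name__, e)  # TemplateSyntaxError: expected token 'end of print statement', got 's'

# Escaping the apostrophe (or double-quoting the string) compiles cleanly.
fixed = "{{ 'The assistant gives polite answers to the user\\'s questions.' }}"
print(env.from_string(fixed).render())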
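If that diagnosis holds, one possible workaround until the template is fixed upstream is to patch the template on the tokenizer after get_chat_template and before mapping the dataset. The .replace() target below is an assumption; print the template first and match what you actually see:

# Inspect the template that get_chat_template installed to find the offending quote.
print(tokenizer.chat_template)

# Escape the apostrophe so it no longer terminates the Jinja string literal.
tokenizer.chat_template = tokenizer.chat_template.replace("user's", "user\\'s")

Alternatively, "vicuna_old" appears in the CHAT_TEMPLATES keys printed above and may compile, though its output format differs from the current Vicuna style.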
