huggingface · tengomucho · Aug 22, 2024 · Aug 20, 2024 · Aug 20, 2024 · Aug 20, 2024
diff --git a/examples/language-modeling/gemma_tuning.ipynb b/examples/language-modeling/gemma_tuning.ipynb
@@ -118,6 +118,8 @@
    "outputs": [],
    "source": [
     "from optimum.tpu import fsdp_v2\n",
+    "\n",
+    "\n",
     "fsdp_v2.use_fsdp_v2()"
    ]
   },
@@ -141,6 +143,8 @@
    "outputs": [],
    "source": [
     "from datasets import load_dataset\n",
+    "\n",
+    "\n",
     "dataset = load_dataset(\"databricks/databricks-dolly-15k\", split=\"train\")"
    ]
   },
@@ -199,6 +203,8 @@
    "outputs": [],
    "source": [
     "from transformers import AutoTokenizer\n",
+    "\n",
+    "\n",
     "model_id = \"google/gemma-2b\"\n",
     "\n",
     "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
@@ -249,8 +255,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from optimum.tpu import AutoModelForCausalLM\n",
-    "model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=False)"
+    "import torch\n",
+    "from transformers import AutoModelForCausalLM\n",
+    "\n",
+    "\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=False, torch_dtype=torch.bfloat16)"
    ]
   },
   {
@@ -270,6 +279,7 @@
    "source": [
     "from peft import LoraConfig\n",
     "\n",
+    "\n",
     "# Set up PEFT LoRA for fine-tuning.\n",
     "lora_config = LoraConfig(\n",
     "    r=8,\n",
@@ -293,8 +303,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from trl import SFTTrainer\n",
     "from transformers import TrainingArguments\n",
+    "from trl import SFTTrainer\n",
+    "\n",
     "\n",
     "# Set up the FSDP arguments\n",
     "fsdp_training_args = fsdp_v2.get_fsdp_training_args(model)\n",

diff --git a/examples/language-modeling/llama_tuning.md b/examples/language-modeling/llama_tuning.md
@@ -47,14 +47,13 @@ Then, the tokenizer and model need to be loaded. We will choose [`meta-llama/Met
 
 ```python
 import torch
-from transformers import AutoTokenizer
-from optimum.tpu import AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 model_id = "meta-llama/Meta-Llama-3-8B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Add custom token for padding Llama
 tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
-model = AutoModelForCausalLM.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
 ```
 
 To tune the model with the [Abirate/english_quotes](https://huggingface.co/datasets/Abirate/english_quotes) dataset, you can load it and obtain the `quote` column:

diff --git a/optimum/tpu/fsdp_v2.py b/optimum/tpu/fsdp_v2.py
@@ -82,9 +82,11 @@ def get_fsdp_training_args(model: PreTrainedModel) -> Dict:
     model_type = model.config.model_type
     matched_model = False
     if model_type == "gemma":
+        from transformers import GemmaForCausalLM as HFGemmaForCausalLLM
+
         from .modeling_gemma import GemmaForCausalLM
 
-        if isinstance(model, GemmaForCausalLM):
+        if isinstance(model, GemmaForCausalLM) or isinstance(model, HFGemmaForCausalLLM):
             logger = logging.get_logger(__name__)
             from torch_xla import __version__ as xla_version
             if xla_version == "2.3.0":
@@ -94,9 +96,11 @@ def get_fsdp_training_args(model: PreTrainedModel) -> Dict:
             cls_to_wrap = "GemmaDecoderLayer"
             matched_model = True
     elif model_type == "llama":
+        from transformers import LlamaForCausalLM as HFLlamaForCausalLLM
+
         from .modeling_llama import LlamaForCausalLM
 
-        if isinstance(model, LlamaForCausalLM):
+        if isinstance(model, LlamaForCausalLM) or isinstance(model, HFLlamaForCausalLLM):
             cls_to_wrap = "LlamaDecoderLayer"
             matched_model = True