dataset update
This commit is contained in:
parent
4730204816
commit
136eddef07
4
hft.py
4
hft.py
|
|
@ -2,7 +2,7 @@ import os
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
|
||||
from datasets import load_dataset
|
||||
from datasets import Dataset
|
||||
from PIL import Image
|
||||
import re
|
||||
import pytesseract
|
||||
|
|
@ -112,7 +112,7 @@ model = CustomModel.from_pretrained(model_name)
|
|||
# Przygotowanie datasetu
|
||||
catalog_path = "file_catalog.json"
|
||||
data = prepare_dataset("files", catalog_path)
|
||||
dataset = load_dataset("dict", data=data)
|
||||
dataset = Dataset.from_list(data)
|
||||
# `dataset` is built with Dataset.from_list, which yields a single Dataset
# rather than a DatasetDict, so there is no "train" split to index into.
# Read the column names from the dataset itself so the raw input columns
# are dropped after tokenization.
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
|
||||
|
||||
# Konfiguracja treningu
|
||||
|
|
|
|||
Loading…
Reference in New Issue