I am trying to fine-tune a pre-trained model from Hugging Face (PyTorch version) on my own custom dataset.

This is the code for my custom dataset:
import torch
from torch.utils.data import Dataset
from torchvision import datasets, transforms

# img_dir points at the root image folder (one subfolder per class)
data = datasets.ImageFolder(root=img_dir)
print(data.classes)

class MyDataset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return len(self.subset)

train_size = int(0.8 * len(data))
validation_size = int((len(data) - train_size) * 0.5)
test_size = len(data) - train_size - validation_size  # remainder, so the three sizes always sum to len(data)
train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, validation_size, test_size])

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])
train = MyDataset(train_dataset, transform=transform)
validation = MyDataset(validation_dataset, transform=transform)
test = MyDataset(test_dataset, transform=transform)
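As a sanity check (a throwaway snippet, not part of the training script), each item comes back as a plain (tensor, label) tuple rather than a dict:

x, y = train[0]
print(type(x), x.shape, y)  # e.g. <class 'torch.Tensor'> torch.Size([3, 224, 224]) 0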
This is my code for fine-tuning the pre-trained model from Hugging Face transformers:
from transformers import AutoFeatureExtractor, TrainingArguments, Trainer
# feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
batch_size = 16
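model and compute_metrics are defined earlier in my notebook, roughly like this (a sketch, assuming an image-classification checkpoint; model_id is the same placeholder as in the commented line above):

import numpy as np
from transformers import AutoModelForImageClassification

# Sketch of my setup, not the exact notebook code: a classification head
# on a pre-trained checkpoint, with accuracy as the eval metric.
model = AutoModelForImageClassification.from_pretrained(
    model_id,  # placeholder checkpoint name, as above
    num_labels=len(data.classes),
)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}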
# Defining training arguments (set push_to_hub to False if you don't want to upload the model to the Hugging Face Hub)
training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
)
# Instantiate the Trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train,
    eval_dataset=validation,
)

trainer.train()
When I run the code, I get this error:
TypeError Traceback (most recent call last)
C:\Users\SHANTA~1\AppData\Local\Temp/ipykernel_2320/4032920361.py in <module>
----> 1 trainer.train()
c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1315 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1316 )
-> 1317 return inner_training_loop(
1318 args=args,
1319 resume_from_checkpoint=resume_from_checkpoint,
c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1526
1527 step = -1
-> 1528 for step, inputs in enumerate(epoch_iterator):
1529
1530 # Skip past any already trained steps if resuming training
c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
519 if self._sampler_iter is None:
520 self._reset()
--> 521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \
...
--> 106 features = [vars(f) for f in features]
107 first = features[0]
108 batch = {}
TypeError: vars() argument must have __dict__ attribute
Can anyone help me out, please? Thanks!