How to fix TypeError: vars() argument must have __dict__ attribute for huggingface (PyTorch version) Trainer?

I am trying to fine-tune a pre-trained model from Hugging Face (PyTorch version) on my own custom dataset.

This is my code for the custom dataset:

from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch
import numpy as np

data = datasets.ImageFolder(root=img_dir)

print(data.classes)

class MyDataset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform
        
    def __getitem__(self, index):
        # Each item comes back from the ImageFolder subset as a plain (image, label) pair
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y
        
    def __len__(self):
        return len(self.subset)

# Split into 80% train, 10% validation, 10% test (remainder goes to test so the sizes always sum to len(data))
train_size = int(0.8 * len(data))
validation_size = int((len(data) - train_size) * 0.5)
test_size = len(data) - train_size - validation_size
train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, validation_size, test_size])

transform = transforms.Compose([transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()])

train = MyDataset(train_dataset, transform=transform)
test = MyDataset(test_dataset, transform=transform)
validation = MyDataset(validation_dataset, transform=transform)
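
Each item that comes out of MyDataset is a plain (image tensor, label) tuple rather than a dict. A minimal sanity check, assuming img_dir points at a standard ImageFolder layout:

x, y = train[0]
print(type(x), x.shape, type(y))
# <class 'torch.Tensor'> torch.Size([3, 224, 224]) <class 'int'>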

This is my code for fine-tuning the pre-trained model with Hugging Face Transformers:

from transformers import AutoFeatureExtractor, Trainer, TrainingArguments
# feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
batch_size = 16
# Defining training arguments (set push_to_hub to False if you don't want to upload to the Hugging Face model hub)
training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")

# Instantiate the Trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train,
    eval_dataset=validation
)

trainer.train()

When I run the code, I get this error:

TypeError                                 Traceback (most recent call last)
C:\Users\SHANTA~1\AppData\Local\Temp/ipykernel_2320/4032920361.py in <module>
----> 1 trainer.train()

c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1315             self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
   1316         )
-> 1317         return inner_training_loop(
   1318             args=args,
   1319             resume_from_checkpoint=resume_from_checkpoint,

c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1526 
   1527             step = -1
-> 1528             for step, inputs in enumerate(epoch_iterator):
   1529 
   1530                 # Skip past any already trained steps if resuming training

c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
    519             if self._sampler_iter is None:
    520                 self._reset()
--> 521             data = self._next_data()
    522             self._num_yielded += 1
    523             if self._dataset_kind == _DatasetKind.Iterable and \
...
--> 106         features = [vars(f) for f in features]
    107     first = features[0]
    108     batch = {}

TypeError: vars() argument must have __dict__ attribute
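
From the last frame, the error comes from transformers' default data collator, which calls vars() on every item the DataLoader pulls from the dataset. A minimal sketch of the same failure, assuming the collator receives the (tensor, label) tuples my dataset returns:

import torch

features = [(torch.zeros(3, 224, 224), 0)]  # what MyDataset.__getitem__ returns
features = [vars(f) for f in features]      # raises: TypeError: vars() argument must have __dict__ attribute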

Can anyone help me out, please? Thanks!

Kurgan answered 2/7, 2022 at 21:39
