C

I am trying to run following code that reported running well with other users, but I found this error.

-- coding: utf-8 --

Import the Stuff

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

import cv2

import numpy as np

import csv

Step1: Read from the log file

samples = []
with open('data/driving_log.csv') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)
    for line in reader:
        samples.append(line)

Step2: Divide the data into training set and validation set

train_len = int(0.8*len(samples))
valid_len = len(samples) - train_len
train_samples, validation_samples = data.random_split(samples, lengths=[train_len, valid_len])

Step3a: Define the augmentation, transformation processes, parameters and dataset for dataloader

def augment(imgName, angle):
  name = 'data/IMG/' + imgName.split('/')[-1]
  current_image = cv2.imread(name)
  current_image = current_image[65:-25, :, :]
  if np.random.rand() < 0.5:
    current_image = cv2.flip(current_image, 1)
    angle = angle * -1.0  
  return current_image, angle

class Dataset(data.Dataset):

    def __init__(self, samples, transform=None):

        self.samples = samples
        self.transform = transform

    def __getitem__(self, index):
      
        batch_samples = self.samples[index]
        
        steering_angle = float(batch_samples[3])
        
        center_img, steering_angle_center = augment(batch_samples[0], steering_angle)
        left_img, steering_angle_left = augment(batch_samples[1], steering_angle + 0.4)
        right_img, steering_angle_right = augment(batch_samples[2], steering_angle - 0.4)

        center_img = self.transform(center_img)
        left_img = self.transform(left_img)
        right_img = self.transform(right_img)

        return (center_img, steering_angle_center), (left_img, steering_angle_left), (right_img, steering_angle_right)
      
    def __len__(self):
        return len(self.samples)

Step3b: Creating generator using the dataloader to parallasize the process

transformations = transforms.Compose([transforms.Lambda(lambda x: (x / 255.0) - 0.5)])

params = {'batch_size': 32,
          'shuffle': True,
          'num_workers': 4}

training_set = Dataset(train_samples, transformations)
training_generator = data.DataLoader(training_set, **params)

validation_set = Dataset(validation_samples, transformations)
validation_generator = data.DataLoader(validation_set, **params)

Step4: Define the network

class NetworkDense(nn.Module):

def __init__(self):
    super(NetworkDense, self).__init__()
    self.conv_layers = nn.Sequential(
        nn.Conv2d(3, 24, 5, stride=2),
        nn.ELU(),
        nn.Conv2d(24, 36, 5, stride=2),
        nn.ELU(),
        nn.Conv2d(36, 48, 5, stride=2),
        nn.ELU(),
        nn.Conv2d(48, 64, 3),
        nn.ELU(),
        nn.Conv2d(64, 64, 3),
        nn.Dropout(0.25)
    )
    self.linear_layers = nn.Sequential(
        nn.Linear(in_features=64 * 2 * 33, out_features=100),
        nn.ELU(),
        nn.Linear(in_features=100, out_features=50),
        nn.ELU(),
        nn.Linear(in_features=50, out_features=10),
        nn.Linear(in_features=10, out_features=1)
    )
    
def forward(self, input):  
    input = input.view(input.size(0), 3, 70, 320)
    output = self.conv_layers(input)
    output = output.view(output.size(0), -1)
    output = self.linear_layers(output)
    return output


class NetworkLight(nn.Module):

def __init__(self):
    super(NetworkLight, self).__init__()
    self.conv_layers = nn.Sequential(
        nn.Conv2d(3, 24, 3, stride=2),
        nn.ELU(),
        nn.Conv2d(24, 48, 3, stride=2),
        nn.MaxPool2d(4, stride=4),
        nn.Dropout(p=0.25)
    )
    self.linear_layers = nn.Sequential(
        nn.Linear(in_features=48*4*19, out_features=50),
        nn.ELU(),
        nn.Linear(in_features=50, out_features=10),
        nn.Linear(in_features=10, out_features=1)
    )
    

def forward(self, input):
    input = input.view(input.size(0), 3, 70, 320)
    output = self.conv_layers(input)
    output = output.view(output.size(0), -1)
    output = self.linear_layers(output)
    return output

Step5: Define optimizer

model = NetworkLight()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

criterion = nn.MSELoss()

Step6: Check the device and define function to move tensors to that device

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
print('device is: ', device)

def toDevice(datas, device):
  
  imgs, angles = datas
  return imgs.float().to(device), angles.float().to(device)

Step7: Train and validate network based on maximum epochs defined

max_epochs = 22

for epoch in range(max_epochs):
    
    model.to(device)
    
    # Training
    train_loss = 0
    model.train()
    for local_batch, (centers, lefts, rights) in enumerate(training_generator):
        # Transfer to GPU
        centers, lefts, rights = toDevice(centers, device), toDevice(lefts, device), toDevice(rights, device)
        
        # Model computations
        optimizer.zero_grad()
        datas = [centers, lefts, rights]        
        for data in datas:
            imgs, angles = data
#             print("training image: ", imgs.shape)
            outputs = model(imgs)
            loss = criterion(outputs, angles.unsqueeze(1))
            loss.backward()
            optimizer.step()

            train_loss += loss.data[0].item()
            
        if local_batch % 100 == 0:
            print('Loss: %.3f '
                 % (train_loss/(local_batch+1)))

    
    # Validation
    model.eval()
    valid_loss = 0
    with torch.set_grad_enabled(False):
        for local_batch, (centers, lefts, rights) in enumerate(validation_generator):
            # Transfer to GPU
            centers, lefts, rights = toDevice(centers, device), toDevice(lefts, device), toDevice(rights, device)
        
            # Model computations
            optimizer.zero_grad()
            datas = [centers, lefts, rights]        
            for data in datas:
                imgs, angles = data
#                 print("Validation image: ", imgs.shape)
                outputs = model(imgs)
                loss = criterion(outputs, angles.unsqueeze(1))
                
                valid_loss += loss.data[0].item()

            if local_batch % 100 == 0:
                print('Valid Loss: %.3f '
                     % (valid_loss/(local_batch+1)))

Step8: Define state and save the model wrt to state

state = {
        'model': model.module if device == 'cuda' else model,
        }

torch.save(state, 'model.h5')

this is the error message:

"D:\VICO\Back up\venv\Scripts\python.exe" "D:/VICO/Back up/venv/Scripts/self_driving_car.py"
device is:  cpu
Traceback (most recent call last):
  File "D:/VICO/Back up/venv/Scripts/self_driving_car.py", line 163, in <module>
    for local_batch, (centers, lefts, rights) in enumerate(training_generator):
  File "D:\VICO\Back up\venv\lib\site-packages\torch\utils\data\dataloader.py", line 291, in __iter__
    return _MultiProcessingDataLoaderIter(self)
  File "D:\VICO\Back up\venv\lib\site-packages\torch\utils\data\dataloader.py", line 737, in __init__
    w.start()
  File "C:\Users\isonata\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 112, in start
    self._popen = self._Popen(self)
  File "C:\Users\isonata\AppData\Local\Programs\Python\Python37\lib\multiprocessing\context.py", line 223, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Users\isonata\AppData\Local\Programs\Python\Python37\lib\multiprocessing\context.py", line 322, in _Popen
    return Popen(process_obj)
  File "C:\Users\isonata\AppData\Local\Programs\Python\Python37\lib\multiprocessing\popen_spawn_win32.py", line 89, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Users\isonata\AppData\Local\Programs\Python\Python37\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <function <lambda> at 0x0000002F2175B048>: attribute lookup <lambda> on __main__ failed

Process finished with exit code 1

I am not sure the next step to resolve the problem.

Craniate answered 14/10, 2020 at 5:34 Comment(1)

Use state = {'model' : model.state_dict()}, followed by model.load_state_dict(...) when you load – Linetta 14/10, 2020 at 5:41

N

8

pickle doesn't pickle function objects. It expects to find the function object by importing its module and looking up its name. lambdas are anonymous functions (no name) so that doesn't work. The solution is to name the function at module level. The only lambda I found in your code is

transformations = transforms.Compose([transforms.Lambda(lambda x: (x / 255.0) - 0.5)])

Assuming that's the troublesome function, you can

def _my_normalization(x):
    return x/255.0 - 0.5

transformations = transforms.Compose([transforms.Lambda(_my_normalization])

You may have other problems because it looks like you are doing work at module level. If this is a multiprocessing thing and you are running on windows, the new process will import the file and run all of that module level code again. This isn't a problem on linux/mac where a forked process already has the modules loaded from the parent.

Nicolella answered 14/10, 2020 at 5:55 Comment(4)

Yes, actually I have another problem with the multiprocessing."RuntimeError: An attempt has been made to start a new process before the current process has finished its bootstrapping phase. This probably means that you are not using fork to start your child processes and you have forgotten to use the proper idiom in the main module: if name == 'main': freeze_support() ... The "freeze_support()" line can be omitted if the program is not going to be frozen to produce an executable. – Craniate 14/10, 2020 at 7:29

Hi tdelaney, any Idea to solve this problem? thanks – Craniate 14/10, 2020 at 14:5

It sounds like you are running on windows. In windows, modules are reimported in the child processes. Anything at module level runs in the child, in your case that includes the code that creates subprocesses, leading to an infinite spawning of processes. Use if __name__ == "__main__": in your main script. Just search "RuntimeError: An attempt has been made to start a new process" and you'll get a hundred hits. – Nicolella 14/10, 2020 at 16:26

Thanks tdelaney, I put if name == "main": and main() at step 7 and everything work well. – Craniate 15/10, 2020 at 4:59

E

3

params = {'batch_size': 32,
          'shuffle': True,
          'num_workers': 4}

Change 'num_workers': 0. This worked for me.

I don't know why, but sometimes setting num_workers value as non-zero value makes some errors.

Edson answered 18/4 at 14:35 Comment(0)