I am using JupyterLab on AWS SageMaker. Kernel: `conda_pytorch_latest_p36`.
I have successfully performed training.
Now, I attempt to set up the model for predictions, i.e. testing.
I suspect the `last.ckpt` file is corrupt, as loading fails on this line:
model = OntologyTaggerModel.load_from_checkpoint('last.ckpt.2cCC2f52', map_location=torch.device(device), from_checkpoint=True)
Where does the `last.ckpt` file come from - the BERT download or my own model definition?
How do I regenerate it?
Update: I was able to regenerate it: `last.ckpt.E342d53e`.
Running the model load with last.ckpt.**E342d53e**:
RuntimeError: [enforce fail at inline_container.cc:145] . PytorchStreamReader failed reading zip archive: failed finding central directory
Running the model load with `last.ckpt` (without the unique string in the filename):
FileNotFoundError: [Errno 2] No such file or directory: '/home/ec2-user/SageMaker/last.ckpt'
I also launched a new AWS SageMaker instance, without luck.
Suspect code (the crash is on the second-to-last line):
def get_device():
    """Return the device string to run on.

    Returns:
        ``'cuda:0'`` when ``torch.cuda.is_available()`` is true, otherwise
        ``'cpu'``.
    """
    return 'cuda:0' if torch.cuda.is_available() else 'cpu'
def preprocess(input, preprocessor):
    """Tokenise every item of ``input`` and batch the results into one tensor.

    Args:
        input: Iterable of raw items; each one is passed to
            ``preprocessor.tokenise``.
        preprocessor: Object exposing ``tokenise(item)``; its return value is
            handed to ``torch.tensor``.

    Returns:
        A tensor with one leading row per input item.

    Raises:
        RuntimeError: If ``input`` is empty or the tokenised items do not all
            have the same shape (raised by ``torch.stack``).
    """
    # torch.stack adds the new leading batch dimension itself, which is what
    # the per-item unsqueeze(dim=0) followed by torch.cat used to do.
    return torch.stack([torch.tensor(preprocessor.tokenise(i)) for i in input])
def predict_fn(input, model_artifacts):
    """Run the model on raw inputs and return predicted labels and confidences.

    Args:
        input: Iterable of raw items; tokenised and batched via ``preprocess``.
        model_artifacts: A ``(preprocessor, model, label_mapper)`` triple.

    Returns:
        A ``(classes, probs)`` pair of lists. Both are built per output tensor
        and then transposed with ``zip(*...)``, so each list element groups
        the values for one position across all output tensors (presumably
        one element per input item -- depends on what
        ``label_mapper.reverse_map`` returns; TODO confirm).
    """
    preprocessor, model, label_mapper = model_artifacts

    # Pre-process
    input_tensor = preprocess(input, preprocessor)

    # Copy input to gpu if available
    device = get_device()
    input_tensor = input_tensor.to(device=device)

    # Invoke
    model.eval()
    classes = []
    probs = []

    # Explicit dim: Softmax() without ``dim`` relies on a deprecated implicit
    # choice and emits a warning. dim=1 matches the torch.max(dim=1) below.
    # Assumes each output tensor is (batch, num_labels) -- TODO confirm.
    softmax = torch.nn.Softmax(dim=1)

    with torch.no_grad():
        # The model's second return value is iterated as one tensor per
        # class index.
        output_tensors = model(input_tensor)[1]

        # Convert to probabilities
        for class_index, output_tensor in enumerate(output_tensors):
            output_tensor = softmax(output_tensor)
            prob, predictions = torch.max(output_tensor, dim=1)
            classes.append(label_mapper.reverse_map(predictions, class_index))
            probs.append(prob)

    # Transpose from per-output-tensor lists to per-position tuples.
    classes = list(zip(*classes))
    probs = list(zip(*probs))
    return classes, probs
# Inference setup: pick the device, then load the tokenizer, the label mapper
# and the trained model from its checkpoint file.
device = get_device()
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
label_mapper = LabelMapper('classes.txt')
# NOTE: the checkpoint load below is the call that crashes with
# "PytorchStreamReader failed reading zip archive".
model = OntologyTaggerModel.load_from_checkpoint(
    'last.ckpt.2cCC2f52',
    map_location=torch.device(device),
    from_checkpoint=True,
).to(device)
Traceback:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-8-ba98e0974205> in <module>
36 tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
37 label_mapper = LabelMapper('classes.txt')
---> 38 model = OntologyTaggerModel.load_from_checkpoint('last.ckpt.2cCC2f52', map_location=torch.device(device), from_checkpoint=True)
39 model = model.to(device)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
131 """
132 if map_location is not None:
--> 133 checkpoint = pl_load(checkpoint_path, map_location=map_location)
134 else:
135 checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/utilities/cloud_io.py in load(path_or_url, map_location)
44 fs = get_filesystem(path_or_url)
45 with fs.open(path_or_url, "rb") as f:
---> 46 return torch.load(f, map_location=map_location)
47
48
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
585 # reset back to the original position.
586 orig_position = opened_file.tell()
--> 587 with _open_zipfile_reader(opened_file) as opened_zipfile:
588 if _is_torchscript_zip(opened_zipfile):
589 warnings.warn("'torch.load' received a zip file that looks like a TorchScript archive"
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/serialization.py in __init__(self, name_or_buffer)
240 class _open_zipfile_reader(_opener):
241 def __init__(self, name_or_buffer) -> None:
--> 242 super(_open_zipfile_reader, self).__init__(torch._C.PyTorchFileReader(name_or_buffer))
243
244
RuntimeError: [enforce fail at inline_container.cc:145] . PytorchStreamReader failed reading zip archive: failed finding central directory
Please let me know if I should add anything else.
Where is `last.ckpt` from? The training code is more important than the inference code. – Phenanthrene