I made two wrapper functions for retrieving content from the web with a configurable number of retries and a timeout.
import os
import socket
import time
import urllib.error
import urllib.request

from bs4 import BeautifulSoup


class DownloadUnsuccessful(socket.timeout):
    pass


def web2soup(url, tries=9, timeout=30, sleepBetween=1):
    """Fetch a URL and return it parsed as BeautifulSoup, retrying on transient errors."""
    failures = 0
    while True:
        if failures == tries:
            raise DownloadUnsuccessful()
        try:
            with urllib.request.urlopen(url, timeout=timeout) as con:
                content = con.read().decode('utf-8')
            break
        except urllib.error.HTTPError:
            # An HTTP error status is unlikely to change on retry, so give up right away.
            raise DownloadUnsuccessful()
        except urllib.error.URLError:
            # Network-level failure: wait a moment before the next attempt.
            time.sleep(sleepBetween)
        except (TimeoutError, socket.timeout):
            # Timed out: just try again.
            pass
        failures += 1
    soup = BeautifulSoup(content, 'html.parser')
    return soup
def web2file(url, filePath, tries=9, timeout=30, sleepBetween=1, tempExt='.temporary_filename'):
    """Download a URL to filePath (plus the URL's extension), retrying on transient errors."""
    tempPath = filePath + tempExt
    failures = 0
    while True:
        if failures == tries:
            # Clean up the partial download before giving up.
            try:
                os.remove(tempPath)
            except OSError:
                pass
            raise DownloadUnsuccessful()
        try:
            # urlretrieve has no timeout parameter, so set the module-wide default instead.
            socket.setdefaulttimeout(timeout)
            urllib.request.urlretrieve(url, tempPath)
            break
        except urllib.error.HTTPError:
            raise DownloadUnsuccessful()
        except urllib.error.URLError:
            time.sleep(sleepBetween)
        except (TimeoutError, socket.timeout):
            pass
        failures += 1
    # Keep the extension from the URL and move the finished download into place.
    fileExt = os.path.splitext(url)[1]
    filePath = filePath + fileExt
    os.rename(tempPath, filePath)
    return filePath
This way I can just call them and know which exception to expect if something goes wrong.
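For example, this is roughly how I call them; the URL and output name here are just placeholders:

if __name__ == '__main__':
    try:
        # Parse a page, giving up after a few attempts.
        soup = web2soup('https://example.com/', tries=3, timeout=10)
        print(soup.title)

        # Download a file; the extension is taken from the URL, so 'page' becomes 'page.html'.
        savedPath = web2file('https://example.com/index.html', 'page', tries=3, timeout=10)
        print('saved to', savedPath)
    except DownloadUnsuccessful:
        # The single exception I need to handle when a download ultimately fails.
        print('download failed after all retries')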