from difflib import *
import urllib.request,urllib.parse,urllib.error
from urllib.parse import unquote
import time
import pdb
try:
file2 = urllib.request.Request('site goes here')
file2.add_header("User-Agent", 'Opera/9.61 (Windows NT 5.1; U; en) Presto/2.1.1')
ResponseData = urllib.request.urlopen(file2).read().decode("utf8", 'ignore')
except urllib.error.URLError as e: print('http'); ResponseData = ''
except socket.error as e: ResponseData = ''
except socket.timeout as e: ResponseData = ''
except UnicodeEncodeError as e: ResponseData = ''
except http.client.BadStatusLine as e: ResponseData = ''
except http.client.IncompleteRead as e: ResponseData = ''
except urllib.error.HTTPError as e: ResponseData = ''
Hi, when I run the following code on a page containing errors such as 'Microsoft VBScript runtime error' ... the request fails and returns as urllib.error.URLError ... even though the page contains plenty of other code. How can I return ALL the html from the page and not just the exception error. I would like to keep my current code as much as possible (if that is possible). Thanks