# --coding:utf-8--
# author = 'AlenWesker'
import os
import os.path
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import visibility_of_element_located
from selenium.webdriver.support.ui import WebDriverWait
import pyautogui
# Timeout handed to WebDriverWait while waiting for the page's <body> to become visible.
TIME_OUT = 60
# Maximum number of characters sent to the save dialog per pyautogui.typewrite() call.
MAX_LENGTH_GUI = 1024
def split_string(input_string, max_length):
    """Split *input_string* into consecutive chunks of at most *max_length* characters.

    Args:
        input_string: The string to split.
        max_length: Maximum length of each chunk; must be at least 1.

    Returns:
        A list of substrings that concatenate back to *input_string*. The last
        chunk may be shorter than *max_length*; an empty input yields ``[]``.

    Raises:
        ValueError: If *max_length* is smaller than 1. A negative step would
            otherwise produce an empty list and silently drop the input.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    return [
        input_string[start:start + max_length]
        for start in range(0, len(input_string), max_length)
    ]
def download_as_mhtml(url, path, *, save_wait=10):
    """Open *url* in Chrome and save it through the browser's "Save as" dialog.

    The dialog is driven with pyautogui keystrokes, so the Chrome window must
    have keyboard focus while this function runs.

    Setup (see https://mcmap.net/q/1279871/-how-to-download-webpage-as-mhtml):
      * Download a Chrome driver matching your Chrome version from
        http://chromedriver.storage.googleapis.com/index.html or
        https://googlechromelabs.github.io/chrome-for-testing/
      * Place chromedriver.exe in Chrome's exe folder and add that folder to
        the system path, so that ``chromedriver`` runs directly in cmd.exe.

    Args:
        url: Address of the page to load.
        path: File name typed into the save dialog. When empty, nothing is
            typed and the dialog's pre-filled name is used.
        save_wait: Seconds to pause after confirming the dialog, giving the
            browser time to write the file before it is closed.

    Raises:
        selenium.common.exceptions.TimeoutException: If the page body does not
            become visible within ``TIME_OUT`` seconds.
    """
    options = webdriver.ChromeOptions()
    # Chrome switch that makes "Save page as" produce an MHTML file.
    options.add_argument('--save-page-as-mhtml')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Wait until the body is loaded.
        WebDriverWait(driver, TIME_OUT).until(
            visibility_of_element_located((By.TAG_NAME, 'body'))
        )
        time.sleep(1)

        # Open 'Save as...' and give the dialog a moment to appear.
        pyautogui.hotkey('ctrl', 's')
        time.sleep(1)

        if path:
            # Type the file name in chunks of at most MAX_LENGTH_GUI characters.
            for chunk in split_string(path, MAX_LENGTH_GUI):
                pyautogui.typewrite(chunk)
        time.sleep(1)

        pyautogui.hotkey('enter')
        pyautogui.hotkey('alt', 's')  # Explicitly trigger the dialog's Save button.

        # driver.implicitly_wait() only configures the element-lookup timeout
        # and returns immediately, so block here for real while the file is saved.
        time.sleep(save_wait)
    finally:
        # Always end the WebDriver session so the browser and driver processes
        # are not left running.
        driver.quit()
if __name__ == '__main__':
    # Example run: save https://poet.so as poet.mhtml in a short local folder.
    # Alternative target folder: os.path.join(os.path.dirname(__file__), "../temp")
    # Don't make the folder path too long; it is not yet clear why pyautogui
    # fails to input long strings.
    out_dir = "d:\\temp"
    target = os.path.normpath(os.path.join(out_dir, "poet.mhtml"))
    download_as_mhtml("https://poet.so", target)  # Make sure your default chrome can access it
My version is guaranteed to run in Chrome 120, and I have listed all the steps in the code comments. It builds on all of the answers above — thank you all.
wget
? – Suborder.mhtml
. I don't think there's a way to do that with `wget`,
but it should not be hard to do with Python once you understand the format. Basically, create an `email.message.EmailMessage`
and `attach`
each downloaded page element. – Thundersquall
`email.message.EmailMessage`
option you mentioned work in my case? Thanks. – Sethsethi