Here is true jedi approach :) uses only XMLHttpRequests
, there aren't IE disadvantages or dependencies from it. Output window created dynamically via mshta
without temp files. Processing speed can be improved by implementing async requests or multiprocess environment. The only way to stop the script at the moment unfortunately is wscript.exe
process termination.
Option Explicit
Dim oDisplay, sUrl, sRespHeaders, sRespText, arrSetHeaders, sEventTarget, arrFormData, lPage, lMember, i, arrFormStrings, sFormData, arrMembers, arrMemeber, sUrlEmail, sRespTextEmail, sEmail
Set oDisplay = New OutputWindow
sUrl = "https://netforum.avectra.com/eweb/DynamicPage.aspx?Site=NEFAR&WebCode=IndResult&FromSearchControl=Yes"
lPage = 0
lMember = 0
' Initial webpage request
oDisplay.Write("Connecting " & vbCrLf & sUrl)
XmlHttpRequest "GET", sUrl, Array(), "", sRespHeaders, sRespText
' Loop through all pages
Do
' Get cookies, form data, listctrl
oDisplay.Write("Processing page #" & (lPage + 1))
sEventTarget = ParseFragm("__doPostBack\('(ListControl_[\s\S]*?)',", sRespText)
ParseResponse "^Set-(Cookie): ([\S]*?=[\S]*?);[\s\S]*?$", sRespHeaders, arrSetHeaders
ParseResponse "<input type=""hidden"" name=""([\S]*?)""[\s\S]*?value=""([\s\S]*?)"" />", sRespText, arrFormData
' Update form params
For i = 0 To UBound(arrFormData)
Select Case arrFormData(i)(0)
Case "__POSTBACKCONTROL"
arrFormData(i)(1) = "JumpToPage"
Case "__EVENTTARGET"
arrFormData(i)(1) = sEventTarget
Case "__EVENTARGUMENT"
arrFormData(i)(1) = CStr(lPage)
End Select
Next
' Jump to page #lPage
arrFormStrings = Array()
ReDim arrFormStrings(UBound(arrFormData))
For i = 0 To UBound(arrFormData)
arrFormStrings(i) = EncodeUriComponent(arrFormData(i)(0)) & "=" & EncodeUriComponent(arrFormData(i)(1))
Next
sFormData = Join(arrFormStrings, "&")
PushItem arrSetHeaders, Array("Content-Type", "application/x-www-form-urlencoded")
PushItem arrSetHeaders, Array("Content-Length", CStr(Len(sFormData)))
' New page POST request
XmlHttpRequest "POST", sUrl, arrSetHeaders, sFormData, sRespHeaders, sRespText
' Parse members from new page
ParseMembers sRespText, arrMembers
' Parse members emails, and output
For Each arrMemeber in arrMembers
lMember = lMember + 1
sUrlEmail = "https://netforum.avectra.com/eweb/DynamicPage.aspx?Site=NEFAR&WebCode=PrimaryContactInfo&ind_cst_key=" & arrMemeber(0)
XmlHttpRequest "GET", sUrlEmail, Array(), "", "", sRespTextEmail
sEmail = ParseFragm("""mailto:([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6})""", sRespTextEmail)
oDisplay.WriteTable(Array(CStr(lMember), sEmail, arrMemeber(0), arrMemeber(1)))
Next
lPage = lPage + 1
Loop
Sub ParseResponse(sPattern, sResponse, arrData)
Dim oMatch
arrData = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
PushItem arrData, Array(oMatch.SubMatches(0), oMatch.SubMatches(1))
Next
End With
End Sub
Function ParseFragm(sPattern, sResponse)
Dim oMatches
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = sPattern
Set oMatches = .Execute(sResponse)
If oMatches.Count > 0 Then ParseFragm = oMatches(0).SubMatches(0)
End With
End Function
Sub ParseMembers(sRespText, arrMembers)
Dim oMatch
arrMembers = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = "<td class[\s\S]*?>([\s\S]*?<[\s\S]*?Key=([\s\S]*?)&[\s\S]*?)</td>"
For Each oMatch In .Execute(sRespText)
PushItem arrMembers, Array(oMatch.SubMatches(1), GetInnerText(oMatch.SubMatches(0)))
Next
End With
End Sub
Sub PushItem(arrList, varItem)
ReDim Preserve arrList(UBound(arrList) + 1)
arrList(UBound(arrList)) = varItem
End Sub
Function EncodeUriComponent(sText)
With CreateObject("htmlfile")
.Write ("<script language='JScript'></script>")
EncodeUriComponent = .DocumentElement.Document.Script.EncodeUriComponent(sText)
End With
End Function
Function GetInnerText(sText)
With CreateObject("htmlfile")
.Write ("<body>" & sText & "</body>")
GetInnerText = .DocumentElement.Document.GetElementsByTagName("body")(0).InnerText
End With
End Function
Sub XmlHttpRequest(sMethod, sUrl, arrSetHeaders, sFormData, sRespHeaders, sRespText)
Dim arrHeader
With CreateObject("Msxml2.ServerXMLHTTP.3.0")
.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open sMethod, sUrl, False
For Each arrHeader In arrSetHeaders
.SetRequestHeader arrHeader(0), arrHeader(1)
Next
.Send sFormData
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Class OutputWindow
Dim oWnd, oDoc, oOutDiv, oCursorDiv, oOutTBody, sSignature, lCols
Private Sub Class_Initialize()
sSignature = "OutputWindow"
ProvideWindow()
End Sub
Private Sub ProvideWindow()
' source http://forum.script-coding.com/viewtopic.php?pid=75356#p75356
Dim lWidth, lHeight
GetWindow()
If oWnd Is Nothing Then
CreateWindow()
With oWnd
With .Document
.GetElementsByTagName("head")(0).AppendChild .CreateElement("style")
.stylesheets(0).cssText = "body, td, #output {font-family: consolas, courier new; font-size: 9pt;} #cursor {margin: 3px;} body {background-color: buttonface;} #output {height: 100%; width: 100%; overflow: scroll; background: #FFF;} div.hline {height: 1px; width: 100%; background-color: #000; overflow: hidden;} table {width: 100%; TEXT-ALIGN: center; border-COLLAPSE: collapse; background: transparent; margin-top: 1px;} td {border: black 1px solid;}"
.Title = "Output Window"
.Body.InnerHtml = "<div id='output'><div id='cursor'><img src='data:image/gif;base64,R0lGODlhAwAJAPAAAAAAAAAAACH5BAkeAAEAIf8LTkVUU0NBUEUyLjADAf//ACwAAAAAAwAJAAACBwxieMnrGgoAIfkECR4AAAAsAAAAAAMACQAAAgSEj6laADs=' /></div></div>"
End With
lWidth = CInt(.Screen.AvailWidth * 0.75)
lHeight = CInt(.Screen.AvailHeight * 0.75)
.ResizeTo .Screen.AvailWidth, .Screen.AvailHeight
.ResizeTo lWidth + .Screen.AvailWidth - .Document.Body.OffsetWidth, lHeight + .Screen.AvailHeight - .Document.Body.OffsetHeight
.MoveTo CInt((.Screen.AvailWidth - lWidth) / 2), CInt((.Screen.AvailHeight - lHeight) / 2)
End With
End If
Set oDoc = oWnd.Document
Set oOutDiv = oWnd.output
Set oCursorDiv = oWnd.cursor
lCols = -1
End Sub
Private Sub GetWindow()
Dim oShellWnd
On Error Resume Next
For Each oShellWnd In CreateObject("Shell.Application").Windows
Set oWnd = oShellWnd.GetProperty(sSignature)
If Err.Number = 0 Then Exit Sub
Err.Clear
Next
Set oWnd = Nothing
End Sub
Private Sub CreateWindow()
Dim oProc
Do
Set oProc = CreateObject("WScript.Shell").exec("mshta ""about:<head><script>moveTo(-32000,-32000);window.document.title=' ';</script><hta:application id=app border=dialog minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=yes selection=yes innerborder=no /><object id='shellwindow' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=RegisterAsBrowser value=1></object><script>shellwindow.putproperty('" & sSignature & "',window);</script></head>""")
Do
If oProc.Status > 0 Then Exit Do
GetWindow()
If Not (oWnd Is Nothing) Then Exit Sub
Loop
Loop
End Sub
Private Sub ChkDoc()
On Error Resume Next
If TypeName(oDoc) <> "HTMLDocument" Then ProvideWindow()
End Sub
Public Sub Write(sText)
Dim oDiv
ChkDoc()
On Error Resume Next
Set oDiv = oDoc.CreateElement("div")
oDiv.InnerHtml = EscapeHtml(sText) & "<div class='hline'></div>"
oOutDiv.AppendChild oDiv
oOutDiv.AppendChild oCursorDiv
oOutDiv.ScrollTop = oOutDiv.ScrollHeight
lCols = -1
End Sub
Public Sub WriteTable(arrCells)
Dim sInner, oTable, oRow, oTr, oCell, n
ChkDoc()
On Error Resume Next
If UBound(arrCells) <> lCols Then
Set oTable = oDoc.CreateElement("table")
oOutDiv.AppendChild oTable
Set oOutTBody = oDoc.CreateElement("tbody")
oTable.AppendChild oOutTBody
lCols = UBound(arrCells)
End If
Set oTr = oDoc.CreateElement("tr")
oOutTBody.AppendChild oTr
For n = 0 To lCols
Set oCell = oTr.InsertCell(n)
oCell.InnerHtml = EscapeHtml(arrCells(n))
Next
oOutDiv.AppendChild oCursorDiv
oOutDiv.ScrollTop = oOutDiv.ScrollHeight
End Sub
Public Sub BreakTable()
lCols = -1
End Sub
Private Function EscapeHtml(sCnt)
Dim n
sCnt = Replace(sCnt, "&", "&")
sCnt = Replace(sCnt, """", """)
sCnt = Replace(sCnt, "<", "<")
sCnt = Replace(sCnt, ">", ">")
sCnt = Replace(sCnt, "'", "'")
sCnt = Replace(sCnt, vbCrLf, "<br>")
sCnt = Replace(sCnt, Chr(9), " ")
sCnt = Replace(sCnt, " ", " ")
sCnt = Replace(sCnt, " ", " ")
For n = 0 To 31
sCnt = Replace(sCnt, Chr(n), "¶")
Next
EscapeHtml = sCnt
End Function
Private Sub Class_Terminate()
' oWnd.close
End Sub
End Class
InternetExplorer.Application
, or (if you don't want to use IE due to it's wellknown :) disadvantages) to make fewXMLHTTP
requests -GET
to receive cookies and other necessary form data, thenPOST
for each page, and thenGET
for each link to find out e-mail addresses. – Wolbrom