Displaying Source Code(s)
|
|
Spider Object
--------------------------------------------------------------------------------
Description : The Spider Object returns information about a web
page from an outside server. The Spider object is used to parse
the content returned by a web page after it is retrieved. By
calling the various methods of the Spider object, you can
effectively read and save parts of a web page for further
reference. The spider object requires you to install a free java
class file (IOSockets.class) onto your server. The installation
instructions and java files can be found here: http://www.aspemporium.com/aspEmporium/downloads/java/IOSockets.asp
Properties and methods for spider can be found in the source
code.
<%
Class spider
'###################################################################
' The ASP Emporium
' Spider Object v2.0
' Last Modified: 2/6/2001
'
'
' Purpose
' The spider object allows a VBScript programmer to use sockets
' to access the html and headers returned by the target server
' for a given URL.
'
'
' Properties
'+---------------+------------------+------------------------------+
'| property name | data type | notes |
'+---------------+------------------+------------------------------+
' URL variant (string) sets/returns the URL to
' retrieve
'+---------------+------------------+------------------------------+
' ErrorMsg variant (string) read only. returns the error
' message of the java com object.
' Only contains data after the
' fetch method is called. Will
' contain empty string "" if there
' is no error.
'+---------------+------------------+------------------------------+
' RawHTMLStream variant (string) read only. returns the html
' and headers returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' HTML variant (string) read only. returns the html
' returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' AllHeaders variant (string) read only. returns all
' headers returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' HeaderNames variant (array) read only. returns array of
' header names returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' HeaderValues variant (array) read only. returns array of
' header values returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' HeaderCount variant (long) read only. returns count of all
' header/value pairs returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' AllMetaTags variant (string) read only. returns all
' meta tags returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' MetaTagNames variant (array) read only. returns array of
' meta tag names returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' MetaTagValues variant (array) read only. returns array of
' meta tag values returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
' MetaTagCount variant (long) read only. returns count of all
' meta tag name/value pairs
' returned by URL.
' Only contains data after the
' fetch method is called.
'+---------------+------------------+------------------------------+
'
'
' Methods
'+---------------------------+------------------+------------------------------+
'| method name | data type | notes |
'+---------------------------+------------------+------------------------------+
' Fetch void visits the URL specified in
' the URL property and fills
' all read only properties
' with information about the
' URL's contents.
'+---------------------------+------------------+------------------------------+
' GetHeaderValueByName(name) variant (string) returns the header
value of
' the header name specified in
' the name argument. If name is
' not found empty string "" is
' returned. Only contains data
' after the fetch method is
' called.
'+---------------------------+------------------+------------------------------+
' GetMetaTagValueByName(name) variant (string) returns the meta
tag value of
' the meta tag name specified in
' the name argument. If name is
' not found empty string "" is
' returned. Only contains data
' after the fetch method is
' called.
'+---------------------------+------------------+------------------------------+
'
'
' Requirements
' requires: VBScript 5.5
' SCRRUN.DLL - Scripting Run Time Library
' IOSockets.class java com object (provided with this class)
'
'###################################################################
'---------------------------
'private class scope variables
'---------------------------
Private sRawHTMLStream 'holds the html and headers returned from
a given url
Private sErrorMsg 'holds the error message returned by the java
object
Private d, d2 'holds class scope dictionary object reference
Private sRawMetaTags 'holds raw meta tags
'---------------------------
'public read/write properties
'---------------------------
'url to retrieve
Public url
'---------------------------
'public methods
'---------------------------
Public Sub fetch()
'exposes the private GetWebPage function
If url <> "" Then getwebpage
End Sub
Public Function getheadervaluebyname(ByVal sheadername)
'returns variant (string) containing the header value
'of the corresponding header name (if any)
If d.exists(LCase(sheadername)) Then
getheadervaluebyname = d.item(LCase(sheadername))
End If
End Function
Public Function getmetatagvaluebyname(ByVal stagname)
'returns variant (string) containing the metatag value
'of the corresponding tag name (if any)
If d2.exists(LCase(stagname)) Then
getmetatagvaluebyname = d2.item(LCase(stagname))
End If
End Function
'---------------------------
'public read only properties
'---------------------------
Public Property Get rawhtmlstream
'returns variant (string) containing html and
'header stream returned from a given URL
rawhtmlstream = srawhtmlstream
End Property
Public Property Get html
'returns variant (string) containing html
'stream returned from a given URL
Dim max
max = InStr(1, srawhtmlstream, Chr(10) & Chr(10), 1)
html = Mid(srawhtmlstream, max + 1)
End Property
Public Property Get errormsg
'returns variant (string) containing error
'message returned from java object (if any)
errormsg = serrormsg
End Property
Public Property Get headernames
'returns variant (array) containing all header names
'returned from a given URL
headernames = d.keys
End Property
Public Property Get headervalues
'returns variant (array) containing all header values
'returned from a given URL
headervalues = d.items
End Property
Public Property Get headercount
'returns variant (long) containing the count
'of all header name/value pairs returned from
'a given URL
headercount = d.count
End Property
Public Property Get allheaders
'returns variant (string) containing headers
'returned from a given URL
Dim max
max = InStr(1, srawhtmlstream, Chr(10) & Chr(10), 1)
If max <> 0 Then
allheaders = Left(srawhtmlstream, max - 1)
Else
allheaders = ""
End If
End Property
Public Property Get allmetatags
'returns a string of meta tags
allmetatags = srawmetatags
End Property
Public Property Get metatagnames
'returns variant (array) containing all meta tag names
'returned from a given URL
metatagnames = d2.keys
End Property
Public Property Get metatagvalues
'returns variant (array) containing all meta tag values
'returned from a given URL
metatagvalues = d2.items
End Property
Public Property Get metatagcount
'returns variant (long) containing the count
'of all meta tag name/value pairs returned from
'a given URL
metatagcount = d2.count
End Property
'---------------------------
'class events
'---------------------------
Private Sub class_initialize()
'make the dictionary available to all routines
'within the class scope
Set d = CreateObject("scripting.dictionary")
Set d2 = CreateObject("scripting.dictionary")
d.removeall
d2.removeall
End Sub
Private Sub class_terminate()
'clean up the dictionary
Set d = Nothing
Set d2 = Nothing
End Sub
'---------------------------
'private class routines
'---------------------------
Private Sub filldictionary_headers
'fills dictionary object with header name/value pairs
Dim sheaders, vtmp, header
Dim s, i, j, a, b, bnoadd
bnoadd = False
sheaders = allheaders
vtmp = Split(sheaders, Chr(10))
j = 0
On Error Resume Next
For Each header In vtmp
a = ""
b = ""
If j = 0 Then
a = "status"
b = Trim(header)
Else
s = header
i = InStr(1, s, ":", 1)
If i <> 0 Then
a = LCase(Left(s, i - 1))
b = Trim(Mid(s, i + 1))
Else
bnoadd = True
End If
End If
If Not bnoadd Then d.add a, b
j = j + 1
Next
End Sub
Private Sub filldictionary_meta(ByVal key, ByVal item)
'fills dictionary object with meta tag name/value pairs
On Error Resume Next
d2.add LCase(key), item
End Sub
Private Sub resolvemetatags
'resolve meta tags from html stream
Dim re, omatches, metatag
Set re = New regexp
re.pattern = "<meta (http-equiv|name)=""?([a-za-z0-9_-]+)""? " &
_<BR>content=""?([ws.'-[]]+)""? ?/?>"
re.ignorecase = True
re.global = True
Set omatches = re.execute(srawhtmlstream)
For Each metatag In omatches
'response.write "<PRE>" & server.htmlencode(MetaTag) & "</PRE>"
srawmetatags = srawmetatags & metatag & vbCrLf
'response.write "meta type -> " & MetaTag.SubMatches(0) & "<BR>"
'response.write "meta name -> " & MetaTag.SubMatches(1) & "<BR>"
'response.write "meta content -> " & MetaTag.SubMatches(2) &
"<BR>"
filldictionary_meta metatag.submatches(1), metatag.submatches(2)
Next
Set omatches = Nothing
Set re = Nothing
End Sub
Private Sub getwebpage()
'calls the java com object and
'gathers the http stream returned
'for a given url and sets the read
'only properties exposed by the class
Dim a, surl
If url <> "" Then
surl = url
On Error Resume Next
Set a = GetObject("java:iosockets")
If err Then
serrormsg = "iosockets.class is not in the " & _
"<small><i>java rustlib</i></small> " & _
"library on the server."
Exit Sub
End If
On Error goto 0
With a
.url = surl
.fetchurl()
If .errormsg <> "" Then
serrormsg = .errormsg
Else
srawhtmlstream = .rawhtmlreturned
filldictionary_headers
resolvemetatags
End If
End With
Set a = Nothing
End If
End Sub
End Class
%> |
|
|