| Displaying  Source Code(s)  
 
 
          
            |  |  |  
            | 
              
                | Spider Object 
 --------------------------------------------------------------------------------
 
 Description : The Spider Object returns information about a web 
                page from an outside server. The Spider object is used to parse 
                the content returned by a web page after it is retrieved. By 
                calling the various methods of the Spider object, you can 
                effectively read and save parts of a web page for further 
                reference. The spider object requires you to install a free java 
                class file (IOSockets.class) onto your server. The installation 
                instructions and java files can be found here: http://www.aspemporium.com/aspEmporium/downloads/java/IOSockets.asp 
                Properties and methods for spider can be found in the source 
                code.
 
 <%
 Class spider
 '###################################################################
 ' The ASP Emporium
 ' Spider Object v2.0
 ' Last Modified: 2/6/2001
 '
 '
 ' Purpose
 ' The spider object allows a VBScript programmer to use sockets
 ' to access the html and headers returned by the target server
 ' for a given URL.
 '
 '
 ' Properties
 '+---------------+------------------+------------------------------+
 '| property name | data type | notes |
 '+---------------+------------------+------------------------------+
 ' URL variant (string) sets/returns the URL to
 ' retrieve
 '+---------------+------------------+------------------------------+
 ' ErrorMsg variant (string) read only. returns the error
 ' message of the java com object.
 ' Only contains data after the
 ' fetch method is called. Will
 ' contain empty string "" if there
 ' is no error.
 '+---------------+------------------+------------------------------+
 ' RawHTMLStream variant (string) read only. returns the html
 ' and headers returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' HTML variant (string) read only. returns the html
 ' returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' AllHeaders variant (string) read only. returns all
 ' headers returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' HeaderNames variant (array) read only. returns array of
 ' header names returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' HeaderValues variant (array) read only. returns array of
 ' header values returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' HeaderCount variant (long) read only. returns count of all
 ' header/value pairs returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' AllMetaTags variant (string) read only. returns all
 ' meta tags returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' MetaTagNames variant (array) read only. returns array of
 ' meta tag names returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' MetaTagValues variant (array) read only. returns array of
 ' meta tag values returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 ' MetaTagCount variant (long) read only. returns count of all
 ' meta tag name/value pairs
 ' returned by URL.
 ' Only contains data after the
 ' fetch method is called.
 '+---------------+------------------+------------------------------+
 '
 '
 ' Methods
 '+---------------------------+------------------+------------------------------+
 '| method name | data type | notes |
 '+---------------------------+------------------+------------------------------+
 ' Fetch void visits the URL specified in
 ' the URL property and fills
 ' all read only properties
 ' with information about the
 ' URL's contents.
 '+---------------------------+------------------+------------------------------+
 ' GetHeaderValueByName(name) variant (string) returns the header 
                value of
 ' the header name specified in
 ' the name argument. If name is
 ' not found empty string "" is
 ' returned. Only contains data
 ' after the fetch method is
 ' called.
 '+---------------------------+------------------+------------------------------+
 ' GetMetaTagValueByName(name) variant (string) returns the meta 
                tag value of
 ' the meta tag name specified in
 ' the name argument. If name is
 ' not found empty string "" is
 ' returned. Only contains data
 ' after the fetch method is
 ' called.
 '+---------------------------+------------------+------------------------------+
 '
 '
 ' Requirements
 ' requires: VBScript 5.5
 ' SCRRUN.DLL - Scripting Run Time Library
 ' IOSockets.class java com object (provided with this class)
 '
 '###################################################################
 
 
 
 '---------------------------
 'private class scope variables
 '---------------------------
 Private sRawHTMLStream 'holds the html and headers returned from 
                a given url
 Private sErrorMsg 'holds the error message returned by the java 
                object
 Private d, d2 'holds class scope dictionary object reference
 Private sRawMetaTags 'holds raw meta tags
 
 
 '---------------------------
 'public read/write properties
 '---------------------------
 'url to retrieve
 Public url
 
 
 
 '---------------------------
 'public methods
 '---------------------------
 Public Sub fetch()
 'exposes the private GetWebPage function
 If url <> "" Then getwebpage
 End Sub
 
 Public Function getheadervaluebyname(ByVal sheadername)
 'returns variant (string) containing the header value
 'of the corresponding header name (if any)
 If d.exists(LCase(sheadername)) Then
 getheadervaluebyname = d.item(LCase(sheadername))
 End If
 End Function
 
 Public Function getmetatagvaluebyname(ByVal stagname)
 'returns variant (string) containing the metatag value
 'of the corresponding tag name (if any)
 If d2.exists(LCase(stagname)) Then
 getmetatagvaluebyname = d2.item(LCase(stagname))
 End If
 End Function
 
 
 
 '---------------------------
 'public read only properties
 '---------------------------
 Public Property Get rawhtmlstream
 'returns variant (string) containing html and
 'header stream returned from a given URL
 rawhtmlstream = srawhtmlstream
 End Property
 
 Public Property Get html
 'returns variant (string) containing html
 'stream returned from a given URL
 Dim max
 
 max = InStr(1, srawhtmlstream, Chr(10) & Chr(10), 1)
 html = Mid(srawhtmlstream, max + 1)
 End Property
 
 Public Property Get errormsg
 'returns variant (string) containing error
 'message returned from java object (if any)
 errormsg = serrormsg
 End Property
 
 Public Property Get headernames
 'returns variant (array) containing all header names
 'returned from a given URL
 headernames = d.keys
 End Property
 
 Public Property Get headervalues
 'returns variant (array) containing all header values
 'returned from a given URL
 headervalues = d.items
 End Property
 
 Public Property Get headercount
 'returns variant (long) containing the count
 'of all header name/value pairs returned from
 'a given URL
 headercount = d.count
 End Property
 
 Public Property Get allheaders
 'returns variant (string) containing headers
 'returned from a given URL
 Dim max
 
 max = InStr(1, srawhtmlstream, Chr(10) & Chr(10), 1)
 If max <> 0 Then
 allheaders = Left(srawhtmlstream, max - 1)
 Else
 allheaders = ""
 End If
 End Property
 
 Public Property Get allmetatags
 'returns a string of meta tags
 allmetatags = srawmetatags
 End Property
 
 Public Property Get metatagnames
 'returns variant (array) containing all meta tag names
 'returned from a given URL
 metatagnames = d2.keys
 End Property
 
 Public Property Get metatagvalues
 'returns variant (array) containing all meta tag values
 'returned from a given URL
 metatagvalues = d2.items
 End Property
 
 Public Property Get metatagcount
 'returns variant (long) containing the count
 'of all meta tag name/value pairs returned from
 'a given URL
 metatagcount = d2.count
 End Property
 
 
 
 '---------------------------
 'class events
 '---------------------------
 Private Sub class_initialize()
 'make the dictionary available to all routines
 'within the class scope
 Set d = CreateObject("scripting.dictionary")
 Set d2 = CreateObject("scripting.dictionary")
 d.removeall
 d2.removeall
 End Sub
 
 Private Sub class_terminate()
 'clean up the dictionary
 Set d = Nothing
 Set d2 = Nothing
 End Sub
 
 
 
 '---------------------------
 'private class routines
 '---------------------------
 Private Sub filldictionary_headers
 'fills dictionary object with header name/value pairs
 Dim sheaders, vtmp, header
 Dim s, i, j, a, b, bnoadd
 
 bnoadd = False
 sheaders = allheaders
 vtmp = Split(sheaders, Chr(10))
 j = 0
 On Error Resume Next
 For Each header In vtmp
 a = ""
 b = ""
 If j = 0 Then
 a = "status"
 b = Trim(header)
 Else
 s = header
 i = InStr(1, s, ":", 1)
 If i <> 0 Then
 a = LCase(Left(s, i - 1))
 b = Trim(Mid(s, i + 1))
 Else
 bnoadd = True
 End If
 End If
 
 If Not bnoadd Then d.add a, b
 j = j + 1
 Next
 End Sub
 
 Private Sub filldictionary_meta(ByVal key, ByVal item)
 'fills dictionary object with meta tag name/value pairs
 On Error Resume Next
 d2.add LCase(key), item
 End Sub
 
 Private Sub resolvemetatags
 'resolve meta tags from html stream
 Dim re, omatches, metatag
 
 Set re = New regexp
 re.pattern = "<meta (http-equiv|name)=""?([a-za-z0-9_-]+)""? " & 
                _<BR>content=""?([ws.'-[]]+)""? ?/?>"
 re.ignorecase = True
 re.global = True
 Set omatches = re.execute(srawhtmlstream)
 For Each metatag In omatches
 'response.write "<PRE>" & server.htmlencode(MetaTag) & "</PRE>"
 srawmetatags = srawmetatags & metatag & vbCrLf
 
 'response.write "meta type -> " & MetaTag.SubMatches(0) & "<BR>"
 'response.write "meta name -> " & MetaTag.SubMatches(1) & "<BR>"
 'response.write "meta content -> " & MetaTag.SubMatches(2) & 
                "<BR>"
 
 filldictionary_meta metatag.submatches(1), metatag.submatches(2)
 
 Next
 Set omatches = Nothing
 Set re = Nothing
 End Sub
 
 Private Sub getwebpage()
 'calls the java com object and
 'gathers the http stream returned
 'for a given url and sets the read
 'only properties exposed by the class
 Dim a, surl
 
 If url <> "" Then
 surl = url
 
 On Error Resume Next
 Set a = GetObject("java:iosockets")
 If err Then
 serrormsg = "iosockets.class is not in the " & _
 "<small><i>java rustlib</i></small> " & _
 "library on the server."
 Exit Sub
 End If
 On Error goto 0
 With a
 .url = surl
 .fetchurl()
 If .errormsg <> "" Then
 serrormsg = .errormsg
 Else
 srawhtmlstream = .rawhtmlreturned
 filldictionary_headers
 resolvemetatags
 End If
 End With
 Set a = Nothing
 End If
 End Sub
 End Class
 %>
 |  |    |