实例讲解:用ASP小偷程序抓取网上房产信息
2018-09-06 12:17
网上的小偷程序越来越多,有没有兴趣研究一下抓取原理呢?有兴趣的话,跟我来吧,COME BABY,我们用实例来分析:
  <%@LANGUAGE=VBSCRIPT CODEPAGE=936%>
<!-- #include file="conn.asp" -->
  <!-- #include file="inc/function.asp" -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Untitled Document</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<meta http-equiv="refresh" content="300;URL=steal_house.asp">
</head>
  <body>
<%
' Best-effort mode: a runtime error does not abort the page; execution
' continues with the next statement.
on error resume next
' Raise the ASP script timeout (in seconds) so the page is not terminated
' while it is still fetching remote pages.
Server.ScriptTimeout = 999999
'========================================================
' Character-encoding conversion function
'====================================================
Function BytesToBstr(body,code) 
    ' Decode a binary payload into a string using the given character set.
    '
    ' body - binary data (the caller in this file passes XMLHTTP.responseBody)
    ' code - charset name handed to ADODB.Stream, e.g. "gb2312"
    ' Returns the decoded text.
    Dim objstream
    ' The ProgID must be a string literal; without the quotes VBScript
    ' parses adodb.stream as a member access on an undefined variable.
    Set objstream = Server.CreateObject("adodb.stream")
    objstream.Type = 1      ' 1 = binary: accept the raw bytes
    objstream.Mode = 3      ' 3 = read/write
    objstream.Open
    objstream.Write body
    objstream.Position = 0  ' rewind before switching to text mode
    objstream.Type = 2      ' 2 = text: re-read the same bytes as characters
    objstream.Charset = code
    BytesToBstr = objstream.ReadText
    objstream.Close
    Set objstream = Nothing
End Function 
  ' Get the position at which one string occurs inside another string
Function Newstring(wstr,strng) 
    ' Case-insensitive search for strng inside wstr.
    ' Returns the 1-based position of the first match; when there is no
    ' match the length of wstr is returned instead.
    Dim haystack, needle, pos
    haystack = LCase(wstr)
    needle = LCase(strng)
    pos = InStr(haystack, needle)
    If pos <= 0 Then
        Newstring = Len(wstr)
    Else
        Newstring = pos
    End If
End Function 
' String replacement function
function ReplaceStr(ori,str1,str2)
    ' Return ori with every occurrence of str1 replaced by str2.
    ' Delegates to the built-in Replace with its default arguments.
    dim replaced
    replaced = Replace(ori, str1, str2)
    ReplaceStr = replaced
end function
'====================================================
function Readxml(url,code,start,ends)
    ' Download url with a synchronous GET, decode the response with charset
    ' code, and return the slice of the page that begins at the first
    ' occurrence of start (marker included) and stops just before the first
    ' following occurrence of ends (marker excluded).
    '
    ' url   - address to fetch
    ' code  - charset name passed to BytesToBstr, e.g. "gb2312"
    ' start - text marking the beginning of the wanted fragment
    ' ends  - text marking the end of the wanted fragment
    ' Returns "" when either marker cannot be found.
    '
    ' The markers are only read: VBScript passes arguments ByRef by default,
    ' so assigning to start/ends would overwrite the caller's variables.
    dim oSend, pageText, startPos, endPos

    Readxml = ""

    set oSend = CreateObject("Microsoft.XMLHTTP")
    oSend.open "GET", url, false   ' false = synchronous request
    oSend.send
    pageText = BytesToBstr(oSend.responseBody, code)
    set oSend = nothing

    startPos = InStr(pageText, start)
    if startPos = 0 then exit function   ' Mid(..., 0) would raise a runtime error
    pageText = Mid(pageText, startPos)

    endPos = InStr(pageText, ends)
    if endPos = 0 then exit function     ' Left(..., -1) would raise a runtime error

    Readxml = Left(pageText, endPos - 1)
end function
  function SubStr(body,start,ends)
      ' Return the text of body that lies strictly between the first
      ' occurrence of start and the first following occurrence of ends.
      '
      ' body  - text to search
      ' start - opening marker (not included in the result)
      ' ends  - closing marker (not included in the result)
      ' Returns "" when either marker is missing.
      '
      ' The marker arguments are never assigned to: the previous code stored
      ' the match position in start and then used Len(start), which measured
      ' the digits of the position instead of the length of the marker, so
      ' the result began at an offset that depended on where the match was.
      dim startPos, endPos, rest

      SubStr = ""

      startPos = InStr(body, start)
      if startPos = 0 then exit function

      ' Skip past the whole opening marker.
      rest = Mid(body, startPos + Len(start))

      endPos = InStr(rest, ends)
      if endPos = 0 then exit function

      SubStr = Left(rest, endPos - 1)
  end function
  dim getcont,NewsContent
dim url,title
' NOTE(review): url is never assigned in the visible code, so this request
' has no target; the listing-page address appears to have been lost from the
' published snippet. Set url before this call. TODO confirm the address.
' NOTE(review): the string literals below were restored from text whose
' double quotes had been stripped; confirm the markers against the target page.
' Fetch the listing page and keep the fragment from the opening tag of the
' results table up to (not including) the first closing </table>.
getcont=Readxml(url,"gb2312","<table class=k2 border=0","</table>")
' RegexHtml is not defined in the visible code (presumably it comes from an
' include file). Its result is indexed and passed to ubound by the loop that
' follows, so it is expected to be an array.
getcont=RegexHtml(getcont)
dim KeyId,NewsClass,City,Position,HouseType,Level,Area,Price,Demostra
dim ContactMan,Contact
for i=2 to ubound(getcont)
response.Write(getcont(i)__<br>)
tempLink=mid(getcont(i),instr(getcont(i),href=)+6,instr(getcont(i), onClick)-10)
tempLink=replace(tempLink,../,)
response.Write(i:tempLink<br>)
NewsContent=Readxml(标准化越来越近了)(tempLink,gb2312,<td valign=bottom width=400>,<hr width=760 noshade size=1 color=#808080> )