1、获取网页的长度(统计 HTTP 响应的总字节数)
# NOTE(review): dead store - HOST is reassigned to 'www.baidu.com' further
# down before anything reads it, so this value is never used.
HOST = 'www.sina.com'
#!/usr/bin/env python
import socket
# Host name and TCP port the raw-socket HTTP request is sent to.
HOST = 'www.baidu.com'
PORT = 80
# Maximum number of bytes requested from the socket per recv() call.
BUFSIZE = 1024
# (ip, port) pair passed to connect(); the DNS lookup runs at import time.
ADDR = (socket.gethostbyname(HOST), PORT)
# Module-level running total of received bytes.
count = 0
# Raw HTTP/1.1 GET request for "/". The expression is parenthesised so it can
# span several lines; a line ending in a bare "+" is a syntax error.
# NOTE(review): the request asks for "Connection: Keep-Alive", so the server
# may keep the connection open after the response; code that reads until EOF
# has to cope with that (e.g. with a socket timeout).
header = (
    "GET /" + " HTTP/1.1\r\n"
    + "Accept-Language: zh-cn\r\n"
    + "Host: " + HOST + "\r\n"
    + "Connection: Keep-Alive\r\n\r\n"
)
def getLength():
    """Request "/" over a raw TCP socket and print the response size in bytes.

    Connects to the module-level ``ADDR``, sends the module-level ``header``
    request, then reads the reply in ``BUFSIZE``-byte chunks until the peer
    closes the connection or stays silent for the receive timeout. The number
    of bytes received (status line, headers and body) is added to the
    module-level ``count`` and the new total is printed.

    Raises:
        socket.error: if connecting or sending fails.
    """
    # ``count`` is rebound below; without this declaration the first read of
    # ``count`` raises UnboundLocalError.
    global count
    # Seconds to wait for more data before assuming the response is complete.
    recv_timeout = 5.0

    # First: create the TCP socket.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # A keep-alive server may never close the connection, so bound every
        # blocking call instead of waiting forever.
        sock.settimeout(recv_timeout)
        # Second: connect to the server.
        sock.connect(ADDR)
        # Third: send the whole request. sendall() retries partial writes;
        # encode() yields bytes on Python 3 and is a no-op for ASCII on 2.
        sock.sendall(('%s\r\n' % header).encode('ascii'))
        # Fourth: call recv() exactly once per iteration so no chunk is
        # dropped, and add the chunk's length rather than the chunk itself.
        while True:
            try:
                chunk = sock.recv(BUFSIZE)
            except socket.timeout:
                break
            if not chunk:
                # An empty result means the peer closed the connection.
                break
            count = count + len(chunk)
    finally:
        # Fifth: always release the socket, even when an error occurred.
        sock.close()
    print(count)
2、获取网页的内容
使用 httplib 访问某个 URL,然后获取返回的内容:
import httplib

# Fetch "/" from www.baidu.com over HTTP and print the response body.
conn = httplib.HTTPConnection("www.baidu.com")
try:
    conn.request("GET", "/")
    r = conn.getresponse()
    # read() without a size argument returns the entire response body.
    print(r.read())
finally:
    # Close the connection even if the request or the read raises.
    conn.close()
|
|