python扫目录程序
来源:岁月联盟
时间:2012-02-24
写了两个版本,先贴单线程的。程序通过HTTP响应状态码判断页面是否存在,速度较快。使用前需要指定字典文件,即在程序里把 dict_path 改成自己的字典路径。(相信你懂的)
#!/usr/bin/python
#encoding=utf-8
import sys
import httplib
import re
import time
def Usage():
    """Print the command-line usage string and terminate the process.

    The script requires exactly one argument (the target host), so the
    usage line names it. The message is written to stderr and the process
    exits with status 1 so that the shell sees the invocation as failed.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write('Usage: python scan.py <target_host>\n')
    sys.exit(1)
# Exactly one command-line argument (the target host) is required.
if len(sys.argv) != 2:
    Usage()

start = time.time()
target = sys.argv[1]
port = 80
# Wordlist with one URL path per line; edit this to point at your own file.
dict_path = "/media/sf_TDDOWNLOAD/dict.txt"
request_headers = {"Host": target,"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5","Accept": "text/plain"}
# Response codes that are reported as "found".
found_statuses = (200, 500, 403, 301)

# The with-block closes the wordlist even if a request raises.
with open(dict_path) as f:
    # Iterating the file stops cleanly at EOF (the old readline loop
    # never terminated because its end-of-file test could not be true).
    for raw_line in f:
        # Drop the trailing "\r\n" / "\n" so it is not sent as part of
        # the request path, and skip blank lines.
        entry = raw_line.strip()
        if not entry:
            continue
        # Python 2 byte string: decode the entry as GBK, send it as UTF-8.
        path = entry.decode("gbk").encode("utf-8")
        conn = httplib.HTTPConnection(target, port)
        #conn.set_debuglevel(2)
        try:
            conn.request('GET', path, headers=request_headers)
            ret = conn.getresponse().status
        finally:
            # One connection per path; release the socket every time.
            conn.close()
        if ret in found_statuses:
            print("%s%s found! status: %s" % (target, path, ret))
        else:
            print("%s%s not found!" % (target, path))

print("done...")
print("Elapsed Time: %s" % (time.time() - start))
==========================================
下面是多线程版本,使用了一个消息队列来处理要扫描的路径。注意线程不要开得太多,不然会出莫名其妙的错误。
#!/usr/bin/env python
import Queue
import threading
import httplib
import time
import re
queue = Queue.Queue()
class ThreadUrl(threading.Thread):
    """Worker thread that probes URL paths taken from a shared queue.

    Each path read from the queue is requested with HTTP GET against
    ``target:port`` and the outcome is printed. Every item taken from the
    queue is acknowledged with ``task_done()`` -- including blank entries
    and requests that fail -- so a ``queue.join()`` in the producer cannot
    block forever because of a network error.

    Args:
        queue: queue object providing ``get()`` and ``task_done()``.
        target: host name to scan.
        port: TCP port of the HTTP server.
        timeout: socket timeout in seconds for each request, so a stalled
            server cannot hang a worker indefinitely.
    """

    # Response codes that are reported as "found".
    FOUND_STATUSES = (200, 500, 403, 301)

    def __init__(self, queue, target="www.xjbaihe.com", port=80, timeout=10):
        threading.Thread.__init__(self)
        self.queue = queue
        self.target = target
        self.port = port
        self.timeout = timeout

    def run(self):
        """Consume paths from the queue forever, printing one result each."""
        headers = {"Host": self.target,"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5","Accept": "text/plain"}
        while True:
            path = self.queue.get()
            try:
                # Remove any line ending left on the entry; a blank entry
                # is acknowledged (in ``finally``) but not requested.
                path = path.strip()
                if not path:
                    continue
                conn = httplib.HTTPConnection(self.target, self.port,
                                              timeout=self.timeout)
                try:
                    conn.request('GET', path, headers=headers)
                    ret = conn.getresponse().status
                finally:
                    # One connection per path; always release the socket.
                    conn.close()
                if ret in self.FOUND_STATUSES:
                    print("%s found! status: %s" % (path, ret))
                else:
                    print("%s not found!" % path)
            except (httplib.HTTPException, IOError) as err:
                # Socket errors and timeouts are IOError subclasses; report
                # the failure and keep the worker alive for the next path.
                print("%s error: %s" % (path, err))
            finally:
                #signals to queue job is done
                self.queue.task_done()
# Wall-clock start time; the "Elapsed Time" printed at the end of the
# script is measured from this point.
start = time.time()
def main(dict_path="/media/sf_TDDOWNLOAD/dictest.txt", num_threads=5):
    """Start the worker threads, queue every wordlist path, wait for completion.

    Args:
        dict_path: wordlist file with one URL path per line. Each entry is
            decoded as GBK and queued as UTF-8 (Python 2 byte strings).
        num_threads: number of ``ThreadUrl`` workers to start.
    """
    #spawn a pool of threads, and pass them queue instance
    for _ in range(num_threads):
        worker = ThreadUrl(queue)
        # Daemon threads do not keep the process alive once the scan is done.
        worker.setDaemon(True)
        worker.start()

    #populate queue with data
    print("reading dict...")
    # The with-block closes the wordlist even if decoding an entry fails.
    with open(dict_path) as f:
        for raw_line in f:
            # Drop the trailing "\r\n" / "\n" so it is not sent as part of
            # the request path; blank lines are not queued at all.
            entry = raw_line.strip()
            if not entry:
                continue
            queue.put(entry.decode("gbk").encode("utf-8"))
    print("done...")

    #wait on the queue until everything has been processed
    queue.join()
# Script entry point: run the scan, then report how long it took
# (measured from the module-level ``start`` timestamp).
if __name__ == '__main__':
    main()
    elapsed = time.time() - start
    print("Elapsed Time: %s" % elapsed)
=================
上一篇:python理解-对象类型