練習
gethtml-.pyで保存
# gethtml- http://...  => normal: download a single URL
# gethtml- -           => run with file (='list.txt', url list)
# save.name = 'http_--sample.net-_200709120808.htm' etc.
import contextlib
import shutil
import string  # unused below; retained from the original script
import sys
import time
from datetime import date  # unused below; retained from the original script

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same urlopen() entry point under urllib.request.
    import urllib.request as urllib2

# File read in list mode: one URL per line.
LIST_FILE = 'list.txt'


def strr(d, le):
    """Return str(d) zero-padded on the left and cut to its last `le` chars.

    Values wider than `le` keep only their rightmost `le` characters.
    """
    return str(d).zfill(le)[-le:]


def timestr(ti):
    """Format a time tuple as 'YYYYMMDDhhmm'.

    `ti` is indexed like time.localtime(): year, month, day, hour, minute.
    Seconds (ti[5]) are deliberately left out of the stamp.
    """
    return (strr(ti[0], 4) + strr(ti[1], 2) + strr(ti[2], 2)
            + strr(ti[3], 2) + strr(ti[4], 2))


def nowt():
    """Return the current local time formatted by timestr()."""
    return timestr(time.localtime())


def escape(f):
    """Turn a URL into a file-name stem: '/' -> '-', ':' -> '_', '?' -> '+'."""
    return f.replace('/', '-').replace(':', '_').replace('?', '+')


def getd(url):
    """Download `url` into '<escaped url>_<timestamp>.htm' in the current dir.

    The chosen file name is printed and also returned. Errors raised by
    urlopen() or by opening the output file propagate to the caller.
    """
    tof = escape(url) + '_' + nowt() + ".htm"
    print(tof)
    # Binary mode: the response body is raw bytes and must be stored as-is.
    # Both handles are closed even if the transfer fails part-way.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        with open(tof, 'wb') as out:
            shutil.copyfileobj(response, out)
    return tof


def main(argv):
    """Run the command line: `argv[1]` is a URL, or '-' to read LIST_FILE.

    Returns a process exit status: 0 on success, 2 when no argument is given.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: gethtml- URL | -\n')
        return 2
    url = argv[1]
    if url == '-':
        with open(LIST_FILE) as inf:
            urls = [line.strip() for line in inf]
        for url in urls:
            if url:  # skip blank lines instead of fetching an empty URL
                getd(url)
        return 0
    getd(url)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))