Teadmised note
Allikas: Lambda
import collections
import json
import urllib
import urllib.parse
import urllib.request
def m(url):
    """Download *url*, tally its whitespace-separated tokens and print them.

    The body is decoded as UTF-8. Every period is split off as its own
    token, while commas, double quotes and angle brackets are treated as
    separators. The tally is printed as a list of ``(token, count)`` tuples
    in ascending count order; tokens with equal counts keep the order in
    which they first appeared in the page.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

    Returns:
        None. If the page cannot be fetched or decoded, the message
        ``failed to read url`` is printed instead of the tally.
    """
    try:
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode("utf-8")
    except Exception:
        # Deliberately best-effort: one unreadable page must not stop the
        # caller. Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate so the script can still be interrupted.
        print("failed to read url")
        return

    # Keep "." as a standalone token; the other punctuation only separates.
    text = html.replace(".", " . ")
    for separator in (",", '"', "<", ">"):
        text = text.replace(separator, " ")

    # Counter preserves first-seen order and sorted() is stable, so ties
    # come out in the order the tokens were first encountered.
    counts = collections.Counter(text.split())
    ranked = sorted(counts.items(), key=lambda pair: pair[1])
    print(str(ranked))
def g(query="Andrus Ansip"):
    """Page through web-search results for *query* and word-count every hit.

    Requests result pages of 8 hits at offsets 0, 8, ..., 96 from the
    ``ajax.googleapis.com`` web-search endpoint. For each hit the result URL
    is printed and passed to ``m`` for a token tally.

    NOTE(review): this AJAX search endpoint is believed to have been retired
    by Google -- confirm it still responds before relying on this function.

    Args:
        query: Plain-text search phrase; it is URL-encoded here. The default
            reproduces the previously hard-coded search.

    Returns:
        None. Stops early when a response has no ``responseData`` or no
        ``results``; a page that fails to download or process is reported
        and skipped.
    """
    endpoint = "https://ajax.googleapis.com/ajax/services/search/web"
    page_size = 8

    print("googling")
    for start in range(0, 100, page_size):
        print("start " + str(start))
        params = {"v": "1.0", "rsz": page_size, "start": start, "q": query}
        search_url = endpoint + "?" + urllib.parse.urlencode(params)
        try:
            # The context manager closes the connection once the body is read.
            with urllib.request.urlopen(search_url) as response:
                # NOTE(review): the body is decoded as ISO-8859-1, which never
                # fails but may garble non-ASCII text if the service actually
                # sends UTF-8 -- confirm the response encoding.
                payload = json.loads(response.read().decode("iso-8859-1"))
            if not payload or "responseData" not in payload:
                print("failed to read/parse")
                return
            data = payload["responseData"]
            if not data or "results" not in data:
                print("no more results")
                return
            for result in data["results"]:
                hit_url = result["url"]
                print("******* url *****")
                print(str(hit_url))
                m(hit_url)
        except Exception:
            # Best-effort per page: report and move on to the next offset.
            # Unlike a bare ``except``, KeyboardInterrupt and SystemExit
            # still propagate so the crawl can be stopped.
            print("failed to read google search url")
# Script entry point: start the search-and-count crawl.
# NOTE(review): this call appears to sit at module level with no
# ``__name__`` guard, so it would also run when the file is imported.
g()