#! python3 ## lucky.py - Opens several Google search results. import requests, sys, webbrowser from bs4 import BeautifulSoup print('Googling...') ## display text while downloading the Google page res = requests.get('http://google.com/search?q=' + ' '.join(sys.argv[1:])) res.raise_for_status()
# Retrieve top search result links. soup = BeautifulSoup(res.text, 'lxml') # Open a browser tab for each result. linkElems = soup.select('div.rc>.r>a')
然后针对每个结果打开 Web 浏览器即可,
1 2 3 4
numOpen = min(5, len(linkElems)) ## 默认情况下,在新的选项卡中会打开前5个查询结果,但如果你搜的乱七八糟,结果可能会小于5个,所以我们取最小的。 for i in range(numOpen): webbrowser.open(linkElems[i].get('href'))
import requests, sys, webbrowser from bs4 import BeautifulSoup
keywords = ' '.join(sys.argv[1:]) # for better search results query = keywords.replace(' ', '+') # display text while downloading the Google page print('Googling...' + query)
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', }
res = requests.get('https://google.com/search?q=' + query, headers = headers) try: res.raise_for_status() ## 如果出错,将抛出异常 except Exception as exc: print('There was a problem: %s' % (exc)) # Retrieve top search result links. soup = BeautifulSoup(res.text,'lxml')
# Open a browser tab for each result. linkElems = soup.select('div.rc>.r>a') numOpen = min(10, len(linkElems)) for i in range(numOpen): link = linkElems[i].get('href') webbrowser.open(link)
接下去可能还需要selenium来爬取完整的搜索结果。
XKCD
xkcd是兰道尔·门罗(Randall Munroe)的网名,又是他所创作的漫画的名称。作者兰道尔·门罗(Randall Munroe)给作品的定义是一部“关于浪漫、讽刺、数学和语言的网络漫画”(A webcomic of romance,sarcasm, math, and language),被网友誉为深度宅向网络漫画。
for i in range(0, 1400, 100): ## 循环14次,创建14个线程 downloadThread = threading.Thread(target=downloadXkcd, args=(i, i + 99)) downloadThreads.append(downloadThread) downloadThread.start()
## 阻塞, 等待所有线程结束 for downloadThread in downloadThreads: downloadThread.join() print('Done.')