A Roundup of Database Article Resources
The crawler
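The script below crawls the Taobao/Alibaba database kernel monthly reports at http://mysql.taobao.org/monthly/. It first collects the link of every monthly report, then the articles inside each report, groups the articles by category (the prefix of the title before the first '·'), and writes the resulting index to mysql.md.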
import requests
from bs4 import BeautifulSoup

prefix = 'http://mysql.taobao.org'

# Fetch the articles of a single monthly report as (article name, url) tuples.
def query_name_url(url: str):
    resp = requests.get(url)
    # The pages are UTF-8; decode explicitly instead of trusting resp.encoding.
    soup = BeautifulSoup(resp.content.decode('utf-8'), 'html.parser')
    tags = soup.find_all('a', target='_top')
    urls = [v for v in tags if '/monthly/' in v['href']]
    return [(str(v.string).strip(), prefix + v['href']) for v in urls]

# Fetch the links of all monthly reports as (report name, url) tuples.
def query_monthly_url():
    resp = requests.get('http://mysql.taobao.org/monthly/')
    soup = BeautifulSoup(resp.content.decode('utf-8'), 'html.parser')
    tags = soup.find_all('a', class_='main')
    urls = [v for v in tags if '/monthly/' in v['href']]
    return [(str(v.string).strip(), prefix + v['href']) for v in urls]

# Fetch every article together with its monthly report, as
# (category, article name, article url, report name, report url) tuples.
# The category is the part of the article title before the first '·'.
def query_all_name_url():
    result = []
    for report_name, report_url in query_monthly_url():
        for article_name, article_url in query_name_url(report_url):
            category = article_name[:article_name.find('·')].strip()
            result.append((category, article_name, article_url, report_name, report_url))
    return result

# Download the index of every monthly report, group the articles by
# category, and write them to mysql.md.
name = ''
data = []
result = query_all_name_url()
result.sort(key=lambda v: v[0])  # group articles of the same category together
for v in result:
    if v[0] != name:  # a new category starts: emit a section heading
        name = v[0]
        data.append('## {}'.format(v[0]))
    # One markdown line per article: the article link plus the report it came from;
    # v[3][v[3].find(' - ') + 3:] keeps only the date part of the report name.
    data.append('[{}]({}) [[{}]({})]\n'.format(v[1], v[2], v[3][v[3].find(' - ') + 3:], v[4]))
with open('mysql.md', 'w', encoding='utf-8') as file:
    for v in data:
        print(v)  # echo progress to stdout
        file.write(v + '\n')
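The tuples returned by query_all_name_url() can also be reused on their own. Below is a minimal sketch that prints just the articles of one category; the category string 'MySQL' is an assumption about what the scraped titles contain, not something the site guarantees.

# Sketch: reuse query_all_name_url() from above to list a single category.
# 'MySQL' is an assumed category value for illustration only.
for category, article, url, report, report_url in query_all_name_url():
    if category == 'MySQL':
        print('{} ({}) -> {}'.format(article, report, url))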