文章目录
显示
注意content的字符集:
最终的插入数据库的所有代码:
from selenium.webdriver import Chrome,ChromeOptions
import time
import traceback #异常处理
import pymysql
def get_bili_hot():
    """Scrape text from a fixed Bilibili space page using headless Chrome.

    Loads the hard-coded space URL, sleeps two seconds to give the page time
    to render, and collects the text of every element matched by the XPath
    below. The collected list is printed before it is returned.

    Returns:
        list[str]: The ``.text`` of each matched element; empty when the
        XPath matches nothing.
    """
    # Local import: ``By`` is not among the file-level imports.
    from selenium.webdriver.common.by import By

    option = ChromeOptions()
    option.add_argument("--headless")  # hide the browser window
    option.add_argument("--no-sandbox")  # drop the sandbox on Linux
    browser = Chrome(options=option)
    try:
        url = "https://space.bilibili.com/473837611"
        browser.get(url)
        time.sleep(2)
        # print(browser.page_source)
        # find_elements(By.XPATH, ...) is available in Selenium 3 and 4; the
        # find_elements_by_xpath shortcut no longer exists in Selenium 4.3+.
        elements = browser.find_elements(
            By.XPATH, '//*[@id="page-index"]/div[1]/div[2]/div/div/a[2]'
        )
        context = [element.text for element in elements]
    finally:
        # quit() ends the whole driver session (close() only closes the
        # current window), and runs even when loading or lookup fails.
        browser.quit()
    print(context)
    return context
def get_conn():
    """Open a PyMySQL connection to the ``cov`` database and create a cursor.

    Returns:
        tuple: ``(conn, cursor)`` - the connection and a cursor created from
        it. The caller is responsible for closing both.
    """
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration.
    # Create the connection.
    conn = pymysql.connect(
        host="192.168.31.203",
        user="root",
        password="123456",
        # ``database`` is the documented keyword; ``db`` is a deprecated alias.
        database="cov",
        # Set explicitly so non-ASCII text does not depend on the driver's
        # default connection charset.
        charset="utf8mb4",
    )
    # Create the cursor.
    cursor = conn.cursor()
    return conn, cursor
def close_conn(conn, cursor):
    """Close the cursor and then the connection, skipping whichever is None.

    Args:
        conn: Object exposing ``close()``, or None.
        cursor: Object exposing ``close()``, or None.

    Raises:
        Whatever ``cursor.close()`` or ``conn.close()`` raises. The
        connection is still closed when closing the cursor fails.
    """
    try:
        # Compare against None explicitly so an object that happens to be
        # falsy is still closed.
        if cursor is not None:
            cursor.close()
    finally:
        # Runs even if cursor.close() raised, so the connection never leaks.
        if conn is not None:
            conn.close()
def update_hotsearch():
    """Insert the scraped Bilibili data into the ``hotsearch`` table.

    Calls ``get_bili_hot()`` for the scraped strings, then inserts one row
    per string into ``hotsearch(dt, content)`` using a single timestamp
    taken at the start of the run, and commits once. Any ``Exception`` is
    printed with its traceback and not re-raised; the cursor and connection
    are always handed to ``close_conn``.
    """
    cursor = None
    conn = None
    try:
        context = get_bili_hot()
        print(f"{time.asctime()}开始更新b站的数据")
        conn, cursor = get_conn()
        sql = "insert into hotsearch(dt, content) values(%s,%s)"
        # Spell the time out with fixed directives: %X is locale-dependent,
        # while this form is always "YYYY-MM-DD HH:MM:SS".
        ts = time.strftime("%Y-%m-%d %H:%M:%S")
        for item in context:
            print(item)
            print(ts)
        # One batched call instead of one execute() per row.
        cursor.executemany(sql, [(ts, item) for item in context])
        conn.commit()
        print(f"{time.asctime()}数据更新完毕哦")
    except Exception:
        # Catch Exception rather than using a bare except, so
        # KeyboardInterrupt and SystemExit still propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
# Module-level call: performs one scrape-and-insert run as soon as this file
# is executed (or imported).
update_hotsearch()