|
python寫入xlsx檔案有兩種方案
- 使用 pandas 庫產生 DataFrame 數據,然後呼叫 to_excel 方法寫入xlsx檔案
註:pandas本身並不能寫入xlsx,最後仍需藉助第三方庫 openpyxl 或 xlsxwriter 實現。 - 直接使用 openpyxl 庫寫入xlsx(當然還有其他庫,但我用得少不了解)。
如果只是單純的讀取和寫入,我習慣使用pandas,涉及到單元格格式修改,才使用openpyxl庫(openpyxl修改格式很強大)。
案例:爬取gitee專案日榜和周榜數據,並寫入到excel
請確保已安裝好依賴的庫
[Plain Text] 純文本查看 複制代碼 pip install pandas
pip install openpyxl
pip install beautifulsoup4
pip install requests
[Python] 純文本查看 複制代碼
from pandas import DataFrame, ExcelWriter
from bs4 import BeautifulSoup
import requests
def get_data_from_gitee_explore(tab) -> DataFrame:
"""爬蟲抓取gitee今日日榜和今日周榜數據
[url]https://gitee.com/explore[/url]
:param tab: daily-獲取日榜數據, weekly-獲取周榜數據
:return: DataFrame, 用於後續寫入到xlsx檔案
最終返回以下欄位的數據:
- title 專案名稱
- url 專案倉庫地址
- description 專案描述
- start_count 星星數量
"""
r = requests.get("https://gitee.com/explore")
soup = BeautifulSoup(r.text, 'html.parser')
trending = soup.find("div", attrs={"class": "tab", "data-tab": f"{tab}-trending"})
items = trending.find_all("div", attrs={"class": "explore-trending-projects__list-item"})
data = []
for item in items:
title = item.find("div", attrs={"class": "title"}).find("a").text
url = "https://gitee.com" + item.find("div", attrs={"class": "title"}).find("a").get("href")
start_count = item.find("div", attrs={"class": "stars-count"}).text
description = item.find("div", attrs={"class": "description"}).text
data.append({"TITLE": title,
"URL": url,
"DESCRIPTION": description,
"START_COUNT": start_count})
return pd.DataFrame(data)
if __name__ == '__main__':
daily_df = get_data_from_gitee_explore("daily")
print("日榜")
print(daily_df)
weekly_df = get_data_from_gitee_explore("weekly")
print("--------------------")
print("周榜")
print(weekly_df)
with ExcelWriter("gitee熱門專案.xlsx") as writer:
# 寫入到第一個sheet
daily_df.to_excel(writer,
sheet_name="今日熱門專案",
index=False,
engine="openpyxl"
)
# 寫入到第二個sheet
weekly_df.to_excel(writer,
sheet_name="本週熱門專案",
index=False,
engine="openpyxl"
)
執行效果:
openpyxl 快速上手
[Python] 純文本查看 複制代碼
import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font, Color, numbers
from openpyxl.utils import get_column_letter
import pandas as pd
# 讀取Excel檔案
wb = openpyxl.load_workbook('wbName.xlsx')
# 只讀模式
# wb = openpyxl.load_workbook(filename='large_file.xlsx', read_only=True)
# 只寫模式
# wb = openpyxl.load_workbook(filename='large_file.xlsx', write_only=True)
# 選擇工作表
ws = wb.active
# ws = wb['Sheet']
# 從表中選擇單元格
cell = ws.cell(row=1, column=2)
print(cell.value)
for i in range(1, 8, 2):
print(i, ws.cell(row=i, column=2).value)
# 獲取有效單元格的最大行和最大列
print(ws.max_row)
print(ws.max_column)
# 把數據追加到sheet(使用dataframe_to_rows)
data = {'one': [1., 2., 3., 4.],
'two': [4., 3., 2., 1.]}
df = pd.DataFrame(data)
for r in dataframe_to_rows(df, index=False, header=False):
ws.append(r)
data = [(1, 2, 3, 4, 5, 6), (7, 8, 9, 10, 11, 12)]
for row in data:
ws.append(row)
# 修改數據格式
font = Font(name='Microsoft Sans Serif', size=10, family=2, b=False, i=False, color=Color(theme=1))
alignment = Alignment(horizontal='center', vertical='center')
# 填充格式
fill = PatternFill(fill_type='solid', fgColor='8EA9DB')
border = Border(left=Side(border_style='thin', color='FF000000'),
right=Side(border_style='thin', color='FF000000'),
top=Side(border_style='thin', color='FF000000'),
bottom=Side(border_style='thin', color='FF000000')
)
for cols in ws.iter_cols(min_row=2, max_row=ws.max_row, min_col=20, max_col=ws.max_column):
for cell in cols:
cell.number_format = '0.00000000'
# 填充顏色
cell.font = font
cell.alignment = alignment
cell.fill = fill
cell.border = border
# 內建數字格式
print(dir(numbers))
# 儲存工作簿
wb.save('workbookName.xlsx')
|
|