获取广东电视台节目表(Python 版)

「获取广东电视台节目表」项目的环境依赖 Python3 + Django4 + Nginx + uWSGI,用于部署 Web 站点来展示节目列表、提供节目列表下载预览或节目列表 API 接口,部署过于繁杂。以下代码仅基于 Python3(需安装 requests 与 beautifulsoup4),抓取官方节目表后直接生成所需的 epg.xml,方便部署在 OpenWRT 路由器上使用。

获取代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import datetime
import requests
import warnings
from bs4 import BeautifulSoup as bs
from xml.etree.ElementTree import Element, SubElement, ElementTree

# Request headers sent with every HTTP call so the scraper presents itself
# as a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/114.0.0.0 Safari/537.36'
    ),
}

# Silence warnings for the whole process. The stated target is bs4's
# XMLParsedAsHTMLWarning (XML parsed with an HTML parser), but no category is
# passed, so every warning is suppressed.
warnings.filterwarnings(action='ignore')

def get_channels_gdtv():
    """Fetch the Guangdong TV channel list.

    Downloads the channel index XML and collects, for every ``<channel>``
    element, its ``id`` attribute and the stripped text of its ``ctitle``
    child.

    Returns:
        list[dict]: One ``{'id': ..., 'name': ...}`` dict per channel, in
        document order. Elements lacking an ``id`` or a ``ctitle`` child are
        skipped.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an error status.
    """
    url = 'http://epg.gdtv.cn/f/1.xml'  # channel index XML

    # Bound the wait (same 8 s used for the schedule requests) so an
    # unresponsive server cannot hang the script forever.
    res = requests.get(url, headers=headers, timeout=8)
    # Fail loudly instead of parsing an error page into an empty channel list.
    res.raise_for_status()
    res.encoding = 'utf-8'
    soup = bs(res.text, 'html.parser')

    channels = []
    for node in soup.select('channel'):
        channel_id = node.get('id')
        title_tag = node.ctitle
        if channel_id is None or title_tag is None:
            # Incomplete entry: nothing usable to emit for this channel.
            continue
        # strip() removes surrounding whitespace and line breaks.
        channels.append({'id': channel_id, 'name': title_tag.text.strip()})
    return channels

def clean_html(text):
    """Return a programme title as plain text.

    The input is parsed as HTML and reduced to its text content, which drops
    markup and turns HTML entities into ordinary characters (for example
    ``&amp;`` becomes ``&``). Any literal ``<br />`` or ``<br>`` sequences
    still present in that text are then replaced by newlines, and the result
    is stripped of leading and trailing whitespace.
    """
    plain = bs(text, "html.parser").get_text()
    # Same replacement order as before: the self-closing form first.
    for marker in ('<br />', '<br>'):
        plain = plain.replace(marker, '\n')
    return plain.strip()

def get_epgs_gdtv(channel_id, start_date, end_date):
    """Fetch the programme schedule of one channel for a range of days.

    One XML document is requested per day from ``start_date`` through
    ``end_date`` (both inclusive). Every ``<content>`` element contributes one
    programme, with its ``time1`` / ``time2`` attributes read as Unix
    timestamps for start and end.

    The scrape is best-effort: a day whose request fails (timeout, connection
    error, HTTP error status) is reported on stdout and skipped, and an entry
    with missing or invalid timestamps is skipped, so one bad response does
    not abort the whole run.

    Args:
        channel_id: Channel identifier as used in the schedule URL.
        start_date: First day to fetch (``datetime.date``).
        end_date: Last day to fetch (``datetime.date``).

    Returns:
        list[dict]: ``{'title': str, 'start': datetime, 'end': datetime}``
        per programme, in fetch order. The datetimes are naive values in the
        system's local time zone, as produced by ``datetime.fromtimestamp``.
    """
    epgs = []
    one_day = datetime.timedelta(days=1)
    current_date = start_date
    while current_date <= end_date:
        url = f'http://epg.gdtv.cn/f/{channel_id}/{current_date.strftime("%Y-%m-%d")}.xml'
        # Advance before any `continue` so a failed day can never loop forever.
        current_date += one_day

        try:
            res = requests.get(url, headers=headers, timeout=8)
            res.raise_for_status()
        except requests.RequestException as exc:
            print(f"警告: 获取 {url} 失败, 已跳过: {exc}")
            continue

        res.encoding = 'utf-8'
        soup = bs(res.text, 'html.parser')

        for entry in soup.select('content'):
            try:
                starttime = datetime.datetime.fromtimestamp(int(entry.attrs['time1']))
                endtime = datetime.datetime.fromtimestamp(int(entry.attrs['time2']))
            except (KeyError, ValueError, OverflowError, OSError):
                # Missing, non-numeric or out-of-range timestamp: the entry
                # cannot be placed on the timeline, so drop it.
                continue
            epgs.append({
                'title': clean_html(entry.get_text()),
                'start': starttime,
                'end': endtime,
            })
    return epgs

def create_epg_xml(channels, epg_data, output_file):
    """Write channels and programmes to an XMLTV file.

    Args:
        channels: Iterable of ``{'id': str, 'name': str}`` dicts; each becomes
            a ``<channel>`` element with a ``<display-name>`` child.
        epg_data: Mapping of channel id to a list of programme dicts with the
            keys ``'title'``, ``'start'`` and ``'end'`` (``datetime`` values).
        output_file: Path (or writable binary file object) that receives the
            UTF-8 encoded document, including the XML declaration.

    Programme times are written as ``YYYYMMDDhhmmss +0800``. Each datetime is
    converted to UTC+8 first, so the wall-clock digits always agree with the
    fixed offset. Timezone-aware values are converted from their own zone;
    naive values are interpreted as system local time.
    """
    # The offset in the output is fixed at +0800, so the digits must be
    # rendered in that zone regardless of how the host clock is configured.
    utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))

    def _xmltv_time(moment):
        # Format one datetime as an XMLTV timestamp in UTC+8.
        return moment.astimezone(utc_plus_8).strftime('%Y%m%d%H%M%S +0800')

    tv = Element('tv')

    # Channel definitions.
    for channel in channels:
        channel_element = SubElement(tv, 'channel', id=channel['id'])
        display_name = SubElement(channel_element, 'display-name')
        display_name.text = channel['name']

    # Programme entries, grouped by channel.
    for channel_id, programs in epg_data.items():
        for program in programs:
            programme = SubElement(
                tv,
                'programme',
                start=_xmltv_time(program['start']),
                stop=_xmltv_time(program['end']),
                channel=channel_id,
            )
            title = SubElement(programme, 'title')
            title.text = program['title']

    # Serialize the tree to the requested destination.
    tree = ElementTree(tv)
    tree.write(output_file, encoding='utf-8', xml_declaration=True)

# Script entry point: scrape every channel and write the XMLTV file.
if __name__ == "__main__":
    # Fetch window: from six days ago through two days ahead of today.
    today = datetime.date.today()
    start_date = today + datetime.timedelta(days=-6)
    end_date = today + datetime.timedelta(days=2)

    print("获取频道列表...")
    channels = get_channels_gdtv()

    print("抓取节目表...")
    # Map each channel id to the programmes fetched for the whole window.
    epg_data = {
        channel['id']: get_epgs_gdtv(channel['id'], start_date, end_date)
        for channel in channels
    }

    output_file = "epg.xml"
    print("生成 XMLTV 文件...")
    create_epg_xml(channels, epg_data, output_file)
    print(f"完成!EPG 文件已保存为 {output_file}")

执行代码

1
2
3
4
5
[root@localhost IPTV]# python gdtv_to_epg.py
获取频道列表...
抓取节目表...
生成 XMLTV 文件...
完成!EPG 文件已保存为 epg.xml