-
Notifications
You must be signed in to change notification settings - Fork 3
/
scrape.py
executable file
·77 lines (61 loc) · 2.07 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python3
import json
import xml.sax.saxutils as saxutils
import podcastindex
def get_podcasts_index():
    """Build the PodcastIndex client from the configuration in ``secrets.txt``.

    ``secrets.txt`` is looked up in the current working directory and must
    hold a JSON document; the parsed value is passed to ``podcastindex.init``
    unchanged.

    Returns:
        Whatever ``podcastindex.init`` returns for the parsed configuration.

    Raises:
        SystemExit: With exit status 1, after printing a message, when
            ``secrets.txt`` does not exist or is not valid JSON.
    """
    try:
        # JSON is UTF-8 by specification, so do not rely on the locale's
        # default encoding when decoding the file.
        with open("secrets.txt", "r", encoding="utf-8") as secrets_file:
            podcastindex_config = json.load(secrets_file)
    except FileNotFoundError:
        print("secrets.txt with the PodcastIndex API key is missing.")
        # The bare ``exit`` helper is injected by the ``site`` module and is
        # not available in every interpreter setup; SystemExit always is.
        raise SystemExit(1)
    except json.JSONDecodeError:
        print("secrets.txt parsing error.")
        raise SystemExit(1)

    # Kept outside the ``try`` so that a failure inside the library is not
    # misreported as a missing or unparsable secrets file.
    return podcastindex.init(podcastindex_config)
def podcasts_index_get_rss_feed(podcasts):
    """Look up an RSS feed URL for every podcast name in *podcasts*.

    Each key of *podcasts* is printed and then passed to the ``search``
    method of the object returned by ``get_podcasts_index()``. The ``'url'``
    of the first entry under the result's ``"feeds"`` key is recorded for
    that name. The values of *podcasts* are not used.

    A lookup that raises (including a search with no feeds) is reported on
    stdout and that name is left out of the result; the remaining names are
    still processed.

    Args:
        podcasts: Mapping whose keys are the podcast names to search for.

    Returns:
        dict mapping each successfully resolved name to its feed URL.
    """
    client = get_podcasts_index()
    resolved = {}

    for title, _ignored in podcasts.items():
        print(title)
        try:
            matches = client.search(title)["feeds"]
            first_feed_url = matches[0]['url']
        except Exception as err:
            # Best effort: one failed lookup must not abort the whole run.
            print(f"Error while searching for {title}: {err}")
        else:
            resolved[title] = first_feed_url

    return resolved
def create_opml_file(data, filename):
    """Write *data* to *filename* as an OPML 2.0 subscription list.

    Every ``title -> url`` pair becomes one ``<outline type='rss'>`` element
    nested under a single "Podcasts" outline. Titles and URLs are
    XML-escaped, so characters such as ``&``, ``<`` or ``'`` cannot produce
    a malformed document.

    Args:
        data: Mapping of podcast title to RSS feed URL.
        filename: Path of the file to create or overwrite.
    """
    # The attribute values below are wrapped in single quotes, so the
    # apostrophe has to be escaped in addition to the default &, < and >.
    attr_entities = {"'": "&apos;"}

    def as_attr(value):
        return saxutils.escape(str(value), entities=attr_entities)

    header = """\
<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>Podcasts</title>
</head>
<body>
<outline text="Podcasts">
"""
    footer = """\
</outline>
</body>
</opml>"""

    outlines = [
        f" <outline type='rss' text='{as_attr(title)}' xmlUrl='{as_attr(url)}' />\n"
        for title, url in data.items()
    ]

    # The XML declaration promises UTF-8, so the file must be written as
    # UTF-8 regardless of the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(header + "".join(outlines) + footer)
def load_podcasts_names_from_file(filename):
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON value, exactly as produced by ``json.load``.

    Raises:
        FileNotFoundError: If *filename* does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; decoding with the locale's default
    # encoding would corrupt non-ASCII podcast names on some platforms.
    with open(filename, "r", encoding="utf-8") as file:
        return json.load(file)
def main():
    """Run the export: data.json -> feed lookup -> podcasts.opml -> summary."""
    # Podcast names to resolve, as stored in data.json.
    wanted = load_podcasts_names_from_file('data.json')

    # Resolve each name to an RSS feed URL; failed lookups are left out.
    feeds_by_name = podcasts_index_get_rss_feed(wanted)

    # Persist the resolved feeds as an OPML document.
    create_opml_file(feeds_by_name, "podcasts.opml")

    # Report how many entries were requested versus actually written.
    print(f"Found {len(wanted)} podcasts in data.json")
    print(f"Wrote {len(feeds_by_name)} RSS feeds to podcasts.opml")


if __name__ == "__main__":
    main()