remove gevent monkey-patch from feed-downloader
[mygpo.git] / sanitizing-rules.ini
blob97b70fd40f48a878e1dcb30bf7a51bd243302c84
1 # This file contains rules to rewrite Podcast and Episode URLs
2 # All rules should be given in the following format
4 #[some-unique-slug]
5 #podcast=1 1 if the rule applies to podcast URLs, otherwise 0
6 #episode=1 1 if the rule applies to episode URLs, otherwise 0
7 #search=regex search-regex that should be replaced
8 #replace=regex string with which the search match should be replaced; may contain group references
9 #priority=x rules are applied in order of increasing priority
10 #description=text describing the rule, possibly mentioning a bug
13 [feedburner-feeds2]
14 podcast=1
15 episode=1
16 search=feeds2\.feedburner\.com
17 replace=feeds.feedburner.com
18 priority=1
19 description=Rewriting for feedburner should happen as "feeds2.feedburner.com" -> "feeds.feedburner.com"
21 [feedburner-format]
22 podcast=1
23 episode=1
24 search=(?P<unchanged>feedburner\.com.+)\?format=xml
25 replace=\g<unchanged>
26 priority=2
27 description=Feedburner URLs should have their "?format=xml" query string removed
29 [remove-leading-whitespace]
30 podcast=1
31 episode=1
32 search=^\s+
33 replace=
34 priority=0
35 description=Remove leading whitespaces
37 [remove-trailing-whitespace]
38 podcast=1
39 episode=1
40 search=\s+$
41 replace=
42 priority=0
43 description=Remove trailing whitespaces
45 [unknown-protocol]
46 podcast=1
47 episode=1
48 search=^(?!https?://).+
49 replace=
50 priority=100
51 description=Empty any string that doesn't start with either http:// or https://
53 [feedburner-trailing-slash]
54 podcast=1
55 episode=0
56 search=(?P<unchanged>feedburner\.com.+)\/$
57 replace=\g<unchanged>
58 priority=2
59 description=Feedburner URLs sometimes have a trailing slash, which can be removed safely
61 [non-ascii]
62 podcast=1
63 episode=1
64 search=^.*[^\x20-\x7E].*$
65 replace=
66 priority=50
67 description=Remove URLs with non-ascii characters
69 [twit-podcasts]
70 podcast=1
71 episode=0
72 search=^http://leoville\.tv/podcasts/(?P<podcast>\w+)\.xml$
73 replace=http://leo.am/podcasts/\g<podcast>
74 priority=10
75 description=Rewrite URLs of TWiT Podcasts because most users use a URL that is going to break soon (bug 885)
77 [hardcore-history-old-url]
78 podcast=1
79 episode=0
80 search=^http://www\.dancarlin\.com/dchh\.xml$
81 replace=http://feeds.feedburner.com/dancarlin/history
82 priority=10
83 description=Rewrite podcast URL of Dan Carlin's Hardcore History because the old URL doesn't work anymore (bug 855)
85 [spaces]
86 podcast=1
87 episode=1
88 search=^.*\s.*$
89 replace=
90 priority=10
91 description=All URLs that contain spaces are considered invalid
93 [libsyn-podcasts]
94 podcast=0
95 episode=1
96 search=http://media\.libsyn\.com/media/(?P<res>.*)$
97 replace=http://traffic.libsyn.com/\g<res>
98 priority=10
99 description=Update new URL for libsyn Podcasts (Learn Japanese with Beb and Alex)
101 [abc-podcasts]
102 podcast=1
103 episode=0
104 search=^http://site\.abc\.go\.com/abc/xml/podcastRSS\?(.*&)?feedPublishKey=(?P<key>\d+)(&.*)?$
105 replace=http://a.abc.com/abc/xml/podcastRSS?feedPublishKey=\g<key>
106 priority=100
107 description=Merge URLs for ABC Podcasts (bug 977)
109 [remove-http-auth]
110 podcast=1
111 episode=1
112 search=^(?P<protocol>[a-zA-Z]+)://[-_\w]+(:[^@/]+)?@(?P<rest>.+)$
113 replace=\g<protocol>://\g<rest>
114 priority=20
115 description=Remove HTTP-Authentication (user:password@) from URLs
117 [rpod-ru-parameters]
118 podcast=0
119 episode=1
120 search=^(?P<unchanged>http://rpod\.ru/personal/.+\.mp[34])\?[0-9a-z]+$
121 replace=\g<unchanged>
122 priority=100
123 description=Remove the trailing query string from rpod.ru MP3/MP4 episode URLs
125 [collegehumor]
126 podcast=0
127 episode=1
128 search=^http://\d+\.media\.collegehumor\.com/(?P<unchanged>.+)$
129 replace=http://1.media.collegehumor.com/\g<unchanged>
130 priority=100
131 description=Unify numbered media.collegehumor.com hosts to 1.media.collegehumor.com
133 [shot-of-jaq-merge]
134 podcast=1
135 episode=0
136 search=(?i)^http://feeds\.feedburner\.com/ShotOfJaq$
137 replace=http://shotofjaq.org/feed/
138 priority=100
139 description=Merges Shot of Jaq feeds to the URL given on their Website
141 [shot-of-jaq-trailing-slash]
142 podcast=1
143 episode=0
144 search=http://shotofjaq\.org/feed$
145 replace=http://shotofjaq.org/feed/
146 priority=100
147 description=Add trailing slash to Shot of Jaq feed URL
149 [shot-of-jaq-feedburner]
150 podcast=1
151 episode=0
152 search=(?i)^http://feeds\.feedburner\.com/ShotOfJaqOGG$
153 replace=http://feeds.feedburner.com/ShotOfJaqOgg
154 priority=100
155 description=Unify all Feedburner URLs for the Shot of Jaq Ogg feed (seems it doesn't have an shotofjaq.org url)
157 [ted-talks-video]
158 podcast=1
159 episode=0
160 search=(?i)^http://feeds\.feedburner\.com/TEDTalks_video$
161 replace=http://feeds.feedburner.com/tedtalks_video
162 priority=100
163 description=Unify all TEDTalks podcasts
165 [ted-talks-audio]
166 podcast=1
167 episode=0
168 search=(?i)^http://feeds\.feedburner\.com/TEDTalks_audio$
169 replace=http://feeds.feedburner.com/tedtalks_audio
170 priority=100
171 description=Unify all TEDTalks podcasts
173 [ted-talks-hd]
174 podcast=1
175 episode=0
176 search=(?i)^http://feeds\.feedburner\.com/TedtalksHD$
177 replace=http://feeds.feedburner.com/tedtalkshd
178 priority=100
179 description=Unify all TEDTalks podcasts
181 [mintcast-feedburner]
182 podcast=1
183 episode=0
184 search=(?i)^http://feeds\.feedburner\.com/mintcastpodcast$
185 replace=http://www.mintcast.org/feed/podcast/
186 priority=100
187 description=Unify mintCast feeds (bug 1035)
189 [mintcast]
190 podcast=1
191 episode=0
192 search=(?i)^http://www\.mintcast\.org/feed/$
193 replace=http://www.mintcast.org/feed/podcast/
194 priority=100
195 description=Unify mintCast feeds (bug 1035)
197 [crankygeeks-feedburner]
198 podcast=1
199 episode=0
200 search=(?i)^http://feeds\.feedburner\.com/ziffdavis/crankygeekspodcast$
201 replace=http://feeds.ziffdavis.com/ziffdavis/crankygeekspodcast
202 priority=100
203 description=Unify Cranky Geeks Podcasts (MP3) (bug 1032)
205 [crankygeeks-xml]
206 podcast=1
207 episode=0
208 search=^http://rssnewsapps\.ziffdavis\.com/audioblogs/crankygeeks/cg\.audio\.xml/?$
209 replace=http://feeds.ziffdavis.com/ziffdavis/crankygeekspodcast
210 priority=100
211 description=Unify Cranky Geeks Podcasts (MP3) (bug 1032)
213 [crankygeeks-slash]
214 podcast=1
215 episode=0
216 search=^http://feeds\.ziffdavis\.com/ziffdavis/crankygeekspodcast/$
217 replace=http://feeds.ziffdavis.com/ziffdavis/crankygeekspodcast
218 priority=100
219 description=Unify Cranky Geeks Podcasts (MP3) (bug 1032)
221 [crankygeeks-video]
222 podcast=1
223 episode=0
224 search=(?i)^http://feeds\.feedburner\.com/ziffdavis/cgh264video$
225 replace=http://feeds.ziffdavis.com/ziffdavis/cgh264video
226 priority=100
227 description=Unify Cranky Geeks Podcasts (H.264) (bug 1032)
229 [crankygeeks-xml-video]
230 podcast=1
231 episode=0
232 search=^http://rssnewsapps\.ziffdavis\.com/audioblogs/crankygeeks/cg\.h\.264\.xml$
233 replace=http://feeds.ziffdavis.com/ziffdavis/cgh264video
234 priority=100
235 description=Unify Cranky Geeks Podcasts (H.264) (bug 1032)
237 [crankygeeks-xml-h264]
238 podcast=1
239 episode=0
240 search=^http://rssnewsapps\.ziffdavis\.com/audioblogs/crankygeeks/cg\.ipod\.xml$
241 replace=http://feeds.ziffdavis.com/ziffdavis/cgipodvideo
242 priority=100
243 description=Unify Cranky Geeks Podcasts (iPod Video) (bug 1032)
245 [crankygeeks-xml-mp4]
246 podcast=1
247 episode=0
248 search=^http://rssnewsapps\.ziffdavis\.com/audioblogs/crankygeeks/cg\.mp4\.xml$
249 replace=http://feeds.ziffdavis.com/ziffdavis/cgipodvideo
250 priority=100
251 description=Unify Cranky Geeks Podcasts (iPod Video) (bug 1032)
253 [crankygeeks-ipod]
254 podcast=1
255 episode=0
256 search=(?i)^http://feeds\.feedburner\.com/ziffdavis/cgipodvideo$
257 replace=http://feeds.ziffdavis.com/ziffdavis/cgipodvideo
258 priority=100
259 description=Unify Cranky Geeks Podcasts (iPod Video) (bug 1032)
261 [crankygeeks-mpeg4]
262 podcast=1
263 episode=0
264 search=^http://feeds\.ziffdavis\.com/ziffdavis/cgmpeg4video/$
265 replace=http://feeds.ziffdavis.com/ziffdavis/cgipodvideo
266 priority=100
267 description=Unify Cranky Geeks Podcasts (iPod Video) (bug 1032)
269 [crankygeeks-ipod-cgipod-video]
270 podcast=1
271 episode=0
272 search=^http://feeds\.ziffdavis\.com/ziffdavis/cgipodvideo\?format=xml$
273 replace=http://feeds.ziffdavis.com/ziffdavis/cgipodvideo
274 priority=100
275 description=Unify Cranky Geeks Podcasts (iPod Video) (bug 1032)
277 [crankygeeks-wmv]
278 podcast=1
279 episode=0
280 search=^http://rssnewsapps\.ziffdavis\.com/audioblogs/crankygeeks/cg\.wmv\.xml$
281 replace=http://feeds.feedburner.com/ziffdavis/cgwmvvideo
282 priority=100
283 description=Unify Cranky Geeks Podcasts (WMV) (bug 1032)
285 [no-agenda]
286 podcast=1
287 episode=0
288 search=^http://noagenda\.podshow\.com/feed$
289 replace=http://www.mevio.com/feeds/noagenda.xml
290 priority=100
291 description=Unify No Agenda Feeds
293 [escape-pods]
294 podcast=1
295 episode=0
296 search=(?i)^http://feeds\.feedburner\.com/EscapePod$
297 replace=http://escapepod.org/feed/
298 priority=100
299 description=Unify Escape Pod Feeds
301 [hacker-public-radio]
302 podcast=1
303 episode=0
304 search=http://hackerpublicradio\.org/(?P<res>.*)$
305 replace=http://www.hackerpublicradio.org/\g<res>
306 priority=100
307 description=Unify Hacker Public Radio (bug 1090)
309 [hacker-medley]
310 podcast=1
311 episode=0
312 search=(?i)^http://feeds\.feedburner\.com/HackerMedley$
313 replace=http://hackermedley.org/feed/podcast/
314 priority=100
315 description=Unify Hacker Medley Podcast
317 [phones-show]
318 podcast=1
319 episode=0
320 search=.*http://3lib\.ukonline\.co\.uk/sshow/sshowchat\.rss.*
321 replace=http://stevelitchfield.com/sshow/sshowchat.rss
322 priority=100
323 description=Rewrite old URL of The Phones Show (by request of Steve Litchfield on 2011-04-01)