- commit
- bcb252d
- parent
- e1bec7f
- author
- Eric Bower
- date
- 2025-08-06 15:42:44 -0400 EDT
fix(feeds): sanitize feed content to be utf8

Our postgres db expects utf8
4 files changed,
+98,
-6
+5,
-5
1@@ -106,10 +106,10 @@ func DigestOptionToTime(lastDigest time.Time, interval string) time.Time {
2 }
3
4 func getFeedItemID(logger *slog.Logger, item *gofeed.Item) string {
5- guid := item.GUID
6+ guid := strings.ToValidUTF8(item.GUID, "")
7 if item.GUID == "" {
8 logger.Info("no <guid> found for feed item, using <link> instead for its unique id")
9- return item.Link
10+ return strings.ToValidUTF8(item.Link, "")
11 }
12 return guid
13 }
14@@ -487,9 +487,9 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
15 PostID: post.ID,
16 GUID: uid,
17 Data: db.FeedItemData{
18- Title: item.Title,
19- Description: item.Description,
20- Content: item.Content,
21+ Title: strings.ToValidUTF8(item.Title, ""),
22+ Description: strings.ToValidUTF8(item.Description, ""),
23+ Content: strings.ToValidUTF8(item.Content, ""),
24 Link: item.Link,
25 PublishedAt: item.PublishedParsed,
26 },
+1,
-1
1@@ -1561,7 +1561,7 @@ func (me *PsqlDB) InsertFeedItems(postID string, items []*db.FeedItem) error {
2 item.Data,
3 )
4 if err != nil {
5- return err
6+ return fmt.Errorf("post id:%s, guid:%s, err:%w", item.PostID, item.GUID, err)
7 }
8 }
9
A
test.txt
+47,
-0
1@@ -0,0 +1,47 @@
2+=: email tmp@erock.io
3+=: digest_interval 1day
4+=: inline_content false
5+=> https://blog.pico.sh/rss
6+=> https://uncenter.dev/feed.xml
7+=> https://ryanccn.dev/feed/rss.xml
8+=> https://shivjm.blog/feed.xml
9+=> https://binyam.in/feeds/blog.xml
10+=> https://tonsky.me/blog/atom.xml
11+=> https://tty1.blog/feed/
12+=> https://sindresorhus.com/rss.xml
13+=> https://bower.sh/rss
14+=> https://fasterthanli.me/index.xml
15+=> https://antfu.me/feed.xml
16+=> https://zackoverflow.dev/rss.xml
17+=> https://humanwhocodes.com/feeds/blog.xml
18+=> https://mitchellh.com/feed.xml
19+=> https://chrisdone.com/rss.xml
20+=> https://www.hoeser.dev/feed.xml
21+=> https://daverupert.com/atom.xml
22+=> https://blog.orhun.dev/rss.xml
23+=> https://evanhahn.com/blog/index.xml
24+=> https://www.11ty.dev/blog/feed.xml
25+=> https://thorstenball.com/atom.xml
26+=> https://registerspill.thorstenball.com/feed
27+=> https://isabelroses.com/rss.xml
28+=> https://boehs.org/in/blog.xml
29+=> https://huonw.github.io/blog/atom.xml
30+=> https://nixpkgs.news/rss.xml
31+=> https://rbluethl.com/rss
32+=> https://lucumr.pocoo.org/feed.atom
33+=> https://yorickpeterse.com/feed.xml
34+=> https://ardislu.dev/atom.xml
35+=> https://kettanaito.com/blog/rss.xml
36+=> https://kilo.bytesize.xyz/feed/
37+=> https://bytesize.xyz/feed/
38+=> https://zed.dev/blog.rss
39+=> https://www.sophiajt.com/atom.xml
40+=> https://robinmalfait.com/feed.xml
41+=> https://twobithistory.org/feed.xml
42+=> https://ruudvanasseldonk.com/feed.xml
43+=> https://blog.rust-lang.org/feed.xml
44+=> https://nolanlawson.com/feed/
45+=> https://matklad.github.io/feed.xml
46+=> https://www.ntietz.com/atom.xml
47+=> https://2ality.com/feeds/posts.atom
48+=> https://dbushell.com/rss.xml
+45,
-0
1@@ -0,0 +1,45 @@
2+=: email tmp@erock.io
3+=: digest_interval 1day
4+=: inline_content false
5+=> https://blog.pico.sh/rss
6+=> https://uncenter.dev/feed.xml
7+=> https://ryanccn.dev/feed/rss.xml
8+=> https://shivjm.blog/feed.xml
9+=> https://binyam.in/feeds/blog.xml
10+=> https://tonsky.me/blog/atom.xml
11+=> https://tty1.blog/feed/
12+=> https://sindresorhus.com/rss.xml
13+=> https://bower.sh/rss
14+=> https://fasterthanli.me/index.xml
15+=> https://antfu.me/feed.xml
16+=> https://zackoverflow.dev/rss.xml
17+=> https://humanwhocodes.com/feeds/blog.xml
18+=> https://mitchellh.com/feed.xml
19+=> https://chrisdone.com/rss.xml
20+=> https://www.hoeser.dev/feed.xml
21+=> https://daverupert.com/atom.xml
22+=> https://blog.orhun.dev/rss.xml
23+=> https://evanhahn.com/blog/index.xml
24+=> https://www.11ty.dev/blog/feed.xml
25+=> https://thorstenball.com/atom.xml
26+=> https://registerspill.thorstenball.com/feed
27+=> https://isabelroses.com/rss.xml
28+=> https://boehs.org/in/blog.xml
29+=> https://huonw.github.io/blog/atom.xml
30+=> https://nixpkgs.news/rss.xml
31+=> https://rbluethl.com/rss
32+=> https://lucumr.pocoo.org/feed.atom
33+=> https://yorickpeterse.com/feed.xml
34+=> https://ardislu.dev/atom.xml
35+=> https://kettanaito.com/blog/rss.xml
36+=> https://kilo.bytesize.xyz/feed/
37+=> https://bytesize.xyz/feed/
38+=> https://zed.dev/blog.rss
39+=> https://www.sophiajt.com/atom.xml
40+=> https://robinmalfait.com/feed.xml
41+=> https://twobithistory.org/feed.xml
42+=> https://ruudvanasseldonk.com/feed.xml
43+=> https://blog.rust-lang.org/feed.xml
44+=> https://nolanlawson.com/feed/
45+=> https://matklad.github.io/feed.xml
46+=> https://www.ntietz.com/atom.xml