repos / pico

pico services mono repo
git clone https://github.com/picosh/pico.git

commit
bcb252d
parent
e1bec7f
author
Eric Bower
date
2025-08-06 15:42:44 -0400 EDT
fix(feeds): sanitize feed content to be utf8

Our Postgres database expects UTF-8, so invalid byte sequences are stripped from feed item fields before they are stored.
4 files changed,  +98, -6
M pkg/apps/feeds/cron.go
+5, -5
 1@@ -106,10 +106,10 @@ func DigestOptionToTime(lastDigest time.Time, interval string) time.Time {
 2 }
 3 
 4 func getFeedItemID(logger *slog.Logger, item *gofeed.Item) string {
 5-	guid := item.GUID
 6+	guid := strings.ToValidUTF8(item.GUID, "")
 7 	if item.GUID == "" {
 8 		logger.Info("no <guid> found for feed item, using <link> instead for its unique id")
 9-		return item.Link
10+		return strings.ToValidUTF8(item.Link, "")
11 	}
12 	return guid
13 }
14@@ -487,9 +487,9 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
15 				PostID: post.ID,
16 				GUID:   uid,
17 				Data: db.FeedItemData{
18-					Title:       item.Title,
19-					Description: item.Description,
20-					Content:     item.Content,
21+					Title:       strings.ToValidUTF8(item.Title, ""),
22+					Description: strings.ToValidUTF8(item.Description, ""),
23+					Content:     strings.ToValidUTF8(item.Content, ""),
24 					Link:        item.Link,
25 					PublishedAt: item.PublishedParsed,
26 				},
M pkg/db/postgres/storage.go
+1, -1
1@@ -1561,7 +1561,7 @@ func (me *PsqlDB) InsertFeedItems(postID string, items []*db.FeedItem) error {
2 			item.Data,
3 		)
4 		if err != nil {
5-			return err
6+			return fmt.Errorf("post id:%s, guid:%s, err:%w", item.PostID, item.GUID, err)
7 		}
8 	}
9 
A test.txt
+47, -0
 1@@ -0,0 +1,47 @@
 2+=: email tmp@erock.io
 3+=: digest_interval 1day
 4+=: inline_content false
 5+=> https://blog.pico.sh/rss
 6+=> https://uncenter.dev/feed.xml
 7+=> https://ryanccn.dev/feed/rss.xml
 8+=> https://shivjm.blog/feed.xml
 9+=> https://binyam.in/feeds/blog.xml
10+=> https://tonsky.me/blog/atom.xml
11+=> https://tty1.blog/feed/
12+=> https://sindresorhus.com/rss.xml
13+=> https://bower.sh/rss
14+=> https://fasterthanli.me/index.xml
15+=> https://antfu.me/feed.xml
16+=> https://zackoverflow.dev/rss.xml
17+=> https://humanwhocodes.com/feeds/blog.xml
18+=> https://mitchellh.com/feed.xml
19+=> https://chrisdone.com/rss.xml
20+=> https://www.hoeser.dev/feed.xml
21+=> https://daverupert.com/atom.xml
22+=> https://blog.orhun.dev/rss.xml
23+=> https://evanhahn.com/blog/index.xml
24+=> https://www.11ty.dev/blog/feed.xml
25+=> https://thorstenball.com/atom.xml
26+=> https://registerspill.thorstenball.com/feed
27+=> https://isabelroses.com/rss.xml
28+=> https://boehs.org/in/blog.xml
29+=> https://huonw.github.io/blog/atom.xml
30+=> https://nixpkgs.news/rss.xml
31+=> https://rbluethl.com/rss
32+=> https://lucumr.pocoo.org/feed.atom
33+=> https://yorickpeterse.com/feed.xml
34+=> https://ardislu.dev/atom.xml
35+=> https://kettanaito.com/blog/rss.xml
36+=> https://kilo.bytesize.xyz/feed/
37+=> https://bytesize.xyz/feed/
38+=> https://zed.dev/blog.rss
39+=> https://www.sophiajt.com/atom.xml
40+=> https://robinmalfait.com/feed.xml
41+=> https://twobithistory.org/feed.xml
42+=> https://ruudvanasseldonk.com/feed.xml
43+=> https://blog.rust-lang.org/feed.xml
44+=> https://nolanlawson.com/feed/
45+=> https://matklad.github.io/feed.xml
46+=> https://www.ntietz.com/atom.xml
47+=> https://2ality.com/feeds/posts.atom
48+=> https://dbushell.com/rss.xml
A test_utf8.txt
+45, -0
 1@@ -0,0 +1,45 @@
 2+=: email tmp@erock.io
 3+=: digest_interval 1day
 4+=: inline_content false
 5+=> https://blog.pico.sh/rss
 6+=> https://uncenter.dev/feed.xml
 7+=> https://ryanccn.dev/feed/rss.xml
 8+=> https://shivjm.blog/feed.xml
 9+=> https://binyam.in/feeds/blog.xml
10+=> https://tonsky.me/blog/atom.xml
11+=> https://tty1.blog/feed/
12+=> https://sindresorhus.com/rss.xml
13+=> https://bower.sh/rss
14+=> https://fasterthanli.me/index.xml
15+=> https://antfu.me/feed.xml
16+=> https://zackoverflow.dev/rss.xml
17+=> https://humanwhocodes.com/feeds/blog.xml
18+=> https://mitchellh.com/feed.xml
19+=> https://chrisdone.com/rss.xml
20+=> https://www.hoeser.dev/feed.xml
21+=> https://daverupert.com/atom.xml
22+=> https://blog.orhun.dev/rss.xml
23+=> https://evanhahn.com/blog/index.xml
24+=> https://www.11ty.dev/blog/feed.xml
25+=> https://thorstenball.com/atom.xml
26+=> https://registerspill.thorstenball.com/feed
27+=> https://isabelroses.com/rss.xml
28+=> https://boehs.org/in/blog.xml
29+=> https://huonw.github.io/blog/atom.xml
30+=> https://nixpkgs.news/rss.xml
31+=> https://rbluethl.com/rss
32+=> https://lucumr.pocoo.org/feed.atom
33+=> https://yorickpeterse.com/feed.xml
34+=> https://ardislu.dev/atom.xml
35+=> https://kettanaito.com/blog/rss.xml
36+=> https://kilo.bytesize.xyz/feed/
37+=> https://bytesize.xyz/feed/
38+=> https://zed.dev/blog.rss
39+=> https://www.sophiajt.com/atom.xml
40+=> https://robinmalfait.com/feed.xml
41+=> https://twobithistory.org/feed.xml
42+=> https://ruudvanasseldonk.com/feed.xml
43+=> https://blog.rust-lang.org/feed.xml
44+=> https://nolanlawson.com/feed/
45+=> https://matklad.github.io/feed.xml
46+=> https://www.ntietz.com/atom.xml