Antonio Mika
·
2025-03-12
mdparser.go
1package shared
2
3import (
4 "bytes"
5 "fmt"
6 "strings"
7 "time"
8
9 "github.com/alecthomas/chroma/v2/formatters/html"
10 "github.com/araddon/dateparse"
11 "github.com/microcosm-cc/bluemonday"
12 "github.com/yuin/goldmark"
13 highlighting "github.com/yuin/goldmark-highlighting/v2"
14 meta "github.com/yuin/goldmark-meta"
15 "github.com/yuin/goldmark/ast"
16 "github.com/yuin/goldmark/extension"
17 "github.com/yuin/goldmark/parser"
18 ghtml "github.com/yuin/goldmark/renderer/html"
19 gtext "github.com/yuin/goldmark/text"
20 "go.abhg.dev/goldmark/anchor"
21 "go.abhg.dev/goldmark/hashtag"
22 "go.abhg.dev/goldmark/toc"
23 yaml "gopkg.in/yaml.v2"
24)
25
// Link is a single navigation entry: a label and the address it points to.
type Link struct {
	// URL is the link target (the value side of a `text: href` nav entry).
	URL string
	// Text is the label shown for the link (the key side of a nav entry).
	Text string
}
30
31type MetaData struct {
32 PublishAt *time.Time
33 Title string
34 Description string
35 Nav []Link
36 Tags []string
37 Aliases []string
38 Layout string
39 Image string
40 ImageCard string
41 Favicon string
42 Hidden bool
43 WithStyles bool
44 Domain string
45}
46
47type ParsedText struct {
48 Html string
49 *MetaData
50}
51
52func HtmlPolicy() *bluemonday.Policy {
53 policy := bluemonday.UGCPolicy()
54 policy.AllowStyling()
55 policy.AllowAttrs("rel").OnElements("a")
56 return policy
57}
58
59var policy = HtmlPolicy()
60
// toString converts a front-matter value into a string.
//
// A nil value is treated as "not set" and yields the empty string. Any value
// that is not a string is rejected with an error naming its dynamic type.
func toString(obj interface{}) (string, error) {
	if obj == nil {
		return "", nil
	}
	if str, ok := obj.(string); ok {
		return str, nil
	}
	return "", fmt.Errorf("incorrect type for value: %T, should be string", obj)
}
72
// toBool converts a front-matter value into a bool.
//
// A nil value is treated as "not set" and yields fallback. Any value that is
// not a bool is rejected with an error naming its dynamic type; the returned
// bool is false in that case.
func toBool(obj interface{}, fallback bool) (bool, error) {
	if obj == nil {
		return fallback, nil
	}
	if flag, ok := obj.(bool); ok {
		return flag, nil
	}
	return false, fmt.Errorf("incorrect type for value: %T, should be bool", obj)
}
84
// toToc interprets the `toc` front-matter value, which may be a boolean or an
// integer, and normalizes it to a depth setting:
//
//   - -1 (also: unset, false, or any integer below -1): do not generate a toc
//   - 0 (also: true): generate a toc with no depth limit
//   - >0: generate a toc with a depth limit of that many levels past the title
//
// Any other type is rejected with an error and a result of -1.
func toToc(obj interface{}) (int, error) {
	switch val := obj.(type) {
	case nil:
		return -1, nil
	case bool:
		if !val {
			return -1, nil
		}
		return 0, nil
	case int:
		if val >= -1 {
			return val, nil
		}
		// clamp anything more negative to the "disabled" sentinel
		return -1, nil
	default:
		return -1, fmt.Errorf("incorrect type for value: %T, should be bool or int", val)
	}
}
109
110func toLinks(orderedMetaData yaml.MapSlice) ([]Link, error) {
111 var navData interface{}
112 for i := 0; i < len(orderedMetaData); i++ {
113 var item = orderedMetaData[i]
114 if item.Key == "nav" {
115 navData = item.Value
116 break
117 }
118 }
119
120 links := []Link{}
121 if navData == nil {
122 return links, nil
123 }
124
125 addLinks := func(raw yaml.MapSlice) {
126 for _, k := range raw {
127 links = append(links, Link{
128 Text: k.Key.(string),
129 URL: k.Value.(string),
130 })
131 }
132 }
133
134 switch raw := navData.(type) {
135 case yaml.MapSlice:
136 addLinks(raw)
137 case []interface{}:
138 for _, v := range raw {
139 switch linkRaw := v.(type) {
140 case yaml.MapSlice:
141 addLinks(v.(yaml.MapSlice))
142 default:
143 return links, fmt.Errorf("unsupported type for `nav` link item (%T), looking for map (`text: href`)", linkRaw)
144 }
145 }
146 default:
147 return links, fmt.Errorf("unsupported type for `nav` variable: %T", raw)
148 }
149
150 return links, nil
151}
152
// toAliases converts the `aliases` front-matter value into a list of alias
// paths with surrounding whitespace and a single leading "/" removed.
//
// Accepted shapes:
//   - a sequence of strings
//   - a single whitespace-separated string
//
// A nil value yields an empty, non-nil slice. A sequence containing a
// non-string item, or a value of any other type, yields an error together with
// the aliases collected up to that point.
func toAliases(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// YAML sequence items can be any scalar type (e.g. `- 2024`);
			// report that instead of panicking on a blind assertion.
			alias, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `aliases` item (%T), should be string", item)
			}
			arr = append(arr, strings.TrimPrefix(strings.TrimSpace(alias), "/"))
		}
	case string:
		// Fields splits on any run of whitespace, so repeated spaces or an
		// empty string do not produce empty aliases.
		for _, alias := range strings.Fields(raw) {
			arr = append(arr, strings.TrimPrefix(alias, "/"))
		}
	default:
		return arr, fmt.Errorf("unsupported type for `aliases` variable: %T", raw)
	}

	return arr, nil
}
177
// toTags converts the `tags` front-matter value into a list of tags.
//
// Accepted shapes:
//   - a sequence of strings (items are used as written)
//   - a single whitespace-separated string
//
// A nil value yields an empty, non-nil slice. A sequence containing a
// non-string item, or a value of any other type, yields an error together with
// the tags collected up to that point.
func toTags(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// YAML sequence items can be any scalar type (e.g. `- 2024`);
			// report that instead of panicking on a blind assertion.
			tag, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `tags` item (%T), should be string", item)
			}
			arr = append(arr, tag)
		}
	case string:
		// Fields splits on any run of whitespace, so repeated spaces or an
		// empty string do not produce empty tags.
		arr = append(arr, strings.Fields(raw)...)
	default:
		return arr, fmt.Errorf("unsupported type for `tags` variable: %T", raw)
	}

	return arr, nil
}
200
201func CreateGoldmark(extenders ...goldmark.Extender) goldmark.Markdown {
202 return goldmark.New(
203 goldmark.WithExtensions(
204 extenders...,
205 ),
206 goldmark.WithParserOptions(
207 parser.WithAutoHeadingID(),
208 ),
209 goldmark.WithRendererOptions(
210 ghtml.WithUnsafe(),
211 ),
212 )
213}
214
215func ParseText(text string) (*ParsedText, error) {
216 parsed := ParsedText{
217 MetaData: &MetaData{
218 Tags: []string{},
219 Aliases: []string{},
220 WithStyles: true,
221 PublishAt: &time.Time{},
222 },
223 }
224 hili := highlighting.NewHighlighting(
225 highlighting.WithFormatOptions(
226 html.WithLineNumbers(true),
227 html.WithClasses(true),
228 ),
229 )
230 extenders := []goldmark.Extender{
231 extension.GFM,
232 extension.Footnote,
233 meta.Meta,
234 &hashtag.Extender{},
235 hili,
236 &anchor.Extender{
237 Position: anchor.After,
238 Texter: anchor.Text("#"),
239 },
240 }
241 md := CreateGoldmark(extenders...)
242 context := parser.NewContext()
243 // we do the Parse/Render steps manually to get a chance to examine the AST
244 btext := []byte(text)
245 doc := md.Parser().Parse(gtext.NewReader(btext), parser.WithContext(context))
246 metaData := meta.Get(context)
247
248 // title:
249 // 1. if specified in frontmatter, use that
250 title, err := toString(metaData["title"])
251 if err != nil {
252 return &parsed, fmt.Errorf("front-matter field (%s): %w", "title", err)
253 }
254 // 2. If an <h1> is found before a <p> or other heading is found, use that
255 if title == "" {
256 title = AstTitle(doc, btext, true)
257 }
258 // 3. else, set it to nothing (slug should get used later down the line)
259 // this is implicit since it's already ""
260 parsed.MetaData.Title = title
261
262 // only handle toc after the title is extracted (if it's getting extracted)
263 mtoc, err := toToc(metaData["toc"])
264 if err != nil {
265 return &parsed, fmt.Errorf("front-matter field (%s): %w", "toc", err)
266 }
267 if mtoc >= 0 {
268 err = AstToc(doc, btext, mtoc)
269 if err != nil {
270 return &parsed, fmt.Errorf("error generating toc: %w", err)
271 }
272 }
273
274 description, err := toString(metaData["description"])
275 if err != nil {
276 return &parsed, fmt.Errorf("front-matter field (%s): %w", "description", err)
277 }
278 parsed.MetaData.Description = description
279
280 domain, err := toString(metaData["domain"])
281 if err != nil {
282 return &parsed, fmt.Errorf("front-matter field (%s): %w", "domain", err)
283 }
284 parsed.MetaData.Domain = domain
285
286 layout, err := toString(metaData["layout"])
287 if err != nil {
288 return &parsed, fmt.Errorf("front-matter field (%s): %w", "layout", err)
289 }
290 parsed.MetaData.Layout = layout
291
292 image, err := toString(metaData["image"])
293 if err != nil {
294 return &parsed, fmt.Errorf("front-matter field (%s): %w", "image", err)
295 }
296 parsed.MetaData.Image = image
297
298 card, err := toString(metaData["card"])
299 if err != nil {
300 return &parsed, fmt.Errorf("front-matter field (%s): %w", "card", err)
301 }
302 parsed.MetaData.ImageCard = card
303
304 hidden, err := toBool(metaData["draft"], false)
305 if err != nil {
306 return &parsed, fmt.Errorf("front-matter field (%s): %w", "draft", err)
307 }
308 parsed.MetaData.Hidden = hidden
309
310 withStyles, err := toBool(metaData["with_styles"], true)
311 if err != nil {
312 return &parsed, fmt.Errorf("front-matter field (%s): %w", "with_style", err)
313 }
314 parsed.MetaData.WithStyles = withStyles
315
316 favicon, err := toString(metaData["favicon"])
317 if err != nil {
318 return &parsed, fmt.Errorf("front-matter field (%s): %w", "favicon", err)
319 }
320 parsed.MetaData.Favicon = favicon
321
322 publishAt := &time.Time{}
323 date, err := toString(metaData["date"])
324 if err != nil {
325 return &parsed, fmt.Errorf("front-matter field (%s): %w", "date", err)
326 }
327
328 if date != "" {
329 nextDate, err := dateparse.ParseStrict(date)
330 if err != nil {
331 return &parsed, err
332 }
333 publishAt = &nextDate
334 }
335 parsed.MetaData.PublishAt = publishAt
336
337 orderedMetaData := meta.GetItems(context)
338
339 nav, err := toLinks(orderedMetaData)
340 if err != nil {
341 return &parsed, err
342 }
343 parsed.MetaData.Nav = nav
344
345 aliases, err := toAliases(metaData["aliases"])
346 if err != nil {
347 return &parsed, err
348 }
349 parsed.MetaData.Aliases = aliases
350
351 rtags := metaData["tags"]
352 tags, err := toTags(rtags)
353 if err != nil {
354 return &parsed, err
355 }
356 // fill from hashtag ASTs as fallback
357 if rtags == nil {
358 tags = AstTags(doc)
359 }
360 parsed.MetaData.Tags = tags
361
362 // Rendering happens last to allow any of the previous steps to manipulate
363 // the AST.
364 var buf bytes.Buffer
365 if err := md.Renderer().Render(&buf, btext, doc); err != nil {
366 return &parsed, err
367 }
368 parsed.Html = policy.Sanitize(buf.String())
369
370 return &parsed, nil
371}
372
373func AstTags(doc ast.Node) []string {
374 var tags []string
375 err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
376 switch n.Kind() {
377 // ignore hashtags inside of these sections
378 case ast.KindBlockquote, ast.KindCodeBlock, ast.KindCodeSpan:
379 return ast.WalkSkipChildren, nil
380 // register hashtags
381 case hashtag.Kind:
382 t := n.(*hashtag.Node)
383 if entering { // only add each tag once
384 tags = append(tags, string(t.Tag))
385 }
386 }
387 // out-of-switch default
388 return ast.WalkContinue, nil
389 })
390 if err != nil {
391 panic(err) // unreachable
392 }
393
394 // sort and deduplicate results
395 dedupe := removeDuplicateStr(tags)
396 return dedupe
397}
398
// removeDuplicateStr returns the distinct strings of strSlice in order of
// first appearance. The result is always a non-nil slice, even for nil or
// empty input.
//
// Adapted from https://stackoverflow.com/a/66751055
func removeDuplicateStr(strSlice []string) []string {
	seen := make(map[string]struct{})
	unique := []string{}
	for _, s := range strSlice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
411
412// AstTitle extracts the title (if any) from a parsed markdown document.
413//
414// If "clean" is true, it will also remove the heading node from the AST.
415func AstTitle(doc ast.Node, src []byte, clean bool) string {
416 out := ""
417 err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
418 if n.Kind() == ast.KindHeading {
419 if h := n.(*ast.Heading); h.Level == 1 {
420 if clean {
421 p := h.Parent()
422 p.RemoveChild(p, n)
423 }
424 out = string(h.Lines().Value(src))
425 }
426 return ast.WalkStop, nil
427 }
428 if ast.IsParagraph(n) {
429 return ast.WalkStop, nil
430 }
431 return ast.WalkContinue, nil
432 })
433 if err != nil {
434 panic(err) // unreachable
435 }
436 return out
437}
438
439func AstToc(doc ast.Node, src []byte, mtoc int) error {
440 var tree *toc.TOC
441 if mtoc >= 0 {
442 var err error
443 if mtoc > 0 {
444 tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2), toc.MaxDepth(mtoc+1))
445 } else {
446 tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2))
447 }
448 if err != nil {
449 return err
450 }
451 if tree == nil {
452 return nil // no headings?
453 }
454 }
455 list := toc.RenderList(tree)
456 if list == nil {
457 return nil // no headings
458 }
459
460 list.SetAttributeString("id", []byte("toc-list"))
461
462 // generate # toc
463 heading := ast.NewHeading(2)
464 heading.SetAttributeString("id", []byte("toc"))
465 heading.AppendChild(heading, ast.NewString([]byte("Table of Contents")))
466
467 // insert
468 doc.InsertBefore(doc, doc.FirstChild(), list)
469 doc.InsertBefore(doc, doc.FirstChild(), heading)
470 return nil
471}