repos / pico

pico services mono repo
git clone https://github.com/picosh/pico.git

pico / pkg / shared
Antonio Mika  ·  2025-03-12

mdparser.go

  1package shared
  2
  3import (
  4	"bytes"
  5	"fmt"
  6	"strings"
  7	"time"
  8
  9	"github.com/alecthomas/chroma/v2/formatters/html"
 10	"github.com/araddon/dateparse"
 11	"github.com/microcosm-cc/bluemonday"
 12	"github.com/yuin/goldmark"
 13	highlighting "github.com/yuin/goldmark-highlighting/v2"
 14	meta "github.com/yuin/goldmark-meta"
 15	"github.com/yuin/goldmark/ast"
 16	"github.com/yuin/goldmark/extension"
 17	"github.com/yuin/goldmark/parser"
 18	ghtml "github.com/yuin/goldmark/renderer/html"
 19	gtext "github.com/yuin/goldmark/text"
 20	"go.abhg.dev/goldmark/anchor"
 21	"go.abhg.dev/goldmark/hashtag"
 22	"go.abhg.dev/goldmark/toc"
 23	yaml "gopkg.in/yaml.v2"
 24)
 25
// Link is a single navigation entry taken from the `nav` front-matter field.
type Link struct {
	URL  string // link target (the value of a `text: href` nav entry)
	Text string // link label (the key of a `text: href` nav entry)
}
 30
// MetaData holds the values ParseText extracts from a document's YAML
// front-matter (and, for Title and Tags, from the markdown body as a
// fallback).
type MetaData struct {
	PublishAt   *time.Time // parsed from `date`; points at the zero time when `date` is absent
	Title       string     // `title`, else the leading <h1> of the document, else ""
	Description string     // `description`
	Nav         []Link     // `nav`, in the order written in the front-matter
	Tags        []string   // `tags`, or the document's hashtags when `tags` is absent
	Aliases     []string   // `aliases`, with one leading "/" stripped from each entry
	Layout      string     // `layout`
	Image       string     // `image`
	ImageCard   string     // `card`
	Favicon     string     // `favicon`
	Hidden      bool       // `draft` (defaults to false)
	WithStyles  bool       // `with_styles` (defaults to true)
	Domain      string     // `domain`
}
 46
// ParsedText is the result of ParseText: the rendered document plus its
// metadata, whose fields are promoted through the embedded pointer.
type ParsedText struct {
	Html string // rendered markdown after being passed through the sanitizing policy
	*MetaData
}
 51
 52func HtmlPolicy() *bluemonday.Policy {
 53	policy := bluemonday.UGCPolicy()
 54	policy.AllowStyling()
 55	policy.AllowAttrs("rel").OnElements("a")
 56	return policy
 57}
 58
// policy is the sanitizer built once at package initialization by HtmlPolicy;
// ParseText passes its rendered HTML through it.
var policy = HtmlPolicy()
 60
 61func toString(obj interface{}) (string, error) {
 62	if obj == nil {
 63		return "", nil
 64	}
 65	switch val := obj.(type) {
 66	case string:
 67		return val, nil
 68	default:
 69		return "", fmt.Errorf("incorrect type for value: %T, should be string", val)
 70	}
 71}
 72
 73func toBool(obj interface{}, fallback bool) (bool, error) {
 74	if obj == nil {
 75		return fallback, nil
 76	}
 77	switch val := obj.(type) {
 78	case bool:
 79		return val, nil
 80	default:
 81		return false, fmt.Errorf("incorrect type for value: %T, should be bool", val)
 82	}
 83}
 84
 85// The toc frontmatter can take a boolean or an integer.
 86//
 87// A value of -1 or false means "do not generate a toc".
 88// A value of 0 or true means "generate a toc with no depth limit".
 89// A value of >0 means "generate a toc with a depth limit of $value past title".
 90func toToc(obj interface{}) (int, error) {
 91	if obj == nil {
 92		return -1, nil
 93	}
 94	switch val := obj.(type) {
 95	case bool:
 96		if val {
 97			return 0, nil
 98		}
 99		return -1, nil
100	case int:
101		if val < -1 {
102			val = -1
103		}
104		return val, nil
105	default:
106		return -1, fmt.Errorf("incorrect type for value: %T, should be bool or int", val)
107	}
108}
109
110func toLinks(orderedMetaData yaml.MapSlice) ([]Link, error) {
111	var navData interface{}
112	for i := 0; i < len(orderedMetaData); i++ {
113		var item = orderedMetaData[i]
114		if item.Key == "nav" {
115			navData = item.Value
116			break
117		}
118	}
119
120	links := []Link{}
121	if navData == nil {
122		return links, nil
123	}
124
125	addLinks := func(raw yaml.MapSlice) {
126		for _, k := range raw {
127			links = append(links, Link{
128				Text: k.Key.(string),
129				URL:  k.Value.(string),
130			})
131		}
132	}
133
134	switch raw := navData.(type) {
135	case yaml.MapSlice:
136		addLinks(raw)
137	case []interface{}:
138		for _, v := range raw {
139			switch linkRaw := v.(type) {
140			case yaml.MapSlice:
141				addLinks(v.(yaml.MapSlice))
142			default:
143				return links, fmt.Errorf("unsupported type for `nav` link item (%T), looking for map (`text: href`)", linkRaw)
144			}
145		}
146	default:
147		return links, fmt.Errorf("unsupported type for `nav` variable: %T", raw)
148	}
149
150	return links, nil
151}
152
// toAliases converts the `aliases` front-matter value to a list of aliases.
//
// The value may be a list of strings or a single whitespace-separated string.
// Each alias has surrounding whitespace and one leading "/" removed. A nil
// value (missing key) yields an empty, non-nil slice.
//
// Any other type, or a list item that is not a string, yields an error; the
// aliases collected before the failure are returned alongside it.
func toAliases(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, alias := range raw {
			// front-matter is user input: report a bad item, don't panic
			str, ok := alias.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `aliases` item: %T, should be string", alias)
			}
			arr = append(arr, strings.TrimPrefix(strings.TrimSpace(str), "/"))
		}
	case string:
		// Fields splits on any run of whitespace, so repeated spaces or an
		// empty string do not produce empty aliases.
		for _, alias := range strings.Fields(raw) {
			arr = append(arr, strings.TrimPrefix(alias, "/"))
		}
	default:
		return arr, fmt.Errorf("unsupported type for `aliases` variable: %T", raw)
	}

	return arr, nil
}
177
// toTags converts the `tags` front-matter value to a list of tags.
//
// The value may be a list of strings (items are kept verbatim) or a single
// whitespace-separated string. A nil value (missing key) yields an empty,
// non-nil slice.
//
// Any other type, or a list item that is not a string, yields an error; the
// tags collected before the failure are returned alongside it.
func toTags(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, tag := range raw {
			// front-matter is user input: report a bad item, don't panic
			str, ok := tag.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `tags` item: %T, should be string", tag)
			}
			arr = append(arr, str)
		}
	case string:
		// Fields splits on any run of whitespace, so repeated spaces or an
		// empty string do not produce empty tags.
		arr = append(arr, strings.Fields(raw)...)
	default:
		return arr, fmt.Errorf("unsupported type for `tags` variable: %T", raw)
	}

	return arr, nil
}
200
201func CreateGoldmark(extenders ...goldmark.Extender) goldmark.Markdown {
202	return goldmark.New(
203		goldmark.WithExtensions(
204			extenders...,
205		),
206		goldmark.WithParserOptions(
207			parser.WithAutoHeadingID(),
208		),
209		goldmark.WithRendererOptions(
210			ghtml.WithUnsafe(),
211		),
212	)
213}
214
215func ParseText(text string) (*ParsedText, error) {
216	parsed := ParsedText{
217		MetaData: &MetaData{
218			Tags:       []string{},
219			Aliases:    []string{},
220			WithStyles: true,
221			PublishAt:  &time.Time{},
222		},
223	}
224	hili := highlighting.NewHighlighting(
225		highlighting.WithFormatOptions(
226			html.WithLineNumbers(true),
227			html.WithClasses(true),
228		),
229	)
230	extenders := []goldmark.Extender{
231		extension.GFM,
232		extension.Footnote,
233		meta.Meta,
234		&hashtag.Extender{},
235		hili,
236		&anchor.Extender{
237			Position: anchor.After,
238			Texter:   anchor.Text("#"),
239		},
240	}
241	md := CreateGoldmark(extenders...)
242	context := parser.NewContext()
243	// we do the Parse/Render steps manually to get a chance to examine the AST
244	btext := []byte(text)
245	doc := md.Parser().Parse(gtext.NewReader(btext), parser.WithContext(context))
246	metaData := meta.Get(context)
247
248	// title:
249	// 1. if specified in frontmatter, use that
250	title, err := toString(metaData["title"])
251	if err != nil {
252		return &parsed, fmt.Errorf("front-matter field (%s): %w", "title", err)
253	}
254	// 2. If an <h1> is found before a <p> or other heading is found, use that
255	if title == "" {
256		title = AstTitle(doc, btext, true)
257	}
258	// 3. else, set it to nothing (slug should get used later down the line)
259	// this is implicit since it's already ""
260	parsed.MetaData.Title = title
261
262	// only handle toc after the title is extracted (if it's getting extracted)
263	mtoc, err := toToc(metaData["toc"])
264	if err != nil {
265		return &parsed, fmt.Errorf("front-matter field (%s): %w", "toc", err)
266	}
267	if mtoc >= 0 {
268		err = AstToc(doc, btext, mtoc)
269		if err != nil {
270			return &parsed, fmt.Errorf("error generating toc: %w", err)
271		}
272	}
273
274	description, err := toString(metaData["description"])
275	if err != nil {
276		return &parsed, fmt.Errorf("front-matter field (%s): %w", "description", err)
277	}
278	parsed.MetaData.Description = description
279
280	domain, err := toString(metaData["domain"])
281	if err != nil {
282		return &parsed, fmt.Errorf("front-matter field (%s): %w", "domain", err)
283	}
284	parsed.MetaData.Domain = domain
285
286	layout, err := toString(metaData["layout"])
287	if err != nil {
288		return &parsed, fmt.Errorf("front-matter field (%s): %w", "layout", err)
289	}
290	parsed.MetaData.Layout = layout
291
292	image, err := toString(metaData["image"])
293	if err != nil {
294		return &parsed, fmt.Errorf("front-matter field (%s): %w", "image", err)
295	}
296	parsed.MetaData.Image = image
297
298	card, err := toString(metaData["card"])
299	if err != nil {
300		return &parsed, fmt.Errorf("front-matter field (%s): %w", "card", err)
301	}
302	parsed.MetaData.ImageCard = card
303
304	hidden, err := toBool(metaData["draft"], false)
305	if err != nil {
306		return &parsed, fmt.Errorf("front-matter field (%s): %w", "draft", err)
307	}
308	parsed.MetaData.Hidden = hidden
309
310	withStyles, err := toBool(metaData["with_styles"], true)
311	if err != nil {
312		return &parsed, fmt.Errorf("front-matter field (%s): %w", "with_style", err)
313	}
314	parsed.MetaData.WithStyles = withStyles
315
316	favicon, err := toString(metaData["favicon"])
317	if err != nil {
318		return &parsed, fmt.Errorf("front-matter field (%s): %w", "favicon", err)
319	}
320	parsed.MetaData.Favicon = favicon
321
322	publishAt := &time.Time{}
323	date, err := toString(metaData["date"])
324	if err != nil {
325		return &parsed, fmt.Errorf("front-matter field (%s): %w", "date", err)
326	}
327
328	if date != "" {
329		nextDate, err := dateparse.ParseStrict(date)
330		if err != nil {
331			return &parsed, err
332		}
333		publishAt = &nextDate
334	}
335	parsed.MetaData.PublishAt = publishAt
336
337	orderedMetaData := meta.GetItems(context)
338
339	nav, err := toLinks(orderedMetaData)
340	if err != nil {
341		return &parsed, err
342	}
343	parsed.MetaData.Nav = nav
344
345	aliases, err := toAliases(metaData["aliases"])
346	if err != nil {
347		return &parsed, err
348	}
349	parsed.MetaData.Aliases = aliases
350
351	rtags := metaData["tags"]
352	tags, err := toTags(rtags)
353	if err != nil {
354		return &parsed, err
355	}
356	// fill from hashtag ASTs as fallback
357	if rtags == nil {
358		tags = AstTags(doc)
359	}
360	parsed.MetaData.Tags = tags
361
362	// Rendering happens last to allow any of the previous steps to manipulate
363	// the AST.
364	var buf bytes.Buffer
365	if err := md.Renderer().Render(&buf, btext, doc); err != nil {
366		return &parsed, err
367	}
368	parsed.Html = policy.Sanitize(buf.String())
369
370	return &parsed, nil
371}
372
373func AstTags(doc ast.Node) []string {
374	var tags []string
375	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
376		switch n.Kind() {
377		// ignore hashtags inside of these sections
378		case ast.KindBlockquote, ast.KindCodeBlock, ast.KindCodeSpan:
379			return ast.WalkSkipChildren, nil
380		// register hashtags
381		case hashtag.Kind:
382			t := n.(*hashtag.Node)
383			if entering { // only add each tag once
384				tags = append(tags, string(t.Tag))
385			}
386		}
387		// out-of-switch default
388		return ast.WalkContinue, nil
389	})
390	if err != nil {
391		panic(err) // unreachable
392	}
393
394	// sort and deduplicate results
395	dedupe := removeDuplicateStr(tags)
396	return dedupe
397}
398
// removeDuplicateStr returns the distinct strings of strSlice in the order of
// their first occurrence. The result is always non-nil, even for nil input.
//
// Adapted from https://stackoverflow.com/a/66751055
func removeDuplicateStr(strSlice []string) []string {
	seen := make(map[string]struct{})
	unique := []string{}
	for i := range strSlice {
		s := strSlice[i]
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
411
412// AstTitle extracts the title (if any) from a parsed markdown document.
413//
414// If "clean" is true, it will also remove the heading node from the AST.
415func AstTitle(doc ast.Node, src []byte, clean bool) string {
416	out := ""
417	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
418		if n.Kind() == ast.KindHeading {
419			if h := n.(*ast.Heading); h.Level == 1 {
420				if clean {
421					p := h.Parent()
422					p.RemoveChild(p, n)
423				}
424				out = string(h.Lines().Value(src))
425			}
426			return ast.WalkStop, nil
427		}
428		if ast.IsParagraph(n) {
429			return ast.WalkStop, nil
430		}
431		return ast.WalkContinue, nil
432	})
433	if err != nil {
434		panic(err) // unreachable
435	}
436	return out
437}
438
439func AstToc(doc ast.Node, src []byte, mtoc int) error {
440	var tree *toc.TOC
441	if mtoc >= 0 {
442		var err error
443		if mtoc > 0 {
444			tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2), toc.MaxDepth(mtoc+1))
445		} else {
446			tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2))
447		}
448		if err != nil {
449			return err
450		}
451		if tree == nil {
452			return nil // no headings?
453		}
454	}
455	list := toc.RenderList(tree)
456	if list == nil {
457		return nil // no headings
458	}
459
460	list.SetAttributeString("id", []byte("toc-list"))
461
462	// generate # toc
463	heading := ast.NewHeading(2)
464	heading.SetAttributeString("id", []byte("toc"))
465	heading.AppendChild(heading, ast.NewString([]byte("Table of Contents")))
466
467	// insert
468	doc.InsertBefore(doc, doc.FirstChild(), list)
469	doc.InsertBefore(doc, doc.FirstChild(), heading)
470	return nil
471}