repos / pico

pico services mono repo
git clone https://github.com/picosh/pico.git

pico / pkg / shared / router
Eric Bower  ·  2026-01-25

analytics.go

  1package router
  2
  3import (
  4	"context"
  5	"crypto/hmac"
  6	"crypto/sha256"
  7	"encoding/hex"
  8	"encoding/json"
  9	"errors"
 10	"fmt"
 11	"log/slog"
 12	"net"
 13	"net/http"
 14	"net/url"
 15	"strings"
 16	"time"
 17
 18	"github.com/picosh/pico/pkg/db"
 19	"github.com/picosh/pico/pkg/shared"
 20	"github.com/picosh/utils/pipe/metrics"
 21	"github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
 22	"github.com/x-way/crawlerdetect"
 23)
 24
 25var internalCrawlers *crawlerdetect.CrawlerDetect
 26
 27func init() {
 28	internalCrawlers = crawlerdetect.New()
 29	internalCrawlers.SetCrawlers([]string{
 30		`^Azure Traffic Manager Endpoint Monitor$`,
 31		`^Blackbox Exporter\/`,
 32		`^Prometheus\/`,
 33	})
 34}
 35
 36func HmacString(secret, data string) string {
 37	hmacer := hmac.New(sha256.New, []byte(secret))
 38	hmacer.Write([]byte(data))
 39	dataHmac := hmacer.Sum(nil)
 40	return hex.EncodeToString(dataHmac)
 41}
 42
 43func trackableUserAgent(agent string) error {
 44	// dont store requests from bots
 45	if crawlerdetect.IsCrawler(agent) || internalCrawlers.IsCrawler(agent) {
 46		return fmt.Errorf(
 47			"request is likely from a bot (User-Agent: %s)",
 48			CleanUserAgent(agent),
 49		)
 50	}
 51	return nil
 52}
 53
 54func trackableRequest(r *http.Request) error {
 55	agent := r.UserAgent()
 56	return trackableUserAgent(agent)
 57}
 58
 59func cleanIpAddress(ip string) (string, error) {
 60	host, _, err := net.SplitHostPort(ip)
 61	if err != nil {
 62		host = ip
 63	}
 64	// /24 IPv4 subnet mask
 65	// /64 IPv6 subnet mask
 66	anonymizer := ipanonymizer.NewWithMask(
 67		net.CIDRMask(24, 32),
 68		net.CIDRMask(64, 128),
 69	)
 70	anonIp, err := anonymizer.IPString(host)
 71	return anonIp, err
 72}
 73
 74func cleanUrl(orig string) (string, string) {
 75	u, err := url.Parse(orig)
 76	if err != nil {
 77		return "", ""
 78	}
 79	return u.Host, u.Path
 80}
 81
 82func cleanUrlFromRequest(r *http.Request) (string, string) {
 83	host := r.Header.Get("x-forwarded-host")
 84	if host == "" {
 85		host = r.URL.Host
 86	}
 87	if host == "" {
 88		host = r.Host
 89	}
 90	// we don't want query params in the url for security reasons
 91	return host, r.URL.Path
 92}
 93
 94func CleanUserAgent(ua string) string {
 95	// truncate user-agent because http headers have no text limit
 96	if len(ua) > 1000 {
 97		return ua[:1000]
 98	}
 99	return strings.TrimSpace(ua)
100}
101
// hasHTTPScheme reports whether s begins with an explicit "http://" or
// "https://" scheme, compared case-insensitively. Checking for the full
// scheme (rather than the bare prefix "http") keeps hosts such as
// "httpbin.org" from being mistaken for absolute URLs.
func hasHTTPScheme(s string) bool {
	for _, scheme := range [...]string{"http://", "https://"} {
		if len(s) >= len(scheme) && strings.EqualFold(s[:len(scheme)], scheme) {
			return true
		}
	}
	return false
}

// filterIp rejects hosts that are literal IP addresses. It returns host
// unchanged when it is not an IP (an empty host is passed through without
// error) and an empty string plus an error when it is one.
func filterIp(host string) (string, error) {
	if host == "" {
		return "", nil
	}
	if addr := net.ParseIP(host); addr != nil {
		return "", fmt.Errorf("host is an ip")
	}
	return host, nil
}

// CleanReferer reduces a Referer header value to its lowercase hostname.
//
// An empty (or whitespace-only) referer yields "" with no error. A referer
// whose host is a literal IP address also yields "" with no error. An error
// is returned only when the value cannot be parsed as a URL.
func CleanReferer(raw string) (string, error) {
	ref := strings.TrimSpace(raw)
	if ref == "" {
		return "", nil
	}
	// referers sometimes don't include a scheme but we need one to parse
	if !hasHTTPScheme(ref) {
		ref = "https://" + ref
	}
	// we only want to store host for security reasons
	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
	u, err := url.Parse(ref)
	if err != nil {
		return "", err
	}
	// An IP referer is deliberately dropped rather than reported: filterIp
	// returns "" for it and the error is intentionally ignored.
	hostname, _ := filterIp(u.Hostname())
	return strings.TrimSpace(strings.ToLower(hostname)), nil
}

// CleanHost reduces a host (optionally a full URL) to its lowercase hostname
// with any port, path and query removed.
//
// It returns an error when raw is blank, when it cannot be parsed (in which
// case raw is returned alongside the error), or when the host is a literal IP
// address.
func CleanHost(raw string) (string, error) {
	prep := strings.TrimSpace(strings.ToLower(raw))
	if prep == "" {
		return "", fmt.Errorf("host is blank")
	}
	// hosts don't usually include a scheme but we need one to parse
	if !hasHTTPScheme(prep) {
		prep = "https://" + prep
	}
	// no clue why but our prod data contains periods
	prep = strings.Trim(prep, ".")
	// we only want to store host for security reasons
	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
	u, err := url.Parse(prep)
	if err != nil {
		return raw, err
	}
	return filterIp(u.Hostname())
}
155
// ErrAnalyticsDisabled is returned by AnalyticsVisitFromVisit and
// AnalyticsVisitFromRequest when HasFeatureByUser reports that the user does
// not have the "analytics" feature.
var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
157
158func AnalyticsVisitFromVisit(visit *db.AnalyticsVisits, dbpool db.DB, secret string) error {
159	if !dbpool.HasFeatureByUser(visit.UserID, "analytics") {
160		return ErrAnalyticsDisabled
161	}
162
163	err := trackableUserAgent(visit.UserAgent)
164	if err != nil {
165		return err
166	}
167
168	ipAddress, err := cleanIpAddress(visit.IpAddress)
169	if err != nil {
170		return err
171	}
172	visit.IpAddress = HmacString(secret, ipAddress)
173	_, path := cleanUrl(visit.Path)
174	visit.Path = path
175
176	referer, err := CleanReferer(visit.Referer)
177	if err != nil {
178		return err
179	}
180	visit.Referer = referer
181
182	hostname, err := CleanHost(visit.Host)
183	if err != nil {
184		return err
185	}
186	visit.Host = hostname
187	visit.UserAgent = CleanUserAgent(visit.UserAgent)
188
189	return nil
190}
191
192func ipFromRequest(r *http.Request) string {
193	// https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
194	ipOrig := r.Header.Get("x-forwarded-for")
195	if ipOrig == "" {
196		ipOrig = r.RemoteAddr
197	}
198	// probably means this is a web tunnel
199	if ipOrig == "" || ipOrig == "@" {
200		sshCtx, err := GetSshCtx(r)
201		if err == nil {
202			ipOrig = sshCtx.RemoteAddr().String()
203		}
204	}
205
206	return ipOrig
207}
208
209func AnalyticsVisitFromRequest(r *http.Request, dbpool db.DB, userID string) (*db.AnalyticsVisits, error) {
210	if !dbpool.HasFeatureByUser(userID, "analytics") {
211		return nil, ErrAnalyticsDisabled
212	}
213
214	err := trackableRequest(r)
215	if err != nil {
216		return nil, err
217	}
218
219	ipAddress := ipFromRequest(r)
220	host, path := cleanUrlFromRequest(r)
221
222	return &db.AnalyticsVisits{
223		UserID:    userID,
224		Host:      host,
225		Path:      path,
226		IpAddress: ipAddress,
227		UserAgent: r.UserAgent(),
228		Referer:   r.Referer(),
229		Status:    http.StatusOK,
230	}, nil
231}
232
233func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
234	drain := metrics.RegisterReconnectMetricRecorder(
235		context.Background(),
236		logger,
237		shared.NewPicoPipeClient(),
238		100,
239		10*time.Millisecond,
240	)
241
242	for visit := range ch {
243		data, err := json.Marshal(visit)
244		if err != nil {
245			logger.Error("could not json marshall visit record", "err", err)
246			continue
247		}
248
249		data = append(data, '\n')
250
251		_, err = drain.Write(data)
252		if err != nil {
253			logger.Error("could not write to metric-drain", "err", err)
254		}
255	}
256}