repos / pico

pico services mono repo
git clone https://github.com/picosh/pico.git

pico / pkg / shared
Antonio Mika  ·  2025-03-12

analytics.go

  1package shared
  2
  3import (
  4	"context"
  5	"crypto/hmac"
  6	"crypto/sha256"
  7	"encoding/hex"
  8	"encoding/json"
  9	"errors"
 10	"fmt"
 11	"log/slog"
 12	"net"
 13	"net/http"
 14	"net/url"
 15	"strings"
 16	"time"
 17
 18	"github.com/picosh/pico/pkg/db"
 19	"github.com/picosh/utils/pipe/metrics"
 20	"github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
 21	"github.com/x-way/crawlerdetect"
 22)
 23
 24var internalCrawlers *crawlerdetect.CrawlerDetect
 25
 26func init() {
 27	internalCrawlers = crawlerdetect.New()
 28	internalCrawlers.SetCrawlers([]string{
 29		`^Azure Traffic Manager Endpoint Monitor$`,
 30		`^Blackbox Exporter\/`,
 31		`^Prometheus\/`,
 32	})
 33}
 34
 35func HmacString(secret, data string) string {
 36	hmacer := hmac.New(sha256.New, []byte(secret))
 37	hmacer.Write([]byte(data))
 38	dataHmac := hmacer.Sum(nil)
 39	return hex.EncodeToString(dataHmac)
 40}
 41
 42func trackableUserAgent(agent string) error {
 43	// dont store requests from bots
 44	if crawlerdetect.IsCrawler(agent) || internalCrawlers.IsCrawler(agent) {
 45		return fmt.Errorf(
 46			"request is likely from a bot (User-Agent: %s)",
 47			CleanUserAgent(agent),
 48		)
 49	}
 50	return nil
 51}
 52
 53func trackableRequest(r *http.Request) error {
 54	agent := r.UserAgent()
 55	return trackableUserAgent(agent)
 56}
 57
 58func cleanIpAddress(ip string) (string, error) {
 59	host, _, err := net.SplitHostPort(ip)
 60	if err != nil {
 61		host = ip
 62	}
 63	// /24 IPv4 subnet mask
 64	// /64 IPv6 subnet mask
 65	anonymizer := ipanonymizer.NewWithMask(
 66		net.CIDRMask(24, 32),
 67		net.CIDRMask(64, 128),
 68	)
 69	anonIp, err := anonymizer.IPString(host)
 70	return anonIp, err
 71}
 72
 73func cleanUrl(orig string) (string, string) {
 74	u, err := url.Parse(orig)
 75	if err != nil {
 76		return "", ""
 77	}
 78	return u.Host, u.Path
 79}
 80
 81func cleanUrlFromRequest(r *http.Request) (string, string) {
 82	host := r.Header.Get("x-forwarded-host")
 83	if host == "" {
 84		host = r.URL.Host
 85	}
 86	if host == "" {
 87		host = r.Host
 88	}
 89	// we don't want query params in the url for security reasons
 90	return host, r.URL.Path
 91}
 92
 93func CleanUserAgent(ua string) string {
 94	// truncate user-agent because http headers have no text limit
 95	if len(ua) > 1000 {
 96		return ua[:1000]
 97	}
 98	return strings.TrimSpace(ua)
 99}
100
// filterIp passes a hostname through unless it is an IP address. An empty
// host yields "" with a nil error; a host that net.ParseIP accepts yields ""
// together with an error; any other host is returned unchanged.
func filterIp(host string) (string, error) {
	switch {
	case host == "":
		return "", nil
	case net.ParseIP(host) != nil:
		return "", fmt.Errorf("host is an ip")
	default:
		return host, nil
	}
}
111
112func CleanReferer(raw string) (string, error) {
113	ref := raw
114	if ref == "" {
115		return "", nil
116	}
117	// referer sometimes dont include scheme but we need it
118	if !strings.HasPrefix(ref, "http") {
119		ref = "https://" + ref
120	}
121	// we only want to store host for security reasons
122	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
123	u, err := url.Parse(ref)
124	if err != nil {
125		return "", err
126	}
127	hostname := u.Hostname()
128	hostname, _ = filterIp(hostname)
129	hostname = strings.TrimSpace(strings.ToLower(hostname))
130	return hostname, err
131}
132
133func CleanHost(raw string) (string, error) {
134	prep := strings.TrimSpace(strings.ToLower(raw))
135	if prep == "" {
136		return "", fmt.Errorf("host is blank")
137	}
138	// hosts dont usually include scheme but we need it
139	if !strings.HasPrefix(prep, "http") {
140		prep = "https://" + prep
141	}
142	// no clue why but our prod data contains periods
143	prep = strings.Trim(prep, ".")
144	// we only want to store host for security reasons
145	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
146	u, err := url.Parse(prep)
147	if err != nil {
148		return raw, err
149	}
150	host := u.Hostname()
151	host, err = filterIp(host)
152	return host, err
153}
154
// ErrAnalyticsDisabled is returned by AnalyticsVisitFromVisit and
// AnalyticsVisitFromRequest when the user does not have the "analytics"
// feature.
var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
156
157func AnalyticsVisitFromVisit(visit *db.AnalyticsVisits, dbpool db.DB, secret string) error {
158	if !dbpool.HasFeatureForUser(visit.UserID, "analytics") {
159		return ErrAnalyticsDisabled
160	}
161
162	err := trackableUserAgent(visit.UserAgent)
163	if err != nil {
164		return err
165	}
166
167	ipAddress, err := cleanIpAddress(visit.IpAddress)
168	if err != nil {
169		return err
170	}
171	visit.IpAddress = HmacString(secret, ipAddress)
172	_, path := cleanUrl(visit.Path)
173	visit.Path = path
174
175	referer, err := CleanReferer(visit.Referer)
176	if err != nil {
177		return err
178	}
179	visit.Referer = referer
180
181	hostname, err := CleanHost(visit.Host)
182	if err != nil {
183		return err
184	}
185	visit.Host = hostname
186	visit.UserAgent = CleanUserAgent(visit.UserAgent)
187
188	return nil
189}
190
191func ipFromRequest(r *http.Request) string {
192	// https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
193	ipOrig := r.Header.Get("x-forwarded-for")
194	if ipOrig == "" {
195		ipOrig = r.RemoteAddr
196	}
197	// probably means this is a web tunnel
198	if ipOrig == "" || ipOrig == "@" {
199		sshCtx, err := GetSshCtx(r)
200		if err == nil {
201			ipOrig = sshCtx.RemoteAddr().String()
202		}
203	}
204
205	return ipOrig
206}
207
208func AnalyticsVisitFromRequest(r *http.Request, dbpool db.DB, userID string) (*db.AnalyticsVisits, error) {
209	if !dbpool.HasFeatureForUser(userID, "analytics") {
210		return nil, ErrAnalyticsDisabled
211	}
212
213	err := trackableRequest(r)
214	if err != nil {
215		return nil, err
216	}
217
218	ipAddress := ipFromRequest(r)
219	host, path := cleanUrlFromRequest(r)
220
221	return &db.AnalyticsVisits{
222		UserID:    userID,
223		Host:      host,
224		Path:      path,
225		IpAddress: ipAddress,
226		UserAgent: r.UserAgent(),
227		Referer:   r.Referer(),
228		Status:    http.StatusOK,
229	}, nil
230}
231
232func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
233	drain := metrics.RegisterReconnectMetricRecorder(
234		context.Background(),
235		logger,
236		NewPicoPipeClient(),
237		100,
238		10*time.Millisecond,
239	)
240
241	for visit := range ch {
242		data, err := json.Marshal(visit)
243		if err != nil {
244			logger.Error("could not json marshall visit record", "err", err)
245			continue
246		}
247
248		data = append(data, '\n')
249
250		_, err = drain.Write(data)
251		if err != nil {
252			logger.Error("could not write to metric-drain", "err", err)
253		}
254	}
255}