Eric Bower
·
2026-01-25
analytics.go
1package router
2
3import (
4 "context"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "encoding/json"
9 "errors"
10 "fmt"
11 "log/slog"
12 "net"
13 "net/http"
14 "net/url"
15 "strings"
16 "time"
17
18 "github.com/picosh/pico/pkg/db"
19 "github.com/picosh/pico/pkg/shared"
20 "github.com/picosh/utils/pipe/metrics"
21 "github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
22 "github.com/x-way/crawlerdetect"
23)
24
25var internalCrawlers *crawlerdetect.CrawlerDetect
26
27func init() {
28 internalCrawlers = crawlerdetect.New()
29 internalCrawlers.SetCrawlers([]string{
30 `^Azure Traffic Manager Endpoint Monitor$`,
31 `^Blackbox Exporter\/`,
32 `^Prometheus\/`,
33 })
34}
35
// HmacString returns the lowercase hex encoding of the HMAC-SHA256 of data,
// keyed with secret.
func HmacString(secret, data string) string {
	mac := hmac.New(sha256.New, []byte(secret))
	mac.Write([]byte(data))
	digest := mac.Sum(make([]byte, 0, sha256.Size))
	return hex.EncodeToString(digest)
}
42
43func trackableUserAgent(agent string) error {
44 // dont store requests from bots
45 if crawlerdetect.IsCrawler(agent) || internalCrawlers.IsCrawler(agent) {
46 return fmt.Errorf(
47 "request is likely from a bot (User-Agent: %s)",
48 CleanUserAgent(agent),
49 )
50 }
51 return nil
52}
53
54func trackableRequest(r *http.Request) error {
55 agent := r.UserAgent()
56 return trackableUserAgent(agent)
57}
58
59func cleanIpAddress(ip string) (string, error) {
60 host, _, err := net.SplitHostPort(ip)
61 if err != nil {
62 host = ip
63 }
64 // /24 IPv4 subnet mask
65 // /64 IPv6 subnet mask
66 anonymizer := ipanonymizer.NewWithMask(
67 net.CIDRMask(24, 32),
68 net.CIDRMask(64, 128),
69 )
70 anonIp, err := anonymizer.IPString(host)
71 return anonIp, err
72}
73
// cleanUrl parses orig and returns its host and path components. Query string
// and fragment are discarded. If orig cannot be parsed, both results are empty.
func cleanUrl(orig string) (string, string) {
	parsed, err := url.Parse(orig)
	if err == nil {
		return parsed.Host, parsed.Path
	}
	return "", ""
}
81
// cleanUrlFromRequest returns the host and path to record for r.
//
// The host is the first non-empty value among the x-forwarded-host header,
// r.URL.Host and r.Host, in that order. Only r.URL.Path is returned for the
// path: query params are deliberately left out for security reasons.
func cleanUrlFromRequest(r *http.Request) (string, string) {
	host := r.Header.Get("x-forwarded-host")
	switch {
	case host != "":
		// forwarded host wins
	case r.URL.Host != "":
		host = r.URL.Host
	default:
		host = r.Host
	}
	return host, r.URL.Path
}
93
// CleanUserAgent normalizes a User-Agent header value for storage.
//
// Surrounding whitespace is always removed, and the result is limited to at
// most 1000 bytes because HTTP headers have no length limit of their own.
// Truncation happens on a UTF-8 rune boundary so that a multi-byte character
// straddling the limit is dropped whole rather than cut in half.
func CleanUserAgent(ua string) string {
	const maxLen = 1000

	// Trim before measuring so padding does not eat into the byte budget and
	// long values are trimmed exactly like short ones.
	ua = strings.TrimSpace(ua)
	if len(ua) <= maxLen {
		return ua
	}

	// ua[cut] is the first byte that will be dropped. If it is a UTF-8
	// continuation byte (10xxxxxx), the rune it belongs to started earlier;
	// step back to that rune's first byte. A rune has at most three
	// continuation bytes, which bounds the walk even for malformed input.
	cut := maxLen
	for cut > maxLen-3 && ua[cut]&0xC0 == 0x80 {
		cut--
	}

	// The cut may have exposed whitespace at the new end.
	return strings.TrimSpace(ua[:cut])
}
101
// filterIp rejects hosts that are literal IP addresses.
//
// An empty host is passed through as ("", nil). A host that net.ParseIP
// accepts yields ("", error). Anything else is returned unchanged.
func filterIp(host string) (string, error) {
	switch {
	case host == "":
		return "", nil
	case net.ParseIP(host) != nil:
		return "", fmt.Errorf("host is an ip")
	default:
		return host, nil
	}
}
112
// CleanReferer reduces a raw Referer header value to a lowercase hostname.
//
// Only the host is kept, for security reasons:
// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
//
// An empty (or whitespace-only) referer yields ("", nil). A referer whose
// host is a literal IP address also yields ("", nil): the address is not
// stored, but the visit is not rejected because of it. An error is returned
// only when the value cannot be parsed as a URL.
func CleanReferer(raw string) (string, error) {
	ref := strings.TrimSpace(raw)
	if ref == "" {
		return "", nil
	}

	// Referers sometimes don't include a scheme, but url.Parse needs one to
	// recognize the host. Test for the full "http://" / "https://" prefix,
	// case-insensitively: a bare "http" prefix also matches scheme-less hosts
	// such as "httpbin.org" and would leave them without a host after parsing.
	lowered := strings.ToLower(ref)
	if !strings.HasPrefix(lowered, "http://") && !strings.HasPrefix(lowered, "https://") {
		ref = "https://" + ref
	}

	u, err := url.Parse(ref)
	if err != nil {
		return "", err
	}

	hostname := strings.TrimSpace(strings.ToLower(u.Hostname()))
	// IP referers are dropped rather than treated as an error.
	if net.ParseIP(hostname) != nil {
		return "", nil
	}
	return hostname, nil
}
133
134func CleanHost(raw string) (string, error) {
135 prep := strings.TrimSpace(strings.ToLower(raw))
136 if prep == "" {
137 return "", fmt.Errorf("host is blank")
138 }
139 // hosts dont usually include scheme but we need it
140 if !strings.HasPrefix(prep, "http") {
141 prep = "https://" + prep
142 }
143 // no clue why but our prod data contains periods
144 prep = strings.Trim(prep, ".")
145 // we only want to store host for security reasons
146 // https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
147 u, err := url.Parse(prep)
148 if err != nil {
149 return raw, err
150 }
151 host := u.Hostname()
152 host, err = filterIp(host)
153 return host, err
154}
155
156var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
157
158func AnalyticsVisitFromVisit(visit *db.AnalyticsVisits, dbpool db.DB, secret string) error {
159 if !dbpool.HasFeatureByUser(visit.UserID, "analytics") {
160 return ErrAnalyticsDisabled
161 }
162
163 err := trackableUserAgent(visit.UserAgent)
164 if err != nil {
165 return err
166 }
167
168 ipAddress, err := cleanIpAddress(visit.IpAddress)
169 if err != nil {
170 return err
171 }
172 visit.IpAddress = HmacString(secret, ipAddress)
173 _, path := cleanUrl(visit.Path)
174 visit.Path = path
175
176 referer, err := CleanReferer(visit.Referer)
177 if err != nil {
178 return err
179 }
180 visit.Referer = referer
181
182 hostname, err := CleanHost(visit.Host)
183 if err != nil {
184 return err
185 }
186 visit.Host = hostname
187 visit.UserAgent = CleanUserAgent(visit.UserAgent)
188
189 return nil
190}
191
192func ipFromRequest(r *http.Request) string {
193 // https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
194 ipOrig := r.Header.Get("x-forwarded-for")
195 if ipOrig == "" {
196 ipOrig = r.RemoteAddr
197 }
198 // probably means this is a web tunnel
199 if ipOrig == "" || ipOrig == "@" {
200 sshCtx, err := GetSshCtx(r)
201 if err == nil {
202 ipOrig = sshCtx.RemoteAddr().String()
203 }
204 }
205
206 return ipOrig
207}
208
209func AnalyticsVisitFromRequest(r *http.Request, dbpool db.DB, userID string) (*db.AnalyticsVisits, error) {
210 if !dbpool.HasFeatureByUser(userID, "analytics") {
211 return nil, ErrAnalyticsDisabled
212 }
213
214 err := trackableRequest(r)
215 if err != nil {
216 return nil, err
217 }
218
219 ipAddress := ipFromRequest(r)
220 host, path := cleanUrlFromRequest(r)
221
222 return &db.AnalyticsVisits{
223 UserID: userID,
224 Host: host,
225 Path: path,
226 IpAddress: ipAddress,
227 UserAgent: r.UserAgent(),
228 Referer: r.Referer(),
229 Status: http.StatusOK,
230 }, nil
231}
232
233func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
234 drain := metrics.RegisterReconnectMetricRecorder(
235 context.Background(),
236 logger,
237 shared.NewPicoPipeClient(),
238 100,
239 10*time.Millisecond,
240 )
241
242 for visit := range ch {
243 data, err := json.Marshal(visit)
244 if err != nil {
245 logger.Error("could not json marshall visit record", "err", err)
246 continue
247 }
248
249 data = append(data, '\n')
250
251 _, err = drain.Write(data)
252 if err != nil {
253 logger.Error("could not write to metric-drain", "err", err)
254 }
255 }
256}