- commit
- 23438de
- parent
- 4cd5fc0
- author
- Eric Bower
- date
- 2026-05-03 13:36:05 -0400 EDT
refactor(visits): aggregate visit tables Previously we had an analytics_visits table that held all the raw visit data. This meant our analytics UI was constantly executing queries across 18 mil records, which was extremely slow. This change aggregates those analytics every month and then deletes all the raw data. We keep 2 months' worth of raw data, and everything else gets pushed into the aggregate tables.
9 files changed,
+1825,
-113
M
Makefile
+4,
-2
1@@ -150,10 +150,11 @@ migrate:
2 $(DOCKER_CMD) exec -i $(DB_CONTAINER) psql -U $(PGUSER) -d $(PGDATABASE) < ./sql/migrations/20251226_add_pipe_monitoring.sql
3 $(DOCKER_CMD) exec -i $(DB_CONTAINER) psql -U $(PGUSER) -d $(PGDATABASE) < ./sql/migrations/20260116_add_analytics_filter_indexes.sql
4 $(DOCKER_CMD) exec -i $(DB_CONTAINER) psql -U $(PGUSER) -d $(PGDATABASE) < ./sql/migrations/20260305_add_forms_table.sql
5+ $(DOCKER_CMD) exec -i $(DB_CONTAINER) psql -U $(PGUSER) -d $(PGDATABASE) < ./sql/migrations/20260503_add_analytics_summary_tables.sql
6 .PHONY: migrate
7
8 latest:
9- $(DOCKER_CMD) exec -i $(DB_CONTAINER) psql -U $(PGUSER) -d $(PGDATABASE) < ./sql/migrations/20260305_add_forms_table.sql
10+ $(DOCKER_CMD) exec -i $(DB_CONTAINER) psql -U $(PGUSER) -d $(PGDATABASE) < ./sql/migrations/20260503_add_analytics_summary_tables.sql
11 .PHONY: latest
12
13 psql:
14@@ -164,8 +165,9 @@ dump:
15 $(DOCKER_CMD) exec $(DB_CONTAINER) pg_dump -U $(PGUSER) $(PGDATABASE) > ./backup.sql
16 .PHONY: dump
17
18+BACKUP_FILE?=./backup.sql
19 restore:
20- $(DOCKER_CMD) cp ./backup.sql $(DB_CONTAINER):/backup.sql
21+ $(DOCKER_CMD) cp $(BACKUP_FILE) $(DB_CONTAINER):/backup.sql
22 $(DOCKER_CMD) exec -it $(DB_CONTAINER) /bin/bash
23 # psql postgres -U postgres -d pico < /backup.sql
24 .PHONY: restore
+101,
-0
1@@ -0,0 +1,101 @@
2+package main
3+
4+import (
5+ "flag"
6+ "fmt"
7+ "log/slog"
8+ "os"
9+ "time"
10+
11+ "github.com/picosh/pico/pkg/apps/auth"
12+ "github.com/picosh/pico/pkg/db/postgres"
13+)
14+
15+func main() {
16+ monthPtr := flag.String("month", "", "target month in YYYY-MM format (default: previous month)")
17+ backfill := flag.Bool("backfill", false, "aggregate all historical months up to the month before last")
18+ dryRun := flag.Bool("dry-run", false, "print months that would be processed without running aggregation")
19+ flag.Parse()
20+
21+ logger := slog.Default()
22+ dbURL := os.Getenv("DATABASE_URL")
23+ if dbURL == "" {
24+ fmt.Fprintln(os.Stderr, "DATABASE_URL must be set")
25+ os.Exit(1)
26+ }
27+ dbpool := postgres.NewDB(dbURL, logger)
28+ defer func() { _ = dbpool.Close() }()
29+
30+ if *backfill {
31+ runBackfill(dbpool, logger, *dryRun)
32+ return
33+ }
34+
35+ targetMonth := parseMonth(*monthPtr, logger)
36+ if err := auth.RunAnalyticsAggregation(dbpool, logger, targetMonth); err != nil {
37+ logger.Error("aggregation failed", "err", err)
38+ os.Exit(1)
39+ }
40+}
41+
42+func runBackfill(dbpool *postgres.PsqlDB, logger *slog.Logger, dryRun bool) {
43+ months, err := fetchHistoricalMonths(dbpool)
44+ if err != nil {
45+ logger.Error("failed to fetch historical months", "err", err)
46+ os.Exit(1)
47+ }
48+ if dryRun {
49+ fmt.Println("Months to backfill:")
50+ for _, m := range months {
51+ fmt.Println(" ", m.Format("2006-01"))
52+ }
53+ return
54+ }
55+ for _, m := range months {
56+ if err := auth.RunAnalyticsAggregation(dbpool, logger, m); err != nil {
57+ logger.Error("aggregation failed for month", "month", m.Format("2006-01"), "err", err)
58+ }
59+ }
60+ logger.Info("backfill complete", "months", len(months))
61+}
62+
63+// fetchHistoricalMonths returns all distinct months that have data in analytics_visits,
64+// excluding the current month and the previous month (handled by the auth service cron).
65+func fetchHistoricalMonths(dbpool *postgres.PsqlDB) ([]time.Time, error) {
66+ cutoff := time.Now().AddDate(0, -1, 0)
67+ cutoffMonth := time.Date(cutoff.Year(), cutoff.Month(), 1, 0, 0, 0, 0, time.UTC)
68+
69+ rows, err := dbpool.Db.Queryx(`
70+ SELECT DISTINCT date_trunc('month', created_at)::date AS month_start
71+ FROM analytics_visits
72+ WHERE created_at < $1
73+ ORDER BY month_start ASC
74+ `, cutoffMonth)
75+ if err != nil {
76+ return nil, err
77+ }
78+ defer func() { _ = rows.Close() }()
79+
80+ var months []time.Time
81+ for rows.Next() {
82+ var monthDate time.Time
83+ if err := rows.Scan(&monthDate); err != nil {
84+ return nil, err
85+ }
86+ months = append(months, monthDate)
87+ }
88+ return months, rows.Err()
89+}
90+
91+func parseMonth(arg string, logger *slog.Logger) time.Time {
92+ now := time.Now()
93+ if arg != "" {
94+ t, err := time.Parse("2006-01", arg)
95+ if err != nil {
96+ logger.Error("invalid month format, use YYYY-MM", "err", err, "input", arg)
97+ os.Exit(1)
98+ }
99+ return t
100+ }
101+ return now.AddDate(0, -1, 0)
102+}
+306,
-0
1@@ -0,0 +1,306 @@
2+package main
3+
4+import (
5+ "flag"
6+ "fmt"
7+ "log/slog"
8+ "os"
9+ "time"
10+
11+ "github.com/picosh/pico/pkg/db"
12+ "github.com/picosh/pico/pkg/db/postgres"
13+)
14+
15+func main() {
16+ host := flag.String("host", "", "host to compare (required)")
17+ userID := flag.String("user-id", "", "user ID to compare (required)")
18+ interval := flag.String("interval", "day", "interval: day, week, month")
19+ origin := flag.String("origin", "", "origin date in YYYY-MM-DD format (default: start of year)")
20+ flag.Parse()
21+
22+ if *host == "" || *userID == "" {
23+ fmt.Fprintln(os.Stderr, "usage: compare-analytics -host example.com -user-id <uuid> [-interval day|week|month] [-origin 2025-01-01]")
24+ os.Exit(1)
25+ }
26+
27+ logger := slog.Default()
28+ dbURL := os.Getenv("DATABASE_URL")
29+ if dbURL == "" {
30+ fmt.Fprintln(os.Stderr, "DATABASE_URL must be set")
31+ os.Exit(1)
32+ }
33+
34+ dbpool := postgres.NewDB(dbURL, logger)
35+ defer func() { _ = dbpool.Close() }()
36+
37+ originTime := time.Date(time.Now().AddDate(-1, 0, 0).Year(), time.January, 1, 0, 0, 0, 0, time.UTC)
38+ if *origin != "" {
39+ var err error
40+ originTime, err = time.Parse("2006-01-02", *origin)
41+ if err != nil {
42+ logger.Error("invalid origin format, use YYYY-MM-DD", "err", err)
43+ os.Exit(1)
44+ }
45+ }
46+
47+ opts := &db.SummaryOpts{
48+ Host: *host,
49+ UserID: *userID,
50+ Interval: *interval,
51+ Origin: originTime,
52+ }
53+
54+ // New path: reads from summary tables + current month raw
55+ newSummary, err := dbpool.VisitSummary(opts)
56+ if err != nil {
57+ logger.Error("VisitSummary failed", "err", err)
58+ os.Exit(1)
59+ }
60+
61+ // Old path: direct queries against analytics_visits
62+ oldSummary, err := queryRawVisits(dbpool, opts)
63+ if err != nil {
64+ logger.Error("raw query failed", "err", err)
65+ os.Exit(1)
66+ }
67+
68+ // Compare
69+ fmt.Println("=== Unique Visitors (Intervals) ===")
70+ compareIntervals(newSummary.Intervals, oldSummary.Intervals)
71+
72+ fmt.Println("\n=== Top URLs ===")
73+ compareUrls(newSummary.TopUrls, oldSummary.TopUrls)
74+
75+ fmt.Println("\n=== Top Referers ===")
76+ compareUrls(newSummary.TopReferers, oldSummary.TopReferers)
77+
78+ fmt.Println("\n=== 404 URLs ===")
79+ compareUrls(newSummary.NotFoundUrls, oldSummary.NotFoundUrls)
80+}
81+
82+func queryRawVisits(dbpool *postgres.PsqlDB, opts *db.SummaryOpts) (*db.SummaryVisits, error) {
83+ intervals, err := rawVisitUnique(dbpool, opts)
84+ if err != nil {
85+ return nil, fmt.Errorf("raw visit unique: %w", err)
86+ }
87+ urls, err := rawVisitUrl(dbpool, opts)
88+ if err != nil {
89+ return nil, fmt.Errorf("raw visit url: %w", err)
90+ }
91+ refs, err := rawVisitReferer(dbpool, opts)
92+ if err != nil {
93+ return nil, fmt.Errorf("raw visit referer: %w", err)
94+ }
95+ notFound, err := rawVisitUrlNotFound(dbpool, opts)
96+ if err != nil {
97+ return nil, fmt.Errorf("raw visit url not found: %w", err)
98+ }
99+ return &db.SummaryVisits{
100+ Intervals: intervals,
101+ TopUrls: urls,
102+ TopReferers: refs,
103+ NotFoundUrls: notFound,
104+ }, nil
105+}
106+
107+func rawVisitUnique(dbpool *postgres.PsqlDB, opts *db.SummaryOpts) ([]*db.VisitInterval, error) {
108+ where, with := visitFilterBy(opts)
109+ query := fmt.Sprintf(`
110+ SELECT date_trunc('%s', created_at)::timestamptz as interval_start,
111+ count(DISTINCT ip_address) as unique_visitors
112+ FROM analytics_visits
113+ WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND status <> 404
114+ GROUP BY interval_start
115+ ORDER BY interval_start`, opts.Interval, where)
116+
117+ rows, err := dbpool.Db.Queryx(query, opts.Origin, with, opts.UserID)
118+ if err != nil {
119+ return nil, err
120+ }
121+ defer func() { _ = rows.Close() }()
122+
123+ var intervals []*db.VisitInterval
124+ for rows.Next() {
125+ iv := &db.VisitInterval{}
126+ if err := rows.Scan(&iv.Interval, &iv.Visitors); err != nil {
127+ return nil, err
128+ }
129+ intervals = append(intervals, iv)
130+ }
131+ return intervals, rows.Err()
132+}
133+
134+func rawVisitUrl(dbpool *postgres.PsqlDB, opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
135+ where, with := visitFilterBy(opts)
136+ query := fmt.Sprintf(`
137+ SELECT path as url, count(DISTINCT ip_address) as count
138+ FROM analytics_visits
139+ WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND path <> '' AND status <> 404
140+ GROUP BY path
141+ ORDER BY count DESC
142+ LIMIT 10`, where)
143+
144+ rows, err := dbpool.Db.Queryx(query, opts.Origin, with, opts.UserID)
145+ if err != nil {
146+ return nil, err
147+ }
148+ defer func() { _ = rows.Close() }()
149+
150+ var urls []*db.VisitUrl
151+ for rows.Next() {
152+ u := &db.VisitUrl{}
153+ if err := rows.Scan(&u.Url, &u.Count); err != nil {
154+ return nil, err
155+ }
156+ urls = append(urls, u)
157+ }
158+ return urls, rows.Err()
159+}
160+
161+func rawVisitReferer(dbpool *postgres.PsqlDB, opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
162+ where, with := visitFilterBy(opts)
163+ query := fmt.Sprintf(`
164+ SELECT referer as url, count(DISTINCT ip_address) as count
165+ FROM analytics_visits
166+ WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND referer <> '' AND status <> 404
167+ GROUP BY referer
168+ ORDER BY count DESC
169+ LIMIT 10`, where)
170+
171+ rows, err := dbpool.Db.Queryx(query, opts.Origin, with, opts.UserID)
172+ if err != nil {
173+ return nil, err
174+ }
175+ defer func() { _ = rows.Close() }()
176+
177+ var urls []*db.VisitUrl
178+ for rows.Next() {
179+ u := &db.VisitUrl{}
180+ if err := rows.Scan(&u.Url, &u.Count); err != nil {
181+ return nil, err
182+ }
183+ urls = append(urls, u)
184+ }
185+ return urls, rows.Err()
186+}
187+
188+func rawVisitUrlNotFound(dbpool *postgres.PsqlDB, opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
189+ where, with := visitFilterBy(opts)
190+ limit := opts.Limit
191+ if limit == 0 {
192+ limit = 10
193+ }
194+ query := fmt.Sprintf(`
195+ SELECT path as url, count(DISTINCT ip_address) as count
196+ FROM analytics_visits
197+ WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND path <> '' AND status = 404
198+ GROUP BY path
199+ ORDER BY count DESC
200+ LIMIT %d`, where, limit)
201+
202+ rows, err := dbpool.Db.Queryx(query, opts.Origin, with, opts.UserID)
203+ if err != nil {
204+ return nil, err
205+ }
206+ defer func() { _ = rows.Close() }()
207+
208+ var urls []*db.VisitUrl
209+ for rows.Next() {
210+ u := &db.VisitUrl{}
211+ if err := rows.Scan(&u.Url, &u.Count); err != nil {
212+ return nil, err
213+ }
214+ urls = append(urls, u)
215+ }
216+ return urls, rows.Err()
217+}
218+
219+func visitFilterBy(opts *db.SummaryOpts) (string, string) {
220+ if opts.Host != "" {
221+ return "host", opts.Host
222+ }
223+ if opts.Path != "" {
224+ return "path", opts.Path
225+ }
226+ return "host", ""
227+}
228+
229+func compareIntervals(newData, oldData []*db.VisitInterval) {
230+ newMap := make(map[string]int)
231+ for _, iv := range newData {
232+ key := iv.Interval.Format("2006-01-02")
233+ newMap[key] = iv.Visitors
234+ }
235+ oldMap := make(map[string]int)
236+ for _, iv := range oldData {
237+ key := iv.Interval.Format("2006-01-02")
238+ oldMap[key] = iv.Visitors
239+ }
240+
241+ // Collect all keys
242+ keys := make(map[string]bool)
243+ for k := range newMap {
244+ keys[k] = true
245+ }
246+ for k := range oldMap {
247+ keys[k] = true
248+ }
249+
250+ mismatches := 0
251+ for k := range keys {
252+ n, nOk := newMap[k]
253+ o, oOk := oldMap[k]
254+ if nOk && oOk {
255+ if n != o {
256+ fmt.Printf(" MISMATCH %s: new=%d old=%d\n", k, n, o)
257+ mismatches++
258+ }
259+ } else if nOk {
260+ fmt.Printf(" NEW ONLY %s: %d\n", k, n)
261+ mismatches++
262+ } else {
263+ fmt.Printf(" OLD ONLY %s: %d\n", k, o)
264+ mismatches++
265+ }
266+ }
267+ if mismatches == 0 {
268+ fmt.Println(" OK: all intervals match")
269+ } else {
270+ fmt.Printf(" %d mismatches\n", mismatches)
271+ }
272+}
273+
274+func compareUrls(newData, oldData []*db.VisitUrl) {
275+ newMap := make(map[string]int)
276+ for _, u := range newData {
277+ newMap[u.Url] = u.Count
278+ }
279+ oldMap := make(map[string]int)
280+ for _, u := range oldData {
281+ oldMap[u.Url] = u.Count
282+ }
283+
284+ mismatches := 0
285+ for url, nCount := range newMap {
286+ if oCount, ok := oldMap[url]; ok {
287+ if nCount != oCount {
288+ fmt.Printf(" MISMATCH %s: new=%d old=%d\n", url, nCount, oCount)
289+ mismatches++
290+ }
291+ } else {
292+ fmt.Printf(" NEW ONLY %s: %d\n", url, nCount)
293+ mismatches++
294+ }
295+ }
296+ for url, oCount := range oldMap {
297+ if _, ok := newMap[url]; !ok {
298+ fmt.Printf(" OLD ONLY %s: %d\n", url, oCount)
299+ mismatches++
300+ }
301+ }
302+ if mismatches == 0 {
303+ fmt.Println(" OK: all entries match")
304+ } else {
305+ fmt.Printf(" %d mismatches\n", mismatches)
306+ }
307+}
+336,
-0
1@@ -0,0 +1,336 @@
2+# Analytics Data Retention Proposal
3+
4+## Problem
5+
6+The `analytics_visits` table has **18 million rows** in production. All analytics queries (unique visitors, top URLs, top referers, 404s) aggregate raw visit data on every request, causing slow query performance. The table continues to grow unbounded.
7+
8+## Goal
9+
10+- Keep `analytics_visits` small by deleting raw data older than **1 month**
11+- Pre-aggregate monthly stats into summary tables so historical queries remain fast
12+- Preserve all data the user currently sees in the TUI and SSH CLI
13+
14+---
15+
16+## What We Display Today
17+
18+### TUI Analytics Screen (`pkg/tui/analytics.go`)
19+
20+When a user views analytics for a site, we show:
21+
22+| Section | Query | Aggregation |
23+|---------|-------|-------------|
24+| **Site list** (left pane) | `visitHost()` | `COUNT(DISTINCT ip_address)` grouped by `host` |
25+| **Visits by period** | `visitUnique()` | `COUNT(DISTINCT ip_address)` grouped by `date_trunc(interval, created_at)` |
26+| **Top URLs** | `visitUrl()` | `COUNT(DISTINCT ip_address)` grouped by `path`, LIMIT 10, excludes 404s |
27+| **Top Referers** | `visitReferer()` | `COUNT(DISTINCT ip_address)` grouped by `referer`, LIMIT 10, excludes 404s |
28+| **Top 404s** | `VisitUrlNotFound()` | `COUNT(DISTINCT ip_address)` grouped by `path`, LIMIT 10, status=404 |
29+
30+The TUI supports two intervals:
31+- **"day"** — daily buckets from start of current month (`StartOfMonth()`)
32+- **"month"** — monthly buckets from 1 year ago (`StartOfYear()` which is `now - 1 year`)
33+
34+### SSH CLI (`pkg/apps/pico/cli.go`)
35+
36+| Command | Query | Aggregation |
37+|---------|-------|-------------|
38+| `not-found hostname.com [year\|month]` | `VisitUrlNotFound()` | `COUNT(DISTINCT ip_address)` grouped by `path`, LIMIT 100, status=404 |
39+
40+---
41+
42+## Proposed Schema
43+
44+### New Table: `analytics_user_sites`
45+
46+Tracks every site a user has had traffic on. Powers the site list in the left pane of the TUI.
47+
48+```sql
49+CREATE TABLE analytics_user_sites (
50+ id SERIAL PRIMARY KEY,
51+ user_id UUID NOT NULL REFERENCES app_users(id) ON DELETE CASCADE,
52+ host VARCHAR(253) NOT NULL,
53+ total_visits INT NOT NULL DEFAULT 0, -- running total of all-time unique visitors
54+ last_seen DATE NOT NULL, -- most recent month with traffic
55+ created_at TIMESTAMP NOT NULL DEFAULT now(),
56+ updated_at TIMESTAMP NOT NULL DEFAULT now(),
57+
58+ UNIQUE (user_id, host)
59+);
60+
61+CREATE INDEX idx_user_sites_user ON analytics_user_sites (user_id);
62+```
63+
64+**How it's populated:** During monthly aggregation, for each `(user_id, host)` pair found in the previous month's raw data:
65+- `INSERT ... ON CONFLICT (user_id, host) DO UPDATE` — add that month's unique visitor count to `total_visits`, set `last_seen` to the month's date.
66+
67+**Query replacement:** `visitHost()` becomes:
68+```sql
69+SELECT host, total_visits as host_count
70+FROM analytics_user_sites
71+WHERE user_id = $1
72+ORDER BY total_visits DESC
73+```
74+
75+No GROUP BY, no scan of raw data — just an indexed lookup.
76+
77+### New Table: `analytics_monthly_visits`
78+
79+Stores pre-aggregated daily unique visitor counts per host. This powers the "visits by period" chart and site list.
80+
81+```sql
82+CREATE TABLE analytics_monthly_visits (
83+ id SERIAL PRIMARY KEY,
84+ user_id UUID NOT NULL REFERENCES app_users(id) ON DELETE CASCADE,
85+ host VARCHAR(253) NOT NULL,
86+ visit_date DATE NOT NULL, -- the day this count is for
87+ unique_visits INT NOT NULL DEFAULT 0, -- COUNT(DISTINCT ip_address) for that day
88+ mobile_visits INT NOT NULL DEFAULT 0, -- unique visitors from mobile user-agents
89+ desktop_visits INT NOT NULL DEFAULT 0, -- unique visitors from desktop user-agents
90+ created_at TIMESTAMP NOT NULL DEFAULT now(),
91+
92+ UNIQUE (user_id, host, visit_date)
93+);
94+
95+CREATE INDEX idx_monthly_visits_user_host ON analytics_monthly_visits (user_id, host);
96+CREATE INDEX idx_monthly_visits_user_date ON analytics_monthly_visits (user_id, visit_date DESC);
97+```
98+
99+**Why this shape:** The TUI queries `date_trunc('day', created_at)` or `date_trunc('month', created_at)` with `COUNT(DISTINCT ip_address)`. We pre-compute the daily distinct count. For "month" interval view, the app sums daily rows within each month client-side or via a simple `GROUP BY date_trunc('month', visit_date)`.
100+
101+**Device detection:** `mobile_visits` and `desktop_visits` are derived from the `user_agent` column during aggregation. A user-agent library (e.g., [`pdericx/ua-parser`](https://github.com/pdericx/ua-parser) or [`mssola/useragent`](https://github.com/mssola/useragent)) classifies each visit. An IP is counted as mobile if its user-agent is mobile, and desktop otherwise. Note: an IP that appears with both mobile and desktop user-agents within the same day is counted in both buckets, so `mobile_visits + desktop_visits` may exceed `unique_visits`.
102+
103+### New Table: `analytics_monthly_top_urls`
104+
105+Stores the top URLs per host per month. Powers "Top URLs" and "Top 404s" sections.
106+
107+```sql
108+CREATE TABLE analytics_monthly_top_urls (
109+ id SERIAL PRIMARY KEY,
110+ user_id UUID NOT NULL REFERENCES app_users(id) ON DELETE CASCADE,
111+ host VARCHAR(253) NOT NULL,
112+ month DATE NOT NULL, -- first day of the month (e.g., 2025-01-01)
113+ path VARCHAR(2048) NOT NULL,
114+ unique_visits INT NOT NULL DEFAULT 0, -- COUNT(DISTINCT ip_address) for that path
115+ is_404 BOOLEAN NOT NULL DEFAULT false,
116+ rank INT NOT NULL, -- 1-10 (top 10) or 1-100 for CLI
117+ created_at TIMESTAMP NOT NULL DEFAULT now(),
118+
119+ UNIQUE (user_id, host, month, path, is_404)
120+);
121+
122+CREATE INDEX idx_monthly_top_urls_user_host_month ON analytics_monthly_top_urls (user_id, host, month);
123+CREATE INDEX idx_monthly_top_urls_404 ON analytics_monthly_top_urls (user_id, host, month, is_404, rank);
124+```
125+
126+### New Table: `analytics_monthly_top_referers`
127+
128+Stores top referers per host per month. Powers "Top Referers" section.
129+
130+```sql
131+CREATE TABLE analytics_monthly_top_referers (
132+ id SERIAL PRIMARY KEY,
133+ user_id UUID NOT NULL REFERENCES app_users(id) ON DELETE CASCADE,
134+ host VARCHAR(253) NOT NULL,
135+ month DATE NOT NULL, -- first day of the month
136+ referer VARCHAR(253) NOT NULL,
137+ unique_visits INT NOT NULL DEFAULT 0,
138+ rank INT NOT NULL, -- 1-10
139+ created_at TIMESTAMP NOT NULL DEFAULT now(),
140+
141+ UNIQUE (user_id, host, month, referer)
142+);
143+
144+CREATE INDEX idx_monthly_referers_user_host_month ON analytics_monthly_top_referers (user_id, host, month);
145+```
146+
147+---
148+
149+## Monthly Aggregation Job
150+
151+A script (e.g., `cmd/scripts/analytics-aggregate/main.go`) runs **once per month** (e.g., on the 1st). It:
152+
153+### Step 1: Aggregate the previous month
154+
155+For each user with the `analytics` feature flag who has raw visits in the previous month:
156+
157+```
158+FOR each (user_id, host) pair in analytics_visits WHERE created_at IN previous_month:
159+ -- Daily unique visitors (with device breakdown)
160+ -- Device detection runs in Go (user-agent parsing), not pure SQL.
161+ -- Pseudo-SQL for clarity:
162+ INSERT INTO analytics_monthly_visits (user_id, host, visit_date, unique_visits, mobile_visits, desktop_visits)
163+ SELECT user_id, host, date_trunc('day', created_at)::date,
164+ COUNT(DISTINCT ip_address),
165+ COUNT(DISTINCT CASE WHEN is_mobile(user_agent) THEN ip_address END),
166+ COUNT(DISTINCT CASE WHEN NOT is_mobile(user_agent) THEN ip_address END)
167+ FROM analytics_visits
168+ WHERE user_id = $1 AND host = $2 AND created_at >= start_of_month AND created_at < start_of_next_month
169+ GROUP BY user_id, host, date_trunc('day', created_at)
170+ ON CONFLICT (user_id, host, visit_date) DO NOTHING;
171+
172+ -- In practice, `is_mobile()` is a Go function. The aggregation job fetches
173+ -- raw rows in batches, classifies each user_agent, and issues INSERTs.
174+
175+ -- Top 10 URLs (non-404)
176+ INSERT INTO analytics_monthly_top_urls (user_id, host, month, path, unique_visits, is_404, rank)
177+ SELECT user_id, host, date_trunc('month', created_at)::date, path, COUNT(DISTINCT ip_address), false,
178+ ROW_NUMBER() OVER (PARTITION BY user_id, host ORDER BY COUNT(DISTINCT ip_address) DESC)
179+ FROM analytics_visits
180+ WHERE user_id = $1 AND host = $2 AND created_at >= start_of_month AND created_at < start_of_next_month AND status <> 404
181+ GROUP BY user_id, host, path
182+ HAVING ROW_NUMBER() <= 10 -- pseudo-SQL: window functions are not allowed in HAVING; in real SQL, wrap in a subquery and filter on rank
183+ ON CONFLICT (user_id, host, month, path, is_404) DO NOTHING;
184+
185+ -- Top 10 URLs (404s)
186+ INSERT INTO analytics_monthly_top_urls (user_id, host, month, path, unique_visits, is_404, rank)
187+ SELECT user_id, host, date_trunc('month', created_at)::date, path, COUNT(DISTINCT ip_address), true,
188+ ROW_NUMBER() OVER (PARTITION BY user_id, host ORDER BY COUNT(DISTINCT ip_address) DESC)
189+ FROM analytics_visits
190+ WHERE user_id = $1 AND host = $2 AND created_at >= start_of_month AND created_at < start_of_next_month AND status = 404
191+ GROUP BY user_id, host, path
192+ HAVING ROW_NUMBER() <= 100 -- 100 for CLI, 10 for TUI (use 100 to cover both); pseudo-SQL — filter rank via a wrapping subquery in real SQL
193+ ON CONFLICT (user_id, host, month, path, is_404) DO NOTHING;
194+
195+ -- Top 10 referers
196+ INSERT INTO analytics_monthly_top_referers (user_id, host, month, referer, unique_visits, rank)
197+ SELECT user_id, host, date_trunc('month', created_at)::date, referer, COUNT(DISTINCT ip_address),
198+ ROW_NUMBER() OVER (PARTITION BY user_id, host ORDER BY COUNT(DISTINCT ip_address) DESC)
199+ FROM analytics_visits
200+ WHERE user_id = $1 AND host = $2 AND created_at >= start_of_month AND created_at < start_of_next_month AND referer <> '' AND status <> 404
201+ GROUP BY user_id, host, referer
202+ HAVING ROW_NUMBER() <= 10 -- pseudo-SQL: window functions are not allowed in HAVING; in real SQL, wrap in a subquery and filter on rank
203+ ON CONFLICT (user_id, host, month, referer) DO NOTHING;
204+```
205+
206+### Step 2: Delete raw data older than 1 month
207+
208+```sql
209+DELETE FROM analytics_visits
210+WHERE created_at < date_trunc('month', now())::timestamp;
211+```
212+
213+This keeps only the current month's raw data. After the first run, `analytics_visits` should drop from ~18M rows to ~1 month of data (estimated 150K–500K depending on traffic).
214+
215+### Step 3: Backfill (one-time)
216+
217+On first deployment, run the aggregation for all historical months before deleting anything. This is a separate migration pass that iterates month-by-month from the oldest data to 2 months ago.
218+
219+---
220+
221+## Query Changes
222+
223+### Current queries → New queries
224+
225+Each existing query method gets a `since` parameter. If `since` is within the last month, query `analytics_visits` directly (current behavior). If `since` spans older months, union results from summary tables with raw data.
226+
227+#### `visitUnique()` — visits by period
228+
229+```
230+-- For "day" interval (current month only, always from raw):
231+-- No change — reads from analytics_visits
232+
233+-- For "month" interval (historical):
234+-- Read from analytics_monthly_visits, sum daily counts per month:
235+SELECT date_trunc('month', visit_date)::date as interval_start, SUM(unique_visits) as visitors
236+FROM analytics_monthly_visits
237+WHERE user_id = $1 AND host = $2 AND visit_date >= $3
238+GROUP BY date_trunc('month', visit_date)
239+ORDER BY interval_start
240+```
241+
242+#### `visitUrl()` — top URLs
243+
244+```
245+-- Current month: read from analytics_visits (no change)
246+-- Historical months: read from analytics_monthly_top_urls WHERE is_404 = false
247+```
248+
249+#### `visitReferer()` — top referers
250+
251+```
252+-- Current month: read from analytics_visits (no change)
253+-- Historical months: read from analytics_monthly_top_referers
254+```
255+
256+#### `VisitUrlNotFound()` — top 404s
257+
258+```
259+-- Current month: read from analytics_visits (no change)
260+-- Historical months: read from analytics_monthly_top_urls WHERE is_404 = true
261+```
262+
263+#### `visitHost()` — site list
264+
265+```
266+-- Replaced entirely by analytics_user_sites lookup:
267+SELECT host, total_visits as host_count
268+FROM analytics_user_sites
269+WHERE user_id = $1
270+ORDER BY total_visits DESC
271+```
272+
273+No raw table scan. Current month's new hosts are upserted during the monthly aggregation job.
274+
275+---
276+
277+## Implementation Phases
278+
279+### Phase 1: Schema + Backfill (no behavior change)
280+1. Create the four new tables (user_sites, monthly_visits, monthly_top_urls, monthly_top_referers)
281+2. Write the aggregation script (`cmd/scripts/analytics-aggregate/`)
282+3. Run backfill for all historical data
283+4. **Do not delete any data yet** — verify summary tables are correct
284+
285+### Phase 2: Query migration
286+1. Add new DB interface methods that read from summary tables
287+2. Modify existing query methods to union summary + raw data
288+3. Verify TUI and CLI output matches before/after
289+4. Run `make check`
290+
291+### Phase 3: Enable deletion
292+1. Add raw data deletion to the aggregation job
293+2. Schedule the job to run monthly (cron, systemd timer, or app-level scheduler)
294+3. Monitor `analytics_visits` row count after first deletion
295+
296+### Phase 4: Cleanup
297+1. Remove any code paths that no longer need raw historical data
298+2. Consider dropping unused indexes on `analytics_visits` since the table is now small
299+
300+---
301+
302+## Trade-offs
303+
304+| Aspect | Before | After |
305+|--------|--------|-------|
306+| `analytics_visits` size | 18M+ rows (unbounded) | ~1 month of data (~150K-500K) |
307+| Query latency | Scans full table each time | Reads small table or pre-aggregated rows |
308+| Historical granularity | Full raw data | Monthly top-10/100 summaries |
309+| Disk overhead | Single table | 4 additional tables (user_sites, monthly_visits, monthly_top_urls, monthly_top_referers) |
310+| Mobile/desktop | Not tracked | Tracked via user-agent classification during aggregation |
311+| Data loss | None | Raw data older than 1 month is gone |
312+
313+The key trade-off is that we lose the ability to run *arbitrary* ad-hoc queries on historical data. But we preserve exactly what the user sees today in the TUI and CLI. If we need new historical queries in the future, we'd add them to the aggregation job.
314+
315+---
316+
317+## Open Questions
318+
319+1. **Scheduling**: How do we run the monthly job? Options:
320+ - Cron job on the server
321+ - `systemd` timer
322+ - Built into one of our services as a startup check
323+ - External scheduler (GitHub Actions, etc.)
324+
325+2. **`visitHost()` (site list)**: Solved by `analytics_user_sites` table — simple indexed lookup, no aggregation needed.
326+
327+3. **Retention period**: Is 1 month the right window for raw data? Could adjust based on observed query patterns.
328+
329+4. **Concurrent writes**: During the aggregation window, new visits are still being inserted. The job needs to handle this (e.g., aggregate up to a specific timestamp, then delete up to that same timestamp).
330+
331+5. **Users who disable analytics**: The banner says "when analytics are disabled we do not purge usage statistics." Deletion should respect this — only delete for users with the `analytics` feature flag active.
332+
333+6. **Device detection library**: Which Go user-agent parser to use for mobile/desktop classification? Candidates:
334+ - [`mssola/useragent`](https://github.com/mssola/useragent) — lightweight, device.OS / device.Type
335+ - [`pdericx/ua-parser`](https://github.com/pdericx/ua-parser) — fuller UAParser port
336+ - [`danielgtaylor/mobile`](https://github.com/danielgtaylor/mobile) — simple mobile-only detection
337+ The existing codebase already uses `x-way/crawlerdetect` for bot filtering, so a similar pattern fits.
+437,
-0
1@@ -0,0 +1,437 @@
2+package auth
3+
4+import (
5+ "context"
6+ "fmt"
7+ "log/slog"
8+ "strings"
9+ "time"
10+
11+ "github.com/picosh/pico/pkg/db/postgres"
12+)
13+
14+// visitRow represents a single raw visit record for aggregation.
15+type visitRow struct {
16+ UserID string
17+ Host string
18+ Path string
19+ IPAddress string
20+ Referer string
21+ Status int
22+ UserAgent string
23+ CreatedAt time.Time
24+}
25+
26+// dayStats accumulates per-day visitor counts for a (user_id, host) pair.
27+type dayStats struct {
28+ Date time.Time
29+ AllIPs map[string]bool
30+ MobileIPs map[string]bool
31+ DesktopIPs map[string]bool
32+}
33+
34+// pathStats accumulates per-path visitor counts for a (user_id, host, status_code) triplet.
35+type pathStats struct {
36+ Path string
37+ Status int
38+ IPs map[string]bool
39+}
40+
41+// userHostPair represents a unique (user_id, host) combination.
42+type userHostPair struct {
43+ UserID string
44+ Host string
45+}
46+
47+// RunAnalyticsAggregation aggregates raw visits for the given month into summary tables.
48+// Exported for use by the analytics-aggregate CLI script.
49+func RunAnalyticsAggregation(dbpool *postgres.PsqlDB, logger *slog.Logger, targetMonth time.Time) error {
50+ monthStart := time.Date(targetMonth.Year(), targetMonth.Month(), 1, 0, 0, 0, 0, time.UTC)
51+ monthEnd := monthStart.AddDate(0, 1, 0)
52+
53+ logger.Info("starting analytics aggregation", "month", targetMonth.Format("2006-01"), "from", monthStart, "to", monthEnd)
54+
55+ pairs, err := fetchUserHostPairs(dbpool, monthStart, monthEnd)
56+ if err != nil {
57+ return fmt.Errorf("fetch user/host pairs: %w", err)
58+ }
59+ if len(pairs) == 0 {
60+ logger.Info("no user/host pairs to aggregate", "month", targetMonth.Format("2006-01"))
61+ return nil
62+ }
63+ logger.Info("found user/host pairs to process", "count", len(pairs), "month", targetMonth.Format("2006-01"))
64+
65+ for _, pair := range pairs {
66+ visits, err := fetchVisitsForPair(dbpool, pair.UserID, pair.Host, monthStart, monthEnd)
67+ if err != nil {
68+ logger.Error("failed to fetch visits", "err", err, "user_id", pair.UserID, "host", pair.Host)
69+ continue
70+ }
71+
72+ // Aggregate daily stats
73+ dayMap := aggregateDays(visits)
74+ if err := insertMonthlyVisits(dbpool, pair.UserID, pair.Host, dayMap); err != nil {
75+ logger.Error("failed to insert monthly visits", "err", err)
76+ continue
77+ }
78+
79+ // Aggregate top URLs grouped by status code
80+ topURLsByStatus := aggregatePaths(visits)
81+ for status, paths := range topURLsByStatus {
82+ if err := insertTopURLs(dbpool, pair.UserID, pair.Host, targetMonth, status, paths); err != nil {
83+ logger.Error("failed to insert top URLs", "err", err, "status", status)
84+ continue
85+ }
86+ }
87+
88+ // Aggregate top referers
89+ topReferers := aggregateReferers(visits)
90+ if err := insertTopReferers(dbpool, pair.UserID, pair.Host, targetMonth, topReferers); err != nil {
91+ logger.Error("failed to insert top referers", "err", err)
92+ continue
93+ }
94+
95+ // Upsert user site
96+ totalUnique := countTotalUnique(dayMap)
97+ if err := upsertUserSite(dbpool, pair.UserID, pair.Host, totalUnique, targetMonth); err != nil {
98+ logger.Error("failed to upsert user site", "err", err)
99+ continue
100+ }
101+ }
102+
103+ // Delete raw visit data for users with analytics enabled
104+ if err := deleteAggregatedVisits(dbpool, logger, monthStart, monthEnd); err != nil {
105+ logger.Error("failed to delete aggregated visits", "err", err, "month", targetMonth.Format("2006-01"))
106+ }
107+
108+ logger.Info("analytics aggregation complete", "month", targetMonth.Format("2006-01"))
109+ return nil
110+}
111+
// analyticsAggregationCron runs once per day to check if the previous month needs aggregation.
func analyticsAggregationCron(ctx context.Context, dbpool *postgres.PsqlDB, logger *slog.Logger) {
	// Check at midnight UTC every day
	// NOTE(review): the ticker fires every 24h from process start, so ticks are
	// not actually aligned to midnight — the comment above describes intent only.
	ticker := time.NewTicker(24 * time.Hour)
	defer ticker.Stop()

	// Run immediately on startup to catch up if the service was down
	if err := checkAndRunAggregation(dbpool, logger); err != nil {
		logger.Error("startup analytics aggregation check failed", "err", err)
	}

	// Loop until the context is cancelled; each tick re-checks whether the
	// previous month still needs aggregating (checkAndRunAggregation is a no-op
	// when summary rows already exist).
	for {
		select {
		case <-ctx.Done():
			logger.Info("analytics aggregation cron stopped")
			return
		case <-ticker.C:
			if err := checkAndRunAggregation(dbpool, logger); err != nil {
				logger.Error("analytics aggregation check failed", "err", err)
			}
		}
	}
}
135+
136+// checkAndRunAggregation checks if the previous month has been aggregated and runs if needed.
137+func checkAndRunAggregation(dbpool *postgres.PsqlDB, logger *slog.Logger) error {
138+ prevMonth := time.Now().AddDate(0, -1, 0)
139+ monthStart := time.Date(prevMonth.Year(), prevMonth.Month(), 1, 0, 0, 0, 0, time.UTC)
140+ monthEnd := monthStart.AddDate(0, 1, 0)
141+
142+ // Check if any data exists for this month in the summary table
143+ var count int
144+ err := dbpool.Db.QueryRowContext(context.Background(),
145+ `SELECT COUNT(DISTINCT host) FROM analytics_monthly_visits WHERE visit_date >= $1 AND visit_date < $2`,
146+ monthStart, monthEnd,
147+ ).Scan(&count)
148+ if err != nil {
149+ return fmt.Errorf("check existing aggregation: %w", err)
150+ }
151+
152+ if count > 0 {
153+ logger.Info("previous month already aggregated, skipping", "month", prevMonth.Format("2006-01"), "hosts", count)
154+ return nil
155+ }
156+
157+ logger.Info("previous month not aggregated, running now", "month", prevMonth.Format("2006-01"))
158+ return RunAnalyticsAggregation(dbpool, logger, prevMonth)
159+}
160+
161+func fetchUserHostPairs(dbpool *postgres.PsqlDB, monthStart, monthEnd time.Time) ([]userHostPair, error) {
162+ rows, err := dbpool.Db.Queryx(
163+ `SELECT DISTINCT user_id, host FROM analytics_visits WHERE created_at >= $1 AND created_at < $2 AND host <> '' ORDER BY user_id, host`,
164+ monthStart, monthEnd,
165+ )
166+ if err != nil {
167+ return nil, err
168+ }
169+ defer func() { _ = rows.Close() }()
170+
171+ var pairs []userHostPair
172+ for rows.Next() {
173+ var p userHostPair
174+ if err := rows.Scan(&p.UserID, &p.Host); err != nil {
175+ return nil, err
176+ }
177+ pairs = append(pairs, p)
178+ }
179+ return pairs, rows.Err()
180+}
181+
182+func fetchVisitsForPair(dbpool *postgres.PsqlDB, userID, host string, monthStart, monthEnd time.Time) ([]visitRow, error) {
183+ rows, err := dbpool.Db.Queryx(
184+ `SELECT user_id, host, path, ip_address, referer, status, user_agent, created_at
185+ FROM analytics_visits
186+ WHERE user_id = $1 AND host = $2 AND created_at >= $3 AND created_at < $4`,
187+ userID, host, monthStart, monthEnd,
188+ )
189+ if err != nil {
190+ return nil, err
191+ }
192+ defer func() { _ = rows.Close() }()
193+
194+ var visits []visitRow
195+ for rows.Next() {
196+ var v visitRow
197+ if err := rows.Scan(&v.UserID, &v.Host, &v.Path, &v.IPAddress, &v.Referer, &v.Status, &v.UserAgent, &v.CreatedAt); err != nil {
198+ return nil, err
199+ }
200+ visits = append(visits, v)
201+ }
202+ return visits, rows.Err()
203+}
204+
205+func aggregateDays(visits []visitRow) map[string]*dayStats {
206+ dayMap := make(map[string]*dayStats)
207+
208+ for _, v := range visits {
209+ // Exclude 404s from unique visitor counts — bots hitting non-existent
210+ // paths shouldn't count as visitors
211+ if v.Status == 404 {
212+ continue
213+ }
214+
215+ dateKey := v.CreatedAt.UTC().Format("2006-01-02")
216+ ds, ok := dayMap[dateKey]
217+ if !ok {
218+ ds = &dayStats{
219+ Date: time.Date(v.CreatedAt.Year(), v.CreatedAt.Month(), v.CreatedAt.Day(), 0, 0, 0, 0, time.UTC),
220+ AllIPs: make(map[string]bool),
221+ MobileIPs: make(map[string]bool),
222+ DesktopIPs: make(map[string]bool),
223+ }
224+ dayMap[dateKey] = ds
225+ }
226+
227+ ds.AllIPs[v.IPAddress] = true
228+ if isMobile(v.UserAgent) {
229+ ds.MobileIPs[v.IPAddress] = true
230+ } else {
231+ ds.DesktopIPs[v.IPAddress] = true
232+ }
233+ }
234+ return dayMap
235+}
236+
237+func aggregatePaths(visits []visitRow) map[int][]pathStats {
238+ // Maps status_code -> list of pathStats
239+ statusMap := make(map[int]map[string]*pathStats)
240+
241+ for _, v := range visits {
242+ if v.Path == "" {
243+ continue
244+ }
245+
246+ if _, ok := statusMap[v.Status]; !ok {
247+ statusMap[v.Status] = make(map[string]*pathStats)
248+ }
249+
250+ key := fmt.Sprintf("%s/%d", v.Path, v.Status)
251+ ps, ok := statusMap[v.Status][key]
252+ if !ok {
253+ ps = &pathStats{Path: v.Path, Status: v.Status, IPs: make(map[string]bool)}
254+ statusMap[v.Status][key] = ps
255+ }
256+ ps.IPs[v.IPAddress] = true
257+ }
258+
259+ result := make(map[int][]pathStats)
260+ for status, paths := range statusMap {
261+ list := make([]pathStats, 0, len(paths))
262+ for _, ps := range paths {
263+ list = append(list, *ps)
264+ }
265+ sortByCount(list)
266+ result[status] = list
267+ }
268+ return result
269+}
270+
271+func aggregateReferers(visits []visitRow) []pathStats {
272+ refMap := make(map[string]*pathStats)
273+
274+ for _, v := range visits {
275+ if v.Referer == "" || v.Status == 404 {
276+ continue
277+ }
278+
279+ rs, ok := refMap[v.Referer]
280+ if !ok {
281+ rs = &pathStats{Path: v.Referer, IPs: make(map[string]bool)}
282+ refMap[v.Referer] = rs
283+ }
284+ rs.IPs[v.IPAddress] = true
285+ }
286+
287+ result := make([]pathStats, 0, len(refMap))
288+ for _, rs := range refMap {
289+ result = append(result, *rs)
290+ }
291+ sortByCount(result)
292+ return result
293+}
294+
295+func sortByCount(paths []pathStats) {
296+ for i := 1; i < len(paths); i++ {
297+ for j := i; j > 0 && len(paths[j].IPs) > len(paths[j-1].IPs); j-- {
298+ paths[j], paths[j-1] = paths[j-1], paths[j]
299+ }
300+ }
301+}
302+
// insertMonthlyVisits upserts one analytics_monthly_visits row per aggregated
// day for the given (user_id, host). Re-running aggregation for the same month
// overwrites the counts via the (user_id, host, visit_date) conflict target
// rather than duplicating rows. Returns on the first failed insert.
func insertMonthlyVisits(dbpool *postgres.PsqlDB, userID, host string, dayMap map[string]*dayStats) error {
	for _, ds := range dayMap {
		_, err := dbpool.Db.Exec(
			`INSERT INTO analytics_monthly_visits (user_id, host, visit_date, unique_visits, mobile_visits, desktop_visits)
			VALUES ($1, $2, $3, $4, $5, $6)
			ON CONFLICT (user_id, host, visit_date) DO UPDATE
			SET unique_visits = EXCLUDED.unique_visits,
			mobile_visits = EXCLUDED.mobile_visits,
			desktop_visits = EXCLUDED.desktop_visits`,
			userID, host, ds.Date,
			// set sizes = distinct IP counts for the day
			len(ds.AllIPs), len(ds.MobileIPs), len(ds.DesktopIPs),
		)
		if err != nil {
			return fmt.Errorf("insert monthly visits for %s: %w", ds.Date.Format("2006-01-02"), err)
		}
	}
	return nil
}
321+
322+func insertTopURLs(dbpool *postgres.PsqlDB, userID, host string, month time.Time, statusCode int, paths []pathStats) error {
323+ limit := 10
324+ if statusCode == 404 {
325+ limit = 100
326+ }
327+ if len(paths) > limit {
328+ paths = paths[:limit]
329+ }
330+
331+ for rank, ps := range paths {
332+ _, err := dbpool.Db.Exec(
333+ `INSERT INTO analytics_monthly_top_urls (user_id, host, month, path, unique_visits, status_code, rank)
334+ VALUES ($1, $2, $3, $4, $5, $6, $7)
335+ ON CONFLICT (user_id, host, month, path, status_code) DO UPDATE
336+ SET unique_visits = EXCLUDED.unique_visits,
337+ rank = EXCLUDED.rank`,
338+ userID, host, month, ps.Path, len(ps.IPs), statusCode, rank+1,
339+ )
340+ if err != nil {
341+ return fmt.Errorf("insert top url %s: %w", ps.Path, err)
342+ }
343+ }
344+ return nil
345+}
346+
347+func insertTopReferers(dbpool *postgres.PsqlDB, userID, host string, month time.Time, refs []pathStats) error {
348+ limit := 10
349+ if len(refs) > limit {
350+ refs = refs[:limit]
351+ }
352+
353+ for rank, rs := range refs {
354+ _, err := dbpool.Db.Exec(
355+ `INSERT INTO analytics_monthly_top_referers (user_id, host, month, referer, unique_visits, rank)
356+ VALUES ($1, $2, $3, $4, $5, $6)
357+ ON CONFLICT (user_id, host, month, referer) DO UPDATE
358+ SET unique_visits = EXCLUDED.unique_visits,
359+ rank = EXCLUDED.rank`,
360+ userID, host, month, rs.Path, len(rs.IPs), rank+1,
361+ )
362+ if err != nil {
363+ return fmt.Errorf("insert top referer %s: %w", rs.Path, err)
364+ }
365+ }
366+ return nil
367+}
368+
369+func countTotalUnique(dayMap map[string]*dayStats) int {
370+ allIPs := make(map[string]bool)
371+ for _, ds := range dayMap {
372+ for ip := range ds.AllIPs {
373+ allIPs[ip] = true
374+ }
375+ }
376+ return len(allIPs)
377+}
378+
// upsertUserSite records that a (user_id, host) pair has aggregated data,
// accumulating a running total and stamping last_seen with the aggregated
// month (not the wall-clock time of the run).
// NOTE(review): total_visits adds each month's unique-visitor count, so the
// lifetime figure is a sum of monthly uniques, not globally-unique visitors —
// confirm that is the intended semantics for analytics_user_sites.
func upsertUserSite(dbpool *postgres.PsqlDB, userID, host string, monthUnique int, month time.Time) error {
	_, err := dbpool.Db.Exec(
		`INSERT INTO analytics_user_sites (user_id, host, total_visits, last_seen)
		VALUES ($1, $2, $3, $4)
		ON CONFLICT (user_id, host) DO UPDATE
		SET total_visits = analytics_user_sites.total_visits + EXCLUDED.total_visits,
		last_seen = EXCLUDED.last_seen`,
		userID, host, monthUnique, month,
	)
	return err
}
390+
// isMobile reports whether the user agent looks like a phone or tablet browser,
// via a case-insensitive substring scan over a fixed keyword list.
// TODO: replace with a proper UA parser library (e.g., mssola/useragent).
func isMobile(userAgent string) bool {
	lowered := strings.ToLower(userAgent)
	for _, needle := range []string{
		"mobile",
		"android",
		"iphone",
		"ipad",
		"ipod",
		"blackberry",
		"windows phone",
	} {
		if strings.Contains(lowered, needle) {
			return true
		}
	}
	return false
}
411+
412+// deleteAggregatedVisits deletes raw visit data from analytics_visits for the given month.
413+// Data is preserved in summary tables, so this is safe regardless of feature flags.
414+// Raw data for the current and previous months is never deleted since visitUniqueFromRaw
415+// still reads from analytics_visits for those months.
416+func deleteAggregatedVisits(dbpool *postgres.PsqlDB, logger *slog.Logger, monthStart, monthEnd time.Time) error {
417+ now := time.Now()
418+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
419+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
420+
421+ // Never delete raw data for the current or previous month —
422+ // visitUniqueFromRaw still reads from analytics_visits for those months.
423+ if !monthStart.Before(previousMonthStart) {
424+ logger.Info("skipping raw visit deletion for recent month", "month_start", monthStart.Format("2006-01"))
425+ return nil
426+ }
427+
428+ result, err := dbpool.Db.Exec(`
429+ DELETE FROM analytics_visits
430+ WHERE created_at >= $1 AND created_at < $2`, monthStart, monthEnd)
431+ if err != nil {
432+ return fmt.Errorf("delete aggregated visits: %w", err)
433+ }
434+
435+ deleted, _ := result.RowsAffected()
436+ logger.Info("deleted aggregated visits", "month_start", monthStart.Format("2006-01"), "deleted", deleted)
437+ return nil
438+}
+2,
-0
1@@ -922,6 +922,8 @@ func StartApiServer() {
2 go metricDrainSub(ctx, db, logger, cfg.Secret)
3 // gather connect/disconnect logs from tuns
4 go tunsEventLogDrainSub(ctx, db, logger, cfg.Secret)
5+ // monthly analytics aggregation (checks daily, runs if previous month is missing)
6+ go analyticsAggregationCron(ctx, db, logger)
7
8 apiConfig := &router.ApiConfig{
9 Cfg: cfg,
+4,
-2
1@@ -159,8 +159,10 @@ type Paginate[T any] struct {
2 }
3
4 type VisitInterval struct {
5- Interval *time.Time `json:"interval" db:"interval"`
6- Visitors int `json:"visitors" db:"visitors"`
7+ Interval *time.Time `json:"interval" db:"interval"`
8+ Visitors int `json:"visitors" db:"visitors"`
9+ MobileVisitors int `json:"mobile_visitors" db:"mobile_visitors"`
10+ DesktopVisitors int `json:"desktop_visitors" db:"desktop_visitors"`
11 }
12
13 type VisitUrl struct {
+550,
-109
1@@ -7,6 +7,7 @@ import (
2 "log/slog"
3 "math"
4 "slices"
5+ "sort"
6 "strings"
7 "time"
8
9@@ -16,6 +17,9 @@ import (
10 "github.com/picosh/pico/pkg/shared"
11 )
12
13+// mobileUserAgentExpr is a SQL expression to detect mobile user agents.
14+const mobileUserAgentExpr = `user_agent ILIKE '%mobile%' OR user_agent ILIKE '%android%' OR user_agent ILIKE '%iphone%' OR user_agent ILIKE '%ipad%' OR user_agent ILIKE '%ipod%' OR user_agent ILIKE '%blackberry%' OR user_agent ILIKE '%windows phone%'`
15+
16 var PAGER_SIZE = 15
17
18 var SelectPost = `
19@@ -720,109 +724,444 @@ func visitFilterBy(opts *db.SummaryOpts) (string, string) {
20 }
21
22 func (me *PsqlDB) visitUnique(opts *db.SummaryOpts) ([]*db.VisitInterval, error) {
23- where, with := visitFilterBy(opts)
24- uniqueVisitors := fmt.Sprintf(`SELECT
25- date_trunc('%s', created_at) as interval_start,
26- count(DISTINCT ip_address) as unique_visitors
27- FROM analytics_visits
28- WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND status <> 404
29- GROUP BY interval_start`, opts.Interval, where)
30+ intervals, err := me.visitUniqueFromSummary(opts)
31+ if err != nil {
32+ return nil, fmt.Errorf("query summary visits: %w", err)
33+ }
34+
35+ currentIntervals, err := me.visitUniqueFromRaw(opts)
36+ if err != nil {
37+ return nil, fmt.Errorf("query raw visits: %w", err)
38+ }
39+
40+ // Merge: current month data may overlap with summary data, combine counts
41+ return mergeVisitIntervals(intervals, currentIntervals), nil
42+}
43+
44+// visitUniqueFromSummary reads unique visitor counts from analytics_monthly_visits for historical data.
45+func (me *PsqlDB) visitUniqueFromSummary(opts *db.SummaryOpts) ([]*db.VisitInterval, error) {
46+ now := time.Now()
47+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
48+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
49+
50+ // Always include at least the previous month in the summary query, since raw data
51+ // for the previous month may have been deleted by aggregation. visitUniqueFromRaw
52+ // covers the previous and current months, but once aggregation runs and deletes raw
53+ // data, the summary table is the only source. Using min(origin, previousMonthStart)
54+ // ensures we don't skip the previous month even when origin is set to the current month.
55+ effectiveOrigin := opts.Origin
56+ if opts.Origin.After(previousMonthStart) {
57+ effectiveOrigin = previousMonthStart
58+ }
59+
60+ // If effective origin is in or after current month, no historical data to fetch
61+ if !effectiveOrigin.Before(currentMonthStart) {
62+ return nil, nil
63+ }
64
65- intervals := []*db.VisitInterval{}
66- rs, err := me.Db.Queryx(uniqueVisitors, opts.Origin, with, opts.UserID)
67+ where := ""
68+ args := []interface{}{opts.UserID, effectiveOrigin, currentMonthStart}
69+ argIdx := 4
70+ if opts.Host != "" {
71+ where = "AND host = $" + fmt.Sprintf("%d", argIdx)
72+ args = append(args, opts.Host)
73+ }
74+
75+ query := fmt.Sprintf(`
76+ SELECT
77+ date_trunc('%s', visit_date)::timestamptz as interval_start,
78+ sum(unique_visits) as unique_visitors,
79+ sum(mobile_visits) as mobile_visits,
80+ sum(desktop_visits) as desktop_visits
81+ FROM analytics_monthly_visits
82+ WHERE user_id = $1 AND visit_date >= $2 AND visit_date < $3 %s
83+ GROUP BY interval_start
84+ ORDER BY interval_start`, opts.Interval, where)
85+
86+ rows, err := me.Db.Queryx(query, args...)
87 if err != nil {
88 return nil, err
89 }
90- defer func() { _ = rs.Close() }()
91+ defer func() { _ = rows.Close() }()
92
93- for rs.Next() {
94+ var intervals []*db.VisitInterval
95+ for rows.Next() {
96 interval := &db.VisitInterval{}
97- err := rs.Scan(
98- &interval.Interval,
99- &interval.Visitors,
100- )
101- if err != nil {
102+ if err := rows.Scan(&interval.Interval, &interval.Visitors, &interval.MobileVisitors, &interval.DesktopVisitors); err != nil {
103 return nil, err
104 }
105+ intervals = append(intervals, interval)
106+ }
107+ return intervals, rows.Err()
108+}
109+
110+// visitUniqueFromRaw reads unique visitor counts from analytics_visits for the previous and current months.
111+// This covers the gap between the last aggregated month and the current month.
112+func (me *PsqlDB) visitUniqueFromRaw(opts *db.SummaryOpts) ([]*db.VisitInterval, error) {
113+ now := time.Now()
114+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
115+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
116
117+ where, with := visitFilterBy(opts)
118+
119+ // Determine the effective start: max(origin, previousMonthStart)
120+ effectiveStart := previousMonthStart
121+ if opts.Origin.After(previousMonthStart) {
122+ effectiveStart = opts.Origin
123+ }
124+
125+ uniqueVisitors := fmt.Sprintf(`
126+ SELECT
127+ date_trunc('%s', created_at)::timestamptz as interval_start,
128+ count(DISTINCT CASE WHEN %s THEN ip_address END) as mobile_visitors,
129+ count(DISTINCT CASE WHEN NOT %s THEN ip_address END) as desktop_visitors,
130+ count(DISTINCT ip_address) as unique_visitors
131+ FROM analytics_visits
132+ WHERE created_at >= $1 AND created_at < $2 AND %s = $3 AND user_id = $4 AND status <> 404
133+ GROUP BY interval_start
134+ ORDER BY interval_start`,
135+ opts.Interval, mobileUserAgentExpr, mobileUserAgentExpr, where)
136+
137+ rows, err := me.Db.Queryx(uniqueVisitors, effectiveStart, currentMonthStart.AddDate(0, 1, 0), with, opts.UserID)
138+ if err != nil {
139+ return nil, err
140+ }
141+ defer func() { _ = rows.Close() }()
142+
143+ var intervals []*db.VisitInterval
144+ for rows.Next() {
145+ interval := &db.VisitInterval{}
146+ if err := rows.Scan(&interval.Interval, &interval.MobileVisitors, &interval.DesktopVisitors, &interval.Visitors); err != nil {
147+ return nil, err
148+ }
149 intervals = append(intervals, interval)
150 }
151- if rs.Err() != nil {
152- return nil, rs.Err()
153+ return intervals, rows.Err()
154+}
155+
156+// mergeVisitIntervals combines historical (summary table) and current (raw) intervals.
157+// Summary data is preferred when both sources have the same interval to avoid double-counting.
158+func mergeVisitIntervals(historical, current []*db.VisitInterval) []*db.VisitInterval {
159+ if len(historical) == 0 {
160+ return current
161+ }
162+ if len(current) == 0 {
163+ return historical
164+ }
165+
166+ // Build a map by interval timestamp for merging.
167+ // Summary data takes precedence over raw data for the same interval
168+ // (e.g. when a month is aggregated but raw data still exists in a local dump).
169+ intervalMap := make(map[int64]*db.VisitInterval)
170+ for _, ci := range current {
171+ ts := ci.Interval.Unix()
172+ intervalMap[ts] = ci
173+ }
174+ for _, hi := range historical {
175+ ts := hi.Interval.Unix()
176+ intervalMap[ts] = hi // summary overwrites raw if both exist
177+ }
178+
179+ // Sort by interval
180+ result := make([]*db.VisitInterval, 0, len(intervalMap))
181+ for _, iv := range intervalMap {
182+ result = append(result, iv)
183 }
184- return intervals, nil
185+ sort.Slice(result, func(i, j int) bool {
186+ return result[i].Interval.Before(*result[j].Interval)
187+ })
188+ return result
189+}
190+
191+// mergeTopUrls combines historical and current top URLs, summing counts for overlapping URLs,
192+// then returns the top 10 by total count.
193+func mergeTopUrls(historical, current []*db.VisitUrl) []*db.VisitUrl {
194+ if len(historical) == 0 {
195+ return current
196+ }
197+ if len(current) == 0 {
198+ return historical
199+ }
200+
201+ // Build a map by URL for merging
202+ urlMap := make(map[string]*db.VisitUrl)
203+ for _, hu := range historical {
204+ urlMap[hu.Url] = hu
205+ }
206+
207+ for _, cu := range current {
208+ if existing, ok := urlMap[cu.Url]; ok {
209+ existing.Count += cu.Count
210+ } else {
211+ urlMap[cu.Url] = cu
212+ }
213+ }
214+
215+ // Sort by count descending and take top 10
216+ result := make([]*db.VisitUrl, 0, len(urlMap))
217+ for _, u := range urlMap {
218+ result = append(result, u)
219+ }
220+ sort.Slice(result, func(i, j int) bool {
221+ return result[i].Count > result[j].Count
222+ })
223+ if len(result) > 10 {
224+ result = result[:10]
225+ }
226+ return result
227+}
228+
229+// mergeTopReferers combines historical and current top referers, summing counts for overlapping referers,
230+// then returns the top 10 by total count.
231+func mergeTopReferers(historical, current []*db.VisitUrl) []*db.VisitUrl {
232+ return mergeTopUrls(historical, current) // Same logic as mergeTopUrls
233+}
234+
235+// mergeHosts combines historical and current hosts, summing counts for overlapping hosts,
236+// then returns sorted by total count descending.
237+func mergeHosts(historical, current []*db.VisitUrl) []*db.VisitUrl {
238+ if len(historical) == 0 {
239+ return current
240+ }
241+ if len(current) == 0 {
242+ return historical
243+ }
244+
245+ // Build a map by host for merging
246+ hostMap := make(map[string]*db.VisitUrl)
247+ for _, hu := range historical {
248+ hostMap[hu.Url] = hu
249+ }
250+
251+ for _, cu := range current {
252+ if existing, ok := hostMap[cu.Url]; ok {
253+ existing.Count += cu.Count
254+ } else {
255+ hostMap[cu.Url] = cu
256+ }
257+ }
258+
259+ // Sort by count descending
260+ result := make([]*db.VisitUrl, 0, len(hostMap))
261+ for _, h := range hostMap {
262+ result = append(result, h)
263+ }
264+ sort.Slice(result, func(i, j int) bool {
265+ return result[i].Count > result[j].Count
266+ })
267+ return result
268 }
269
270 func (me *PsqlDB) visitReferer(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
271- where, with := visitFilterBy(opts)
272- topUrls := fmt.Sprintf(`SELECT
273- referer,
274- count(DISTINCT ip_address) as referer_count
275- FROM analytics_visits
276- WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND referer <> '' AND status <> 404
277- GROUP BY referer
278- ORDER BY referer_count DESC
279- LIMIT 10`, where)
280+ historical, err := me.visitRefererFromSummary(opts)
281+ if err != nil {
282+ return nil, fmt.Errorf("query summary referers: %w", err)
283+ }
284
285- intervals := []*db.VisitUrl{}
286- rs, err := me.Db.Queryx(topUrls, opts.Origin, with, opts.UserID)
287+ current, err := me.visitRefererFromRaw(opts)
288+ if err != nil {
289+ return nil, fmt.Errorf("query raw referers: %w", err)
290+ }
291+
292+ return mergeTopReferers(historical, current), nil
293+}
294+
295+// visitRefererFromSummary reads top referers from analytics_monthly_top_referers for historical data.
296+func (me *PsqlDB) visitRefererFromSummary(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
297+ now := time.Now()
298+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
299+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
300+
301+ effectiveOrigin := opts.Origin
302+ if opts.Origin.After(previousMonthStart) {
303+ effectiveOrigin = previousMonthStart
304+ }
305+
306+ // If effective origin is in or after current month, no historical data to fetch
307+ if !effectiveOrigin.Before(currentMonthStart) {
308+ return nil, nil
309+ }
310+
311+ // Clamp origin to month boundary for summary table lookup
312+ originMonthStart := time.Date(effectiveOrigin.Year(), effectiveOrigin.Month(), 1, 0, 0, 0, 0, time.UTC)
313+
314+ where := ""
315+ args := []interface{}{opts.UserID, originMonthStart, currentMonthStart}
316+ if opts.Host != "" {
317+ where = "AND host = $4"
318+ args = append(args, opts.Host)
319+ }
320+
321+ query := fmt.Sprintf(`
322+ SELECT referer, sum(unique_visits) as total_visits
323+ FROM analytics_monthly_top_referers
324+ WHERE user_id = $1 AND month >= $2 AND month < $3 %s
325+ GROUP BY referer
326+ ORDER BY total_visits DESC
327+ LIMIT 10`, where)
328+
329+ rows, err := me.Db.Queryx(query, args...)
330 if err != nil {
331 return nil, err
332 }
333- defer func() { _ = rs.Close() }()
334+ defer func() { _ = rows.Close() }()
335
336- for rs.Next() {
337- interval := &db.VisitUrl{}
338- err := rs.Scan(
339- &interval.Url,
340- &interval.Count,
341- )
342- if err != nil {
343+ var results []*db.VisitUrl
344+ for rows.Next() {
345+ result := &db.VisitUrl{}
346+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
347 return nil, err
348 }
349+ results = append(results, result)
350+ }
351+ return results, rows.Err()
352+}
353
354- intervals = append(intervals, interval)
355+// visitRefererFromRaw reads top referers from analytics_visits for the previous and current months.
356+func (me *PsqlDB) visitRefererFromRaw(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
357+ now := time.Now()
358+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
359+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
360+
361+ where, with := visitFilterBy(opts)
362+
363+ // Determine the effective start: max(origin, previousMonthStart)
364+ effectiveStart := previousMonthStart
365+ if opts.Origin.After(previousMonthStart) {
366+ effectiveStart = opts.Origin
367 }
368- if rs.Err() != nil {
369- return nil, rs.Err()
370+
371+ topUrls := fmt.Sprintf(`
372+ SELECT
373+ referer,
374+ count(DISTINCT ip_address) as referer_count
375+ FROM analytics_visits
376+ WHERE created_at >= $1 AND created_at < $2 AND %s = $3 AND user_id = $4 AND referer <> '' AND status <> 404
377+ GROUP BY referer
378+ ORDER BY referer_count DESC
379+ LIMIT 10`, where)
380+
381+ rows, err := me.Db.Queryx(topUrls, effectiveStart, currentMonthStart.AddDate(0, 1, 0), with, opts.UserID)
382+ if err != nil {
383+ return nil, err
384 }
385- return intervals, nil
386+ defer func() { _ = rows.Close() }()
387+
388+ var results []*db.VisitUrl
389+ for rows.Next() {
390+ result := &db.VisitUrl{}
391+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
392+ return nil, err
393+ }
394+ results = append(results, result)
395+ }
396+ return results, rows.Err()
397 }
398
399 func (me *PsqlDB) visitUrl(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
400- where, with := visitFilterBy(opts)
401- topUrls := fmt.Sprintf(`SELECT
402- path,
403- count(DISTINCT ip_address) as path_count
404- FROM analytics_visits
405- WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND path <> '' AND status <> 404
406- GROUP BY path
407- ORDER BY path_count DESC
408- LIMIT 10`, where)
409+ historical, err := me.visitUrlFromSummary(opts)
410+ if err != nil {
411+ return nil, fmt.Errorf("query summary urls: %w", err)
412+ }
413+
414+ current, err := me.visitUrlFromRaw(opts)
415+ if err != nil {
416+ return nil, fmt.Errorf("query raw urls: %w", err)
417+ }
418+
419+ return mergeTopUrls(historical, current), nil
420+}
421+
422+// visitUrlFromSummary reads top URLs from analytics_monthly_top_urls for historical data.
423+func (me *PsqlDB) visitUrlFromSummary(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
424+ now := time.Now()
425+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
426+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
427
428- intervals := []*db.VisitUrl{}
429- rs, err := me.Db.Queryx(topUrls, opts.Origin, with, opts.UserID)
430+ effectiveOrigin := opts.Origin
431+ if opts.Origin.After(previousMonthStart) {
432+ effectiveOrigin = previousMonthStart
433+ }
434+
435+ // If effective origin is in or after current month, no historical data to fetch
436+ if !effectiveOrigin.Before(currentMonthStart) {
437+ return nil, nil
438+ }
439+
440+ // Clamp origin to month boundary for summary table lookup
441+ originMonthStart := time.Date(effectiveOrigin.Year(), effectiveOrigin.Month(), 1, 0, 0, 0, 0, time.UTC)
442+
443+ where := ""
444+ args := []interface{}{opts.UserID, originMonthStart, currentMonthStart}
445+ if opts.Host != "" {
446+ where = "AND host = $4"
447+ args = append(args, opts.Host)
448+ }
449+
450+ query := fmt.Sprintf(`
451+ SELECT path, sum(unique_visits) as total_visits
452+ FROM analytics_monthly_top_urls
453+ WHERE user_id = $1 AND month >= $2 AND month < $3 AND status_code <> 404 %s
454+ GROUP BY path
455+ ORDER BY total_visits DESC
456+ LIMIT 10`, where)
457+
458+ rows, err := me.Db.Queryx(query, args...)
459 if err != nil {
460 return nil, err
461 }
462- defer func() { _ = rs.Close() }()
463+ defer func() { _ = rows.Close() }()
464
465- for rs.Next() {
466- interval := &db.VisitUrl{}
467- err := rs.Scan(
468- &interval.Url,
469- &interval.Count,
470- )
471- if err != nil {
472+ var results []*db.VisitUrl
473+ for rows.Next() {
474+ result := &db.VisitUrl{}
475+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
476 return nil, err
477 }
478+ results = append(results, result)
479+ }
480+ return results, rows.Err()
481+}
482
483- intervals = append(intervals, interval)
484+// visitUrlFromRaw reads top URLs from analytics_visits for the previous and current months.
485+func (me *PsqlDB) visitUrlFromRaw(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
486+ now := time.Now()
487+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
488+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
489+
490+ where, with := visitFilterBy(opts)
491+
492+ // Determine the effective start: max(origin, previousMonthStart)
493+ effectiveStart := previousMonthStart
494+ if opts.Origin.After(previousMonthStart) {
495+ effectiveStart = opts.Origin
496 }
497- if rs.Err() != nil {
498- return nil, rs.Err()
499+
500+ topUrls := fmt.Sprintf(`
501+ SELECT
502+ path,
503+ count(DISTINCT ip_address) as path_count
504+ FROM analytics_visits
505+ WHERE created_at >= $1 AND created_at < $2 AND %s = $3 AND user_id = $4 AND path <> '' AND status <> 404
506+ GROUP BY path
507+ ORDER BY path_count DESC
508+ LIMIT 10`, where)
509+
510+ rows, err := me.Db.Queryx(topUrls, effectiveStart, currentMonthStart.AddDate(0, 1, 0), with, opts.UserID)
511+ if err != nil {
512+ return nil, err
513 }
514- return intervals, nil
515+ defer func() { _ = rows.Close() }()
516+
517+ var results []*db.VisitUrl
518+ for rows.Next() {
519+ result := &db.VisitUrl{}
520+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
521+ return nil, err
522+ }
523+ results = append(results, result)
524+ }
525+ return results, rows.Err()
526 }
527
528 func (me *PsqlDB) VisitUrlNotFound(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
529@@ -830,73 +1169,175 @@ func (me *PsqlDB) VisitUrlNotFound(opts *db.SummaryOpts) ([]*db.VisitUrl, error)
530 if limit == 0 {
531 limit = 10
532 }
533- where, with := visitFilterBy(opts)
534- topUrls := fmt.Sprintf(`SELECT
535- path,
536- count(DISTINCT ip_address) as path_count
537- FROM analytics_visits
538- WHERE created_at >= $1 AND %s = $2 AND user_id = $3 AND path <> '' AND status = 404
539- GROUP BY path
540- ORDER BY path_count DESC
541- LIMIT %d`, where, limit)
542
543- intervals := []*db.VisitUrl{}
544- rs, err := me.Db.Queryx(topUrls, opts.Origin, with, opts.UserID)
545+ historical, err := me.visitUrlNotFoundFromSummary(opts, limit)
546+ if err != nil {
547+ return nil, fmt.Errorf("query summary 404 urls: %w", err)
548+ }
549+
550+ current, err := me.visitUrlNotFoundFromRaw(opts, limit)
551+ if err != nil {
552+ return nil, fmt.Errorf("query raw 404 urls: %w", err)
553+ }
554+
555+ return mergeTopUrls(historical, current), nil
556+}
557+
558+// visitUrlNotFoundFromSummary reads top 404 URLs from analytics_monthly_top_urls for historical data.
559+func (me *PsqlDB) visitUrlNotFoundFromSummary(opts *db.SummaryOpts, limit int) ([]*db.VisitUrl, error) {
560+ now := time.Now()
561+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
562+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
563+
564+ effectiveOrigin := opts.Origin
565+ if opts.Origin.After(previousMonthStart) {
566+ effectiveOrigin = previousMonthStart
567+ }
568+
569+ // If effective origin is in or after current month, no historical data to fetch
570+ if !effectiveOrigin.Before(currentMonthStart) {
571+ return nil, nil
572+ }
573+
574+ // Clamp origin to month boundary for summary table lookup
575+ originMonthStart := time.Date(effectiveOrigin.Year(), effectiveOrigin.Month(), 1, 0, 0, 0, 0, time.UTC)
576+
577+ where := ""
578+ args := []interface{}{opts.UserID, originMonthStart, currentMonthStart}
579+ argIdx := 4
580+ if opts.Host != "" {
581+ where = "AND host = $" + fmt.Sprintf("%d", argIdx)
582+ args = append(args, opts.Host)
583+ }
584+
585+ query := fmt.Sprintf(`
586+ SELECT path, sum(unique_visits) as total_visits
587+ FROM analytics_monthly_top_urls
588+ WHERE user_id = $1 AND month >= $2 AND month < $3 AND status_code = 404 %s
589+ GROUP BY path
590+ ORDER BY total_visits DESC
591+ LIMIT %d`, where, limit)
592+
593+ rows, err := me.Db.Queryx(query, args...)
594 if err != nil {
595 return nil, err
596 }
597- defer func() { _ = rs.Close() }()
598+ defer func() { _ = rows.Close() }()
599
600- for rs.Next() {
601- interval := &db.VisitUrl{}
602- err := rs.Scan(
603- &interval.Url,
604- &interval.Count,
605- )
606- if err != nil {
607+ var results []*db.VisitUrl
608+ for rows.Next() {
609+ result := &db.VisitUrl{}
610+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
611 return nil, err
612 }
613+ results = append(results, result)
614+ }
615+ return results, rows.Err()
616+}
617
618- intervals = append(intervals, interval)
619+// visitUrlNotFoundFromRaw reads top 404 URLs from analytics_visits for the previous and current months.
620+func (me *PsqlDB) visitUrlNotFoundFromRaw(opts *db.SummaryOpts, limit int) ([]*db.VisitUrl, error) {
621+ now := time.Now()
622+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
623+ previousMonthStart := currentMonthStart.AddDate(0, -1, 0)
624+
625+ where, with := visitFilterBy(opts)
626+
627+ // Determine the effective start: max(origin, previousMonthStart)
628+ effectiveStart := previousMonthStart
629+ if opts.Origin.After(previousMonthStart) {
630+ effectiveStart = opts.Origin
631 }
632- if rs.Err() != nil {
633- return nil, rs.Err()
634+
635+ topUrls := fmt.Sprintf(`
636+ SELECT
637+ path,
638+ count(DISTINCT ip_address) as path_count
639+ FROM analytics_visits
640+ WHERE created_at >= $1 AND created_at < $2 AND %s = $3 AND user_id = $4 AND path <> '' AND status = 404
641+ GROUP BY path
642+ ORDER BY path_count DESC
643+ LIMIT %d`, where, limit)
644+
645+ rows, err := me.Db.Queryx(topUrls, effectiveStart, currentMonthStart.AddDate(0, 1, 0), with, opts.UserID)
646+ if err != nil {
647+ return nil, err
648 }
649- return intervals, nil
650+ defer func() { _ = rows.Close() }()
651+
652+ var results []*db.VisitUrl
653+ for rows.Next() {
654+ result := &db.VisitUrl{}
655+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
656+ return nil, err
657+ }
658+ results = append(results, result)
659+ }
660+ return results, rows.Err()
661 }
662
663 func (me *PsqlDB) visitHost(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
664- topUrls := `SELECT
665- host,
666- count(DISTINCT ip_address) as host_count
667- FROM analytics_visits
668- WHERE user_id = $1 AND host <> ''
669- GROUP BY host
670- ORDER BY host_count DESC`
671+ historical, err := me.visitHostFromSummary(opts)
672+ if err != nil {
673+ return nil, fmt.Errorf("query summary hosts: %w", err)
674+ }
675+
676+ current, err := me.visitHostFromRaw(opts)
677+ if err != nil {
678+ return nil, fmt.Errorf("query raw hosts: %w", err)
679+ }
680+
681+ return mergeHosts(historical, current), nil
682+}
683
684- intervals := []*db.VisitUrl{}
685- rs, err := me.Db.Queryx(topUrls, opts.UserID)
686+// visitHostFromSummary reads host data from analytics_user_sites for historical data.
687+func (me *PsqlDB) visitHostFromSummary(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
688+ rows, err := me.Db.Queryx(`
689+ SELECT host, total_visits
690+ FROM analytics_user_sites
691+ WHERE user_id = $1 AND host <> ''
692+ ORDER BY total_visits DESC`, opts.UserID)
693 if err != nil {
694 return nil, err
695 }
696- defer func() { _ = rs.Close() }()
697+ defer func() { _ = rows.Close() }()
698
699- for rs.Next() {
700- interval := &db.VisitUrl{}
701- err := rs.Scan(
702- &interval.Url,
703- &interval.Count,
704- )
705- if err != nil {
706+ var results []*db.VisitUrl
707+ for rows.Next() {
708+ result := &db.VisitUrl{}
709+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
710 return nil, err
711 }
712+ results = append(results, result)
713+ }
714+ return results, rows.Err()
715+}
716
717- intervals = append(intervals, interval)
718+// visitHostFromRaw reads hosts from analytics_visits for the current month that aren't in summary.
719+func (me *PsqlDB) visitHostFromRaw(opts *db.SummaryOpts) ([]*db.VisitUrl, error) {
720+ now := time.Now()
721+ currentMonthStart := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, time.UTC)
722+
723+ rows, err := me.Db.Queryx(`
724+ SELECT host, count(DISTINCT ip_address) as host_count
725+ FROM analytics_visits
726+ WHERE created_at >= $1 AND user_id = $2 AND host <> ''
727+ GROUP BY host
728+ ORDER BY host_count DESC`, currentMonthStart, opts.UserID)
729+ if err != nil {
730+ return nil, err
731 }
732- if rs.Err() != nil {
733- return nil, rs.Err()
734+ defer func() { _ = rows.Close() }()
735+
736+ var results []*db.VisitUrl
737+ for rows.Next() {
738+ result := &db.VisitUrl{}
739+ if err := rows.Scan(&result.Url, &result.Count); err != nil {
740+ return nil, err
741+ }
742+ results = append(results, result)
743 }
744- return intervals, nil
745+ return results, rows.Err()
746 }
747
748 func (me *PsqlDB) VisitSummary(opts *db.SummaryOpts) (*db.SummaryVisits, error) {
1@@ -0,0 +1,85 @@
-- Summary tables that replace ad-hoc aggregation over raw analytics_visits.
-- Raw rows older than two months are rolled up into these tables and deleted.
-- All tables cascade on user deletion via FK to app_users.

-- analytics_user_sites: tracks every site a user has had traffic on
-- (one row per user/host pair, upserted by the aggregation job).
CREATE TABLE IF NOT EXISTS analytics_user_sites (
    id serial NOT NULL,
    user_id uuid NOT NULL,
    -- 253 chars: maximum length of a DNS hostname.
    host character varying(253) NOT NULL,
    -- NOTE(review): plain integer caps at ~2.1B; confirm lifetime visit
    -- counts per host stay below that, else migrate to bigint.
    total_visits integer NOT NULL DEFAULT 0,
    last_seen date NOT NULL,
    created_at timestamp without time zone NOT NULL DEFAULT now(),
    updated_at timestamp without time zone NOT NULL DEFAULT now(),
    CONSTRAINT analytics_user_sites_pkey PRIMARY KEY (id),
    -- Uniqueness target for ON CONFLICT upserts by the aggregation job.
    CONSTRAINT analytics_user_sites_unique UNIQUE (user_id, host),
    CONSTRAINT fk_user_sites_app_users
        FOREIGN KEY(user_id)
        REFERENCES app_users(id)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_user_sites_user ON analytics_user_sites (user_id);

-- analytics_monthly_visits: daily unique visitor counts per host with device breakdown
CREATE TABLE IF NOT EXISTS analytics_monthly_visits (
    id serial NOT NULL,
    user_id uuid NOT NULL,
    host character varying(253) NOT NULL,
    -- One row per day; "monthly" refers to the rollup cadence, not the grain.
    visit_date date NOT NULL,
    unique_visits integer NOT NULL DEFAULT 0,
    mobile_visits integer NOT NULL DEFAULT 0,
    desktop_visits integer NOT NULL DEFAULT 0,
    created_at timestamp without time zone NOT NULL DEFAULT now(),
    CONSTRAINT analytics_monthly_visits_pkey PRIMARY KEY (id),
    CONSTRAINT analytics_monthly_visits_unique UNIQUE (user_id, host, visit_date),
    CONSTRAINT fk_monthly_visits_app_users
        FOREIGN KEY(user_id)
        REFERENCES app_users(id)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_monthly_visits_user_host ON analytics_monthly_visits (user_id, host);
-- DESC ordering matches the UI's "most recent first" range scans.
CREATE INDEX IF NOT EXISTS idx_monthly_visits_user_date ON analytics_monthly_visits (user_id, visit_date DESC);

-- analytics_monthly_top_urls: top URLs per host per month per status code
-- (status_code lets 200s and 404s be ranked independently).
CREATE TABLE IF NOT EXISTS analytics_monthly_top_urls (
    id serial NOT NULL,
    user_id uuid NOT NULL,
    host character varying(253) NOT NULL,
    -- First day of the month this row summarizes.
    month date NOT NULL,
    path character varying(2048) NOT NULL,
    unique_visits integer NOT NULL DEFAULT 0,
    status_code integer NOT NULL,
    -- Position within the month's leaderboard at aggregation time.
    rank integer NOT NULL,
    created_at timestamp without time zone NOT NULL DEFAULT now(),
    CONSTRAINT analytics_monthly_top_urls_pkey PRIMARY KEY (id),
    CONSTRAINT analytics_monthly_top_urls_unique UNIQUE (user_id, host, month, path, status_code),
    CONSTRAINT fk_monthly_top_urls_app_users
        FOREIGN KEY(user_id)
        REFERENCES app_users(id)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_monthly_top_urls_user_host_month ON analytics_monthly_top_urls (user_id, host, month);
CREATE INDEX IF NOT EXISTS idx_monthly_top_urls_status ON analytics_monthly_top_urls (user_id, host, month, status_code, rank);

-- analytics_monthly_top_referers: top referers per host per month
CREATE TABLE IF NOT EXISTS analytics_monthly_top_referers (
    id serial NOT NULL,
    user_id uuid NOT NULL,
    host character varying(253) NOT NULL,
    month date NOT NULL,
    -- NOTE(review): 253 chars fits a hostname but not a full referer URL —
    -- confirm the aggregation job stores only the referer's host part.
    referer character varying(253) NOT NULL,
    unique_visits integer NOT NULL DEFAULT 0,
    rank integer NOT NULL,
    created_at timestamp without time zone NOT NULL DEFAULT now(),
    CONSTRAINT analytics_monthly_top_referers_pkey PRIMARY KEY (id),
    CONSTRAINT analytics_monthly_top_referers_unique UNIQUE (user_id, host, month, referer),
    CONSTRAINT fk_monthly_top_referers_app_users
        FOREIGN KEY(user_id)
        REFERENCES app_users(id)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_monthly_referers_user_host_month ON analytics_monthly_top_referers (user_id, host, month);