feat: Pass around one big global context
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
PapaTutuWawa 2024-02-03 12:05:10 +01:00
parent cb123537d5
commit 315bb39f44
5 changed files with 183 additions and 44 deletions

View File

@ -7,13 +7,16 @@ import (
"net" "net"
"net/http" "net/http"
"os" "os"
"regexp"
"strings" "strings"
"sync" "sync"
"time" "time"
"git.polynom.me/rio/internal/acme" "git.polynom.me/rio/internal/acme"
"git.polynom.me/rio/internal/certificates" "git.polynom.me/rio/internal/certificates"
"git.polynom.me/rio/internal/context"
"git.polynom.me/rio/internal/dns" "git.polynom.me/rio/internal/dns"
"git.polynom.me/rio/internal/metrics"
"git.polynom.me/rio/internal/pages" "git.polynom.me/rio/internal/pages"
"git.polynom.me/rio/internal/repo" "git.polynom.me/rio/internal/repo"
"git.polynom.me/rio/internal/server" "git.polynom.me/rio/internal/server"
@ -24,14 +27,14 @@ import (
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
) )
func handleSubdomain(pagesDomain, domain, cname, path, giteaUrl, defaultCsp string, giteaClient *repo.GiteaClient, lokiConfig *pages.LokiMetricConfig, w http.ResponseWriter) { func handleSubdomain(ctx *context.GlobalContext, domain, cname, path string, req *http.Request, w http.ResponseWriter) {
username := "" username := ""
if cname != "" { if cname != "" {
// If we are accessed via a CNAME, then CNAME contains our <user>.<pages domain> value. // If we are accessed via a CNAME, then CNAME contains our <user>.<pages domain> value.
username = dns.ExtractUsername(pagesDomain, cname) username = dns.ExtractUsername(ctx.PagesDomain, cname)
} else { } else {
// If we are directly accessed, then domain contains our <user>.<pages domain> value. // If we are directly accessed, then domain contains our <user>.<pages domain> value.
username = dns.ExtractUsername(pagesDomain, domain) username = dns.ExtractUsername(ctx.PagesDomain, domain)
} }
// Strip the leading / // Strip the leading /
@ -52,7 +55,7 @@ func handleSubdomain(pagesDomain, domain, cname, path, giteaUrl, defaultCsp stri
domain, domain,
cname, cname,
path, path,
giteaClient, ctx.Gitea,
) )
if err != nil { if err != nil {
log.Errorf("Failed to get repo: %s", err) log.Errorf("Failed to get repo: %s", err)
@ -60,15 +63,30 @@ func handleSubdomain(pagesDomain, domain, cname, path, giteaUrl, defaultCsp stri
return return
} }
pages.ServeFile(username, repo.Name, path, defaultCsp, domain, giteaClient, lokiConfig, w) d := domain
if cname != "" {
d = cname
}
c := &context.Context{
Username: username,
Reponame: repo.Name,
Domain: d,
Path: path,
Referrer: req.Header.Get("Referer"),
UserAgent: req.Header.Get("User-Agent"),
Writer: w,
Global: ctx,
}
pages.ServeFile(c)
} }
func Handler(pagesDomain, giteaUrl, defaultCsp string, giteaClient *repo.GiteaClient, lokiConfig *pages.LokiMetricConfig) http.HandlerFunc { func Handler(ctx *context.GlobalContext) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) { return func(w http.ResponseWriter, req *http.Request) {
w.Header().Set("Server", "rio") w.Header().Set("Server", "rio")
// Is the direct domain requested? // Is the direct domain requested?
if req.Host == pagesDomain { if req.Host == ctx.PagesDomain {
log.Debug("Direct pages domain is requested.") log.Debug("Direct pages domain is requested.")
// TODO: Handle // TODO: Handle
@ -77,9 +95,9 @@ func Handler(pagesDomain, giteaUrl, defaultCsp string, giteaClient *repo.GiteaCl
} }
// Is a direct subdomain requested? // Is a direct subdomain requested?
if strings.HasSuffix(req.Host, pagesDomain) { if strings.HasSuffix(req.Host, ctx.PagesDomain) {
log.Debug("Domain can be directly handled") log.Debug("Domain can be directly handled")
handleSubdomain(pagesDomain, req.Host, "", req.URL.Path, giteaUrl, defaultCsp, giteaClient, lokiConfig, w) handleSubdomain(ctx, req.Host, "", req.URL.Path, req, w)
return return
} }
@ -94,9 +112,9 @@ func Handler(pagesDomain, giteaUrl, defaultCsp string, giteaClient *repo.GiteaCl
// Is a direct subdomain requested after CNAME lookup? // Is a direct subdomain requested after CNAME lookup?
// NOTE: We now require the leading dot because a CNAME to the direct // NOTE: We now require the leading dot because a CNAME to the direct
// pages domain makes no sense. // pages domain makes no sense.
if strings.HasSuffix(cname, "."+pagesDomain) { if strings.HasSuffix(cname, "."+ctx.PagesDomain) {
log.Debugf("%s is alias of %s and can be handled after a CNAME query", req.Host, cname) log.Debugf("%s is alias of %s and can be handled after a CNAME query", req.Host, cname)
handleSubdomain(pagesDomain, req.Host, cname, req.URL.Path, giteaUrl, defaultCsp, giteaClient, lokiConfig, w) handleSubdomain(ctx, req.Host, cname, req.URL.Path, req, w)
return return
} }
@ -132,6 +150,7 @@ func runServer(ctx *cli.Context) error {
acmeDisable := ctx.Bool("acme-disable") acmeDisable := ctx.Bool("acme-disable")
defaultCsp := ctx.String("default-csp") defaultCsp := ctx.String("default-csp")
lokiUrl := ctx.String("loki-url") lokiUrl := ctx.String("loki-url")
metricsBotList := ctx.String("metrics-bot-list")
// Init Logging // Init Logging
if ctx.Bool("debug") { if ctx.Bool("debug") {
@ -141,14 +160,23 @@ func runServer(ctx *cli.Context) error {
} }
// Set up the Loki metrics // Set up the Loki metrics
var lokiConfig pages.LokiMetricConfig var lokiConfig metrics.LokiMetricConfig
if lokiUrl == "" { if lokiUrl == "" {
lokiConfig = pages.LokiMetricConfig{ lokiConfig = metrics.LokiMetricConfig{
Enabled: false, Enabled: false,
} }
} else { } else {
lokiConfig = pages.LokiMetricConfig{ var patterns []regexp.Regexp
if metricsBotList != "" {
patterns, _ = metrics.ReadBotPatterns(metricsBotList)
} else {
patterns = make([]regexp.Regexp, 0)
}
log.Infof("Read %d bot patterns from disk", len(patterns))
lokiConfig = metrics.LokiMetricConfig{
Enabled: true, Enabled: true,
BotUserAgents: &patterns,
Url: lokiUrl, Url: lokiUrl,
} }
} }
@ -243,6 +271,13 @@ func runServer(ctx *cli.Context) error {
listener = tls.NewListener(listener, tlsConfig) listener = tls.NewListener(listener, tlsConfig)
} }
globalCtx := &context.GlobalContext{
DefaultCSP: defaultCsp,
PagesDomain: domain,
Gitea: &giteaClient,
MetricConfig: &lokiConfig,
}
var waitGroup sync.WaitGroup var waitGroup sync.WaitGroup
servers := 2 servers := 2
if acmeDisable { if acmeDisable {
@ -254,7 +289,7 @@ func runServer(ctx *cli.Context) error {
defer waitGroup.Done() defer waitGroup.Done()
log.Debug("Listening on main HTTP server") log.Debug("Listening on main HTTP server")
if err := http.Serve(listener, Handler(domain, giteaUrl, defaultCsp, &giteaClient, &lokiConfig)); err != nil { if err := http.Serve(listener, Handler(globalCtx)); err != nil {
log.Fatal(fmt.Errorf("Listening failed: %v", err)) log.Fatal(fmt.Errorf("Listening failed: %v", err))
} }
log.Debug("Listening on main HTTP server done!") log.Debug("Listening on main HTTP server done!")
@ -370,6 +405,12 @@ func main() {
Value: "", Value: "",
EnvVars: []string{"LOKI_URL"}, EnvVars: []string{"LOKI_URL"},
}, },
&cli.StringFlag{
Name: "metrics-bot-list",
Usage: "File to read a list of regular expressions modelling bot user agents from",
Value: "",
EnvVars: []string{"METRICS_BOT_LIST"},
},
}, },
} }

View File

@ -0,0 +1,30 @@
package context
import (
"net/http"
"git.polynom.me/rio/internal/metrics"
"git.polynom.me/rio/internal/repo"
)
// GlobalContext bundles the server-wide configuration and shared clients.
// It is built once in runServer and handed to the HTTP handler, which
// attaches it to every per-request Context.
type GlobalContext struct {
// Default Content-Security-Policy, taken from the "default-csp" CLI option.
DefaultCSP string
// Domain the pages are served under. Request hosts and CNAME targets are
// compared against it to recognise <user>.<pages domain> requests.
PagesDomain string
// Gitea client used for repository lookups, file retrieval and
// per-repository CSP lookups.
Gitea *repo.GiteaClient
// Loki metrics configuration consulted after a page has been served.
MetricConfig *metrics.LokiMetricConfig
}
// Context carries the data needed to serve a single page request.
// It is populated in handleSubdomain and consumed by pages.ServeFile.
type Context struct {
// User extracted from the <user>.<pages domain> value (host or CNAME target).
Username string
// Name of the repository the request was resolved to.
Reponame string
// The CNAME target if the request arrived via a CNAME, otherwise the
// requested host. Used as the domain when sending a metric ping.
Domain string
// Path of the requested file; ServeFile strips a leading "/" before use.
Path string
// HTTP Stuff
// Value of the request's "Referer" header.
Referrer string
// Value of the request's "User-Agent" header.
UserAgent string
// Writer that response headers, status code and body are written to.
Writer http.ResponseWriter
// Pointer to the global context
Global *GlobalContext
}

View File

@ -1,9 +1,11 @@
package pages package metrics
import ( import (
"encoding/json" "encoding/json"
"fmt"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -13,15 +15,28 @@ import (
type LokiMetricConfig struct { type LokiMetricConfig struct {
Url string Url string
BotUserAgents *[]regexp.Regexp
Enabled bool Enabled bool
} }
// Checks if we should send a metric ping to Loki based on the served path. // Checks if we should send a metric ping to Loki based on the served path.
func (c *LokiMetricConfig) shouldSendMetrics(path string) bool { func (c *LokiMetricConfig) ShouldSendMetrics(path, userAgent string) bool {
return strings.HasSuffix(path, ".html") && c.Enabled if !strings.HasSuffix(path, ".html") || !c.Enabled {
return false
}
// Filter out bots
for _, pattern := range *c.BotUserAgents {
if pattern.MatchString(userAgent) {
return false
}
}
return true
} }
func (c *LokiMetricConfig) sendMetricPing(domain, path string) { func (c *LokiMetricConfig) SendMetricPing(domain, path, referrer string) {
msg := fmt.Sprintf("path=\"%s\" referrer=\"%s\"", path, referrer)
data := map[string]interface{}{ data := map[string]interface{}{
"streams": []map[string]interface{}{ "streams": []map[string]interface{}{
{ {
@ -34,7 +49,7 @@ func (c *LokiMetricConfig) sendMetricPing(domain, path string) {
"values": [][]interface{}{ "values": [][]interface{}{
{ {
strconv.Itoa(int(time.Now().UnixNano())), strconv.Itoa(int(time.Now().UnixNano())),
"path=" + path, msg,
}, },
}, },
}, },
@ -72,3 +87,30 @@ func (c *LokiMetricConfig) sendMetricPing(domain, path string) {
} }
}() }()
} }
// ReadBotPatterns reads a JSON array of bot user agent patterns from the
// given file and compiles every entry into a regular expression.
//
// On any failure (unreadable file, malformed JSON, or an entry that is not a
// valid regular expression) a warning is logged and an empty slice is
// returned together with the error.
func ReadBotPatterns(file string) ([]regexp.Regexp, error) {
	content, err := ioutil.ReadFile(file)
	if err != nil {
		log.Warnf("Failed to read bot metrics file: %v", err)
		return []regexp.Regexp{}, err
	}

	var payload []string
	if err := json.Unmarshal(content, &payload); err != nil {
		log.Warnf("Failed to unmarshal file: %v", err)
		return []regexp.Regexp{}, err
	}

	// The number of patterns is known up front, so allocate once.
	patterns := make([]regexp.Regexp, 0, len(payload))
	for _, v := range payload {
		// The file is operator-supplied input: a malformed pattern must be
		// reported through the error return instead of panicking the server,
		// which is what regexp.MustCompile would do.
		re, err := regexp.Compile(v)
		if err != nil {
			log.Warnf("Failed to compile bot pattern %q: %v", v, err)
			return []regexp.Regexp{}, err
		}
		patterns = append(patterns, *re)
	}

	return patterns, nil
}

View File

@ -0,0 +1,24 @@
package metrics
import (
"regexp"
"testing"
)
// TestShouldPing verifies that ShouldSendMetrics suppresses metric pings for
// user agents matching a configured bot pattern while still allowing pings
// for ordinary browsers requesting an HTML page.
func TestShouldPing(t *testing.T) {
	botPatterns := []regexp.Regexp{
		*regexp.MustCompile("random-bot/.*"),
	}
	cfg := LokiMetricConfig{
		Url:           "",
		BotUserAgents: &botPatterns,
		Enabled:       true,
	}

	const page = "/index.html"

	// A user agent matching a bot pattern must not trigger a ping.
	if pinged := cfg.ShouldSendMetrics(page, "random-bot/v23.5"); pinged {
		t.Fatalf("Accepted bot user-agent")
	}

	// A user agent matching no pattern must trigger a ping.
	if pinged := cfg.ShouldSendMetrics(page, "Firefox/..."); !pinged {
		t.Fatalf("Rejected real user-agent")
	}
}

View File

@ -8,6 +8,7 @@ import (
"time" "time"
"git.polynom.me/rio/internal/constants" "git.polynom.me/rio/internal/constants"
"git.polynom.me/rio/internal/context"
"git.polynom.me/rio/internal/repo" "git.polynom.me/rio/internal/repo"
"github.com/patrickmn/go-cache" "github.com/patrickmn/go-cache"
@ -45,13 +46,14 @@ func addHeaders(csp, contentType string, contentLength int, w http.ResponseWrite
} }
} }
func ServeFile(username, reponame, path, defaultCsp, domain string, giteaClient *repo.GiteaClient, metricConfig *LokiMetricConfig, w http.ResponseWriter) { func ServeFile(context *context.Context) {
// Strip away a starting / as it messes with Gitea // Strip away a starting / as it messes with Gitea
path := context.Path
if path[:1] == "/" { if path[:1] == "/" {
path = path[1:] path = path[1:]
} }
key := makePageContentCacheEntry(username, path) key := makePageContentCacheEntry(context.Username, path)
entry, found := pageCache.Get(key) entry, found := pageCache.Get(key)
var content []byte var content []byte
var mimeType string var mimeType string
@ -65,25 +67,25 @@ func ServeFile(username, reponame, path, defaultCsp, domain string, giteaClient
since = &sinceRaw since = &sinceRaw
} }
content, changed, err := giteaClient.GetFile( content, changed, err := context.Global.Gitea.GetFile(
username, context.Username,
reponame, context.Reponame,
constants.PagesBranch, constants.PagesBranch,
path, path,
since, since,
) )
csp := repo.GetCSPForRepository(username, reponame, "", giteaClient) csp := repo.GetCSPForRepository(context.Username, context.Reponame, "", context.Global.Gitea)
if err != nil { if err != nil {
if !found { if !found {
log.Errorf("Failed to get file %s/%s/%s (%s)", username, reponame, path, err) log.Errorf("Failed to get file %s/%s/%s (%s)", context.Username, context.Reponame, path, err)
addHeaders(csp, "text/html", 0, w) addHeaders(csp, "text/html", 0, context.Writer)
w.WriteHeader(404) context.Writer.WriteHeader(404)
} else { } else {
log.Debugf("Request failed but page %s is cached in memory", path) log.Debugf("Request failed but page %s is cached in memory", path)
addHeaders(csp, mimeType, len(content), w) addHeaders(csp, mimeType, len(content), context.Writer)
w.WriteHeader(200) context.Writer.WriteHeader(200)
w.Write(content) context.Writer.Write(content)
} }
return return
@ -91,9 +93,9 @@ func ServeFile(username, reponame, path, defaultCsp, domain string, giteaClient
if found && !changed { if found && !changed {
log.Debugf("Page %s is unchanged and cached in memory", path) log.Debugf("Page %s is unchanged and cached in memory", path)
addHeaders(csp, mimeType, len(content), w) addHeaders(csp, mimeType, len(content), context.Writer)
w.WriteHeader(200) context.Writer.WriteHeader(200)
w.Write(content) context.Writer.Write(content)
return return
} }
@ -113,12 +115,12 @@ func ServeFile(username, reponame, path, defaultCsp, domain string, giteaClient
) )
log.Debugf("Page %s requested from Gitea and cached in memory at %v", path, now) log.Debugf("Page %s requested from Gitea and cached in memory at %v", path, now)
addHeaders(csp, mimeType, len(content), w) addHeaders(csp, mimeType, len(content), context.Writer)
w.WriteHeader(200) context.Writer.WriteHeader(200)
w.Write(content) context.Writer.Write(content)
// Tell Loki about if, if desired // Tell Loki about if, if desired
if metricConfig.shouldSendMetrics(path) { if context.Global.MetricConfig.ShouldSendMetrics(path, context.UserAgent) {
metricConfig.sendMetricPing(domain, path) context.Global.MetricConfig.SendMetricPing(context.Domain, path, context.Referrer)
} }
} }