diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index e1b1b8e..6716eb1 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: - go-version: [1.24.5] + go-version: [1.25.1] steps: - name: Checkout code @@ -40,12 +40,9 @@ jobs: run: go mod verify - name: Install golangci-lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v7 with: version: latest - skip-cache: false - skip-pkg-cache: false - skip-build-cache: false - name: Run linting run: make lint diff --git a/Dockerfile b/Dockerfile index cddbeec..20500cb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.24.5-bullseye AS build +FROM golang:1.25.1-bookworm AS build WORKDIR /go/src/sei-load diff --git a/go.mod b/go.mod index 6784e82..1678f0c 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/sei-protocol/sei-load -go 1.24.5 +go 1.25.1 require ( github.com/ethereum/go-ethereum v1.16.1 @@ -9,14 +9,19 @@ require ( github.com/prometheus/client_golang v1.22.0 github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.20.1 - github.com/stretchr/testify v1.10.0 - go.opentelemetry.io/otel v1.37.0 + github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 go.opentelemetry.io/otel/exporters/prometheus v0.59.1 - go.opentelemetry.io/otel/metric v1.37.0 - go.opentelemetry.io/otel/sdk/metric v1.37.0 - golang.org/x/sync v0.16.0 + go.opentelemetry.io/otel/metric v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/sdk/metric v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + golang.org/x/sync v0.20.0 golang.org/x/time v0.9.0 - google.golang.org/protobuf v1.36.6 + google.golang.org/protobuf 
v1.36.11 ) require ( @@ -24,6 +29,7 @@ require ( github.com/StackExchange/wmi v1.2.1 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bits-and-blooms/bitset v1.22.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/consensys/gnark-crypto v0.18.0 // indirect github.com/crate-crypto/go-eth-kzg v1.3.0 // indirect @@ -33,6 +39,7 @@ require ( github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect github.com/ethereum/c-kzg-4844/v2 v2.1.0 // indirect github.com/ethereum/go-verkle v0.2.2 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -41,6 +48,7 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/holiman/uint256 v1.3.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect @@ -60,13 +68,17 @@ require ( github.com/supranational/blst v0.3.15 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/otel/sdk v1.37.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect - golang.org/x/crypto v0.40.0 // indirect - golang.org/x/sys v0.34.0 // indirect - golang.org/x/text v0.27.0 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/net v0.52.0 // indirect + 
golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 0852756..528de49 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4= github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/cp v0.1.0 h1:SE+dxFebS7Iik5LK0tsi1k9ZCxEaFX4AjQmoyA+1dJk= github.com/cespare/cp v0.1.0/go.mod h1:SOGHArjBr4JWaSDEVpWpo/hNg6RoKrls6Oh40hiwW+s= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -51,6 +53,8 @@ github.com/ethereum/go-ethereum v1.16.1 h1:7684NfKCb1+IChudzdKyZJ12l1Tq4ybPZOITi github.com/ethereum/go-ethereum v1.16.1/go.mod h1:ngYIvmMAYdo4sGW9cGzLvSsPGhDOOzL0jK5S5iXpj0g= github.com/ethereum/go-verkle v0.2.2 h1:I2W0WjnrFUIzzVPwm8ykY+7pL2d4VhlsePn4j7cnFk8= github.com/ethereum/go-verkle v0.2.2/go.mod h1:M3b90YRnzqKyyzBEWJGqj8Qff4IDeXnzFw0P9bFw3uk= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/ferranbt/fastssz v0.1.2 h1:Dky6dXlngF6Qjc+EfDipAkE83N5I5DE68bY6O0VLNPk= github.com/ferranbt/fastssz v0.1.2/go.mod h1:X5UPrE2u1UJjxHA8X54u04SBwdAQjG2sFtWs39YxyWs= github.com/frankban/quicktest v1.14.6 
h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= @@ -77,6 +81,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo= github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -91,6 +97,8 @@ github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrR github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= github.com/graph-gophers/graphql-go v1.3.0 h1:Eb9x/q6MFpCLz7jBCiP/WTxjSDrYLR1QY41SORZyNJ0= github.com/graph-gophers/graphql-go v1.3.0/go.mod h1:9CQHMSxwO4MprSdzoIEobiHpoLtHm77vfxsvsIN5Vuc= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= github.com/hashicorp/go-bexpr v0.1.10 h1:9kuI5PFotCboP3dkDYFr/wi0gg0QVbSNz5oFRpxn4uE= github.com/hashicorp/go-bexpr v0.1.10/go.mod h1:oxlubA2vC/gFVfX1A6JGp7ls7uCDlfJn732ehYYg+g0= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4 h1:X4egAf/gcS1zATw6wn4Ej8vjuVGxeHdan+bRb2ebyv4= @@ -171,8 +179,8 @@ github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7D github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/rivo/uniseg 
v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -196,8 +204,8 @@ github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4= github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/supranational/blst v0.3.15 h1:rd9viN6tfARE5wv3KZJ9H8e1cg0jXW8syFCcsbHa76o= @@ -212,44 +220,64 @@ github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 
h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 h1:8UQVDcZxOJLtX6gxtDt3vY2WTgvZqMQRzjsqiIHQdkc= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0/go.mod h1:2lmweYCiHYpEjQ/lSJBYhj9jP1zvCvQW4BqL9dnT7FQ= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= go.opentelemetry.io/otel/exporters/prometheus v0.59.1 h1:HcpSkTkJbggT8bjYP+BjyqPWlD17BH9C5CYNKeDzmcA= go.opentelemetry.io/otel/exporters/prometheus v0.59.1/go.mod 
h1:0FJL+gjuUoM07xzik3KPBaN+nz/CoB15kV6WLMiXZag= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod 
h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= -golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= -golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df h1:UA2aFVmmsIlefxMk29Dp2juaUSth8Pyn3Tq5Y5mJGME= golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= -golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= -golang.org/x/text v0.27.0/go.mod 
h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/main.go b/main.go index cf61350..4cf9cd4 100644 --- a/main.go +++ b/main.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "errors" "fmt" "log" "net/http" @@ -13,15 +14,15 @@ import ( "time" "github.com/ethereum/go-ethereum/ethclient" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/exporters/prometheus" - "go.opentelemetry.io/otel/sdk/metric" "golang.org/x/time/rate" "github.com/sei-protocol/sei-load/config" "github.com/sei-protocol/sei-load/generator" + "github.com/sei-protocol/sei-load/observability" "github.com/sei-protocol/sei-load/sender" "github.com/sei-protocol/sei-load/stats" "github.com/sei-protocol/sei-load/utils" @@ -147,9 +148,48 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command, args []string) error { listenAddr := cmd.Flag("metricsListenAddr").Value.String() log.Printf("serving metrics at %s/metrics", listenAddr) - if err := exportPrometheusMetrics(ctx, listenAddr); err != nil { - return err + obsShutdown, err := observability.Setup(ctx, observability.Config{ + RunScope: observability.RunScopeFromEnv(), + OTLPEndpoint: os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), + }) + if err != nil { + return fmt.Errorf("observability setup: %w", err) + } + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := obsShutdown(shutdownCtx); err != nil { + log.Printf("observability shutdown: %v", err) + } + }() + + // EnableOpenMetrics is load-bearing: the default promhttp.Handler() strips + // exemplars regardless of the scraper's Accept header. 
+ mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor( + prometheus.DefaultGatherer, + promhttp.HandlerOpts{EnableOpenMetrics: true}, + )) + metricsServer := &http.Server{ + Addr: listenAddr, + Handler: mux, + ReadHeaderTimeout: 5 * time.Second, } + go func() { + if err := metricsServer.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + log.Printf("failed to serve metrics: %v", err) + } + }() + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := metricsServer.Shutdown(shutdownCtx); err != nil { + log.Printf("metrics server shutdown: %v", err) + } + }() + + ctx, runSpan := otel.Tracer("github.com/sei-protocol/sei-load").Start(ctx, "seiload.run") + defer runSpan.End() // Create statistics collector and logger collector := stats.NewCollector() @@ -315,28 +355,11 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command, args []string) error { if settings.RampUp && ramper != nil { ramper.LogFinalStats() } + collector.EmitRunSummary(ctx) log.Printf("👋 Shutdown complete") return err } -func exportPrometheusMetrics(ctx context.Context, listenAddr string) error { - metricsExporter, err := prometheus.New(prometheus.WithNamespace("seiload")) - if err != nil { - return fmt.Errorf("failed to create Prometheus exporter: %w", err) - } - otel.SetMeterProvider(metric.NewMeterProvider(metric.WithReader(metricsExporter))) - go func() { - defer func() { _ = metricsExporter.Shutdown(ctx) }() - http.Handle("/metrics", promhttp.Handler()) - err := http.ListenAndServe(listenAddr, nil) - if err != nil { - log.Printf("failed to serve metrics: %v", err) - return - } - }() - return nil -} - // loadConfig reads and parses the configuration file func loadConfig(filename string) (*config.LoadConfig, error) { data, err := os.ReadFile(filename) diff --git a/observability/README.md b/observability/README.md new file mode 100644 index 0000000..7c974c2 --- /dev/null +++ 
b/observability/README.md @@ -0,0 +1,24 @@ +# observability + +OTel SDK bootstrap for sei-load. `Setup` installs MeterProvider + TracerProvider, Resource, Prometheus + optional OTLP exporters, and the W3C propagator. + +Full design: [`docs/designs/sei-load-observability.md`](https://github.com/sei-protocol/platform/blob/main/docs/designs/sei-load-observability.md) in `sei-protocol/platform`. + +## Invariants you won't want to break + +- **Run scope rides on the Resource, never per-sample labels.** `run_id`, `commit_id`, `chain_id`, `workload`, `service.instance.id` are process-lifetime constants. Putting them on every metric sample would multiply cardinality by (runs × endpoints × workers × buckets). Prometheus joins them in via `target_info{run_id=...}`. +- **`newSchemaless` merging with `resource.Default()`.** `resource.NewWithAttributes(semconv.SchemaURL, ...)` collides with `Default()`'s own schema URL and errors. Schemaless app attributes + schema-stamped Default is the supported pattern. +- **Shutdown providers before exporters.** `MeterProvider.Shutdown` / `TracerProvider.Shutdown` cascade a final flush into their readers and exporters. Explicit exporter shutdown before provider shutdown drops the last OTLP batch (PeriodicReader + BatchSpanProcessor buffer in memory). Prometheus pull-readers are immune but we keep the invariant uniform. +- **Composite propagator is not optional.** Without `SetTextMapPropagator(TraceContext + Baggage)`, `otelhttp` creates spans but silently omits `traceparent` on outbound requests. + +## Exemplar requirements + +Trace-ID exemplars on histograms need all three: + +1. OTel SDK ≥ v1.28 (we pin v1.43). +2. `promhttp.HandlerFor(DefaultGatherer, HandlerOpts{EnableOpenMetrics: true})` — the default `promhttp.Handler()` never negotiates OpenMetrics and silently strips exemplars regardless of the scraper's `Accept` header. +3. 
Prometheus server with `enableFeatures: [exemplar-storage]` (set in `clusters/harbor/monitoring/prometheus-operator.yaml`). + +## Cluster-of-seiload + +`service.instance.id` defaults to `os.Hostname()` when `SEILOAD_INSTANCE_ID` is unset, so multi-pod deployments disambiguate by pod name automatically. In Kubernetes, wire the env var via downward API (`fieldRef: metadata.name`) for a human-readable attribute. diff --git a/observability/setup.go b/observability/setup.go new file mode 100644 index 0000000..177368f --- /dev/null +++ b/observability/setup.go @@ -0,0 +1,178 @@ +// Package observability configures OpenTelemetry for sei-load. +// See README.md for invariants and exemplar requirements. +package observability + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/propagation" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" +) + +// RunScope identifies a single sei-load invocation. Values ride on the OTel +// Resource, not per-sample metric labels (see README cardinality rationale). +type RunScope struct { + ServiceVersion string // sei-load's own build version (distinct from CommitID, which names what's under test) + RunID string // e.g. GHA run id for autobake, benchmark job id elsewhere + ChainID string // ephemeral test chain for this run + CommitID string // sei-chain commit under test; exported as seiload.commit_id + 8-char seiload.commit_id_short + Workload string // "autobake" | "benchmark" | "loadtest"; alert rules match on this + InstanceID string // unique per process; falls back to hostname. 
Disambiguates cluster-of-seiload pods. +} + +// RunScopeFromEnv reads SEILOAD_*. Missing values stay empty. +func RunScopeFromEnv() RunScope { + return RunScope{ + ServiceVersion: os.Getenv("SEILOAD_SERVICE_VERSION"), + RunID: os.Getenv("SEILOAD_RUN_ID"), + ChainID: os.Getenv("SEILOAD_CHAIN_ID"), + CommitID: os.Getenv("SEILOAD_COMMIT_ID"), + Workload: os.Getenv("SEILOAD_WORKLOAD"), + InstanceID: os.Getenv("SEILOAD_INSTANCE_ID"), + } +} + +func shortCommit(full string) string { + if len(full) <= 8 { + return full + } + return full[:8] +} + +func buildResource(rs RunScope) (*resource.Resource, error) { + instanceID := rs.InstanceID + if instanceID == "" { + host, err := os.Hostname() + if err == nil { + instanceID = host + } + } + + attrs := []attribute.KeyValue{ + semconv.ServiceName("seiload"), + semconv.ServiceInstanceID(instanceID), + } + if rs.ServiceVersion != "" { + attrs = append(attrs, semconv.ServiceVersion(rs.ServiceVersion)) + } + if rs.RunID != "" { + attrs = append(attrs, attribute.String("seiload.run_id", rs.RunID)) + } + if rs.ChainID != "" { + attrs = append(attrs, attribute.String("seiload.chain_id", rs.ChainID)) + } + if rs.CommitID != "" { + attrs = append(attrs, + attribute.String("seiload.commit_id", rs.CommitID), + attribute.String("seiload.commit_id_short", shortCommit(rs.CommitID)), + ) + } + if rs.Workload != "" { + attrs = append(attrs, attribute.String("seiload.workload", rs.Workload)) + } + + // NewSchemaless avoids "conflicting Schema URL" on merge with Default(). + return resource.Merge( + resource.Default(), + resource.NewSchemaless(attrs...), + ) +} + +// Config knobs for Setup. Zero value is usable (Prometheus-only, no OTLP). +type Config struct { + RunScope RunScope + PrometheusNamespace string // defaults to "seiload" + OTLPEndpoint string // non-empty activates OTLP gRPC for metrics + traces +} + +// Setup installs MeterProvider, TracerProvider, W3C propagator, and returns a +// shutdown func. 
Call once from main before any telemetry emits. +func Setup(ctx context.Context, cfg Config) (shutdown func(context.Context) error, err error) { + res, err := buildResource(cfg.RunScope) + if err != nil { + return nil, fmt.Errorf("build resource: %w", err) + } + + ns := cfg.PrometheusNamespace + if ns == "" { + ns = "seiload" + } + promExporter, err := prometheus.New(prometheus.WithNamespace(ns)) + if err != nil { + return nil, fmt.Errorf("prometheus exporter: %w", err) + } + + meterOpts := []sdkmetric.Option{ + sdkmetric.WithResource(res), + sdkmetric.WithReader(promExporter), + } + tracerOpts := []sdktrace.TracerProviderOption{ + sdktrace.WithResource(res), + } + + if cfg.OTLPEndpoint != "" { + metricExp, mErr := otlpmetricgrpc.New(ctx, + otlpmetricgrpc.WithEndpoint(stripScheme(cfg.OTLPEndpoint)), + otlpmetricgrpc.WithInsecure(), + ) + if mErr != nil { + return nil, fmt.Errorf("otlp metric exporter: %w", mErr) + } + meterOpts = append(meterOpts, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp))) + + traceExp, tErr := otlptracegrpc.New(ctx, + otlptracegrpc.WithEndpoint(stripScheme(cfg.OTLPEndpoint)), + otlptracegrpc.WithInsecure(), + ) + if tErr != nil { + return nil, fmt.Errorf("otlp trace exporter: %w", tErr) + } + tracerOpts = append(tracerOpts, sdktrace.WithBatcher(traceExp)) + } + + mp := sdkmetric.NewMeterProvider(meterOpts...) + otel.SetMeterProvider(mp) + tp := sdktrace.NewTracerProvider(tracerOpts...) + otel.SetTracerProvider(tp) + + // Provider.Shutdown cascades flush into exporters; see README. + shutdowns := []func(context.Context) error{mp.Shutdown, tp.Shutdown} + + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + return func(shutdownCtx context.Context) error { + var errs []error + for _, fn := range shutdowns { + if sErr := fn(shutdownCtx); sErr != nil { + errs = append(errs, sErr) + } + } + return errors.Join(errs...) 
+ }, nil +} + +// stripScheme trims http://, https://, grpc://, dns:/// so the value fits +// otlpmetricgrpc.WithEndpoint (which wants bare host:port). +func stripScheme(endpoint string) string { + for _, prefix := range []string{"http://", "https://", "grpc://", "dns:///"} { + if strings.HasPrefix(endpoint, prefix) { + return strings.TrimPrefix(endpoint, prefix) + } + } + return endpoint +} diff --git a/observability/setup_test.go b/observability/setup_test.go new file mode 100644 index 0000000..1c4b768 --- /dev/null +++ b/observability/setup_test.go @@ -0,0 +1,144 @@ +package observability + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/propagation" +) + +func TestShortCommit(t *testing.T) { + require.Equal(t, "", shortCommit("")) + require.Equal(t, "abc", shortCommit("abc")) + require.Equal(t, "abcdef12", shortCommit("abcdef1234567890")) + require.Equal(t, "12345678", shortCommit("12345678")) +} + +func TestRunScopeFromEnv(t *testing.T) { + t.Setenv("SEILOAD_RUN_ID", "run-42") + t.Setenv("SEILOAD_CHAIN_ID", "autobake-42-1") + t.Setenv("SEILOAD_COMMIT_ID", "deadbeefcafef00d") + t.Setenv("SEILOAD_WORKLOAD", "autobake") + t.Setenv("SEILOAD_INSTANCE_ID", "seiload-abc-0") + t.Setenv("SEILOAD_SERVICE_VERSION", "v1.2.3") + + rs := RunScopeFromEnv() + require.Equal(t, "run-42", rs.RunID) + require.Equal(t, "autobake-42-1", rs.ChainID) + require.Equal(t, "deadbeefcafef00d", rs.CommitID) + require.Equal(t, "autobake", rs.Workload) + require.Equal(t, "seiload-abc-0", rs.InstanceID) + require.Equal(t, "v1.2.3", rs.ServiceVersion) +} + +func TestBuildResource_FullScope(t *testing.T) { + rs := RunScope{ + ServiceVersion: "v1.2.3", + RunID: "run-42", + ChainID: "autobake-42-1", + CommitID: "deadbeefcafef00d", + Workload: "autobake", + InstanceID: "seiload-abc-0", + } + res, err := buildResource(rs) + require.NoError(t, err) + + want := 
map[string]string{ + "service.name": "seiload", + "service.version": "v1.2.3", + "service.instance.id": "seiload-abc-0", + "seiload.run_id": "run-42", + "seiload.chain_id": "autobake-42-1", + "seiload.commit_id": "deadbeefcafef00d", + "seiload.commit_id_short": "deadbeef", + "seiload.workload": "autobake", + } + got := resourceAttrs(res.Attributes()) + for k, v := range want { + require.Equal(t, v, got[k], "attr %q", k) + } +} + +func TestBuildResource_EmptyScope(t *testing.T) { + // Empty RunScope should still produce a Resource with service.name. + // service.instance.id falls back to os.Hostname(); just ensure it's not + // empty so downstream doesn't see a missing label. + res, err := buildResource(RunScope{}) + require.NoError(t, err) + got := resourceAttrs(res.Attributes()) + require.Equal(t, "seiload", got["service.name"]) + require.NotEmpty(t, got["service.instance.id"], "service.instance.id should fall back to hostname") + require.Empty(t, got["seiload.run_id"]) + require.Empty(t, got["seiload.commit_id_short"]) +} + +func TestSetup_PrometheusOnly(t *testing.T) { + // Ensure OTLP is NOT activated when endpoint is empty. + t.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "") // belt-and-suspenders + + ctx := context.Background() + shutdown, err := Setup(ctx, Config{ + RunScope: RunScope{RunID: "test-run"}, + PrometheusNamespace: "seiload_test", + }) + require.NoError(t, err) + t.Cleanup(func() { _ = shutdown(ctx) }) + + // Global providers should be populated (not NoOp). + require.NotNil(t, otel.GetMeterProvider()) + require.NotNil(t, otel.GetTracerProvider()) + + // Propagator should include TraceContext + Baggage fields. 
+ fields := otel.GetTextMapPropagator().Fields() + require.Contains(t, fields, "traceparent", "traceparent field must be present for W3C trace propagation") + require.Contains(t, fields, "baggage", "baggage field must be present for cross-process attribute propagation") +} + +func TestSetup_PropagatorInstalled(t *testing.T) { + // After Setup, the global propagator should be a CompositeTextMapPropagator + // with TraceContext + Baggage — without this, otelhttp creates spans but + // never injects traceparent, and cross-process traces silently break. + ctx := context.Background() + shutdown, err := Setup(ctx, Config{}) + require.NoError(t, err) + t.Cleanup(func() { _ = shutdown(ctx) }) + + p := otel.GetTextMapPropagator() + require.NotNil(t, p) + + // TraceContext registers "traceparent" + "tracestate"; Baggage registers "baggage". + fields := p.Fields() + require.Contains(t, fields, "traceparent") + require.Contains(t, fields, "tracestate") + require.Contains(t, fields, "baggage") + + // Sanity check with a sample carrier: injection shouldn't panic. 
+ p.Inject(ctx, propagation.MapCarrier{}) +} + +func TestStripScheme(t *testing.T) { + cases := map[string]string{ + "": "", + "otel-collector:4317": "otel-collector:4317", + "http://host:4317": "host:4317", + "https://host:4317": "host:4317", + "grpc://host:4317": "host:4317", + "dns:///host:4317": "host:4317", + "host:4317/path": "host:4317/path", + "grpc://already/stripped": "already/stripped", + } + for in, want := range cases { + require.Equal(t, want, stripScheme(in), "input=%q", in) + } +} + +func resourceAttrs(kvs []attribute.KeyValue) map[string]string { + out := make(map[string]string, len(kvs)) + for _, kv := range kvs { + out[string(kv.Key)] = kv.Value.Emit() + } + return out +} diff --git a/sender/metrics.go b/sender/metrics.go index 268faa0..ab9d39d 100644 --- a/sender/metrics.go +++ b/sender/metrics.go @@ -9,47 +9,75 @@ import ( "go.opentelemetry.io/otel/metric" ) +// Acquired at package init, before observability.Setup installs the real +// MeterProvider. Safe because OTel Go's global is a delegating provider: +// meters and instruments created against it forward to the real provider +// once SetMeterProvider is called. See go.opentelemetry.io/otel/internal/global. +var meter = otel.Meter("github.com/sei-protocol/sei-load/sender") + +// Synchronous instruments — read by Record/Add call sites. 
var ( - meter = otel.Meter("seiload/sender") + sendLatency = must(meter.Float64Histogram( + "send_latency", + metric.WithDescription("Latency of sending transactions in seconds"), + metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(0.1, 0.2, 0.3, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0))) - meteredChainWorkers = &chainWorkerObserver{ - workers: make(map[chainWorkerID]*Worker), - } + receiptLatency = must(meter.Float64Histogram( + "receipt_latency", + metric.WithDescription("Latency from transaction submission to receipt confirmation in seconds"), + metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(0.1, 0.2, 0.3, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0))) - metrics = struct { - sendLatency metric.Float64Histogram - receiptLatency metric.Float64Histogram - workerQueueLength metric.Int64ObservableGauge - }{ - sendLatency: must(meter.Float64Histogram( - "send_latency", - metric.WithDescription("Latency of sending transactions in seconds"), - metric.WithUnit("s"), - metric.WithExplicitBucketBoundaries(0.1, 0.2, 0.3, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0))), - receiptLatency: must(meter.Float64Histogram( - "receipt_latency", - metric.WithDescription("Latency of sending transactions in seconds"), - metric.WithUnit("s"), - metric.WithExplicitBucketBoundaries(0.1, 0.2, 0.3, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0))), - workerQueueLength: must(meter.Int64ObservableGauge( - "worker_queue_length", - metric.WithDescription("Length of the worker's queue"), - metric.WithUnit("{count}"), - metric.WithInt64Callback(func(ctx context.Context, observer metric.Int64Observer) error { - meteredChainWorkers.lock.RLock() - defer meteredChainWorkers.lock.RUnlock() - for _, worker := range meteredChainWorkers.workers { - observer.Observe(int64(worker.GetChannelLength()), metric.WithAttributes( - attribute.String("endpoint", worker.GetEndpoint()), - attribute.Int("worker_id", worker.id), - attribute.String("chain_id", worker.seiChainID), - )) - } - return nil - }))), - } + httpErrors = 
must(meter.Int64Counter( + "http_errors", + metric.WithDescription("HTTP error responses from the target endpoint, by status code"), + metric.WithUnit("{errors}"))) + + txsAccepted = must(meter.Int64Counter( + "txs_accepted", + metric.WithDescription("Transactions successfully submitted to an endpoint"), + metric.WithUnit("{transactions}"))) + + txsRejected = must(meter.Int64Counter( + "txs_rejected", + metric.WithDescription("Transactions rejected by the target or local client, by reason"), + metric.WithUnit("{transactions}"))) ) +// Observable instruments — registered in init for their callback side effect. +// Return values are discarded because OTel invokes the callbacks on each +// collection; we never read the instrument handles. +func init() { + must(meter.Int64ObservableGauge( + "worker_queue_length", + metric.WithDescription("Length of the worker's queue"), + metric.WithUnit("{count}"), + metric.WithInt64Callback(func(ctx context.Context, observer metric.Int64Observer) error { + meteredChainWorkers.lock.RLock() + defer meteredChainWorkers.lock.RUnlock() + for _, worker := range meteredChainWorkers.workers { + observer.Observe(int64(worker.GetChannelLength()), metric.WithAttributes( + attribute.String("endpoint", worker.GetEndpoint()), + attribute.Int("worker_id", worker.id), + attribute.String("chain_id", worker.seiChainID), + )) + } + return nil + }))) + + must(meter.Float64ObservableGauge( + "tps_achieved", + metric.WithDescription("Most recent TPS sample observed by the sender, per endpoint/scenario"), + metric.WithUnit("{transactions}/s"), + metric.WithFloat64Callback(observeTPS))) +} + +// meteredChainWorkers is the registry the worker_queue_length callback reads. 
+var meteredChainWorkers = &chainWorkerObserver{ + workers: make(map[chainWorkerID]*Worker), +} + type chainWorkerObserver struct { lock sync.RWMutex workers map[chainWorkerID]*Worker @@ -71,6 +99,39 @@ func meterWorkerQueueLength(worker *Worker) { } } +var tpsObserverRegistry = struct { + lock sync.RWMutex + samples map[tpsSampleKey]float64 +}{ + samples: make(map[tpsSampleKey]float64), +} + +type tpsSampleKey struct { + endpoint string + chainID string + scenario string +} + +// RecordTPSSample publishes the latest TPS sample read by the tps_achieved gauge. +func RecordTPSSample(endpoint, chainID, scenario string, tps float64) { + tpsObserverRegistry.lock.Lock() + defer tpsObserverRegistry.lock.Unlock() + tpsObserverRegistry.samples[tpsSampleKey{endpoint, chainID, scenario}] = tps +} + +func observeTPS(_ context.Context, observer metric.Float64Observer) error { + tpsObserverRegistry.lock.RLock() + defer tpsObserverRegistry.lock.RUnlock() + for k, v := range tpsObserverRegistry.samples { + observer.Observe(v, metric.WithAttributes( + attribute.String("endpoint", k.endpoint), + attribute.String("chain_id", k.chainID), + attribute.String("scenario", k.scenario), + )) + } + return nil +} + func statusAttrFromError(err error) attribute.KeyValue { const key = "status" if err == nil { @@ -79,7 +140,6 @@ func statusAttrFromError(err error) attribute.KeyValue { return attribute.String(key, "failure") } -// must panics if err is non-nil, otherwise returns v. func must[V any](v V, err error) V { if err != nil { panic(err) diff --git a/sender/ramper.go b/sender/ramper.go index a207908..209384c 100644 --- a/sender/ramper.go +++ b/sender/ramper.go @@ -18,7 +18,7 @@ import ( // If we successfully pass a given TPS, we will pause for PauseTime, and then start the next step. // If we fail to pass a given TPS, we will stop the loadtest. 
-var ErrRampTestFailedSLO = errors.New("Ramp Test failed SLO") +var ErrRampTestFailedSLO = errors.New("ramp test failed SLO") func (r *Ramper) FormatRampStats() string { return fmt.Sprintf(` diff --git a/sender/worker.go b/sender/worker.go index d7a17fb..3c15862 100644 --- a/sender/worker.go +++ b/sender/worker.go @@ -13,8 +13,11 @@ import ( "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/ethclient" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" "golang.org/x/time/rate" "github.com/sei-protocol/sei-load/stats" @@ -23,6 +26,8 @@ import ( "github.com/sei-protocol/sei-load/utils/service" ) +var tracer = otel.Tracer("github.com/sei-protocol/sei-load/sender") + // Worker handles sending transactions to a specific endpoint type Worker struct { id int @@ -39,21 +44,49 @@ type Worker struct { limiter *rate.Limiter // Shared rate limiter for transaction sending } -func newHttpClient() *http.Client { +// HttpClientOption configures the Transport used by newHttpClient. +type HttpClientOption func(*http.Transport) + +// WithMaxIdleConns overrides the global idle-connection pool size. +func WithMaxIdleConns(n int) HttpClientOption { + return func(t *http.Transport) { t.MaxIdleConns = n } +} + +// WithMaxIdleConnsPerHost overrides the per-host idle-connection pool size. +// Scale with goroutine count to avoid TCP re-dial on each completion. +func WithMaxIdleConnsPerHost(n int) HttpClientOption { + return func(t *http.Transport) { t.MaxIdleConnsPerHost = n } +} + +// newHttpTransport is the base transport factory. Exists separately so tests +// can inspect the unwrapped *http.Transport; newHttpClient returns it wrapped +// in otelhttp, whose inner transport isn't publicly accessible. 
+func newHttpTransport(opts ...HttpClientOption) *http.Transport { + t := &http.Transport{ + DialContext: (&net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + MaxIdleConns: 500, + MaxIdleConnsPerHost: 50, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + DisableKeepAlives: false, + } + for _, opt := range opts { + opt(t) + } + return t +} + +// newHttpClient returns an otelhttp-wrapped client: injects traceparent on +// outbound, emits http.client.* metrics. Requires observability.Setup to have +// installed the global TextMapPropagator. +func newHttpClient(opts ...HttpClientOption) *http.Client { return &http.Client{ - Timeout: 30 * time.Second, - Transport: &http.Transport{ - DialContext: (&net.Dialer{ - Timeout: 10 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - MaxIdleConns: 100, - MaxIdleConnsPerHost: 10, - IdleConnTimeout: 90 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - DisableKeepAlives: false, - }, + Timeout: 30 * time.Second, + Transport: otelhttp.NewTransport(newHttpTransport(opts...)), } } @@ -115,31 +148,51 @@ func (w *Worker) watchTransactions(ctx context.Context) error { if w.dryRun || !w.trackReceipts { return nil } - eth, err := ethclient.Dial(w.endpoint) + dialCtx, dialSpan := tracer.Start(ctx, "sender.dial_endpoint", trace.WithAttributes( + attribute.String("seiload.endpoint", w.endpoint), + attribute.String("seiload.chain_id", w.seiChainID), + attribute.Int("seiload.worker_id", w.id), + )) + eth, err := ethclient.DialContext(dialCtx, w.endpoint) if err != nil { + dialSpan.RecordError(err) + dialSpan.End() return fmt.Errorf("ethclient.Dial(%q): %w", w.endpoint, err) } + dialSpan.End() for ctx.Err() == nil { tx, err := utils.Recv(ctx, w.sentTxs) if err != nil { return err } - ctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - if err := 
w.waitForReceipt(ctx, eth, tx); err != nil { + // Cancel per-iteration; defer would leak contexts under sustained load. + waitCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + if err := w.waitForReceipt(waitCtx, eth, tx); err != nil { log.Printf("❌ %v", err) } + cancel() } return ctx.Err() } func (w *Worker) waitForReceipt(ctx context.Context, eth *ethclient.Client, tx *types.LoadTx) (_err error) { + ctx, span := tracer.Start(ctx, "sender.check_receipt", trace.WithAttributes( + attribute.String("seiload.scenario", tx.Scenario.Name), + attribute.String("seiload.endpoint", w.endpoint), + attribute.Int("seiload.worker_id", w.id), + attribute.String("seiload.chain_id", w.seiChainID), + )) defer func(start time.Time) { - metrics.receiptLatency.Record(ctx, time.Since(start).Seconds(), + if _err != nil { + span.RecordError(_err) + } + span.End() + // Record inside the span ctx so exemplars link to the trace. + // worker_id stays off the histogram (cardinality); available via span. + receiptLatency.Record(ctx, time.Since(start).Seconds(), metric.WithAttributes( attribute.String("scenario", tx.Scenario.Name), attribute.String("endpoint", w.endpoint), - attribute.Int("worker_id", w.id), attribute.String("chain_id", w.seiChainID), statusAttrFromError(_err)), ) @@ -200,12 +253,22 @@ func (w *Worker) processTransactions(ctx context.Context, client *http.Client) e // sendTransaction sends a single transaction to the endpoint func (w *Worker) sendTransaction(ctx context.Context, client *http.Client, tx *types.LoadTx) (_err error) { + ctx, span := tracer.Start(ctx, "sender.send_tx", trace.WithAttributes( + attribute.String("seiload.scenario", tx.Scenario.Name), + attribute.String("seiload.endpoint", w.endpoint), + attribute.Int("seiload.worker_id", w.id), + attribute.String("seiload.chain_id", w.seiChainID), + )) defer func(start time.Time) { - metrics.sendLatency.Record(ctx, time.Since(start).Seconds(), + if _err != nil { + span.RecordError(_err) + } + span.End() + // See 
receiptLatency above re: span-context recording + no worker_id. + sendLatency.Record(ctx, time.Since(start).Seconds(), metric.WithAttributes( attribute.String("scenario", tx.Scenario.Name), attribute.String("endpoint", w.endpoint), - attribute.Int("worker_id", w.id), attribute.String("chain_id", w.seiChainID), statusAttrFromError(_err)), ) @@ -229,6 +292,11 @@ func (w *Worker) sendTransaction(ctx context.Context, client *http.Client, tx *t // Send the request resp, err := client.Do(req) if err != nil { + txsRejected.Add(ctx, 1, metric.WithAttributes( + attribute.String("endpoint", w.endpoint), + attribute.String("scenario", tx.Scenario.Name), + attribute.String("reason", "transport"), + )) return fmt.Errorf("Worker %d: Failed to send transaction: %w", w.id, err) } defer func() { @@ -247,9 +315,23 @@ func (w *Worker) sendTransaction(ctx context.Context, client *http.Client, tx *t // Check response status if resp.StatusCode != http.StatusOK { + httpErrors.Add(ctx, 1, metric.WithAttributes( + attribute.Int("status_code", resp.StatusCode), + attribute.String("endpoint", w.endpoint), + )) + txsRejected.Add(ctx, 1, metric.WithAttributes( + attribute.String("endpoint", w.endpoint), + attribute.String("scenario", tx.Scenario.Name), + attribute.String("reason", "http_status"), + )) return fmt.Errorf("Worker %d: HTTP error %d for transaction to %s", w.id, resp.StatusCode, w.endpoint) } + txsAccepted.Add(ctx, 1, metric.WithAttributes( + attribute.String("endpoint", w.endpoint), + attribute.String("scenario", tx.Scenario.Name), + )) + // Write to sentTxs channel without blocking select { case w.sentTxs <- tx: diff --git a/sender/worker_test.go b/sender/worker_test.go new file mode 100644 index 0000000..cc55ec3 --- /dev/null +++ b/sender/worker_test.go @@ -0,0 +1,50 @@ +package sender + +import ( + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestNewHttpTransport_Defaults(t *testing.T) { + tr := newHttpTransport() + + require.Equal(t, 
500, tr.MaxIdleConns) + require.Equal(t, 50, tr.MaxIdleConnsPerHost) + require.Equal(t, 90*time.Second, tr.IdleConnTimeout) + require.False(t, tr.DisableKeepAlives) +} + +func TestNewHttpTransport_WithMaxIdleConns(t *testing.T) { + tr := newHttpTransport(WithMaxIdleConns(2048)) + + require.Equal(t, 2048, tr.MaxIdleConns) + require.Equal(t, 50, tr.MaxIdleConnsPerHost, "per-host default preserved") +} + +func TestNewHttpTransport_WithMaxIdleConnsPerHost(t *testing.T) { + tr := newHttpTransport(WithMaxIdleConnsPerHost(1024)) + + require.Equal(t, 1024, tr.MaxIdleConnsPerHost) + require.Equal(t, 500, tr.MaxIdleConns, "global default preserved") +} + +func TestNewHttpTransport_MultipleOptions(t *testing.T) { + tr := newHttpTransport( + WithMaxIdleConns(4096), + WithMaxIdleConnsPerHost(1024), + ) + + require.Equal(t, 4096, tr.MaxIdleConns) + require.Equal(t, 1024, tr.MaxIdleConnsPerHost) +} + +func TestNewHttpClient_Smoke(t *testing.T) { + c := newHttpClient() + require.Equal(t, 30*time.Second, c.Timeout) + require.NotNil(t, c.Transport, "Transport must be set") + _, isBareTransport := c.Transport.(*http.Transport) + require.False(t, isBareTransport, "Transport should be wrapped by otelhttp, not bare *http.Transport") +} diff --git a/stats/block_collector.go b/stats/block_collector.go index 9feaa56..6a1794c 100644 --- a/stats/block_collector.go +++ b/stats/block_collector.go @@ -93,22 +93,22 @@ func (bc *BlockCollector) processNewBlock(header *types.Header) { for stats := range bc.stats.Lock() { now := time.Now() blockNum := header.Number.Uint64() - gasUsed := header.GasUsed - metrics.gasUsed.Record(context.Background(), int64(gasUsed), metric.WithAttributes(attribute.String("chain_id", bc.seiChainID))) + gas := header.GasUsed + gasUsed.Record(context.Background(), int64(gas), metric.WithAttributes(attribute.String("chain_id", bc.seiChainID))) // Update max block number if blockNum > stats.maxBlockNum { - metrics.blockNumber.Record(context.Background(), int64(blockNum), 
metric.WithAttributes(attribute.String("chain_id", bc.seiChainID))) + blockNumber.Record(context.Background(), int64(blockNum), metric.WithAttributes(attribute.String("chain_id", bc.seiChainID))) stats.maxBlockNum = blockNum } // Track gas used - stats.allGasUsed = append(stats.allGasUsed, gasUsed) - stats.windowGasUsed = append(stats.windowGasUsed, gasUsed) + stats.allGasUsed = append(stats.allGasUsed, gas) + stats.windowGasUsed = append(stats.windowGasUsed, gas) // Calculate time between blocks if !stats.lastBlockTime.IsZero() { timeBetween := now.Sub(stats.lastBlockTime) - metrics.blockTime.Record(context.Background(), timeBetween.Seconds(), metric.WithAttributes(attribute.String("chain_id", bc.seiChainID))) + blockTime.Record(context.Background(), timeBetween.Seconds(), metric.WithAttributes(attribute.String("chain_id", bc.seiChainID))) stats.allBlockTimes = append(stats.allBlockTimes, timeBetween) stats.windowBlockTimes = append(stats.windowBlockTimes, timeBetween) } diff --git a/stats/logger.go b/stats/logger.go index 207ce47..1f8479b 100644 --- a/stats/logger.go +++ b/stats/logger.go @@ -365,7 +365,7 @@ func (l *Logger) LogFinalStats() { log.Printf("Error creating report file: %v", err) return } - defer reportFile.Close() + defer func() { _ = reportFile.Close() }() _, err = reportFile.WriteString(finalStats.String()) if err != nil { log.Printf("Error writing report file: %v", err) diff --git a/stats/metrics.go b/stats/metrics.go index 931231d..984b3bf 100644 --- a/stats/metrics.go +++ b/stats/metrics.go @@ -5,32 +5,44 @@ import ( "go.opentelemetry.io/otel/metric" ) +// See sender/metrics.go for why package-level acquisition is safe before Setup. 
+var meter = otel.Meter("github.com/sei-protocol/sei-load/stats") + var ( - meter = otel.Meter("seiload/stats") - - metrics = struct { - gasUsed metric.Int64Histogram - blockNumber metric.Int64Gauge - blockTime metric.Float64Histogram - }{ - gasUsed: must(meter.Int64Histogram( - "gas_used", - metric.WithDescription("Gas used in transactions"), - metric.WithUnit("{gas}"), - metric.WithExplicitBucketBoundaries(1, 1000, 10_000, 50_000, 100_000, 200_000, 300_000, 400_000, 500_000, 600_000, 700_000, 800_000, 1_000_000))), - blockNumber: must(meter.Int64Gauge( - "block_number", - metric.WithDescription("Block number in the chain"), - metric.WithUnit("{height}"))), - blockTime: must(meter.Float64Histogram( - "block_time", - metric.WithDescription("Time taken to produce a block"), - metric.WithUnit("s"), - metric.WithExplicitBucketBoundaries(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 5.0, 10.0, 20.0))), - } + gasUsed = must(meter.Int64Histogram( + "gas_used", + metric.WithDescription("Gas used in transactions"), + metric.WithUnit("{gas}"), + metric.WithExplicitBucketBoundaries(1, 1000, 10_000, 50_000, 100_000, 200_000, 300_000, 400_000, 500_000, 600_000, 700_000, 800_000, 1_000_000))) + + blockNumber = must(meter.Int64Gauge( + "block_number", + metric.WithDescription("Block number in the chain"), + metric.WithUnit("{height}"))) + + blockTime = must(meter.Float64Histogram( + "block_time", + metric.WithDescription("Time taken to produce a block"), + metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 5.0, 10.0, 20.0))) + + // Run-summary: gauges emitted once at run end → 1 series/run via Resource join. 
+	runTPSFinal = must(meter.Float64Gauge( +		"run_tps_final", +		metric.WithDescription("Final observed TPS for this run (emitted once at run end)"), +		metric.WithUnit("{transactions}/s"))) + +	runDurationSeconds = must(meter.Float64Gauge( +		"run_duration_seconds", +		metric.WithDescription("Wall-clock duration of this run (emitted once at run end)"), +		metric.WithUnit("s"))) + +	runTxsAcceptedTotal = must(meter.Int64Gauge( +		"run_txs_accepted_total", +		metric.WithDescription("Total transactions accepted by endpoints over this run (emitted once at run end)"), +		metric.WithUnit("{transactions}"))) ) -// must panics if err is non-nil, otherwise returns v. func must[V any](v V, err error) V { 	if err != nil { 		panic(err) diff --git a/stats/run_summary.go b/stats/run_summary.go new file mode 100644 index 0000000..c8480df --- /dev/null +++ b/stats/run_summary.go @@ -0,0 +1,19 @@ +package stats + +import ( +	"context" +	"time" +) + +// EmitRunSummary records the run-summary gauges. Call once at shutdown. NOTE(review): finalTPS below reads overallTpsWindow.maxTPS — the peak sample, not the last-observed one; confirm run_tps_final ("Final observed TPS") intends the peak, or rename/redescribe. +func (c *Collector) EmitRunSummary(ctx context.Context) { +	c.mu.RLock() +	duration := time.Since(c.startTime) +	totalTxs := c.totalTxs +	finalTPS := c.overallTpsWindow.maxTPS +	c.mu.RUnlock() + +	runDurationSeconds.Record(ctx, duration.Seconds()) +	runTPSFinal.Record(ctx, finalTPS) +	runTxsAcceptedTotal.Record(ctx, int64(totalTxs)) +}