diff --git a/api/dbv1/get_track_ids_by_isrc.sql.go b/api/dbv1/get_track_ids_by_isrc.sql.go index 116a02e7..886df586 100644 --- a/api/dbv1/get_track_ids_by_isrc.sql.go +++ b/api/dbv1/get_track_ids_by_isrc.sql.go @@ -12,10 +12,13 @@ import ( const getTrackIdsByISRC = `-- name: GetTrackIdsByISRC :many SELECT track_id FROM tracks -WHERE isrc = ANY($1::text[]) +WHERE regexp_replace(upper(isrc), '[^A-Z0-9]', '', 'g') = ANY($1::text[]) AND access_authorities IS NULL ` +// Match ISRC ignoring dashes/whitespace/case so a stored value like +// "US-ANG-21-03742" matches a query of "USANG2103742" and vice versa. +// Inputs in @isrcs must already be normalized (uppercased, non-alphanumerics stripped). func (q *Queries) GetTrackIdsByISRC(ctx context.Context, isrcs []string) ([]int32, error) { rows, err := q.db.Query(ctx, getTrackIdsByISRC, isrcs) if err != nil { diff --git a/api/dbv1/queries/get_track_ids_by_isrc.sql b/api/dbv1/queries/get_track_ids_by_isrc.sql index f377e27d..ad989398 100644 --- a/api/dbv1/queries/get_track_ids_by_isrc.sql +++ b/api/dbv1/queries/get_track_ids_by_isrc.sql @@ -1,5 +1,8 @@ -- name: GetTrackIdsByISRC :many +-- Match ISRC ignoring dashes/whitespace/case so a stored value like +-- "US-ANG-21-03742" matches a query of "USANG2103742" and vice versa. +-- Inputs in @isrcs must already be normalized (uppercased, non-alphanumerics stripped). SELECT track_id FROM tracks -WHERE isrc = ANY(@isrcs::text[]) +WHERE regexp_replace(upper(isrc), '[^A-Z0-9]', '', 'g') = ANY(@isrcs::text[]) AND access_authorities IS NULL; diff --git a/api/v1_tracks.go b/api/v1_tracks.go index 862d2cee..e4be2103 100644 --- a/api/v1_tracks.go +++ b/api/v1_tracks.go @@ -1,6 +1,7 @@ package api import ( + "regexp" "strings" "api.audius.co/api/dbv1" @@ -8,6 +9,14 @@ import ( "github.com/gofiber/fiber/v2" ) +// ISRCs may be stored with or without dashes (e.g. "US-ANG-21-03742" vs +// "USANG2103742"); normalize so either form matches the same row. +var isrcNonAlphanum = regexp.MustCompile(`[^A-Z0-9]`) + +func normalizeISRC(s string) string { + return isrcNonAlphanum.ReplaceAllString(strings.ToUpper(s), "") +} + func (app *ApiServer) v1Tracks(c *fiber.Ctx) error { myId, _ := trashid.DecodeHashId(c.Query("user_id")) ids := decodeIdList(c) @@ -41,7 +50,11 @@ func (app *ApiServer) v1Tracks(c *fiber.Ctx) error { // Add ISRC ID mappings isrcs := queryMulti(c, "isrc") if len(isrcs) > 0 { - newIds, err := app.queries.GetTrackIdsByISRC(c.Context(), isrcs) + normalized := make([]string, len(isrcs)) + for i, isrc := range isrcs { + normalized[i] = normalizeISRC(isrc) + } + newIds, err := app.queries.GetTrackIdsByISRC(c.Context(), normalized) if err != nil { return err } diff --git a/api/v1_tracks_test.go b/api/v1_tracks_test.go index 8506da73..2a999f9a 100644 --- a/api/v1_tracks_test.go +++ b/api/v1_tracks_test.go @@ -5,6 +5,7 @@ import ( "testing" "api.audius.co/api/dbv1" + "api.audius.co/trashid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -36,6 +37,47 @@ func TestGetTracksByPermalink(t *testing.T) { }) } +func TestGetTracksByISRC(t *testing.T) { + app := testAppWithFixtures(t) + ctx := context.Background() + require.NotNil(t, app.writePool, "test requires write pool") + + // Track 100 ("T1") stored with dashes; track 101 ("T2") stored without. + _, err := app.writePool.Exec(ctx, + `UPDATE tracks SET isrc = 'US-ANG-21-03742' WHERE track_id = 100 AND is_current = true`) + require.NoError(t, err) + _, err = app.writePool.Exec(ctx, + `UPDATE tracks SET isrc = 'QMEU31610080' WHERE track_id = 101 AND is_current = true`) + require.NoError(t, err) + + track100Id := trashid.MustEncodeHashID(100) + track101Id := trashid.MustEncodeHashID(101) + + cases := []struct { + name string + query string + wantId string + }{ + {"stored-with-dashes, queried without", "USANG2103742", track100Id}, + {"stored-with-dashes, queried with same dashes", "US-ANG-21-03742", track100Id}, + {"stored-with-dashes, queried lowercased and undashed", "usang2103742", track100Id}, + {"stored-without-dashes, queried as-is", "QMEU31610080", track101Id}, + {"stored-without-dashes, queried with inserted dashes", "QM-EU3-16-10080", track101Id}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var resp struct { + Data []dbv1.Track + } + status, body := testGet(t, app, "/v1/tracks?isrc="+tc.query, &resp) + require.Equal(t, 200, status, string(body)) + require.Len(t, resp.Data, 1, "expected exactly one track for %q: %s", tc.query, string(body)) + jsonAssert(t, body, map[string]any{"data.0.id": tc.wantId}) + }) + } +} + func TestGetTracksExcludesAccessAuthorities(t *testing.T) { app := testAppWithFixtures(t) ctx := context.Background() diff --git a/ddl/migrations/0202_tracks_isrc_normalized_idx.sql b/ddl/migrations/0202_tracks_isrc_normalized_idx.sql new file mode 100644 index 00000000..fb47ecaf --- /dev/null +++ b/ddl/migrations/0202_tracks_isrc_normalized_idx.sql @@ -0,0 +1,20 @@ +-- Functional index supporting GetTrackIdsByISRC. The handler normalizes the +-- incoming ?isrc= param (uppercase + strip non-alphanumerics) and the SQL +-- compares against regexp_replace(upper(isrc), '[^A-Z0-9]', '', 'g'), so a +-- plain btree on isrc cannot be used. This index materializes the same +-- normalized expression, allowing dash/case-insensitive ISRC lookups to be +-- index-scanned instead of seq-scanning ~all tracks. Partial on +-- isrc IS NOT NULL since the vast majority of rows have no ISRC. +-- +-- NOTE: intentionally NOT wrapped in BEGIN/COMMIT so CREATE INDEX +-- CONCURRENTLY can run without taking an ACCESS EXCLUSIVE lock on tracks. +-- IF NOT EXISTS makes the migration idempotent. + +create index concurrently if not exists tracks_isrc_normalized_idx + on public.tracks ( + (regexp_replace(upper(isrc), '[^A-Z0-9]'::text, ''::text, 'g')) + ) + where isrc is not null; + +comment on index public.tracks_isrc_normalized_idx is + 'Functional index supporting GetTrackIdsByISRC; matches regexp_replace(upper(isrc), ''[^A-Z0-9]'', '''', ''g'') so dash/case-insensitive ?isrc= lookups hit the index.';