diff --git a/docs/reference/schema.graphql b/docs/reference/schema.graphql index 2d98d3b..8d7aabf 100644 --- a/docs/reference/schema.graphql +++ b/docs/reference/schema.graphql @@ -3,6 +3,19 @@ type Agency { label: String! } +type AgencyPeriodMetrics { + period: String! + agencyKey: String! + agencyName: String + articleCount: Int! + avgSentimentScore: Float + pctPositive: Float + pctNegative: Float + avgReadabilityFlesch: Float + avgWordCount: Float + topThemes: [ThemeStats!]! +} + """Agency statistics with article count""" type AgencyStats { name: String! @@ -115,6 +128,14 @@ enum ArticleSort { VIEWS } +type ArticleSummary { + uniqueId: String! + title: String! + agencyName: String + publishedAt: String + trendingScore: Float +} + type ArticlesResult { articles: [Article!]! page: Int! @@ -246,6 +267,15 @@ input DeliveryChannelsInput { webhook: Boolean! = false } +type EntityCoveragePoint { + period: String! + agencyKey: String! + agencyName: String + articleCount: Int! + totalMentions: Int! + avgSentimentScore: Float +} + type EntityFacet { value: String! count: Int! @@ -253,6 +283,15 @@ type EntityFacet { label: String } +enum EntityKind { + ORG + PER + LOC + EVENT + POLICY + LAW +} + type EntityNetwork { nodes: [EntityNetworkNode!]! edges: [EntityNetworkEdge!]! @@ -283,6 +322,19 @@ type EntityNode { agencyKey: String } +type EntitySearchResult { + entityId: String! + canonicalName: String! + type: String! + description: String + wikidataUrl: String + agencyKey: String + aliases: [String!]! + articleCount: Int! + confidence: Float! + matchType: String! +} + type EntityType { text: String! type: String! @@ -322,6 +374,12 @@ type FollowedListing { followedAt: DateTime } +enum Granularity { + DAY + WEEK + MONTH +} + type IntegrityCandidateType { uniqueId: String! url: String! @@ -373,6 +431,13 @@ type MarketplaceRecorte { keywords: [String!]! 
} +enum MetricType { + VOLUME + SENTIMENT + READABILITY + THEMES +} + type Mutation { """Cria um novo clipping""" createClipping(input: ClippingInput!): Clipping! @@ -521,6 +586,12 @@ type Query { """Daily article counts for the given date range""" articlesTimeline(range: DateRange!): [DailyCount!]! + """Métricas de publicação por agência e período""" + agencyAnalytics(agencies: [String!]!, dateFrom: String!, dateTo: String!, granularity: Granularity! = MONTH, metrics: [MetricType!] = null): [AgencyPeriodMetrics!]! + + """Temas em crescimento comparando janela recente com baseline histórico""" + trendingThemes(windowDays: Int! = 7, baselineDays: Int! = 28, minArticles: Int! = 3, growthThreshold: Float! = 1.5, agencyKey: String = null, limit: Int! = 10): [TrendingThemeResult!]! + """ Lista todos os clippings do usuario autenticado (autorados + inscritos) """ @@ -624,6 +695,15 @@ type Query { Estima quantos artigos um recorte capturaria nas ultimas `sinceHours` horas. Replica `lib/estimate-recorte-count.ts`: filtro = themes OR-levels + agencies OR'd + published_at >= now-sinceHours; para keywords, conta por keyword (q em title,summary) e retorna o MAX; sem keywords, uma unica contagem. Substitui o mock `clippingEstimate`. PUBLICO. """ estimateRecorteCount(themes: [String!]!, agencies: [String!]!, keywords: [String!]!, sinceHours: Int! = 24): Int! + + """Série temporal de cobertura de uma entidade por agência""" + entityCoverage(entityId: String!, dateFrom: String = null, dateTo: String = null, granularity: Granularity! = MONTH): [EntityCoveragePoint!]! + + """Busca fuzzy de entidades por nome ou alias""" + entitySearch(query: String!, entityType: EntityKind = null, limit: Int! = 5): [EntitySearchResult!]! + + """Entidades NER com maior crescimento de cobertura (pré-computado)""" + trendingEntities(limit: Int! = 10): [TrendingEntityResult!]! } type Recorte { @@ -716,6 +796,26 @@ type ThemeStats { count: Int! } +type TrendingEntityResult { + entityId: String! 
+ canonicalName: String! + type: String! + trendingScore: Float! + volumeRatio: Float! + windowCount: Int! + windowAgencies: Int! + computedAt: String +} + +type TrendingThemeResult { + themeLabel: String! + themeCode: String + windowCount: Int! + baselineDailyAvg: Float! + growthScore: Float! + topArticles: [ArticleSummary!]! +} + type TypesenseDocRecordType { uniqueId: String! title: String! diff --git a/docs/reference/schema.md b/docs/reference/schema.md index 0ed3207..98924da 100644 --- a/docs/reference/schema.md +++ b/docs/reference/schema.md @@ -14,6 +14,19 @@ type Agency { label: String! } +type AgencyPeriodMetrics { + period: String! + agencyKey: String! + agencyName: String + articleCount: Int! + avgSentimentScore: Float + pctPositive: Float + pctNegative: Float + avgReadabilityFlesch: Float + avgWordCount: Float + topThemes: [ThemeStats!]! +} + """Agency statistics with article count""" type AgencyStats { name: String! @@ -126,6 +139,14 @@ enum ArticleSort { VIEWS } +type ArticleSummary { + uniqueId: String! + title: String! + agencyName: String + publishedAt: String + trendingScore: Float +} + type ArticlesResult { articles: [Article!]! page: Int! @@ -257,6 +278,15 @@ input DeliveryChannelsInput { webhook: Boolean! = false } +type EntityCoveragePoint { + period: String! + agencyKey: String! + agencyName: String + articleCount: Int! + totalMentions: Int! + avgSentimentScore: Float +} + type EntityFacet { value: String! count: Int! @@ -264,6 +294,15 @@ type EntityFacet { label: String } +enum EntityKind { + ORG + PER + LOC + EVENT + POLICY + LAW +} + type EntityNetwork { nodes: [EntityNetworkNode!]! edges: [EntityNetworkEdge!]! @@ -294,6 +333,19 @@ type EntityNode { agencyKey: String } +type EntitySearchResult { + entityId: String! + canonicalName: String! + type: String! + description: String + wikidataUrl: String + agencyKey: String + aliases: [String!]! + articleCount: Int! + confidence: Float! + matchType: String! +} + type EntityType { text: String! 
type: String! @@ -333,6 +385,12 @@ type FollowedListing { followedAt: DateTime } +enum Granularity { + DAY + WEEK + MONTH +} + type IntegrityCandidateType { uniqueId: String! url: String! @@ -384,6 +442,13 @@ type MarketplaceRecorte { keywords: [String!]! } +enum MetricType { + VOLUME + SENTIMENT + READABILITY + THEMES +} + type Mutation { """Cria um novo clipping""" createClipping(input: ClippingInput!): Clipping! @@ -532,6 +597,12 @@ type Query { """Daily article counts for the given date range""" articlesTimeline(range: DateRange!): [DailyCount!]! + """Métricas de publicação por agência e período""" + agencyAnalytics(agencies: [String!]!, dateFrom: String!, dateTo: String!, granularity: Granularity! = MONTH, metrics: [MetricType!] = null): [AgencyPeriodMetrics!]! + + """Temas em crescimento comparando janela recente com baseline histórico""" + trendingThemes(windowDays: Int! = 7, baselineDays: Int! = 28, minArticles: Int! = 3, growthThreshold: Float! = 1.5, agencyKey: String = null, limit: Int! = 10): [TrendingThemeResult!]! + """ Lista todos os clippings do usuario autenticado (autorados + inscritos) """ @@ -635,6 +706,15 @@ type Query { Estima quantos artigos um recorte capturaria nas ultimas `sinceHours` horas. Replica `lib/estimate-recorte-count.ts`: filtro = themes OR-levels + agencies OR'd + published_at >= now-sinceHours; para keywords, conta por keyword (q em title,summary) e retorna o MAX; sem keywords, uma unica contagem. Substitui o mock `clippingEstimate`. PUBLICO. """ estimateRecorteCount(themes: [String!]!, agencies: [String!]!, keywords: [String!]!, sinceHours: Int! = 24): Int! + + """Série temporal de cobertura de uma entidade por agência""" + entityCoverage(entityId: String!, dateFrom: String = null, dateTo: String = null, granularity: Granularity! = MONTH): [EntityCoveragePoint!]! + + """Busca fuzzy de entidades por nome ou alias""" + entitySearch(query: String!, entityType: EntityKind = null, limit: Int! = 5): [EntitySearchResult!]! 
+
+  """Entidades NER com maior crescimento de cobertura (pré-computado)"""
+  trendingEntities(limit: Int! = 10): [TrendingEntityResult!]!
 }
 
 type Recorte {
@@ -727,6 +807,26 @@ type ThemeStats {
   count: Int!
 }
 
+type TrendingEntityResult {
+  entityId: String!
+  canonicalName: String!
+  type: String!
+  trendingScore: Float!
+  volumeRatio: Float!
+  windowCount: Int!
+  windowAgencies: Int!
+  computedAt: String
+}
+
+type TrendingThemeResult {
+  themeLabel: String!
+  themeCode: String
+  windowCount: Int!
+  baselineDailyAvg: Float!
+  growthScore: Float!
+  topArticles: [ArticleSummary!]!
+}
+
 type TypesenseDocRecordType {
   uniqueId: String!
   title: String!
diff --git a/src/graphql_api/datasources/postgres.py b/src/graphql_api/datasources/postgres.py
index 0906d2c..b05115c 100644
--- a/src/graphql_api/datasources/postgres.py
+++ b/src/graphql_api/datasources/postgres.py
@@ -409,6 +409,19 @@ class TypesenseDocRecord(NewsRecord):
 """
 
 
+# Top-N rows of the precomputed trending table. NULLS LAST keeps rows without
+# a score from outranking real scores (PostgreSQL sorts NULLs first under
+# DESC); entity_id breaks ties so the result order is deterministic.
+_TRENDING_ENTITIES_SQL = """
+    SELECT entity_id, canonical_name, type,
+           trending_score, volume_ratio, window_count, window_agencies,
+           computed_at::text
+    FROM entity_trending_scores
+    ORDER BY trending_score DESC NULLS LAST, entity_id
+    LIMIT $1
+"""
+
+
 def _row_to_news_record(row: dict) -> NewsRecord:
     tags = row.get("tags") or []
     if isinstance(tags, str):
@@ -916,3 +929,17 @@ async def entity_search(
                 row["aliases"] = []
             results.append(row)
         return results
+
+    async def get_trending_entities(self, limit: int = 10) -> list[dict]:
+        """Return the NER entities with the highest precomputed trending score.
+
+        Args:
+            limit: Maximum number of rows to return. Negative values are
+                treated as 0 because PostgreSQL rejects a negative LIMIT.
+
+        Returns:
+            One dict per row selected by ``_TRENDING_ENTITIES_SQL``.
+        """
+        async with self._pool.acquire() as conn:
+            rows = await conn.fetch(_TRENDING_ENTITIES_SQL, max(limit, 0))
+        return [dict(r) for r in rows]
diff --git a/src/graphql_api/schema/resolvers/entities.py b/src/graphql_api/schema/resolvers/entities.py
index 752771a..718e62c 100644
--- a/src/graphql_api/schema/resolvers/entities.py
+++ b/src/graphql_api/schema/resolvers/entities.py
@@ -8,6 +8,7 @@
     EntityCoveragePoint,
     EntityKind,
     EntitySearchResult,
+    TrendingEntityResult,
 )
 
 
@@ -64,3 +65,28 @@ async def entity_search(
             )
             for row in rows
         ]
+
+    @strawberry.field(description="Entidades NER com maior crescimento de cobertura (pré-computado)")
+    async def trending_entities(
+        self,
+        info: Info,
+        limit: int = 10,
+    ) -> list[TrendingEntityResult]:
+        # Clamp to 0..50: the upper bound caps the page size and the lower
+        # bound keeps a negative client value from reaching the SQL LIMIT.
+        ds = info.context.postgres_ds
+        rows = await ds.get_trending_entities(max(0, min(limit, 50)))
+        # Null columns fall back to ""/0 so the non-nullable fields resolve.
+        return [
+            TrendingEntityResult(
+                entity_id=row["entity_id"],
+                canonical_name=row.get("canonical_name") or "",
+                type=row.get("type") or "",
+                trending_score=float(row.get("trending_score") or 0.0),
+                volume_ratio=float(row.get("volume_ratio") or 0.0),
+                window_count=int(row.get("window_count") or 0),
+                window_agencies=int(row.get("window_agencies") or 0),
+                computed_at=row.get("computed_at"),
+            )
+            for row in rows
+        ]
diff --git a/src/graphql_api/schema/types/entities.py b/src/graphql_api/schema/types/entities.py
index cc62556..b57b7ab 100644
--- a/src/graphql_api/schema/types/entities.py
+++ b/src/graphql_api/schema/types/entities.py
@@ -40,3 +40,16 @@ class EntitySearchResult:
     article_count: int
     confidence: float
     match_type: str
+
+
+# One trending-entity row as returned by the trendingEntities query.
+@strawberry.type
+class TrendingEntityResult:
+    entity_id: str
+    canonical_name: str
+    type: str
+    trending_score: float
+    volume_ratio: float
+    window_count: int
+    window_agencies: int
+    computed_at: Optional[str]
diff --git a/tests/datasources/test_trending_entities_datasource.py b/tests/datasources/test_trending_entities_datasource.py
new file mode 100644
index 0000000..cacfceb
--- /dev/null
+++ b/tests/datasources/test_trending_entities_datasource.py
@@ -0,0 +1,97 @@
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from graphql_api.datasources.postgres import PostgresDatasource
+
+
+def _make_mock_pool(fetch_results=None):
+    pool = MagicMock()
+    conn = AsyncMock()
+    conn.fetch = AsyncMock(side_effect=list(fetch_results or [[]]))
+    acm = AsyncMock()
+    acm.__aenter__ = AsyncMock(return_value=conn)
+    acm.__aexit__ = AsyncMock(return_value=False)
+    pool.acquire.return_value = acm
+    return pool, conn
+
+
+def _rows():
+    return [
+        {
+            "entity_id": "Q12345",
+            "canonical_name": "Ministério da Saúde",
+            "type": "ORG",
+            "trending_score": 4.25,
+            "volume_ratio": 3.2,
+            "window_count": 12,
+            "window_agencies": 8,
+            "computed_at": "2026-06-23 10:00:00",
+        },
+        {
+            "entity_id": "Q67890",
+            "canonical_name": "Ministério da Educação",
+            "type": "ORG",
+            "trending_score": 3.1,
+            "volume_ratio": 2.0,
+            "window_count": 7,
+            "window_agencies": 4,
+            "computed_at": "2026-06-23 10:00:00",
+        },
+    ]
+
+
+class TestGetTrendingEntities:
+    @pytest.mark.asyncio
+    async def test_retorna_lista_de_dicts(self):
+        pool, conn = _make_mock_pool([_rows()])
+        ds = PostgresDatasource(pool)
+        result = await ds.get_trending_entities(10)
+        assert len(result) == 2
+        assert all(isinstance(r, dict) for r in result)
+
+    @pytest.mark.asyncio
+    async def test_campos_mapeados_corretamente(self):
+        pool, conn = _make_mock_pool([_rows()])
+        ds = PostgresDatasource(pool)
+        result = await ds.get_trending_entities(10)
+        first = result[0]
+        assert first["entity_id"] == "Q12345"
+        assert first["canonical_name"] == "Ministério da Saúde"
+        assert first["type"] == "ORG"
+        assert first["trending_score"] == 4.25
+        assert first["volume_ratio"] == 3.2
+        assert first["window_count"] == 12
+        assert first["window_agencies"] == 8
+        assert first["computed_at"] == "2026-06-23 10:00:00"
+
+    @pytest.mark.asyncio
+    async def test_retorna_vazio_quando_sem_dados(self):
+        pool, conn = _make_mock_pool([[]])
+        ds = PostgresDatasource(pool)
+        result = await ds.get_trending_entities(10)
+        assert result == []
+
+    @pytest.mark.asyncio
+    async def test_usa_limit_parametro(self):
+        pool, conn = _make_mock_pool([[]])
+        ds = PostgresDatasource(pool)
+        await ds.get_trending_entities(7)
+        args = conn.fetch.await_args.args
+        assert args[1] == 7
+
+    @pytest.mark.asyncio
+    async def test_limit_negativo_vira_zero(self):
+        pool, conn = _make_mock_pool([[]])
+        ds = PostgresDatasource(pool)
+        await ds.get_trending_entities(-3)
+        args = conn.fetch.await_args.args
+        assert args[1] == 0
+
+    @pytest.mark.asyncio
+    async def test_sql_ordena_nulos_por_ultimo(self):
+        pool, conn = _make_mock_pool([[]])
+        ds = PostgresDatasource(pool)
+        await ds.get_trending_entities(10)
+        sql = conn.fetch.await_args.args[0]
+        assert "NULLS LAST" in sql
diff --git a/tests/resolvers/test_trending_entities.py b/tests/resolvers/test_trending_entities.py
new file mode 100644
index 0000000..8aa5fd0
--- /dev/null
+++ b/tests/resolvers/test_trending_entities.py
@@ -0,0 +1,134 @@
+from unittest.mock import AsyncMock
+
+import pytest
+import strawberry
+
+from graphql_api.context import GraphQLContext
+from graphql_api.schema.resolvers.entities import EntityQuery
+from graphql_api.schema.resolvers.health import HealthQuery
+
+
+@strawberry.type
+class _Query(HealthQuery, EntityQuery):
+    pass
+
+
+test_schema = strawberry.Schema(query=_Query)
+
+
+def _make_ctx(postgres_ds=None):
+    return GraphQLContext(postgres_ds=postgres_ds)
+
+
+_FULL_QUERY = """
+query {
+  trendingEntities(limit: 10) {
+    entityId
+    canonicalName
+    type
+    trendingScore
+    volumeRatio
+    windowCount
+    windowAgencies
+    computedAt
+  }
+}
+"""
+
+
+def _row():
+    return {
+        "entity_id": "Q12345",
+        "canonical_name": "Ministério da Saúde",
+        "type": "ORG",
+        "trending_score": 4.25,
+        "volume_ratio": 3.2,
+        "window_count": 12,
+        "window_agencies": 8,
+        "computed_at": "2026-06-23 10:00:00",
+    }
+
+
+class TestTrendingEntities:
+    @pytest.mark.asyncio
+    async def test_retorna_lista_com_campos_corretos(self):
+        mock_pg = AsyncMock()
+        mock_pg.get_trending_entities = AsyncMock(return_value=[_row()])
+        result = await test_schema.execute(_FULL_QUERY, context_value=_make_ctx(postgres_ds=mock_pg))
+        assert result.errors is None
+        items = result.data["trendingEntities"]
+        assert len(items) == 1
+        item = items[0]
+        assert item["entityId"] == "Q12345"
+        assert item["canonicalName"] == "Ministério da Saúde"
+        assert item["type"] == "ORG"
+        assert item["trendingScore"] == 4.25
+        assert item["volumeRatio"] == 3.2
+        assert item["windowCount"] == 12
+        assert item["windowAgencies"] == 8
+        assert item["computedAt"] == "2026-06-23 10:00:00"
+
+    @pytest.mark.asyncio
+    async def test_limit_clampado_a_50(self):
+        mock_pg = AsyncMock()
+        mock_pg.get_trending_entities = AsyncMock(return_value=[])
+        query = "{ trendingEntities(limit: 100) { entityId } }"
+        result = await test_schema.execute(query, context_value=_make_ctx(postgres_ds=mock_pg))
+        assert result.errors is None
+        mock_pg.get_trending_entities.assert_awaited_once_with(50)
+
+    @pytest.mark.asyncio
+    async def test_limit_negativo_clampado_a_zero(self):
+        mock_pg = AsyncMock()
+        mock_pg.get_trending_entities = AsyncMock(return_value=[])
+        query = "{ trendingEntities(limit: -5) { entityId } }"
+        result = await test_schema.execute(query, context_value=_make_ctx(postgres_ds=mock_pg))
+        assert result.errors is None
+        mock_pg.get_trending_entities.assert_awaited_once_with(0)
+
+    @pytest.mark.asyncio
+    async def test_datasource_chamado_com_limit_correto(self):
+        mock_pg = AsyncMock()
+        mock_pg.get_trending_entities = AsyncMock(return_value=[])
+        query = "{ trendingEntities(limit: 5) { entityId } }"
+        result = await test_schema.execute(query, context_value=_make_ctx(postgres_ds=mock_pg))
+        assert result.errors is None
+        mock_pg.get_trending_entities.assert_awaited_once_with(5)
+
+    @pytest.mark.asyncio
+    async def test_retorna_lista_vazia(self):
+        mock_pg = AsyncMock()
+        mock_pg.get_trending_entities = AsyncMock(return_value=[])
+        query = "{ trendingEntities(limit: 10) { entityId } }"
+        result = await test_schema.execute(query, context_value=_make_ctx(postgres_ds=mock_pg))
+        assert result.errors is None
+        assert result.data["trendingEntities"] == []
+
+    @pytest.mark.asyncio
+    async def test_campos_nulos_sao_tratados_com_fallback(self):
+        mock_pg = AsyncMock()
+        mock_pg.get_trending_entities = AsyncMock(
+            return_value=[
+                {
+                    "entity_id": "Q999",
+                    "canonical_name": None,
+                    "type": None,
+                    "trending_score": None,
+                    "volume_ratio": None,
+                    "window_count": None,
+                    "window_agencies": None,
+                    "computed_at": None,
+                }
+            ]
+        )
+        result = await test_schema.execute(_FULL_QUERY, context_value=_make_ctx(postgres_ds=mock_pg))
+        assert result.errors is None
+        item = result.data["trendingEntities"][0]
+        assert item["entityId"] == "Q999"
+        assert item["canonicalName"] == ""
+        assert item["type"] == ""
+        assert item["trendingScore"] == 0.0
+        assert item["volumeRatio"] == 0.0
+        assert item["windowCount"] == 0
+        assert item["windowAgencies"] == 0
+        assert item["computedAt"] is None