Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ The easiest way to get data as a pandas DataFrame:
from pysus import sinan, sinasc, sim, sih, sia, pni, ibge, cnes, ciha

# Download SINAN Dengue data for 2024
df = sinan(disease="deng", year=2024)
df = sinan(disease="deng", year=2000)

# Multiple years
df = sinan(disease="deng", year=[2023, 2024])
Expand Down Expand Up @@ -81,7 +81,7 @@ async def main():
df = pysus.read_parquet(paths, mode="union").df()
```

### Using the TUI
### Using the TUI (unstable/under testing)

Launch the interactive text-based interface:

Expand Down
1 change: 1 addition & 0 deletions pysus/api/_impl/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def _query():
"name": r.path.split("/")[-1],
"path": r.path,
"dataset": r.dataset.name if r.dataset else None,
"group": r.group.name if r.group else None,
"year": r.year,
"month": r.month,
"state": r.state,
Expand Down
50 changes: 46 additions & 4 deletions pysus/api/ducklake/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,42 @@
from sqlalchemy.pool import StaticPool

from .catalog import CatalogDataset, CatalogFile, DatasetGroup
from .models import Dataset, File
from .models import DuckDataset, File


class CatalogDatasetAdapter:
    """Adapter exposing a catalog ``CatalogDataset`` record through the
    plain-attribute interface that dataset consumers expect.

    Scalar metadata is copied at construction time; text fields that are
    missing (``None``/empty) collapse to the empty string.
    """

    def __init__(self, catalog_dataset: "CatalogDataset", ducklake):
        record = catalog_dataset
        self.name = record.name
        self.long_name = record.long_name if record.long_name else ""
        self.description = record.description if record.description else ""
        # No per-group metadata is known for a bare catalog dataset.
        self.group_definitions: dict[str, str] = {}
        # Expose the client under both attribute names used by callers.
        self.ducklake = ducklake
        self.client = ducklake

    @property
    def content(self):
        # Delegate to the DuckLake client; dataset names are queried
        # upper-cased (catalog convention).
        return self.ducklake.query(dataset=self.name.upper())


class DatasetGroupAdapter:
    """Adapter wrapping a catalog ``DatasetGroup`` so it can stand in for
    a remote group object.

    Only metadata is carried: file listing and search are stubbed out and
    always resolve to an empty list.
    """

    def __init__(self, dataset_group: "DatasetGroup", dataset):
        self.name = dataset_group.name
        self.long_name = dataset_group.long_name if dataset_group.long_name else ""
        self.description = (
            dataset_group.description if dataset_group.description else ""
        )
        self.dataset = dataset

    def __str__(self):
        # A group prints as its short code.
        return self.name

    @property
    async def files(self):
        # NOTE(review): awaitable property — presumably mirrors the remote
        # group interface; the adapter has no backing files to list.
        return []

    async def _fetch_files(self):
        # No catalog file records behind the adapter.
        return []

    async def search(self, **kwargs):
        # Search is a no-op on a metadata-only adapter.
        return []


class DuckLakeCredentials(BaseModel):
Expand Down Expand Up @@ -66,7 +101,7 @@ def _catalog_url(self) -> str:
def _is_authenticated(self) -> bool:
return self.credentials is not None

async def datasets(self, **kwargs) -> list[Dataset]:
async def datasets(self, **kwargs) -> list[DuckDataset]:
if not self._Session:
await self.connect()

Expand All @@ -86,7 +121,7 @@ def _fetch():
return results

records = await to_thread.run_sync(_fetch)
return [Dataset(record=rec, client=self) for rec in records]
return [DuckDataset(record=rec, client=self) for rec in records]

async def login(
self,
Expand Down Expand Up @@ -300,6 +335,13 @@ def _query():

records = await to_thread.run_sync(_query)
return [
File(path=r.path, record=r, dataset=r.dataset, group=r.group)
File(
path=r.path,
record=r,
dataset=CatalogDatasetAdapter(r.dataset, self),
group=(
DatasetGroupAdapter(r.group, r.dataset) if r.group else None
),
)
for r in records
]
85 changes: 44 additions & 41 deletions pysus/api/ducklake/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections.abc import Callable
from datetime import datetime
from pathlib import Path
from typing import Any, Union

import anyio
from pydantic import Field
Expand All @@ -19,6 +20,8 @@
class File(BaseRemoteFile):
record: CatalogFile = Field(exclude=True)
type: str = "remote"
dataset: Any
group: Any = None

@property
def basename(self) -> str:
Expand Down Expand Up @@ -51,6 +54,7 @@ async def _download(
) -> Path:
if not output:
output = CACHEPATH / self.name

return await self.client._download_file(
self,
output,
Expand All @@ -72,41 +76,7 @@ def _calculate():
return actual_hash == self.sha256


class Group(BaseRemoteGroup):
record: DatasetGroup = Field(exclude=True)
dataset: "Dataset" = Field(exclude=True)

@property
def name(self) -> str:
return self.record.name

@property
def long_name(self) -> str:
return (
self.record.group_metadata.long_name
if self.record.group_metadata
else self.name
)

@property
def description(self) -> str:
if self.record.group_metadata:
return self.record.group_metadata.description
return ""

async def _fetch_files(self) -> list[BaseRemoteFile]:
return [
File(
path=f.path,
record=f,
group=self,
dataset=self.dataset,
)
for f in self.record.files
]


class Dataset(BaseRemoteDataset):
class DuckDataset(BaseRemoteDataset):
record: CatalogDataset = Field(exclude=True)
client: BaseRemoteClient = Field(exclude=True)

Expand All @@ -133,14 +103,12 @@ def description(self) -> str:
else ""
)

async def _fetch_content(
self,
) -> list[Group | File]:
items: list[Group | File] = []
async def _fetch_content(self) -> list[Union["DuckGroup", File]]:
items: list[Union["DuckGroup", File]] = []

if self.record.groups:
items.extend(
[Group(record=g, dataset=self) for g in self.record.groups],
[DuckGroup(record=g, dataset=self) for g in self.record.groups]
)

if self.record.files:
Expand All @@ -152,7 +120,42 @@ async def _fetch_content(
dataset=self,
)
for f in self.record.files
],
]
)

return items


class DuckGroup(BaseRemoteGroup):
    """Remote dataset group backed by a DuckLake catalog ``DatasetGroup``."""

    # Raw catalog records are excluded from model serialization.
    record: DatasetGroup = Field(exclude=True)
    dataset: DuckDataset = Field(exclude=True)

    @property
    def name(self) -> str:
        # Short group code straight from the catalog record.
        return self.record.name

    @property
    def long_name(self) -> str:
        # Fall back to the short name when no metadata row exists.
        meta = self.record.group_metadata
        if meta:
            return meta.long_name
        return self.name

    @property
    def description(self) -> str:
        # Empty string when the catalog carries no metadata for the group.
        meta = self.record.group_metadata
        return meta.description if meta else ""

    async def _fetch_files(self) -> list[BaseRemoteFile]:
        """Build one remote ``File`` per catalog file attached to this group."""
        remote: list[BaseRemoteFile] = []
        for rec in self.record.files:
            remote.append(
                File(
                    path=rec.path,
                    record=rec,
                    group=self,
                    dataset=self.dataset,
                )
            )
        return remote
49 changes: 46 additions & 3 deletions pysus/api/ftp/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,55 @@ class SINAN(Dataset):
]

# SINAN group codes mapped to their human-readable (Portuguese) disease or
# condition names, ordered alphabetically by code. Fixed typos in display
# values ("Cancêr"->"Câncer", "Antirrabico"->"Antirrábico",
# "realacionadas"->"relacionadas", "Congênia"->"Congênita",
# "Doença exantemáticas"->"Doenças exantemáticas") and removed duplicate
# keys left over from the previous ordering.
group_definitions: dict[str, str] = {
    "ACBI": "Acidente de trabalho com material biológico",
    "ACGR": "Acidente de trabalho",
    "ANIM": "Acidente por Animais Peçonhentos",
    "ANTR": "Atendimento Antirrábico",
    "BOTU": "Botulismo",
    "CANC": "Câncer relacionado ao trabalho",
    "CHAG": "Doença de Chagas Aguda",
    "CHIK": "Febre de Chikungunya",
    "COLE": "Cólera",
    "COQU": "Coqueluche",
    "DENG": "Dengue",
    "DERM": "Dermatoses ocupacionais",
    "DIFT": "Difteria",
    "ESQU": "Esquistossomose",
    "EXAN": "Doenças exantemáticas",
    "FMAC": "Febre Maculosa",
    "FTIF": "Febre Tifóide",
    "HANS": "Hanseníase",
    "HANT": "Hantavirose",
    "HEPA": "Hepatites Virais",
    "IEXO": "Intoxicação Exógena",
    "INFL": "Influenza Pandêmica",
    "LEIV": "Leishmaniose Visceral",
    "LEPT": "Leptospirose",
    "LERD": "LER/Dort",
    "LTAN": "Leishmaniose Tegumentar Americana",
    "MALA": "Malária",
    "MENI": "Meningite",
    "MENT": "Transtornos mentais relacionados ao trabalho",
    "NTRA": "Notificação de Tracoma",
    "PAIR": "Perda auditiva por ruído relacionado ao trabalho",
    "PEST": "Peste",
    "PFAN": "Paralisia Flácida Aguda",
    "PNEU": "Pneumoconioses relacionadas ao trabalho",
    "RAIV": "Raiva",
    "SDTA": "Surto Doenças Transmitidas por Alimentos",
    "SIFA": "Sífilis Adquirida",
    "SIFC": "Sífilis Congênita",
    "SIFG": "Sífilis em Gestante",
    "SRC": "Síndrome da Rubéola Congênita",
    "TETA": "Tétano Acidental",
    "TETN": "Tétano Neonatal",
    "TOXC": "Toxoplasmose Congênita",
    "TOXG": "Toxoplasmose Gestacional",
    "TRAC": "Inquérito de Tracoma",
    "TUBE": "Tuberculose",
    "VARC": "Varicela",
    "VIOL": "Violência doméstica, sexual e/ou outras violências",
    "ZIKA": "Zika Vírus",
}

@property
Expand Down
1 change: 1 addition & 0 deletions pysus/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ async def search(self, **kwargs) -> list[BaseRemoteFile]:

class BaseRemoteDataset(BaseRemoteObject, SearchableMixin, ABC):
client: BaseRemoteClient = Field(exclude=True)
group_definitions: dict[str, str] = {}
_content: Sequence[BaseRemoteGroup | BaseRemoteFile] | None = PrivateAttr(
default=None
)
Expand Down
Loading
Loading