diff --git a/baseten/client/_inference.py b/baseten/client/_inference.py index 350338d..d07b669 100644 --- a/baseten/client/_inference.py +++ b/baseten/client/_inference.py @@ -127,7 +127,7 @@ def __init__( request_headers: dict[str, str] = {**(headers or {})} # Empty api_key is an advanced opt-out from sending Authorization. if api_key != "": - request_headers["Authorization"] = f"Api-Key {api_key}" + request_headers["Authorization"] = f"Bearer {api_key}" self._http_client = httpx.Client( base_url=self._options.base_url, headers=with_user_agent(request_headers), @@ -242,7 +242,7 @@ def __init__( request_headers: dict[str, str] = {**(headers or {})} # Empty api_key is an advanced opt-out from sending Authorization. if api_key != "": - request_headers["Authorization"] = f"Api-Key {api_key}" + request_headers["Authorization"] = f"Bearer {api_key}" self._http_client = httpx.AsyncClient( base_url=self._options.base_url, headers=with_user_agent(request_headers), diff --git a/baseten/client/_management.py b/baseten/client/_management.py index e6a7013..a3cd37c 100644 --- a/baseten/client/_management.py +++ b/baseten/client/_management.py @@ -78,7 +78,7 @@ def __init__( request_headers: dict[str, str] = {**(headers or {})} # Empty api_key is an advanced opt-out from sending Authorization. if api_key != "": - request_headers["Authorization"] = f"Api-Key {api_key}" + request_headers["Authorization"] = f"Bearer {api_key}" self._http_client = httpx.Client( base_url=self._options.base_url, headers=with_user_agent(request_headers), @@ -171,7 +171,7 @@ def __init__( request_headers: dict[str, str] = {**(headers or {})} # Empty api_key is an advanced opt-out from sending Authorization. 
if api_key != "": - request_headers["Authorization"] = f"Api-Key {api_key}" + request_headers["Authorization"] = f"Bearer {api_key}" self._http_client = httpx.AsyncClient( base_url=self._options.base_url, headers=with_user_agent(request_headers), diff --git a/baseten/client/managementapi/__init__.py b/baseten/client/managementapi/__init__.py index 9af74bd..499f059 100644 --- a/baseten/client/managementapi/__init__.py +++ b/baseten/client/managementapi/__init__.py @@ -42,17 +42,36 @@ ChainletEnvironmentSettingsRequest, Chains, CheckpointFile, + CheckpointSearchRequest, + CheckpointSearchResponse, CheckpointSyncStatus, Checkpoints, CreateAPIKeyRequest, + CreateApiKeyForGroupRequest, + CreateApiKeyForGroupResponse, CreateChainEnvironmentRequest, CreateEnvironmentRequest, + CreateGroupRequest, CreateJobWeightConfig, CreateLLMModelRequest, CreateLLMModelVersionRequest, CreateLibraryListingRequest, CreateLibraryListingVersionRequest, + CreateLoopsRunRequest, + CreateLoopsRunResponse, + CreateLoopsSamplerRequest, + CreateLoopsSamplerResponse, + CreateLoopsSessionRequest, + CreateLoopsSessionResponse, + CreateModelDeploymentRequest, + CreateModelRequest, CreateModelWeightSnapshotRequest, + CreateSamplingServerRequest, + CreateSamplingServerResponse, + CreateTrainerServerRequest, + CreateTrainerServerResponse, + CreateTrainerSessionRequest, + CreateTrainerSessionResponse, CreateTrainingJob, CreateTrainingJobAccelerator, CreateTrainingJobCacheConfig, @@ -63,30 +82,54 @@ CreateTrainingJobResponse, CreateTrainingJobRuntime, CreateTrainingJobS3Artifact, + CreatedModelDeployment, CreditsUsed, DailyDedicatedUsage, DailyModelApiUsage, DailyTrainingUsage, + DeactivateLoopsDeploymentResponse, DeactivateResponse, DedicatedItem, DedicatedUsage, Deployment, + DeploymentArchivePayload, + DeploymentArchiveSource, + DeploymentConfigOutputFormat, + DeploymentConfigResponse, DeploymentStatus, DeploymentTombstone, Deployments, DockerAuth, DockerAuthType, + DownloadDeploymentResponse, 
DownloadTrainingJobResponse, + EffectiveModelConfig, + EffectiveRateLimit, + EffectiveUsageLimit, Environment, Environments, FileSummary, + GatewayKeyInfo, GcpOidcDockerAuth, GcpServiceAccountJsonDockerAuth, GetAuthCodesResponse, GetBlobCredentialsResponse, GetCacheSummaryResponse, + GetDeploymentConfigRequest, GetDeploymentLogsRequest, GetLogsResponse, + GetLoopsCapabilitiesResponse, + GetLoopsDeploymentLogsRequest, + GetLoopsDeploymentMetricsRequest, + GetLoopsDeploymentMetricsResponse, + GetLoopsDeploymentResponse, + GetLoopsRunResponse, + GetLoopsSamplerResponse, + GetLoopsSessionResponse, + GetTrainerServerCheckpointFilesResponse, + GetTrainerServerCheckpointsResponse, + GetTrainingGpuCapacityResponse, + GetTrainingJobCheckpointFilesRequest, GetTrainingJobCheckpointFilesResponse, GetTrainingJobCheckpointsResponse, GetTrainingJobLogsRequest, @@ -95,52 +138,96 @@ GetTrainingJobResponse, GetTrainingProjectResponse, GitInfo, + Group, + GroupHierarchy, + GroupMetadata, + GroupsResponse, InProgressPromotion, InProgressPromotionStatus, + InferenceVolumeByStatusDatapoint, InstanceType, InstanceTypePrices, InstanceTypeWithPrice, InstanceTypes, InteractiveSession, InteractiveSessionConfig, - LLMModel, - LLMModelVersion, + KeysForGroupResponse, + LLMModelHandle, LibraryListing, + LibraryListingSource, LibraryListingTombstone, LibraryListingVersion, LibraryListingVersionTombstone, LibraryListingVersions, LibraryListings, Limit, + LimitEnforcement, + LimitType, + ListLoopsCheckpointsQueryParams, + ListLoopsCheckpointsResponse, + ListLoopsDeploymentsResponse, + ListLoopsRunsQueryParams, + ListLoopsRunsResponse, + ListLoopsSamplersResponse, ListTrainingJobsResponse, ListTrainingProjectsResponse, LoadCheckpointConfig, Log, + LoopsCheckpoint, + LoopsCheckpointFilesResponse, + LoopsDeployment, + LoopsDeploymentMetrics, + LoopsDeploymentNodeMetrics, + LoopsDeploymentStatus, + LoopsRun, + LoopsSampler, + LoopsSamplerStatus, + LoopsSession, + MaxScaleDownRate, Model, ModelApiItem, 
ModelApisUsage, + ModelArchiveSource, + ModelConfig, ModelTombstone, ModelWeightSnapshot, Models, + Name, + Name1, OrderBy, + PaginationResponse, PatchInteractiveSessionRequest, PatchInteractiveSessionResponse, + PrepareModelUploadRequest, + PrepareModelUploadResponse, PromoteRequest, PromoteToChainEnvironmentRequest, PromoteToEnvironmentRequest, PromotionCleanupStrategy, PromotionSettings, + RateLimit, + RateLimitUnit, RecreateTrainingJobResponse, + RegisterAPIKeyRequest, + RegisterAPIKeyResponse, RegistrySecretDockerAuth, ResourceKind, + ResponseTimeDatapoint, RetryDeploymentResponse, RollingDeployConfig, RollingDeployStrategy, + SamplingServer, + SearchTrainersRequest, + SearchTrainersResponse, SearchTrainingJobsRequest, SearchTrainingJobsResponse, + SearchedTrainer, Secret, SecretReference, + SecretTombstone, Secrets, + SignSSHCertificateRequest, + SignSSHCertificateResponse, SignalPromotionResponse, SortOrder, StopTrainingJobRequest, @@ -153,10 +240,16 @@ Subtotal6, Subtotal7, Subtotal8, + SupportedModel, Team, Teams, TerminateReplicaResponse, Total, + TrainerCheckpointTarget, + TrainerServer, + TrainerServerCheckpoint, + TrainerSession, + TrainingGpuCapacityItem, TrainingItem, TrainingJob, TrainingJobCheckpoint, @@ -178,6 +271,8 @@ UpdateChainletEnvironmentInstanceTypeRequest, UpdateChainletEnvironmentInstanceTypeResponse, UpdateEnvironmentRequest, + UpdateGroupMetadata, + UpdateGroupRequest, UpdateLibraryListingRequest, UpdateLibraryListingVersionRequest, UpdatePromotionSettings, @@ -186,11 +281,17 @@ UpsertTrainingProject, UpsertTrainingProjectRequest, UpsertTrainingProjectResponse, + UsageLimit, + UsageLimitUnit, UsageSummary, + UsageSummaryRequest, User, + UserInfo, V1InteractiveSessionAuthProvider, V1InteractiveSessionProvider, V1InteractiveSessionTrigger, + ValidateLoopsCheckpointRequest, + ValidateLoopsCheckpointResponse, ) __all__ = [ @@ -227,17 +328,36 @@ "ChainletEnvironmentSettingsRequest", "Chains", "CheckpointFile", + "CheckpointSearchRequest", + 
"CheckpointSearchResponse", "CheckpointSyncStatus", "Checkpoints", "CreateAPIKeyRequest", + "CreateApiKeyForGroupRequest", + "CreateApiKeyForGroupResponse", "CreateChainEnvironmentRequest", "CreateEnvironmentRequest", + "CreateGroupRequest", "CreateJobWeightConfig", "CreateLLMModelRequest", "CreateLLMModelVersionRequest", "CreateLibraryListingRequest", "CreateLibraryListingVersionRequest", + "CreateLoopsRunRequest", + "CreateLoopsRunResponse", + "CreateLoopsSamplerRequest", + "CreateLoopsSamplerResponse", + "CreateLoopsSessionRequest", + "CreateLoopsSessionResponse", + "CreateModelDeploymentRequest", + "CreateModelRequest", "CreateModelWeightSnapshotRequest", + "CreateSamplingServerRequest", + "CreateSamplingServerResponse", + "CreateTrainerServerRequest", + "CreateTrainerServerResponse", + "CreateTrainerSessionRequest", + "CreateTrainerSessionResponse", "CreateTrainingJob", "CreateTrainingJobAccelerator", "CreateTrainingJobCacheConfig", @@ -248,30 +368,54 @@ "CreateTrainingJobResponse", "CreateTrainingJobRuntime", "CreateTrainingJobS3Artifact", + "CreatedModelDeployment", "CreditsUsed", "DailyDedicatedUsage", "DailyModelApiUsage", "DailyTrainingUsage", + "DeactivateLoopsDeploymentResponse", "DeactivateResponse", "DedicatedItem", "DedicatedUsage", "Deployment", + "DeploymentArchivePayload", + "DeploymentArchiveSource", + "DeploymentConfigOutputFormat", + "DeploymentConfigResponse", "DeploymentStatus", "DeploymentTombstone", "Deployments", "DockerAuth", "DockerAuthType", + "DownloadDeploymentResponse", "DownloadTrainingJobResponse", + "EffectiveModelConfig", + "EffectiveRateLimit", + "EffectiveUsageLimit", "Environment", "Environments", "FileSummary", + "GatewayKeyInfo", "GcpOidcDockerAuth", "GcpServiceAccountJsonDockerAuth", "GetAuthCodesResponse", "GetBlobCredentialsResponse", "GetCacheSummaryResponse", + "GetDeploymentConfigRequest", "GetDeploymentLogsRequest", "GetLogsResponse", + "GetLoopsCapabilitiesResponse", + "GetLoopsDeploymentLogsRequest", + 
"GetLoopsDeploymentMetricsRequest", + "GetLoopsDeploymentMetricsResponse", + "GetLoopsDeploymentResponse", + "GetLoopsRunResponse", + "GetLoopsSamplerResponse", + "GetLoopsSessionResponse", + "GetTrainerServerCheckpointFilesResponse", + "GetTrainerServerCheckpointsResponse", + "GetTrainingGpuCapacityResponse", + "GetTrainingJobCheckpointFilesRequest", "GetTrainingJobCheckpointFilesResponse", "GetTrainingJobCheckpointsResponse", "GetTrainingJobLogsRequest", @@ -280,52 +424,96 @@ "GetTrainingJobResponse", "GetTrainingProjectResponse", "GitInfo", + "Group", + "GroupHierarchy", + "GroupMetadata", + "GroupsResponse", "InProgressPromotion", "InProgressPromotionStatus", + "InferenceVolumeByStatusDatapoint", "InstanceType", "InstanceTypePrices", "InstanceTypeWithPrice", "InstanceTypes", "InteractiveSession", "InteractiveSessionConfig", - "LLMModel", - "LLMModelVersion", + "KeysForGroupResponse", + "LLMModelHandle", "LibraryListing", + "LibraryListingSource", "LibraryListingTombstone", "LibraryListingVersion", "LibraryListingVersionTombstone", "LibraryListingVersions", "LibraryListings", "Limit", + "LimitEnforcement", + "LimitType", + "ListLoopsCheckpointsQueryParams", + "ListLoopsCheckpointsResponse", + "ListLoopsDeploymentsResponse", + "ListLoopsRunsQueryParams", + "ListLoopsRunsResponse", + "ListLoopsSamplersResponse", "ListTrainingJobsResponse", "ListTrainingProjectsResponse", "LoadCheckpointConfig", "Log", + "LoopsCheckpoint", + "LoopsCheckpointFilesResponse", + "LoopsDeployment", + "LoopsDeploymentMetrics", + "LoopsDeploymentNodeMetrics", + "LoopsDeploymentStatus", + "LoopsRun", + "LoopsSampler", + "LoopsSamplerStatus", + "LoopsSession", + "MaxScaleDownRate", "Model", "ModelApiItem", "ModelApisUsage", + "ModelArchiveSource", + "ModelConfig", "ModelTombstone", "ModelWeightSnapshot", "Models", + "Name", + "Name1", "OrderBy", + "PaginationResponse", "PatchInteractiveSessionRequest", "PatchInteractiveSessionResponse", + "PrepareModelUploadRequest", + 
"PrepareModelUploadResponse", "PromoteRequest", "PromoteToChainEnvironmentRequest", "PromoteToEnvironmentRequest", "PromotionCleanupStrategy", "PromotionSettings", + "RateLimit", + "RateLimitUnit", "RecreateTrainingJobResponse", + "RegisterAPIKeyRequest", + "RegisterAPIKeyResponse", "RegistrySecretDockerAuth", "ResourceKind", + "ResponseTimeDatapoint", "RetryDeploymentResponse", "RollingDeployConfig", "RollingDeployStrategy", + "SamplingServer", + "SearchTrainersRequest", + "SearchTrainersResponse", "SearchTrainingJobsRequest", "SearchTrainingJobsResponse", + "SearchedTrainer", "Secret", "SecretReference", + "SecretTombstone", "Secrets", + "SignSSHCertificateRequest", + "SignSSHCertificateResponse", "SignalPromotionResponse", "SortOrder", "StopTrainingJobRequest", @@ -338,10 +526,16 @@ "Subtotal6", "Subtotal7", "Subtotal8", + "SupportedModel", "Team", "Teams", "TerminateReplicaResponse", "Total", + "TrainerCheckpointTarget", + "TrainerServer", + "TrainerServerCheckpoint", + "TrainerSession", + "TrainingGpuCapacityItem", "TrainingItem", "TrainingJob", "TrainingJobCheckpoint", @@ -363,6 +557,8 @@ "UpdateChainletEnvironmentInstanceTypeRequest", "UpdateChainletEnvironmentInstanceTypeResponse", "UpdateEnvironmentRequest", + "UpdateGroupMetadata", + "UpdateGroupRequest", "UpdateLibraryListingRequest", "UpdateLibraryListingVersionRequest", "UpdatePromotionSettings", @@ -371,9 +567,15 @@ "UpsertTrainingProject", "UpsertTrainingProjectRequest", "UpsertTrainingProjectResponse", + "UsageLimit", + "UsageLimitUnit", "UsageSummary", + "UsageSummaryRequest", "User", + "UserInfo", "V1InteractiveSessionAuthProvider", "V1InteractiveSessionProvider", "V1InteractiveSessionTrigger", + "ValidateLoopsCheckpointRequest", + "ValidateLoopsCheckpointResponse", ] diff --git a/baseten/client/managementapi/_client.py b/baseten/client/managementapi/_client.py index b21f9b7..d5692f9 100644 --- a/baseten/client/managementapi/_client.py +++ b/baseten/client/managementapi/_client.py @@ -22,28 +22,62 
@@ ChainEnvironment, ChainTombstone, Chains, + CheckpointSearchRequest, + CheckpointSearchResponse, CreateAPIKeyRequest, + CreateApiKeyForGroupRequest, + CreateApiKeyForGroupResponse, CreateChainEnvironmentRequest, CreateEnvironmentRequest, + CreateGroupRequest, CreateLLMModelRequest, CreateLLMModelVersionRequest, CreateLibraryListingRequest, CreateLibraryListingVersionRequest, + CreateLoopsRunRequest, + CreateLoopsRunResponse, + CreateLoopsSamplerRequest, + CreateLoopsSamplerResponse, + CreateLoopsSessionRequest, + CreateLoopsSessionResponse, + CreateModelDeploymentRequest, + CreateModelRequest, CreateModelWeightSnapshotRequest, + CreateSamplingServerRequest, + CreateSamplingServerResponse, + CreateTrainerServerRequest, + CreateTrainerServerResponse, + CreateTrainerSessionRequest, + CreateTrainerSessionResponse, CreateTrainingJobRequest, CreateTrainingJobResponse, + CreatedModelDeployment, + DeactivateLoopsDeploymentResponse, DeactivateResponse, Deployment, + DeploymentConfigResponse, DeploymentTombstone, Deployments, + DownloadDeploymentResponse, DownloadTrainingJobResponse, Environment, Environments, + GatewayKeyInfo, GetAuthCodesResponse, GetBlobCredentialsResponse, GetCacheSummaryResponse, GetDeploymentLogsRequest, GetLogsResponse, + GetLoopsCapabilitiesResponse, + GetLoopsDeploymentMetricsRequest, + GetLoopsDeploymentMetricsResponse, + GetLoopsDeploymentResponse, + GetLoopsRunResponse, + GetLoopsSamplerResponse, + GetLoopsSessionResponse, + GetTrainerServerCheckpointFilesResponse, + GetTrainerServerCheckpointsResponse, + GetTrainingGpuCapacityResponse, GetTrainingJobCheckpointFilesResponse, GetTrainingJobCheckpointsResponse, GetTrainingJobLogsRequest, @@ -51,33 +85,49 @@ GetTrainingJobMetricsResponse, GetTrainingJobResponse, GetTrainingProjectResponse, + Group, + GroupsResponse, InstanceTypePrices, InstanceTypes, - LLMModel, - LLMModelVersion, + KeysForGroupResponse, + LLMModelHandle, LibraryListing, LibraryListingTombstone, LibraryListingVersion, 
LibraryListingVersionTombstone, LibraryListingVersions, LibraryListings, + ListLoopsCheckpointsResponse, + ListLoopsDeploymentsResponse, + ListLoopsRunsResponse, + ListLoopsSamplersResponse, ListTrainingJobsResponse, ListTrainingProjectsResponse, + LoopsCheckpointFilesResponse, Model, ModelTombstone, ModelWeightSnapshot, Models, PatchInteractiveSessionRequest, PatchInteractiveSessionResponse, + PrepareModelUploadRequest, + PrepareModelUploadResponse, PromoteRequest, PromoteToChainEnvironmentRequest, PromoteToEnvironmentRequest, RecreateTrainingJobResponse, + RegisterAPIKeyRequest, + RegisterAPIKeyResponse, RetryDeploymentResponse, + SearchTrainersRequest, + SearchTrainersResponse, SearchTrainingJobsRequest, SearchTrainingJobsResponse, Secret, + SecretTombstone, Secrets, + SignSSHCertificateRequest, + SignSSHCertificateResponse, SignalPromotionResponse, StopTrainingJobRequest, StopTrainingJobResponse, @@ -93,12 +143,16 @@ UpdateChainletEnvironmentInstanceTypeRequest, UpdateChainletEnvironmentInstanceTypeResponse, UpdateEnvironmentRequest, + UpdateGroupRequest, UpdateLibraryListingRequest, UpdateLibraryListingVersionRequest, UpsertSecretRequest, UpsertTrainingProjectRequest, UpsertTrainingProjectResponse, UsageSummary, + UserInfo, + ValidateLoopsCheckpointRequest, + ValidateLoopsCheckpointResponse, ) _T = TypeVar("_T", bound=BaseModel) @@ -257,6 +311,36 @@ def delete_models_deployments_replicas( ), ) + def delete_secrets(self, *, secret_name: str) -> SecretTombstone: + """Deletes a secret by name""" + return self._do_json( + SecretTombstone, + _ApiRequest( + method="DELETE", + path_fmt="/v1/secrets/{}", + path_args=[secret_name], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def delete_teams_secrets( + self, *, team_id: str, secret_name: str + ) -> SecretTombstone: + """Deletes a secret by name""" + return self._do_json( + SecretTombstone, + _ApiRequest( + method="DELETE", + path_fmt="/v1/teams/{}/secrets/{}", + path_args=[team_id, secret_name], + 
body=None, + success_code=200, + error_codes=None, + ), + ) + def delete_training_projects( self, *, training_project_id: str ) -> TrainingProjectTombstone: @@ -290,7 +374,7 @@ def delete_training_projects_jobs( ) def get_api_keys(self) -> APIKeys: - """Lists the user's API keys""" + """Lists the user's API keys (metadata only, no plain text keys)""" return self._do_json( APIKeys, _ApiRequest( @@ -403,6 +487,22 @@ def get_chains_deployments_chain_deployment_id( ), ) + def get_chains_deployments_chainlets_logs( + self, *, chain_id: str, chain_deployment_id: str, chainlet_id: str + ) -> GetLogsResponse: + """Gets the logs for a chainlet within a chain deployment.""" + return self._do_json( + GetLogsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/chains/{}/deployments/{}/chainlets/{}/logs", + path_args=[chain_id, chain_deployment_id, chainlet_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + def get_chains_environments(self, *, chain_id: str) -> Environments: """Get all chain environments""" return self._do_json( @@ -433,6 +533,64 @@ def get_chains_environments_env_name( ), ) + def get_gateway_groups(self) -> GroupsResponse: + """List groups""" + return self._do_json( + GroupsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_gateway_groups_api_keys(self, *, group_id: str) -> KeysForGroupResponse: + """List API keys for a group""" + return self._do_json( + KeysForGroupResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups/{}/api_keys", + path_args=[group_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_gateway_groups_api_keys_api_key_prefix( + self, *, group_id: str, api_key_prefix: str + ) -> GatewayKeyInfo: + """Get an API key for a group""" + return self._do_json( + GatewayKeyInfo, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups/{}/api_keys/{}", + 
path_args=[group_id, api_key_prefix], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_gateway_groups_group_id(self, *, group_id: str) -> Group: + """Get a group""" + return self._do_json( + Group, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups/{}", + path_args=[group_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + def get_instance_type_prices(self) -> InstanceTypePrices: """Gets prices for available instance types.""" return self._do_json( @@ -523,6 +681,166 @@ def get_library_listings_versions_version_tag( ), ) + def get_loops_capabilities(self) -> GetLoopsCapabilitiesResponse: + """Get Loops server capabilities.""" + return self._do_json( + GetLoopsCapabilitiesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/capabilities", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_checkpoints(self) -> ListLoopsCheckpointsResponse: + """List Loops checkpoints.""" + return self._do_json( + ListLoopsCheckpointsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/checkpoints", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_checkpoints_files( + self, *, checkpoint_id: str + ) -> LoopsCheckpointFilesResponse: + """Get Loops checkpoint files.""" + return self._do_json( + LoopsCheckpointFilesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/checkpoints/{}/files", + path_args=[checkpoint_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_deployments(self) -> ListLoopsDeploymentsResponse: + """List Loops deployments.""" + return self._do_json( + ListLoopsDeploymentsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/deployments", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_deployments_deployment_id( + self, *, deployment_id: str + ) -> GetLoopsDeploymentResponse: + """Get a Loops deployment.""" + 
return self._do_json( + GetLoopsDeploymentResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/deployments/{}", + path_args=[deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_deployments_logs(self, *, deployment_id: str) -> GetLogsResponse: + """Get logs for a Loops trainer deployment.""" + return self._do_json( + GetLogsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/deployments/{}/logs", + path_args=[deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_runs(self) -> ListLoopsRunsResponse: + """List Loops runs.""" + return self._do_json( + ListLoopsRunsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/runs", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_runs_run_id(self, *, run_id: str) -> GetLoopsRunResponse: + """Get a Loops run.""" + return self._do_json( + GetLoopsRunResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/runs/{}", + path_args=[run_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_samplers(self) -> ListLoopsSamplersResponse: + """List Loops samplers.""" + return self._do_json( + ListLoopsSamplersResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/samplers", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_samplers_sampler_id( + self, *, sampler_id: str + ) -> GetLoopsSamplerResponse: + """Get a Loops sampler.""" + return self._do_json( + GetLoopsSamplerResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/samplers/{}", + path_args=[sampler_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_loops_sessions(self, *, session_id: str) -> GetLoopsSessionResponse: + """Get a Loops session.""" + return self._do_json( + GetLoopsSessionResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/sessions/{}", + path_args=[session_id], + 
body=None, + success_code=200, + error_codes=None, + ), + ) + def get_model_apis_snapshots(self) -> ModelWeightSnapshot: """Get the latest model weight snapshot""" return self._do_json( @@ -581,6 +899,22 @@ def get_models_deployments(self, *, model_id: str) -> Deployments: ), ) + def get_models_deployments_config( + self, *, model_id: str, deployment_id: str + ) -> DeploymentConfigResponse: + """Gets a deployment's config""" + return self._do_json( + DeploymentConfigResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/models/{}/deployments/{}/config", + path_args=[model_id, deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + def get_models_deployments_deployment_id( self, *, model_id: str, deployment_id: str ) -> Deployment: @@ -611,6 +945,38 @@ def get_models_deployments_development(self, *, model_id: str) -> Deployment: ), ) + def get_models_deployments_download( + self, *, model_id: str, deployment_id: str + ) -> DownloadDeploymentResponse: + """Gets a presigned download URL for a deployment's truss""" + return self._do_json( + DownloadDeploymentResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/models/{}/deployments/{}/download", + path_args=[model_id, deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_models_deployments_logs( + self, *, model_id: str, deployment_id: str + ) -> GetLogsResponse: + """Gets the logs for a model deployment.""" + return self._do_json( + GetLogsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/models/{}/deployments/{}/logs", + path_args=[model_id, deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + def get_models_deployments_production(self, *, model_id: str) -> Deployment: """Gets a model's production deployment""" return self._do_json( @@ -670,7 +1036,7 @@ def get_models_model_id(self, *, model_id: str) -> Model: ) def get_secrets(self) -> Secrets: - """Gets all secrets""" + """Gets all secrets (metadata only, no plain 
text keys)""" return self._do_json( Secrets, _ApiRequest( @@ -697,8 +1063,22 @@ def get_teams(self) -> Teams: ), ) + def get_teams_models(self, *, team_id: str) -> Models: + """Gets all models""" + return self._do_json( + Models, + _ApiRequest( + method="GET", + path_fmt="/v1/teams/{}/models", + path_args=[team_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + def get_teams_secrets(self, *, team_id: str) -> Secrets: - """Gets all secrets for a team""" + """Gets all secrets for a team (metadata only, no plain text keys)""" return self._do_json( Secrets, _ApiRequest( @@ -711,6 +1091,52 @@ def get_teams_secrets(self, *, team_id: str) -> Secrets: ), ) + def get_trainer_sessions_trainers_checkpoints( + self, *, session_id: str, trainer_id: str + ) -> GetTrainerServerCheckpointsResponse: + """List trainer server checkpoints.""" + return self._do_json( + GetTrainerServerCheckpointsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/trainer_sessions/{}/trainers/{}/checkpoints", + path_args=[session_id, trainer_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_trainer_sessions_trainers_checkpoints_files( + self, *, session_id: str, trainer_id: str, checkpoint_id: str + ) -> GetTrainerServerCheckpointFilesResponse: + """Get trainer server checkpoint files.""" + return self._do_json( + GetTrainerServerCheckpointFilesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/trainer_sessions/{}/trainers/{}/checkpoints/{}/files", + path_args=[session_id, trainer_id, checkpoint_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_training_capacity(self) -> GetTrainingGpuCapacityResponse: + """Get training GPU capacity.""" + return self._do_json( + GetTrainingGpuCapacityResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/training/capacity", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + def get_training_projects(self) -> ListTrainingProjectsResponse: """List training 
projects.""" return self._do_json( @@ -821,15 +1247,47 @@ def get_training_projects_jobs_download( ), ) - def get_training_projects_jobs_training_job_id( + def get_training_projects_jobs_logs( self, *, training_project_id: str, training_job_id: str - ) -> GetTrainingJobResponse: - """Get a training job.""" + ) -> GetLogsResponse: + """Get the logs for a training job.""" return self._do_json( - GetTrainingJobResponse, + GetLogsResponse, _ApiRequest( method="GET", - path_fmt="/v1/training_projects/{}/jobs/{}", + path_fmt="/v1/training_projects/{}/jobs/{}/logs", + path_args=[training_project_id, training_job_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_training_projects_jobs_metrics( + self, *, training_project_id: str, training_job_id: str + ) -> GetTrainingJobMetricsResponse: + """Get the metrics for a training job.""" + return self._do_json( + GetTrainingJobMetricsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/training_projects/{}/jobs/{}/metrics", + path_args=[training_project_id, training_job_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def get_training_projects_jobs_training_job_id( + self, *, training_project_id: str, training_job_id: str + ) -> GetTrainingJobResponse: + """Get a training job.""" + return self._do_json( + GetTrainingJobResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/training_projects/{}/jobs/{}", path_args=[training_project_id, training_job_id], body=None, success_code=200, @@ -853,6 +1311,20 @@ def get_training_projects_training_project_id( ), ) + def get_users(self, *, user_id: str) -> UserInfo: + """Gets a user by ID""" + return self._do_json( + UserInfo, + _ApiRequest( + method="GET", + path_fmt="/v1/users/{}", + path_args=[user_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + def patch_chains_environments( self, *, chain_id: str, env_name: str, body: UpdateChainEnvironmentRequest ) -> UpdateChainEnvironmentResponse: @@ -889,6 +1361,20 @@ def 
patch_chains_environments_chainlet_settings_autoscaling_settings( ), ) + def patch_gateway_groups(self, *, group_id: str, body: UpdateGroupRequest) -> Group: + """Update a group""" + return self._do_json( + Group, + _ApiRequest( + method="PATCH", + path_fmt="/v1/gateway/groups/{}", + path_args=[group_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + def patch_library_listings( self, *, user_defined_listing_id: str, body: UpdateLibraryListingRequest ) -> LibraryListing: @@ -1092,6 +1578,52 @@ def post_chains_environments_promote( ), ) + def post_gateway_groups(self, *, body: CreateGroupRequest) -> Group: + """Create a group""" + return self._do_json( + Group, + _ApiRequest( + method="POST", + path_fmt="/v1/gateway/groups", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_gateway_groups_api_keys( + self, *, group_id: str, body: CreateApiKeyForGroupRequest + ) -> CreateApiKeyForGroupResponse: + """Create an API key for a group""" + return self._do_json( + CreateApiKeyForGroupResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/gateway/groups/{}/api_keys", + path_args=[group_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_gateway_groups_api_keys_register( + self, *, group_id: str, body: RegisterAPIKeyRequest + ) -> RegisterAPIKeyResponse: + """Register an API key for a group""" + return self._do_json( + RegisterAPIKeyResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/gateway/groups/{}/api_keys/register", + path_args=[group_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + def post_library_listings( self, *, body: CreateLibraryListingRequest ) -> LibraryListing: @@ -1124,10 +1656,10 @@ def post_library_listings_versions( ), ) - def post_llm_models(self, *, body: CreateLLMModelRequest) -> LLMModel: + def post_llm_models(self, *, body: CreateLLMModelRequest) -> LLMModelHandle: """Creates a new BIS LLM deployment""" return self._do_json( - LLMModel, + 
LLMModelHandle, _ApiRequest( method="POST", path_fmt="/v1/llm_models", @@ -1140,10 +1672,10 @@ def post_llm_models(self, *, body: CreateLLMModelRequest) -> LLMModel: def post_llm_models_deployments( self, *, model_id: str, body: CreateLLMModelVersionRequest - ) -> LLMModelVersion: + ) -> LLMModelHandle: """Creates a new BIS LLM deployment version""" return self._do_json( - LLMModelVersion, + LLMModelHandle, _ApiRequest( method="POST", path_fmt="/v1/llm_models/{}/deployments", @@ -1154,6 +1686,100 @@ def post_llm_models_deployments( ), ) + def post_loops_checkpoints_validate( + self, *, body: ValidateLoopsCheckpointRequest + ) -> ValidateLoopsCheckpointResponse: + """Validate a Loops checkpoint bt:// URI.""" + return self._do_json( + ValidateLoopsCheckpointResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/checkpoints/validate", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_loops_deployments_deactivate( + self, *, deployment_id: str + ) -> DeactivateLoopsDeploymentResponse: + """Deactivate a Loops deployment.""" + return self._do_json( + DeactivateLoopsDeploymentResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/deployments/{}/deactivate", + path_args=[deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + def post_loops_deployments_metrics( + self, *, deployment_id: str, body: GetLoopsDeploymentMetricsRequest + ) -> GetLoopsDeploymentMetricsResponse: + """Get metrics for a Loops trainer deployment.""" + return self._do_json( + GetLoopsDeploymentMetricsResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/deployments/{}/metrics", + path_args=[deployment_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_loops_runs(self, *, body: CreateLoopsRunRequest) -> CreateLoopsRunResponse: + """Create a Loops run.""" + return self._do_json( + CreateLoopsRunResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/runs", + path_args=[], + 
body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_loops_samplers( + self, *, body: CreateLoopsSamplerRequest + ) -> CreateLoopsSamplerResponse: + """Create a Loops sampler.""" + return self._do_json( + CreateLoopsSamplerResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/samplers", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_loops_sessions( + self, *, body: CreateLoopsSessionRequest + ) -> CreateLoopsSessionResponse: + """Create a Loops session.""" + return self._do_json( + CreateLoopsSessionResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/sessions", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + def post_model_apis_snapshots( self, *, body: CreateModelWeightSnapshotRequest ) -> ModelWeightSnapshot: @@ -1186,6 +1812,36 @@ def post_model_apis_snapshots_model_id( ), ) + def post_models(self, *, body: CreateModelRequest) -> CreatedModelDeployment: + """Creates a new model from a source""" + return self._do_json( + CreatedModelDeployment, + _ApiRequest( + method="POST", + path_fmt="/v1/models", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_models_deployments( + self, *, model_id: str, body: CreateModelDeploymentRequest + ) -> CreatedModelDeployment: + """Adds a new deployment to a model""" + return self._do_json( + CreatedModelDeployment, + _ApiRequest( + method="POST", + path_fmt="/v1/models/{}/deployments", + path_args=[model_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + def post_models_deployments_activate( self, *, model_id: str, deployment_id: str ) -> ActivateResponse: @@ -1285,7 +1941,7 @@ def post_models_deployments_development_retry( def post_models_deployments_logs( self, *, model_id: str, deployment_id: str, body: GetDeploymentLogsRequest ) -> GetLogsResponse: - """Gets the logs for a model deployment.""" + """Gets the logs for a model deployment (deprecated; use 
GET).""" return self._do_json( GetLogsResponse, _ApiRequest( @@ -1378,6 +2034,22 @@ def post_models_deployments_retry( ), ) + def post_models_deployments_ssh_sign( + self, *, model_id: str, deployment_id: str, body: SignSSHCertificateRequest + ) -> SignSSHCertificateResponse: + """Sign an SSH certificate for an inference model.""" + return self._do_json( + SignSSHCertificateResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/models/{}/deployments/{}/ssh/sign", + path_args=[model_id, deployment_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + def post_models_environments( self, *, model_id: str, body: CreateEnvironmentRequest ) -> Environment: @@ -1522,6 +2194,22 @@ def post_models_environments_resume_promotion( ), ) + def post_prepare_model_upload( + self, *, body: PrepareModelUploadRequest + ) -> PrepareModelUploadResponse: + """Validates a model push payload and issues upload credentials""" + return self._do_json( + PrepareModelUploadResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/prepare_model_upload", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + def post_secrets(self, *, body: UpsertSecretRequest) -> Secret: """Upserts a secret""" return self._do_json( @@ -1550,6 +2238,38 @@ def post_teams_api_keys(self, *, team_id: str, body: CreateAPIKeyRequest) -> API ), ) + def post_teams_llm_models( + self, *, team_id: str, body: CreateLLMModelRequest + ) -> LLMModelHandle: + """Creates a new BIS LLM deployment""" + return self._do_json( + LLMModelHandle, + _ApiRequest( + method="POST", + path_fmt="/v1/teams/{}/llm_models", + path_args=[team_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_teams_models( + self, *, team_id: str, body: CreateModelRequest + ) -> CreatedModelDeployment: + """Creates a new model from a source""" + return self._do_json( + CreatedModelDeployment, + _ApiRequest( + method="POST", + path_fmt="/v1/teams/{}/models", + path_args=[team_id], + body=body, 
+ success_code=200, + error_codes=None, + ), + ) + def post_teams_secrets(self, *, team_id: str, body: UpsertSecretRequest) -> Secret: """Upserts a secret in a team""" return self._do_json( @@ -1580,6 +2300,86 @@ def post_teams_training_projects( ), ) + def post_trainer_sessions( + self, *, body: CreateTrainerSessionRequest + ) -> CreateTrainerSessionResponse: + """Create a trainer session.""" + return self._do_json( + CreateTrainerSessionResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainer_sessions", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_trainer_sessions_samplers( + self, *, session_id: str, body: CreateSamplingServerRequest + ) -> CreateSamplingServerResponse: + """Create a sampling server.""" + return self._do_json( + CreateSamplingServerResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainer_sessions/{}/samplers", + path_args=[session_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_trainer_sessions_trainers( + self, *, session_id: str, body: CreateTrainerServerRequest + ) -> CreateTrainerServerResponse: + """Create a trainer server.""" + return self._do_json( + CreateTrainerServerResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainer_sessions/{}/trainers", + path_args=[session_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_trainers_checkpoints_search( + self, *, body: CheckpointSearchRequest + ) -> CheckpointSearchResponse: + """Look up trainer checkpoint info by bt:// URI.""" + return self._do_json( + CheckpointSearchResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainers/checkpoints/search", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_trainers_search( + self, *, body: SearchTrainersRequest + ) -> SearchTrainersResponse: + """Search trainers.""" + return self._do_json( + SearchTrainersResponse, + _ApiRequest( + method="POST", + 
path_fmt="/v1/trainers/search", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + def post_training_jobs_search( self, *, body: SearchTrainingJobsRequest ) -> SearchTrainingJobsResponse: @@ -1635,7 +2435,7 @@ def post_training_projects_jobs_logs( training_job_id: str, body: GetTrainingJobLogsRequest, ) -> GetLogsResponse: - """Get the logs for a training job.""" + """Get the logs for a training job (deprecated; use GET).""" return self._do_json( GetLogsResponse, _ApiRequest( @@ -1655,7 +2455,7 @@ def post_training_projects_jobs_metrics( training_job_id: str, body: GetTrainingJobMetricsRequest, ) -> GetTrainingJobMetricsResponse: - """Get the metrics for a training job.""" + """Get the metrics for a training job (deprecated; use GET).""" return self._do_json( GetTrainingJobMetricsResponse, _ApiRequest( @@ -1684,19 +2484,39 @@ def post_training_projects_jobs_recreate( ), ) - def post_training_projects_jobs_stop( + def post_training_projects_jobs_ssh_sign( self, *, training_project_id: str, training_job_id: str, - body: StopTrainingJobRequest, - ) -> StopTrainingJobResponse: - """Stop a training job.""" + body: SignSSHCertificateRequest, + ) -> SignSSHCertificateResponse: + """Sign an SSH certificate for a training job.""" return self._do_json( - StopTrainingJobResponse, + SignSSHCertificateResponse, _ApiRequest( method="POST", - path_fmt="/v1/training_projects/{}/jobs/{}/stop", + path_fmt="/v1/training_projects/{}/jobs/{}/ssh/sign", + path_args=[training_project_id, training_job_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + def post_training_projects_jobs_stop( + self, + *, + training_project_id: str, + training_job_id: str, + body: StopTrainingJobRequest, + ) -> StopTrainingJobResponse: + """Stop a training job.""" + return self._do_json( + StopTrainingJobResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/training_projects/{}/jobs/{}/stop", path_args=[training_project_id, training_job_id], body=body, 
success_code=200, @@ -1863,6 +2683,36 @@ async def delete_models_deployments_replicas( ), ) + async def delete_secrets(self, *, secret_name: str) -> SecretTombstone: + """Deletes a secret by name""" + return await self._do_json( + SecretTombstone, + _ApiRequest( + method="DELETE", + path_fmt="/v1/secrets/{}", + path_args=[secret_name], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def delete_teams_secrets( + self, *, team_id: str, secret_name: str + ) -> SecretTombstone: + """Deletes a secret by name""" + return await self._do_json( + SecretTombstone, + _ApiRequest( + method="DELETE", + path_fmt="/v1/teams/{}/secrets/{}", + path_args=[team_id, secret_name], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def delete_training_projects( self, *, training_project_id: str ) -> TrainingProjectTombstone: @@ -1896,7 +2746,7 @@ async def delete_training_projects_jobs( ) async def get_api_keys(self) -> APIKeys: - """Lists the user's API keys""" + """Lists the user's API keys (metadata only, no plain text keys)""" return await self._do_json( APIKeys, _ApiRequest( @@ -2009,6 +2859,22 @@ async def get_chains_deployments_chain_deployment_id( ), ) + async def get_chains_deployments_chainlets_logs( + self, *, chain_id: str, chain_deployment_id: str, chainlet_id: str + ) -> GetLogsResponse: + """Gets the logs for a chainlet within a chain deployment.""" + return await self._do_json( + GetLogsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/chains/{}/deployments/{}/chainlets/{}/logs", + path_args=[chain_id, chain_deployment_id, chainlet_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_chains_environments(self, *, chain_id: str) -> Environments: """Get all chain environments""" return await self._do_json( @@ -2039,6 +2905,66 @@ async def get_chains_environments_env_name( ), ) + async def get_gateway_groups(self) -> GroupsResponse: + """List groups""" + return await self._do_json( + 
GroupsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_gateway_groups_api_keys( + self, *, group_id: str + ) -> KeysForGroupResponse: + """List API keys for a group""" + return await self._do_json( + KeysForGroupResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups/{}/api_keys", + path_args=[group_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_gateway_groups_api_keys_api_key_prefix( + self, *, group_id: str, api_key_prefix: str + ) -> GatewayKeyInfo: + """Get an API key for a group""" + return await self._do_json( + GatewayKeyInfo, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups/{}/api_keys/{}", + path_args=[group_id, api_key_prefix], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_gateway_groups_group_id(self, *, group_id: str) -> Group: + """Get a group""" + return await self._do_json( + Group, + _ApiRequest( + method="GET", + path_fmt="/v1/gateway/groups/{}", + path_args=[group_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_instance_type_prices(self) -> InstanceTypePrices: """Gets prices for available instance types.""" return await self._do_json( @@ -2129,6 +3055,168 @@ async def get_library_listings_versions_version_tag( ), ) + async def get_loops_capabilities(self) -> GetLoopsCapabilitiesResponse: + """Get Loops server capabilities.""" + return await self._do_json( + GetLoopsCapabilitiesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/capabilities", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_checkpoints(self) -> ListLoopsCheckpointsResponse: + """List Loops checkpoints.""" + return await self._do_json( + ListLoopsCheckpointsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/checkpoints", + path_args=[], + body=None, + 
success_code=200, + error_codes=None, + ), + ) + + async def get_loops_checkpoints_files( + self, *, checkpoint_id: str + ) -> LoopsCheckpointFilesResponse: + """Get Loops checkpoint files.""" + return await self._do_json( + LoopsCheckpointFilesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/checkpoints/{}/files", + path_args=[checkpoint_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_deployments(self) -> ListLoopsDeploymentsResponse: + """List Loops deployments.""" + return await self._do_json( + ListLoopsDeploymentsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/deployments", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_deployments_deployment_id( + self, *, deployment_id: str + ) -> GetLoopsDeploymentResponse: + """Get a Loops deployment.""" + return await self._do_json( + GetLoopsDeploymentResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/deployments/{}", + path_args=[deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_deployments_logs( + self, *, deployment_id: str + ) -> GetLogsResponse: + """Get logs for a Loops trainer deployment.""" + return await self._do_json( + GetLogsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/deployments/{}/logs", + path_args=[deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_runs(self) -> ListLoopsRunsResponse: + """List Loops runs.""" + return await self._do_json( + ListLoopsRunsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/runs", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_runs_run_id(self, *, run_id: str) -> GetLoopsRunResponse: + """Get a Loops run.""" + return await self._do_json( + GetLoopsRunResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/runs/{}", + path_args=[run_id], + 
body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_samplers(self) -> ListLoopsSamplersResponse: + """List Loops samplers.""" + return await self._do_json( + ListLoopsSamplersResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/samplers", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_samplers_sampler_id( + self, *, sampler_id: str + ) -> GetLoopsSamplerResponse: + """Get a Loops sampler.""" + return await self._do_json( + GetLoopsSamplerResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/samplers/{}", + path_args=[sampler_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_loops_sessions(self, *, session_id: str) -> GetLoopsSessionResponse: + """Get a Loops session.""" + return await self._do_json( + GetLoopsSessionResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/loops/sessions/{}", + path_args=[session_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_model_apis_snapshots(self) -> ModelWeightSnapshot: """Get the latest model weight snapshot""" return await self._do_json( @@ -2187,6 +3275,22 @@ async def get_models_deployments(self, *, model_id: str) -> Deployments: ), ) + async def get_models_deployments_config( + self, *, model_id: str, deployment_id: str + ) -> DeploymentConfigResponse: + """Gets a deployment's config""" + return await self._do_json( + DeploymentConfigResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/models/{}/deployments/{}/config", + path_args=[model_id, deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_models_deployments_deployment_id( self, *, model_id: str, deployment_id: str ) -> Deployment: @@ -2217,6 +3321,38 @@ async def get_models_deployments_development(self, *, model_id: str) -> Deployme ), ) + async def get_models_deployments_download( + self, *, model_id: str, deployment_id: str + ) -> 
DownloadDeploymentResponse: + """Gets a presigned download URL for a deployment's truss""" + return await self._do_json( + DownloadDeploymentResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/models/{}/deployments/{}/download", + path_args=[model_id, deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_models_deployments_logs( + self, *, model_id: str, deployment_id: str + ) -> GetLogsResponse: + """Gets the logs for a model deployment.""" + return await self._do_json( + GetLogsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/models/{}/deployments/{}/logs", + path_args=[model_id, deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_models_deployments_production(self, *, model_id: str) -> Deployment: """Gets a model's production deployment""" return await self._do_json( @@ -2276,7 +3412,7 @@ async def get_models_model_id(self, *, model_id: str) -> Model: ) async def get_secrets(self) -> Secrets: - """Gets all secrets""" + """Gets all secrets (metadata only, no plain text keys)""" return await self._do_json( Secrets, _ApiRequest( @@ -2303,8 +3439,22 @@ async def get_teams(self) -> Teams: ), ) + async def get_teams_models(self, *, team_id: str) -> Models: + """Gets all models""" + return await self._do_json( + Models, + _ApiRequest( + method="GET", + path_fmt="/v1/teams/{}/models", + path_args=[team_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_teams_secrets(self, *, team_id: str) -> Secrets: - """Gets all secrets for a team""" + """Gets all secrets for a team (metadata only, no plain text keys)""" return await self._do_json( Secrets, _ApiRequest( @@ -2317,6 +3467,52 @@ async def get_teams_secrets(self, *, team_id: str) -> Secrets: ), ) + async def get_trainer_sessions_trainers_checkpoints( + self, *, session_id: str, trainer_id: str + ) -> GetTrainerServerCheckpointsResponse: + """List trainer server checkpoints.""" + return await 
self._do_json( + GetTrainerServerCheckpointsResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/trainer_sessions/{}/trainers/{}/checkpoints", + path_args=[session_id, trainer_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_trainer_sessions_trainers_checkpoints_files( + self, *, session_id: str, trainer_id: str, checkpoint_id: str + ) -> GetTrainerServerCheckpointFilesResponse: + """Get trainer server checkpoint files.""" + return await self._do_json( + GetTrainerServerCheckpointFilesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/trainer_sessions/{}/trainers/{}/checkpoints/{}/files", + path_args=[session_id, trainer_id, checkpoint_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_training_capacity(self) -> GetTrainingGpuCapacityResponse: + """Get training GPU capacity.""" + return await self._do_json( + GetTrainingGpuCapacityResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/training/capacity", + path_args=[], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def get_training_projects(self) -> ListTrainingProjectsResponse: """List training projects.""" return await self._do_json( @@ -2384,10 +3580,42 @@ async def get_training_projects_jobs_checkpoint_files( ) -> GetTrainingJobCheckpointFilesResponse: """Get training job checkpoint files.""" return await self._do_json( - GetTrainingJobCheckpointFilesResponse, + GetTrainingJobCheckpointFilesResponse, + _ApiRequest( + method="GET", + path_fmt="/v1/training_projects/{}/jobs/{}/checkpoint_files", + path_args=[training_project_id, training_job_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_training_projects_jobs_checkpoints( + self, *, training_project_id: str, training_job_id: str + ) -> GetTrainingJobCheckpointsResponse: + """Get training job checkpoints.""" + return await self._do_json( + GetTrainingJobCheckpointsResponse, + _ApiRequest( + method="GET", + 
path_fmt="/v1/training_projects/{}/jobs/{}/checkpoints", + path_args=[training_project_id, training_job_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def get_training_projects_jobs_download( + self, *, training_project_id: str, training_job_id: str + ) -> DownloadTrainingJobResponse: + """Get the uploaded training job as a S3 Artifact""" + return await self._do_json( + DownloadTrainingJobResponse, _ApiRequest( method="GET", - path_fmt="/v1/training_projects/{}/jobs/{}/checkpoint_files", + path_fmt="/v1/training_projects/{}/jobs/{}/download", path_args=[training_project_id, training_job_id], body=None, success_code=200, @@ -2395,15 +3623,15 @@ async def get_training_projects_jobs_checkpoint_files( ), ) - async def get_training_projects_jobs_checkpoints( + async def get_training_projects_jobs_logs( self, *, training_project_id: str, training_job_id: str - ) -> GetTrainingJobCheckpointsResponse: - """Get training job checkpoints.""" + ) -> GetLogsResponse: + """Get the logs for a training job.""" return await self._do_json( - GetTrainingJobCheckpointsResponse, + GetLogsResponse, _ApiRequest( method="GET", - path_fmt="/v1/training_projects/{}/jobs/{}/checkpoints", + path_fmt="/v1/training_projects/{}/jobs/{}/logs", path_args=[training_project_id, training_job_id], body=None, success_code=200, @@ -2411,15 +3639,15 @@ async def get_training_projects_jobs_checkpoints( ), ) - async def get_training_projects_jobs_download( + async def get_training_projects_jobs_metrics( self, *, training_project_id: str, training_job_id: str - ) -> DownloadTrainingJobResponse: - """Get the uploaded training job as a S3 Artifact""" + ) -> GetTrainingJobMetricsResponse: + """Get the metrics for a training job.""" return await self._do_json( - DownloadTrainingJobResponse, + GetTrainingJobMetricsResponse, _ApiRequest( method="GET", - path_fmt="/v1/training_projects/{}/jobs/{}/download", + path_fmt="/v1/training_projects/{}/jobs/{}/metrics", 
path_args=[training_project_id, training_job_id], body=None, success_code=200, @@ -2459,6 +3687,20 @@ async def get_training_projects_training_project_id( ), ) + async def get_users(self, *, user_id: str) -> UserInfo: + """Gets a user by ID""" + return await self._do_json( + UserInfo, + _ApiRequest( + method="GET", + path_fmt="/v1/users/{}", + path_args=[user_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + async def patch_chains_environments( self, *, chain_id: str, env_name: str, body: UpdateChainEnvironmentRequest ) -> UpdateChainEnvironmentResponse: @@ -2495,6 +3737,22 @@ async def patch_chains_environments_chainlet_settings_autoscaling_settings( ), ) + async def patch_gateway_groups( + self, *, group_id: str, body: UpdateGroupRequest + ) -> Group: + """Update a group""" + return await self._do_json( + Group, + _ApiRequest( + method="PATCH", + path_fmt="/v1/gateway/groups/{}", + path_args=[group_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def patch_library_listings( self, *, user_defined_listing_id: str, body: UpdateLibraryListingRequest ) -> LibraryListing: @@ -2698,6 +3956,52 @@ async def post_chains_environments_promote( ), ) + async def post_gateway_groups(self, *, body: CreateGroupRequest) -> Group: + """Create a group""" + return await self._do_json( + Group, + _ApiRequest( + method="POST", + path_fmt="/v1/gateway/groups", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_gateway_groups_api_keys( + self, *, group_id: str, body: CreateApiKeyForGroupRequest + ) -> CreateApiKeyForGroupResponse: + """Create an API key for a group""" + return await self._do_json( + CreateApiKeyForGroupResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/gateway/groups/{}/api_keys", + path_args=[group_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_gateway_groups_api_keys_register( + self, *, group_id: str, body: RegisterAPIKeyRequest + ) 
-> RegisterAPIKeyResponse: + """Register an API key for a group""" + return await self._do_json( + RegisterAPIKeyResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/gateway/groups/{}/api_keys/register", + path_args=[group_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_library_listings( self, *, body: CreateLibraryListingRequest ) -> LibraryListing: @@ -2730,10 +4034,10 @@ async def post_library_listings_versions( ), ) - async def post_llm_models(self, *, body: CreateLLMModelRequest) -> LLMModel: + async def post_llm_models(self, *, body: CreateLLMModelRequest) -> LLMModelHandle: """Creates a new BIS LLM deployment""" return await self._do_json( - LLMModel, + LLMModelHandle, _ApiRequest( method="POST", path_fmt="/v1/llm_models", @@ -2746,10 +4050,10 @@ async def post_llm_models(self, *, body: CreateLLMModelRequest) -> LLMModel: async def post_llm_models_deployments( self, *, model_id: str, body: CreateLLMModelVersionRequest - ) -> LLMModelVersion: + ) -> LLMModelHandle: """Creates a new BIS LLM deployment version""" return await self._do_json( - LLMModelVersion, + LLMModelHandle, _ApiRequest( method="POST", path_fmt="/v1/llm_models/{}/deployments", @@ -2760,6 +4064,102 @@ async def post_llm_models_deployments( ), ) + async def post_loops_checkpoints_validate( + self, *, body: ValidateLoopsCheckpointRequest + ) -> ValidateLoopsCheckpointResponse: + """Validate a Loops checkpoint bt:// URI.""" + return await self._do_json( + ValidateLoopsCheckpointResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/checkpoints/validate", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_loops_deployments_deactivate( + self, *, deployment_id: str + ) -> DeactivateLoopsDeploymentResponse: + """Deactivate a Loops deployment.""" + return await self._do_json( + DeactivateLoopsDeploymentResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/deployments/{}/deactivate", + 
path_args=[deployment_id], + body=None, + success_code=200, + error_codes=None, + ), + ) + + async def post_loops_deployments_metrics( + self, *, deployment_id: str, body: GetLoopsDeploymentMetricsRequest + ) -> GetLoopsDeploymentMetricsResponse: + """Get metrics for a Loops trainer deployment.""" + return await self._do_json( + GetLoopsDeploymentMetricsResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/deployments/{}/metrics", + path_args=[deployment_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_loops_runs( + self, *, body: CreateLoopsRunRequest + ) -> CreateLoopsRunResponse: + """Create a Loops run.""" + return await self._do_json( + CreateLoopsRunResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/runs", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_loops_samplers( + self, *, body: CreateLoopsSamplerRequest + ) -> CreateLoopsSamplerResponse: + """Create a Loops sampler.""" + return await self._do_json( + CreateLoopsSamplerResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/samplers", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_loops_sessions( + self, *, body: CreateLoopsSessionRequest + ) -> CreateLoopsSessionResponse: + """Create a Loops session.""" + return await self._do_json( + CreateLoopsSessionResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/loops/sessions", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_model_apis_snapshots( self, *, body: CreateModelWeightSnapshotRequest ) -> ModelWeightSnapshot: @@ -2792,6 +4192,36 @@ async def post_model_apis_snapshots_model_id( ), ) + async def post_models(self, *, body: CreateModelRequest) -> CreatedModelDeployment: + """Creates a new model from a source""" + return await self._do_json( + CreatedModelDeployment, + _ApiRequest( + method="POST", + path_fmt="/v1/models", + path_args=[], 
+ body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_models_deployments( + self, *, model_id: str, body: CreateModelDeploymentRequest + ) -> CreatedModelDeployment: + """Adds a new deployment to a model""" + return await self._do_json( + CreatedModelDeployment, + _ApiRequest( + method="POST", + path_fmt="/v1/models/{}/deployments", + path_args=[model_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_models_deployments_activate( self, *, model_id: str, deployment_id: str ) -> ActivateResponse: @@ -2891,7 +4321,7 @@ async def post_models_deployments_development_retry( async def post_models_deployments_logs( self, *, model_id: str, deployment_id: str, body: GetDeploymentLogsRequest ) -> GetLogsResponse: - """Gets the logs for a model deployment.""" + """Gets the logs for a model deployment (deprecated; use GET).""" return await self._do_json( GetLogsResponse, _ApiRequest( @@ -2984,6 +4414,22 @@ async def post_models_deployments_retry( ), ) + async def post_models_deployments_ssh_sign( + self, *, model_id: str, deployment_id: str, body: SignSSHCertificateRequest + ) -> SignSSHCertificateResponse: + """Sign an SSH certificate for an inference model.""" + return await self._do_json( + SignSSHCertificateResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/models/{}/deployments/{}/ssh/sign", + path_args=[model_id, deployment_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_models_environments( self, *, model_id: str, body: CreateEnvironmentRequest ) -> Environment: @@ -3128,6 +4574,22 @@ async def post_models_environments_resume_promotion( ), ) + async def post_prepare_model_upload( + self, *, body: PrepareModelUploadRequest + ) -> PrepareModelUploadResponse: + """Validates a model push payload and issues upload credentials""" + return await self._do_json( + PrepareModelUploadResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/prepare_model_upload", + path_args=[], + 
body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_secrets(self, *, body: UpsertSecretRequest) -> Secret: """Upserts a secret""" return await self._do_json( @@ -3158,6 +4620,38 @@ async def post_teams_api_keys( ), ) + async def post_teams_llm_models( + self, *, team_id: str, body: CreateLLMModelRequest + ) -> LLMModelHandle: + """Creates a new BIS LLM deployment""" + return await self._do_json( + LLMModelHandle, + _ApiRequest( + method="POST", + path_fmt="/v1/teams/{}/llm_models", + path_args=[team_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_teams_models( + self, *, team_id: str, body: CreateModelRequest + ) -> CreatedModelDeployment: + """Creates a new model from a source""" + return await self._do_json( + CreatedModelDeployment, + _ApiRequest( + method="POST", + path_fmt="/v1/teams/{}/models", + path_args=[team_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_teams_secrets( self, *, team_id: str, body: UpsertSecretRequest ) -> Secret: @@ -3190,6 +4684,86 @@ async def post_teams_training_projects( ), ) + async def post_trainer_sessions( + self, *, body: CreateTrainerSessionRequest + ) -> CreateTrainerSessionResponse: + """Create a trainer session.""" + return await self._do_json( + CreateTrainerSessionResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainer_sessions", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_trainer_sessions_samplers( + self, *, session_id: str, body: CreateSamplingServerRequest + ) -> CreateSamplingServerResponse: + """Create a sampling server.""" + return await self._do_json( + CreateSamplingServerResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainer_sessions/{}/samplers", + path_args=[session_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_trainer_sessions_trainers( + self, *, session_id: str, body: CreateTrainerServerRequest + ) 
-> CreateTrainerServerResponse: + """Create a trainer server.""" + return await self._do_json( + CreateTrainerServerResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainer_sessions/{}/trainers", + path_args=[session_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_trainers_checkpoints_search( + self, *, body: CheckpointSearchRequest + ) -> CheckpointSearchResponse: + """Look up trainer checkpoint info by bt:// URI.""" + return await self._do_json( + CheckpointSearchResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainers/checkpoints/search", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + + async def post_trainers_search( + self, *, body: SearchTrainersRequest + ) -> SearchTrainersResponse: + """Search trainers.""" + return await self._do_json( + SearchTrainersResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/trainers/search", + path_args=[], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_training_jobs_search( self, *, body: SearchTrainingJobsRequest ) -> SearchTrainingJobsResponse: @@ -3245,7 +4819,7 @@ async def post_training_projects_jobs_logs( training_job_id: str, body: GetTrainingJobLogsRequest, ) -> GetLogsResponse: - """Get the logs for a training job.""" + """Get the logs for a training job (deprecated; use GET).""" return await self._do_json( GetLogsResponse, _ApiRequest( @@ -3265,7 +4839,7 @@ async def post_training_projects_jobs_metrics( training_job_id: str, body: GetTrainingJobMetricsRequest, ) -> GetTrainingJobMetricsResponse: - """Get the metrics for a training job.""" + """Get the metrics for a training job (deprecated; use GET).""" return await self._do_json( GetTrainingJobMetricsResponse, _ApiRequest( @@ -3294,6 +4868,26 @@ async def post_training_projects_jobs_recreate( ), ) + async def post_training_projects_jobs_ssh_sign( + self, + *, + training_project_id: str, + training_job_id: str, + body: 
SignSSHCertificateRequest, + ) -> SignSSHCertificateResponse: + """Sign an SSH certificate for a training job.""" + return await self._do_json( + SignSSHCertificateResponse, + _ApiRequest( + method="POST", + path_fmt="/v1/training_projects/{}/jobs/{}/ssh/sign", + path_args=[training_project_id, training_job_id], + body=body, + success_code=200, + error_codes=None, + ), + ) + async def post_training_projects_jobs_stop( self, *, diff --git a/baseten/client/managementapi/_models.py b/baseten/client/managementapi/_models.py index 0081edf..cfab263 100644 --- a/baseten/client/managementapi/_models.py +++ b/baseten/client/managementapi/_models.py @@ -8,6 +8,12 @@ from datetime import date as date_aliased +class DeploymentConfigOutputFormat(Enum): + raw = "raw" + parsed = "parsed" + both = "both" + + class CheckpointSyncStatus(Enum): SYNCING = "SYNCING" COMPLETED = "COMPLETED" @@ -200,6 +206,7 @@ class V1InteractiveSessionAuthProvider(Enum): class V1InteractiveSessionProvider(Enum): vs_code = "vs_code" cursor = "cursor" + ssh = "ssh" class V1InteractiveSessionTrigger(Enum): @@ -232,6 +239,20 @@ class FileSummary(BaseModel): ] +class TrainerCheckpointTarget(Enum): + sampler = "sampler" + trainer = "trainer" + + +class Name(Enum): + CREATED = "CREATED" + DEPLOYING = "DEPLOYING" + RUNNING = "RUNNING" + SCALED_TO_ZERO = "SCALED_TO_ZERO" + FAILED = "FAILED" + STOPPED = "STOPPED" + + class APIKeyCategory(Enum): PERSONAL = "PERSONAL" WORKSPACE_MANAGE_ALL = "WORKSPACE_MANAGE_ALL" @@ -283,6 +304,10 @@ class UpsertSecretRequest(BaseModel): ] +class SecretTombstone(BaseModel): + name: Annotated[str, Field(description="Name of the deleted secret", title="Name")] + + class Team(BaseModel): id: Annotated[str, Field(description="Unique identifier of the team", title="Id")] name: Annotated[str, Field(description="Name of the team", title="Name")] @@ -362,6 +387,143 @@ class InstanceTypePrices(BaseModel): ] +class DeploymentArchivePayload(BaseModel): + config: Annotated[ + dict[str, Any], 
+ Field(description="Parsed model config as a JSON object.", title="Config"), + ] + raw_config: Annotated[ + str | None, + Field( + description="Original config.yaml text, persisted as-is on the deployment. Best-effort: invalid raw configs are logged and dropped without failing the request.", + title="Raw Config", + ), + ] = None + user_env: Annotated[ + dict[str, Any] | None, + Field( + description="Client environment metadata (e.g. client version, Python version). Validated server-side.", + title="User Env", + ), + ] = None + environment_name: Annotated[ + str | None, + Field( + description="Stable environment to push to (e.g. `production`). If unset, the deployment is created without environment selection. Caller must have push permission for the named environment.", + title="Environment Name", + ), + ] = None + preserve_env_instance_type: Annotated[ + bool | None, + Field( + description="Retain the target environment's current instance type rather than the one in `config`. Only meaningful when `environment_name` is set and that environment already exists.", + title="Preserve Env Instance Type", + ), + ] = True + deploy_timeout_minutes: Annotated[ + int | None, + Field( + description="Deploy timeout in minutes; allowed range 10 to 1440. Server default applies if unset.", + title="Deploy Timeout Minutes", + ), + ] = None + deployment_name: Annotated[ + str | None, + Field( + description="Optional human-readable name for the deployment.", + title="Deployment Name", + ), + ] = None + labels: Annotated[ + dict[str, Any] | None, + Field( + description="User-provided key-value labels for the deployment.", + title="Labels", + ), + ] = None + + +class PrepareModelUploadRequest(BaseModel): + deployment: Annotated[ + DeploymentArchivePayload, + Field( + description="Deployment-level payload, identical to the payload sent at commit." + ), + ] + name: Annotated[ + str | None, + Field( + description="Set to validate a new-model push. 
Exactly one of `name` or `model_id` is required.", + title="Name", + ), + ] = None + team_id: Annotated[ + str | None, + Field( + description="Team the new model will belong to. Only valid when `name` is set; defaults to the organization's default team when omitted. Must not be set when `model_id` is set (the existing model already has a team).", + title="Team Id", + ), + ] = None + model_id: Annotated[ + str | None, + Field( + description="Set to validate an add-deployment push to an existing model. Exactly one of `name` or `model_id` is required.", + title="Model Id", + ), + ] = None + dry_run: Annotated[ + bool | None, + Field( + description="If true, validate the payload only and do not issue upload credentials. The response sets `creds`, `s3_bucket`, and `s3_key` to `null`.", + title="Dry Run", + ), + ] = False + is_development: Annotated[ + bool | None, + Field( + description="If true, validate a development-deployment push. Only valid when `name` is set. The following `deployment` fields must be left at their defaults: `environment_name`, `preserve_env_instance_type`, `deployment_name`.", + title="Is Development", + ), + ] = False + + +class AWSCredentials(BaseModel): + aws_access_key_id: Annotated[ + str, Field(description="The AWS access key ID", title="Aws Access Key Id") + ] + aws_secret_access_key: Annotated[ + str, + Field(description="The AWS secret access key", title="Aws Secret Access Key"), + ] + aws_session_token: Annotated[ + str, Field(description="The AWS session token", title="Aws Session Token") + ] + + +class PrepareModelUploadResponse(BaseModel): + creds: Annotated[ + AWSCredentials | None, + Field(description="STS credentials to upload the model archive."), + ] = None + s3_bucket: Annotated[ + str | None, + Field( + description="S3 bucket the credentials are scoped to.", title="S3 Bucket" + ), + ] = None + s3_key: Annotated[ + str | None, + Field( + description="S3 key the credentials are scoped to. 
Pass this to `POST /v1/models` (in the `model_archive` source) once the upload completes.", + title="S3 Key", + ), + ] = None + s3_region: Annotated[ + str | None, + Field(description="AWS region the S3 bucket resides in.", title="S3 Region"), + ] = None + + class Model(BaseModel): id: Annotated[str, Field(description="Unique identifier of the model", title="Id")] created_at: Annotated[ @@ -411,10 +573,71 @@ class Models(BaseModel): models: Annotated[list[Model], Field(title="Models")] -class ModelTombstone(BaseModel): - id: Annotated[str, Field(description="Unique identifier of the model", title="Id")] - deleted: Annotated[ - bool, Field(description="Whether the model was deleted", title="Deleted") +class LibraryListingSource(BaseModel): + kind: Annotated[Literal["library_listing"], Field(title="Kind")] = "library_listing" + lab_display_name: Annotated[ + str, + Field( + description="Identifier of the publishing organization, as returned by `GET /v1/library_models`.", + title="Lab Display Name", + ), + ] + user_defined_listing_id: Annotated[ + str, + Field( + description="Listing identifier within the publishing organization.", + title="User Defined Listing Id", + ), + ] + deployed_model_name: Annotated[ + str | None, + Field( + description="Optional name for the new deployed model. Defaults to the listing's configured name.", + title="Deployed Model Name", + ), + ] = None + + +class ModelArchiveSource(BaseModel): + kind: Annotated[Literal["model_archive"], Field(title="Kind")] = "model_archive" + name: Annotated[str, Field(description="Name of the new model.", title="Name")] + deployment: Annotated[ + DeploymentArchivePayload, + Field( + description="Deployment-level configuration for the model's first deployment." 
+ ), + ] + s3_key: Annotated[ + str, + Field( + description="S3 key of the uploaded archive, from the credentials returned by `POST /v1/prepare_model_upload`.", + title="S3 Key", + ), + ] + disable_archive_download: Annotated[ + bool | None, + Field( + description="If true, the uploaded archive is not downloadable after creation. Locked at model creation; cannot be changed by subsequent deployments.", + title="Disable Archive Download", + ), + ] = False + is_development: Annotated[ + bool | None, + Field( + description="If true, push as a development deployment (the model's single mutable dev slot; overwrites any existing development deployment). The following `deployment` fields must be left at their defaults: `environment_name`, `preserve_env_instance_type`, `deployment_name`.", + title="Is Development", + ), + ] = False + + +class CreateModelRequest(BaseModel): + source: Annotated[ + LibraryListingSource | ModelArchiveSource, + Field( + description="Where the new model is created from.", + discriminator="kind", + title="Source", + ), ] @@ -460,6 +683,13 @@ class AutoscalingSettings(BaseModel): title="Target In Flight Tokens", ), ] = None + max_scale_down_rate: Annotated[ + float | None, + Field( + description="Maximum rate at which replicas can scale down (e.g. 2.0 means at most halve replicas per window).", + title="Max Scale Down Rate", + ), + ] = None class DeploymentStatus(Enum): @@ -542,6 +772,25 @@ class Deployment(BaseModel): ] = None +class CreatedModelDeployment(BaseModel): + model: Annotated[ + Model, + Field( + description="The model the deployment belongs to. May have been created by this call." 
+ ), + ] + deployment: Annotated[ + Deployment, Field(description="The newly created deployment.") + ] + + +class ModelTombstone(BaseModel): + id: Annotated[str, Field(description="Unique identifier of the model", title="Id")] + deleted: Annotated[ + bool, Field(description="Whether the model was deleted", title="Deleted") + ] + + class Deployments(BaseModel): deployments: Annotated[ list[Deployment], @@ -549,6 +798,31 @@ class Deployments(BaseModel): ] +class DeploymentArchiveSource(BaseModel): + kind: Annotated[Literal["model_archive"], Field(title="Kind")] = "model_archive" + deployment: Annotated[ + DeploymentArchivePayload, Field(description="Deployment-level configuration.") + ] + s3_key: Annotated[ + str, + Field( + description="S3 key of the uploaded archive, from the credentials returned by `POST /v1/prepare_model_upload`.", + title="S3 Key", + ), + ] + + +class CreateModelDeploymentRequest(BaseModel): + source: Annotated[ + DeploymentArchiveSource, + Field( + description="Where the new deployment is created from.", + discriminator="kind", + title="Source", + ), + ] + + class DeploymentTombstone(BaseModel): id: Annotated[ str, Field(description="Unique identifier of the deployment", title="Id") @@ -561,6 +835,19 @@ class DeploymentTombstone(BaseModel): ] +class MaxScaleDownRate(RootModel[float]): + root: Annotated[ + float | None, + Field( + description="Maximum rate at which replicas can scale down (e.g. 2.0 means at most halve replicas per window).", + examples=[2.0], + gt=1.0, + le=2.0, + title="Max Scale Down Rate", + ), + ] = None + + class UpdateAutoscalingSettings(BaseModel): model_config = ConfigDict( extra="forbid", @@ -617,6 +904,14 @@ class UpdateAutoscalingSettings(BaseModel): title="Target In Flight Tokens", ), ] = None + max_scale_down_rate: Annotated[ + MaxScaleDownRate | None, + Field( + description="Maximum rate at which replicas can scale down (e.g. 
2.0 means at most halve replicas per window).", + examples=[2.0], + title="Max Scale Down Rate", + ), + ] = None class UpdateAutoscalingSettingsStatus(Enum): @@ -697,6 +992,71 @@ class RetryDeploymentResponse(BaseModel): ] +class DownloadDeploymentResponse(BaseModel): + download_url: Annotated[ + str, + Field( + description="Presigned URL to download the truss tar file", + title="Download Url", + ), + ] + + +class DeploymentConfigResponse(BaseModel): + config: Annotated[ + dict[str, Any] | None, + Field(description="The parsed config of the deployment.", title="Config"), + ] = None + raw_config: Annotated[ + str | None, + Field( + description="The original config.yaml text — preserves comments, ordering, formatting.", + title="Raw Config", + ), + ] = None + + +class GetDeploymentConfigRequest(BaseModel): + output_format: Annotated[ + DeploymentConfigOutputFormat | None, + Field( + description="'raw': verbatim config.yaml with comments — not available for deployments created before 2026-04-30. 'parsed': dict with server-side defaults applied — always available. 'both': both fields populated." 
+ ), + ] = DeploymentConfigOutputFormat.both + + +class Log(BaseModel): + timestamp: Annotated[ + str, + Field( + description="Epoch nanosecond timestamp of the log message.", + title="Timestamp", + ), + ] + message: Annotated[ + str, Field(description="The contents of the log message.", title="Message") + ] + replica: Annotated[ + str | None, + Field( + description="The replica the log line was emitted from.", title="Replica" + ), + ] + request_id: Annotated[ + str | None, + Field( + description="The request ID associated with an inference request.", + title="Request Id", + ), + ] = None + + +class GetLogsResponse(BaseModel): + logs: Annotated[ + list[Log], Field(description="Logs for a specific entity.", title="Logs") + ] + + class SortOrder(Enum): asc = "asc" desc = "desc" @@ -704,7 +1064,7 @@ class SortOrder(Enum): class Limit(RootModel[int]): root: Annotated[ - int, + int | None, Field( description="Limit of logs to fetch in a single request", ge=1, @@ -742,39 +1102,61 @@ class GetDeploymentLogsRequest(BaseModel): ] -class Log(BaseModel): - timestamp: Annotated[ - str, +class TerminateReplicaResponse(BaseModel): + success: Annotated[ + bool | None, Field( - description="Epoch nanosecond timestamp of the log message.", - title="Timestamp", - ), - ] - message: Annotated[ - str, Field(description="The contents of the log message.", title="Message") + description="Whether the replica was successfully terminated", + title="Success", + ), + ] = True + + +class SignSSHCertificateRequest(BaseModel): + public_key: Annotated[ + str, + Field( + description="The user's SSH public key (e.g., 'ssh-ed25519 AAAA... user@host').", + title="Public Key", + ), ] - replica: Annotated[ + replica_id: Annotated[ str | None, Field( - description="The replica the log line was emitted from.", title="Replica" + description="The replica to connect to. Required for training jobs (e.g. '0'). 
Optional for inference (server picks a running replica if omitted).", + title="Replica Id", ), - ] + ] = None -class GetLogsResponse(BaseModel): - logs: Annotated[ - list[Log], Field(description="Logs for a specific entity.", title="Logs") +class SignSSHCertificateResponse(BaseModel): + ssh_certificate: Annotated[ + str, + Field( + description="The signed SSH certificate in OpenSSH format.", + title="Ssh Certificate", + ), ] - - -class TerminateReplicaResponse(BaseModel): - success: Annotated[ - bool | None, + jwt: Annotated[ + str, Field( - description="Whether the replica was successfully terminated", - title="Success", + description="Signed JWT (ES256) for SSH proxy authorization.", title="Jwt" ), - ] = True + ] + proxy_address: Annotated[ + str, + Field( + description="Address of the SSH proxy to connect to (host:port).", + title="Proxy Address", + ), + ] + ssh_cert_expires_at: Annotated[ + AwareDatetime, + Field( + description="When the certificate expires, in ISO 8601 format.", + title="Ssh Cert Expires At", + ), + ] class InProgressPromotionStatus(Enum): @@ -1002,6 +1384,7 @@ class ChainletEnvironmentSettingsRequest(BaseModel): "autoscaling_window": 60, "concurrency_target": 1, "max_replica": 1, + "max_scale_down_rate": None, "min_replica": 0, "scale_down_delay": 900, "target_in_flight_tokens": None, @@ -1077,6 +1460,7 @@ class ChainletEnvironmentAutoscalingSettingsUpdate(BaseModel): "autoscaling_window": 800, "concurrency_target": 3, "max_replica": 2, + "max_scale_down_rate": None, "min_replica": 1, "scale_down_delay": 60, "target_in_flight_tokens": None, @@ -1099,6 +1483,7 @@ class UpdateChainletEnvironmentAutoscalingSettingsRequest(BaseModel): "autoscaling_window": 800, "concurrency_target": 4, "max_replica": 3, + "max_scale_down_rate": None, "min_replica": 2, "scale_down_delay": 63, "target_in_flight_tokens": None, @@ -1113,6 +1498,7 @@ class UpdateChainletEnvironmentAutoscalingSettingsRequest(BaseModel): "autoscaling_window": None, "concurrency_target": 
None, "max_replica": None, + "max_scale_down_rate": None, "min_replica": 0, "scale_down_delay": None, "target_in_flight_tokens": None, @@ -1125,6 +1511,7 @@ class UpdateChainletEnvironmentAutoscalingSettingsRequest(BaseModel): "autoscaling_window": None, "concurrency_target": None, "max_replica": None, + "max_scale_down_rate": None, "min_replica": 0, "scale_down_delay": None, "target_in_flight_tokens": None, @@ -1428,6 +1815,19 @@ class GetTrainingJobLogsRequest(BaseModel): ] +class TrainingJobMetric(BaseModel): + value: Annotated[ + float, Field(description="The value of the metric.", title="Value") + ] + timestamp: Annotated[ + AwareDatetime, + Field( + description="The timestamp of the metric in ISO 8601 format.", + title="Timestamp", + ), + ] + + class GetTrainingJobMetricsRequest(BaseModel): end_epoch_millis: Annotated[ int | None, @@ -1445,27 +1845,11 @@ class GetTrainingJobMetricsRequest(BaseModel): ] = None -class TrainingJobMetric(BaseModel): - value: Annotated[ - float, Field(description="The value of the metric.", title="Value") - ] - timestamp: Annotated[ - AwareDatetime, - Field( - description="The timestamp of the metric in ISO 8601 format.", - title="Timestamp", - ), - ] - - class StopTrainingJobRequest(BaseModel): pass class TrainingJobCheckpoint(BaseModel): - training_job_id: Annotated[ - str, Field(description="The ID of the training job.", title="Training Job Id") - ] checkpoint_id: Annotated[ str, Field(description="The ID of the checkpoint.", title="Checkpoint Id") ] @@ -1501,6 +1885,9 @@ class TrainingJobCheckpoint(BaseModel): title="Sync Status", ), ] = None + training_job_id: Annotated[ + str, Field(description="The ID of the training job.", title="Training Job Id") + ] class GetTrainingJobCheckpointFilesResponse(BaseModel): @@ -1527,6 +1914,23 @@ class GetTrainingJobCheckpointFilesResponse(BaseModel): ] +class GetTrainingJobCheckpointFilesRequest(BaseModel): + page_size: Annotated[ + int | None, + Field( + description="Max files per page 
(default 1000).", ge=1, title="Page Size" + ), + ] = 1000 + page_token: Annotated[ + int | None, + Field( + description="Offset into the file list (default 0).", + ge=0, + title="Page Token", + ), + ] = 0 + + class AuthCode(BaseModel): session_id: Annotated[ str, @@ -1778,144 +2182,873 @@ class SearchTrainingJobsRequest(BaseModel): ] -class AWSCredentials(BaseModel): - aws_access_key_id: Annotated[ - str, Field(description="The AWS access key ID", title="Aws Access Key Id") - ] - aws_secret_access_key: Annotated[ +class SearchTrainersRequest(BaseModel): + trainer_id: Annotated[ + str | None, + Field( + description="Filter by trainer ID.", + examples=["k4q95w5"], + title="Trainer Id", + ), + ] = None + + +class SearchedTrainer(BaseModel): + trainer_id: Annotated[str, Field(description="The trainer ID.", title="Trainer Id")] + session_id: Annotated[ str, - Field(description="The AWS secret access key", title="Aws Secret Access Key"), + Field( + description="The session ID this trainer belongs to.", title="Session Id" + ), ] - aws_session_token: Annotated[ - str, Field(description="The AWS session token", title="Aws Session Token") + base_model: Annotated[ + str, + Field( + description="The HuggingFace base model the trainer is fine-tuning.", + title="Base Model", + ), ] -class GetBlobCredentialsResponse(BaseModel): - creds: Annotated[ - AWSCredentials, Field(description="The credentials to upload the blob to") - ] - s3_key: Annotated[ - str, Field(description="The S3 key to upload the blob to", title="S3 Key") +class SearchTrainersResponse(BaseModel): + trainers: Annotated[ + list[SearchedTrainer], Field(description="List of trainers.", title="Trainers") ] - s3_bucket: Annotated[ - str, Field(description="The S3 bucket to upload the blob to", title="S3 Bucket") + + +class CheckpointSearchRequest(BaseModel): + checkpoint_path: Annotated[ + str, + Field( + description="bt:// URI of a trainer checkpoint. 
Form: bt://loops:/(weights|sampler_weights)/.", + examples=["bt://loops:k4q95w5/sampler_weights/step-100"], + title="Checkpoint Path", + ), ] -class CreateAPIKeyRequest(BaseModel): - name: Annotated[ - str | None, +class TrainerServerCheckpoint(BaseModel): + checkpoint_id: Annotated[ + str, Field(description="The ID of the checkpoint.", title="Checkpoint Id") + ] + created_at: Annotated[ + AwareDatetime, Field( - description="Optional name for the API key", - examples=["my-api-key"], - title="Name", + description="The timestamp of the checkpoint in ISO 8601 format.", + title="Created At", ), - ] = None - type: Annotated[ - APIKeyCategory, + ] + checkpoint_type: Annotated[ + str, Field(description="The type of checkpoint.", title="Checkpoint Type") + ] + base_model: Annotated[ + str | None, + Field(description="The base model of the checkpoint.", title="Base Model"), + ] + lora_adapter_config: Annotated[ + dict[str, Any] | None, Field( - description="Type of the API key.", - examples=[ - "PERSONAL", - "WORKSPACE_EXPORT_METRICS", - "WORKSPACE_INVOKE", - "WORKSPACE_MANAGE_ALL", - ], + description="The adapter config of the checkpoint.", + title="Lora Adapter Config", ), ] - model_ids: Annotated[ - list[str] | None, + size_bytes: Annotated[ + int, + Field(description="The size of the checkpoint in bytes.", title="Size Bytes"), + ] + sync_status: Annotated[ + str | None, Field( - description="List of model IDs to scope the API key to, only present if type is 'WORKSPACE_EXPORT_METRICS' or 'WORKSPACE_INVOKE'", - examples=[["aaaaaaaa"]], - title="Model Ids", + description="Sync state of the checkpoint: SYNCING or COMPLETE.", + title="Sync Status", ), ] = None + id: Annotated[ + str, Field(description="The TrainerServerCheckpoint database ID.", title="Id") + ] + trainer_id: Annotated[ + str, Field(description="The ID of the trainer.", title="Trainer Id") + ] + target: Annotated[ + TrainerCheckpointTarget, + Field( + description="Whether this checkpoint is loadable by the 
sampler ('sampler') or by the trainer ('trainer')." + ), + ] -class APIKey(BaseModel): - api_key: Annotated[str, Field(description="The API key string", title="Api Key")] +class CheckpointSearchResponse(BaseModel): + checkpoint: Annotated[ + TrainerServerCheckpoint, + Field(description="The checkpoint metadata (id, target, type, sizes, etc.)."), + ] -class APIKeyInfo(BaseModel): - prefix: Annotated[ - str, Field(description="The prefix of the API key", title="Prefix") - ] - name: Annotated[ +class CreateTrainerSessionRequest(BaseModel): + training_project_id: Annotated[ str | None, Field( - description="Optional name for the API key", - examples=["my-api-key"], - title="Name", + description="ID of the training project to associate with. If omitted, a default project is created for the org.", + title="Training Project Id", ), ] = None - type: Annotated[ - APIKeyCategory, - Field( - description="Type of the API key.", - examples=[ - "PERSONAL", - "WORKSPACE_EXPORT_METRICS", - "WORKSPACE_INVOKE", - "WORKSPACE_MANAGE_ALL", - ], - ), + + +class TrainerSession(BaseModel): + id: Annotated[str, Field(title="Id")] + + +class CreateTrainerSessionResponse(BaseModel): + session: TrainerSession + + +class CreateTrainerServerRequest(BaseModel): + model: Annotated[ + str, Field(description="Base model ID (e.g. 'Qwen/Qwen3-8B').", title="Model") ] - model_ids: Annotated[ - list[str] | None, + max_seq_len: Annotated[ + int | None, Field( - description="List of model IDs to scope the API key to, only present if type is 'WORKSPACE_EXPORT_METRICS' or 'WORKSPACE_INVOKE'", - examples=[["aaaaaaaa"]], - title="Model Ids", + description="Maximum sequence length for training. 
Defaults to the maximum supported by the model configuration.", + title="Max Seq Len", ), ] = None - team_name: Annotated[ + lora_rank: Annotated[ + int | None, Field(description="LoRA rank.", title="Lora Rank") + ] = 64 + seed: Annotated[ + int | None, Field(description="Random seed for reproducibility.", title="Seed") + ] = None + scale_down_delay_seconds: Annotated[ + int | None, + Field( + description="Seconds of inactivity before the trainer scales to zero. Must be positive. Defaults to 3600 (1 hour).", + gt=0, + title="Scale Down Delay Seconds", + ), + ] = 3600 + checkpoint_path: Annotated[ str | None, Field( - description="The name of the team associated with the API key", - title="Team Name", + description="Optional bt:// URI of an existing trainer-target checkpoint to resume training from. Form: bt://loops:/weights/.", + examples=["bt://loops:k4q95w5/weights/step-100"], + title="Checkpoint Path", ), ] = None -class APIKeys(BaseModel): - keys: Annotated[ - list[APIKeyInfo], - Field(description="A list of API key information", title="Keys"), +class SamplingServer(BaseModel): + id: Annotated[str, Field(title="Id")] + base_url: Annotated[str, Field(title="Base Url")] + model_id: Annotated[ + str, + Field(description="Hashid of the underlying Baseten model.", title="Model Id"), + ] + deployment_id: Annotated[ + str, + Field( + description="Hashid of the specific model deployment (version).", + title="Deployment Id", + ), ] -class APIKeyTombstone(BaseModel): - prefix: Annotated[ - str, Field(description="Unique prefix of the API key", title="Prefix") - ] +class TrainerServer(BaseModel): + id: Annotated[str, Field(title="Id")] + base_url: Annotated[str, Field(title="Base Url")] + sampling_server: SamplingServer -class ModelWeightSnapshot(BaseModel): - model: Annotated[ - str, Field(description="Unique identifier of the model", title="Model") - ] - snapshot_uri: Annotated[ - str, - Field(description="Path to the model weight snapshot", title="Snapshot Uri"), - ] - 
received_at: Annotated[ - AwareDatetime, Field(description="Time of the snapshot", title="Received At") - ] +class CreateTrainerServerResponse(BaseModel): + trainer_server: TrainerServer -class CreateModelWeightSnapshotRequest(BaseModel): +class CreateSamplingServerRequest(BaseModel): model: Annotated[ - str, Field(description="Unique identifier of the model", title="Model") - ] - snapshot_uri: Annotated[ str, - Field(description="Path to the model weight snapshot", title="Snapshot Uri"), + Field( + description="Model to use for standalone samplers (eg, for baselines).", + title="Model", + ), ] - - -class CreateLLMModelRequest(BaseModel): + max_seq_length: Annotated[ + int | None, + Field( + description="Maximum prompt length (in tokens) the sampler must handle. Set this to the longest prompt you plan to send. Omit to use the default for the base model.", + title="Max Seq Length", + ), + ] = None + checkpoint_path: Annotated[ + str | None, + Field( + description="Optional bt:// URI of an existing sampler-target checkpoint to load weights from on startup. Form: bt://loops:/sampler_weights/.", + examples=["bt://loops:k4q95w5/sampler_weights/step-100"], + title="Checkpoint Path", + ), + ] = None + + +class CreateSamplingServerResponse(BaseModel): + sampling_server: SamplingServer + + +class GetTrainerServerCheckpointsResponse(BaseModel): + trainer_id: Annotated[ + str, Field(description="The ID of the trainer.", title="Trainer Id") + ] + checkpoints: Annotated[ + list[TrainerServerCheckpoint], + Field(description="The checkpoints for the trainer.", title="Checkpoints"), + ] + + +class GetTrainerServerCheckpointFilesResponse(BaseModel): + presigned_urls: Annotated[ + list[CheckpointFile], + Field( + description="List of presigned URLs for checkpoint files.", + title="Presigned Urls", + ), + ] + next_page_token: Annotated[ + int | None, + Field( + description="Token to use for fetching the next page of results. 
None when there are no more results.", + title="Next Page Token", + ), + ] = None + total_count: Annotated[ + int, + Field( + description="Total number of checkpoint files available.", + title="Total Count", + ), + ] + + +class SupportedModel(BaseModel): + model_name: Annotated[ + str, Field(description="The name of the supported model.", title="Model Name") + ] + max_context_length: Annotated[ + int, + Field( + description="The maximum context length (in tokens) supported by this model.", + title="Max Context Length", + ), + ] + + +class GetLoopsCapabilitiesResponse(BaseModel): + supported_models: Annotated[ + list[SupportedModel], + Field( + description="List of models available on the server.", + title="Supported Models", + ), + ] + + +class CreateLoopsSessionRequest(BaseModel): + training_project_id: Annotated[ + str | None, + Field( + description="ID of the training project to associate with. If omitted, a default project is created for the org.", + title="Training Project Id", + ), + ] = None + + +class LoopsSession(BaseModel): + id: Annotated[str, Field(title="Id")] + + +class CreateLoopsSessionResponse(BaseModel): + session: LoopsSession + + +class GetLoopsSessionResponse(BaseModel): + session: Annotated[LoopsSession, Field(description="The Loops session.")] + + +class LoopsSamplerStatus(BaseModel): + name: Annotated[ + DeploymentStatus, Field(description="The current status of the Loops sampler.") + ] + + +class LoopsSampler(BaseModel): + id: Annotated[str, Field(title="Id")] + base_url: Annotated[str, Field(title="Base Url")] + base_model: Annotated[ + str, + Field( + description="The HuggingFace base model the sampler is serving.", + title="Base Model", + ), + ] + created_at: Annotated[ + AwareDatetime, + Field( + description="Time the sampler was created in ISO 8601 format", + title="Created At", + ), + ] + model_id: Annotated[ + str, + Field(description="Hashid of the underlying Baseten model.", title="Model Id"), + ] + deployment_id: Annotated[ + str, 
+ Field( + description="Hashid of the specific model deployment (version).", + title="Deployment Id", + ), + ] + status: Annotated[ + LoopsSamplerStatus, Field(description="The sampler's current status.") + ] + + +class ListLoopsRunsQueryParams(BaseModel): + run_id: Annotated[ + str | None, + Field(description="Filter by run ID.", examples=["k4q95w5"], title="Run Id"), + ] = None + base_model: Annotated[ + str | None, + Field( + description="Filter runs by base model name.", + examples=["Qwen/Qwen3-8B"], + title="Base Model", + ), + ] = None + + +class CreateLoopsRunRequest(BaseModel): + session_id: Annotated[ + str, + Field( + description="ID of the Loops session this run belongs to.", + title="Session Id", + ), + ] + base_model: Annotated[ + str, + Field(description="Base model ID (e.g. 'Qwen/Qwen3-8B').", title="Base Model"), + ] + max_seq_len: Annotated[ + int | None, + Field( + description="Maximum prompt length (in tokens) the run must handle. Set this to the longest training example you plan to send. Defaults to the maximum supported by the model configuration.", + title="Max Seq Len", + ), + ] = None + lora_rank: Annotated[ + int | None, Field(description="LoRA rank.", title="Lora Rank") + ] = 64 + seed: Annotated[ + int | None, Field(description="Random seed for reproducibility.", title="Seed") + ] = None + scale_down_delay_seconds: Annotated[ + int | None, + Field( + description="Seconds of inactivity before the run scales to zero. Must be positive. Defaults to 3600 (1 hour).", + gt=0, + title="Scale Down Delay Seconds", + ), + ] = 3600 + path: Annotated[ + str | None, + Field( + description="Optional bt:// URI of an existing checkpoint to load weights from on startup. 
Form: bt://loops:/weights/.", + examples=["bt://loops:k4q95w5/weights/step-100"], + title="Path", + ), + ] = None + reuse_from_session_id: Annotated[ + str | None, + Field( + description="Optional Loops session ID whose trainer deployment should be reused for this run, sharing the infrastructure across sessions instead of provisioning fresh. The named session must belong to the same team. Reuse is best-effort: if the prior deployment is stopped, failed, or its sampler is unhealthy, a new deployment is provisioned instead.", + title="Reuse From Session Id", + ), + ] = None + + +class ListLoopsSamplersResponse(BaseModel): + samplers: Annotated[ + list[LoopsSampler], Field(description="List of samplers.", title="Samplers") + ] + + +class CreateLoopsSamplerRequest(BaseModel): + session_id: Annotated[ + str, + Field( + description="ID of the Loops session this sampler belongs to.", + title="Session Id", + ), + ] + base_model: Annotated[ + str, + Field( + description="Base model ID for standalone samplers (e.g., for baselines).", + title="Base Model", + ), + ] + max_seq_length: Annotated[ + int | None, + Field( + description="Maximum prompt length (in tokens) the sampler must handle. Set this to the longest prompt you plan to send. Omit to use the default for the base model.", + title="Max Seq Length", + ), + ] = None + model_path: Annotated[ + str | None, + Field( + description="Optional bt:// URI of an existing sampler-target checkpoint to load weights from on startup. Form: bt://loops:/sampler_weights/.", + examples=["bt://loops:k4q95w5/sampler_weights/step-100"], + title="Model Path", + ), + ] = None + reuse_from_session_id: Annotated[ + str | None, + Field( + description="Optional Loops session ID whose deployment should be reused for this sampler. 
Same best-effort semantics as the run endpoint.", + title="Reuse From Session Id", + ), + ] = None + + +class CreateLoopsSamplerResponse(BaseModel): + sampler: LoopsSampler + + +class GetLoopsSamplerResponse(BaseModel): + sampler: Annotated[LoopsSampler, Field(description="The Loops sampler.")] + + +class LoopsCheckpoint(BaseModel): + checkpoint_id: Annotated[ + str, Field(description="The ID of the checkpoint.", title="Checkpoint Id") + ] + created_at: Annotated[ + AwareDatetime, + Field( + description="The timestamp of the checkpoint in ISO 8601 format.", + title="Created At", + ), + ] + checkpoint_type: Annotated[ + str, Field(description="The type of checkpoint.", title="Checkpoint Type") + ] + base_model: Annotated[ + str | None, + Field(description="The base model of the checkpoint.", title="Base Model"), + ] + lora_adapter_config: Annotated[ + dict[str, Any] | None, + Field( + description="The adapter config of the checkpoint.", + title="Lora Adapter Config", + ), + ] + size_bytes: Annotated[ + int, + Field(description="The size of the checkpoint in bytes.", title="Size Bytes"), + ] + sync_status: Annotated[ + str | None, + Field( + description="Sync state of the checkpoint: SYNCING or COMPLETE.", + title="Sync Status", + ), + ] = None + id: Annotated[str, Field(description="The checkpoint ID.", title="Id")] + run_id: Annotated[ + str, + Field( + description="The ID of the run that produced the checkpoint.", + title="Run Id", + ), + ] + target: Annotated[ + TrainerCheckpointTarget, + Field( + description="Whether this checkpoint is loadable by the sampler or by the run." + ), + ] + + +class ListLoopsCheckpointsResponse(BaseModel): + checkpoints: Annotated[ + list[LoopsCheckpoint], + Field(description="Matching checkpoints.", title="Checkpoints"), + ] + + +class ListLoopsCheckpointsQueryParams(BaseModel): + run_id: Annotated[ + str | None, + Field( + description="Filter by run ID. 
Returns all checkpoints saved by the run.", + examples=["k4q95w5"], + title="Run Id", + ), + ] = None + base_model: Annotated[ + str | None, + Field( + description="Filter by base model. Returns checkpoints across the caller's runs of this base model.", + examples=["Qwen/Qwen3-8B"], + title="Base Model", + ), + ] = None + checkpoint_path: Annotated[ + str | None, + Field( + description="bt:// URI of a Loops checkpoint. Form: bt://loops:/(weights|sampler_weights)/.", + examples=["bt://loops:k4q95w5/sampler_weights/step-100"], + title="Checkpoint Path", + ), + ] = None + + +class ValidateLoopsCheckpointRequest(BaseModel): + checkpoint_path: Annotated[ + str, + Field( + description="bt:// URI of a sampler checkpoint. Form: bt://loops:/sampler_weights/.", + examples=["bt://loops:k4q95w5/sampler_weights/step-100"], + title="Checkpoint Path", + ), + ] + + +class ValidateLoopsCheckpointResponse(BaseModel): + pass + + +class LoopsCheckpointFilesResponse(BaseModel): + presigned_urls: Annotated[ + list[CheckpointFile], + Field( + description="List of presigned URLs for checkpoint files.", + title="Presigned Urls", + ), + ] + next_page_token: Annotated[ + int | None, + Field( + description="Token to use for fetching the next page of results. 
None when there are no more results.", + title="Next Page Token", + ), + ] = None + total_count: Annotated[ + int, + Field( + description="Total number of checkpoint files available.", + title="Total Count", + ), + ] + + +class LoopsDeploymentStatus(BaseModel): + name: Annotated[Name, Field(description="Latest status of the Loops deployment.")] + + +class LoopsDeployment(BaseModel): + id: Annotated[str, Field(description="The Loops deployment ID.", title="Id")] + base_model: Annotated[ + str, + Field( + description="The HuggingFace base model the deployment is fine-tuning.", + title="Base Model", + ), + ] + base_url: Annotated[str, Field(description="The run's base URL.", title="Base Url")] + status: Annotated[ + LoopsDeploymentStatus, Field(description="Latest deployment status.") + ] + sampler: Annotated[ + LoopsSampler, Field(description="The sampler bound to this deployment.") + ] + + +class ListLoopsDeploymentsResponse(BaseModel): + deployments: Annotated[ + list[LoopsDeployment], + Field(description="Active Loops deployments.", title="Deployments"), + ] + + +class DeactivateLoopsDeploymentResponse(BaseModel): + id: Annotated[ + str, Field(description="The deactivated Loops deployment ID.", title="Id") + ] + base_model: Annotated[ + str, + Field( + description="The base model whose Loops deployment was deactivated.", + title="Base Model", + ), + ] + user: Annotated[User, Field(description="The user who owned the Loops deployment.")] + + +class GetLoopsDeploymentResponse(BaseModel): + deployment: Annotated[LoopsDeployment, Field(description="The Loops deployment.")] + + +class GetLoopsDeploymentMetricsRequest(BaseModel): + end_epoch_millis: Annotated[ + int | None, + Field( + description="Epoch millis to end fetching metrics.", + title="End Epoch Millis", + ), + ] = None + start_epoch_millis: Annotated[ + int | None, + Field( + description="Epoch millis to start fetching metrics.", + title="Start Epoch Millis", + ), + ] = None + + +class 
InferenceVolumeByStatusDatapoint(BaseModel): + timestamp: Annotated[ + AwareDatetime, Field(description="ISO 8601 timestamp.", title="Timestamp") + ] + status_2xx: Annotated[ + float, Field(description="2xx requests per second.", title="Status 2Xx") + ] + status_4xx: Annotated[ + float, Field(description="4xx requests per second.", title="Status 4Xx") + ] + status_5xx: Annotated[ + float, Field(description="5xx requests per second.", title="Status 5Xx") + ] + + +class ResponseTimeDatapoint(BaseModel): + timestamp: Annotated[ + AwareDatetime, Field(description="ISO 8601 timestamp.", title="Timestamp") + ] + p50: Annotated[ + float | None, + Field( + description="50th percentile request latency (milliseconds).", title="P50" + ), + ] = None + p95: Annotated[ + float | None, + Field( + description="95th percentile request latency (milliseconds).", title="P95" + ), + ] = None + p99: Annotated[ + float | None, + Field( + description="99th percentile request latency (milliseconds).", title="P99" + ), + ] = None + + +class GetLoopsDeploymentLogsRequest(BaseModel): + start_epoch_millis: Annotated[ + int | None, + Field( + description="Epoch millis timestamp to start fetching logs", + title="Start Epoch Millis", + ), + ] = None + end_epoch_millis: Annotated[ + int | None, + Field( + description="Epoch millis timestamp to end fetching logs", + title="End Epoch Millis", + ), + ] = None + direction: Annotated[SortOrder | None, Field(description="Sort order for logs")] = ( + None + ) + limit: Annotated[ + Limit | None, + Field( + default_factory=lambda: Limit(500), + description="Limit of logs to fetch in a single request", + title="Limit", + ), + ] + + +class TrainingGpuCapacityItem(BaseModel): + gpu_type: Annotated[ + str, + Field( + description="GPU type identifier (e.g. H100, A100-40GB)", title="Gpu Type" + ), + ] + baseline: Annotated[ + int, + Field( + description="Baseline GPU allocation; jobs below this threshold are expected to run immediately. 
0 if not configured.", + title="Baseline", + ), + ] + limit: Annotated[ + int, + Field( + description="Maximum concurrent GPUs of this type for this org", + title="Limit", + ), + ] + usage_count: Annotated[ + int, + Field( + description="GPUs currently in use by active training jobs", + title="Usage Count", + ), + ] + + +class GetTrainingGpuCapacityResponse(BaseModel): + gpu_capacities: Annotated[ + list[TrainingGpuCapacityItem], + Field( + description="GPU capacity limits and current usage per GPU type", + title="Gpu Capacities", + ), + ] + + +class GetBlobCredentialsResponse(BaseModel): + creds: Annotated[ + AWSCredentials, Field(description="The credentials to upload the blob to") + ] + s3_key: Annotated[ + str, Field(description="The S3 key to upload the blob to", title="S3 Key") + ] + s3_bucket: Annotated[ + str, Field(description="The S3 bucket to upload the blob to", title="S3 Bucket") + ] + + +class CreateAPIKeyRequest(BaseModel): + name: Annotated[ + str | None, + Field( + description="Optional name for the API key", + examples=["my-api-key"], + title="Name", + ), + ] = None + type: Annotated[ + APIKeyCategory, + Field( + description="Type of the API key.", + examples=[ + "PERSONAL", + "WORKSPACE_EXPORT_METRICS", + "WORKSPACE_INVOKE", + "WORKSPACE_MANAGE_ALL", + ], + ), + ] + model_ids: Annotated[ + list[str] | None, + Field( + description="List of model IDs to scope the API key to, only present if type is 'WORKSPACE_EXPORT_METRICS' or 'WORKSPACE_INVOKE'", + examples=[["aaaaaaaa"]], + title="Model Ids", + ), + ] = None + + +class APIKey(BaseModel): + api_key: Annotated[str, Field(description="The API key string", title="Api Key")] + + +class APIKeyInfo(BaseModel): + prefix: Annotated[ + str, Field(description="The prefix of the API key", title="Prefix") + ] + name: Annotated[ + str | None, + Field( + description="Optional name for the API key", + examples=["my-api-key"], + title="Name", + ), + ] = None + type: Annotated[ + APIKeyCategory, + Field( + 
description="Type of the API key.", + examples=[ + "PERSONAL", + "WORKSPACE_EXPORT_METRICS", + "WORKSPACE_INVOKE", + "WORKSPACE_MANAGE_ALL", + ], + ), + ] + model_ids: Annotated[ + list[str] | None, + Field( + description="List of model IDs to scope the API key to, only present if type is 'WORKSPACE_EXPORT_METRICS' or 'WORKSPACE_INVOKE'", + examples=[["aaaaaaaa"]], + title="Model Ids", + ), + ] = None + team_name: Annotated[ + str | None, + Field( + description="The name of the team associated with the API key", + title="Team Name", + ), + ] = None + + +class APIKeys(BaseModel): + keys: Annotated[ + list[APIKeyInfo], + Field(description="A list of API key information", title="Keys"), + ] + + +class APIKeyTombstone(BaseModel): + prefix: Annotated[ + str, Field(description="Unique prefix of the API key", title="Prefix") + ] + + +class ModelWeightSnapshot(BaseModel): + model: Annotated[ + str, Field(description="Unique identifier of the model", title="Model") + ] + snapshot_uri: Annotated[ + str, + Field(description="Path to the model weight snapshot", title="Snapshot Uri"), + ] + received_at: Annotated[ + AwareDatetime, Field(description="Time of the snapshot", title="Received At") + ] + + +class CreateModelWeightSnapshotRequest(BaseModel): + model: Annotated[ + str, Field(description="Unique identifier of the model", title="Model") + ] + snapshot_uri: Annotated[ + str, + Field(description="Path to the model weight snapshot", title="Snapshot Uri"), + ] + + +class CreateLLMModelRequest(BaseModel): resources: Annotated[ dict[str, Any], Field(description="Resources allocated to the model", title="Resources"), @@ -1937,6 +3070,13 @@ class CreateLLMModelRequest(BaseModel): title="Environment Variables", ), ] = None + model_metadata: Annotated[ + dict[str, Any] | None, + Field( + description="Model metadata persisted into model_config", + title="Model Metadata", + ), + ] = None autoscaling_settings: Annotated[ UpdateAutoscalingSettings | None, Field( @@ -1946,6 +3086,7 @@ 
class CreateLLMModelRequest(BaseModel): "autoscaling_window": 600, "concurrency_target": None, "max_replica": 5, + "max_scale_down_rate": None, "min_replica": 1, "scale_down_delay": 300, "target_in_flight_tokens": None, @@ -1970,19 +3111,42 @@ class CreateLLMModelRequest(BaseModel): title="Metadata", ), ] = None + weights: Annotated[ + list[dict[str, Any]] | None, + Field( + description="Weight configurations for BDN model weight distribution", + examples=[ + [ + { + "mount_location": "/models/base", + "source": "hf://meta-llama/Llama-3-8B", + } + ] + ], + title="Weights", + ), + ] = None name: Annotated[str, Field(description="Name of the model", title="Name")] -class LLMModel(BaseModel): - id: Annotated[str, Field(description="Unique identifier of the model", title="Id")] - created_at: Annotated[ - AwareDatetime, +class LLMModelHandle(BaseModel): + model_id: Annotated[ + str, Field(description="Unique identifier of the model", title="Model Id") + ] + version_id: Annotated[ + str, + Field(description="Unique identifier of the model version", title="Version Id"), + ] + hostname: Annotated[ + str, Field(description="Hostname used to invoke the model", title="Hostname") + ] + instance_type_name: Annotated[ + str | None, Field( - description="Time the model was created in ISO 8601 format", - title="Created At", + description="Name of the instance type the model deployment is running on", + title="Instance Type Name", ), - ] - name: Annotated[str, Field(description="Name of the model", title="Name")] + ] = None class CreateLLMModelVersionRequest(BaseModel): @@ -2007,6 +3171,13 @@ class CreateLLMModelVersionRequest(BaseModel): title="Environment Variables", ), ] = None + model_metadata: Annotated[ + dict[str, Any] | None, + Field( + description="Model metadata persisted into model_config", + title="Model Metadata", + ), + ] = None autoscaling_settings: Annotated[ UpdateAutoscalingSettings | None, Field( @@ -2016,6 +3187,7 @@ class 
CreateLLMModelVersionRequest(BaseModel): "autoscaling_window": 600, "concurrency_target": None, "max_replica": 5, + "max_scale_down_rate": None, "min_replica": 1, "scale_down_delay": 300, "target_in_flight_tokens": None, @@ -2040,20 +3212,21 @@ class CreateLLMModelVersionRequest(BaseModel): title="Metadata", ), ] = None - - -class LLMModelVersion(BaseModel): - id: Annotated[ - str, Field(description="Unique identifier of the model version", title="Id") - ] - created_at: Annotated[ - AwareDatetime, + weights: Annotated[ + list[dict[str, Any]] | None, Field( - description="Time the model was created in ISO 8601 format", - title="Created At", + description="Weight configurations for BDN model weight distribution", + examples=[ + [ + { + "mount_location": "/models/base", + "source": "hf://meta-llama/Llama-3-8B", + } + ] + ], + title="Weights", ), - ] - name: Annotated[str, Field(description="Name of the model version", title="Name")] + ] = None class LibraryListing(BaseModel): @@ -2427,79 +3600,300 @@ class ModelApiItem(BaseModel): cached_input_tokens: Annotated[ int, Field( - description="Total cached input tokens for this model", - title="Cached Input Tokens", + description="Total cached input tokens for this model", + title="Cached Input Tokens", + ), + ] + daily: Annotated[ + list[DailyModelApiUsage] | None, + Field(description="Daily usage breakdown", title="Daily"), + ] = None + + +class Subtotal6(RootModel[str]): + model_config = ConfigDict( + regex_engine="python-re", + ) + root: Annotated[ + str, + Field( + description="Subtotal cost in dollars after applying credits used", + pattern="^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + title="Subtotal", + ), + ] + + +class ModelApisUsage(BaseModel): + subtotal: Annotated[ + float | Subtotal6, + Field( + description="Subtotal cost in dollars after applying credits used", + title="Subtotal", + ), + ] + credits_used: Annotated[ + float | CreditsUsed, + Field(description="Credits applied in dollars", title="Credits Used"), + ] 
+ total: Annotated[ + float | Total, Field(description="Total cost in dollars", title="Total") + ] + breakdown: Annotated[ + list[ModelApiItem] | None, + Field(description="Per-model usage breakdown", title="Breakdown"), + ] = None + + +class Subtotal7(RootModel[str]): + model_config = ConfigDict( + regex_engine="python-re", + ) + root: Annotated[ + str, + Field( + description="Subtotal cost in dollars for this billable resource", + pattern="^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + title="Subtotal", + ), + ] + + +class Subtotal8(RootModel[str]): + model_config = ConfigDict( + regex_engine="python-re", + ) + root: Annotated[ + str, + Field( + description="Subtotal cost in dollars after applying credits used", + pattern="^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + title="Subtotal", + ), + ] + + +class UsageSummaryRequest(BaseModel): + start_date: Annotated[ + AwareDatetime, + Field( + description="Start date (ISO 8601, UTC). Earliest queryable: 2026-01-01.", + title="Start Date", + ), + ] + end_date: Annotated[ + AwareDatetime, + Field( + description="End date in ISO 8601 format (UTC). 
Date range cannot exceed 31 days.", + title="End Date", + ), + ] + + +class UserInfo(BaseModel): + user_id: Annotated[ + str, Field(description="Unique identifier for the user", title="User Id") + ] + email: Annotated[ + str | None, Field(description="Email address of the user", title="Email") + ] = None + name: Annotated[ + str | None, Field(description="Display name of the user", title="Name") + ] = None + workspace_name: Annotated[ + str | None, + Field(description="Name of the user's workspace", title="Workspace Name"), + ] = None + + +class Name1(RootModel[str]): + root: Annotated[ + str | None, + Field( + description="Optional display name for the group.", + examples=["Acme prod"], + max_length=255, + title="Name", + ), + ] = None + + +class GroupMetadata(BaseModel): + name: Annotated[ + Name1 | None, + Field( + description="Optional display name for the group.", + examples=["Acme prod"], + title="Name", + ), + ] = None + external_entity_id: Annotated[ + str, + Field( + description="External-system identifier for this group. Unique within the caller's org.", + examples=["cust_42"], + max_length=255, + min_length=1, + title="External Entity Id", + ), + ] + + +class LimitEnforcement(Enum): + CASCADING = "CASCADING" + INDEPENDENT = "INDEPENDENT" + + +class LimitType(Enum): + REQUEST = "REQUEST" + TOKEN = "TOKEN" + + +class PaginationResponse(BaseModel): + has_more: Annotated[ + bool, + Field( + description="Whether more items exist after this page.", title="Has More" + ), + ] + cursor: Annotated[ + str | None, + Field( + description="Opaque cursor to pass into the next request. 
Null when there is no next page.", + title="Cursor", + ), + ] = None + + +class RateLimitUnit(Enum): + SECOND = "SECOND" + MINUTE = "MINUTE" + + +class RateLimit(BaseModel): + type: Annotated[ + LimitType, + Field(description="The type of the rate limit", examples=["TOKEN", "REQUEST"]), + ] + unit: Annotated[ + RateLimitUnit, + Field(description="The unit of the rate limit", examples=["SECOND", "MINUTE"]), + ] + threshold: Annotated[ + int, + Field( + description="The threshold for the rate limit", + examples=[1000, 50000], + ge=1, + title="Threshold", + ), + ] + + +class UsageLimitUnit(Enum): + DAY = "DAY" + + +class UsageLimit(BaseModel): + type: Annotated[ + LimitType, + Field(description="The type of the usage limit", examples=["REQUEST", "TOKEN"]), + ] + unit: Annotated[ + UsageLimitUnit, + Field(description="The unit of the usage limit", examples=["DAY"]), + ] + threshold: Annotated[ + int, + Field( + description="The threshold for the usage limit", + examples=[10000000], + ge=1, + title="Threshold", + ), + ] + + +class UpdateGroupMetadata(BaseModel): + name: Annotated[ + Name1 | None, + Field( + description="Optional display name for the group.", + examples=["Acme prod"], + title="Name", ), + ] = None + + +class GatewayKeyInfo(BaseModel): + prefix: Annotated[ + str, Field(description="The prefix of the Model API key.", title="Prefix") ] - daily: Annotated[ - list[DailyModelApiUsage] | None, - Field(description="Daily usage breakdown", title="Daily"), + name: Annotated[ + str | None, Field(description="Optional display name.", title="Name") ] = None -class Subtotal6(RootModel[str]): - model_config = ConfigDict( - regex_engine="python-re", - ) - root: Annotated[ - str, - Field( - description="Subtotal cost in dollars after applying credits used", - pattern="^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - title="Subtotal", - ), +class KeysForGroupResponse(BaseModel): + items: Annotated[ + list[GatewayKeyInfo], Field(description="Items in this page.", title="Items") + ] + 
pagination: Annotated[ + PaginationResponse, Field(description="Pagination metadata for the page.") ] -class ModelApisUsage(BaseModel): - subtotal: Annotated[ - float | Subtotal6, +class CreateApiKeyForGroupRequest(BaseModel): + name: Annotated[ + str | None, Field( - description="Subtotal cost in dollars after applying credits used", - title="Subtotal", + description="Optional display name for the new key.", + examples=["prod-key-1"], + title="Name", ), - ] - credits_used: Annotated[ - float | CreditsUsed, - Field(description="Credits applied in dollars", title="Credits Used"), - ] - total: Annotated[ - float | Total, Field(description="Total cost in dollars", title="Total") - ] - breakdown: Annotated[ - list[ModelApiItem] | None, - Field(description="Per-model usage breakdown", title="Breakdown"), ] = None -class Subtotal7(RootModel[str]): - model_config = ConfigDict( - regex_engine="python-re", - ) - root: Annotated[ +class CreateApiKeyForGroupResponse(BaseModel): + api_key: Annotated[ str, Field( - description="Subtotal cost in dollars for this billable resource", - pattern="^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - title="Subtotal", + description="Plaintext key string, returned exactly once.", title="Api Key" ), ] + prefix: Annotated[ + str, Field(description="Key prefix (the part before the dot).", title="Prefix") + ] + name: Annotated[ + str | None, Field(description="Display name of the key.", title="Name") + ] = None -class Subtotal8(RootModel[str]): - model_config = ConfigDict( - regex_engine="python-re", - ) - root: Annotated[ +class RegisterAPIKeyRequest(BaseModel): + name: Annotated[ + str | None, + Field( + description="Optional name for the Model API key", + examples=["my-model-api-key"], + title="Name", + ), + ] = None + key: Annotated[ str, Field( - description="Subtotal cost in dollars after applying credits used", - pattern="^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - title="Subtotal", + description="Value of the API key to register", + 
examples=["my-secure-api-key-value"], + title="Key", ), ] +class RegisterAPIKeyResponse(BaseModel): + ok: Annotated[ + bool, Field(description="Whether the registration was successful", title="Ok") + ] + + class RollingDeployConfig(BaseModel): rolling_deploy_strategy: Annotated[ RollingDeployStrategy | None, @@ -2604,6 +3998,7 @@ class CreateEnvironmentRequest(BaseModel): "autoscaling_window": 800, "concurrency_target": 3, "max_replica": 2, + "max_scale_down_rate": None, "min_replica": 1, "scale_down_delay": 60, "target_in_flight_tokens": None, @@ -2643,6 +4038,7 @@ class UpdateEnvironmentRequest(BaseModel): "autoscaling_window": 800, "concurrency_target": 3, "max_replica": 2, + "max_scale_down_rate": None, "min_replica": 1, "scale_down_delay": 60, "target_in_flight_tokens": None, @@ -2740,6 +4136,7 @@ class CreateChainEnvironmentRequest(BaseModel): "autoscaling_window": 800, "concurrency_target": 4, "max_replica": 3, + "max_scale_down_rate": None, "min_replica": 2, "scale_down_delay": 63, "target_in_flight_tokens": None, @@ -2753,6 +4150,7 @@ class CreateChainEnvironmentRequest(BaseModel): "autoscaling_window": None, "concurrency_target": None, "max_replica": 3, + "max_scale_down_rate": None, "min_replica": 3, "scale_down_delay": None, "target_in_flight_tokens": None, @@ -2863,6 +4261,13 @@ class TrainingJob(BaseModel): CheckpointSyncStatus | None, Field(description="Checkpoint sync status of the training job."), ] = None + priority: Annotated[ + int | None, + Field( + description="Queue priority. Higher values are dequeued first. 
NULL is treated as 0.", + title="Priority", + ), + ] = 0 user: Annotated[ User | None, Field(description="The user who created the training job.") ] = None @@ -3163,6 +4568,77 @@ class SearchTrainingJobsResponse(BaseModel): ] +class LoopsRun(BaseModel): + id: Annotated[str, Field(description="The run ID.", title="Id")] + session_id: Annotated[ + str, + Field(description="The session ID this run belongs to.", title="Session Id"), + ] + base_model: Annotated[ + str, + Field( + description="The HuggingFace base model the run is fine-tuning.", + title="Base Model", + ), + ] + base_url: Annotated[str, Field(description="The run's base URL.", title="Base Url")] + created_at: Annotated[ + AwareDatetime, + Field( + description="Time the run was created in ISO 8601 format", + title="Created At", + ), + ] + sampler: Annotated[ + LoopsSampler, Field(description="The sampler bound to this run.") + ] + + +class ListLoopsRunsResponse(BaseModel): + runs: Annotated[list[LoopsRun], Field(description="List of runs.", title="Runs")] + + +class CreateLoopsRunResponse(BaseModel): + run: LoopsRun + + +class GetLoopsRunResponse(BaseModel): + run: Annotated[ + LoopsRun, Field(description="The Loops run with its associated sampler.") + ] + + +class LoopsDeploymentNodeMetrics(BaseModel): + node_id: Annotated[ + str, Field(description="Identifier for the node.", title="Node Id") + ] + gpu_memory_usage_bytes: Annotated[ + dict[str, list[TrainingJobMetric]], + Field( + description="GPU memory usage bytes per GPU rank.", + title="Gpu Memory Usage Bytes", + ), + ] + gpu_utilization: Annotated[ + dict[str, list[TrainingJobMetric]], + Field( + description="Fractional GPU utilization per GPU rank.", + title="Gpu Utilization", + ), + ] + cpu_usage: Annotated[ + list[TrainingJobMetric], + Field(description="CPU usage in cores.", title="Cpu Usage"), + ] + cpu_memory_usage_bytes: Annotated[ + list[TrainingJobMetric], + Field(description="CPU memory usage bytes.", title="Cpu Memory Usage Bytes"), + ] + 
ephemeral_storage: Annotated[ + StorageMetrics, Field(description="Ephemeral storage usage.") + ] + + class BillableResource(BaseModel): id: Annotated[ str, Field(description="Unique identifier of the resource", title="Id") @@ -3176,6 +4652,13 @@ class BillableResource(BaseModel): name: Annotated[ str | None, Field(description="Name of the resource", title="Name") ] = None + model_name: Annotated[ + str | None, + Field( + description="Name of the parent resource (e.g., model name for model deployments, training project name for training jobs)", + title="Model Name", + ), + ] = None is_deleted: Annotated[ bool, Field( @@ -3307,6 +4790,109 @@ class UsageSummary(BaseModel): ] = None +class EffectiveRateLimit(BaseModel): + type: Annotated[ + LimitType, + Field(description="The type of the rate limit", examples=["TOKEN", "REQUEST"]), + ] + unit: Annotated[ + RateLimitUnit, + Field(description="The unit of the rate limit", examples=["SECOND", "MINUTE"]), + ] + threshold: Annotated[ + int, + Field( + description="The threshold for the rate limit", + examples=[1000, 50000], + ge=1, + title="Threshold", + ), + ] + source_group: Annotated[ + str, + Field( + description="ID of the group in the hierarchy this limit is anchored to.", + examples=["abc123"], + title="Source Group", + ), + ] + + +class EffectiveUsageLimit(BaseModel): + type: Annotated[ + LimitType, + Field(description="The type of the usage limit", examples=["REQUEST", "TOKEN"]), + ] + unit: Annotated[ + UsageLimitUnit, + Field(description="The unit of the usage limit", examples=["DAY"]), + ] + threshold: Annotated[ + int, + Field( + description="The threshold for the usage limit", + examples=[10000000], + ge=1, + title="Threshold", + ), + ] + source_group: Annotated[ + str, + Field( + description="ID of the group in the hierarchy this limit is anchored to.", + examples=["abc123"], + title="Source Group", + ), + ] + + +class GroupHierarchy(BaseModel): + limit_enforcement: Annotated[ + LimitEnforcement | None, 
Field(examples=["CASCADING", "INDEPENDENT"]) + ] = LimitEnforcement.INDEPENDENT + parent_group_id: Annotated[ + str | None, Field(examples=["abc123"], title="Parent Group Id") + ] = None + + +class ModelConfig(BaseModel): + slug: Annotated[str, Field(description="Shared endpoint slug.", title="Slug")] + rate_limits: Annotated[list[RateLimit] | None, Field(title="Rate Limits")] = None + usage_limits: Annotated[list[UsageLimit] | None, Field(title="Usage Limits")] = None + + +class CreateGroupRequest(BaseModel): + metadata: Annotated[ + GroupMetadata, Field(description="Group identity + display metadata.") + ] + models: Annotated[ + list[ModelConfig], + Field( + description="Per-model rate and usage limit configuration. Defines the group's complete model set. Must be non-empty.", + title="Models", + ), + ] + hierarchy: Annotated[ + GroupHierarchy, + Field( + description="Parent linkage and limit enforcement mode. Immutable after creation." + ), + ] + + +class UpdateGroupRequest(BaseModel): + metadata: Annotated[ + UpdateGroupMetadata | None, + Field(description="Mutable group metadata.", examples=[{"name": "Acme Prod"}]), + ] = None + models: Annotated[ + list[ModelConfig] | None, + Field( + description="Per-model rate and usage limit configuration.", title="Models" + ), + ] = None + + class PromotionSettings(BaseModel): redeploy_on_promotion: Annotated[ bool | None, @@ -3425,6 +5011,123 @@ class DockerAuth(BaseModel): ] = None +class LoopsDeploymentMetrics(BaseModel): + inference_volume: Annotated[ + list[TrainingJobMetric], + Field( + description="Number of inference requests per unit time (requests per second).", + title="Inference Volume", + ), + ] + concurrent_requests: Annotated[ + list[TrainingJobMetric], + Field( + description="Number of in-progress concurrent inference requests. 
Source: the queue-proxy ``revision_queue_depth`` gauge on ``http-usermetric``.", + title="Concurrent Requests", + ), + ] + response_time_stats: Annotated[ + list[ResponseTimeDatapoint], + Field( + description="Percentiles of the response time distribution.", + title="Response Time Stats", + ), + ] + inference_volume_by_status: Annotated[ + list[InferenceVolumeByStatusDatapoint], + Field( + description="Request rate split by response code class.", + title="Inference Volume By Status", + ), + ] + gpu_memory_usage_bytes: Annotated[ + dict[str, list[TrainingJobMetric]], + Field( + description="Leader-pod GPU memory bytes per GPU rank.", + title="Gpu Memory Usage Bytes", + ), + ] + gpu_utilization: Annotated[ + dict[str, list[TrainingJobMetric]], + Field( + description="Leader-pod fractional GPU utilization per GPU rank.", + title="Gpu Utilization", + ), + ] + cpu_usage: Annotated[ + list[TrainingJobMetric], + Field(description="Leader-pod CPU usage in cores.", title="Cpu Usage"), + ] + cpu_memory_usage_bytes: Annotated[ + list[TrainingJobMetric], + Field( + description="Leader-pod CPU memory usage bytes.", + title="Cpu Memory Usage Bytes", + ), + ] + ephemeral_storage: Annotated[ + StorageMetrics, Field(description="Leader-pod ephemeral storage usage.") + ] + per_node_metrics: Annotated[ + list[LoopsDeploymentNodeMetrics], + Field( + description="Per-node compute breakdown for multinode trainer deployments.", + title="Per Node Metrics", + ), + ] + + +class GetLoopsDeploymentMetricsResponse(BaseModel): + deployment_id: Annotated[ + str, Field(description="The trainer deployment ID.", title="Deployment Id") + ] + metrics: Annotated[ + LoopsDeploymentMetrics, Field(description="Metrics for the deployment.") + ] + + +class EffectiveModelConfig(BaseModel): + slug: Annotated[str, Field(description="Shared endpoint slug.", title="Slug")] + rate_limits: Annotated[ + list[EffectiveRateLimit] | None, Field(title="Rate Limits") + ] = None + usage_limits: Annotated[ + 
list[EffectiveUsageLimit] | None, Field(title="Usage Limits") + ] = None + + +class Group(BaseModel): + id: Annotated[ + str, Field(description="Internal Baseten ID for the group.", title="Id") + ] + metadata: Annotated[ + GroupMetadata, Field(description="Group identity + display metadata.") + ] + models: Annotated[list[ModelConfig] | None, Field(title="Models")] = None + effective_models: Annotated[ + list[EffectiveModelConfig] | None, Field(title="Effective Models") + ] = None + hierarchy: Annotated[ + GroupHierarchy, + Field( + description="Parent linkage and limit enforcement mode. Parent is null for root groups." + ), + ] + created_at: Annotated[ + AwareDatetime, + Field(description="When this group was created.", title="Created At"), + ] + + +class GroupsResponse(BaseModel): + items: Annotated[ + list[Group], Field(description="Items in this page.", title="Items") + ] + pagination: Annotated[ + PaginationResponse, Field(description="Pagination metadata for the page.") + ] + + class Environment(BaseModel): name: Annotated[str, Field(description="Name of the environment", title="Name")] created_at: Annotated[ @@ -3556,6 +5259,22 @@ class CreateTrainingJob(BaseModel): title="Weights", ), ] = None + enable_baseten_workdir: Annotated[ + bool | None, + Field( + description="When enabled, uses /b10/workspace as the working directory instead of the image WORKDIR.", + examples=[False, True], + title="Enable Baseten Workdir", + ), + ] = False + priority: Annotated[ + int | None, + Field( + description="Queue priority. Higher values are dequeued first. 
Defaults to 0.", + examples=[0, 10, 100], + title="Priority", + ), + ] = None class CreateTrainingJobRequest(BaseModel): diff --git a/baseten/client/modelconfig/_models.py b/baseten/client/modelconfig/_models.py index f85ab6c..9c66a10 100644 --- a/baseten/client/modelconfig/_models.py +++ b/baseten/client/modelconfig/_models.py @@ -536,6 +536,13 @@ class CheckpointList(BaseModel): artifact_references: Annotated[ list[TrainingArtifactReference] | None, Field(title="Artifact References") ] = None + loops_checkpoint_ids: Annotated[ + list[str] | None, + Field( + description="Loops checkpoint IDs to deploy. Mutually exclusive with artifact_references.", + title="Loops Checkpoint Ids", + ), + ] = None class CheckpointRepository(BaseModel): diff --git a/pyproject.toml b/pyproject.toml index 651e247..a7914f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,13 +12,20 @@ dependencies = [ [dependency-groups] dev = [ - "datamodel-code-generator>=0.55.0", + # Capped at <0.56: starting in later releases (confirmed in 0.59.1), + # complex object/scalar defaults are emitted as raw literals assigned to + # model-typed fields (e.g. `x: Model | None = {..dict..}`, `Limit | None = + # 500`) with validate_default=True, instead of the previous typed + # `default_factory=lambda: Model.model_validate(...)`. Pydantic validates + # these at runtime, but the literal does not match the annotation, so ty + # rejects the generated code. No flag restores the old emission. 
+ "datamodel-code-generator>=0.55.0,<0.56", "poethepoet>=0.35", "pytest>=9.0.2", "pytest-asyncio>=1.3.0", "pyyaml>=6.0", - "ruff>=0.15.8", - "ty>=0.0.33", + "ruff>=0.15.16", + "ty>=0.0.43", ] [project.urls] diff --git a/scripts/apigen/__main__.py b/scripts/apigen/__main__.py index 97d98fd..b576167 100644 --- a/scripts/apigen/__main__.py +++ b/scripts/apigen/__main__.py @@ -64,6 +64,7 @@ def generate_api(spec_file: Path, out_dir: Path, display_name: str) -> None: models_file = out_dir / "_models.py" run_datamodel_codegen(preprocessed, models_file) + models_file.write_text(postprocess_models(models_file.read_text())) print(f" -> {models_file}") client_file = out_dir / "_client.py" diff --git a/scripts/apigen/specs/config.schema.json b/scripts/apigen/specs/config.schema.json index d5e3f67..54744c2 100644 --- a/scripts/apigen/specs/config.schema.json +++ b/scripts/apigen/specs/config.schema.json @@ -180,6 +180,14 @@ }, "title": "Artifact References", "type": "array" + }, + "loops_checkpoint_ids": { + "description": "Loops checkpoint IDs to deploy. Mutually exclusive with artifact_references.", + "items": { + "type": "string" + }, + "title": "Loops Checkpoint Ids", + "type": "array" } }, "title": "CheckpointList", diff --git a/scripts/apigen/specs/inference.json b/scripts/apigen/specs/inference.json index a03a023..d4258a4 100644 --- a/scripts/apigen/specs/inference.json +++ b/scripts/apigen/specs/inference.json @@ -1488,4 +1488,4 @@ "description": "Endpoints that use bare paths (e.g. `/predict`, `/wake`). Only available on regional hostnames (`model-{model_id}-{env_name}.api.baseten.co`), where the environment is determined by the hostname." 
} ] -} +} \ No newline at end of file diff --git a/scripts/apigen/specs/management.json b/scripts/apigen/specs/management.json index 59b8662..19d61f9 100644 --- a/scripts/apigen/specs/management.json +++ b/scripts/apigen/specs/management.json @@ -17,7 +17,7 @@ "paths": { "/v1/secrets": { "get": { - "summary": "Gets all secrets", + "summary": "Gets all secrets (metadata only, no plain text keys)", "x-codeSamples": [ { "lang": "bash", @@ -53,7 +53,7 @@ "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/secrets\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'my_secret', 'value': 'my_secret_value'}\n)\n\nprint(response.text)" } ], - "description": "Creates a new secret or updates an existing secret if one with the provided name already exists. The name and creation date of the created or updated secret is returned.", + "description": "Creates or updates a secret by name. 
Scoped to the caller's primary team \u2014 use the team-scoped variant to target a specific team.", "requestBody": { "content": { "application/json": { @@ -78,9 +78,42 @@ } } }, + "/v1/secrets/{secret_name}": { + "delete": { + "summary": "Deletes a secret by name", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/secrets/{secret_name} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/secrets/{secret_name}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Deletes a secret by name in the specified team, or in the caller's organization default team when no team is specified.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SecretTombstoneV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/secret_name" + } + ] + }, "/v1/teams/{team_id}/secrets": { "get": { - "summary": "Gets all secrets for a team", + "summary": "Gets all secrets for a team (metadata only, no plain text keys)", "x-codeSamples": [ { "lang": "bash", @@ -146,6 +179,42 @@ } } }, + "/v1/teams/{team_id}/secrets/{secret_name}": { + "delete": { + "summary": "Deletes a secret by name", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/teams/{team_id}/secrets/{secret_name} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/teams/{team_id}/secrets/{secret_name}\"\n\nheaders = {\"Authorization\": 
f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Deletes a secret by name in the specified team, or in the caller's organization default team when no team is specified.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SecretTombstoneV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/team_id" + }, + { + "$ref": "#/components/parameters/secret_name" + } + ] + }, "/v1/teams": { "get": { "summary": "Lists all teams", @@ -228,6 +297,43 @@ } } }, + "/v1/prepare_model_upload": { + "post": { + "summary": "Validates a model push payload and issues upload credentials", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/prepare_model_upload \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"deployment\": {\n \"config\": null,\n \"raw_config\": null,\n \"user_env\": null,\n \"environment_name\": null,\n \"preserve_env_instance_type\": null,\n \"deploy_timeout_minutes\": null,\n \"deployment_name\": null,\n \"labels\": null\n },\n \"name\": null,\n \"team_id\": null,\n \"model_id\": null,\n \"dry_run\": null,\n \"is_development\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/prepare_model_upload\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'deployment': {'config': None, 'raw_config': None, 'user_env': None, 'environment_name': None, 'preserve_env_instance_type': None, 'deploy_timeout_minutes': None, 'deployment_name': None, 'labels': None}, 'name': None, 'team_id': None, 'model_id': None, 'dry_run': None, 'is_development': 
None}\n)\n\nprint(response.text)" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrepareModelUploadRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrepareModelUploadResponseV1" + } + } + } + } + } + } + }, "/v1/models": { "get": { "summary": "Gets all models", @@ -253,6 +359,110 @@ } } } + }, + "post": { + "summary": "Creates a new model from a source", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"source\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'source': None}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a new model in the caller's organization. The `source` field selects how the model is constructed (currently: `library_listing` \u2014 fork an accessible listing from `GET /v1/library_models`). 
The deployment isn't instantly ready \u2014 poll GET endpoint until status is ACTIVE.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateModelRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreatedModelDeploymentV1" + } + } + } + } + } + } + }, + "/v1/teams/{team_id}/models": { + "get": { + "summary": "Gets all models", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/teams/{team_id}/models \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/teams/{team_id}/models\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelsV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/team_id" + } + ], + "post": { + "summary": "Creates a new model from a source", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/teams/{team_id}/models \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"source\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/teams/{team_id}/models\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'source': None}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a 
new model in the caller's organization. The `source` field selects how the model is constructed (currently: `library_listing` \u2014 fork an accessible listing from `GET /v1/library_models`). The deployment isn't instantly ready \u2014 poll GET endpoint until status is ACTIVE.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateModelRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreatedModelDeploymentV1" + } + } + } + } + } } }, "/v1/models/{model_id}": { @@ -342,7 +552,42 @@ { "$ref": "#/components/parameters/model_id" } - ] + ], + "post": { + "summary": "Adds a new deployment to a model", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"source\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'source': None}\n)\n\nprint(response.text)" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateModelDeploymentRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreatedModelDeploymentV1" + } + } + } + } + } + } }, "/v1/models/{model_id}/deployments/development": { "get": { @@ -478,11 +723,11 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url 
https://api.baseten.co/v1/models/{model_id}/deployments/development/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"min_replica\": 0,\n \"max_replica\": 7,\n \"autoscaling_window\": 600,\n \"scale_down_delay\": 120,\n \"concurrency_target\": 2,\n \"target_utilization_percentage\": 70,\n \"target_in_flight_tokens\": 40000\n}'" + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/development/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"min_replica\": 0,\n \"max_replica\": 7,\n \"autoscaling_window\": 600,\n \"scale_down_delay\": 120,\n \"concurrency_target\": 2,\n \"target_utilization_percentage\": 70,\n \"target_in_flight_tokens\": 40000,\n \"max_scale_down_rate\": 2.0\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/development/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'min_replica': 0, 'max_replica': 7, 'autoscaling_window': 600, 'scale_down_delay': 120, 'concurrency_target': 2, 'target_utilization_percentage': 70, 'target_in_flight_tokens': 40000}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/development/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'min_replica': 0, 'max_replica': 7, 'autoscaling_window': 600, 'scale_down_delay': 120, 'concurrency_target': 2, 'target_utilization_percentage': 70, 'target_in_flight_tokens': 40000, 'max_scale_down_rate': 2.0}\n)\n\nprint(response.text)" } ], "description": "Updates a 
development deployment's autoscaling settings and returns the update status.", @@ -521,11 +766,11 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/production/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"min_replica\": 0,\n \"max_replica\": 7,\n \"autoscaling_window\": 600,\n \"scale_down_delay\": 120,\n \"concurrency_target\": 2,\n \"target_utilization_percentage\": 70,\n \"target_in_flight_tokens\": 40000\n}'" + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/production/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"min_replica\": 0,\n \"max_replica\": 7,\n \"autoscaling_window\": 600,\n \"scale_down_delay\": 120,\n \"concurrency_target\": 2,\n \"target_utilization_percentage\": 70,\n \"target_in_flight_tokens\": 40000,\n \"max_scale_down_rate\": 2.0\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/production/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'min_replica': 0, 'max_replica': 7, 'autoscaling_window': 600, 'scale_down_delay': 120, 'concurrency_target': 2, 'target_utilization_percentage': 70, 'target_in_flight_tokens': 40000}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/production/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'min_replica': 0, 'max_replica': 7, 'autoscaling_window': 600, 'scale_down_delay': 120, 
'concurrency_target': 2, 'target_utilization_percentage': 70, 'target_in_flight_tokens': 40000, 'max_scale_down_rate': 2.0}\n)\n\nprint(response.text)" } ], "description": "Updates a production deployment's autoscaling settings and returns the update status.", @@ -564,11 +809,11 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"min_replica\": 0,\n \"max_replica\": 7,\n \"autoscaling_window\": 600,\n \"scale_down_delay\": 120,\n \"concurrency_target\": 2,\n \"target_utilization_percentage\": 70,\n \"target_in_flight_tokens\": 40000\n}'" + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"min_replica\": 0,\n \"max_replica\": 7,\n \"autoscaling_window\": 600,\n \"scale_down_delay\": 120,\n \"concurrency_target\": 2,\n \"target_utilization_percentage\": 70,\n \"target_in_flight_tokens\": 40000,\n \"max_scale_down_rate\": 2.0\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'min_replica': 0, 'max_replica': 7, 'autoscaling_window': 600, 'scale_down_delay': 120, 'concurrency_target': 2, 'target_utilization_percentage': 70, 'target_in_flight_tokens': 40000}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/autoscaling_settings\"\n\nheaders = {\"Authorization\": 
f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'min_replica': 0, 'max_replica': 7, 'autoscaling_window': 600, 'scale_down_delay': 120, 'concurrency_target': 2, 'target_utilization_percentage': 70, 'target_in_flight_tokens': 40000, 'max_scale_down_rate': 2.0}\n)\n\nprint(response.text)" } ], "description": "Updates a deployment's autoscaling settings and returns the update status.", @@ -999,37 +1244,27 @@ } ] }, - "/v1/models/{model_id}/deployments/{deployment_id}/logs": { - "post": { - "summary": "Gets the logs for a model deployment.", + "/v1/models/{model_id}/deployments/{deployment_id}/download": { + "get": { + "summary": "Gets a presigned download URL for a deployment's truss", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/logs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"start_epoch_millis\": null,\n \"end_epoch_millis\": null,\n \"direction\": null,\n \"limit\": null\n}'" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/download \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'start_epoch_millis': None, 'end_epoch_millis': None, 'direction': None, 'limit': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/download\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n 
\"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Gets all the logs for a model deployment in the given time range.", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetDeploymentLogsRequestV1" - } - } - }, - "required": true - }, + "description": "Gets a presigned URL to download the truss tar file for a deployment.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/GetLogsResponseV1" + "$ref": "#/components/schemas/DownloadDeploymentResponseV1" } } } @@ -1045,27 +1280,39 @@ } ] }, - "/v1/models/{model_id}/deployments/{deployment_id}/replicas/{replica_id}": { - "delete": { - "summary": "Terminates a replica in a deployment", + "/v1/models/{model_id}/deployments/{deployment_id}/config": { + "get": { + "summary": "Gets a deployment's config", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/replicas/{replica_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/config \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/replicas/{replica_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/config\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = 
requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns the deployment's config. `output_format` query param picks the shape: 'raw' (config.yaml text), 'parsed' (dict with defaults), or 'both' (default).", + "parameters": [ + { + "name": "output_format", + "in": "query", + "required": false, + "description": "'raw': verbatim config.yaml with comments \u2014 not available for deployments created before 2026-04-30. 'parsed': dict with server-side defaults applied \u2014 always available. 'both': both fields populated.", + "schema": { + "$ref": "#/components/schemas/DeploymentConfigOutputFormat", + "default": "both" + } } ], - "description": "Terminates a deployment replica and returns the termination status.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/TerminateReplicaResponseV1" + "$ref": "#/components/schemas/DeploymentConfigResponseV1" } } } @@ -1078,35 +1325,266 @@ }, { "$ref": "#/components/parameters/deployment_id" - }, - { - "$ref": "#/components/parameters/replica_id" } ] }, - "/v1/models/{model_id}/environments": { + "/v1/models/{model_id}/deployments/{deployment_id}/logs": { "get": { - "summary": "Get all environments", + "summary": "Gets the logs for a model deployment.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/models/{model_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/logs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = 
requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Gets all environments for a given model", - "responses": { - "200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EnvironmentsV1" - } - } + "description": "Gets all the logs for a model deployment in the given time range.", + "parameters": [ + { + "name": "start_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to start fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Start Epoch Millis" + } + }, + { + "name": "end_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to end fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "End Epoch Millis" + } + }, + { + "name": "direction", + "in": "query", + "required": false, + "description": "Sort order for logs", + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/SortOrderV1" + }, + { + "type": "null" + } + ], + "default": null + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "description": "Limit of logs to fetch in a single request", + "schema": { + "anyOf": [ + { + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 500, + "title": "Limit" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/GetLogsResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/model_id" + }, + { + "$ref": "#/components/parameters/deployment_id" + } + ], + "post": { + "summary": "Gets the logs for a model deployment (deprecated; use GET).", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/logs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"start_epoch_millis\": null,\n \"end_epoch_millis\": null,\n \"direction\": null,\n \"limit\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'start_epoch_millis': None, 'end_epoch_millis': None, 'direction': None, 'limit': None}\n)\n\nprint(response.text)" + } + ], + "description": "Use the GET form on this path instead.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetDeploymentLogsRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetLogsResponseV1" + } + } + } + } + }, + "x-stability": "deprecated", + "deprecated": true + } + }, + "/v1/models/{model_id}/deployments/{deployment_id}/replicas/{replica_id}": { + "delete": { + "summary": "Terminates a replica in a deployment", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/replicas/{replica_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": 
"import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/replicas/{replica_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Terminates a deployment replica and returns the termination status.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TerminateReplicaResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/model_id" + }, + { + "$ref": "#/components/parameters/deployment_id" + }, + { + "$ref": "#/components/parameters/replica_id" + } + ] + }, + "/v1/models/{model_id}/deployments/{deployment_id}/ssh/sign": { + "post": { + "summary": "Sign an SSH certificate for an inference model.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/ssh/sign \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"public_key\": null,\n \"replica_id\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/deployments/{deployment_id}/ssh/sign\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'public_key': None, 'replica_id': None}\n)\n\nprint(response.text)" + } + ], + "description": "Signs a short-lived SSH certificate granting access to a running inference model pod. 
Returns the signed SSH certificate, a JWT token for SSH proxy authentication, the proxy address to connect through, and the certificate expiry time.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SignSSHCertificateRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SignSSHCertificateResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/model_id" + }, + { + "$ref": "#/components/parameters/deployment_id" + } + ] + }, + "/v1/models/{model_id}/environments": { + "get": { + "summary": "Get all environments", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/models/{model_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Gets all environments for a given model", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EnvironmentsV1" + } + } } } } @@ -1121,11 +1599,11 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models/{model_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"staging\",\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 3,\n \"max_replica\": 2,\n \"min_replica\": 1,\n \"scale_down_delay\": 60,\n \"target_in_flight_tokens\": null,\n 
\"target_utilization_percentage\": null\n },\n \"promotion_settings\": {\n \"promotion_cleanup_strategy\": null,\n \"ramp_up_duration_seconds\": 600,\n \"ramp_up_while_promoting\": true,\n \"redeploy_on_promotion\": true,\n \"rolling_deploy\": true,\n \"rolling_deploy_config\": null\n }\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/models/{model_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"staging\",\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 3,\n \"max_replica\": 2,\n \"max_scale_down_rate\": null,\n \"min_replica\": 1,\n \"scale_down_delay\": 60,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"promotion_settings\": {\n \"promotion_cleanup_strategy\": null,\n \"ramp_up_duration_seconds\": 600,\n \"ramp_up_while_promoting\": true,\n \"redeploy_on_promotion\": true,\n \"rolling_deploy\": true,\n \"rolling_deploy_config\": null\n }\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'staging', 'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 3, 'max_replica': 2, 'min_replica': 1, 'scale_down_delay': 60, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'promotion_settings': {'promotion_cleanup_strategy': None, 'ramp_up_duration_seconds': 600, 'ramp_up_while_promoting': True, 'redeploy_on_promotion': True, 'rolling_deploy': True, 'rolling_deploy_config': None}}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/environments\"\n\nheaders = {\"Authorization\": 
f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'staging', 'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 3, 'max_replica': 2, 'max_scale_down_rate': None, 'min_replica': 1, 'scale_down_delay': 60, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'promotion_settings': {'promotion_cleanup_strategy': None, 'ramp_up_duration_seconds': 600, 'ramp_up_while_promoting': True, 'redeploy_on_promotion': True, 'rolling_deploy': True, 'rolling_deploy_config': None}}\n)\n\nprint(response.text)" } ], "description": "Creates an environment for the specified model and returns the environment.", @@ -1193,14 +1671,14 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/environments/{env_name} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 3,\n \"max_replica\": 2,\n \"min_replica\": 1,\n \"scale_down_delay\": 60,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"promotion_settings\": {\n \"promotion_cleanup_strategy\": null,\n \"ramp_up_duration_seconds\": 600,\n \"ramp_up_while_promoting\": true,\n \"redeploy_on_promotion\": true,\n \"rolling_deploy\": null,\n \"rolling_deploy_config\": null\n }\n}'" + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/models/{model_id}/environments/{env_name} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 3,\n \"max_replica\": 2,\n \"max_scale_down_rate\": null,\n \"min_replica\": 1,\n \"scale_down_delay\": 60,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"promotion_settings\": {\n \"promotion_cleanup_strategy\": null,\n \"ramp_up_duration_seconds\": 
600,\n \"ramp_up_while_promoting\": true,\n \"redeploy_on_promotion\": true,\n \"rolling_deploy\": null,\n \"rolling_deploy_config\": null\n }\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/environments/{env_name}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 3, 'max_replica': 2, 'min_replica': 1, 'scale_down_delay': 60, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'promotion_settings': {'promotion_cleanup_strategy': None, 'ramp_up_duration_seconds': 600, 'ramp_up_while_promoting': True, 'redeploy_on_promotion': True, 'rolling_deploy': None, 'rolling_deploy_config': None}}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/models/{model_id}/environments/{env_name}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 3, 'max_replica': 2, 'max_scale_down_rate': None, 'min_replica': 1, 'scale_down_delay': 60, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'promotion_settings': {'promotion_cleanup_strategy': None, 'ramp_up_duration_seconds': 600, 'ramp_up_while_promoting': True, 'redeploy_on_promotion': True, 'rolling_deploy': None, 'rolling_deploy_config': None}}\n)\n\nprint(response.text)" } ], - "description": "Updates an environment's settings and returns the updated environment.", + "description": "Asynchronously updates an environment's settings. 
Poll the GET endpoint for the applied state.", "requestBody": { "content": { "application/json": { @@ -1735,27 +2213,102 @@ } ] }, - "/v1/chains/{chain_id}/environments": { + "/v1/chains/{chain_id}/deployments/{chain_deployment_id}/chainlets/{chainlet_id}/logs": { "get": { - "summary": "Get all chain environments", + "summary": "Gets the logs for a chainlet within a chain deployment.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/chains/{chain_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/chains/{chain_id}/deployments/{chain_deployment_id}/chainlets/{chainlet_id}/logs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/chains/{chain_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/chains/{chain_id}/deployments/{chain_deployment_id}/chainlets/{chainlet_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Resolves the chainlet (by ID, scoped to the given chain deployment) to its underlying model deployment and returns its logs in the given time range.", + "parameters": [ + { + "name": "start_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to start fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Start Epoch Millis" + } + }, 
+ { + "name": "end_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to end fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "End Epoch Millis" + } + }, + { + "name": "direction", + "in": "query", + "required": false, + "description": "Sort order for logs", + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/SortOrderV1" + }, + { + "type": "null" + } + ], + "default": null + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "description": "Limit of logs to fetch in a single request", + "schema": { + "anyOf": [ + { + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 500, + "title": "Limit" + } } ], - "description": "Gets all chain environments for a given chain", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EnvironmentsV1" + "$ref": "#/components/schemas/GetLogsResponseV1" } } } @@ -1765,18 +2318,57 @@ "parameters": [ { "$ref": "#/components/parameters/chain_id" + }, + { + "$ref": "#/components/parameters/chain_deployment_id" + }, + { + "$ref": "#/components/parameters/chainlet_id" } - ], - "post": { - "summary": "Create a chain environment", - "x-codeSamples": [ - { - "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/chains/{chain_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"staging\",\n \"promotion_settings\": {\n \"promotion_cleanup_strategy\": null,\n \"ramp_up_duration_seconds\": 600,\n \"ramp_up_while_promoting\": true,\n \"redeploy_on_promotion\": true,\n \"rolling_deploy\": null,\n \"rolling_deploy_config\": null\n },\n \"chainlet_settings\": [\n {\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 4,\n \"max_replica\": 3,\n \"min_replica\": 2,\n 
\"scale_down_delay\": 63,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"chainlet_name\": \"HelloWorld\",\n \"instance_type_id\": \"2x8\"\n },\n {\n \"autoscaling_settings\": {\n \"autoscaling_window\": null,\n \"concurrency_target\": null,\n \"max_replica\": 3,\n \"min_replica\": 3,\n \"scale_down_delay\": null,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"chainlet_name\": \"RandInt\",\n \"instance_type_id\": \"A10Gx8x32\"\n }\n ]\n}'" + ] + }, + "/v1/chains/{chain_id}/environments": { + "get": { + "summary": "Get all chain environments", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/chains/{chain_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/chains/{chain_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Gets all chain environments for a given chain", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EnvironmentsV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/chain_id" + } + ], + "post": { + "summary": "Create a chain environment", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/chains/{chain_id}/environments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"staging\",\n \"promotion_settings\": {\n \"promotion_cleanup_strategy\": null,\n \"ramp_up_duration_seconds\": 600,\n \"ramp_up_while_promoting\": true,\n \"redeploy_on_promotion\": true,\n 
\"rolling_deploy\": null,\n \"rolling_deploy_config\": null\n },\n \"chainlet_settings\": [\n {\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 4,\n \"max_replica\": 3,\n \"max_scale_down_rate\": null,\n \"min_replica\": 2,\n \"scale_down_delay\": 63,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"chainlet_name\": \"HelloWorld\",\n \"instance_type_id\": \"2x8\"\n },\n {\n \"autoscaling_settings\": {\n \"autoscaling_window\": null,\n \"concurrency_target\": null,\n \"max_replica\": 3,\n \"max_scale_down_rate\": null,\n \"min_replica\": 3,\n \"scale_down_delay\": null,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"chainlet_name\": \"RandInt\",\n \"instance_type_id\": \"A10Gx8x32\"\n }\n ]\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/chains/{chain_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'staging', 'promotion_settings': {'promotion_cleanup_strategy': None, 'ramp_up_duration_seconds': 600, 'ramp_up_while_promoting': True, 'redeploy_on_promotion': True, 'rolling_deploy': None, 'rolling_deploy_config': None}, 'chainlet_settings': [{'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 4, 'max_replica': 3, 'min_replica': 2, 'scale_down_delay': 63, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'chainlet_name': 'HelloWorld', 'instance_type_id': '2x8'}, {'autoscaling_settings': {'autoscaling_window': None, 'concurrency_target': None, 'max_replica': 3, 'min_replica': 3, 'scale_down_delay': None, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'chainlet_name': 'RandInt', 'instance_type_id': 'A10Gx8x32'}]}\n)\n\nprint(response.text)" + "source": "import 
requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/chains/{chain_id}/environments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'staging', 'promotion_settings': {'promotion_cleanup_strategy': None, 'ramp_up_duration_seconds': 600, 'ramp_up_while_promoting': True, 'redeploy_on_promotion': True, 'rolling_deploy': None, 'rolling_deploy_config': None}, 'chainlet_settings': [{'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 4, 'max_replica': 3, 'max_scale_down_rate': None, 'min_replica': 2, 'scale_down_delay': 63, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'chainlet_name': 'HelloWorld', 'instance_type_id': '2x8'}, {'autoscaling_settings': {'autoscaling_window': None, 'concurrency_target': None, 'max_replica': 3, 'max_scale_down_rate': None, 'min_replica': 3, 'scale_down_delay': None, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'chainlet_name': 'RandInt', 'instance_type_id': 'A10Gx8x32'}]}\n)\n\nprint(response.text)" } ], "description": "Create a chain environment. 
Returns the resulting environment.", @@ -1928,11 +2520,11 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/chains/{chain_id}/environments/{env_name}/chainlet_settings/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"updates\": [\n {\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 4,\n \"max_replica\": 3,\n \"min_replica\": 2,\n \"scale_down_delay\": 63,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"chainlet_name\": \"HelloWorld\"\n }\n ]\n}'" + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/chains/{chain_id}/environments/{env_name}/chainlet_settings/autoscaling_settings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"updates\": [\n {\n \"autoscaling_settings\": {\n \"autoscaling_window\": 800,\n \"concurrency_target\": 4,\n \"max_replica\": 3,\n \"max_scale_down_rate\": null,\n \"min_replica\": 2,\n \"scale_down_delay\": 63,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"chainlet_name\": \"HelloWorld\"\n }\n ]\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/chains/{chain_id}/environments/{env_name}/chainlet_settings/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'updates': [{'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 4, 'max_replica': 3, 'min_replica': 2, 'scale_down_delay': 63, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'chainlet_name': 'HelloWorld'}]}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = 
\"https://api.baseten.co/v1/chains/{chain_id}/environments/{env_name}/chainlet_settings/autoscaling_settings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'updates': [{'autoscaling_settings': {'autoscaling_window': 800, 'concurrency_target': 4, 'max_replica': 3, 'max_scale_down_rate': None, 'min_replica': 2, 'scale_down_delay': 63, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'chainlet_name': 'HelloWorld'}]}\n)\n\nprint(response.text)" } ], "description": "Updates a chainlet environment's autoscaling settings and returns the updated chainlet environment settings.", @@ -2158,11 +2750,11 @@ "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/training_projects/{training_project_id}/jobs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"training_job\": {\n \"image\": {\n \"base_image\": \"hello-world\",\n \"docker_auth\": null\n },\n \"compute\": {\n \"node_count\": 1,\n \"cpu_count\": 1,\n \"memory\": \"2Gi\",\n \"accelerator\": {\n \"accelerator\": \"H100\",\n \"count\": 2\n }\n },\n \"runtime\": {\n \"start_commands\": [\n \"python main.py\"\n ],\n \"environment_variables\": {\n \"API_KEY\": \"your_api_key_here\",\n \"PATH\": \"/usr/bin\"\n },\n \"artifacts\": null,\n \"enable_cache\": true,\n \"cache_config\": {\n \"enable_legacy_hf_mount\": true,\n \"enabled\": true,\n \"mount_base_path\": \"/root/.cache\",\n \"require_cache_affinity\": true\n },\n \"checkpointing_config\": {\n \"enabled\": true,\n \"checkpoint_path\": \"/mnt/ckpts\",\n \"volume_size_gib\": 10\n },\n \"load_checkpoint_config\": null\n },\n \"name\": \"gpt-oss-job\",\n \"truss_user_env\": null,\n \"interactive_session\": null,\n \"weights\": [\n {\n \"allow_patterns\": null,\n \"auth\": null,\n \"auth_secret_name\": null,\n \"ignore_patterns\": null,\n \"mount_location\": \"/app/models/base\",\n 
\"source\": \"hf://meta-llama/Llama-3-8B@main\"\n }\n ]\n }\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/training_projects/{training_project_id}/jobs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"training_job\": {\n \"image\": {\n \"base_image\": \"hello-world\",\n \"docker_auth\": null\n },\n \"compute\": {\n \"node_count\": 1,\n \"cpu_count\": 1,\n \"memory\": \"2Gi\",\n \"accelerator\": {\n \"accelerator\": \"H100\",\n \"count\": 2\n }\n },\n \"runtime\": {\n \"start_commands\": [\n \"python main.py\"\n ],\n \"environment_variables\": {\n \"API_KEY\": \"your_api_key_here\",\n \"PATH\": \"/usr/bin\"\n },\n \"artifacts\": null,\n \"enable_cache\": true,\n \"cache_config\": {\n \"enable_legacy_hf_mount\": true,\n \"enabled\": true,\n \"mount_base_path\": \"/root/.cache\",\n \"require_cache_affinity\": true\n },\n \"checkpointing_config\": {\n \"enabled\": true,\n \"checkpoint_path\": \"/mnt/ckpts\",\n \"volume_size_gib\": 10\n },\n \"load_checkpoint_config\": null\n },\n \"name\": \"gpt-oss-job\",\n \"truss_user_env\": null,\n \"interactive_session\": null,\n \"weights\": [\n {\n \"allow_patterns\": null,\n \"auth\": null,\n \"auth_secret_name\": null,\n \"ignore_patterns\": null,\n \"mount_location\": \"/app/models/base\",\n \"source\": \"hf://meta-llama/Llama-3-8B@main\"\n }\n ],\n \"enable_baseten_workdir\": false,\n \"priority\": 0\n }\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'training_job': {'image': {'base_image': 'hello-world', 'docker_auth': None}, 'compute': {'node_count': 1, 'cpu_count': 1, 'memory': '2Gi', 'accelerator': {'accelerator': 'H100', 'count': 2}}, 'runtime': {'start_commands': ['python 
main.py'], 'environment_variables': {'API_KEY': 'your_api_key_here', 'PATH': '/usr/bin'}, 'artifacts': None, 'enable_cache': True, 'cache_config': {'enable_legacy_hf_mount': True, 'enabled': True, 'mount_base_path': '/root/.cache', 'require_cache_affinity': True}, 'checkpointing_config': {'enabled': True, 'checkpoint_path': '/mnt/ckpts', 'volume_size_gib': 10}, 'load_checkpoint_config': None}, 'name': 'gpt-oss-job', 'truss_user_env': None, 'interactive_session': None, 'weights': [{'allow_patterns': None, 'auth': None, 'auth_secret_name': None, 'ignore_patterns': None, 'mount_location': '/app/models/base', 'source': 'hf://meta-llama/Llama-3-8B@main'}]}}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'training_job': {'image': {'base_image': 'hello-world', 'docker_auth': None}, 'compute': {'node_count': 1, 'cpu_count': 1, 'memory': '2Gi', 'accelerator': {'accelerator': 'H100', 'count': 2}}, 'runtime': {'start_commands': ['python main.py'], 'environment_variables': {'API_KEY': 'your_api_key_here', 'PATH': '/usr/bin'}, 'artifacts': None, 'enable_cache': True, 'cache_config': {'enable_legacy_hf_mount': True, 'enabled': True, 'mount_base_path': '/root/.cache', 'require_cache_affinity': True}, 'checkpointing_config': {'enabled': True, 'checkpoint_path': '/mnt/ckpts', 'volume_size_gib': 10}, 'load_checkpoint_config': None}, 'name': 'gpt-oss-job', 'truss_user_env': None, 'interactive_session': None, 'weights': [{'allow_patterns': None, 'auth': None, 'auth_secret_name': None, 'ignore_patterns': None, 'mount_location': '/app/models/base', 'source': 'hf://meta-llama/Llama-3-8B@main'}], 'enable_baseten_workdir': False, 'priority': 0}}\n)\n\nprint(response.text)" } ], "description": "Creates a 
training job with the specified configuration.", @@ -2325,8 +2917,117 @@ ] }, "/v1/training_projects/{training_project_id}/jobs/{training_job_id}/logs": { - "post": { + "get": { "summary": "Get the logs for a training job.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/logs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Get the logs for a training job with the provided filters.", + "parameters": [ + { + "name": "start_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to start fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Start Epoch Millis" + } + }, + { + "name": "end_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to end fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "End Epoch Millis" + } + }, + { + "name": "direction", + "in": "query", + "required": false, + "description": "Sort order for logs", + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/SortOrderV1" + }, + { + "type": "null" + } + ], + "default": null + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "description": "Limit of logs to fetch in a single request", + "schema": { + "anyOf": [ + { + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": 
"null" + } + ], + "default": 500, + "title": "Limit" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetLogsResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/training_project_id" + }, + { + "$ref": "#/components/parameters/training_job_id" + } + ], + "post": { + "summary": "Get the logs for a training job (deprecated; use GET).", "x-codeSamples": [ { "lang": "bash", @@ -2337,7 +3038,7 @@ "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'start_epoch_millis': None, 'end_epoch_millis': None, 'direction': None, 'limit': None}\n)\n\nprint(response.text)" } ], - "description": "Get the logs for a training job with the provided filters.", + "description": "Use the GET form on this path instead.", "requestBody": { "content": { "application/json": { @@ -2359,6 +3060,74 @@ } } } + }, + "x-stability": "deprecated", + "deprecated": true + } + }, + "/v1/training_projects/{training_project_id}/jobs/{training_job_id}/metrics": { + "get": { + "summary": "Get the metrics for a training job.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/metrics \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/metrics\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n 
headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Get the metrics for a training job.", + "parameters": [ + { + "name": "end_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to end fetching metrics", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "End Epoch Millis" + } + }, + { + "name": "start_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to start fetching metrics.", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Start Epoch Millis" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetTrainingJobMetricsResponseV1" + } + } + } + } } }, "parameters": [ @@ -2368,11 +3137,9 @@ { "$ref": "#/components/parameters/training_job_id" } - ] - }, - "/v1/training_projects/{training_project_id}/jobs/{training_job_id}/metrics": { + ], "post": { - "summary": "Get the metrics for a training job.", + "summary": "Get the metrics for a training job (deprecated; use GET).", "x-codeSamples": [ { "lang": "bash", @@ -2383,7 +3150,7 @@ "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/metrics\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'end_epoch_millis': None, 'start_epoch_millis': None}\n)\n\nprint(response.text)" } ], - "description": "Get the metrics for a training job.", + "description": "Use the GET form on this path instead.", "requestBody": { "content": { "application/json": { @@ -2405,16 +3172,10 @@ } } } - } - }, - "parameters": [ - { - "$ref": "#/components/parameters/training_project_id" }, - { 
- "$ref": "#/components/parameters/training_job_id" - } - ] + "x-stability": "deprecated", + "deprecated": true + } }, "/v1/training_projects/{training_project_id}/jobs/{training_job_id}/stop": { "post": { @@ -2512,6 +3273,32 @@ } ], "description": "Get presigned URLs for all checkpoint files for a training job.", + "parameters": [ + { + "name": "page_size", + "in": "query", + "required": false, + "description": "Max files per page (default 1000).", + "schema": { + "default": 1000, + "minimum": 1, + "title": "Page Size", + "type": "integer" + } + }, + { + "name": "page_token", + "in": "query", + "required": false, + "description": "Offset into the file list (default 0).", + "schema": { + "default": 0, + "minimum": 0, + "title": "Page Token", + "type": "integer" + } + } + ], "responses": { "200": { "description": "", @@ -2619,6 +3406,52 @@ } ] }, + "/v1/training_projects/{training_project_id}/jobs/{training_job_id}/ssh/sign": { + "post": { + "summary": "Sign an SSH certificate for a training job.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/ssh/sign \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"public_key\": null,\n \"replica_id\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training_projects/{training_project_id}/jobs/{training_job_id}/ssh/sign\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'public_key': None, 'replica_id': None}\n)\n\nprint(response.text)" + } + ], + "description": "Signs a short-lived SSH certificate granting access to a specific training job pod. 
Returns the signed SSH certificate, a JWT token for SSH proxy authentication, the proxy address to connect through, and the certificate expiry time.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SignSSHCertificateRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SignSSHCertificateResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/training_project_id" + }, + { + "$ref": "#/components/parameters/training_job_id" + } + ] + }, "/v1/training_projects/{training_project_id}/cache/summary": { "get": { "summary": "Get training project cache summary.", @@ -2749,26 +3582,37 @@ } } }, - "/v1/blobs/credentials/model": { - "get": { - "summary": "Get blob credentials for models.", + "/v1/trainers/search": { + "post": { + "summary": "Search trainers.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/blobs/credentials/model \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/trainers/search \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"trainer_id\": \"k4q95w5\"\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/blobs/credentials/model\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/trainers/search\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'trainer_id': 
'k4q95w5'}\n)\n\nprint(response.text)" } ], + "description": "Search trainers visible to the requesting user.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SearchTrainersRequestV1" + } + } + }, + "required": true + }, "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/GetBlobCredentialsResponseV1" + "$ref": "#/components/schemas/SearchTrainersResponseV1" } } } @@ -2776,26 +3620,37 @@ } } }, - "/v1/blobs/credentials/train": { - "get": { - "summary": "Get blob credentials for training.", + "/v1/trainers/checkpoints/search": { + "post": { + "summary": "Look up trainer checkpoint info by bt:// URI.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/blobs/credentials/train \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/trainers/checkpoints/search \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"checkpoint_path\": \"bt://loops:k4q95w5/sampler_weights/step-100\"\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/blobs/credentials/train\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/trainers/checkpoints/search\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'checkpoint_path': 'bt://loops:k4q95w5/sampler_weights/step-100'}\n)\n\nprint(response.text)" } ], + "description": "Look up the metadata for a trainer checkpoint by its 
bt://loops:<trainer_id>/(weights|sampler_weights)/<checkpoint_name> URI.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CheckpointSearchRequestV1" + } + } + }, + "required": true + }, "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/GetBlobCredentialsResponseV1" + "$ref": "#/components/schemas/CheckpointSearchResponseV1" } } } @@ -2803,25 +3658,25 @@ } } }, - "/v1/teams/{team_id}/api_keys": { + "/v1/trainer_sessions": { "post": { - "summary": "Creates a team API key", + "summary": "Create a trainer session.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/teams/{team_id}/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"my-api-key\",\n \"type\": \"PERSONAL\"\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/trainer_sessions \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"training_project_id\": null\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/teams/{team_id}/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'my-api-key', 'type': 'PERSONAL'}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/trainer_sessions\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'training_project_id': None}\n)\n\nprint(response.text)" } ], - "description": "Creates a team API key with the provided name and type. 
The API key is returned in the response.", + "description": "Creates a TrainerSession for the given training project.", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateAPIKeyRequestV1" + "$ref": "#/components/schemas/CreateTrainerSessionRequestV1" } } }, @@ -2833,63 +3688,76 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/APIKeyV1" + "$ref": "#/components/schemas/CreateTrainerSessionResponseV1" } } } } } - }, - "parameters": [ - { - "$ref": "#/components/parameters/team_id" - } - ] + } }, - "/v1/api_keys": { - "get": { - "summary": "Lists the user's API keys", + "/v1/trainer_sessions/{session_id}/trainers": { + "post": { + "summary": "Create a trainer server.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/trainer_sessions/{session_id}/trainers \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"model\": null,\n \"max_seq_len\": null,\n \"lora_rank\": null,\n \"seed\": null,\n \"scale_down_delay_seconds\": null,\n \"checkpoint_path\": \"bt://loops:k4q95w5/weights/step-100\"\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/trainer_sessions/{session_id}/trainers\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'model': None, 'max_seq_len': None, 'lora_rank': None, 'seed': 
None, 'scale_down_delay_seconds': None, 'checkpoint_path': 'bt://loops:k4q95w5/weights/step-100'}\n)\n\nprint(response.text)" } ], + "description": "Creates a TrainerServer and an associated SamplingServer for the given trainer session.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateTrainerServerRequestV1" + } + } + }, + "required": true + }, "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/APIKeysV1" + "$ref": "#/components/schemas/CreateTrainerServerResponseV1" } } } } } }, + "parameters": [ + { + "$ref": "#/components/parameters/session_id" + } + ] + }, + "/v1/trainer_sessions/{session_id}/samplers": { "post": { - "summary": "Creates an API key", + "summary": "Create a sampling server.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"my-api-key\",\n \"type\": \"PERSONAL\"\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/trainer_sessions/{session_id}/samplers \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"model\": null,\n \"max_seq_length\": null,\n \"checkpoint_path\": \"bt://loops:k4q95w5/sampler_weights/step-100\"\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'my-api-key', 'type': 'PERSONAL'}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/trainer_sessions/{session_id}/samplers\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = 
requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'model': None, 'max_seq_length': None, 'checkpoint_path': 'bt://loops:k4q95w5/sampler_weights/step-100'}\n)\n\nprint(response.text)" } ], - "description": "Creates an API key with the provided name and type. The API key is returned in the response.", + "description": "Creates a standalone SamplingServer (not linked to a TrainerServer).", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateAPIKeyRequestV1" + "$ref": "#/components/schemas/CreateSamplingServerRequestV1" } } }, @@ -2901,35 +3769,40 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/APIKeyV1" + "$ref": "#/components/schemas/CreateSamplingServerResponseV1" } } } } } - } + }, + "parameters": [ + { + "$ref": "#/components/parameters/session_id" + } + ] }, - "/v1/api_keys/{api_key_prefix}": { - "delete": { - "summary": "Deletes an API key by prefix", + "/v1/trainer_sessions/{session_id}/trainers/{trainer_id}/checkpoints": { + "get": { + "summary": "List trainer server checkpoints.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/api_keys/{api_key_prefix} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/trainer_sessions/{session_id}/trainers/{trainer_id}/checkpoints \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/api_keys/{api_key_prefix}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = 
\"https://api.baseten.co/v1/trainer_sessions/{session_id}/trainers/{trainer_id}/checkpoints\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Deletes an API key by prefix and returns info about the API key.", + "description": "List the checkpoints saved by a trainer server.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/APIKeyTombstoneV1" + "$ref": "#/components/schemas/GetTrainerServerCheckpointsResponseV1" } } } @@ -2938,55 +3811,99 @@ }, "parameters": [ { - "$ref": "#/components/parameters/api_key_prefix" + "$ref": "#/components/parameters/session_id" + }, + { + "$ref": "#/components/parameters/trainer_id" } ] }, - "/v1/model_apis/snapshots": { + "/v1/trainer_sessions/{session_id}/trainers/{trainer_id}/checkpoints/{checkpoint_id}/files": { "get": { - "summary": "Get the latest model weight snapshot", + "summary": "Get trainer server checkpoint files.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/model_apis/snapshots \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/trainer_sessions/{session_id}/trainers/{trainer_id}/checkpoints/{checkpoint_id}/files \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = 
\"https://api.baseten.co/v1/trainer_sessions/{session_id}/trainers/{trainer_id}/checkpoints/{checkpoint_id}/files\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Gets the most recent model weight snapshot for the specified model.", + "description": "Get presigned URLs for the files under a trainer server checkpoint. Returns a paginated list; deviates from Tinker's single-archive-URL shape because trainer weight sync writes an unzipped folder.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ModelWeightSnapshotV1" + "$ref": "#/components/schemas/GetTrainerServerCheckpointFilesResponseV1" } } } } } }, + "parameters": [ + { + "$ref": "#/components/parameters/session_id" + }, + { + "$ref": "#/components/parameters/trainer_id" + }, + { + "$ref": "#/components/parameters/checkpoint_id" + } + ] + }, + "/v1/loops/capabilities": { + "get": { + "summary": "Get Loops server capabilities.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/capabilities \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/capabilities\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns the list of models supported by the Loops server, including each model's maximum context length.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetLoopsCapabilitiesResponseV1" + } + } + } + } + } + } + }, + "/v1/loops/sessions": { 
"post": { - "summary": "Create a model weight snapshot", + "summary": "Create a Loops session.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/model_apis/snapshots \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"model\": null,\n \"snapshot_uri\": null\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/loops/sessions \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"training_project_id\": null\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'model': None, 'snapshot_uri': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/sessions\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'training_project_id': None}\n)\n\nprint(response.text)" } ], - "description": "Creates a model weight snapshot for the specified model and returns the snapshot.", + "description": "Creates a Loops session for the given training project.", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateModelWeightSnapshotRequestV1" + "$ref": "#/components/schemas/CreateLoopsSessionRequestV1" } } }, @@ -2998,7 +3915,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ModelWeightSnapshotV1" + "$ref": "#/components/schemas/CreateLoopsSessionResponseV1" } } } @@ -3006,27 +3923,27 @@ } } }, - "/v1/model_apis/snapshots/{model_id}": { + "/v1/loops/sessions/{session_id}": { "get": { - "summary": "Get the latest model weight snapshot", + 
"summary": "Get a Loops session.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/model_apis/snapshots/{model_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/sessions/{session_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots/{model_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/sessions/{session_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Gets the most recent model weight snapshot for the specified model.", + "description": "Fetch a Loops session by ID.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ModelWeightSnapshotV1" + "$ref": "#/components/schemas/GetLoopsSessionResponseV1" } } } @@ -3035,64 +3952,99 @@ }, "parameters": [ { - "$ref": "#/components/parameters/model_id" + "$ref": "#/components/parameters/session_id" } - ], - "post": { - "summary": "Create a model weight snapshot", + ] + }, + "/v1/loops/runs": { + "get": { + "summary": "List Loops runs.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/model_apis/snapshots/{model_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"model\": null,\n \"snapshot_uri\": null\n}'" + "source": "curl --request GET \\\n--url 
https://api.baseten.co/v1/loops/runs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots/{model_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'model': None, 'snapshot_uri': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/runs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Creates a model weight snapshot for the specified model and returns the snapshot.", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateModelWeightSnapshotRequestV1" - } + "description": "List Loops runs visible to the requesting user, optionally filtered by run id and/or base model.", + "parameters": [ + { + "name": "run_id", + "in": "query", + "required": false, + "description": "Filter by run ID.", + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "examples": [ + "k4q95w5" + ], + "title": "Run Id" } }, - "required": true - }, + { + "name": "base_model", + "in": "query", + "required": false, + "description": "Filter runs by base model name.", + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "examples": [ + "Qwen/Qwen3-8B" + ], + "title": "Base Model" + } + } + ], "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ModelWeightSnapshotV1" + "$ref": "#/components/schemas/ListLoopsRunsResponseV1" } } } } } - } - }, - "/v1/llm_models": { 
+ }, "post": { - "summary": "Creates a new BIS LLM deployment", + "summary": "Create a Loops run.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/llm_models \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"resources\": null,\n \"llm_version\": null,\n \"llm_config\": null,\n \"environment_variables\": null,\n \"autoscaling_settings\": {\n \"autoscaling_window\": 600,\n \"concurrency_target\": null,\n \"max_replica\": 5,\n \"min_replica\": 1,\n \"scale_down_delay\": 300,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"additional_autoscaling_config\": {\n \"metrics\": [\n {\n \"name\": \"in_flight_tokens\",\n \"target\": 40000\n }\n ]\n },\n \"metadata\": {\n \"environment\": \"production\",\n \"git_sha\": \"abc123\"\n },\n \"name\": null\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/loops/runs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"session_id\": null,\n \"base_model\": null,\n \"max_seq_len\": null,\n \"lora_rank\": null,\n \"seed\": null,\n \"scale_down_delay_seconds\": null,\n \"path\": \"bt://loops:k4q95w5/weights/step-100\",\n \"reuse_from_session_id\": null\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/llm_models\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'resources': None, 'llm_version': None, 'llm_config': None, 'environment_variables': None, 'autoscaling_settings': {'autoscaling_window': 600, 'concurrency_target': None, 'max_replica': 5, 'min_replica': 1, 'scale_down_delay': 300, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'additional_autoscaling_config': {'metrics': [{'name': 'in_flight_tokens', 'target': 40000}]}, 'metadata': {'environment': 
'production', 'git_sha': 'abc123'}, 'name': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/runs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'session_id': None, 'base_model': None, 'max_seq_len': None, 'lora_rank': None, 'seed': None, 'scale_down_delay_seconds': None, 'path': 'bt://loops:k4q95w5/weights/step-100', 'reuse_from_session_id': None}\n)\n\nprint(response.text)" } ], + "description": "Creates a Loops run with an associated sampler in the given session.", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateLLMModelRequestV1" + "$ref": "#/components/schemas/CreateLoopsRunRequestV1" } } }, @@ -3104,7 +4056,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LLMModelV1" + "$ref": "#/components/schemas/CreateLoopsRunResponseV1" } } } @@ -3112,36 +4064,27 @@ } } }, - "/v1/llm_models/{model_id}/deployments": { - "post": { - "summary": "Creates a new BIS LLM deployment version", + "/v1/loops/runs/{run_id}": { + "get": { + "summary": "Get a Loops run.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/llm_models/{model_id}/deployments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"resources\": null,\n \"llm_version\": null,\n \"llm_config\": null,\n \"environment_variables\": null,\n \"autoscaling_settings\": {\n \"autoscaling_window\": 600,\n \"concurrency_target\": null,\n \"max_replica\": 5,\n \"min_replica\": 1,\n \"scale_down_delay\": 300,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"additional_autoscaling_config\": {\n \"metrics\": [\n {\n \"name\": \"in_flight_tokens\",\n \"target\": 40000\n }\n ]\n },\n \"metadata\": {\n \"environment\": 
\"production\",\n \"git_sha\": \"abc123\"\n }\n}'" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/runs/{run_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/llm_models/{model_id}/deployments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'resources': None, 'llm_version': None, 'llm_config': None, 'environment_variables': None, 'autoscaling_settings': {'autoscaling_window': 600, 'concurrency_target': None, 'max_replica': 5, 'min_replica': 1, 'scale_down_delay': 300, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'additional_autoscaling_config': {'metrics': [{'name': 'in_flight_tokens', 'target': 40000}]}, 'metadata': {'environment': 'production', 'git_sha': 'abc123'}}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/runs/{run_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateLLMModelVersionRequestV1" - } - } - }, - "required": true - }, + "description": "Fetch a Loops run by ID.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LLMModelVersionV1" + "$ref": "#/components/schemas/GetLoopsRunResponseV1" } } } @@ -3150,31 +4093,31 @@ }, "parameters": [ { - "$ref": "#/components/parameters/model_id" + "$ref": "#/components/parameters/run_id" } ] }, - "/v1/library_listings": { + "/v1/loops/samplers": { "get": { - "summary": "Gets all library listings", + 
"summary": "List Loops samplers.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/samplers \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/samplers\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Returns all library listings for the authenticated user's organization.", + "description": "List Loops samplers visible to the requesting user.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingsV1" + "$ref": "#/components/schemas/ListLoopsSamplersResponseV1" } } } @@ -3182,23 +4125,23 @@ } }, "post": { - "summary": "Creates a new library listing", + "summary": "Create a Loops sampler.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/library_listings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"display_name\": null,\n \"user_defined_id\": null,\n \"is_public\": null\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/loops/samplers \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"session_id\": null,\n \"base_model\": null,\n 
\"max_seq_length\": null,\n \"model_path\": \"bt://loops:k4q95w5/sampler_weights/step-100\",\n \"reuse_from_session_id\": null\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'display_name': None, 'user_defined_id': None, 'is_public': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/samplers\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'session_id': None, 'base_model': None, 'max_seq_length': None, 'model_path': 'bt://loops:k4q95w5/sampler_weights/step-100', 'reuse_from_session_id': None}\n)\n\nprint(response.text)" } ], - "description": "Creates a new library listing for the authenticated user's organization.", + "description": "Creates a standalone Loops sampler not linked to a run.", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateLibraryListingRequestV1" + "$ref": "#/components/schemas/CreateLoopsSamplerRequestV1" } } }, @@ -3210,7 +4153,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingV1" + "$ref": "#/components/schemas/CreateLoopsSamplerResponseV1" } } } @@ -3218,27 +4161,27 @@ } } }, - "/v1/library_listings/{user_defined_listing_id}": { - "delete": { - "summary": "Deletes a library listing", + "/v1/loops/samplers/{sampler_id}": { + "get": { + "summary": "Get a Loops sampler.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + 
"source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/samplers/{sampler_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/samplers/{sampler_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Deletes a library listing and all of its associated versions. Any versions that are currently live will also be removed.", + "description": "Fetch a Loops sampler by ID.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingTombstoneV1" + "$ref": "#/components/schemas/GetLoopsSamplerResponseV1" } } } @@ -3247,53 +4190,122 @@ }, "parameters": [ { - "$ref": "#/components/parameters/user_defined_listing_id" + "$ref": "#/components/parameters/sampler_id" } - ], + ] + }, + "/v1/loops/checkpoints": { "get": { - "summary": "Gets a library listing", + "summary": "List Loops checkpoints.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/checkpoints \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", 
\"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/checkpoints\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "List Loops checkpoints filtered by run id, base model, or bt:// URI. Provide exactly one filter.", + "parameters": [ + { + "name": "run_id", + "in": "query", + "required": false, + "description": "Filter by run ID. Returns all checkpoints saved by the run.", + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "examples": [ + "k4q95w5" + ], + "title": "Run Id" + } + }, + { + "name": "base_model", + "in": "query", + "required": false, + "description": "Filter by base model. Returns checkpoints across the caller's runs of this base model.", + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "examples": [ + "Qwen/Qwen3-8B" + ], + "title": "Base Model" + } + }, + { + "name": "checkpoint_path", + "in": "query", + "required": false, + "description": "bt:// URI of a Loops checkpoint. 
Form: bt://loops:<run_id>/(weights|sampler_weights)/<checkpoint_name>.", + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "examples": [ + "bt://loops:k4q95w5/sampler_weights/step-100" + ], + "title": "Checkpoint Path" + } } ], - "description": "Returns a specific library listing by its user-defined identifier.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingV1" + "$ref": "#/components/schemas/ListLoopsCheckpointsResponseV1" } } } } } - }, - "patch": { - "summary": "Updates a library listing", + } + }, + "/v1/loops/checkpoints/validate": { + "post": { + "summary": "Validate a Loops checkpoint bt:// URI.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"display_name\": null,\n \"is_public\": null\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/loops/checkpoints/validate \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"checkpoint_path\": \"bt://loops:k4q95w5/sampler_weights/step-100\"\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'display_name': None, 'is_public': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/checkpoints/validate\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'checkpoint_path': 
'bt://loops:k4q95w5/sampler_weights/step-100'}\n)\n\nprint(response.text)" } ], - "description": "Updates the display name of a library listing.", + "description": "Whether the caller can manage and use this checkpoint.", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UpdateLibraryListingRequestV1" + "$ref": "#/components/schemas/ValidateLoopsCheckpointRequestV1" } } }, @@ -3305,7 +4317,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingV1" + "$ref": "#/components/schemas/ValidateLoopsCheckpointResponseV1" } } } @@ -3313,27 +4325,27 @@ } } }, - "/v1/library_listings/{user_defined_listing_id}/versions": { + "/v1/loops/checkpoints/{checkpoint_id}/files": { "get": { - "summary": "Gets all versions for a library listing", + "summary": "Get Loops checkpoint files.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/checkpoints/{checkpoint_id}/files \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/checkpoints/{checkpoint_id}/files\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Returns all versions 
for a specific library listing.", + "description": "Get presigned URLs for the files under a Loops checkpoint. Returns a paginated list.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingVersionsV1" + "$ref": "#/components/schemas/LoopsCheckpointFilesResponseV1" } } } @@ -3342,39 +4354,31 @@ }, "parameters": [ { - "$ref": "#/components/parameters/user_defined_listing_id" + "$ref": "#/components/parameters/checkpoint_id" } - ], - "post": { - "summary": "Creates a new library listing version", - "x-codeSamples": [ - { + ] + }, + "/v1/loops/deployments": { + "get": { + "summary": "List Loops deployments.", + "x-codeSamples": [ + { "lang": "bash", - "source": "curl --request POST \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"display_name\": null,\n \"is_public\": null,\n \"oracle_version_id\": null,\n \"allow_truss_download\": null,\n \"version_tag\": null\n}'" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/deployments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'display_name': None, 'is_public': None, 'oracle_version_id': None, 'allow_truss_download': None, 'version_tag': None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/deployments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n 
json={}\n)\n\nprint(response.text)" } ], - "description": "Creates a new library listing version from an existing model version. The model version must be fully built (have an image_uri). If a listing with the given id already exists for the org, a new version is added. Otherwise, a new listing is created.", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateLibraryListingVersionRequestV1" - } - } - }, - "required": true - }, + "description": "List the caller's Loops deployments. Returns every deployment regardless of status; clients filter terminal states.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingVersionV1" + "$ref": "#/components/schemas/ListLoopsDeploymentsResponseV1" } } } @@ -3382,27 +4386,27 @@ } } }, - "/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}": { - "delete": { - "summary": "Deletes a library listing version", + "/v1/loops/deployments/{deployment_id}/deactivate": { + "post": { + "summary": "Deactivate a Loops deployment.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/loops/deployments/{deployment_id}/deactivate \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = 
os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/deployments/{deployment_id}/deactivate\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Deletes a specific version of a library listing. Deleting a live version will fail with a 400 error \u2014 demote the version first by setting another version as live.", + "description": "Shut down a Loops deployment by ID. Saved checkpoints remain accessible. Resolving base_model -> deployment_id is the caller's responsibility \u2014 list deployments and pick the active one.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingVersionTombstoneV1" + "$ref": "#/components/schemas/DeactivateLoopsDeploymentResponseV1" } } } @@ -3411,56 +4415,62 @@ }, "parameters": [ { - "$ref": "#/components/parameters/user_defined_listing_id" - }, - { - "$ref": "#/components/parameters/version_tag" + "$ref": "#/components/parameters/deployment_id" } - ], + ] + }, + "/v1/loops/deployments/{deployment_id}": { "get": { - "summary": "Gets a library listing version", + "summary": "Get a Loops deployment.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/deployments/{deployment_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n 
\"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/deployments/{deployment_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } ], - "description": "Returns a specific version of a library listing.", + "description": "Fetch a Loops deployment by ID, including its latest status.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingVersionV1" + "$ref": "#/components/schemas/GetLoopsDeploymentResponseV1" } } } } } }, - "patch": { - "summary": "Updates a library listing version", + "parameters": [ + { + "$ref": "#/components/parameters/deployment_id" + } + ] + }, + "/v1/loops/deployments/{deployment_id}/metrics": { + "post": { + "summary": "Get metrics for a Loops trainer deployment.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"is_live\": null,\n \"allow_truss_download\": null\n}'" + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/loops/deployments/{deployment_id}/metrics \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"end_epoch_millis\": null,\n \"start_epoch_millis\": null\n}'" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'is_live': None, 'allow_truss_download': 
None}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/deployments/{deployment_id}/metrics\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'end_epoch_millis': None, 'start_epoch_millis': None}\n)\n\nprint(response.text)" } ], - "description": "Updates a library listing version. Setting is_live to true will demote the current live version.", + "description": "Returns per-node GPU/CPU/memory utilization and Knative queue-proxy request rate / concurrency / latency for the trainer pods. The sampler half of a Loops deployment is an OracleVersion and uses the existing model-metrics endpoint.", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UpdateLibraryListingVersionRequestV1" + "$ref": "#/components/schemas/GetLoopsDeploymentMetricsRequestV1" } } }, @@ -3472,343 +4482,3619 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LibraryListingVersionV1" + "$ref": "#/components/schemas/GetLoopsDeploymentMetricsResponseV1" } } } } } - } + }, + "parameters": [ + { + "$ref": "#/components/parameters/deployment_id" + } + ] }, - "/v1/billing/usage_summary": { + "/v1/loops/deployments/{deployment_id}/logs": { "get": { - "summary": "Gets billing usage summary for a date range", + "summary": "Get logs for a Loops trainer deployment.", "x-codeSamples": [ { "lang": "bash", - "source": "curl --request GET \\\n--url https://api.baseten.co/v1/billing/usage_summary \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/loops/deployments/{deployment_id}/logs \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, { "lang": "python", - "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = 
\"https://api.baseten.co/v1/billing/usage_summary\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/loops/deployments/{deployment_id}/logs\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Fetch logs from the trainer pods of a Loops deployment. Visible to any member of the deployment's team.", + "parameters": [ + { + "name": "start_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to start fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Start Epoch Millis" + } + }, + { + "name": "end_epoch_millis", + "in": "query", + "required": false, + "description": "Epoch millis timestamp to end fetching logs", + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "End Epoch Millis" + } + }, + { + "name": "direction", + "in": "query", + "required": false, + "description": "Sort order for logs", + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/SortOrderV1" + }, + { + "type": "null" + } + ], + "default": null + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "description": "Limit of logs to fetch in a single request", + "schema": { + "anyOf": [ + { + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 500, + "title": "Limit" + } } ], - "description": "Returns billing usage data within the specified date range. Includes dedicated model serving, training, and model APIs usage. 
The date range must not exceed 31 days.", "responses": { "200": { "description": "", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UsageSummaryV1" + "$ref": "#/components/schemas/GetLogsResponseV1" } } } } } - } - } - }, - "openapi": "3.1.0", - "components": { - "schemas": { - "SecretV1": { - "description": "A Baseten secret. Note that we do not support retrieving secret values.", - "properties": { - "created_at": { - "description": "Time the secret was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", - "type": "string" - }, - "name": { - "description": "Name of the secret", - "title": "Name", - "type": "string" + }, + "parameters": [ + { + "$ref": "#/components/parameters/deployment_id" + } + ] + }, + "/v1/training/capacity": { + "get": { + "summary": "Get training GPU capacity.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/training/capacity \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, - "team_name": { - "description": "Name of the team the secret belongs to", - "title": "Team Name", - "type": "string" + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/training/capacity\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } - }, - "required": [ - "created_at", - "name", - "team_name" ], - "title": "SecretV1", - "type": "object" - }, - "SecretsV1": { - "description": "A list of Baseten secrets.", - "properties": { - "secrets": { - "items": { - "$ref": "#/components/schemas/SecretV1" - }, - "title": "Secrets", - "type": "array" + "description": "Returns GPU capacity limits (baseline and peak) and current usage for the organization.", + "responses": { + "200": { + "description": "", + "content": { + 
"application/json": { + "schema": { + "$ref": "#/components/schemas/GetTrainingGpuCapacityResponseV1" + } + } + } } - }, - "required": [ - "secrets" - ], - "title": "SecretsV1", - "type": "object" - }, - "UpsertSecretRequestV1": { - "description": "A request to create or update a Baseten secret by name.", - "properties": { - "name": { - "description": "Name of the new or existing secret", - "examples": [ - "my_secret" - ], - "title": "Name", - "type": "string" + } + } + }, + "/v1/blobs/credentials/model": { + "get": { + "summary": "Get blob credentials for models.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/blobs/credentials/model \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" }, - "value": { - "description": "Value of the secret", - "examples": [ - "my_secret_value" - ], - "title": "Value", - "type": "string" + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/blobs/credentials/model\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" } - }, - "required": [ - "name", - "value" ], - "title": "UpsertSecretRequestV1", - "type": "object" - }, - "TeamV1": { - "description": "A team.", - "properties": { - "id": { - "description": "Unique identifier of the team", - "title": "Id", - "type": "string" - }, - "name": { - "description": "Name of the team", - "title": "Name", - "type": "string" - }, - "default": { - "type": "boolean", - "description": "Whether this is the default team for the organization", - "title": "Default" - }, - "created_at": { - "description": "Time the team was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", - "type": "string" + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + 
"$ref": "#/components/schemas/GetBlobCredentialsResponseV1" + } + } + } } - }, - "required": [ - "id", - "name", - "default", - "created_at" - ], - "title": "TeamV1", - "type": "object" + } + } + }, + "/v1/blobs/credentials/train": { + "get": { + "summary": "Get blob credentials for training.", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/blobs/credentials/train \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/blobs/credentials/train\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetBlobCredentialsResponseV1" + } + } + } + } + } + } + }, + "/v1/teams/{team_id}/api_keys": { + "post": { + "summary": "Creates a team API key", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/teams/{team_id}/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"my-api-key\",\n \"type\": \"PERSONAL\"\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/teams/{team_id}/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'my-api-key', 'type': 'PERSONAL'}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a team API key with the provided name and type. 
The API key is returned in the response.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateAPIKeyRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/APIKeyV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/team_id" + } + ] + }, + "/v1/api_keys": { + "get": { + "summary": "Lists the user's API keys (metadata only, no plain text keys)", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/APIKeysV1" + } + } + } + } + } + }, + "post": { + "summary": "Creates an API key", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"my-api-key\",\n \"type\": \"PERSONAL\"\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'my-api-key', 'type': 'PERSONAL'}\n)\n\nprint(response.text)" + } + ], + "description": "Creates an API key with the provided name and 
type. The API key is returned in the response.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateAPIKeyRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/APIKeyV1" + } + } + } + } + } + } + }, + "/v1/api_keys/{api_key_prefix}": { + "delete": { + "summary": "Deletes an API key by prefix", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/api_keys/{api_key_prefix} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/api_keys/{api_key_prefix}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Deletes an API key by prefix and returns info about the API key.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/APIKeyTombstoneV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/api_key_prefix" + } + ] + }, + "/v1/model_apis/snapshots": { + "get": { + "summary": "Get the latest model weight snapshot", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/model_apis/snapshots \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n 
json={}\n)\n\nprint(response.text)" + } + ], + "description": "Gets the most recent model weight snapshot for the specified model.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelWeightSnapshotV1" + } + } + } + } + } + }, + "post": { + "summary": "Create a model weight snapshot", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/model_apis/snapshots \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"model\": null,\n \"snapshot_uri\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'model': None, 'snapshot_uri': None}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a model weight snapshot for the specified model and returns the snapshot.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateModelWeightSnapshotRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelWeightSnapshotV1" + } + } + } + } + } + } + }, + "/v1/model_apis/snapshots/{model_id}": { + "get": { + "summary": "Get the latest model weight snapshot", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/model_apis/snapshots/{model_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots/{model_id}\"\n\nheaders = 
{\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Gets the most recent model weight snapshot for the specified model.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelWeightSnapshotV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/model_id" + } + ], + "post": { + "summary": "Create a model weight snapshot", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/model_apis/snapshots/{model_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"model\": null,\n \"snapshot_uri\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/model_apis/snapshots/{model_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'model': None, 'snapshot_uri': None}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a model weight snapshot for the specified model and returns the snapshot.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateModelWeightSnapshotRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelWeightSnapshotV1" + } + } + } + } + } + } + }, + "/v1/llm_models": { + "post": { + "summary": "Creates a new BIS LLM deployment", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/llm_models \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"resources\": null,\n 
\"llm_version\": null,\n \"llm_config\": null,\n \"environment_variables\": null,\n \"model_metadata\": null,\n \"autoscaling_settings\": {\n \"autoscaling_window\": 600,\n \"concurrency_target\": null,\n \"max_replica\": 5,\n \"max_scale_down_rate\": null,\n \"min_replica\": 1,\n \"scale_down_delay\": 300,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"additional_autoscaling_config\": {\n \"metrics\": [\n {\n \"name\": \"in_flight_tokens\",\n \"target\": 40000\n }\n ]\n },\n \"metadata\": {\n \"environment\": \"production\",\n \"git_sha\": \"abc123\"\n },\n \"weights\": [\n {\n \"mount_location\": \"/models/base\",\n \"source\": \"hf://meta-llama/Llama-3-8B\"\n }\n ],\n \"name\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/llm_models\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'resources': None, 'llm_version': None, 'llm_config': None, 'environment_variables': None, 'model_metadata': None, 'autoscaling_settings': {'autoscaling_window': 600, 'concurrency_target': None, 'max_replica': 5, 'max_scale_down_rate': None, 'min_replica': 1, 'scale_down_delay': 300, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'additional_autoscaling_config': {'metrics': [{'name': 'in_flight_tokens', 'target': 40000}]}, 'metadata': {'environment': 'production', 'git_sha': 'abc123'}, 'weights': [{'mount_location': '/models/base', 'source': 'hf://meta-llama/Llama-3-8B'}], 'name': None}\n)\n\nprint(response.text)" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateLLMModelRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/LLMModelHandleV1" + } + } + } + } + } + } + }, + "/v1/teams/{team_id}/llm_models": { + "post": { + "summary": "Creates a new BIS LLM deployment", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/teams/{team_id}/llm_models \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"resources\": null,\n \"llm_version\": null,\n \"llm_config\": null,\n \"environment_variables\": null,\n \"model_metadata\": null,\n \"autoscaling_settings\": {\n \"autoscaling_window\": 600,\n \"concurrency_target\": null,\n \"max_replica\": 5,\n \"max_scale_down_rate\": null,\n \"min_replica\": 1,\n \"scale_down_delay\": 300,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"additional_autoscaling_config\": {\n \"metrics\": [\n {\n \"name\": \"in_flight_tokens\",\n \"target\": 40000\n }\n ]\n },\n \"metadata\": {\n \"environment\": \"production\",\n \"git_sha\": \"abc123\"\n },\n \"weights\": [\n {\n \"mount_location\": \"/models/base\",\n \"source\": \"hf://meta-llama/Llama-3-8B\"\n }\n ],\n \"name\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/teams/{team_id}/llm_models\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'resources': None, 'llm_version': None, 'llm_config': None, 'environment_variables': None, 'model_metadata': None, 'autoscaling_settings': {'autoscaling_window': 600, 'concurrency_target': None, 'max_replica': 5, 'max_scale_down_rate': None, 'min_replica': 1, 'scale_down_delay': 300, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'additional_autoscaling_config': {'metrics': [{'name': 'in_flight_tokens', 'target': 40000}]}, 'metadata': {'environment': 'production', 'git_sha': 'abc123'}, 'weights': 
[{'mount_location': '/models/base', 'source': 'hf://meta-llama/Llama-3-8B'}], 'name': None}\n)\n\nprint(response.text)" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateLLMModelRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LLMModelHandleV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/team_id" + } + ] + }, + "/v1/llm_models/{model_id}/deployments": { + "post": { + "summary": "Creates a new BIS LLM deployment version", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/llm_models/{model_id}/deployments \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"resources\": null,\n \"llm_version\": null,\n \"llm_config\": null,\n \"environment_variables\": null,\n \"model_metadata\": null,\n \"autoscaling_settings\": {\n \"autoscaling_window\": 600,\n \"concurrency_target\": null,\n \"max_replica\": 5,\n \"max_scale_down_rate\": null,\n \"min_replica\": 1,\n \"scale_down_delay\": 300,\n \"target_in_flight_tokens\": null,\n \"target_utilization_percentage\": null\n },\n \"additional_autoscaling_config\": {\n \"metrics\": [\n {\n \"name\": \"in_flight_tokens\",\n \"target\": 40000\n }\n ]\n },\n \"metadata\": {\n \"environment\": \"production\",\n \"git_sha\": \"abc123\"\n },\n \"weights\": [\n {\n \"mount_location\": \"/models/base\",\n \"source\": \"hf://meta-llama/Llama-3-8B\"\n }\n ]\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/llm_models/{model_id}/deployments\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'resources': None, 'llm_version': None, 
'llm_config': None, 'environment_variables': None, 'model_metadata': None, 'autoscaling_settings': {'autoscaling_window': 600, 'concurrency_target': None, 'max_replica': 5, 'max_scale_down_rate': None, 'min_replica': 1, 'scale_down_delay': 300, 'target_in_flight_tokens': None, 'target_utilization_percentage': None}, 'additional_autoscaling_config': {'metrics': [{'name': 'in_flight_tokens', 'target': 40000}]}, 'metadata': {'environment': 'production', 'git_sha': 'abc123'}, 'weights': [{'mount_location': '/models/base', 'source': 'hf://meta-llama/Llama-3-8B'}]}\n)\n\nprint(response.text)" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateLLMModelVersionRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LLMModelHandleV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/model_id" + } + ] + }, + "/v1/library_listings": { + "get": { + "summary": "Gets all library listings", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns all library listings for the authenticated user's organization.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingsV1" + } + } + } + } + } + }, + "post": { + "summary": "Creates a new library listing", + "x-codeSamples": [ 
+ { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/library_listings \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"display_name\": null,\n \"user_defined_id\": null,\n \"is_public\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'display_name': None, 'user_defined_id': None, 'is_public': None}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a new library listing for the authenticated user's organization.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateLibraryListingRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingV1" + } + } + } + } + } + } + }, + "/v1/library_listings/{user_defined_listing_id}": { + "delete": { + "summary": "Deletes a library listing", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Deletes a library listing and all of its associated versions. 
Any versions that are currently live will also be removed.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingTombstoneV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/user_defined_listing_id" + } + ], + "get": { + "summary": "Gets a library listing", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns a specific library listing by its user-defined identifier.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingV1" + } + } + } + } + } + }, + "patch": { + "summary": "Updates a library listing", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"display_name\": null,\n \"is_public\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'display_name': None, 'is_public': None}\n)\n\nprint(response.text)" + } + ], + 
"description": "Updates the display name of a library listing.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateLibraryListingRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingV1" + } + } + } + } + } + } + }, + "/v1/library_listings/{user_defined_listing_id}/versions": { + "get": { + "summary": "Gets all versions for a library listing", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns all versions for a specific library listing.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingVersionsV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/user_defined_listing_id" + } + ], + "post": { + "summary": "Creates a new library listing version", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"display_name\": null,\n \"is_public\": null,\n \"oracle_version_id\": null,\n \"allow_truss_download\": null,\n \"version_tag\": null\n}'" + }, + { + "lang": "python", + "source": "import 
requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'display_name': None, 'is_public': None, 'oracle_version_id': None, 'allow_truss_download': None, 'version_tag': None}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a new library listing version from an existing model version. The model version must be fully built (have an image_uri). If a listing with the given id already exists for the org, a new version is added. Otherwise, a new listing is created.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateLibraryListingVersionRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingVersionV1" + } + } + } + } + } + } + }, + "/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}": { + "delete": { + "summary": "Deletes a library listing version", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request DELETE \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"DELETE\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Deletes a specific version of a library listing. 
Deleting a live version will fail with a 400 error \u2014 demote the version first by setting another version as live.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingVersionTombstoneV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/user_defined_listing_id" + }, + { + "$ref": "#/components/parameters/version_tag" + } + ], + "get": { + "summary": "Gets a library listing version", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns a specific version of a library listing.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingVersionV1" + } + } + } + } + } + }, + "patch": { + "summary": "Updates a library listing version", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"is_live\": null,\n \"allow_truss_download\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = 
\"https://api.baseten.co/v1/library_listings/{user_defined_listing_id}/versions/{version_tag}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'is_live': None, 'allow_truss_download': None}\n)\n\nprint(response.text)" + } + ], + "description": "Updates a library listing version. Setting is_live to true will demote the current live version.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateLibraryListingVersionRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LibraryListingVersionV1" + } + } + } + } + } + } + }, + "/v1/billing/usage_summary": { + "get": { + "summary": "Gets billing usage summary for a date range", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/billing/usage_summary \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/billing/usage_summary\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns billing usage data within the specified date range. Includes dedicated model serving, training, and model APIs usage. The date range must not exceed 31 days.", + "parameters": [ + { + "name": "start_date", + "in": "query", + "required": true, + "description": "Start date (ISO 8601, UTC). 
Earliest queryable: 2026-01-01.", + "schema": { + "format": "date-time", + "title": "Start Date", + "type": "string" + } + }, + { + "name": "end_date", + "in": "query", + "required": true, + "description": "End date in ISO 8601 format (UTC). Date range cannot exceed 31 days.", + "schema": { + "format": "date-time", + "title": "End Date", + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UsageSummaryV1" + } + } + } + } + } + } + }, + "/v1/users/{user_id}": { + "get": { + "summary": "Gets a user by ID", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/users/{user_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/users/{user_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "description": "Returns user info. 
Currently only 'me' is accepted as user_id.", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UserInfoV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/user_id" + } + ] + }, + "/v1/gateway/groups": { + "get": { + "summary": "List groups", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/gateway/groups \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupsResponseV1" + } + } + } + } + } + }, + "post": { + "summary": "Create a group", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/gateway/groups \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"metadata\": {\n \"name\": \"Acme prod\",\n \"external_entity_id\": \"cust_42\"\n },\n \"models\": null,\n \"hierarchy\": {\n \"limit_enforcement\": \"INDEPENDENT\",\n \"parent_group_id\": \"abc123\"\n }\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'metadata': {'name': 'Acme prod', 'external_entity_id': 'cust_42'}, 'models': None, 'hierarchy': {'limit_enforcement': 'INDEPENDENT', 'parent_group_id': 
'abc123'}}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a group and its endpoint configuration", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateGroupRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupV1" + } + } + } + } + } + } + }, + "/v1/gateway/groups/{group_id}": { + "get": { + "summary": "Get a group", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/gateway/groups/{group_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups/{group_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/group_id" + } + ], + "patch": { + "summary": "Update a group", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request PATCH \\\n--url https://api.baseten.co/v1/gateway/groups/{group_id} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"metadata\": {\n \"name\": \"Acme Prod\"\n },\n \"models\": null\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups/{group_id}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"PATCH\",\n url,\n headers=headers,\n json={'metadata': {'name': 'Acme 
Prod'}, 'models': None}\n)\n\nprint(response.text)" + } + ], + "description": "Updates the group's mutable fields", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateGroupRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupV1" + } + } + } + } + } + } + }, + "/v1/gateway/groups/{group_id}/api_keys": { + "get": { + "summary": "List API keys for a group", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/KeysForGroupResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/group_id" + } + ], + "post": { + "summary": "Create an API key for a group", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"prod-key-1\"\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n 
headers=headers,\n json={'name': 'prod-key-1'}\n)\n\nprint(response.text)" + } + ], + "description": "Creates a new API key for the given group", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateApiKeyForGroupRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateApiKeyForGroupResponseV1" + } + } + } + } + } + } + }, + "/v1/gateway/groups/{group_id}/api_keys/register": { + "post": { + "summary": "Register an API key for a group", + "x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request POST \\\n--url https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys/register \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n--data '{\n \"name\": \"my-model-api-key\",\n \"key\": \"my-secure-api-key-value\"\n}'" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys/register\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"POST\",\n url,\n headers=headers,\n json={'name': 'my-model-api-key', 'key': 'my-secure-api-key-value'}\n)\n\nprint(response.text)" + } + ], + "description": "Registers a Gateway API key with provided value, name.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterAPIKeyRequestV1" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterAPIKeyResponseV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/group_id" + } + ] + }, + "/v1/gateway/groups/{group_id}/api_keys/{api_key_prefix}": { + "get": { + "summary": "Get an API key for a group", + 
"x-codeSamples": [ + { + "lang": "bash", + "source": "curl --request GET \\\n--url https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys/{api_key_prefix} \\\n--header \"Authorization: Api-Key $BASETEN_API_KEY\"\n" + }, + { + "lang": "python", + "source": "import requests\nimport os\nAPI_KEY = os.environ.get(\"BASETEN_API_KEY\", \"\")\nurl = \"https://api.baseten.co/v1/gateway/groups/{group_id}/api_keys/{api_key_prefix}\"\n\nheaders = {\"Authorization\": f\"Api-Key {API_KEY}\"}\n\nresponse = requests.request(\n \"GET\",\n url,\n headers=headers,\n json={}\n)\n\nprint(response.text)" + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GatewayKeyInfoV1" + } + } + } + } + } + }, + "parameters": [ + { + "$ref": "#/components/parameters/group_id" + }, + { + "$ref": "#/components/parameters/api_key_prefix" + } + ] + } + }, + "openapi": "3.1.0", + "components": { + "schemas": { + "SecretV1": { + "description": "A Baseten secret. 
Note that we do not support retrieving secret values.", + "properties": { + "created_at": { + "description": "Time the secret was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "name": { + "description": "Name of the secret", + "title": "Name", + "type": "string" + }, + "team_name": { + "description": "Name of the team the secret belongs to", + "title": "Team Name", + "type": "string" + } + }, + "required": [ + "created_at", + "name", + "team_name" + ], + "title": "SecretV1", + "type": "object" + }, + "SecretsV1": { + "description": "A list of Baseten secrets.", + "properties": { + "secrets": { + "items": { + "$ref": "#/components/schemas/SecretV1" + }, + "title": "Secrets", + "type": "array" + } + }, + "required": [ + "secrets" + ], + "title": "SecretsV1", + "type": "object" + }, + "UpsertSecretRequestV1": { + "description": "A request to create or update a Baseten secret by name.", + "properties": { + "name": { + "description": "Name of the new or existing secret", + "examples": [ + "my_secret" + ], + "title": "Name", + "type": "string" + }, + "value": { + "description": "Value of the secret", + "examples": [ + "my_secret_value" + ], + "title": "Value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "title": "UpsertSecretRequestV1", + "type": "object" + }, + "SecretTombstoneV1": { + "description": "A secret tombstone.", + "properties": { + "name": { + "description": "Name of the deleted secret", + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ], + "title": "SecretTombstoneV1", + "type": "object" + }, + "TeamV1": { + "description": "A team.", + "properties": { + "id": { + "description": "Unique identifier of the team", + "title": "Id", + "type": "string" + }, + "name": { + "description": "Name of the team", + "title": "Name", + "type": "string" + }, + "default": { + "type": "boolean", + "description": "Whether this is the default team for the organization", 
+ "title": "Default" + }, + "created_at": { + "description": "Time the team was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" + } + }, + "required": [ + "id", + "name", + "default", + "created_at" + ], + "title": "TeamV1", + "type": "object" + }, + "TeamsV1": { + "description": "A list of teams.", + "properties": { + "teams": { + "description": "A list of teams", + "items": { + "$ref": "#/components/schemas/TeamV1" + }, + "title": "Teams", + "type": "array" + } + }, + "required": [ + "teams" + ], + "title": "TeamsV1", + "type": "object" + }, + "InstanceTypeV1": { + "description": "An instance type.", + "properties": { + "id": { + "description": "Identifier string for the instance type", + "title": "Id", + "type": "string" + }, + "name": { + "description": "Display name of the instance type", + "title": "Name", + "type": "string" + }, + "memory_limit_mib": { + "description": "Memory limit of the instance type in Mebibytes", + "title": "Memory Limit Mib", + "type": "integer" + }, + "millicpu_limit": { + "description": "CPU limit of the instance type in millicpu", + "title": "Millicpu Limit", + "type": "integer" + }, + "gpu_count": { + "description": "Number of GPUs on the instance type", + "title": "Gpu Count", + "type": "integer" + }, + "gpu_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Type of GPU on the instance type", + "title": "Gpu Type" + }, + "gpu_memory_limit_mib": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Memory limit of the GPU on the instance type in Mebibytes", + "title": "Gpu Memory Limit Mib" + } + }, + "required": [ + "id", + "name", + "memory_limit_mib", + "millicpu_limit", + "gpu_count", + "gpu_type", + "gpu_memory_limit_mib" + ], + "title": "InstanceTypeV1", + "type": "object" + }, + "InstanceTypesV1": { + "description": "A list of instance types.", + "properties": { + "instance_types": { + 
"items": { + "$ref": "#/components/schemas/InstanceTypeV1" + }, + "title": "Instance Types", + "type": "array" + } + }, + "required": [ + "instance_types" + ], + "title": "InstanceTypesV1", + "type": "object" + }, + "InstanceTypeWithPriceV1": { + "properties": { + "instance_type": { + "$ref": "#/components/schemas/InstanceTypeV1", + "description": "Instance type properties." + }, + "price": { + "description": "Usage price in USD / minute.", + "title": "Price", + "type": "number" + } + }, + "required": [ + "instance_type", + "price" + ], + "title": "InstanceTypeWithPriceV1", + "type": "object" + }, + "InstanceTypePricesV1": { + "description": "A list of instance types.", + "properties": { + "instance_types": { + "items": { + "$ref": "#/components/schemas/InstanceTypeWithPriceV1" + }, + "title": "Instance Types", + "type": "array" + } + }, + "required": [ + "instance_types" + ], + "title": "InstanceTypePricesV1", + "type": "object" + }, + "DeploymentArchivePayloadV1": { + "description": "Deployment-level fields for a model-archive push.\n\nShared by every endpoint that creates a deployment from an uploaded archive:\n`POST /v1/prepare_model_upload`, the `model_archive` source on `POST\n/v1/models`, and `POST /v1/models/{model_id}/deployments`.", + "properties": { + "config": { + "additionalProperties": true, + "description": "Parsed model config as a JSON object.", + "title": "Config", + "type": "object" + }, + "raw_config": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Original config.yaml text, persisted as-is on the deployment. Best-effort: invalid raw configs are logged and dropped without failing the request.", + "title": "Raw Config" + }, + "user_env": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Client environment metadata (e.g. client version, Python version). 
Validated server-side.", + "title": "User Env" + }, + "environment_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Stable environment to push to (e.g. `production`). If unset, the deployment is created without environment selection. Caller must have push permission for the named environment.", + "title": "Environment Name" + }, + "preserve_env_instance_type": { + "default": true, + "description": "Retain the target environment's current instance type rather than the one in `config`. Only meaningful when `environment_name` is set and that environment already exists.", + "title": "Preserve Env Instance Type", + "type": "boolean" + }, + "deploy_timeout_minutes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Deploy timeout in minutes; allowed range 10 to 1440. Server default applies if unset.", + "title": "Deploy Timeout Minutes" + }, + "deployment_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional human-readable name for the deployment.", + "title": "Deployment Name" + }, + "labels": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "User-provided key-value labels for the deployment.", + "title": "Labels" + } + }, + "required": [ + "config" + ], + "title": "DeploymentArchivePayloadV1", + "type": "object" + }, + "PrepareModelUploadRequestV1": { + "description": "Body for `POST /v1/prepare_model_upload`.\n\nValidates the same payload the commit endpoint will validate, and on\n`dry_run=false` issues STS upload credentials. 
Exactly one of `name` or\n`model_id` is required: `name` validates the new-model path (`POST\n/v1/models`); `model_id` validates the add-deployment path (`POST\n/v1/models/{model_id}/deployments`).", + "properties": { + "deployment": { + "$ref": "#/components/schemas/DeploymentArchivePayloadV1", + "description": "Deployment-level payload, identical to the payload sent at commit." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Set to validate a new-model push. Exactly one of `name` or `model_id` is required.", + "title": "Name" + }, + "team_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Team the new model will belong to. Only valid when `name` is set; defaults to the organization's default team when omitted. Must not be set when `model_id` is set (the existing model already has a team).", + "title": "Team Id" + }, + "model_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Set to validate an add-deployment push to an existing model. Exactly one of `name` or `model_id` is required.", + "title": "Model Id" + }, + "dry_run": { + "default": false, + "description": "If true, validate the payload only and do not issue upload credentials. The response sets `creds`, `s3_bucket`, and `s3_key` to `null`.", + "title": "Dry Run", + "type": "boolean" + }, + "is_development": { + "default": false, + "description": "If true, validate a development-deployment push. Only valid when `name` is set. 
The following `deployment` fields must be left at their defaults: `environment_name`, `preserve_env_instance_type`, `deployment_name`.", + "title": "Is Development", + "type": "boolean" + } + }, + "required": [ + "deployment" + ], + "title": "PrepareModelUploadRequestV1", + "type": "object" + }, + "AWSCredentialsV1": { + "description": "AWS credentials", + "properties": { + "aws_access_key_id": { + "description": "The AWS access key ID", + "title": "Aws Access Key Id", + "type": "string" + }, + "aws_secret_access_key": { + "description": "The AWS secret access key", + "title": "Aws Secret Access Key", + "type": "string" + }, + "aws_session_token": { + "description": "The AWS session token", + "title": "Aws Session Token", + "type": "string" + } + }, + "required": [ + "aws_access_key_id", + "aws_secret_access_key", + "aws_session_token" + ], + "title": "AWSCredentialsV1", + "type": "object" + }, + "PrepareModelUploadResponseV1": { + "description": "Response from `POST /v1/prepare_model_upload`.\n\nOn success with `dry_run=false`, returns STS upload credentials. On success\nwith `dry_run=true`, `creds`, `s3_bucket`, and `s3_key` are `null` and only\nvalidation has run.", + "properties": { + "creds": { + "anyOf": [ + { + "$ref": "#/components/schemas/AWSCredentialsV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "STS credentials to upload the model archive." + }, + "s3_bucket": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "S3 bucket the credentials are scoped to.", + "title": "S3 Bucket" + }, + "s3_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "S3 key the credentials are scoped to. 
Pass this to `POST /v1/models` (in the `model_archive` source) once the upload completes.", + "title": "S3 Key" + }, + "s3_region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "AWS region the S3 bucket resides in.", + "title": "S3 Region" + } + }, + "title": "PrepareModelUploadResponseV1", + "type": "object" + }, + "ModelV1": { + "description": "A model.", + "properties": { + "id": { + "description": "Unique identifier of the model", + "title": "Id", + "type": "string" + }, + "created_at": { + "description": "Time the model was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "name": { + "description": "Name of the model", + "title": "Name", + "type": "string" + }, + "deployments_count": { + "description": "Number of deployments of the model", + "title": "Deployments Count", + "type": "integer" + }, + "production_deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Unique identifier of the production deployment of the model", + "title": "Production Deployment Id" + }, + "development_deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Unique identifier of the development deployment of the model", + "title": "Development Deployment Id" + }, + "instance_type_name": { + "description": "Name of the instance type for the production deployment of the model", + "title": "Instance Type Name", + "type": "string" + }, + "team_name": { + "description": "Name of the team associated with the model.", + "title": "Team Name", + "type": "string" + } + }, + "required": [ + "id", + "created_at", + "name", + "deployments_count", + "production_deployment_id", + "development_deployment_id", + "instance_type_name", + "team_name" + ], + "title": "ModelV1", + "type": "object" + }, + "ModelsV1": { + "description": "A list of models.", + "properties": { + "models": 
{ + "items": { + "$ref": "#/components/schemas/ModelV1" + }, + "title": "Models", + "type": "array" + } + }, + "required": [ + "models" + ], + "title": "ModelsV1", + "type": "object" + }, + "LibraryListingSourceV1": { + "description": "Create a model by forking a library listing accessible to the caller's organization.", + "properties": { + "kind": { + "const": "library_listing", + "default": "library_listing", + "title": "Kind", + "type": "string" + }, + "lab_display_name": { + "description": "Identifier of the publishing organization, as returned by `GET /v1/library_models`.", + "title": "Lab Display Name", + "type": "string" + }, + "user_defined_listing_id": { + "description": "Listing identifier within the publishing organization.", + "title": "User Defined Listing Id", + "type": "string" + }, + "deployed_model_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional name for the new deployed model. Defaults to the listing's configured name.", + "title": "Deployed Model Name" + } + }, + "required": [ + "lab_display_name", + "user_defined_listing_id" + ], + "title": "LibraryListingSourceV1", + "type": "object" + }, + "ModelArchiveSourceV1": { + "description": "Create a model from an archive previously uploaded via the credentials\nissued by `POST /v1/prepare_model_upload`.", + "properties": { + "kind": { + "const": "model_archive", + "default": "model_archive", + "title": "Kind", + "type": "string" + }, + "name": { + "description": "Name of the new model.", + "title": "Name", + "type": "string" + }, + "deployment": { + "$ref": "#/components/schemas/DeploymentArchivePayloadV1", + "description": "Deployment-level configuration for the model's first deployment." 
+ }, + "s3_key": { + "description": "S3 key of the uploaded archive, from the credentials returned by `POST /v1/prepare_model_upload`.", + "title": "S3 Key", + "type": "string" + }, + "disable_archive_download": { + "default": false, + "description": "If true, the uploaded archive is not downloadable after creation. Locked at model creation; cannot be changed by subsequent deployments.", + "title": "Disable Archive Download", + "type": "boolean" + }, + "is_development": { + "default": false, + "description": "If true, push as a development deployment (the model's single mutable dev slot; overwrites any existing development deployment). The following `deployment` fields must be left at their defaults: `environment_name`, `preserve_env_instance_type`, `deployment_name`.", + "title": "Is Development", + "type": "boolean" + } + }, + "required": [ + "name", + "deployment", + "s3_key" + ], + "title": "ModelArchiveSourceV1", + "type": "object" + }, + "CreateModelRequestV1": { + "description": "Body for creating a model via `POST /v1/models`.", + "properties": { + "source": { + "description": "Where the new model is created from.", + "discriminator": { + "mapping": { + "library_listing": "#/components/schemas/LibraryListingSourceV1", + "model_archive": "#/components/schemas/ModelArchiveSourceV1" + }, + "propertyName": "kind" + }, + "oneOf": [ + { + "$ref": "#/components/schemas/LibraryListingSourceV1" + }, + { + "$ref": "#/components/schemas/ModelArchiveSourceV1" + } + ], + "title": "Source" + } + }, + "required": [ + "source" + ], + "title": "CreateModelRequestV1", + "type": "object" + }, + "AutoscalingSettingsV1": { + "description": "Autoscaling settings for a deployment.", + "properties": { + "min_replica": { + "description": "Minimum number of replicas", + "title": "Min Replica", + "type": "integer" + }, + "max_replica": { + "description": "Maximum number of replicas", + "title": "Max Replica", + "type": "integer" + }, + "autoscaling_window": { + "anyOf": [ + { + 
"type": "integer" + }, + { + "type": "null" + } + ], + "description": "Timeframe of traffic considered for autoscaling decisions", + "title": "Autoscaling Window" + }, + "scale_down_delay": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Waiting period before scaling down any active replica", + "title": "Scale Down Delay" + }, + "concurrency_target": { + "description": "Number of requests per replica before scaling up", + "title": "Concurrency Target", + "type": "integer" + }, + "target_utilization_percentage": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Target utilization percentage for scaling up/down.", + "title": "Target Utilization Percentage" + }, + "target_in_flight_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Target number of in-flight tokens for autoscaling decisions. Early access only.", + "title": "Target In Flight Tokens" + }, + "max_scale_down_rate": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Maximum rate at which replicas can scale down (e.g. 
2.0 means at most halve replicas per window).", + "title": "Max Scale Down Rate" + } + }, + "required": [ + "min_replica", + "max_replica", + "autoscaling_window", + "scale_down_delay", + "concurrency_target", + "target_utilization_percentage" + ], + "title": "AutoscalingSettingsV1", + "type": "object" + }, + "DeploymentStatusV1": { + "description": "The status of a deployment.", + "enum": [ + "BUILDING", + "DEPLOYING", + "DEPLOY_FAILED", + "LOADING_MODEL", + "ACTIVE", + "UNHEALTHY", + "BUILD_FAILED", + "BUILD_STOPPED", + "DEACTIVATING", + "INACTIVE", + "FAILED", + "UPDATING", + "SCALED_TO_ZERO", + "WAKING_UP" + ], + "title": "DeploymentStatusV1", + "type": "string" + }, + "DeploymentV1": { + "description": "A deployment of a model.", + "properties": { + "id": { + "description": "Unique identifier of the deployment", + "title": "Id", + "type": "string" + }, + "created_at": { + "description": "Time the deployment was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "name": { + "description": "Name of the deployment", + "title": "Name", + "type": "string" + }, + "model_id": { + "description": "Unique identifier of the model", + "title": "Model Id", + "type": "string" + }, + "is_production": { + "description": "Whether the deployment is the production deployment of the model", + "title": "Is Production", + "type": "boolean" + }, + "is_development": { + "description": "Whether the deployment is the development deployment of the model", + "title": "Is Development", + "type": "boolean" + }, + "status": { + "$ref": "#/components/schemas/DeploymentStatusV1", + "description": "Status of the deployment" + }, + "active_replica_count": { + "description": "Number of active replicas", + "title": "Active Replica Count", + "type": "integer" + }, + "autoscaling_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/AutoscalingSettingsV1" + }, + { + "type": "null" + } + ], + "description": "Autoscaling settings for the 
deployment. If null, the model has not finished deploying" + }, + "instance_type_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Name of the instance type the model deployment is running on", + "title": "Instance Type Name" + }, + "environment": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The environment associated with the deployment", + "title": "Environment" + }, + "labels": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "User-provided key-value labels for the deployment", + "title": "Labels" + } + }, + "required": [ + "id", + "created_at", + "name", + "model_id", + "is_production", + "is_development", + "status", + "active_replica_count", + "autoscaling_settings", + "instance_type_name", + "environment" + ], + "title": "DeploymentV1", + "type": "object" + }, + "CreatedModelDeploymentV1": { + "description": "A newly created deployment and its model.", + "properties": { + "model": { + "$ref": "#/components/schemas/ModelV1", + "description": "The model the deployment belongs to. May have been created by this call." + }, + "deployment": { + "$ref": "#/components/schemas/DeploymentV1", + "description": "The newly created deployment." 
+ } + }, + "required": [ + "model", + "deployment" + ], + "title": "CreatedModelDeploymentV1", + "type": "object" + }, + "ModelTombstoneV1": { + "description": "A model tombstone.", + "properties": { + "id": { + "description": "Unique identifier of the model", + "title": "Id", + "type": "string" + }, + "deleted": { + "description": "Whether the model was deleted", + "title": "Deleted", + "type": "boolean" + } + }, + "required": [ + "id", + "deleted" + ], + "title": "ModelTombstoneV1", + "type": "object" + }, + "DeploymentsV1": { + "description": "A list of deployments of a model.", + "properties": { + "deployments": { + "description": "A list of deployments of a model", + "items": { + "$ref": "#/components/schemas/DeploymentV1" + }, + "title": "Deployments", + "type": "array" + } + }, + "required": [ + "deployments" + ], + "title": "DeploymentsV1", + "type": "object" + }, + "DeploymentArchiveSourceV1": { + "description": "Add a deployment from an archive previously uploaded via the credentials\nissued by `POST /v1/prepare_model_upload`.", + "properties": { + "kind": { + "const": "model_archive", + "default": "model_archive", + "title": "Kind", + "type": "string" + }, + "deployment": { + "$ref": "#/components/schemas/DeploymentArchivePayloadV1", + "description": "Deployment-level configuration." 
+ }, + "s3_key": { + "description": "S3 key of the uploaded archive, from the credentials returned by `POST /v1/prepare_model_upload`.", + "title": "S3 Key", + "type": "string" + } + }, + "required": [ + "deployment", + "s3_key" + ], + "title": "DeploymentArchiveSourceV1", + "type": "object" + }, + "CreateModelDeploymentRequestV1": { + "description": "Body for adding a deployment to an existing model via\n`POST /v1/models/{model_id}/deployments`.", + "properties": { + "source": { + "description": "Where the new deployment is created from.", + "discriminator": { + "mapping": { + "model_archive": "#/components/schemas/DeploymentArchiveSourceV1" + }, + "propertyName": "kind" + }, + "oneOf": [ + { + "$ref": "#/components/schemas/DeploymentArchiveSourceV1" + } + ], + "title": "Source" + } + }, + "required": [ + "source" + ], + "title": "CreateModelDeploymentRequestV1", + "type": "object" + }, + "DeploymentTombstoneV1": { + "description": "A model deployment tombstone.", + "properties": { + "id": { + "description": "Unique identifier of the deployment", + "title": "Id", + "type": "string" + }, + "deleted": { + "description": "Whether the deployment was deleted", + "title": "Deleted", + "type": "boolean" + }, + "model_id": { + "description": "Unique identifier of the model", + "title": "Model Id", + "type": "string" + } + }, + "required": [ + "id", + "deleted", + "model_id" + ], + "title": "DeploymentTombstoneV1", + "type": "object" + }, + "UpdateAutoscalingSettingsV1": { + "additionalProperties": false, + "description": "A request to update autoscaling settings for a deployment. 
All fields are optional, and we only update ones passed in.", + "properties": { + "min_replica": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Minimum number of replicas", + "examples": [ + 0 + ], + "title": "Min Replica" + }, + "max_replica": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Maximum number of replicas", + "examples": [ + 7 + ], + "title": "Max Replica" + }, + "autoscaling_window": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Timeframe of traffic considered for autoscaling decisions", + "examples": [ + 600 + ], + "title": "Autoscaling Window" + }, + "scale_down_delay": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Waiting period before scaling down any active replica", + "examples": [ + 120 + ], + "title": "Scale Down Delay" + }, + "concurrency_target": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of requests per replica before scaling up", + "examples": [ + 2 + ], + "title": "Concurrency Target" + }, + "target_utilization_percentage": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Target utilization percentage for scaling up/down.", + "examples": [ + 70 + ], + "title": "Target Utilization Percentage" + }, + "target_in_flight_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Target number of in-flight tokens for autoscaling decisions. 
Early access only.", + "examples": [ + 40000 + ], + "title": "Target In Flight Tokens" + }, + "max_scale_down_rate": { + "anyOf": [ + { + "exclusiveMinimum": 1, + "maximum": 2, + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Maximum rate at which replicas can scale down (e.g. 2.0 means at most halve replicas per window).", + "examples": [ + 2.0 + ], + "title": "Max Scale Down Rate" + } + }, + "title": "UpdateAutoscalingSettingsV1", + "type": "object" + }, + "UpdateAutoscalingSettingsStatusV1": { + "description": "The status of a request to update autoscaling settings.", + "enum": [ + "ACCEPTED", + "QUEUED", + "UNCHANGED" + ], + "title": "UpdateAutoscalingSettingsStatusV1", + "type": "string" + }, + "UpdateAutoscalingSettingsResponseV1": { + "description": "The response to a request to update autoscaling settings.", + "properties": { + "status": { + "$ref": "#/components/schemas/UpdateAutoscalingSettingsStatusV1", + "description": "Status of the request to update autoscaling settings" + }, + "message": { + "description": "A message describing the status of the request to update autoscaling settings", + "title": "Message", + "type": "string" + } + }, + "required": [ + "status", + "message" + ], + "title": "UpdateAutoscalingSettingsResponseV1", + "type": "object" + }, + "PromoteRequestV1": { + "description": "A request to promote a deployment to production.", + "properties": { + "scale_down_previous_production": { + "default": true, + "description": "Whether to scale down the previous production deployment after promoting", + "examples": [ + true + ], + "title": "Scale Down Previous Production", + "type": "boolean" + }, + "preserve_env_instance_type": { + "default": true, + "description": "Whether to use the promoting deployment's instance type or preserve target environment's instance type", + "examples": [ + true + ], + "title": "Preserve Env Instance Type", + "type": "boolean" + } + }, + "title": "PromoteRequestV1", + 
"type": "object" + }, + "ActivateResponseV1": { + "description": "The response to a request to activate a deployment.", + "properties": { + "success": { + "default": true, + "description": "Whether the deployment was successfully activated", + "title": "Success", + "type": "boolean" + } + }, + "title": "ActivateResponseV1", + "type": "object" + }, + "DeactivateResponseV1": { + "description": "The response to a request to deactivate a deployment.", + "properties": { + "success": { + "default": true, + "description": "Whether the deployment was successfully deactivated", + "title": "Success", + "type": "boolean" + } + }, + "title": "DeactivateResponseV1", + "type": "object" + }, + "RetryDeploymentResponseV1": { + "description": "The response to a request to retry a deployment.", + "properties": { + "retried": { + "description": "Whether the retry was successfully initiated", + "title": "Retried", + "type": "boolean" + }, + "reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Explanation of the result. Provided when retried is false to explain why retry was not possible.", + "title": "Reason" + }, + "deployment": { + "$ref": "#/components/schemas/DeploymentV1", + "description": "The deployment that was retried" + } + }, + "required": [ + "retried", + "deployment" + ], + "title": "RetryDeploymentResponseV1", + "type": "object" + }, + "DownloadDeploymentResponseV1": { + "description": "The response to a request to download a deployment's truss.", + "properties": { + "download_url": { + "description": "Presigned URL to download the truss tar file", + "title": "Download Url", + "type": "string" + } + }, + "required": [ + "download_url" + ], + "title": "DownloadDeploymentResponseV1", + "type": "object" + }, + "DeploymentConfigResponseV1": { + "description": "The config of a deployment. 
Fields are populated per `output_format`.", + "properties": { + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The parsed config of the deployment.", + "title": "Config" + }, + "raw_config": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The original config.yaml text \u2014 preserves comments, ordering, formatting.", + "title": "Raw Config" + } + }, + "title": "DeploymentConfigResponseV1", + "type": "object" + }, + "DeploymentConfigOutputFormat": { + "enum": [ + "raw", + "parsed", + "both" + ], + "title": "DeploymentConfigOutputFormat", + "type": "string" + }, + "GetDeploymentConfigRequestV1": { + "description": "Query params for ``GET /v1/models/.../deployments/.../config``.", + "properties": { + "output_format": { + "$ref": "#/components/schemas/DeploymentConfigOutputFormat", + "default": "both", + "description": "'raw': verbatim config.yaml with comments \u2014 not available for deployments created before 2026-04-30. 'parsed': dict with server-side defaults applied \u2014 always available. 'both': both fields populated." 
+ } + }, + "title": "GetDeploymentConfigRequestV1", + "type": "object" + }, + "LogV1": { + "properties": { + "timestamp": { + "description": "Epoch nanosecond timestamp of the log message.", + "title": "Timestamp", + "type": "string" + }, + "message": { + "description": "The contents of the log message.", + "title": "Message", + "type": "string" + }, + "replica": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The replica the log line was emitted from.", + "title": "Replica" + }, + "request_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The request ID associated with an inference request.", + "title": "Request Id" + } + }, + "required": [ + "timestamp", + "message", + "replica" + ], + "title": "LogV1", + "type": "object" + }, + "GetLogsResponseV1": { + "description": "A response to querying logs.", + "properties": { + "logs": { + "description": "Logs for a specific entity.", + "items": { + "$ref": "#/components/schemas/LogV1" + }, + "title": "Logs", + "type": "array" + } + }, + "required": [ + "logs" + ], + "title": "GetLogsResponseV1", + "type": "object" + }, + "SortOrderV1": { + "enum": [ + "asc", + "desc" + ], + "title": "SortOrderV1", + "type": "string" + }, + "GetDeploymentLogsRequestV1": { + "description": "A request to fetch deployment logs.", + "properties": { + "start_epoch_millis": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Epoch millis timestamp to start fetching logs", + "title": "Start Epoch Millis" + }, + "end_epoch_millis": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Epoch millis timestamp to end fetching logs", + "title": "End Epoch Millis" + }, + "direction": { + "anyOf": [ + { + "$ref": "#/components/schemas/SortOrderV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Sort order for 
logs" + }, + "limit": { + "anyOf": [ + { + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 500, + "description": "Limit of logs to fetch in a single request", + "title": "Limit" + } + }, + "title": "GetDeploymentLogsRequestV1", + "type": "object" + }, + "TerminateReplicaResponseV1": { + "description": "The response to a request to terminate a replica in a deployment.", + "properties": { + "success": { + "default": true, + "description": "Whether the replica was successfully terminated", + "title": "Success", + "type": "boolean" + } + }, + "title": "TerminateReplicaResponseV1", + "type": "object" + }, + "SignSSHCertificateRequestV1": { + "description": "Request to sign an SSH certificate for accessing a workload pod.", + "properties": { + "public_key": { + "description": "The user's SSH public key (e.g., 'ssh-ed25519 AAAA... user@host').", + "title": "Public Key", + "type": "string" + }, + "replica_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The replica to connect to. Required for training jobs (e.g. '0'). 
Optional for inference (server picks a running replica if omitted).", + "title": "Replica Id" + } + }, + "required": [ + "public_key" + ], + "title": "SignSSHCertificateRequestV1", + "type": "object" + }, + "SignSSHCertificateResponseV1": { + "description": "Response containing a signed SSH certificate for proxy authentication.", + "properties": { + "ssh_certificate": { + "description": "The signed SSH certificate in OpenSSH format.", + "title": "Ssh Certificate", + "type": "string" + }, + "jwt": { + "description": "Signed JWT (ES256) for SSH proxy authorization.", + "title": "Jwt", + "type": "string" + }, + "proxy_address": { + "description": "Address of the SSH proxy to connect to (host:port).", + "title": "Proxy Address", + "type": "string" + }, + "ssh_cert_expires_at": { + "description": "When the certificate expires, in ISO 8601 format.", + "format": "date-time", + "title": "Ssh Cert Expires At", + "type": "string" + } + }, + "required": [ + "ssh_certificate", + "jwt", + "proxy_address", + "ssh_cert_expires_at" + ], + "title": "SignSSHCertificateResponseV1", + "type": "object" + }, + "EnvironmentV1": { + "description": "Environment for oracles.", + "properties": { + "name": { + "description": "Name of the environment", + "title": "Name", + "type": "string" + }, + "created_at": { + "description": "Time the environment was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "model_id": { + "description": "Unique identifier of the model", + "title": "Model Id", + "type": "string" + }, + "current_deployment": { + "anyOf": [ + { + "$ref": "#/components/schemas/DeploymentV1" + }, + { + "type": "null" + } + ], + "description": "Current deployment of the environment" + }, + "candidate_deployment": { + "anyOf": [ + { + "$ref": "#/components/schemas/DeploymentV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Candidate deployment being promoted to the environment, if a promotion is in 
progress" + }, + "in_progress_promotion": { + "anyOf": [ + { + "$ref": "#/components/schemas/InProgressPromotionV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Details of the in-progress promotion, if any" + }, + "autoscaling_settings": { + "$ref": "#/components/schemas/AutoscalingSettingsV1", + "description": "Autoscaling settings for the environment" + }, + "promotion_settings": { + "$ref": "#/components/schemas/PromotionSettingsV1", + "description": "Promotion settings for the environment" + }, + "instance_type": { + "$ref": "#/components/schemas/InstanceTypeV1", + "description": "Instance type for the environment" + } + }, + "required": [ + "name", + "created_at", + "model_id", + "current_deployment", + "autoscaling_settings", + "promotion_settings", + "instance_type" + ], + "title": "EnvironmentV1", + "type": "object" + }, + "InProgressPromotionStatusV1": { + "enum": [ + "RELEASING", + "RAMPING_UP", + "RAMPING_DOWN", + "PAUSED", + "SUCCEEDED", + "FAILED", + "CANCELED" + ], + "title": "InProgressPromotionStatusV1", + "type": "string" + }, + "InProgressPromotionV1": { + "description": "Details of an in-progress promotion.", + "properties": { + "status": { + "$ref": "#/components/schemas/InProgressPromotionStatusV1", + "description": "Status of the promotion" + }, + "percent_traffic_to_new_version": { + "description": "Percentage of traffic routed to the candidate deployment", + "title": "Percent Traffic To New Version", + "type": "integer" + }, + "error_message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Error message if promotion failed", + "title": "Error Message" + }, + "rolling_deploy": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Whether this is a rolling deploy", + "title": "Rolling Deploy" + } + }, + "required": [ + "status", + "percent_traffic_to_new_version" + ], + "title": 
"InProgressPromotionV1", + "type": "object" + }, + "PromotionCleanupStrategyV1": { + "description": "The promotion cleanup strategy.", + "enum": [ + "KEEP", + "SCALE_TO_ZERO", + "DEACTIVATE" + ], + "title": "PromotionCleanupStrategyV1", + "type": "string" + }, + "PromotionSettingsV1": { + "description": "Promotion settings for promoting chains and oracles", + "properties": { + "redeploy_on_promotion": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false, + "description": "Whether to deploy on all promotions. Enabling this flag allows model code to safely handle environment-specific logic. When a deployment is promoted, a new deployment will be created with a copy of the image.", + "examples": [ + true + ], + "title": "Redeploy On Promotion" + }, + "rolling_deploy": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false, + "description": "Whether the environment should rely on rolling deploy orchestration.", + "examples": [ + true + ], + "title": "Rolling Deploy" + }, + "promotion_cleanup_strategy": { + "anyOf": [ + { + "$ref": "#/components/schemas/PromotionCleanupStrategyV1" + }, + { + "type": "null" + } + ], + "default": "SCALE_TO_ZERO", + "description": "The cleanup strategy to use after a promotion completes.", + "examples": [ + "SCALE_TO_ZERO" + ] + }, + "rolling_deploy_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/RollingDeployConfigV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Rolling deploy configuration for promotions" + }, + "ramp_up_while_promoting": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false, + "description": "Whether to ramp up traffic while promoting", + "examples": [ + true + ], + "title": "Ramp Up While Promoting" + }, + "ramp_up_duration_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 600, + "description": "Duration of the ramp up 
in seconds", + "examples": [ + 600 + ], + "title": "Ramp Up Duration Seconds" + } + }, + "title": "PromotionSettingsV1", + "type": "object" + }, + "RollingDeployConfigV1": { + "description": "Rolling deploy config for promoting chains and oracles", + "properties": { + "rolling_deploy_strategy": { + "$ref": "#/components/schemas/RollingDeployStrategyV1", + "default": "REPLICA", + "description": "The rolling deploy strategy to use for promotions.", + "examples": [ + "REPLICA" + ] + }, + "max_surge_percent": { + "default": 10, + "description": "The maximum surge percentage for rolling deploys.", + "examples": [ + 10 + ], + "title": "Max Surge Percent", + "type": "integer" + }, + "max_unavailable_percent": { + "default": 0, + "description": "The maximum unavailable percentage for rolling deploys.", + "examples": [ + 10 + ], + "title": "Max Unavailable Percent", + "type": "integer" + }, + "stabilization_time_seconds": { + "default": 0, + "description": "The stabilization time in seconds for rolling deploys.", + "examples": [ + 300 + ], + "title": "Stabilization Time Seconds", + "type": "integer" + }, + "replica_overhead_percent": { + "default": 0, + "description": "The replica overhead percentage for rolling deploys.", + "examples": [ + 0 + ], + "title": "Replica Overhead Percent", + "type": "integer" + } + }, + "title": "RollingDeployConfigV1", + "type": "object" + }, + "RollingDeployStrategyV1": { + "description": "The rolling deploy strategy.", + "enum": [ + "REPLICA" + ], + "title": "RollingDeployStrategyV1", + "type": "string" + }, + "EnvironmentsV1": { + "description": "list of environments", + "properties": { + "environments": { + "items": { + "$ref": "#/components/schemas/EnvironmentV1" + }, + "title": "Environments", + "type": "array" + } + }, + "required": [ + "environments" + ], + "title": "EnvironmentsV1", + "type": "object" + }, + "UpdatePromotionSettingsV1": { + "description": "Promotion settings for model promotion", + "properties": { + 
"redeploy_on_promotion": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Whether to deploy on all promotions. Enabling this flag allows model code to safely handle environment-specific logic. When a deployment is promoted, a new deployment will be created with a copy of the image.", + "examples": [ + true + ], + "title": "Redeploy On Promotion" + }, + "rolling_deploy": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Whether the environment should rely on rolling deploy orchestration.", + "examples": [ + true + ], + "title": "Rolling Deploy" + }, + "promotion_cleanup_strategy": { + "anyOf": [ + { + "$ref": "#/components/schemas/PromotionCleanupStrategyV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The cleanup strategy to use after a promotion completes.", + "examples": [ + "SCALE_TO_ZERO" + ] + }, + "rolling_deploy_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/UpdateRollingDeployConfigV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Rolling deploy configuration for promotions" + }, + "ramp_up_while_promoting": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Whether to ramp up traffic while promoting", + "examples": [ + true + ], + "title": "Ramp Up While Promoting" + }, + "ramp_up_duration_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Duration of the ramp up in seconds", + "examples": [ + 600 + ], + "title": "Ramp Up Duration Seconds" + } + }, + "title": "UpdatePromotionSettingsV1", + "type": "object" }, - "TeamsV1": { - "description": "A list of teams.", + "UpdateRollingDeployConfigV1": { + "description": "Rolling deploy config for promoting chains and oracles", "properties": { - "teams": { - "description": "A list of teams", - "items": 
{ - "$ref": "#/components/schemas/TeamV1" - }, - "title": "Teams", - "type": "array" + "rolling_deploy_strategy": { + "anyOf": [ + { + "$ref": "#/components/schemas/RollingDeployStrategyV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The rolling deploy strategy to use for promotions.", + "examples": [ + "REPLICA" + ] + }, + "max_surge_percent": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 10, + "description": "The maximum surge percentage for rolling deploys.", + "examples": [ + 10 + ], + "title": "Max Surge Percent" + }, + "max_unavailable_percent": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The maximum unavailable percentage for rolling deploys.", + "examples": [ + 10 + ], + "title": "Max Unavailable Percent" + }, + "stabilization_time_seconds": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The stabilization time in seconds for rolling deploys.", + "examples": [ + 300 + ], + "title": "Stabilization Time Seconds" + }, + "replica_overhead_percent": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The replica overhead percentage for rolling deploys.", + "examples": [ + 0 + ], + "title": "Replica Overhead Percent" } }, - "required": [ - "teams" - ], - "title": "TeamsV1", + "title": "UpdateRollingDeployConfigV1", "type": "object" }, - "InstanceTypeV1": { - "description": "An instance type.", + "CreateEnvironmentRequestV1": { + "description": "A request to create an environment.", "properties": { - "id": { - "description": "Identifier string for the instance type", - "title": "Id", - "type": "string" - }, "name": { - "description": "Display name of the instance type", + "description": "Name of the environment", + "examples": [ + "staging" + ], "title": "Name", "type": "string" }, - "memory_limit_mib": { - 
"description": "Memory limit of the instance type in Mebibytes", - "title": "Memory Limit Mib", - "type": "integer" - }, - "millicpu_limit": { - "description": "CPU limit of the instance type in millicpu", - "title": "Millicpu Limit", - "type": "integer" - }, - "gpu_count": { - "description": "Number of GPUs on the instance type", - "title": "Gpu Count", - "type": "integer" + "autoscaling_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Autoscaling settings for the environment", + "examples": [ + { + "autoscaling_window": 800, + "concurrency_target": 3, + "max_replica": 2, + "max_scale_down_rate": null, + "min_replica": 1, + "scale_down_delay": 60, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + } + ] }, - "gpu_type": { + "promotion_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/UpdatePromotionSettingsV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Promotion settings for the environment", + "examples": [ + { + "promotion_cleanup_strategy": null, + "ramp_up_duration_seconds": 600, + "ramp_up_while_promoting": true, + "redeploy_on_promotion": true, + "rolling_deploy": true, + "rolling_deploy_config": null + } + ] + } + }, + "required": [ + "name" + ], + "title": "CreateEnvironmentRequestV1", + "type": "object" + }, + "UpdateEnvironmentRequestV1": { + "additionalProperties": false, + "description": "A request to update an environment.", + "properties": { + "autoscaling_settings": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1" }, { "type": "null" } ], - "description": "Type of GPU on the instance type", - "title": "Gpu Type" + "default": null, + "description": "Autoscaling settings for the environment", + "examples": [ + { + "autoscaling_window": 800, + "concurrency_target": 3, + "max_replica": 2, + "max_scale_down_rate": null, + 
"min_replica": 1, + "scale_down_delay": 60, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + } + ] }, - "gpu_memory_limit_mib": { + "promotion_settings": { "anyOf": [ { - "type": "integer" + "$ref": "#/components/schemas/UpdatePromotionSettingsV1" }, { "type": "null" } ], - "description": "Memory limit of the GPU on the instance type in Mebibytes", - "title": "Gpu Memory Limit Mib" + "default": null, + "description": "Promotion settings for the environment", + "examples": [ + { + "promotion_cleanup_strategy": null, + "ramp_up_duration_seconds": 600, + "ramp_up_while_promoting": true, + "redeploy_on_promotion": true, + "rolling_deploy": null, + "rolling_deploy_config": null + } + ] } }, - "required": [ - "id", - "name", - "memory_limit_mib", - "millicpu_limit", - "gpu_count", - "gpu_type", - "gpu_memory_limit_mib" - ], - "title": "InstanceTypeV1", + "title": "UpdateEnvironmentRequestV1", "type": "object" }, - "InstanceTypesV1": { - "description": "A list of instance types.", + "PromoteToEnvironmentRequestV1": { + "description": "A request to promote a deployment to a environment.", "properties": { - "instance_types": { - "items": { - "$ref": "#/components/schemas/InstanceTypeV1" - }, - "title": "Instance Types", - "type": "array" + "scale_down_previous_deployment": { + "default": true, + "description": "Whether to scale down the previous deployment after promoting", + "examples": [ + true + ], + "title": "Scale Down Previous Deployment", + "type": "boolean" + }, + "deployment_id": { + "description": "The id of the deployment to promote", + "title": "Deployment Id", + "type": "string" + }, + "preserve_env_instance_type": { + "default": true, + "description": "Whether to use the promoting deployment's instance type or preserve target environment's instance type", + "examples": [ + true + ], + "title": "Preserve Env Instance Type", + "type": "boolean" } }, "required": [ - "instance_types" + "deployment_id" ], - "title": "InstanceTypesV1", + 
"title": "PromoteToEnvironmentRequestV1", "type": "object" }, - "InstanceTypeWithPriceV1": { + "CancelPromotionStatusV1": { + "description": "The status of a request to cancel a promotion.", + "enum": [ + "CANCELED", + "RAMPING_DOWN" + ], + "title": "CancelPromotionStatusV1", + "type": "string" + }, + "CancelPromotionResponseV1": { + "description": "The response to a request to cancel a promotion.", "properties": { - "instance_type": { - "$ref": "#/components/schemas/InstanceTypeV1", - "description": "Instance type properties." + "status": { + "$ref": "#/components/schemas/CancelPromotionStatusV1", + "description": "Status of the request to cancel a promotion. Can be CANCELED or RAMPING_DOWN." }, - "price": { - "description": "Usage price in USD / minute.", - "title": "Price", - "type": "number" + "message": { + "description": "A message describing the status of the request to cancel a promotion", + "title": "Message", + "type": "string" } }, "required": [ - "instance_type", - "price" + "status", + "message" ], - "title": "InstanceTypeWithPriceV1", + "title": "CancelPromotionResponseV1", "type": "object" }, - "InstanceTypePricesV1": { - "description": "A list of instance types.", + "SignalPromotionResponseV1": { + "description": "The response to a request to signal a rolling promotion.", "properties": { - "instance_types": { - "items": { - "$ref": "#/components/schemas/InstanceTypeWithPriceV1" - }, - "title": "Instance Types", - "type": "array" + "success": { + "description": "Whether the signal was successfully sent", + "title": "Success", + "type": "boolean" } }, "required": [ - "instance_types" + "success" ], - "title": "InstanceTypePricesV1", + "title": "SignalPromotionResponseV1", "type": "object" }, - "ModelV1": { - "description": "A model.", + "ChainV1": { + "description": "A chain.", "properties": { "id": { - "description": "Unique identifier of the model", + "description": "Unique identifier of the chain", "title": "Id", "type": "string" }, "created_at": { 
- "description": "Time the model was created in ISO 8601 format", + "description": "Time the chain was created in ISO 8601 format", "format": "date-time", "title": "Created At", "type": "string" }, "name": { - "description": "Name of the model", + "description": "Name of the chain", "title": "Name", "type": "string" }, "deployments_count": { - "description": "Number of deployments of the model", + "description": "Number of deployments of the chain", "title": "Deployments Count", "type": "integer" }, - "production_deployment_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Unique identifier of the production deployment of the model", - "title": "Production Deployment Id" - }, - "development_deployment_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Unique identifier of the development deployment of the model", - "title": "Development Deployment Id" - }, - "instance_type_name": { - "description": "Name of the instance type for the production deployment of the model", - "title": "Instance Type Name", - "type": "string" - }, "team_name": { - "description": "Name of the team associated with the model.", + "description": "Name of the team associated with the chain", "title": "Team Name", "type": "string" } @@ -3818,41 +8104,38 @@ "created_at", "name", "deployments_count", - "production_deployment_id", - "development_deployment_id", - "instance_type_name", "team_name" ], - "title": "ModelV1", + "title": "ChainV1", "type": "object" }, - "ModelsV1": { - "description": "A list of models.", + "ChainsV1": { + "description": "A list of chains.", "properties": { - "models": { + "chains": { "items": { - "$ref": "#/components/schemas/ModelV1" + "$ref": "#/components/schemas/ChainV1" }, - "title": "Models", + "title": "Chains", "type": "array" } }, "required": [ - "models" + "chains" ], - "title": "ModelsV1", + "title": "ChainsV1", "type": "object" }, - "ModelTombstoneV1": { - 
"description": "A model tombstone.", + "ChainTombstoneV1": { + "description": "A chain tombstone.", "properties": { "id": { - "description": "Unique identifier of the model", + "description": "Unique identifier of the chain", "title": "Id", "type": "string" }, "deleted": { - "description": "Whether the model was deleted", + "description": "Whether the chain was deleted", "title": "Deleted", "type": "boolean" } @@ -3861,152 +8144,77 @@ "id", "deleted" ], - "title": "ModelTombstoneV1", + "title": "ChainTombstoneV1", "type": "object" }, - "AutoscalingSettingsV1": { - "description": "Autoscaling settings for a deployment.", + "ChainDeploymentV1": { + "description": "A deployment of a chain.", "properties": { - "min_replica": { - "description": "Minimum number of replicas", - "title": "Min Replica", - "type": "integer" - }, - "max_replica": { - "description": "Maximum number of replicas", - "title": "Max Replica", - "type": "integer" - }, - "autoscaling_window": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "description": "Timeframe of traffic considered for autoscaling decisions", - "title": "Autoscaling Window" + "id": { + "description": "Unique identifier of the chain deployment", + "title": "Id", + "type": "string" }, - "scale_down_delay": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "description": "Waiting period before scaling down any active replica", - "title": "Scale Down Delay" + "created_at": { + "description": "Time the chain deployment was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" }, - "concurrency_target": { - "description": "Number of requests per replica before scaling up", - "title": "Concurrency Target", - "type": "integer" + "chain_id": { + "description": "Unique identifier of the chain", + "title": "Chain Id", + "type": "string" }, - "target_utilization_percentage": { + "environment": { "anyOf": [ { - "type": "integer" + "type": 
"string" }, { "type": "null" } ], - "description": "Target utilization percentage for scaling up/down.", - "title": "Target Utilization Percentage" + "description": "Environment the chain deployment is deployed in", + "title": "Environment" }, - "target_in_flight_tokens": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Target number of in-flight tokens for autoscaling decisions. Early access only.", - "title": "Target In Flight Tokens" + "chainlets": { + "description": "Chainlets in the chain deployment", + "items": { + "$ref": "#/components/schemas/ChainletV1" + }, + "title": "Chainlets", + "type": "array" + }, + "status": { + "$ref": "#/components/schemas/DeploymentStatusV1", + "description": "Status of the chain deployment" } }, "required": [ - "min_replica", - "max_replica", - "autoscaling_window", - "scale_down_delay", - "concurrency_target", - "target_utilization_percentage" + "id", + "created_at", + "chain_id", + "environment", + "chainlets", + "status" ], - "title": "AutoscalingSettingsV1", + "title": "ChainDeploymentV1", "type": "object" }, - "DeploymentStatusV1": { - "description": "The status of a deployment.", - "enum": [ - "BUILDING", - "DEPLOYING", - "DEPLOY_FAILED", - "LOADING_MODEL", - "ACTIVE", - "UNHEALTHY", - "BUILD_FAILED", - "BUILD_STOPPED", - "DEACTIVATING", - "INACTIVE", - "FAILED", - "UPDATING", - "SCALED_TO_ZERO", - "WAKING_UP" - ], - "title": "DeploymentStatusV1", - "type": "string" - }, - "DeploymentV1": { - "description": "A deployment of a model.", + "ChainletV1": { + "description": "A chainlet in a chain deployment.", "properties": { "id": { - "description": "Unique identifier of the deployment", + "description": "Unique identifier of the chainlet", "title": "Id", "type": "string" }, - "created_at": { - "description": "Time the deployment was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", - "type": "string" - }, "name": { - "description": 
"Name of the deployment", + "description": "Name of the chainlet", "title": "Name", "type": "string" }, - "model_id": { - "description": "Unique identifier of the model", - "title": "Model Id", - "type": "string" - }, - "is_production": { - "description": "Whether the deployment is the production deployment of the model", - "title": "Is Production", - "type": "boolean" - }, - "is_development": { - "description": "Whether the deployment is the development deployment of the model", - "title": "Is Development", - "type": "boolean" - }, - "status": { - "$ref": "#/components/schemas/DeploymentStatusV1", - "description": "Status of the deployment" - }, - "active_replica_count": { - "description": "Number of active replicas", - "title": "Active Replica Count", - "type": "integer" - }, "autoscaling_settings": { "anyOf": [ { @@ -4016,70 +8224,41 @@ "type": "null" } ], - "description": "Autoscaling settings for the deployment. If null, the model has not finished deploying" + "description": "Autoscaling settings for the chainlet. 
If null, it has not finished deploying" }, "instance_type_name": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Name of the instance type the model deployment is running on", - "title": "Instance Type Name" + "description": "Name of the instance type the chainlet is deployed on", + "title": "Instance Type Name", + "type": "string" }, - "environment": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "The environment associated with the deployment", - "title": "Environment" + "active_replica_count": { + "description": "Number of active replicas", + "title": "Active Replica Count", + "type": "integer" }, - "labels": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "User-provided key-value labels for the deployment", - "title": "Labels" + "status": { + "$ref": "#/components/schemas/DeploymentStatusV1", + "description": "Status of the chainlet" } }, "required": [ "id", - "created_at", "name", - "model_id", - "is_production", - "is_development", - "status", - "active_replica_count", "autoscaling_settings", "instance_type_name", - "environment" + "active_replica_count", + "status" ], - "title": "DeploymentV1", + "title": "ChainletV1", "type": "object" }, - "DeploymentsV1": { - "description": "A list of deployments of a model.", + "ChainDeploymentsV1": { + "description": "A list of chain deployments.", "properties": { "deployments": { - "description": "A list of deployments of a model", + "description": "A list of chain deployments", "items": { - "$ref": "#/components/schemas/DeploymentV1" + "$ref": "#/components/schemas/ChainDeploymentV1" }, "title": "Deployments", "type": "array" @@ -4088,1830 +8267,1824 @@ "required": [ "deployments" ], - "title": "DeploymentsV1", + "title": "ChainDeploymentsV1", "type": "object" }, - "DeploymentTombstoneV1": { - "description": "A model deployment tombstone.", + 
"ChainDeploymentTombstoneV1": { + "description": "A chain deployment tombstone.", "properties": { "id": { - "description": "Unique identifier of the deployment", + "description": "Unique identifier of the chain deployment", "title": "Id", "type": "string" }, "deleted": { - "description": "Whether the deployment was deleted", + "description": "Whether the chain deployment was deleted", "title": "Deleted", "type": "boolean" }, - "model_id": { - "description": "Unique identifier of the model", - "title": "Model Id", + "chain_id": { + "description": "Unique identifier of the chain", + "title": "Chain Id", "type": "string" } }, "required": [ "id", "deleted", - "model_id" + "chain_id" ], - "title": "DeploymentTombstoneV1", + "title": "ChainDeploymentTombstoneV1", "type": "object" }, - "UpdateAutoscalingSettingsV1": { - "additionalProperties": false, - "description": "A request to update autoscaling settings for a deployment. All fields are optional, and we only update ones passed in.", + "ChainletEnvironmentSettingsRequestV1": { + "description": "Request to create environment settings for a chainlet.", "properties": { - "min_replica": { + "chainlet_name": { + "description": "Name of the chainlet", + "examples": [ + "HelloWorld" + ], + "title": "Chainlet Name", + "type": "string" + }, + "autoscaling_settings": { "anyOf": [ { - "type": "integer" + "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1" }, { "type": "null" } ], "default": null, - "description": "Minimum number of replicas", + "description": "Autoscaling settings for the chainlet", "examples": [ - 0 + { + "autoscaling_window": 60, + "concurrency_target": 1, + "max_replica": 1, + "max_scale_down_rate": null, + "min_replica": 0, + "scale_down_delay": 900, + "target_in_flight_tokens": null, + "target_utilization_percentage": 70 + } + ] + }, + "instance_type_id": { + "default": "1x2", + "description": "ID of the instance type to use for the chainlet", + "examples": [ + "1x4", + "2x8", + "A10G:2x24x96", + 
"H100:2x52x468" + ], + "title": "Instance Type Id", + "type": "string" + } + }, + "required": [ + "chainlet_name" + ], + "title": "ChainletEnvironmentSettingsRequestV1", + "type": "object" + }, + "CreateChainEnvironmentRequestV1": { + "description": "A request to create a custom environment for a chain.", + "properties": { + "name": { + "description": "Name of the environment", + "examples": [ + "staging" ], - "title": "Min Replica" + "title": "Name", + "type": "string" }, - "max_replica": { + "promotion_settings": { "anyOf": [ { - "type": "integer" + "$ref": "#/components/schemas/UpdatePromotionSettingsV1" }, { "type": "null" } ], "default": null, - "description": "Maximum number of replicas", + "description": "Promotion settings for the environment", "examples": [ - 7 - ], - "title": "Max Replica" - }, - "autoscaling_window": { - "anyOf": [ { - "type": "integer" - }, - { - "type": "null" + "promotion_cleanup_strategy": null, + "ramp_up_duration_seconds": 600, + "ramp_up_while_promoting": true, + "redeploy_on_promotion": true, + "rolling_deploy": null, + "rolling_deploy_config": null } - ], - "default": null, - "description": "Timeframe of traffic considered for autoscaling decisions", - "examples": [ - 600 - ], - "title": "Autoscaling Window" + ] }, - "scale_down_delay": { + "chainlet_settings": { "anyOf": [ { - "type": "integer" + "items": { + "$ref": "#/components/schemas/ChainletEnvironmentSettingsRequestV1" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Waiting period before scaling down any active replica", + "description": "Mapping of chainlet name to the desired chainlet environment settings", "examples": [ - 120 + [ + { + "autoscaling_settings": { + "autoscaling_window": 800, + "concurrency_target": 4, + "max_replica": 3, + "max_scale_down_rate": null, + "min_replica": 2, + "scale_down_delay": 63, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + }, + "chainlet_name": "HelloWorld", + 
"instance_type_id": "2x8" + }, + { + "autoscaling_settings": { + "autoscaling_window": null, + "concurrency_target": null, + "max_replica": 3, + "max_scale_down_rate": null, + "min_replica": 3, + "scale_down_delay": null, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + }, + "chainlet_name": "RandInt", + "instance_type_id": "A10Gx8x32" + } + ] ], - "title": "Scale Down Delay" + "title": "Chainlet Settings" + } + }, + "required": [ + "name" + ], + "title": "CreateChainEnvironmentRequestV1", + "type": "object" + }, + "ChainletEnvironmentSettingsV1": { + "description": "Environment settings for a chainlet.", + "properties": { + "chainlet_name": { + "description": "Name of the chainlet", + "title": "Chainlet Name", + "type": "string" }, - "concurrency_target": { + "autoscaling_settings": { "anyOf": [ { - "type": "integer" + "$ref": "#/components/schemas/AutoscalingSettingsV1" }, { "type": "null" } ], - "default": null, - "description": "Number of requests per replica before scaling up", - "examples": [ - 2 - ], - "title": "Concurrency Target" + "description": "Autoscaling settings for the chainlet. 
If null, it has not finished deploying" }, - "target_utilization_percentage": { + "instance_type": { + "$ref": "#/components/schemas/InstanceTypeV1", + "description": "Instance type for the chainlet" + } + }, + "required": [ + "chainlet_name", + "autoscaling_settings", + "instance_type" + ], + "title": "ChainletEnvironmentSettingsV1", + "type": "object" + }, + "ChainEnvironmentV1": { + "description": "Environment for oracles.", + "properties": { + "name": { + "description": "Name of the environment", + "title": "Name", + "type": "string" + }, + "created_at": { + "description": "Time the environment was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "chain_id": { + "description": "Unique identifier of the chain", + "title": "Chain Id", + "type": "string" + }, + "promotion_settings": { + "$ref": "#/components/schemas/PromotionSettingsV1", + "description": "Promotion settings for the environment" + }, + "chainlet_settings": { + "description": "Environment settings for the chainlets", + "items": { + "$ref": "#/components/schemas/ChainletEnvironmentSettingsV1" + }, + "title": "Chainlet Settings", + "type": "array" + }, + "current_deployment": { "anyOf": [ { - "type": "integer" + "$ref": "#/components/schemas/ChainDeploymentV1" }, { "type": "null" } ], - "default": null, - "description": "Target utilization percentage for scaling up/down.", - "examples": [ - 70 - ], - "title": "Target Utilization Percentage" + "description": "Current chain deployment of the environment" }, - "target_in_flight_tokens": { + "candidate_deployment": { "anyOf": [ { - "type": "integer" + "$ref": "#/components/schemas/ChainDeploymentV1" }, { "type": "null" } ], "default": null, - "description": "Target number of in-flight tokens for autoscaling decisions. 
Early access only.", - "examples": [ - 40000 - ], - "title": "Target In Flight Tokens" - } - }, - "title": "UpdateAutoscalingSettingsV1", - "type": "object" - }, - "UpdateAutoscalingSettingsStatusV1": { - "description": "The status of a request to update autoscaling settings.", - "enum": [ - "ACCEPTED", - "QUEUED", - "UNCHANGED" - ], - "title": "UpdateAutoscalingSettingsStatusV1", - "type": "string" - }, - "UpdateAutoscalingSettingsResponseV1": { - "description": "The response to a request to update autoscaling settings.", - "properties": { - "status": { - "$ref": "#/components/schemas/UpdateAutoscalingSettingsStatusV1", - "description": "Status of the request to update autoscaling settings" - }, - "message": { - "description": "A message describing the status of the request to update autoscaling settings", - "title": "Message", - "type": "string" + "description": "Candidate chain deployment being promoted to the environment, if a promotion is in progress" } }, "required": [ - "status", - "message" + "name", + "created_at", + "chain_id", + "promotion_settings", + "chainlet_settings", + "current_deployment" ], - "title": "UpdateAutoscalingSettingsResponseV1", + "title": "ChainEnvironmentV1", "type": "object" }, - "PromoteRequestV1": { - "description": "A request to promote a deployment to production.", + "UpdateChainEnvironmentRequestV1": { + "description": "A request to update a chain environment.", "properties": { - "scale_down_previous_production": { - "default": true, - "description": "Whether to scale down the previous production deployment after promoting", - "examples": [ - true + "promotion_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/UpdatePromotionSettingsV1" + }, + { + "type": "null" + } ], - "title": "Scale Down Previous Production", - "type": "boolean" - }, - "preserve_env_instance_type": { - "default": true, - "description": "Whether to use the promoting deployment's instance type or preserve target environment's instance type", + 
"default": null, + "description": "Promotion settings for the environment", "examples": [ - true - ], - "title": "Preserve Env Instance Type", - "type": "boolean" + { + "promotion_cleanup_strategy": null, + "ramp_up_duration_seconds": 600, + "ramp_up_while_promoting": true, + "redeploy_on_promotion": null, + "rolling_deploy": null, + "rolling_deploy_config": null + } + ] } }, - "title": "PromoteRequestV1", + "title": "UpdateChainEnvironmentRequestV1", "type": "object" }, - "ActivateResponseV1": { - "description": "The response to a request to activate a deployment.", + "UpdateChainEnvironmentResponseV1": { + "description": "A response to update a chain environment.", "properties": { - "success": { - "default": true, - "description": "Whether the deployment was successfully activated", - "title": "Success", + "ok": { + "description": "Whether the update was successful", + "title": "Ok", "type": "boolean" } }, - "title": "ActivateResponseV1", + "required": [ + "ok" + ], + "title": "UpdateChainEnvironmentResponseV1", "type": "object" }, - "DeactivateResponseV1": { - "description": "The response to a request to deactivate a deployment.", + "PromoteToChainEnvironmentRequestV1": { + "description": "A request to promote a deployment to a environment.", "properties": { - "success": { + "scale_down_previous_deployment": { "default": true, - "description": "Whether the deployment was successfully deactivated", - "title": "Success", + "description": "Whether to scale down the previous deployment after promoting", + "examples": [ + true + ], + "title": "Scale Down Previous Deployment", "type": "boolean" + }, + "deployment_id": { + "description": "The id of the chain deployment to promote", + "title": "Deployment Id", + "type": "string" } }, - "title": "DeactivateResponseV1", + "required": [ + "deployment_id" + ], + "title": "PromoteToChainEnvironmentRequestV1", "type": "object" }, - "RetryDeploymentResponseV1": { - "description": "The response to a request to retry a 
deployment.", + "ChainletEnvironmentAutoscalingSettingsUpdateV1": { + "description": "The request to update the autoscaling settings for a chainlet.", "properties": { - "retried": { - "description": "Whether the retry was successfully initiated", - "title": "Retried", - "type": "boolean" + "chainlet_name": { + "description": "Name of the chainlet", + "examples": [ + "HelloWorld" + ], + "title": "Chainlet Name", + "type": "string" }, - "reason": { - "anyOf": [ - { - "type": "string" - }, + "autoscaling_settings": { + "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1", + "description": "Autoscaling settings for the chainlet", + "examples": [ { - "type": "null" + "autoscaling_window": 800, + "concurrency_target": 3, + "max_replica": 2, + "max_scale_down_rate": null, + "min_replica": 1, + "scale_down_delay": 60, + "target_in_flight_tokens": null, + "target_utilization_percentage": null } - ], - "default": null, - "description": "Explanation of the result. Provided when retried is false to explain why retry was not possible.", - "title": "Reason" - }, - "deployment": { - "$ref": "#/components/schemas/DeploymentV1", - "description": "The deployment that was retried" + ] } }, "required": [ - "retried", - "deployment" + "chainlet_name", + "autoscaling_settings" ], - "title": "RetryDeploymentResponseV1", + "title": "ChainletEnvironmentAutoscalingSettingsUpdateV1", "type": "object" }, - "SortOrderV1": { - "enum": [ - "asc", - "desc" + "UpdateChainletEnvironmentAutoscalingSettingsRequestV1": { + "description": "A request to update the autoscaling settings for a multiple chainlets in an environment.\nIf a chainlet name doesn't exist, an error is returned.", + "properties": { + "updates": { + "description": "Mapping of chainlet name to the desired chainlet autoscaling settings. 
If the chainlet name doesn't exist, an error is returned.", + "examples": [ + [ + { + "autoscaling_settings": { + "autoscaling_window": 800, + "concurrency_target": 4, + "max_replica": 3, + "max_scale_down_rate": null, + "min_replica": 2, + "scale_down_delay": 63, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + }, + "chainlet_name": "HelloWorld" + } + ], + [ + { + "autoscaling_settings": { + "autoscaling_window": null, + "concurrency_target": null, + "max_replica": null, + "max_scale_down_rate": null, + "min_replica": 0, + "scale_down_delay": null, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + }, + "chainlet_name": "HelloWorld" + }, + { + "autoscaling_settings": { + "autoscaling_window": null, + "concurrency_target": null, + "max_replica": null, + "max_scale_down_rate": null, + "min_replica": 0, + "scale_down_delay": null, + "target_in_flight_tokens": null, + "target_utilization_percentage": null + }, + "chainlet_name": "RandInt" + } + ] + ], + "items": { + "$ref": "#/components/schemas/ChainletEnvironmentAutoscalingSettingsUpdateV1" + }, + "title": "Updates", + "type": "array" + } + }, + "required": [ + "updates" ], - "title": "SortOrderV1", - "type": "string" + "title": "UpdateChainletEnvironmentAutoscalingSettingsRequestV1", + "type": "object" }, - "GetDeploymentLogsRequestV1": { - "description": "A request to fetch deployment logs.", + "ChainletEnvironmentInstanceTypeUpdateV1": { + "description": "A request to update the environment settings for a chainlet.", "properties": { - "start_epoch_millis": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Epoch millis timestamp to start fetching logs", - "title": "Start Epoch Millis" - }, - "end_epoch_millis": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } + "chainlet_name": { + "description": "Name of the chainlet", + "examples": [ + "HelloWorld" ], - "default": null, - 
"description": "Epoch millis timestamp to end fetching logs", - "title": "End Epoch Millis" + "title": "Chainlet Name", + "type": "string" }, - "direction": { - "anyOf": [ - { - "$ref": "#/components/schemas/SortOrderV1" - }, - { - "type": "null" - } + "instance_type_id": { + "description": "Key of the instance type to use for the chainlet", + "examples": [ + "1x4", + "2x8", + "A10G:2x24x96" ], - "default": null, - "description": "Sort order for logs" - }, - "limit": { - "anyOf": [ - { - "maximum": 1000, - "minimum": 1, - "type": "integer" - }, - { - "type": "null" - } + "title": "Instance Type Id", + "type": "string" + } + }, + "required": [ + "chainlet_name", + "instance_type_id" + ], + "title": "ChainletEnvironmentInstanceTypeUpdateV1", + "type": "object" + }, + "UpdateChainletEnvironmentInstanceTypeRequestV1": { + "description": "A request to update the instance types for chainlets in an environment. Multiples\nupdates can be made in one request. The updates will be processed in batch and a new deployment\nwill be created, deployed and promoted into the environment.", + "properties": { + "updates": { + "description": "Mapping of chainlet name to the desired chainlet instance type. If the chainlet name doesn't exist, an error is returned.", + "examples": [ + [ + { + "chainlet_name": "HelloWorld", + "instance_type_id": "1x4" + }, + { + "chainlet_name": "RandInt", + "instance_type_id": "A10G:2x24x96" + } + ] ], - "default": 500, - "description": "Limit of logs to fetch in a single request", - "title": "Limit" + "items": { + "$ref": "#/components/schemas/ChainletEnvironmentInstanceTypeUpdateV1" + }, + "title": "Updates", + "type": "array" } }, - "title": "GetDeploymentLogsRequestV1", + "required": [ + "updates" + ], + "title": "UpdateChainletEnvironmentInstanceTypeRequestV1", "type": "object" }, - "LogV1": { + "UpdateChainletEnvironmentInstanceTypeResponseV1": { + "description": "A response to update the environment settings for a chainlet. 
If updating the instance type\nresulted in a re-deployment, `requires_redeployment` will be True and the resulting deployment\nwill be returned in the `chain_deployment` field.", "properties": { - "timestamp": { - "description": "Epoch nanosecond timestamp of the log message.", - "title": "Timestamp", - "type": "string" - }, - "message": { - "description": "The contents of the log message.", - "title": "Message", - "type": "string" + "requires_redeployment": { + "description": "Whether the resource update requires a re-deployment to update the instance type.", + "title": "Requires Redeployment", + "type": "boolean" }, - "replica": { + "chain_deployment": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/ChainDeploymentV1" }, { "type": "null" } ], - "description": "The replica the log line was emitted from.", - "title": "Replica" + "description": "The chain deployment resulting from the resource update, if any." + }, + "chainlet_environment_settings": { + "description": "The updated chainlet environment settings", + "items": { + "$ref": "#/components/schemas/ChainletEnvironmentSettingsV1" + }, + "title": "Chainlet Environment Settings", + "type": "array" } }, "required": [ - "timestamp", - "message", - "replica" + "requires_redeployment", + "chain_deployment", + "chainlet_environment_settings" ], - "title": "LogV1", + "title": "UpdateChainletEnvironmentInstanceTypeResponseV1", "type": "object" }, - "GetLogsResponseV1": { - "description": "A response to querying logs.", + "UpsertTrainingProjectV1": { + "description": "Fields that can be upserted on a training project.", "properties": { - "logs": { - "description": "Logs for a specific entity.", - "items": { - "$ref": "#/components/schemas/LogV1" - }, - "title": "Logs", - "type": "array" + "name": { + "description": "Name of the training project.", + "examples": [ + "My Training Project" + ], + "title": "Name", + "type": "string" } }, "required": [ - "logs" + "name" ], - "title": "GetLogsResponseV1", + 
"title": "UpsertTrainingProjectV1", "type": "object" }, - "TerminateReplicaResponseV1": { - "description": "The response to a request to terminate a replica in a deployment.", + "UpsertTrainingProjectRequestV1": { + "description": "A request to upsert a training project.", "properties": { - "success": { - "default": true, - "description": "Whether the replica was successfully terminated", - "title": "Success", - "type": "boolean" + "training_project": { + "$ref": "#/components/schemas/UpsertTrainingProjectV1", + "description": "The training project to upsert." } }, - "title": "TerminateReplicaResponseV1", + "required": [ + "training_project" + ], + "title": "UpsertTrainingProjectRequestV1", "type": "object" }, - "EnvironmentV1": { - "description": "Environment for oracles.", + "CheckpointSyncStatus": { + "description": "Lifecycle state for the checkpoint uploader.", + "enum": [ + "SYNCING", + "COMPLETED" + ], + "title": "CheckpointSyncStatus", + "type": "string" + }, + "TrainingJobV1": { "properties": { - "name": { - "description": "Name of the environment", - "title": "Name", + "id": { + "description": "Unique identifier of the training job.", + "title": "Id", "type": "string" }, "created_at": { - "description": "Time the environment was created in ISO 8601 format", + "description": "Time the job was created in ISO 8601 format.", "format": "date-time", "title": "Created At", "type": "string" }, - "model_id": { - "description": "Unique identifier of the model", - "title": "Model Id", + "current_status": { + "description": "Current status of the training job.", + "title": "Current Status", "type": "string" }, - "current_deployment": { + "error_message": { "anyOf": [ { - "$ref": "#/components/schemas/DeploymentV1" + "type": "string" }, { "type": "null" } ], - "description": "Current deployment of the environment" + "default": null, + "description": "Error message if the training job failed.", + "title": "Error Message" }, - "candidate_deployment": { + 
"instance_type": { + "$ref": "#/components/schemas/InstanceTypeV1", + "description": "Instance type of the training job." + }, + "updated_at": { + "description": "Time the job was updated in ISO 8601 format.", + "format": "date-time", + "title": "Updated At", + "type": "string" + }, + "training_project_id": { + "description": "ID of the training project.", + "title": "Training Project Id", + "type": "string" + }, + "training_project": { + "$ref": "#/components/schemas/TrainingProjectSummaryV1", + "description": "Summary of the training project." + }, + "name": { "anyOf": [ { - "$ref": "#/components/schemas/DeploymentV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Candidate deployment being promoted to the environment, if a promotion is in progress" + "description": "Name of the training job.", + "examples": [ + "gpt-oss-job" + ], + "title": "Name" }, - "in_progress_promotion": { + "checkpoint_sync_status": { "anyOf": [ { - "$ref": "#/components/schemas/InProgressPromotionV1" + "$ref": "#/components/schemas/CheckpointSyncStatus" }, { "type": "null" } ], "default": null, - "description": "Details of the in-progress promotion, if any" - }, - "autoscaling_settings": { - "$ref": "#/components/schemas/AutoscalingSettingsV1", - "description": "Autoscaling settings for the environment" - }, - "promotion_settings": { - "$ref": "#/components/schemas/PromotionSettingsV1", - "description": "Promotion settings for the environment" - }, - "instance_type": { - "$ref": "#/components/schemas/InstanceTypeV1", - "description": "Instance type for the environment" - } - }, - "required": [ - "name", - "created_at", - "model_id", - "current_deployment", - "autoscaling_settings", - "promotion_settings", - "instance_type" - ], - "title": "EnvironmentV1", - "type": "object" - }, - "InProgressPromotionStatusV1": { - "enum": [ - "RELEASING", - "RAMPING_UP", - "RAMPING_DOWN", - "PAUSED", - "SUCCEEDED", - "FAILED", - "CANCELED" - ], - "title": 
"InProgressPromotionStatusV1", - "type": "string" - }, - "InProgressPromotionV1": { - "description": "Details of an in-progress promotion.", - "properties": { - "status": { - "$ref": "#/components/schemas/InProgressPromotionStatusV1", - "description": "Status of the promotion" + "description": "Checkpoint sync status of the training job." }, - "percent_traffic_to_new_version": { - "description": "Percentage of traffic routed to the candidate deployment", - "title": "Percent Traffic To New Version", + "priority": { + "default": 0, + "description": "Queue priority. Higher values are dequeued first. NULL is treated as 0.", + "title": "Priority", "type": "integer" }, - "error_message": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Error message if promotion failed", - "title": "Error Message" - }, - "rolling_deploy": { + "user": { "anyOf": [ { - "type": "boolean" + "$ref": "#/components/schemas/UserV1" }, { "type": "null" } ], "default": null, - "description": "Whether this is a rolling deploy", - "title": "Rolling Deploy" + "description": "The user who created the training job." 
} }, "required": [ - "status", - "percent_traffic_to_new_version" + "id", + "created_at", + "current_status", + "instance_type", + "updated_at", + "training_project_id", + "training_project" ], - "title": "InProgressPromotionV1", + "title": "TrainingJobV1", "type": "object" }, - "PromotionCleanupStrategyV1": { - "description": "The promotion cleanup strategy.", - "enum": [ - "KEEP", - "SCALE_TO_ZERO", - "DEACTIVATE" + "TrainingProjectSummaryV1": { + "description": "A summary of a training project.", + "properties": { + "id": { + "description": "Unique identifier of the training project.", + "title": "Id", + "type": "string" + }, + "name": { + "description": "Name of the training project.", + "title": "Name", + "type": "string" + } + }, + "required": [ + "id", + "name" ], - "title": "PromotionCleanupStrategyV1", - "type": "string" + "title": "TrainingProjectSummaryV1", + "type": "object" }, - "PromotionSettingsV1": { - "description": "Promotion settings for promoting chains and oracles", + "TrainingProjectV1": { "properties": { - "redeploy_on_promotion": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": false, - "description": "Whether to deploy on all promotions. Enabling this flag allows model code to safely handle environment-specific logic. 
When a deployment is promoted, a new deployment will be created with a copy of the image.", - "examples": [ - true - ], - "title": "Redeploy On Promotion" + "id": { + "description": "Unique identifier of the training project", + "title": "Id", + "type": "string" }, - "rolling_deploy": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": false, - "description": "Whether the environment should rely on rolling deploy orchestration.", - "examples": [ - true - ], - "title": "Rolling Deploy" + "name": { + "description": "Name of the training project.", + "title": "Name", + "type": "string" }, - "promotion_cleanup_strategy": { - "anyOf": [ - { - "$ref": "#/components/schemas/PromotionCleanupStrategyV1" - }, - { - "type": "null" - } - ], - "default": "SCALE_TO_ZERO", - "description": "The cleanup strategy to use after a promotion completes.", - "examples": [ - "SCALE_TO_ZERO" - ] + "created_at": { + "description": "Time the training project was created in ISO 8601 format.", + "format": "date-time", + "title": "Created At", + "type": "string" }, - "rolling_deploy_config": { + "updated_at": { + "description": "Time the training project was updated in ISO 8601 format.", + "format": "date-time", + "title": "Updated At", + "type": "string" + }, + "team_name": { "anyOf": [ { - "$ref": "#/components/schemas/RollingDeployConfigV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Rolling deploy configuration for promotions" + "description": "Name of the team associated with the training project.", + "title": "Team Name" }, - "ramp_up_while_promoting": { + "latest_job": { "anyOf": [ { - "type": "boolean" + "$ref": "#/components/schemas/TrainingJobV1" }, { "type": "null" } ], - "default": false, - "description": "Whether to ramp up traffic while promoting", - "examples": [ - true - ], - "title": "Ramp Up While Promoting" - }, - "ramp_up_duration_seconds": { + "description": "Most recently created training job for the 
training project." + } + }, + "required": [ + "id", + "name", + "created_at", + "updated_at", + "latest_job" + ], + "title": "TrainingProjectV1", + "type": "object" + }, + "UserV1": { + "description": "A user.", + "properties": { + "email": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], - "default": 600, - "description": "Duration of the ramp up in seconds", - "examples": [ - 600 - ], - "title": "Ramp Up Duration Seconds" + "default": null, + "description": "Email of the user.", + "title": "Email" } }, - "title": "PromotionSettingsV1", + "title": "UserV1", "type": "object" }, - "RollingDeployConfigV1": { - "description": "Rolling deploy config for promoting chains and oracles", + "UpsertTrainingProjectResponseV1": { + "description": "A response to upserting a training project.", "properties": { - "rolling_deploy_strategy": { - "$ref": "#/components/schemas/RollingDeployStrategyV1", - "default": "REPLICA", - "description": "The rolling deploy strategy to use for promotions.", - "examples": [ - "REPLICA" - ] - }, - "max_surge_percent": { - "default": 10, - "description": "The maximum surge percentage for rolling deploys.", - "examples": [ - 10 - ], - "title": "Max Surge Percent", - "type": "integer" - }, - "max_unavailable_percent": { - "default": 0, - "description": "The maximum unavailable percentage for rolling deploys.", - "examples": [ - 10 - ], - "title": "Max Unavailable Percent", - "type": "integer" - }, - "stabilization_time_seconds": { - "default": 0, - "description": "The stabilization time in seconds for rolling deploys.", - "examples": [ - 300 - ], - "title": "Stabilization Time Seconds", - "type": "integer" - }, - "replica_overhead_percent": { - "default": 0, - "description": "The replica overhead percentage for rolling deploys.", - "examples": [ - 0 - ], - "title": "Replica Overhead Percent", - "type": "integer" + "training_project": { + "$ref": "#/components/schemas/TrainingProjectV1", + "description": "The upserted 
training project." } }, - "title": "RollingDeployConfigV1", + "required": [ + "training_project" + ], + "title": "UpsertTrainingProjectResponseV1", "type": "object" }, - "RollingDeployStrategyV1": { - "description": "The rolling deploy strategy.", - "enum": [ - "REPLICA" + "ListTrainingProjectsResponseV1": { + "description": "A response to list training projects.", + "properties": { + "training_projects": { + "description": "List of training projects.", + "items": { + "$ref": "#/components/schemas/TrainingProjectV1" + }, + "title": "Training Projects", + "type": "array" + } + }, + "required": [ + "training_projects" ], - "title": "RollingDeployStrategyV1", - "type": "string" + "title": "ListTrainingProjectsResponseV1", + "type": "object" }, - "EnvironmentsV1": { - "description": "list of environments", + "ListTrainingJobsResponseV1": { + "description": "A response to list training jobs.", "properties": { - "environments": { + "training_project": { + "$ref": "#/components/schemas/TrainingProjectV1", + "description": "The training project." + }, + "training_jobs": { + "description": "List of training jobs.", "items": { - "$ref": "#/components/schemas/EnvironmentV1" + "$ref": "#/components/schemas/TrainingJobV1" }, - "title": "Environments", + "title": "Training Jobs", "type": "array" } }, "required": [ - "environments" + "training_project", + "training_jobs" ], - "title": "EnvironmentsV1", + "title": "ListTrainingJobsResponseV1", "type": "object" }, - "UpdatePromotionSettingsV1": { - "description": "Promotion settings for model promotion", + "AwsIamDockerAuthV1": { + "description": "AWS details for the registry.", "properties": { - "redeploy_on_promotion": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Whether to deploy on all promotions. Enabling this flag allows model code to safely handle environment-specific logic. 
When a deployment is promoted, a new deployment will be created with a copy of the image.", - "examples": [ - true - ], - "title": "Redeploy On Promotion" - }, - "rolling_deploy": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Whether the environment should rely on rolling deploy orchestration.", - "examples": [ - true - ], - "title": "Rolling Deploy" + "access_key_secret_ref": { + "$ref": "#/components/schemas/SecretReferenceV1", + "description": "Name of the access key secret" }, - "promotion_cleanup_strategy": { - "anyOf": [ - { - "$ref": "#/components/schemas/PromotionCleanupStrategyV1" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The cleanup strategy to use after a promotion completes.", - "examples": [ - "SCALE_TO_ZERO" - ] + "secret_access_key_secret_ref": { + "$ref": "#/components/schemas/SecretReferenceV1", + "description": "Name of the secret key secret" + } + }, + "required": [ + "access_key_secret_ref", + "secret_access_key_secret_ref" + ], + "title": "AwsIamDockerAuthV1", + "type": "object" + }, + "AwsOidcDockerAuthV1": { + "description": "AWS OIDC details for the registry.", + "properties": { + "role_arn": { + "description": "AWS IAM role ARN for OIDC authentication", + "title": "Role Arn", + "type": "string" }, - "rolling_deploy_config": { + "region": { + "description": "AWS region for OIDC authentication", + "title": "Region", + "type": "string" + } + }, + "required": [ + "role_arn", + "region" + ], + "title": "AwsOidcDockerAuthV1", + "type": "object" + }, + "BasetenLatestCheckpointConfig": { + "properties": { + "project_name": { "anyOf": [ { - "$ref": "#/components/schemas/UpdateRollingDeployConfigV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Rolling deploy configuration for promotions" + "description": "Name of the project to load the checkpoint from", + "title": "Project Name" }, - "ramp_up_while_promoting": { + 
"job_id": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Whether to ramp up traffic while promoting", - "examples": [ - true - ], - "title": "Ramp Up While Promoting" + "description": "ID of the job to load the checkpoint from", + "title": "Job Id" }, - "ramp_up_duration_seconds": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Duration of the ramp up in seconds", - "examples": [ - 600 - ], - "title": "Ramp Up Duration Seconds" + "typ": { + "const": "baseten_latest_checkpoint", + "default": "baseten_latest_checkpoint", + "title": "Typ", + "type": "string" } }, - "title": "UpdatePromotionSettingsV1", + "title": "BasetenLatestCheckpointConfig", "type": "object" }, - "UpdateRollingDeployConfigV1": { - "description": "Rolling deploy config for promoting chains and oracles", + "BasetenNamedCheckpointConfig": { "properties": { - "rolling_deploy_strategy": { - "anyOf": [ - { - "$ref": "#/components/schemas/RollingDeployStrategyV1" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The rolling deploy strategy to use for promotions.", - "examples": [ - "REPLICA" - ] - }, - "max_surge_percent": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 10, - "description": "The maximum surge percentage for rolling deploys.", - "examples": [ - 10 - ], - "title": "Max Surge Percent" + "checkpoint_name": { + "description": "Name of the checkpoint to load from", + "title": "Checkpoint Name", + "type": "string" }, - "max_unavailable_percent": { + "project_name": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "The maximum unavailable percentage for rolling deploys.", - "examples": [ - 10 - ], - "title": "Max Unavailable Percent" + "description": "Name of the project to load the checkpoint from", + "title": "Project Name" }, - 
"stabilization_time_seconds": { + "job_id": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "The stabilization time in seconds for rolling deploys.", - "examples": [ - 300 - ], - "title": "Stabilization Time Seconds" + "description": "ID of the job to load the checkpoint from", + "title": "Job Id" }, - "replica_overhead_percent": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The replica overhead percentage for rolling deploys.", - "examples": [ - 0 - ], - "title": "Replica Overhead Percent" + "typ": { + "const": "baseten_named_checkpoint", + "default": "baseten_named_checkpoint", + "title": "Typ", + "type": "string" } }, - "title": "UpdateRollingDeployConfigV1", + "required": [ + "checkpoint_name" + ], + "title": "BasetenNamedCheckpointConfig", "type": "object" }, - "CreateEnvironmentRequestV1": { - "description": "A request to create an environment.", + "CreateJobWeightConfigV1": { + "description": "Weight source configuration for MDN (Model Distribution Network).\n\nEnables training jobs to mount external model weights from HuggingFace, S3, GCS, or R2\nvia MDN's caching and CSI mounting infrastructure. Weights are mirrored once and\ndeduplicated across training jobs.", "properties": { - "name": { - "description": "Name of the environment", + "source": { + "description": "Weight source URI. 
Supported formats: hf://, s3://, gs://, r2://", "examples": [ - "staging" + "hf://meta-llama/Llama-3-8B@main", + "s3://my-bucket/models/llama", + "gs://my-bucket/models/llama", + "r2://account_id.bucket/models/llama" ], - "title": "Name", + "title": "Source", "type": "string" }, - "autoscaling_settings": { + "mount_location": { + "description": "Path where weights will be mounted in the container", + "examples": [ + "/app/models/base", + "/models/llama" + ], + "title": "Mount Location", + "type": "string" + }, + "allow_patterns": { "anyOf": [ { - "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Autoscaling settings for the environment", + "description": "File patterns to include (Unix-style shell patterns)", "examples": [ - { - "autoscaling_window": 800, - "concurrency_target": 3, - "max_replica": 2, - "min_replica": 1, - "scale_down_delay": 60, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - } - ] + [ + "*.safetensors", + "config.json" + ] + ], + "title": "Allow Patterns" }, - "promotion_settings": { + "ignore_patterns": { "anyOf": [ { - "$ref": "#/components/schemas/UpdatePromotionSettingsV1" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Promotion settings for the environment", + "description": "File patterns to exclude (Unix-style shell patterns)", "examples": [ - { - "promotion_cleanup_strategy": null, - "ramp_up_duration_seconds": 600, - "ramp_up_while_promoting": true, - "redeploy_on_promotion": true, - "rolling_deploy": true, - "rolling_deploy_config": null - } - ] - } - }, - "required": [ - "name" - ], - "title": "CreateEnvironmentRequestV1", - "type": "object" - }, - "UpdateEnvironmentRequestV1": { - "additionalProperties": false, - "description": "A request to update an environment.", - "properties": { - "autoscaling_settings": { + [ + 
"*.bin", + "*.h5" + ] + ], + "title": "Ignore Patterns" + }, + "auth_secret_name": { "anyOf": [ { - "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Autoscaling settings for the environment", + "description": "Name of the workspace secret for authentication (e.g., HuggingFace token)", "examples": [ - { - "autoscaling_window": 800, - "concurrency_target": 3, - "max_replica": 2, - "min_replica": 1, - "scale_down_delay": 60, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - } - ] + "hf_token", + "aws_credentials" + ], + "title": "Auth Secret Name" }, - "promotion_settings": { + "auth": { "anyOf": [ { - "$ref": "#/components/schemas/UpdatePromotionSettingsV1" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "description": "Promotion settings for the environment", + "description": "Authentication configuration for the weight source.", "examples": [ { - "promotion_cleanup_strategy": null, - "ramp_up_duration_seconds": 600, - "ramp_up_while_promoting": true, - "redeploy_on_promotion": true, - "rolling_deploy": null, - "rolling_deploy_config": null + "auth_method": "CUSTOM_SECRET", + "auth_secret_name": "hf_token" } - ] + ], + "title": "Auth" } }, - "title": "UpdateEnvironmentRequestV1", + "required": [ + "source", + "mount_location" + ], + "title": "CreateJobWeightConfigV1", "type": "object" }, - "PromoteToEnvironmentRequestV1": { - "description": "A request to promote a deployment to a environment.", + "CreateTrainingJobAcceleratorV1": { "properties": { - "scale_down_previous_deployment": { - "default": true, - "description": "Whether to scale down the previous deployment after promoting", + "accelerator": { + "description": "GPU type for the training job.", "examples": [ - true + "H100" ], - "title": "Scale Down Previous Deployment", - "type": "boolean" - }, - "deployment_id": { - "description": "The id of the 
deployment to promote", - "title": "Deployment Id", + "title": "Accelerator", "type": "string" }, - "preserve_env_instance_type": { - "default": true, - "description": "Whether to use the promoting deployment's instance type or preserve target environment's instance type", + "count": { + "description": "GPUs needed for the training job.", "examples": [ - true + 2 ], - "title": "Preserve Env Instance Type", - "type": "boolean" - } - }, - "required": [ - "deployment_id" - ], - "title": "PromoteToEnvironmentRequestV1", - "type": "object" - }, - "CancelPromotionStatusV1": { - "description": "The status of a request to cancel a promotion.", - "enum": [ - "CANCELED", - "RAMPING_DOWN" - ], - "title": "CancelPromotionStatusV1", - "type": "string" - }, - "CancelPromotionResponseV1": { - "description": "The response to a request to cancel a promotion.", - "properties": { - "status": { - "$ref": "#/components/schemas/CancelPromotionStatusV1", - "description": "Status of the request to cancel a promotion. Can be CANCELED or RAMPING_DOWN." 
- }, - "message": { - "description": "A message describing the status of the request to cancel a promotion", - "title": "Message", - "type": "string" + "title": "Count", + "type": "integer" } }, "required": [ - "status", - "message" + "accelerator", + "count" ], - "title": "CancelPromotionResponseV1", + "title": "CreateTrainingJobAcceleratorV1", "type": "object" }, - "SignalPromotionResponseV1": { - "description": "The response to a request to signal a rolling promotion.", + "CreateTrainingJobCacheConfig": { "properties": { - "success": { - "description": "Whether the signal was successfully sent", - "title": "Success", + "enable_legacy_hf_mount": { + "default": false, + "description": "Whether to enable the legacy Hugging Face cache.", + "examples": [ + true + ], + "title": "Enable Legacy Hf Mount", "type": "boolean" - } - }, - "required": [ - "success" - ], - "title": "SignalPromotionResponseV1", - "type": "object" - }, - "ChainV1": { - "description": "A chain.", - "properties": { - "id": { - "description": "Unique identifier of the chain", - "title": "Id", - "type": "string" - }, - "created_at": { - "description": "Time the chain was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", - "type": "string" }, - "name": { - "description": "Name of the chain", - "title": "Name", - "type": "string" + "enabled": { + "default": false, + "description": "Whether to enable the read-write cache.", + "examples": [ + true + ], + "title": "Enabled", + "type": "boolean" }, - "deployments_count": { - "description": "Number of deployments of the chain", - "title": "Deployments Count", - "type": "integer" + "require_cache_affinity": { + "default": true, + "description": "Whether to require region affinity for the read-write cache. 
If False, the resulting job is not guaranteed to be deployed alongside the previous cache.", + "examples": [ + true, + false + ], + "title": "Require Cache Affinity", + "type": "boolean" }, - "team_name": { - "description": "Name of the team associated with the chain", - "title": "Team Name", + "mount_base_path": { + "default": "/root/.cache", + "description": "Mount base path for the cache directory. The project cache and team cache will be mounted under this path.", + "examples": [ + "/workspace/.cache", + "/root/.cache" + ], + "title": "Mount Base Path", "type": "string" } }, - "required": [ - "id", - "created_at", - "name", - "deployments_count", - "team_name" - ], - "title": "ChainV1", - "type": "object" - }, - "ChainsV1": { - "description": "A list of chains.", - "properties": { - "chains": { - "items": { - "$ref": "#/components/schemas/ChainV1" - }, - "title": "Chains", - "type": "array" - } - }, - "required": [ - "chains" - ], - "title": "ChainsV1", + "title": "CreateTrainingJobCacheConfig", "type": "object" }, - "ChainTombstoneV1": { - "description": "A chain tombstone.", + "CreateTrainingJobCheckpointingConfig": { "properties": { - "id": { - "description": "Unique identifier of the chain", - "title": "Id", - "type": "string" - }, - "deleted": { - "description": "Whether the chain was deleted", - "title": "Deleted", + "enabled": { + "default": false, + "description": "Whether checkpointing is enabled.", + "examples": [ + true + ], + "title": "Enabled", "type": "boolean" + }, + "checkpoint_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "path where checkpoints will be saved.", + "examples": [ + "/mnt/ckpts" + ], + "title": "Checkpoint Path" + }, + "volume_size_gib": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Size of the volume in gibibytes. 
If not provided, the default size will be used", + "examples": [ + 10 + ], + "title": "Volume Size Gib" } }, - "required": [ - "id", - "deleted" - ], - "title": "ChainTombstoneV1", + "title": "CreateTrainingJobCheckpointingConfig", "type": "object" }, - "ChainDeploymentV1": { - "description": "A deployment of a chain.", + "CreateTrainingJobComputeV1": { + "description": "Configuration to specify the compute for a training job.", "properties": { - "id": { - "description": "Unique identifier of the chain deployment", - "title": "Id", - "type": "string" + "node_count": { + "default": 1, + "description": "Number of nodes for the training job.", + "examples": [ + 1 + ], + "title": "Node Count", + "type": "integer" }, - "created_at": { - "description": "Time the chain deployment was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", - "type": "string" + "cpu_count": { + "default": 1, + "description": "Number of cpus for the training job.", + "examples": [ + 1 + ], + "title": "Cpu Count", + "type": "integer" }, - "chain_id": { - "description": "Unique identifier of the chain", - "title": "Chain Id", + "memory": { + "default": "2Gi", + "description": "Memory for the training job.", + "examples": [ + "2Gi" + ], + "title": "Memory", "type": "string" }, - "environment": { + "accelerator": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/CreateTrainingJobAcceleratorV1" }, { "type": "null" } ], - "description": "Environment the chain deployment is deployed in", - "title": "Environment" - }, - "chainlets": { - "description": "Chainlets in the chain deployment", - "items": { - "$ref": "#/components/schemas/ChainletV1" - }, - "title": "Chainlets", - "type": "array" - }, - "status": { - "$ref": "#/components/schemas/DeploymentStatusV1", - "description": "Status of the chain deployment" + "default": null, + "description": "GPU specification for the training job", + "examples": [ + { + "accelerator": "H100", + "count": 2 + } + ] } }, - 
"required": [ - "id", - "created_at", - "chain_id", - "environment", - "chainlets", - "status" - ], - "title": "ChainDeploymentV1", + "title": "CreateTrainingJobComputeV1", "type": "object" }, - "ChainletV1": { - "description": "A chainlet in a chain deployment.", + "CreateTrainingJobImageV1": { + "description": "Configuration to create a training job image.", "properties": { - "id": { - "description": "Unique identifier of the chainlet", - "title": "Id", - "type": "string" - }, - "name": { - "description": "Name of the chainlet", - "title": "Name", + "base_image": { + "description": "Base image for the training job.", + "examples": [ + "hello-world" + ], + "title": "Base Image", "type": "string" }, - "autoscaling_settings": { + "docker_auth": { "anyOf": [ { - "$ref": "#/components/schemas/AutoscalingSettingsV1" + "$ref": "#/components/schemas/DockerAuthV1" }, { "type": "null" } ], - "description": "Autoscaling settings for the chainlet. If null, it has not finished deploying" - }, - "instance_type_name": { - "description": "Name of the instance type the chainlet is deployed on", - "title": "Instance Type Name", - "type": "string" - }, - "active_replica_count": { - "description": "Number of active replicas", - "title": "Active Replica Count", - "type": "integer" - }, - "status": { - "$ref": "#/components/schemas/DeploymentStatusV1", - "description": "Status of the chainlet" + "default": null, + "description": "Docker authentication credentials" } }, "required": [ - "id", - "name", - "autoscaling_settings", - "instance_type_name", - "active_replica_count", - "status" + "base_image" ], - "title": "ChainletV1", + "title": "CreateTrainingJobImageV1", "type": "object" }, - "ChainDeploymentsV1": { - "description": "A list of chain deployments.", + "CreateTrainingJobRuntimeV1": { + "description": "Configuration to specify the runtime environment for a training job.", "properties": { - "deployments": { - "description": "A list of chain deployments", + "start_commands": { + 
"description": "Commands to execute when starting the runtime.", + "examples": [ + [ + "python main.py" + ] + ], "items": { - "$ref": "#/components/schemas/ChainDeploymentV1" + "type": "string" }, - "title": "Deployments", + "title": "Start Commands", "type": "array" - } - }, - "required": [ - "deployments" - ], - "title": "ChainDeploymentsV1", - "type": "object" - }, - "ChainDeploymentTombstoneV1": { - "description": "A chain deployment tombstone.", - "properties": { - "id": { - "description": "Unique identifier of the chain deployment", - "title": "Id", - "type": "string" }, - "deleted": { - "description": "Whether the chain deployment was deleted", - "title": "Deleted", - "type": "boolean" + "environment_variables": { + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/SecretReferenceV1" + } + ] + }, + "description": "Environment variables to set in the runtime.", + "examples": [ + { + "API_KEY": "your_api_key_here", + "PATH": "/usr/bin" + } + ], + "title": "Environment Variables", + "type": "object" }, - "chain_id": { - "description": "Unique identifier of the chain", - "title": "Chain Id", - "type": "string" - } - }, - "required": [ - "id", - "deleted", - "chain_id" - ], - "title": "ChainDeploymentTombstoneV1", - "type": "object" - }, - "ChainletEnvironmentSettingsRequestV1": { - "description": "Request to create environment settings for a chainlet.", - "properties": { - "chainlet_name": { - "description": "Name of the chainlet", + "artifacts": { + "description": "Runtime artifacts for the training job.", + "items": { + "$ref": "#/components/schemas/CreateTrainingJobS3Artifact" + }, + "title": "Artifacts", + "type": "array" + }, + "enable_cache": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Deprecated. 
Use cache_config instead.", "examples": [ - "HelloWorld" + true ], - "title": "Chainlet Name", - "type": "string" + "title": "Enable Cache" }, - "autoscaling_settings": { + "cache_config": { "anyOf": [ { - "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1" + "$ref": "#/components/schemas/CreateTrainingJobCacheConfig" }, { "type": "null" } ], "default": null, - "description": "Autoscaling settings for the chainlet", + "description": "Configuration for the read-write cache.", "examples": [ { - "autoscaling_window": 60, - "concurrency_target": 1, - "max_replica": 1, - "min_replica": 0, - "scale_down_delay": 900, - "target_in_flight_tokens": null, - "target_utilization_percentage": 70 + "enable_legacy_hf_mount": true, + "enabled": true, + "mount_base_path": "/root/.cache", + "require_cache_affinity": true } ] }, - "instance_type_id": { - "default": "1x2", - "description": "ID of the instance type to use for the chainlet", + "checkpointing_config": { + "$ref": "#/components/schemas/CreateTrainingJobCheckpointingConfig", + "description": "Configuration for checkpointing.", + "examples": [ + { + "checkpoint_path": "/mnt/ckpts", + "enabled": true, + "volume_size_gib": null + } + ] + }, + "load_checkpoint_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LoadCheckpointConfig" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Configuration for loading checkpoints" + } + }, + "title": "CreateTrainingJobRuntimeV1", + "type": "object" + }, + "CreateTrainingJobS3Artifact": { + "properties": { + "s3_bucket": { + "description": "S3 bucket for the uploaded runtime artifact.", + "examples": [ + "my-s3-bucket" + ], + "title": "S3 Bucket", + "type": "string" + }, + "s3_key": { + "description": "S3 key for the uploaded runtime artifact.", "examples": [ - "1x4", - "2x8", - "A10G:2x24x96", - "H100:2x52x468" + "my-s3-key" ], - "title": "Instance Type Id", + "title": "S3 Key", "type": "string" } }, "required": [ - "chainlet_name" + "s3_bucket", + 
"s3_key" ], - "title": "ChainletEnvironmentSettingsRequestV1", + "title": "CreateTrainingJobS3Artifact", "type": "object" }, - "CreateChainEnvironmentRequestV1": { - "description": "A request to create a custom environment for a chain.", + "CreateTrainingJobV1": { + "description": "Configuration for a training job.", "properties": { - "name": { - "description": "Name of the environment", + "image": { + "$ref": "#/components/schemas/CreateTrainingJobImageV1", "examples": [ - "staging" - ], - "title": "Name", - "type": "string" + { + "base_image": "hello-world", + "docker_auth": null + } + ] }, - "promotion_settings": { + "compute": { + "$ref": "#/components/schemas/CreateTrainingJobComputeV1", + "examples": [ + { + "accelerator": { + "accelerator": "H100", + "count": 2 + }, + "cpu_count": 1, + "memory": "2Gi", + "node_count": 1 + } + ] + }, + "runtime": { + "$ref": "#/components/schemas/CreateTrainingJobRuntimeV1", + "description": "Configuration for the runtime environment of the training job.", + "examples": [ + { + "artifacts": [], + "cache_config": null, + "checkpointing_config": { + "checkpoint_path": null, + "enabled": false, + "volume_size_gib": null + }, + "enable_cache": null, + "environment_variables": { + "API_KEY": "your_api_key_here", + "PATH": "/usr/bin" + }, + "load_checkpoint_config": null, + "start_commands": [ + "python main.py" + ] + } + ] + }, + "name": { "anyOf": [ { - "$ref": "#/components/schemas/UpdatePromotionSettingsV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Promotion settings for the environment", + "description": "Name of the training job.", "examples": [ + "gpt-oss-job" + ], + "title": "Name" + }, + "truss_user_env": { + "anyOf": [ { - "promotion_cleanup_strategy": null, - "ramp_up_duration_seconds": 600, - "ramp_up_while_promoting": true, - "redeploy_on_promotion": true, - "rolling_deploy": null, - "rolling_deploy_config": null + "$ref": "#/components/schemas/TrussUserEnv" + }, + { + "type": 
"null" } - ] + ], + "default": null, + "description": "Truss user environment information" }, - "chainlet_settings": { + "interactive_session": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/ChainletEnvironmentSettingsRequestV1" - }, - "type": "array" + "$ref": "#/components/schemas/InteractiveSessionConfigV1" }, { "type": "null" } ], "default": null, - "description": "Mapping of chainlet name to the desired chainlet environment settings", + "description": "Configuration for interactive debugging sessions." + }, + "weights": { + "description": "MDN weight sources to mount in the training container. Weights are mirrored and cached for fast startup.", "examples": [ [ { - "autoscaling_settings": { - "autoscaling_window": 800, - "concurrency_target": 4, - "max_replica": 3, - "min_replica": 2, - "scale_down_delay": 63, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - }, - "chainlet_name": "HelloWorld", - "instance_type_id": "2x8" - }, - { - "autoscaling_settings": { - "autoscaling_window": null, - "concurrency_target": null, - "max_replica": 3, - "min_replica": 3, - "scale_down_delay": null, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - }, - "chainlet_name": "RandInt", - "instance_type_id": "A10Gx8x32" + "allow_patterns": null, + "auth": null, + "auth_secret_name": null, + "ignore_patterns": null, + "mount_location": "/app/models/base", + "source": "hf://meta-llama/Llama-3-8B@main" } ] ], - "title": "Chainlet Settings" - } - }, - "required": [ - "name" - ], - "title": "CreateChainEnvironmentRequestV1", - "type": "object" - }, - "ChainletEnvironmentSettingsV1": { - "description": "Environment settings for a chainlet.", - "properties": { - "chainlet_name": { - "description": "Name of the chainlet", - "title": "Chainlet Name", - "type": "string" + "items": { + "$ref": "#/components/schemas/CreateJobWeightConfigV1" + }, + "title": "Weights", + "type": "array" }, - "autoscaling_settings": { + 
"enable_baseten_workdir": { + "default": false, + "description": "When enabled, uses /b10/workspace as the working directory instead of the image WORKDIR.", + "examples": [ + false, + true + ], + "title": "Enable Baseten Workdir", + "type": "boolean" + }, + "priority": { "anyOf": [ { - "$ref": "#/components/schemas/AutoscalingSettingsV1" + "type": "integer" }, { "type": "null" } ], - "description": "Autoscaling settings for the chainlet. If null, it has not finished deploying" - }, - "instance_type": { - "$ref": "#/components/schemas/InstanceTypeV1", - "description": "Instance type for the chainlet" + "default": null, + "description": "Queue priority. Higher values are dequeued first. Defaults to 0.", + "examples": [ + 0, + 10, + 100 + ], + "title": "Priority" } }, "required": [ - "chainlet_name", - "autoscaling_settings", - "instance_type" + "image" ], - "title": "ChainletEnvironmentSettingsV1", + "title": "CreateTrainingJobV1", "type": "object" }, - "ChainEnvironmentV1": { - "description": "Environment for oracles.", + "DockerAuthType": { + "enum": [ + "GCP_SERVICE_ACCOUNT_JSON", + "AWS_IAM", + "AWS_OIDC", + "GCP_OIDC", + "REGISTRY_SECRET" + ], + "title": "DockerAuthType", + "type": "string" + }, + "DockerAuthV1": { + "description": "Docker authentication credentials.", "properties": { - "name": { - "description": "Name of the environment", - "title": "Name", - "type": "string" - }, - "created_at": { - "description": "Time the environment was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", + "registry": { + "description": "Registry to authenticate with", + "title": "Registry", "type": "string" }, - "chain_id": { - "description": "Unique identifier of the chain", - "title": "Chain Id", - "type": "string" + "auth_method": { + "$ref": "#/components/schemas/DockerAuthType", + "description": "Method to authenticate with the registry", + "examples": [ + "GCP_SERVICE_ACCOUNT_JSON", + "AWS_IAM", + "AWS_OIDC", + "GCP_OIDC", + 
"REGISTRY_SECRET" + ] }, - "promotion_settings": { - "$ref": "#/components/schemas/PromotionSettingsV1", - "description": "Promotion settings for the environment" + "gcp_service_account_json_docker_auth": { + "anyOf": [ + { + "$ref": "#/components/schemas/GcpServiceAccountJsonDockerAuthV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "GCP service account details for the registry" }, - "chainlet_settings": { - "description": "Environment settings for the chainlets", - "items": { - "$ref": "#/components/schemas/ChainletEnvironmentSettingsV1" - }, - "title": "Chainlet Settings", - "type": "array" + "aws_iam_docker_auth": { + "anyOf": [ + { + "$ref": "#/components/schemas/AwsIamDockerAuthV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "AWS details for the registry" }, - "current_deployment": { + "aws_oidc_docker_auth": { "anyOf": [ { - "$ref": "#/components/schemas/ChainDeploymentV1" + "$ref": "#/components/schemas/AwsOidcDockerAuthV1" }, { "type": "null" } ], - "description": "Current chain deployment of the environment" + "default": null, + "description": "AWS OIDC details for the registry" }, - "candidate_deployment": { + "gcp_oidc_docker_auth": { "anyOf": [ { - "$ref": "#/components/schemas/ChainDeploymentV1" + "$ref": "#/components/schemas/GcpOidcDockerAuthV1" }, { "type": "null" } ], "default": null, - "description": "Candidate chain deployment being promoted to the environment, if a promotion is in progress" - } - }, - "required": [ - "name", - "created_at", - "chain_id", - "promotion_settings", - "chainlet_settings", - "current_deployment" - ], - "title": "ChainEnvironmentV1", - "type": "object" - }, - "UpdateChainEnvironmentRequestV1": { - "description": "A request to update a chain environment.", - "properties": { - "promotion_settings": { + "description": "GCP OIDC details for the registry" + }, + "registry_secret_docker_auth": { "anyOf": [ { - "$ref": "#/components/schemas/UpdatePromotionSettingsV1" + 
"$ref": "#/components/schemas/RegistrySecretDockerAuthV1" }, { "type": "null" } ], "default": null, - "description": "Promotion settings for the environment", - "examples": [ - { - "promotion_cleanup_strategy": null, - "ramp_up_duration_seconds": 600, - "ramp_up_while_promoting": true, - "redeploy_on_promotion": null, - "rolling_deploy": null, - "rolling_deploy_config": null - } - ] - } - }, - "title": "UpdateChainEnvironmentRequestV1", - "type": "object" - }, - "UpdateChainEnvironmentResponseV1": { - "description": "A response to update a chain environment.", - "properties": { - "ok": { - "description": "Whether the update was successful", - "title": "Ok", - "type": "boolean" + "description": "Required when auth_method is REGISTRY_SECRET. Supports any Docker registry (Docker Hub, GHCR, NGC, etc.) via username:password credentials stored as a Baseten secret." } }, "required": [ - "ok" + "registry", + "auth_method" ], - "title": "UpdateChainEnvironmentResponseV1", + "title": "DockerAuthV1", "type": "object" }, - "PromoteToChainEnvironmentRequestV1": { - "description": "A request to promote a deployment to a environment.", + "GcpOidcDockerAuthV1": { + "description": "GCP OIDC details for the registry.", "properties": { - "scale_down_previous_deployment": { - "default": true, - "description": "Whether to scale down the previous deployment after promoting", - "examples": [ - true - ], - "title": "Scale Down Previous Deployment", - "type": "boolean" + "service_account": { + "description": "GCP service account name for OIDC authentication", + "title": "Service Account", + "type": "string" }, - "deployment_id": { - "description": "The id of the chain deployment to promote", - "title": "Deployment Id", + "workload_identity_provider": { + "description": "GCP workload identity provider for OIDC authentication", + "title": "Workload Identity Provider", "type": "string" } }, "required": [ - "deployment_id" + "service_account", + "workload_identity_provider" ], - "title": 
"PromoteToChainEnvironmentRequestV1", + "title": "GcpOidcDockerAuthV1", "type": "object" }, - "ChainletEnvironmentAutoscalingSettingsUpdateV1": { - "description": "The request to update the autoscaling settings for a chainlet.", + "GcpServiceAccountJsonDockerAuthV1": { + "description": "GCP details for the registry.", "properties": { - "chainlet_name": { - "description": "Name of the chainlet", - "examples": [ - "HelloWorld" - ], - "title": "Chainlet Name", - "type": "string" - }, - "autoscaling_settings": { - "$ref": "#/components/schemas/UpdateAutoscalingSettingsV1", - "description": "Autoscaling settings for the chainlet", - "examples": [ - { - "autoscaling_window": 800, - "concurrency_target": 3, - "max_replica": 2, - "min_replica": 1, - "scale_down_delay": 60, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - } - ] + "service_account_json_secret_ref": { + "$ref": "#/components/schemas/SecretReferenceV1", + "description": "Name of the service account secret" } }, "required": [ - "chainlet_name", - "autoscaling_settings" + "service_account_json_secret_ref" ], - "title": "ChainletEnvironmentAutoscalingSettingsUpdateV1", + "title": "GcpServiceAccountJsonDockerAuthV1", "type": "object" }, - "UpdateChainletEnvironmentAutoscalingSettingsRequestV1": { - "description": "A request to update the autoscaling settings for a multiple chainlets in an environment.\nIf a chainlet name doesn't exist, an error is returned.", + "GitInfo": { "properties": { - "updates": { - "description": "Mapping of chainlet name to the desired chainlet autoscaling settings. 
If the chainlet name doesn't exist, an error is returned.", - "examples": [ - [ - { - "autoscaling_settings": { - "autoscaling_window": 800, - "concurrency_target": 4, - "max_replica": 3, - "min_replica": 2, - "scale_down_delay": 63, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - }, - "chainlet_name": "HelloWorld" - } - ], - [ - { - "autoscaling_settings": { - "autoscaling_window": null, - "concurrency_target": null, - "max_replica": null, - "min_replica": 0, - "scale_down_delay": null, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - }, - "chainlet_name": "HelloWorld" - }, - { - "autoscaling_settings": { - "autoscaling_window": null, - "concurrency_target": null, - "max_replica": null, - "min_replica": 0, - "scale_down_delay": null, - "target_in_flight_tokens": null, - "target_utilization_percentage": null - }, - "chainlet_name": "RandInt" - } - ] + "latest_commit_sha": { + "title": "Latest Commit Sha", + "type": "string" + }, + "latest_tag": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } ], - "items": { - "$ref": "#/components/schemas/ChainletEnvironmentAutoscalingSettingsUpdateV1" - }, - "title": "Updates", - "type": "array" + "title": "Latest Tag" + }, + "commits_since_tag": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Commits Since Tag" + }, + "has_uncommitted_changes": { + "title": "Has Uncommitted Changes", + "type": "boolean" } }, "required": [ - "updates" + "latest_commit_sha", + "latest_tag", + "commits_since_tag", + "has_uncommitted_changes" ], - "title": "UpdateChainletEnvironmentAutoscalingSettingsRequestV1", + "title": "GitInfo", "type": "object" }, - "ChainletEnvironmentInstanceTypeUpdateV1": { - "description": "A request to update the environment settings for a chainlet.", + "InteractiveSessionConfigV1": { + "description": "Configuration for interactive debugging sessions on training jobs.", "properties": { - "chainlet_name": { - 
"description": "Name of the chainlet", - "examples": [ - "HelloWorld" - ], - "title": "Chainlet Name", - "type": "string" + "trigger": { + "$ref": "#/components/schemas/V1InteractiveSessionTrigger", + "default": "on_demand", + "description": "When to create the interactive session. 'on_startup' creates on job start, 'on_failure' creates on job failure, 'on_demand' bypasses automatic session creation." }, - "instance_type_id": { - "description": "Key of the instance type to use for the chainlet", + "timeout_minutes": { + "default": 480, + "description": "Number of minutes before the interactive session times out.", "examples": [ - "1x4", - "2x8", - "A10G:2x24x96" + 480, + 1440, + 10080 ], - "title": "Instance Type Id", - "type": "string" + "title": "Timeout Minutes", + "type": "integer" + }, + "session_provider": { + "$ref": "#/components/schemas/V1InteractiveSessionProvider", + "default": "vs_code", + "description": "The IDE client for the interactive session." + }, + "auth_provider": { + "$ref": "#/components/schemas/V1InteractiveSessionAuthProvider", + "default": "github", + "description": "The authentication provider for the interactive session." } }, - "required": [ - "chainlet_name", - "instance_type_id" - ], - "title": "ChainletEnvironmentInstanceTypeUpdateV1", + "title": "InteractiveSessionConfigV1", "type": "object" - }, - "UpdateChainletEnvironmentInstanceTypeRequestV1": { - "description": "A request to update the instance types for chainlets in an environment. Multiples\nupdates can be made in one request. The updates will be processed in batch and a new deployment\nwill be created, deployed and promoted into the environment.", + }, + "LoadCheckpointConfig": { "properties": { - "updates": { - "description": "Mapping of chainlet name to the desired chainlet instance type. 
If the chainlet name doesn't exist, an error is returned.", - "examples": [ - [ + "enabled": { + "default": false, + "description": "Whether checkpoint loading is enabled", + "title": "Enabled", + "type": "boolean" + }, + "download_folder": { + "default": "/tmp/loaded_checkpoints", + "description": "Folder where checkpoints will be downloaded", + "title": "Download Folder", + "type": "string" + }, + "checkpoints": { + "description": "List of checkpoint configurations", + "items": { + "discriminator": { + "mapping": { + "baseten_latest_checkpoint": "#/components/schemas/BasetenLatestCheckpointConfig", + "baseten_named_checkpoint": "#/components/schemas/BasetenNamedCheckpointConfig" + }, + "propertyName": "typ" + }, + "oneOf": [ { - "chainlet_name": "HelloWorld", - "instance_type_id": "1x4" + "$ref": "#/components/schemas/BasetenLatestCheckpointConfig" }, { - "chainlet_name": "RandInt", - "instance_type_id": "A10G:2x24x96" + "$ref": "#/components/schemas/BasetenNamedCheckpointConfig" } ] - ], - "items": { - "$ref": "#/components/schemas/ChainletEnvironmentInstanceTypeUpdateV1" }, - "title": "Updates", + "title": "Checkpoints", "type": "array" } }, - "required": [ - "updates" - ], - "title": "UpdateChainletEnvironmentInstanceTypeRequestV1", + "title": "LoadCheckpointConfig", "type": "object" }, - "UpdateChainletEnvironmentInstanceTypeResponseV1": { - "description": "A response to update the environment settings for a chainlet. If updating the instance type\nresulted in a re-deployment, `requires_redeployment` will be True and the resulting deployment\nwill be returned in the `chain_deployment` field.", + "RegistrySecretDockerAuthV1": { + "description": "Authentication via a Baseten secret for any Docker registry (Docker Hub, GHCR, NGC, etc.).\nThe referenced secret must contain credentials in the format 'username:password'.\nFor Docker Hub, set registry to 'https://index.docker.io/v1/'. 
For GHCR, use 'ghcr.io'.", "properties": { - "requires_redeployment": { - "description": "Whether the resource update requires a re-deployment to update the instance type.", - "title": "Requires Redeployment", - "type": "boolean" - }, - "chain_deployment": { - "anyOf": [ - { - "$ref": "#/components/schemas/ChainDeploymentV1" - }, - { - "type": "null" - } - ], - "description": "The chain deployment resulting from the resource update, if any." - }, - "chainlet_environment_settings": { - "description": "The updated chainlet environment settings", - "items": { - "$ref": "#/components/schemas/ChainletEnvironmentSettingsV1" - }, - "title": "Chainlet Environment Settings", - "type": "array" + "secret_ref": { + "$ref": "#/components/schemas/SecretReferenceV1", + "description": "Reference to a Baseten secret containing credentials in the format 'username:password'" } }, "required": [ - "requires_redeployment", - "chain_deployment", - "chainlet_environment_settings" + "secret_ref" ], - "title": "UpdateChainletEnvironmentInstanceTypeResponseV1", + "title": "RegistrySecretDockerAuthV1", "type": "object" }, - "UpsertTrainingProjectV1": { - "description": "Fields that can be upserted on a training project.", + "SecretReferenceV1": { "properties": { "name": { - "description": "Name of the training project.", + "description": "Name of the secret to reference.", "examples": [ - "My Training Project" + "hf_token" ], "title": "Name", "type": "string" @@ -5920,51 +10093,13 @@ "required": [ "name" ], - "title": "UpsertTrainingProjectV1", - "type": "object" - }, - "UpsertTrainingProjectRequestV1": { - "description": "A request to upsert a training project.", - "properties": { - "training_project": { - "$ref": "#/components/schemas/UpsertTrainingProjectV1", - "description": "The training project to upsert." 
- } - }, - "required": [ - "training_project" - ], - "title": "UpsertTrainingProjectRequestV1", + "title": "SecretReferenceV1", "type": "object" }, - "CheckpointSyncStatus": { - "description": "Lifecycle state for the checkpoint uploader.", - "enum": [ - "SYNCING", - "COMPLETED" - ], - "title": "CheckpointSyncStatus", - "type": "string" - }, - "TrainingJobV1": { + "TrussUserEnv": { + "description": "This data models is used to flexibly store info alongside oracle versions.\n\nThere is a corresponding data model in the truss client.\nIn contrast, here all fields are optional for backwards compatibility with old\nclients.", "properties": { - "id": { - "description": "Unique identifier of the training job.", - "title": "Id", - "type": "string" - }, - "created_at": { - "description": "Time the job was created in ISO 8601 format.", - "format": "date-time", - "title": "Created At", - "type": "string" - }, - "current_status": { - "description": "Current status of the training job.", - "title": "Current Status", - "type": "string" - }, - "error_message": { + "truss_client_version": { "anyOf": [ { "type": "string" @@ -5974,29 +10109,9 @@ } ], "default": null, - "description": "Error message if the training job failed.", - "title": "Error Message" - }, - "instance_type": { - "$ref": "#/components/schemas/InstanceTypeV1", - "description": "Instance type of the training job." - }, - "updated_at": { - "description": "Time the job was updated in ISO 8601 format.", - "format": "date-time", - "title": "Updated At", - "type": "string" - }, - "training_project_id": { - "description": "ID of the training project.", - "title": "Training Project Id", - "type": "string" - }, - "training_project": { - "$ref": "#/components/schemas/TrainingProjectSummaryV1", - "description": "Summary of the training project." 
+ "title": "Truss Client Version" }, - "name": { + "python_version": { "anyOf": [ { "type": "string" @@ -6006,396 +10121,528 @@ } ], "default": null, - "description": "Name of the training job.", - "examples": [ - "gpt-oss-job" - ], - "title": "Name" + "title": "Python Version" }, - "checkpoint_sync_status": { + "pydantic_version": { "anyOf": [ { - "$ref": "#/components/schemas/CheckpointSyncStatus" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Checkpoint sync status of the training job." + "title": "Pydantic Version" }, - "user": { + "mypy_version": { "anyOf": [ { - "$ref": "#/components/schemas/UserV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "The user who created the training job." + "title": "Mypy Version" + }, + "is_library_deployment": { + "default": false, + "title": "Is Library Deployment", + "type": "boolean" + }, + "is_frontend_deployment": { + "default": false, + "title": "Is Frontend Deployment", + "type": "boolean" + }, + "git_info": { + "anyOf": [ + { + "$ref": "#/components/schemas/GitInfo" + }, + { + "type": "null" + } + ], + "default": null + } + }, + "title": "TrussUserEnv", + "type": "object" + }, + "V1InteractiveSessionAuthProvider": { + "enum": [ + "github", + "microsoft" + ], + "title": "V1InteractiveSessionAuthProvider", + "type": "string" + }, + "V1InteractiveSessionProvider": { + "enum": [ + "vs_code", + "cursor", + "ssh" + ], + "title": "V1InteractiveSessionProvider", + "type": "string" + }, + "V1InteractiveSessionTrigger": { + "enum": [ + "on_startup", + "on_failure", + "on_demand" + ], + "title": "V1InteractiveSessionTrigger", + "type": "string" + }, + "CreateTrainingJobRequestV1": { + "description": "A request to create a training job.", + "properties": { + "training_job": { + "$ref": "#/components/schemas/CreateTrainingJobV1", + "description": "The training job to create." 
} }, "required": [ - "id", - "created_at", - "current_status", - "instance_type", - "updated_at", - "training_project_id", - "training_project" + "training_job" ], - "title": "TrainingJobV1", + "title": "CreateTrainingJobRequestV1", + "type": "object" + }, + "CreateTrainingJobResponseV1": { + "description": "A response to creating a training job.", + "properties": { + "training_job": { + "$ref": "#/components/schemas/TrainingJobV1", + "description": "The created training job." + } + }, + "required": [ + "training_job" + ], + "title": "CreateTrainingJobResponseV1", "type": "object" }, - "TrainingProjectSummaryV1": { - "description": "A summary of a training project.", + "TrainingJobTombstoneV1": { + "description": "A training job tombstone.", "properties": { "id": { - "description": "Unique identifier of the training project.", + "description": "Unique identifier of the training job", "title": "Id", "type": "string" }, - "name": { - "description": "Name of the training project.", - "title": "Name", + "deleted": { + "description": "Whether the training job was deleted", + "title": "Deleted", + "type": "boolean" + }, + "training_project_id": { + "description": "Unique identifier of the training project", + "title": "Training Project Id", "type": "string" } }, "required": [ "id", - "name" + "deleted", + "training_project_id" ], - "title": "TrainingProjectSummaryV1", + "title": "TrainingJobTombstoneV1", "type": "object" }, - "TrainingProjectV1": { + "GetTrainingJobResponseV1": { + "description": "A response to fetch a training job.", "properties": { - "id": { - "description": "Unique identifier of the training project", - "title": "Id", - "type": "string" - }, - "name": { - "description": "Name of the training project.", - "title": "Name", - "type": "string" - }, - "created_at": { - "description": "Time the training project was created in ISO 8601 format.", - "format": "date-time", - "title": "Created At", - "type": "string" - }, - "updated_at": { - "description": "Time 
the training project was updated in ISO 8601 format.", - "format": "date-time", - "title": "Updated At", - "type": "string" + "training_project": { + "$ref": "#/components/schemas/TrainingProjectV1", + "description": "The training project." }, - "team_name": { + "training_job": { + "$ref": "#/components/schemas/TrainingJobV1", + "description": "The fetched training job." + } + }, + "required": [ + "training_project", + "training_job" + ], + "title": "GetTrainingJobResponseV1", + "type": "object" + }, + "DownloadTrainingJobResponseV1": { + "description": "A response that includes the artifacts for a training job", + "properties": { + "artifact_presigned_urls": { + "description": "Presigned URL's for the artifacts", + "items": { + "type": "string" + }, + "title": "Artifact Presigned Urls", + "type": "array" + } + }, + "required": [ + "artifact_presigned_urls" + ], + "title": "DownloadTrainingJobResponseV1", + "type": "object" + }, + "RecreateTrainingJobResponseV1": { + "description": "A response that sends the new training job", + "properties": { + "training_job": { + "$ref": "#/components/schemas/TrainingJobV1", + "description": "The created training job." + } + }, + "required": [ + "training_job" + ], + "title": "RecreateTrainingJobResponseV1", + "type": "object" + }, + "GetTrainingJobLogsRequestV1": { + "description": "A request to fetch training logs.", + "properties": { + "start_epoch_millis": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "Name of the team associated with the training project.", - "title": "Team Name" + "description": "Epoch millis timestamp to start fetching logs", + "title": "Start Epoch Millis" }, - "latest_job": { + "end_epoch_millis": { "anyOf": [ { - "$ref": "#/components/schemas/TrainingJobV1" + "type": "integer" }, { "type": "null" } ], - "description": "Most recently created training job for the training project." 
- } - }, - "required": [ - "id", - "name", - "created_at", - "updated_at", - "latest_job" - ], - "title": "TrainingProjectV1", - "type": "object" - }, - "UserV1": { - "description": "A user.", - "properties": { - "email": { + "default": null, + "description": "Epoch millis timestamp to end fetching logs", + "title": "End Epoch Millis" + }, + "direction": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/SortOrderV1" }, { "type": "null" } ], "default": null, - "description": "Email of the user.", - "title": "Email" - } - }, - "title": "UserV1", - "type": "object" - }, - "UpsertTrainingProjectResponseV1": { - "description": "A response to upserting a training project.", - "properties": { - "training_project": { - "$ref": "#/components/schemas/TrainingProjectV1", - "description": "The upserted training project." + "description": "Sort order for logs" + }, + "limit": { + "anyOf": [ + { + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 500, + "description": "Limit of logs to fetch in a single request", + "title": "Limit" } }, - "required": [ - "training_project" - ], - "title": "UpsertTrainingProjectResponseV1", + "title": "GetTrainingJobLogsRequestV1", "type": "object" }, - "ListTrainingProjectsResponseV1": { - "description": "A response to list training projects.", + "StorageMetricsV1": { + "description": "A metric for a training job.", "properties": { - "training_projects": { - "description": "List of training projects.", + "usage_bytes": { + "description": "The number of bytes used on the storage entity.", "items": { - "$ref": "#/components/schemas/TrainingProjectV1" + "$ref": "#/components/schemas/TrainingJobMetricV1" }, - "title": "Training Projects", + "title": "Usage Bytes", + "type": "array" + }, + "utilization": { + "description": "The utilization of the storage entity as a decimal percentage.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Utilization", 
"type": "array" } }, "required": [ - "training_projects" + "usage_bytes", + "utilization" ], - "title": "ListTrainingProjectsResponseV1", + "title": "StorageMetricsV1", "type": "object" }, - "ListTrainingJobsResponseV1": { - "description": "A response to list training jobs.", + "TrainingJobMetricV1": { + "description": "A metric for a training job.", "properties": { - "training_project": { - "$ref": "#/components/schemas/TrainingProjectV1", - "description": "The training project." + "value": { + "description": "The value of the metric.", + "title": "Value", + "type": "number" }, - "training_jobs": { - "description": "List of training jobs.", - "items": { - "$ref": "#/components/schemas/TrainingJobV1" - }, - "title": "Training Jobs", - "type": "array" + "timestamp": { + "description": "The timestamp of the metric in ISO 8601 format.", + "format": "date-time", + "title": "Timestamp", + "type": "string" } }, "required": [ - "training_project", - "training_jobs" + "value", + "timestamp" ], - "title": "ListTrainingJobsResponseV1", + "title": "TrainingJobMetricV1", "type": "object" }, - "AwsIamDockerAuthV1": { - "description": "AWS details for the registry.", + "TrainingJobMetricsV1": { "properties": { - "access_key_secret_ref": { - "$ref": "#/components/schemas/SecretReferenceV1", - "description": "Name of the access key secret" + "gpu_memory_usage_bytes": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "A map of GPU rank to memory usage for the training job. For multinode jobs, this is the memory usage of the leader unless specified otherwise.", + "title": "Gpu Memory Usage Bytes", + "type": "object" + }, + "gpu_utilization": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "A map of GPU rank to fractional GPU utilization. 
For multinode jobs, this is the GPU utilization of the leader unless specified otherwise.", + "title": "Gpu Utilization", + "type": "object" + }, + "cpu_usage": { + "description": "The CPU usage measured in cores. For multinode jobs, this is the CPU usage of the leader unless specified otherwise.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Usage", + "type": "array" + }, + "cpu_memory_usage_bytes": { + "description": "The CPU memory usage for the training job. For multinode jobs, this is the CPU memory usage of the leader unless specified otherwise.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Memory Usage Bytes", + "type": "array" }, - "secret_access_key_secret_ref": { - "$ref": "#/components/schemas/SecretReferenceV1", - "description": "Name of the secret key secret" + "ephemeral_storage": { + "$ref": "#/components/schemas/StorageMetricsV1", + "description": "The storage usage for the ephemeral storage. For multinode jobs, this is the ephemeral storage usage of the leader unless specified otherwise." 
} }, "required": [ - "access_key_secret_ref", - "secret_access_key_secret_ref" + "gpu_memory_usage_bytes", + "gpu_utilization", + "cpu_usage", + "cpu_memory_usage_bytes", + "ephemeral_storage" ], - "title": "AwsIamDockerAuthV1", + "title": "TrainingJobMetricsV1", "type": "object" }, - "AwsOidcDockerAuthV1": { - "description": "AWS OIDC details for the registry.", + "TrainingJobNodeMetricsV1": { + "description": "A set of metrics for a training job node.", "properties": { - "role_arn": { - "description": "AWS IAM role ARN for OIDC authentication", - "title": "Role Arn", + "node_id": { + "description": "The name of the node.", + "title": "Node Id", "type": "string" }, - "region": { - "description": "AWS region for OIDC authentication", - "title": "Region", - "type": "string" + "metrics": { + "$ref": "#/components/schemas/TrainingJobMetricsV1", + "description": "The metrics for the node." } }, "required": [ - "role_arn", - "region" + "node_id", + "metrics" ], - "title": "AwsOidcDockerAuthV1", + "title": "TrainingJobNodeMetricsV1", "type": "object" }, - "BasetenLatestCheckpointConfig": { + "GetTrainingJobMetricsResponseV1": { + "description": "A response to fetch training job metrics. The outer list for each metric represents that metric across time.", "properties": { - "project_name": { - "anyOf": [ - { - "type": "string" + "gpu_memory_usage_bytes": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" }, - { - "type": "null" - } - ], - "default": null, - "description": "Name of the project to load the checkpoint from", - "title": "Project Name" + "type": "array" + }, + "description": "A map of GPU rank to memory usage for the training job. 
For multinode jobs, this is the memory usage of the leader unless specified otherwise.", + "title": "Gpu Memory Usage Bytes", + "type": "object" }, - "job_id": { + "gpu_utilization": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "A map of GPU rank to fractional GPU utilization. For multinode jobs, this is the GPU utilization of the leader unless specified otherwise.", + "title": "Gpu Utilization", + "type": "object" + }, + "cpu_usage": { + "description": "The CPU usage measured in cores. For multinode jobs, this is the CPU usage of the leader unless specified otherwise.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Usage", + "type": "array" + }, + "cpu_memory_usage_bytes": { + "description": "The CPU memory usage for the training job. For multinode jobs, this is the CPU memory usage of the leader unless specified otherwise.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Memory Usage Bytes", + "type": "array" + }, + "ephemeral_storage": { + "$ref": "#/components/schemas/StorageMetricsV1", + "description": "The storage usage for the ephemeral storage. For multinode jobs, this is the ephemeral storage usage of the leader unless specified otherwise." + }, + "training_job": { + "$ref": "#/components/schemas/TrainingJobV1", + "description": "The training job." + }, + "cache": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/StorageMetricsV1" }, { "type": "null" } ], - "default": null, - "description": "ID of the job to load the checkpoint from", - "title": "Job Id" + "description": "The storage usage for the read-write cache." 
}, - "typ": { - "const": "baseten_latest_checkpoint", - "default": "baseten_latest_checkpoint", - "title": "Typ", - "type": "string" + "per_node_metrics": { + "description": "The metrics for each node in the training job.", + "items": { + "$ref": "#/components/schemas/TrainingJobNodeMetricsV1" + }, + "title": "Per Node Metrics", + "type": "array" } }, - "title": "BasetenLatestCheckpointConfig", + "required": [ + "gpu_memory_usage_bytes", + "gpu_utilization", + "cpu_usage", + "cpu_memory_usage_bytes", + "ephemeral_storage", + "training_job", + "cache", + "per_node_metrics" + ], + "title": "GetTrainingJobMetricsResponseV1", "type": "object" }, - "BasetenNamedCheckpointConfig": { + "GetTrainingJobMetricsRequestV1": { + "description": "A request to fetch metrics. Allows the user to request metrics over a period of time.", "properties": { - "checkpoint_name": { - "description": "Name of the checkpoint to load from", - "title": "Checkpoint Name", - "type": "string" - }, - "project_name": { + "end_epoch_millis": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "Name of the project to load the checkpoint from", - "title": "Project Name" + "description": "Epoch millis timestamp to end fetching metrics", + "title": "End Epoch Millis" }, - "job_id": { + "start_epoch_millis": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "ID of the job to load the checkpoint from", - "title": "Job Id" - }, - "typ": { - "const": "baseten_named_checkpoint", - "default": "baseten_named_checkpoint", - "title": "Typ", - "type": "string" + "description": "Epoch millis timestamp to start fetching metrics.", + "title": "Start Epoch Millis" + } + }, + "title": "GetTrainingJobMetricsRequestV1", + "type": "object" + }, + "StopTrainingJobRequestV1": { + "description": "A request to stop a training job.", + "properties": {}, + "title": "StopTrainingJobRequestV1", + "type": 
"object" + }, + "StopTrainingJobResponseV1": { + "description": "A response to stopping a training job.", + "properties": { + "training_job": { + "$ref": "#/components/schemas/TrainingJobV1", + "description": "The stopped training job." } }, "required": [ - "checkpoint_name" + "training_job" ], - "title": "BasetenNamedCheckpointConfig", + "title": "StopTrainingJobResponseV1", "type": "object" }, - "CreateJobWeightConfigV1": { - "description": "Weight source configuration for MDN (Model Distribution Network).\n\nEnables training jobs to mount external model weights from HuggingFace, S3, GCS, or R2\nvia MDN's caching and CSI mounting infrastructure. Weights are mirrored once and\ndeduplicated across training jobs.", + "TrainingJobCheckpointV1": { + "description": "A checkpoint for a training job.", "properties": { - "source": { - "description": "Weight source URI. Supported formats: hf://, s3://, gs://, r2://", - "examples": [ - "hf://meta-llama/Llama-3-8B@main", - "s3://my-bucket/models/llama", - "gs://my-bucket/models/llama", - "r2://account_id.bucket/models/llama" - ], - "title": "Source", + "checkpoint_id": { + "description": "The ID of the checkpoint.", + "title": "Checkpoint Id", "type": "string" }, - "mount_location": { - "description": "Path where weights will be mounted in the container", - "examples": [ - "/app/models/base", - "/models/llama" - ], - "title": "Mount Location", + "created_at": { + "description": "The timestamp of the checkpoint in ISO 8601 format.", + "format": "date-time", + "title": "Created At", "type": "string" }, - "allow_patterns": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "File patterns to include (Unix-style shell patterns)", - "examples": [ - [ - "*.safetensors", - "config.json" - ] - ], - "title": "Allow Patterns" - }, - "ignore_patterns": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": 
"null" - } - ], - "default": null, - "description": "File patterns to exclude (Unix-style shell patterns)", - "examples": [ - [ - "*.bin", - "*.h5" - ] - ], - "title": "Ignore Patterns" + "checkpoint_type": { + "description": "The type of checkpoint.", + "title": "Checkpoint Type", + "type": "string" }, - "auth_secret_name": { + "base_model": { "anyOf": [ { "type": "string" @@ -6404,15 +10651,10 @@ "type": "null" } ], - "default": null, - "description": "Name of the workspace secret for authentication (e.g., HuggingFace token)", - "examples": [ - "hf_token", - "aws_credentials" - ], - "title": "Auth Secret Name" + "description": "The base model of the checkpoint.", + "title": "Base Model" }, - "auth": { + "lora_adapter_config": { "anyOf": [ { "additionalProperties": true, @@ -6422,410 +10664,386 @@ "type": "null" } ], - "default": null, - "description": "Authentication configuration for the weight source.", - "examples": [ + "description": "The adapter config of the checkpoint.", + "title": "Lora Adapter Config" + }, + "size_bytes": { + "description": "The size of the checkpoint in bytes.", + "title": "Size Bytes", + "type": "integer" + }, + "sync_status": { + "anyOf": [ { - "auth_method": "CUSTOM_SECRET", - "auth_secret_name": "hf_token" + "type": "string" + }, + { + "type": "null" } ], - "title": "Auth" + "default": null, + "description": "Sync state of the checkpoint: SYNCING or COMPLETE.", + "title": "Sync Status" + }, + "training_job_id": { + "description": "The ID of the training job.", + "title": "Training Job Id", + "type": "string" } }, "required": [ - "source", - "mount_location" + "checkpoint_id", + "created_at", + "checkpoint_type", + "base_model", + "lora_adapter_config", + "size_bytes", + "training_job_id" ], - "title": "CreateJobWeightConfigV1", + "title": "TrainingJobCheckpointV1", "type": "object" }, - "CreateTrainingJobAcceleratorV1": { + "GetTrainingJobCheckpointsResponseV1": { + "description": "A response to fetch checkpoints for a training 
job.", "properties": { - "accelerator": { - "description": "GPU type for the training job.", - "examples": [ - "H100" - ], - "title": "Accelerator", - "type": "string" + "training_job": { + "$ref": "#/components/schemas/TrainingJobV1", + "description": "The training job." }, - "count": { - "description": "GPUs needed for the training job.", - "examples": [ - 2 - ], - "title": "Count", - "type": "integer" + "checkpoints": { + "description": "The checkpoints for the training job.", + "items": { + "$ref": "#/components/schemas/TrainingJobCheckpointV1" + }, + "title": "Checkpoints", + "type": "array" } }, "required": [ - "accelerator", - "count" + "training_job", + "checkpoints" ], - "title": "CreateTrainingJobAcceleratorV1", + "title": "GetTrainingJobCheckpointsResponseV1", "type": "object" }, - "CreateTrainingJobCacheConfig": { + "CheckpointFile": { "properties": { - "enable_legacy_hf_mount": { - "default": false, - "description": "Whether to enable the legacy Hugging Face cache.", - "examples": [ - true - ], - "title": "Enable Legacy Hf Mount", - "type": "boolean" - }, - "enabled": { - "default": false, - "description": "Whether to enable the read-write cache.", - "examples": [ - true - ], - "title": "Enabled", - "type": "boolean" - }, - "require_cache_affinity": { - "default": true, - "description": "Whether to require region affinity for the read-write cache. If False, the resulting job is not guaranteed to be deployed alongside the previous cache.", - "examples": [ - true, - false - ], - "title": "Require Cache Affinity", - "type": "boolean" - }, - "mount_base_path": { - "default": "/root/.cache", - "description": "Mount base path for the cache directory. 
The project cache and team cache will be mounted under this path.", - "examples": [ - "/workspace/.cache", - "/root/.cache" - ], - "title": "Mount Base Path", + "url": { + "title": "Url", "type": "string" - } - }, - "title": "CreateTrainingJobCacheConfig", - "type": "object" - }, - "CreateTrainingJobCheckpointingConfig": { - "properties": { - "enabled": { - "default": false, - "description": "Whether checkpointing is enabled.", - "examples": [ - true - ], - "title": "Enabled", - "type": "boolean" }, - "checkpoint_path": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "path where checkpoints will be saved.", - "examples": [ - "/mnt/ckpts" - ], - "title": "Checkpoint Path" + "relative_file_name": { + "title": "Relative File Name", + "type": "string" }, - "volume_size_gib": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Size of the volume in gibibytes. If not provided, the default size will be used", - "examples": [ - 10 - ], - "title": "Volume Size Gib" - } - }, - "title": "CreateTrainingJobCheckpointingConfig", - "type": "object" - }, - "CreateTrainingJobComputeV1": { - "description": "Configuration to specify the compute for a training job.", - "properties": { - "node_count": { - "default": 1, - "description": "Number of nodes for the training job.", - "examples": [ - 1 - ], - "title": "Node Count", + "node_rank": { + "title": "Node Rank", "type": "integer" }, - "cpu_count": { - "default": 1, - "description": "Number of cpus for the training job.", - "examples": [ - 1 - ], - "title": "Cpu Count", + "size_bytes": { + "title": "Size Bytes", "type": "integer" }, - "memory": { - "default": "2Gi", - "description": "Memory for the training job.", - "examples": [ - "2Gi" - ], - "title": "Memory", + "last_modified": { + "title": "Last Modified", "type": "string" - }, - "accelerator": { - "anyOf": [ - { - "$ref": 
"#/components/schemas/CreateTrainingJobAcceleratorV1" - }, - { - "type": "null" - } - ], - "default": null, - "description": "GPU specification for the training job", - "examples": [ - { - "accelerator": "H100", - "count": 2 - } - ] } }, - "title": "CreateTrainingJobComputeV1", + "required": [ + "url", + "relative_file_name", + "node_rank", + "size_bytes", + "last_modified" + ], + "title": "CheckpointFile", "type": "object" }, - "CreateTrainingJobImageV1": { - "description": "Configuration to create a training job image.", + "GetTrainingJobCheckpointFilesResponseV1": { + "description": "A response to fetch presigned URLs for checkpoint files of a training job.", "properties": { - "base_image": { - "description": "Base image for the training job.", - "examples": [ - "hello-world" - ], - "title": "Base Image", - "type": "string" + "presigned_urls": { + "description": "List of presigned URLs for checkpoint files.", + "items": { + "$ref": "#/components/schemas/CheckpointFile" + }, + "title": "Presigned Urls", + "type": "array" }, - "docker_auth": { + "next_page_token": { "anyOf": [ { - "$ref": "#/components/schemas/DockerAuthV1" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "Docker authentication credentials" + "description": "Token to use for fetching the next page of results. 
None when there are no more results.", + "title": "Next Page Token" + }, + "total_count": { + "description": "Total number of checkpoint files available.", + "title": "Total Count", + "type": "integer" } }, "required": [ - "base_image" + "presigned_urls", + "total_count" ], - "title": "CreateTrainingJobImageV1", + "title": "GetTrainingJobCheckpointFilesResponseV1", "type": "object" }, - "CreateTrainingJobRuntimeV1": { - "description": "Configuration to specify the runtime environment for a training job.", + "GetTrainingJobCheckpointFilesRequestV1": { + "description": "Pagination params for ``GET /v1/training_projects/.../checkpoint_files``.", "properties": { - "start_commands": { - "description": "Commands to execute when starting the runtime.", - "examples": [ - [ - "python main.py" - ] - ], - "items": { - "type": "string" - }, - "title": "Start Commands", - "type": "array" + "page_size": { + "default": 1000, + "description": "Max files per page (default 1000).", + "minimum": 1, + "title": "Page Size", + "type": "integer" }, - "environment_variables": { - "additionalProperties": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/SecretReferenceV1" - } - ] - }, - "description": "Environment variables to set in the runtime.", - "examples": [ - { - "API_KEY": "your_api_key_here", - "PATH": "/usr/bin" - } - ], - "title": "Environment Variables", - "type": "object" + "page_token": { + "default": 0, + "description": "Offset into the file list (default 0).", + "minimum": 0, + "title": "Page Token", + "type": "integer" + } + }, + "title": "GetTrainingJobCheckpointFilesRequestV1", + "type": "object" + }, + "AuthCodeV1": { + "description": "Authentication code for a training job interactive session node.", + "properties": { + "session_id": { + "description": "Unique identifier of the interactive session.", + "title": "Session Id", + "type": "string" }, - "artifacts": { - "description": "Runtime artifacts for the training job.", - "items": { - 
"$ref": "#/components/schemas/CreateTrainingJobS3Artifact" - }, - "title": "Artifacts", - "type": "array" + "replica_id": { + "description": "Replica identifier in gXXrY format (e.g., 'g00r0' for group 0, replica 0).", + "title": "Replica Id", + "type": "string" }, - "enable_cache": { + "auth_code": { + "description": "The device authentication code (e.g., '4F64-C0D9').", + "title": "Auth Code", + "type": "string" + }, + "auth_url": { + "description": "URL where the user should enter the auth code (e.g., 'https://github.com/login/device').", + "title": "Auth Url", + "type": "string" + }, + "generated_at": { "anyOf": [ { - "type": "boolean" + "format": "date-time", + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Deprecated. Use cache_config instead.", - "examples": [ - true - ], - "title": "Enable Cache" + "description": "When the auth code was generated, in ISO 8601 format.", + "title": "Generated At" }, - "cache_config": { + "tunnel_name": { "anyOf": [ { - "$ref": "#/components/schemas/CreateTrainingJobCacheConfig" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Configuration for the read-write cache.", - "examples": [ - { - "enable_legacy_hf_mount": true, - "enabled": true, - "mount_base_path": "/root/.cache", - "require_cache_affinity": true - } - ] + "description": "The name of the tunnel node.", + "title": "Tunnel Name" }, - "checkpointing_config": { - "$ref": "#/components/schemas/CreateTrainingJobCheckpointingConfig", - "description": "Configuration for checkpointing.", - "examples": [ + "expires_at": { + "anyOf": [ { - "checkpoint_path": "/mnt/ckpts", - "enabled": true, - "volume_size_gib": null + "format": "date-time", + "type": "string" + }, + { + "type": "null" } - ] + ], + "default": null, + "description": "When the session expires, in ISO 8601 format.", + "title": "Expires At" }, - "load_checkpoint_config": { + "working_directory": { "anyOf": [ { - "$ref": 
"#/components/schemas/LoadCheckpointConfig" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Configuration for loading checkpoints" + "description": "The working directory of the session.", + "title": "Working Directory" } }, - "title": "CreateTrainingJobRuntimeV1", + "required": [ + "session_id", + "replica_id", + "auth_code", + "auth_url" + ], + "title": "AuthCodeV1", "type": "object" }, - "CreateTrainingJobS3Artifact": { + "GetAuthCodesResponseV1": { + "description": "Response containing auth codes for all nodes of a training job's interactive sessions.", "properties": { - "s3_bucket": { - "description": "S3 bucket for the uploaded runtime artifact.", - "examples": [ - "my-s3-bucket" + "auth_codes": { + "description": "List of auth codes for each node that has an active interactive session.", + "items": { + "$ref": "#/components/schemas/AuthCodeV1" + }, + "title": "Auth Codes", + "type": "array" + } + }, + "required": [ + "auth_codes" + ], + "title": "GetAuthCodesResponseV1", + "type": "object" + }, + "PatchInteractiveSessionRequestV1": { + "description": "Request to patch an interactive session.\n\nOnly fields that are provided (non-None) will be applied.", + "properties": { + "timeout_minutes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } ], - "title": "S3 Bucket", - "type": "string" + "default": null, + "description": "For on_startup sessions, minutes to add to the expiration. For on_demand/on_failure sessions, minutes to add to the timeout. Use -1 for infinite timeout (bumps by 10 years).", + "title": "Timeout Minutes" }, - "s3_key": { - "description": "S3 key for the uploaded runtime artifact.", - "examples": [ - "my-s3-key" + "trigger": { + "anyOf": [ + { + "$ref": "#/components/schemas/V1InteractiveSessionTrigger" + }, + { + "type": "null" + } ], - "title": "S3 Key", - "type": "string" + "default": null, + "description": "Update when the interactive session is created. 
Cannot be changed if the session trigger is 'on_startup'." } }, - "required": [ - "s3_bucket", - "s3_key" - ], - "title": "CreateTrainingJobS3Artifact", + "title": "PatchInteractiveSessionRequestV1", "type": "object" }, - "CreateTrainingJobV1": { - "description": "Configuration for a training job.", + "InteractiveSessionV1": { + "description": "Representation of a training job interactive session.", "properties": { - "image": { - "$ref": "#/components/schemas/CreateTrainingJobImageV1", - "examples": [ + "id": { + "description": "Unique identifier of the interactive session.", + "title": "Id", + "type": "string" + }, + "trigger": { + "description": "When the interactive session is created.", + "title": "Trigger", + "type": "string" + }, + "timeout_minutes": { + "description": "Minutes before the session times out.", + "title": "Timeout Minutes", + "type": "integer" + }, + "session_provider": { + "description": "The IDE client for the session.", + "title": "Session Provider", + "type": "string" + }, + "auth_provider": { + "description": "The authentication provider for the session.", + "title": "Auth Provider", + "type": "string" + }, + "pod_name": { + "description": "Pod name / node rank for the session.", + "title": "Pod Name", + "type": "string" + }, + "expires_at": { + "anyOf": [ + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "When the session expires, in ISO 8601 format.", + "title": "Expires At" + }, + "tunnel_name": { + "anyOf": [ { - "base_image": "hello-world", - "docker_auth": null + "type": "string" + }, + { + "type": "null" } - ] + ], + "default": null, + "description": "The tunnel name for the session.", + "title": "Tunnel Name" }, - "compute": { - "$ref": "#/components/schemas/CreateTrainingJobComputeV1", - "examples": [ + "auth_code": { + "anyOf": [ { - "accelerator": { - "accelerator": "H100", - "count": 2 - }, - "cpu_count": 1, - "memory": "2Gi", - "node_count": 1 + "type": 
"string" + }, + { + "type": "null" } - ] + ], + "default": null, + "description": "The device authentication code.", + "title": "Auth Code" }, - "runtime": { - "$ref": "#/components/schemas/CreateTrainingJobRuntimeV1", - "description": "Configuration for the runtime environment of the training job.", - "examples": [ + "auth_url": { + "anyOf": [ { - "artifacts": [], - "cache_config": null, - "checkpointing_config": { - "checkpoint_path": null, - "enabled": false, - "volume_size_gib": null - }, - "enable_cache": null, - "environment_variables": { - "API_KEY": "your_api_key_here", - "PATH": "/usr/bin" - }, - "load_checkpoint_config": null, - "start_commands": [ - "python main.py" - ] + "type": "string" + }, + { + "type": "null" } - ] + ], + "default": null, + "description": "URL where the user should enter the auth code.", + "title": "Auth Url" }, - "name": { + "auth_code_generated_at": { "anyOf": [ { + "format": "date-time", "type": "string" }, { @@ -6833,203 +11051,297 @@ } ], "default": null, - "description": "Name of the training job.", - "examples": [ - "gpt-oss-job" - ], - "title": "Name" + "description": "When the auth code was generated.", + "title": "Auth Code Generated At" }, - "truss_user_env": { + "authenticated_at": { "anyOf": [ { - "$ref": "#/components/schemas/TrussUserEnv" + "format": "date-time", + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Truss user environment information" + "description": "When the session was authenticated.", + "title": "Authenticated At" }, - "interactive_session": { + "working_directory": { "anyOf": [ { - "$ref": "#/components/schemas/InteractiveSessionConfigV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Configuration for interactive debugging sessions." 
+ "description": "The working directory of the session.", + "title": "Working Directory" + } + }, + "required": [ + "id", + "trigger", + "timeout_minutes", + "session_provider", + "auth_provider", + "pod_name" + ], + "title": "InteractiveSessionV1", + "type": "object" + }, + "PatchInteractiveSessionResponseV1": { + "description": "Response after patching an interactive session.", + "properties": { + "interactive_session": { + "$ref": "#/components/schemas/InteractiveSessionV1", + "description": "The updated interactive session." }, - "weights": { - "description": "MDN weight sources to mount in the training container. Weights are mirrored and cached for fast startup.", - "examples": [ - [ - { - "allow_patterns": null, - "auth": null, - "auth_secret_name": null, - "ignore_patterns": null, - "mount_location": "/app/models/base", - "source": "hf://meta-llama/Llama-3-8B@main" - } - ] - ], + "message": { + "description": "Human-readable summary of what was updated.", + "title": "Message", + "type": "string" + } + }, + "required": [ + "interactive_session", + "message" + ], + "title": "PatchInteractiveSessionResponseV1", + "type": "object" + }, + "FileSummary": { + "description": "Information about a file in the cache.", + "properties": { + "path": { + "description": "Relative path of the file in the cache", + "title": "Path", + "type": "string" + }, + "size_bytes": { + "description": "Size of the file in bytes", + "title": "Size Bytes", + "type": "integer" + }, + "modified": { + "description": "Last modification time of the file", + "title": "Modified", + "type": "string" + }, + "file_type": { + "description": "Type of the file", + "title": "File Type", + "type": "string" + }, + "permissions": { + "description": "Permissions of the file", + "title": "Permissions", + "type": "string" + } + }, + "required": [ + "path", + "size_bytes", + "modified", + "file_type", + "permissions" + ], + "title": "FileSummary", + "type": "object" + }, + "GetCacheSummaryResponseV1": { + 
"description": "Response for getting cache summary.", + "properties": { + "timestamp": { + "description": "Timestamp when the cache summary was captured", + "title": "Timestamp", + "type": "string" + }, + "project_id": { + "description": "Project ID associated with the cache", + "title": "Project Id", + "type": "string" + }, + "file_summaries": { + "description": "List of files in the cache", "items": { - "$ref": "#/components/schemas/CreateJobWeightConfigV1" + "$ref": "#/components/schemas/FileSummary" }, - "title": "Weights", + "title": "File Summaries", "type": "array" } }, "required": [ - "image" + "timestamp", + "project_id", + "file_summaries" + ], + "title": "GetCacheSummaryResponseV1", + "type": "object" + }, + "TrainingProjectTombstoneV1": { + "description": "A training project tombstone.", + "properties": { + "id": { + "description": "Unique identifier of the training project", + "title": "Id", + "type": "string" + }, + "deleted": { + "description": "Whether the training project was deleted", + "title": "Deleted", + "type": "boolean" + } + }, + "required": [ + "id", + "deleted" + ], + "title": "TrainingProjectTombstoneV1", + "type": "object" + }, + "GetTrainingProjectResponseV1": { + "description": "A response to getting a training project.", + "properties": { + "training_project": { + "$ref": "#/components/schemas/TrainingProjectV1", + "description": "The training project." 
+ } + }, + "required": [ + "training_project" + ], + "title": "GetTrainingProjectResponseV1", + "type": "object" + }, + "OrderByV1": { + "description": "A request to order training jobs.", + "properties": { + "field": { + "description": "The field to order by.", + "examples": [ + "created_at" + ], + "title": "Field", + "type": "string" + }, + "order": { + "description": "The direction to order by.", + "examples": [ + "asc", + "desc" + ], + "title": "Order", + "type": "string" + } + }, + "required": [ + "field", + "order" ], - "title": "CreateTrainingJobV1", + "title": "OrderByV1", "type": "object" }, - "DockerAuthType": { - "enum": [ - "GCP_SERVICE_ACCOUNT_JSON", - "AWS_IAM", - "AWS_OIDC", - "GCP_OIDC", - "REGISTRY_SECRET" - ], - "title": "DockerAuthType", - "type": "string" - }, - "DockerAuthV1": { - "description": "Docker authentication credentials.", + "SearchTrainingJobsRequestV1": { + "description": "A request to search training jobs.", "properties": { - "registry": { - "description": "Registry to authenticate with", - "title": "Registry", - "type": "string" - }, - "auth_method": { - "$ref": "#/components/schemas/DockerAuthType", - "description": "Method to authenticate with the registry", - "examples": [ - "GCP_SERVICE_ACCOUNT_JSON", - "AWS_IAM", - "AWS_OIDC", - "GCP_OIDC", - "REGISTRY_SECRET" - ] - }, - "gcp_service_account_json_docker_auth": { + "project_id": { "anyOf": [ { - "$ref": "#/components/schemas/GcpServiceAccountJsonDockerAuthV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "GCP service account details for the registry" - }, - "aws_iam_docker_auth": { - "anyOf": [ - { - "$ref": "#/components/schemas/AwsIamDockerAuthV1" - }, - { - "type": "null" - } + "description": "Filter the training jobs by project ID.", + "examples": [ + "n4q95w5" ], - "default": null, - "description": "AWS details for the registry" + "title": "Project Id" }, - "aws_oidc_docker_auth": { + "job_id": { "anyOf": [ { - "$ref": 
"#/components/schemas/AwsOidcDockerAuthV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "AWS OIDC details for the registry" + "description": "Filter the training jobs by job ID.", + "examples": [ + "p7qr9qv" + ], + "title": "Job Id" }, - "gcp_oidc_docker_auth": { + "statuses": { "anyOf": [ { - "$ref": "#/components/schemas/GcpOidcDockerAuthV1" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "GCP OIDC details for the registry" + "description": "Filter the training jobs by status.", + "examples": [ + [ + "TRAINING_JOB_RUNNING", + "TRAINING_JOB_COMPLETED" + ] + ], + "title": "Statuses" }, - "registry_secret_docker_auth": { - "anyOf": [ - { - "$ref": "#/components/schemas/RegistrySecretDockerAuthV1" - }, + "order_by": { + "default": [ { - "type": "null" + "field": "created_at", + "order": "desc" } ], - "default": null, - "description": "Required when auth_method is REGISTRY_SECRET. Supports any Docker registry (Docker Hub, GHCR, NGC, etc.) via username:password credentials stored as a Baseten secret." - } - }, - "required": [ - "registry", - "auth_method" - ], - "title": "DockerAuthV1", - "type": "object" - }, - "GcpOidcDockerAuthV1": { - "description": "GCP OIDC details for the registry.", - "properties": { - "service_account": { - "description": "GCP service account name for OIDC authentication", - "title": "Service Account", - "type": "string" - }, - "workload_identity_provider": { - "description": "GCP workload identity provider for OIDC authentication", - "title": "Workload Identity Provider", - "type": "string" + "description": "Order the training jobs by a field. 
Currently supports created_at", + "items": { + "$ref": "#/components/schemas/OrderByV1" + }, + "title": "Order By", + "type": "array" } }, - "required": [ - "service_account", - "workload_identity_provider" - ], - "title": "GcpOidcDockerAuthV1", + "title": "SearchTrainingJobsRequestV1", "type": "object" }, - "GcpServiceAccountJsonDockerAuthV1": { - "description": "GCP details for the registry.", + "SearchTrainingJobsResponseV1": { + "description": "A response to search training jobs.", "properties": { - "service_account_json_secret_ref": { - "$ref": "#/components/schemas/SecretReferenceV1", - "description": "Name of the service account secret" + "training_jobs": { + "description": "List of training jobs.", + "items": { + "$ref": "#/components/schemas/TrainingJobV1" + }, + "title": "Training Jobs", + "type": "array" } }, "required": [ - "service_account_json_secret_ref" + "training_jobs" ], - "title": "GcpServiceAccountJsonDockerAuthV1", + "title": "SearchTrainingJobsResponseV1", "type": "object" }, - "GitInfo": { + "SearchTrainersRequestV1": { + "description": "Filters for searching trainers visible to the requesting user.", "properties": { - "latest_commit_sha": { - "title": "Latest Commit Sha", - "type": "string" - }, - "latest_tag": { + "trainer_id": { "anyOf": [ { "type": "string" @@ -7038,153 +11350,109 @@ "type": "null" } ], - "title": "Latest Tag" - }, - "commits_since_tag": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "title": "Commits Since Tag" - }, - "has_uncommitted_changes": { - "title": "Has Uncommitted Changes", - "type": "boolean" - } - }, - "required": [ - "latest_commit_sha", - "latest_tag", - "commits_since_tag", - "has_uncommitted_changes" - ], - "title": "GitInfo", - "type": "object" - }, - "InteractiveSessionConfigV1": { - "description": "Configuration for interactive debugging sessions on training jobs.", - "properties": { - "trigger": { - "$ref": "#/components/schemas/V1InteractiveSessionTrigger", - "default": 
"on_demand", - "description": "When to create the interactive session. 'on_startup' creates on job start, 'on_failure' creates on job failure, 'on_demand' bypasses automatic session creation." - }, - "timeout_minutes": { - "default": 480, - "description": "Number of minutes before the interactive session times out.", + "default": null, + "description": "Filter by trainer ID.", "examples": [ - 480, - 1440, - 10080 + "k4q95w5" ], - "title": "Timeout Minutes", - "type": "integer" - }, - "session_provider": { - "$ref": "#/components/schemas/V1InteractiveSessionProvider", - "default": "vs_code", - "description": "The IDE client for the interactive session." - }, - "auth_provider": { - "$ref": "#/components/schemas/V1InteractiveSessionAuthProvider", - "default": "github", - "description": "The authentication provider for the interactive session." + "title": "Trainer Id" } }, - "title": "InteractiveSessionConfigV1", + "title": "SearchTrainersRequestV1", "type": "object" }, - "LoadCheckpointConfig": { + "SearchedTrainerV1": { + "description": "Trainer entry returned by /v1/trainers/search.", "properties": { - "enabled": { - "default": false, - "description": "Whether checkpoint loading is enabled", - "title": "Enabled", - "type": "boolean" + "trainer_id": { + "description": "The trainer ID.", + "title": "Trainer Id", + "type": "string" }, - "download_folder": { - "default": "/tmp/loaded_checkpoints", - "description": "Folder where checkpoints will be downloaded", - "title": "Download Folder", + "session_id": { + "description": "The session ID this trainer belongs to.", + "title": "Session Id", "type": "string" }, - "checkpoints": { - "description": "List of checkpoint configurations", - "items": { - "discriminator": { - "mapping": { - "baseten_latest_checkpoint": "#/components/schemas/BasetenLatestCheckpointConfig", - "baseten_named_checkpoint": "#/components/schemas/BasetenNamedCheckpointConfig" - }, - "propertyName": "typ" - }, - "oneOf": [ - { - "$ref": 
"#/components/schemas/BasetenLatestCheckpointConfig" - }, - { - "$ref": "#/components/schemas/BasetenNamedCheckpointConfig" - } - ] - }, - "title": "Checkpoints", - "type": "array" + "base_model": { + "description": "The HuggingFace base model the trainer is fine-tuning.", + "title": "Base Model", + "type": "string" } }, - "title": "LoadCheckpointConfig", + "required": [ + "trainer_id", + "session_id", + "base_model" + ], + "title": "SearchedTrainerV1", "type": "object" }, - "RegistrySecretDockerAuthV1": { - "description": "Authentication via a Baseten secret for any Docker registry (Docker Hub, GHCR, NGC, etc.).\nThe referenced secret must contain credentials in the format 'username:password'.\nFor Docker Hub, set registry to 'https://index.docker.io/v1/'. For GHCR, use 'ghcr.io'.", - "properties": { - "secret_ref": { - "$ref": "#/components/schemas/SecretReferenceV1", - "description": "Reference to a Baseten secret containing credentials in the format 'username:password'" + "SearchTrainersResponseV1": { + "description": "Trainers matching the search filters.", + "properties": { + "trainers": { + "description": "List of trainers.", + "items": { + "$ref": "#/components/schemas/SearchedTrainerV1" + }, + "title": "Trainers", + "type": "array" } }, "required": [ - "secret_ref" + "trainers" ], - "title": "RegistrySecretDockerAuthV1", + "title": "SearchTrainersResponseV1", "type": "object" }, - "SecretReferenceV1": { + "CheckpointSearchRequestV1": { + "description": "Lookup body for ``POST /v1/trainers/checkpoints/search``.", "properties": { - "name": { - "description": "Name of the secret to reference.", + "checkpoint_path": { + "description": "bt:// URI of a trainer checkpoint. 
Form: bt://loops:/(weights|sampler_weights)/.", "examples": [ - "hf_token" + "bt://loops:k4q95w5/sampler_weights/step-100" ], - "title": "Name", + "title": "Checkpoint Path", "type": "string" } }, "required": [ - "name" + "checkpoint_path" ], - "title": "SecretReferenceV1", + "title": "CheckpointSearchRequestV1", "type": "object" }, - "TrussUserEnv": { - "description": "This data models is used to flexibly store info alongside oracle versions.\n\nThere is a corresponding data model in the truss client.\nIn contrast, here all fields are optional for backwards compatibility with old\nclients.", + "TrainerCheckpointTarget": { + "description": "Whether a TrainerServerCheckpoint is loadable by the sampler or the trainer.\n\nSAMPLER checkpoints are consumed by the sampling server for inference;\nTRAINER checkpoints capture full trainer state for resuming training.\nMirrored in the bt:// URI as\n``bt://loops:/(sampler_weights|weights)/``.", + "enum": [ + "sampler", + "trainer" + ], + "title": "TrainerCheckpointTarget", + "type": "string" + }, + "TrainerServerCheckpointV1": { + "description": "A checkpoint saved by a trainer.", "properties": { - "truss_client_version": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Truss Client Version" + "checkpoint_id": { + "description": "The ID of the checkpoint.", + "title": "Checkpoint Id", + "type": "string" }, - "python_version": { + "created_at": { + "description": "The timestamp of the checkpoint in ISO 8601 format.", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "checkpoint_type": { + "description": "The type of checkpoint.", + "title": "Checkpoint Type", + "type": "string" + }, + "base_model": { "anyOf": [ { "type": "string" @@ -7193,22 +11461,28 @@ "type": "null" } ], - "default": null, - "title": "Python Version" + "description": "The base model of the checkpoint.", + "title": "Base Model" }, - "pydantic_version": { + "lora_adapter_config": 
{ "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": null, - "title": "Pydantic Version" + "description": "The adapter config of the checkpoint.", + "title": "Lora Adapter Config" }, - "mypy_version": { + "size_bytes": { + "description": "The size of the checkpoint in bytes.", + "title": "Size Bytes", + "type": "integer" + }, + "sync_status": { "anyOf": [ { "type": "string" @@ -7218,181 +11492,237 @@ } ], "default": null, - "title": "Mypy Version" + "description": "Sync state of the checkpoint: SYNCING or COMPLETE.", + "title": "Sync Status" }, - "is_library_deployment": { - "default": false, - "title": "Is Library Deployment", - "type": "boolean" + "id": { + "description": "The TrainerServerCheckpoint database ID.", + "title": "Id", + "type": "string" }, - "is_frontend_deployment": { - "default": false, - "title": "Is Frontend Deployment", - "type": "boolean" + "trainer_id": { + "description": "The ID of the trainer.", + "title": "Trainer Id", + "type": "string" }, - "git_info": { + "target": { + "$ref": "#/components/schemas/TrainerCheckpointTarget", + "description": "Whether this checkpoint is loadable by the sampler ('sampler') or by the trainer ('trainer')." + } + }, + "required": [ + "checkpoint_id", + "created_at", + "checkpoint_type", + "base_model", + "lora_adapter_config", + "size_bytes", + "id", + "trainer_id", + "target" + ], + "title": "TrainerServerCheckpointV1", + "type": "object" + }, + "CheckpointSearchResponseV1": { + "description": "Response for ``POST /v1/trainers/checkpoints/search``.", + "properties": { + "checkpoint": { + "$ref": "#/components/schemas/TrainerServerCheckpointV1", + "description": "The checkpoint metadata (id, target, type, sizes, etc.)." 
+ } + }, + "required": [ + "checkpoint" + ], + "title": "CheckpointSearchResponseV1", + "type": "object" + }, + "CreateTrainerSessionRequestV1": { + "properties": { + "training_project_id": { "anyOf": [ { - "$ref": "#/components/schemas/GitInfo" + "type": "string" }, { "type": "null" } ], - "default": null + "default": null, + "description": "ID of the training project to associate with. If omitted, a default project is created for the org.", + "title": "Training Project Id" } }, - "title": "TrussUserEnv", + "title": "CreateTrainerSessionRequestV1", "type": "object" }, - "V1InteractiveSessionAuthProvider": { - "enum": [ - "github", - "microsoft" - ], - "title": "V1InteractiveSessionAuthProvider", - "type": "string" - }, - "V1InteractiveSessionProvider": { - "enum": [ - "vs_code", - "cursor" - ], - "title": "V1InteractiveSessionProvider", - "type": "string" - }, - "V1InteractiveSessionTrigger": { - "enum": [ - "on_startup", - "on_failure", - "on_demand" - ], - "title": "V1InteractiveSessionTrigger", - "type": "string" - }, - "CreateTrainingJobRequestV1": { - "description": "A request to create a training job.", + "TrainerSessionV1": { "properties": { - "training_job": { - "$ref": "#/components/schemas/CreateTrainingJobV1", - "description": "The training job to create." + "id": { + "title": "Id", + "type": "string" } }, "required": [ - "training_job" + "id" ], - "title": "CreateTrainingJobRequestV1", + "title": "TrainerSessionV1", "type": "object" }, - "CreateTrainingJobResponseV1": { - "description": "A response to creating a training job.", + "CreateTrainerSessionResponseV1": { "properties": { - "training_job": { - "$ref": "#/components/schemas/TrainingJobV1", - "description": "The created training job." 
+ "session": { + "$ref": "#/components/schemas/TrainerSessionV1" } }, "required": [ - "training_job" + "session" ], - "title": "CreateTrainingJobResponseV1", + "title": "CreateTrainerSessionResponseV1", "type": "object" }, - "TrainingJobTombstoneV1": { - "description": "A training job tombstone.", + "CreateTrainerServerRequestV1": { "properties": { - "id": { - "description": "Unique identifier of the training job", - "title": "Id", + "model": { + "description": "Base model ID (e.g. 'Qwen/Qwen3-8B').", + "title": "Model", "type": "string" }, - "deleted": { - "description": "Whether the training job was deleted", - "title": "Deleted", - "type": "boolean" + "max_seq_len": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Maximum sequence length for training. Defaults to the maximum supported by the model configuration.", + "title": "Max Seq Len" }, - "training_project_id": { - "description": "Unique identifier of the training project", - "title": "Training Project Id", - "type": "string" + "lora_rank": { + "default": 64, + "description": "LoRA rank.", + "title": "Lora Rank", + "type": "integer" + }, + "seed": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Random seed for reproducibility.", + "title": "Seed" + }, + "scale_down_delay_seconds": { + "default": 3600, + "description": "Seconds of inactivity before the trainer scales to zero. Must be positive. Defaults to 3600 (1 hour).", + "exclusiveMinimum": 0, + "title": "Scale Down Delay Seconds", + "type": "integer" + }, + "checkpoint_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional bt:// URI of an existing trainer-target checkpoint to resume training from. 
Form: bt://loops:/weights/.", + "examples": [ + "bt://loops:k4q95w5/weights/step-100" + ], + "title": "Checkpoint Path" } }, "required": [ - "id", - "deleted", - "training_project_id" + "model" ], - "title": "TrainingJobTombstoneV1", + "title": "CreateTrainerServerRequestV1", "type": "object" }, - "GetTrainingJobResponseV1": { - "description": "A response to fetch a training job.", + "SamplingServerV1": { "properties": { - "training_project": { - "$ref": "#/components/schemas/TrainingProjectV1", - "description": "The training project." + "id": { + "title": "Id", + "type": "string" }, - "training_job": { - "$ref": "#/components/schemas/TrainingJobV1", - "description": "The fetched training job." + "base_url": { + "title": "Base Url", + "type": "string" + }, + "model_id": { + "description": "Hashid of the underlying Baseten model.", + "title": "Model Id", + "type": "string" + }, + "deployment_id": { + "description": "Hashid of the specific model deployment (version).", + "title": "Deployment Id", + "type": "string" } }, "required": [ - "training_project", - "training_job" + "id", + "base_url", + "model_id", + "deployment_id" ], - "title": "GetTrainingJobResponseV1", + "title": "SamplingServerV1", "type": "object" }, - "DownloadTrainingJobResponseV1": { - "description": "A response that includes the artifacts for a training job", + "TrainerServerV1": { "properties": { - "artifact_presigned_urls": { - "description": "Presigned URL's for the artifacts", - "items": { - "type": "string" - }, - "title": "Artifact Presigned Urls", - "type": "array" + "id": { + "title": "Id", + "type": "string" + }, + "base_url": { + "title": "Base Url", + "type": "string" + }, + "sampling_server": { + "$ref": "#/components/schemas/SamplingServerV1" } }, "required": [ - "artifact_presigned_urls" + "id", + "base_url", + "sampling_server" ], - "title": "DownloadTrainingJobResponseV1", + "title": "TrainerServerV1", "type": "object" }, - "RecreateTrainingJobResponseV1": { - "description": "A 
response that sends the new training job", + "CreateTrainerServerResponseV1": { "properties": { - "training_job": { - "$ref": "#/components/schemas/TrainingJobV1", - "description": "The created training job." + "trainer_server": { + "$ref": "#/components/schemas/TrainerServerV1" } }, "required": [ - "training_job" + "trainer_server" ], - "title": "RecreateTrainingJobResponseV1", + "title": "CreateTrainerServerResponseV1", "type": "object" }, - "GetTrainingJobLogsRequestV1": { - "description": "A request to fetch training logs.", + "CreateSamplingServerRequestV1": { "properties": { - "start_epoch_millis": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Epoch millis timestamp to start fetching logs", - "title": "Start Epoch Millis" + "model": { + "description": "Model to use for standalone samplers (eg, for baselines).", + "title": "Model", + "type": "string" }, - "end_epoch_millis": { + "max_seq_length": { "anyOf": [ { "type": "integer" @@ -7402,57 +11732,80 @@ } ], "default": null, - "description": "Epoch millis timestamp to end fetching logs", - "title": "End Epoch Millis" + "description": "Maximum prompt length (in tokens) the sampler must handle. Set this to the longest prompt you plan to send. Omit to use the default for the base model.", + "title": "Max Seq Length" }, - "direction": { + "checkpoint_path": { "anyOf": [ { - "$ref": "#/components/schemas/SortOrderV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Sort order for logs" - }, - "limit": { - "anyOf": [ - { - "maximum": 1000, - "minimum": 1, - "type": "integer" - }, - { - "type": "null" - } + "description": "Optional bt:// URI of an existing sampler-target checkpoint to load weights from on startup. 
Form: bt://loops:/sampler_weights/.", + "examples": [ + "bt://loops:k4q95w5/sampler_weights/step-100" ], - "default": 500, - "description": "Limit of logs to fetch in a single request", - "title": "Limit" + "title": "Checkpoint Path" } }, - "title": "GetTrainingJobLogsRequestV1", + "required": [ + "model" + ], + "title": "CreateSamplingServerRequestV1", "type": "object" }, - "GetTrainingJobMetricsRequestV1": { - "description": "A request to fetch metrics. Allows the user to request metrics over a period of time.", + "CreateSamplingServerResponseV1": { "properties": { - "end_epoch_millis": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Epoch millis timestamp to end fetching metrics", - "title": "End Epoch Millis" + "sampling_server": { + "$ref": "#/components/schemas/SamplingServerV1" + } + }, + "required": [ + "sampling_server" + ], + "title": "CreateSamplingServerResponseV1", + "type": "object" + }, + "GetTrainerServerCheckpointsResponseV1": { + "description": "A response to list checkpoints for a trainer.", + "properties": { + "trainer_id": { + "description": "The ID of the trainer.", + "title": "Trainer Id", + "type": "string" }, - "start_epoch_millis": { + "checkpoints": { + "description": "The checkpoints for the trainer.", + "items": { + "$ref": "#/components/schemas/TrainerServerCheckpointV1" + }, + "title": "Checkpoints", + "type": "array" + } + }, + "required": [ + "trainer_id", + "checkpoints" + ], + "title": "GetTrainerServerCheckpointsResponseV1", + "type": "object" + }, + "GetTrainerServerCheckpointFilesResponseV1": { + "description": "A response to fetch presigned URLs for files under a trainer server checkpoint.", + "properties": { + "presigned_urls": { + "description": "List of presigned URLs for checkpoint files.", + "items": { + "$ref": "#/components/schemas/CheckpointFile" + }, + "title": "Presigned Urls", + "type": "array" + }, + "next_page_token": { "anyOf": [ { "type": "integer" 
@@ -7462,263 +11815,262 @@ } ], "default": null, - "description": "Epoch millis timestamp to start fetching metrics.", - "title": "Start Epoch Millis" + "description": "Token to use for fetching the next page of results. None when there are no more results.", + "title": "Next Page Token" + }, + "total_count": { + "description": "Total number of checkpoint files available.", + "title": "Total Count", + "type": "integer" } }, - "title": "GetTrainingJobMetricsRequestV1", + "required": [ + "presigned_urls", + "total_count" + ], + "title": "GetTrainerServerCheckpointFilesResponseV1", "type": "object" }, - "StorageMetricsV1": { - "description": "A metric for a training job.", + "SupportedModelV1": { + "description": "A model supported by the Loops server.", "properties": { - "usage_bytes": { - "description": "The number of bytes used on the storage entity.", - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "title": "Usage Bytes", - "type": "array" + "model_name": { + "description": "The name of the supported model.", + "title": "Model Name", + "type": "string" }, - "utilization": { - "description": "The utilization of the storage entity as a decimal percentage.", + "max_context_length": { + "description": "The maximum context length (in tokens) supported by this model.", + "title": "Max Context Length", + "type": "integer" + } + }, + "required": [ + "model_name", + "max_context_length" + ], + "title": "SupportedModelV1", + "type": "object" + }, + "GetLoopsCapabilitiesResponseV1": { + "description": "Response for ``GET /v1/loops/capabilities``.", + "properties": { + "supported_models": { + "description": "List of models available on the server.", "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" + "$ref": "#/components/schemas/SupportedModelV1" }, - "title": "Utilization", + "title": "Supported Models", "type": "array" } }, "required": [ - "usage_bytes", - "utilization" + "supported_models" ], - "title": "StorageMetricsV1", + "title": 
"GetLoopsCapabilitiesResponseV1", "type": "object" }, - "TrainingJobMetricV1": { - "description": "A metric for a training job.", + "CreateLoopsSessionRequestV1": { "properties": { - "value": { - "description": "The value of the metric.", - "title": "Value", - "type": "number" - }, - "timestamp": { - "description": "The timestamp of the metric in ISO 8601 format.", - "format": "date-time", - "title": "Timestamp", + "training_project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "ID of the training project to associate with. If omitted, a default project is created for the org.", + "title": "Training Project Id" + } + }, + "title": "CreateLoopsSessionRequestV1", + "type": "object" + }, + "LoopsSessionV1": { + "properties": { + "id": { + "title": "Id", "type": "string" } }, "required": [ - "value", - "timestamp" + "id" ], - "title": "TrainingJobMetricV1", + "title": "LoopsSessionV1", "type": "object" }, - "TrainingJobMetricsV1": { + "CreateLoopsSessionResponseV1": { "properties": { - "gpu_memory_usage_bytes": { - "additionalProperties": { - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "type": "array" - }, - "description": "A map of GPU rank to memory usage for the training job. For multinode jobs, this is the memory usage of the leader unless specified otherwise.", - "title": "Gpu Memory Usage Bytes", - "type": "object" - }, - "gpu_utilization": { - "additionalProperties": { - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "type": "array" - }, - "description": "A map of GPU rank to fractional GPU utilization. For multinode jobs, this is the GPU utilization of the leader unless specified otherwise.", - "title": "Gpu Utilization", - "type": "object" - }, - "cpu_usage": { - "description": "The CPU usage measured in cores. 
For multinode jobs, this is the CPU usage of the leader unless specified otherwise.", - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "title": "Cpu Usage", - "type": "array" - }, - "cpu_memory_usage_bytes": { - "description": "The CPU memory usage for the training job. For multinode jobs, this is the CPU memory usage of the leader unless specified otherwise.", - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "title": "Cpu Memory Usage Bytes", - "type": "array" - }, - "ephemeral_storage": { - "$ref": "#/components/schemas/StorageMetricsV1", - "description": "The storage usage for the ephemeral storage. For multinode jobs, this is the ephemeral storage usage of the leader unless specified otherwise." + "session": { + "$ref": "#/components/schemas/LoopsSessionV1" } }, "required": [ - "gpu_memory_usage_bytes", - "gpu_utilization", - "cpu_usage", - "cpu_memory_usage_bytes", - "ephemeral_storage" + "session" ], - "title": "TrainingJobMetricsV1", + "title": "CreateLoopsSessionResponseV1", "type": "object" }, - "TrainingJobNodeMetricsV1": { - "description": "A set of metrics for a training job node.", + "GetLoopsSessionResponseV1": { + "description": "Response for ``GET /v1/loops/sessions/``.", "properties": { - "node_id": { - "description": "The name of the node.", - "title": "Node Id", - "type": "string" - }, - "metrics": { - "$ref": "#/components/schemas/TrainingJobMetricsV1", - "description": "The metrics for the node." + "session": { + "$ref": "#/components/schemas/LoopsSessionV1", + "description": "The Loops session." } }, "required": [ - "node_id", - "metrics" + "session" ], - "title": "TrainingJobNodeMetricsV1", + "title": "GetLoopsSessionResponseV1", "type": "object" }, - "GetTrainingJobMetricsResponseV1": { - "description": "A response to fetch training job metrics. 
The outer list for each metric represents that metric across time.", - "properties": { - "gpu_memory_usage_bytes": { - "additionalProperties": { - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "type": "array" - }, - "description": "A map of GPU rank to memory usage for the training job. For multinode jobs, this is the memory usage of the leader unless specified otherwise.", - "title": "Gpu Memory Usage Bytes", - "type": "object" - }, - "gpu_utilization": { - "additionalProperties": { - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "type": "array" - }, - "description": "A map of GPU rank to fractional GPU utilization. For multinode jobs, this is the GPU utilization of the leader unless specified otherwise.", - "title": "Gpu Utilization", - "type": "object" - }, - "cpu_usage": { - "description": "The CPU usage measured in cores. For multinode jobs, this is the CPU usage of the leader unless specified otherwise.", - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "title": "Cpu Usage", - "type": "array" + "LoopsRunV1": { + "properties": { + "id": { + "description": "The run ID.", + "title": "Id", + "type": "string" }, - "cpu_memory_usage_bytes": { - "description": "The CPU memory usage for the training job. For multinode jobs, this is the CPU memory usage of the leader unless specified otherwise.", - "items": { - "$ref": "#/components/schemas/TrainingJobMetricV1" - }, - "title": "Cpu Memory Usage Bytes", - "type": "array" + "session_id": { + "description": "The session ID this run belongs to.", + "title": "Session Id", + "type": "string" }, - "ephemeral_storage": { - "$ref": "#/components/schemas/StorageMetricsV1", - "description": "The storage usage for the ephemeral storage. For multinode jobs, this is the ephemeral storage usage of the leader unless specified otherwise." 
+ "base_model": { + "description": "The HuggingFace base model the run is fine-tuning.", + "title": "Base Model", + "type": "string" }, - "training_job": { - "$ref": "#/components/schemas/TrainingJobV1", - "description": "The training job." + "base_url": { + "description": "The run's base URL.", + "title": "Base Url", + "type": "string" }, - "cache": { - "anyOf": [ - { - "$ref": "#/components/schemas/StorageMetricsV1" - }, - { - "type": "null" - } - ], - "description": "The storage usage for the read-write cache." + "created_at": { + "description": "Time the run was created in ISO 8601 format", + "format": "date-time", + "title": "Created At", + "type": "string" }, - "per_node_metrics": { - "description": "The metrics for each node in the training job.", - "items": { - "$ref": "#/components/schemas/TrainingJobNodeMetricsV1" - }, - "title": "Per Node Metrics", - "type": "array" + "sampler": { + "$ref": "#/components/schemas/LoopsSamplerV1", + "description": "The sampler bound to this run." } }, "required": [ - "gpu_memory_usage_bytes", - "gpu_utilization", - "cpu_usage", - "cpu_memory_usage_bytes", - "ephemeral_storage", - "training_job", - "cache", - "per_node_metrics" + "id", + "session_id", + "base_model", + "base_url", + "created_at", + "sampler" ], - "title": "GetTrainingJobMetricsResponseV1", - "type": "object" - }, - "StopTrainingJobRequestV1": { - "description": "A request to stop a training job.", - "properties": {}, - "title": "StopTrainingJobRequestV1", + "title": "LoopsRunV1", "type": "object" }, - "StopTrainingJobResponseV1": { - "description": "A response to stopping a training job.", + "LoopsSamplerStatusV1": { + "description": "The current status of a Loops sampler.", "properties": { - "training_job": { - "$ref": "#/components/schemas/TrainingJobV1", - "description": "The stopped training job." + "name": { + "$ref": "#/components/schemas/DeploymentStatusV1", + "description": "The current status of the Loops sampler." 
} }, "required": [ - "training_job" + "name" ], - "title": "StopTrainingJobResponseV1", + "title": "LoopsSamplerStatusV1", "type": "object" }, - "TrainingJobCheckpointV1": { - "description": "A checkpoint for a training job.", + "LoopsSamplerV1": { "properties": { - "training_job_id": { - "description": "The ID of the training job.", - "title": "Training Job Id", + "id": { + "title": "Id", "type": "string" }, - "checkpoint_id": { - "description": "The ID of the checkpoint.", - "title": "Checkpoint Id", + "base_url": { + "title": "Base Url", + "type": "string" + }, + "base_model": { + "description": "The HuggingFace base model the sampler is serving.", + "title": "Base Model", "type": "string" }, "created_at": { - "description": "The timestamp of the checkpoint in ISO 8601 format.", + "description": "Time the sampler was created in ISO 8601 format", "format": "date-time", "title": "Created At", "type": "string" }, - "checkpoint_type": { - "description": "The type of checkpoint.", - "title": "Checkpoint Type", + "model_id": { + "description": "Hashid of the underlying Baseten model.", + "title": "Model Id", + "type": "string" + }, + "deployment_id": { + "description": "Hashid of the specific model deployment (version).", + "title": "Deployment Id", "type": "string" }, + "status": { + "$ref": "#/components/schemas/LoopsSamplerStatusV1", + "description": "The sampler's current status." 
+ } + }, + "required": [ + "id", + "base_url", + "base_model", + "created_at", + "model_id", + "deployment_id", + "status" + ], + "title": "LoopsSamplerV1", + "type": "object" + }, + "ListLoopsRunsResponseV1": { + "description": "Runs matching the query filters.", + "properties": { + "runs": { + "description": "List of runs.", + "items": { + "$ref": "#/components/schemas/LoopsRunV1" + }, + "title": "Runs", + "type": "array" + } + }, + "required": [ + "runs" + ], + "title": "ListLoopsRunsResponseV1", + "type": "object" + }, + "ListLoopsRunsQueryParamsV1": { + "description": "Query-string filters for ``GET /v1/loops/runs``.\n\nBoth filters are optional and can be combined; omit both to list all\nruns visible to the caller.", + "properties": { + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Filter by run ID.", + "examples": [ + "k4q95w5" + ], + "title": "Run Id" + }, "base_model": { "anyOf": [ { @@ -7728,28 +12080,69 @@ "type": "null" } ], - "description": "The base model of the checkpoint.", + "default": null, + "description": "Filter runs by base model name.", + "examples": [ + "Qwen/Qwen3-8B" + ], "title": "Base Model" + } + }, + "title": "ListLoopsRunsQueryParamsV1", + "type": "object" + }, + "CreateLoopsRunRequestV1": { + "properties": { + "session_id": { + "description": "ID of the Loops session this run belongs to.", + "title": "Session Id", + "type": "string" }, - "lora_adapter_config": { + "base_model": { + "description": "Base model ID (e.g. 'Qwen/Qwen3-8B').", + "title": "Base Model", + "type": "string" + }, + "max_seq_len": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "integer" }, { "type": "null" } ], - "description": "The adapter config of the checkpoint.", - "title": "Lora Adapter Config" + "default": null, + "description": "Maximum prompt length (in tokens) the run must handle. Set this to the longest training example you plan to send. 
Defaults to the maximum supported by the model configuration.", + "title": "Max Seq Len" }, - "size_bytes": { - "description": "The size of the checkpoint in bytes.", - "title": "Size Bytes", + "lora_rank": { + "default": 64, + "description": "LoRA rank.", + "title": "Lora Rank", "type": "integer" }, - "sync_status": { + "seed": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Random seed for reproducibility.", + "title": "Seed" + }, + "scale_down_delay_seconds": { + "default": 3600, + "description": "Seconds of inactivity before the run scales to zero. Must be positive. Defaults to 3600 (1 hour).", + "exclusiveMinimum": 0, + "title": "Scale Down Delay Seconds", + "type": "integer" + }, + "path": { "anyOf": [ { "type": "string" @@ -7759,153 +12152,103 @@ } ], "default": null, - "description": "Sync state of the checkpoint: SYNCING or COMPLETE.", - "title": "Sync Status" + "description": "Optional bt:// URI of an existing checkpoint to load weights from on startup. Form: bt://loops:/weights/.", + "examples": [ + "bt://loops:k4q95w5/weights/step-100" + ], + "title": "Path" + }, + "reuse_from_session_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional Loops session ID whose trainer deployment should be reused for this run, sharing the infrastructure across sessions instead of provisioning fresh. The named session must belong to the same team. 
Reuse is best-effort: if the prior deployment is stopped, failed, or its sampler is unhealthy, a new deployment is provisioned instead.", + "title": "Reuse From Session Id" } }, "required": [ - "training_job_id", - "checkpoint_id", - "created_at", - "checkpoint_type", - "base_model", - "lora_adapter_config", - "size_bytes" + "session_id", + "base_model" ], - "title": "TrainingJobCheckpointV1", + "title": "CreateLoopsRunRequestV1", "type": "object" }, - "GetTrainingJobCheckpointsResponseV1": { - "description": "A response to fetch checkpoints for a training job.", + "CreateLoopsRunResponseV1": { "properties": { - "training_job": { - "$ref": "#/components/schemas/TrainingJobV1", - "description": "The training job." - }, - "checkpoints": { - "description": "The checkpoints for the training job.", - "items": { - "$ref": "#/components/schemas/TrainingJobCheckpointV1" - }, - "title": "Checkpoints", - "type": "array" + "run": { + "$ref": "#/components/schemas/LoopsRunV1" } }, "required": [ - "training_job", - "checkpoints" + "run" ], - "title": "GetTrainingJobCheckpointsResponseV1", + "title": "CreateLoopsRunResponseV1", "type": "object" }, - "CheckpointFile": { + "GetLoopsRunResponseV1": { + "description": "Response for ``GET /v1/loops/runs/``.", "properties": { - "url": { - "title": "Url", - "type": "string" - }, - "relative_file_name": { - "title": "Relative File Name", - "type": "string" - }, - "node_rank": { - "title": "Node Rank", - "type": "integer" - }, - "size_bytes": { - "title": "Size Bytes", - "type": "integer" - }, - "last_modified": { - "title": "Last Modified", - "type": "string" + "run": { + "$ref": "#/components/schemas/LoopsRunV1", + "description": "The Loops run with its associated sampler." 
} }, "required": [ - "url", - "relative_file_name", - "node_rank", - "size_bytes", - "last_modified" + "run" ], - "title": "CheckpointFile", + "title": "GetLoopsRunResponseV1", "type": "object" }, - "GetTrainingJobCheckpointFilesResponseV1": { - "description": "A response to fetch presigned URLs for checkpoint files of a training job.", + "ListLoopsSamplersResponseV1": { + "description": "Response for ``GET /v1/loops/samplers``.\n\nReturns the caller's samplers, including those paired to runs and\nstandalone samplers. Ordered newest-first.", "properties": { - "presigned_urls": { - "description": "List of presigned URLs for checkpoint files.", + "samplers": { + "description": "List of samplers.", "items": { - "$ref": "#/components/schemas/CheckpointFile" + "$ref": "#/components/schemas/LoopsSamplerV1" }, - "title": "Presigned Urls", + "title": "Samplers", "type": "array" - }, - "next_page_token": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Token to use for fetching the next page of results. 
None when there are no more results.", - "title": "Next Page Token" - }, - "total_count": { - "description": "Total number of checkpoint files available.", - "title": "Total Count", - "type": "integer" } }, "required": [ - "presigned_urls", - "total_count" + "samplers" ], - "title": "GetTrainingJobCheckpointFilesResponseV1", + "title": "ListLoopsSamplersResponseV1", "type": "object" }, - "AuthCodeV1": { - "description": "Authentication code for a training job interactive session node.", + "CreateLoopsSamplerRequestV1": { "properties": { "session_id": { - "description": "Unique identifier of the interactive session.", + "description": "ID of the Loops session this sampler belongs to.", "title": "Session Id", "type": "string" }, - "replica_id": { - "description": "Replica identifier in gXXrY format (e.g., 'g00r0' for group 0, replica 0).", - "title": "Replica Id", - "type": "string" - }, - "auth_code": { - "description": "The device authentication code (e.g., '4F64-C0D9').", - "title": "Auth Code", - "type": "string" - }, - "auth_url": { - "description": "URL where the user should enter the auth code (e.g., 'https://github.com/login/device').", - "title": "Auth Url", + "base_model": { + "description": "Base model ID for standalone samplers (e.g., for baselines).", + "title": "Base Model", "type": "string" }, - "generated_at": { + "max_seq_length": { "anyOf": [ { - "format": "date-time", - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "When the auth code was generated, in ISO 8601 format.", - "title": "Generated At" + "description": "Maximum prompt length (in tokens) the sampler must handle. Set this to the longest prompt you plan to send. 
Omit to use the default for the base model.", + "title": "Max Seq Length" }, - "tunnel_name": { + "model_path": { "anyOf": [ { "type": "string" @@ -7915,24 +12258,13 @@ } ], "default": null, - "description": "The name of the tunnel node.", - "title": "Tunnel Name" - }, - "expires_at": { - "anyOf": [ - { - "format": "date-time", - "type": "string" - }, - { - "type": "null" - } + "description": "Optional bt:// URI of an existing sampler-target checkpoint to load weights from on startup. Form: bt://loops:/sampler_weights/.", + "examples": [ + "bt://loops:k4q95w5/sampler_weights/step-100" ], - "default": null, - "description": "When the session expires, in ISO 8601 format.", - "title": "Expires At" + "title": "Model Path" }, - "working_directory": { + "reuse_from_session_id": { "anyOf": [ { "type": "string" @@ -7942,106 +12274,158 @@ } ], "default": null, - "description": "The working directory of the session.", - "title": "Working Directory" + "description": "Optional Loops session ID whose deployment should be reused for this sampler. 
Same best-effort semantics as the run endpoint.", + "title": "Reuse From Session Id" } }, "required": [ "session_id", - "replica_id", - "auth_code", - "auth_url" + "base_model" ], - "title": "AuthCodeV1", + "title": "CreateLoopsSamplerRequestV1", "type": "object" }, - "GetAuthCodesResponseV1": { - "description": "Response containing auth codes for all nodes of a training job's interactive sessions.", + "CreateLoopsSamplerResponseV1": { "properties": { - "auth_codes": { - "description": "List of auth codes for each node that has an active interactive session.", - "items": { - "$ref": "#/components/schemas/AuthCodeV1" - }, - "title": "Auth Codes", - "type": "array" + "sampler": { + "$ref": "#/components/schemas/LoopsSamplerV1" } }, "required": [ - "auth_codes" + "sampler" ], - "title": "GetAuthCodesResponseV1", + "title": "CreateLoopsSamplerResponseV1", "type": "object" }, - "PatchInteractiveSessionRequestV1": { - "description": "Request to patch an interactive session.\n\nOnly fields that are provided (non-None) will be applied.", + "GetLoopsSamplerResponseV1": { + "description": "Response for ``GET /v1/loops/samplers/``.", "properties": { - "timeout_minutes": { + "sampler": { + "$ref": "#/components/schemas/LoopsSamplerV1", + "description": "The Loops sampler." 
+ } + }, + "required": [ + "sampler" + ], + "title": "GetLoopsSamplerResponseV1", + "type": "object" + }, + "LoopsCheckpointV1": { + "description": "A checkpoint saved by a Loops run.", + "properties": { + "checkpoint_id": { + "description": "The ID of the checkpoint.", + "title": "Checkpoint Id", + "type": "string" + }, + "created_at": { + "description": "The timestamp of the checkpoint in ISO 8601 format.", + "format": "date-time", + "title": "Created At", + "type": "string" + }, + "checkpoint_type": { + "description": "The type of checkpoint.", + "title": "Checkpoint Type", + "type": "string" + }, + "base_model": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], - "default": null, - "description": "For on_startup sessions, minutes to add to the expiration. For on_demand/on_failure sessions, minutes to add to the timeout. Use -1 for infinite timeout (bumps by 10 years).", - "title": "Timeout Minutes" + "description": "The base model of the checkpoint.", + "title": "Base Model" }, - "trigger": { + "lora_adapter_config": { "anyOf": [ { - "$ref": "#/components/schemas/V1InteractiveSessionTrigger" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": null, - "description": "Update when the interactive session is created. Cannot be changed if the session trigger is 'on_startup'." 
- } - }, - "title": "PatchInteractiveSessionRequestV1", - "type": "object" - }, - "InteractiveSessionV1": { - "description": "Representation of a training job interactive session.", - "properties": { - "id": { - "description": "Unique identifier of the interactive session.", - "title": "Id", - "type": "string" - }, - "trigger": { - "description": "When the interactive session is created.", - "title": "Trigger", - "type": "string" + "description": "The adapter config of the checkpoint.", + "title": "Lora Adapter Config" }, - "timeout_minutes": { - "description": "Minutes before the session times out.", - "title": "Timeout Minutes", + "size_bytes": { + "description": "The size of the checkpoint in bytes.", + "title": "Size Bytes", "type": "integer" }, - "session_provider": { - "description": "The IDE client for the session.", - "title": "Session Provider", - "type": "string" + "sync_status": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Sync state of the checkpoint: SYNCING or COMPLETE.", + "title": "Sync Status" }, - "auth_provider": { - "description": "The authentication provider for the session.", - "title": "Auth Provider", + "id": { + "description": "The checkpoint ID.", + "title": "Id", "type": "string" }, - "pod_name": { - "description": "Pod name / node rank for the session.", - "title": "Pod Name", + "run_id": { + "description": "The ID of the run that produced the checkpoint.", + "title": "Run Id", "type": "string" }, - "expires_at": { + "target": { + "$ref": "#/components/schemas/TrainerCheckpointTarget", + "description": "Whether this checkpoint is loadable by the sampler or by the run." 
+ } + }, + "required": [ + "checkpoint_id", + "created_at", + "checkpoint_type", + "base_model", + "lora_adapter_config", + "size_bytes", + "id", + "run_id", + "target" + ], + "title": "LoopsCheckpointV1", + "type": "object" + }, + "ListLoopsCheckpointsResponseV1": { + "description": "Checkpoints matching the query filter.", + "properties": { + "checkpoints": { + "description": "Matching checkpoints.", + "items": { + "$ref": "#/components/schemas/LoopsCheckpointV1" + }, + "title": "Checkpoints", + "type": "array" + } + }, + "required": [ + "checkpoints" + ], + "title": "ListLoopsCheckpointsResponseV1", + "type": "object" + }, + "ListLoopsCheckpointsQueryParamsV1": { + "description": "Query-string filters for ``GET /v1/loops/checkpoints``.\n\nProvide exactly one of ``run_id`` (all checkpoints for the run),\n``base_model`` (all checkpoints across the caller's runs of that\nbase model), or ``checkpoint_path`` (the single matching checkpoint).", + "properties": { + "run_id": { "anyOf": [ { - "format": "date-time", "type": "string" }, { @@ -8049,10 +12433,13 @@ } ], "default": null, - "description": "When the session expires, in ISO 8601 format.", - "title": "Expires At" + "description": "Filter by run ID. Returns all checkpoints saved by the run.", + "examples": [ + "k4q95w5" + ], + "title": "Run Id" }, - "tunnel_name": { + "base_model": { "anyOf": [ { "type": "string" @@ -8062,10 +12449,13 @@ } ], "default": null, - "description": "The tunnel name for the session.", - "title": "Tunnel Name" + "description": "Filter by base model. Returns checkpoints across the caller's runs of this base model.", + "examples": [ + "Qwen/Qwen3-8B" + ], + "title": "Base Model" }, - "auth_code": { + "checkpoint_path": { "anyOf": [ { "type": "string" @@ -8075,345 +12465,612 @@ } ], "default": null, - "description": "The device authentication code.", - "title": "Auth Code" + "description": "bt:// URI of a Loops checkpoint. 
Form: bt://loops:/(weights|sampler_weights)/.", + "examples": [ + "bt://loops:k4q95w5/sampler_weights/step-100" + ], + "title": "Checkpoint Path" + } + }, + "title": "ListLoopsCheckpointsQueryParamsV1", + "type": "object" + }, + "ValidateLoopsCheckpointRequestV1": { + "description": "Request body for ``POST /v1/loops/checkpoints/validate``.", + "properties": { + "checkpoint_path": { + "description": "bt:// URI of a sampler checkpoint. Form: bt://loops:/sampler_weights/.", + "examples": [ + "bt://loops:k4q95w5/sampler_weights/step-100" + ], + "title": "Checkpoint Path", + "type": "string" + } + }, + "required": [ + "checkpoint_path" + ], + "title": "ValidateLoopsCheckpointRequestV1", + "type": "object" + }, + "ValidateLoopsCheckpointResponseV1": { + "description": "Response for ``POST /v1/loops/checkpoints/validate``. Empty on success;\ninaccessible or malformed paths raise 400.", + "properties": {}, + "title": "ValidateLoopsCheckpointResponseV1", + "type": "object" + }, + "LoopsCheckpointFilesResponseV1": { + "description": "Response with presigned URLs for files under a Loops checkpoint.", + "properties": { + "presigned_urls": { + "description": "List of presigned URLs for checkpoint files.", + "items": { + "$ref": "#/components/schemas/CheckpointFile" + }, + "title": "Presigned Urls", + "type": "array" }, - "auth_url": { + "next_page_token": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "URL where the user should enter the auth code.", - "title": "Auth Url" + "description": "Token to use for fetching the next page of results. 
None when there are no more results.", + "title": "Next Page Token" + }, + "total_count": { + "description": "Total number of checkpoint files available.", + "title": "Total Count", + "type": "integer" + } + }, + "required": [ + "presigned_urls", + "total_count" + ], + "title": "LoopsCheckpointFilesResponseV1", + "type": "object" + }, + "LoopsDeploymentStatusV1": { + "description": "Latest deployment status for a Loops deployment.", + "properties": { + "name": { + "$ref": "#/components/schemas/Name", + "description": "Latest status of the Loops deployment." + } + }, + "required": [ + "name" + ], + "title": "LoopsDeploymentStatusV1", + "type": "object" + }, + "LoopsDeploymentV1": { + "description": "A Loops deployment \u2014 the long-lived run + sampler pair owned by a user.\n\nThe deployment's current sampler is included inline. The full list of\nsamplers visible to the caller (across all deployments) lives at\n``GET /v1/loops/samplers``.", + "properties": { + "id": { + "description": "The Loops deployment ID.", + "title": "Id", + "type": "string" }, - "auth_code_generated_at": { - "anyOf": [ - { - "format": "date-time", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "When the auth code was generated.", - "title": "Auth Code Generated At" + "base_model": { + "description": "The HuggingFace base model the deployment is fine-tuning.", + "title": "Base Model", + "type": "string" }, - "authenticated_at": { + "base_url": { + "description": "The run's base URL.", + "title": "Base Url", + "type": "string" + }, + "status": { + "$ref": "#/components/schemas/LoopsDeploymentStatusV1", + "description": "Latest deployment status." + }, + "sampler": { + "$ref": "#/components/schemas/LoopsSamplerV1", + "description": "The sampler bound to this deployment." 
+ } + }, + "required": [ + "id", + "base_model", + "base_url", + "status", + "sampler" + ], + "title": "LoopsDeploymentV1", + "type": "object" + }, + "Name": { + "enum": [ + "CREATED", + "DEPLOYING", + "RUNNING", + "SCALED_TO_ZERO", + "FAILED", + "STOPPED" + ], + "title": "LoopsDeploymentStatus", + "type": "string" + }, + "ListLoopsDeploymentsResponseV1": { + "description": "Response for ``GET /v1/loops/deployments``.\n\nReturns every Loops deployment owned by the caller, regardless of status\n(CREATED, DEPLOYING, RUNNING, STOPPED, or FAILED). Clients filter\nterminal-state deployments themselves.", + "properties": { + "deployments": { + "description": "Active Loops deployments.", + "items": { + "$ref": "#/components/schemas/LoopsDeploymentV1" + }, + "title": "Deployments", + "type": "array" + } + }, + "required": [ + "deployments" + ], + "title": "ListLoopsDeploymentsResponseV1", + "type": "object" + }, + "DeactivateLoopsDeploymentResponseV1": { + "description": "Response for ``POST /v1/loops/deployments//deactivate``.", + "properties": { + "id": { + "description": "The deactivated Loops deployment ID.", + "title": "Id", + "type": "string" + }, + "base_model": { + "description": "The base model whose Loops deployment was deactivated.", + "title": "Base Model", + "type": "string" + }, + "user": { + "$ref": "#/components/schemas/UserV1", + "description": "The user who owned the Loops deployment." + } + }, + "required": [ + "id", + "base_model", + "user" + ], + "title": "DeactivateLoopsDeploymentResponseV1", + "type": "object" + }, + "GetLoopsDeploymentResponseV1": { + "description": "Response for ``GET /v1/loops/deployments/``.", + "properties": { + "deployment": { + "$ref": "#/components/schemas/LoopsDeploymentV1", + "description": "The Loops deployment." 
+ } + }, + "required": [ + "deployment" + ], + "title": "GetLoopsDeploymentResponseV1", + "type": "object" + }, + "GetLoopsDeploymentMetricsRequestV1": { + "description": "Time-range request for trainer deployment metrics.", + "properties": { + "end_epoch_millis": { "anyOf": [ { - "format": "date-time", - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "When the session was authenticated.", - "title": "Authenticated At" + "description": "Epoch millis to end fetching metrics.", + "title": "End Epoch Millis" }, - "working_directory": { + "start_epoch_millis": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "The working directory of the session.", - "title": "Working Directory" + "description": "Epoch millis to start fetching metrics.", + "title": "Start Epoch Millis" } }, - "required": [ - "id", - "trigger", - "timeout_minutes", - "session_provider", - "auth_provider", - "pod_name" - ], - "title": "InteractiveSessionV1", + "title": "GetLoopsDeploymentMetricsRequestV1", "type": "object" }, - "PatchInteractiveSessionResponseV1": { - "description": "Response after patching an interactive session.", + "InferenceVolumeByStatusDatapointV1": { + "description": "Request rate split by HTTP response code class.", "properties": { - "interactive_session": { - "$ref": "#/components/schemas/InteractiveSessionV1", - "description": "The updated interactive session." 
- }, - "message": { - "description": "Human-readable summary of what was updated.", - "title": "Message", + "timestamp": { + "description": "ISO 8601 timestamp.", + "format": "date-time", + "title": "Timestamp", "type": "string" + }, + "status_2xx": { + "description": "2xx requests per second.", + "title": "Status 2Xx", + "type": "number" + }, + "status_4xx": { + "description": "4xx requests per second.", + "title": "Status 4Xx", + "type": "number" + }, + "status_5xx": { + "description": "5xx requests per second.", + "title": "Status 5Xx", + "type": "number" } }, "required": [ - "interactive_session", - "message" + "timestamp", + "status_2xx", + "status_4xx", + "status_5xx" ], - "title": "PatchInteractiveSessionResponseV1", + "title": "InferenceVolumeByStatusDatapointV1", "type": "object" }, - "FileSummary": { - "description": "Information about a file in the cache.", + "LoopsDeploymentMetricsV1": { + "description": "Metrics for a trainer (Loops) deployment.\n\nService-level fields summarize HTTP traffic into the trainer pods (the\nKnative queue-proxy is the source). Compute fields are the leader-pod\naggregate; ``per_node_metrics`` carries the full multinode breakdown.", "properties": { - "path": { - "description": "Relative path of the file in the cache", - "title": "Path", - "type": "string" + "inference_volume": { + "description": "Number of inference requests per unit time (requests per second).", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Inference Volume", + "type": "array" }, - "size_bytes": { - "description": "Size of the file in bytes", - "title": "Size Bytes", - "type": "integer" + "concurrent_requests": { + "description": "Number of in-progress concurrent inference requests. 
Source: the queue-proxy ``revision_queue_depth`` gauge on ``http-usermetric``.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Concurrent Requests", + "type": "array" }, - "modified": { - "description": "Last modification time of the file", - "title": "Modified", - "type": "string" + "response_time_stats": { + "description": "Percentiles of the response time distribution.", + "items": { + "$ref": "#/components/schemas/ResponseTimeDatapointV1" + }, + "title": "Response Time Stats", + "type": "array" }, - "file_type": { - "description": "Type of the file", - "title": "File Type", - "type": "string" + "inference_volume_by_status": { + "description": "Request rate split by response code class.", + "items": { + "$ref": "#/components/schemas/InferenceVolumeByStatusDatapointV1" + }, + "title": "Inference Volume By Status", + "type": "array" }, - "permissions": { - "description": "Permissions of the file", - "title": "Permissions", - "type": "string" + "gpu_memory_usage_bytes": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "Leader-pod GPU memory bytes per GPU rank.", + "title": "Gpu Memory Usage Bytes", + "type": "object" + }, + "gpu_utilization": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "Leader-pod fractional GPU utilization per GPU rank.", + "title": "Gpu Utilization", + "type": "object" + }, + "cpu_usage": { + "description": "Leader-pod CPU usage in cores.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Usage", + "type": "array" + }, + "cpu_memory_usage_bytes": { + "description": "Leader-pod CPU memory usage bytes.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Memory Usage Bytes", + "type": "array" + }, + "ephemeral_storage": { + "$ref": 
"#/components/schemas/StorageMetricsV1", + "description": "Leader-pod ephemeral storage usage." + }, + "per_node_metrics": { + "description": "Per-node compute breakdown for multinode trainer deployments.", + "items": { + "$ref": "#/components/schemas/LoopsDeploymentNodeMetricsV1" + }, + "title": "Per Node Metrics", + "type": "array" } }, "required": [ - "path", - "size_bytes", - "modified", - "file_type", - "permissions" + "inference_volume", + "concurrent_requests", + "response_time_stats", + "inference_volume_by_status", + "gpu_memory_usage_bytes", + "gpu_utilization", + "cpu_usage", + "cpu_memory_usage_bytes", + "ephemeral_storage", + "per_node_metrics" ], - "title": "FileSummary", + "title": "LoopsDeploymentMetricsV1", "type": "object" }, - "GetCacheSummaryResponseV1": { - "description": "Response for getting cache summary.", + "LoopsDeploymentNodeMetricsV1": { + "description": "Per-node compute metrics for a multinode trainer deployment.", "properties": { - "timestamp": { - "description": "Timestamp when the cache summary was captured", - "title": "Timestamp", + "node_id": { + "description": "Identifier for the node.", + "title": "Node Id", "type": "string" }, - "project_id": { - "description": "Project ID associated with the cache", - "title": "Project Id", - "type": "string" + "gpu_memory_usage_bytes": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "GPU memory usage bytes per GPU rank.", + "title": "Gpu Memory Usage Bytes", + "type": "object" }, - "file_summaries": { - "description": "List of files in the cache", + "gpu_utilization": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "type": "array" + }, + "description": "Fractional GPU utilization per GPU rank.", + "title": "Gpu Utilization", + "type": "object" + }, + "cpu_usage": { + "description": "CPU usage in cores.", "items": { - "$ref": 
"#/components/schemas/FileSummary" + "$ref": "#/components/schemas/TrainingJobMetricV1" }, - "title": "File Summaries", + "title": "Cpu Usage", + "type": "array" + }, + "cpu_memory_usage_bytes": { + "description": "CPU memory usage bytes.", + "items": { + "$ref": "#/components/schemas/TrainingJobMetricV1" + }, + "title": "Cpu Memory Usage Bytes", "type": "array" + }, + "ephemeral_storage": { + "$ref": "#/components/schemas/StorageMetricsV1", + "description": "Ephemeral storage usage." } }, "required": [ - "timestamp", - "project_id", - "file_summaries" + "node_id", + "gpu_memory_usage_bytes", + "gpu_utilization", + "cpu_usage", + "cpu_memory_usage_bytes", + "ephemeral_storage" ], - "title": "GetCacheSummaryResponseV1", + "title": "LoopsDeploymentNodeMetricsV1", "type": "object" }, - "TrainingProjectTombstoneV1": { - "description": "A training project tombstone.", + "ResponseTimeDatapointV1": { + "description": "Latency quantile datapoint.\n\nValues are reported in **milliseconds** to match the oracle/inference\n``response_time_stats`` convention. Source histogram is the queue-proxy's\n``revision_request_latencies_bucket`` whose bucket boundaries are in ms.", "properties": { - "id": { - "description": "Unique identifier of the training project", - "title": "Id", + "timestamp": { + "description": "ISO 8601 timestamp.", + "format": "date-time", + "title": "Timestamp", "type": "string" }, - "deleted": { - "description": "Whether the training project was deleted", - "title": "Deleted", - "type": "boolean" - } - }, - "required": [ - "id", - "deleted" - ], - "title": "TrainingProjectTombstoneV1", - "type": "object" - }, - "GetTrainingProjectResponseV1": { - "description": "A response to getting a training project.", - "properties": { - "training_project": { - "$ref": "#/components/schemas/TrainingProjectV1", - "description": "The training project." 
+ "p50": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "50th percentile request latency (milliseconds).", + "title": "P50" + }, + "p95": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "95th percentile request latency (milliseconds).", + "title": "P95" + }, + "p99": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "99th percentile request latency (milliseconds).", + "title": "P99" } }, "required": [ - "training_project" + "timestamp" ], - "title": "GetTrainingProjectResponseV1", + "title": "ResponseTimeDatapointV1", "type": "object" }, - "OrderByV1": { - "description": "A request to order training jobs.", + "GetLoopsDeploymentMetricsResponseV1": { + "description": "Response for ``POST /v1/loops/deployments//metrics``.", "properties": { - "field": { - "description": "The field to order by.", - "examples": [ - "created_at" - ], - "title": "Field", + "deployment_id": { + "description": "The trainer deployment ID.", + "title": "Deployment Id", "type": "string" }, - "order": { - "description": "The direction to order by.", - "examples": [ - "asc", - "desc" - ], - "title": "Order", - "type": "string" + "metrics": { + "$ref": "#/components/schemas/LoopsDeploymentMetricsV1", + "description": "Metrics for the deployment." 
} }, "required": [ - "field", - "order" + "deployment_id", + "metrics" ], - "title": "OrderByV1", + "title": "GetLoopsDeploymentMetricsResponseV1", "type": "object" }, - "SearchTrainingJobsRequestV1": { - "description": "A request to search training jobs.", + "GetLoopsDeploymentLogsRequestV1": { + "description": "Query parameters for fetching a Loops deployment's logs.", "properties": { - "project_id": { + "start_epoch_millis": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "Filter the training jobs by project ID.", - "examples": [ - "n4q95w5" - ], - "title": "Project Id" + "description": "Epoch millis timestamp to start fetching logs", + "title": "Start Epoch Millis" }, - "job_id": { + "end_epoch_millis": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "Filter the training jobs by job ID.", - "examples": [ - "p7qr9qv" - ], - "title": "Job Id" + "description": "Epoch millis timestamp to end fetching logs", + "title": "End Epoch Millis" }, - "statuses": { + "direction": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "$ref": "#/components/schemas/SortOrderV1" }, { "type": "null" } ], "default": null, - "description": "Filter the training jobs by status.", - "examples": [ - [ - "TRAINING_JOB_RUNNING", - "TRAINING_JOB_COMPLETED" - ] - ], - "title": "Statuses" + "description": "Sort order for logs" }, - "order_by": { - "default": [ + "limit": { + "anyOf": [ { - "field": "created_at", - "order": "desc" + "maximum": 1000, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" } ], - "description": "Order the training jobs by a field. 
Currently supports created_at", - "items": { - "$ref": "#/components/schemas/OrderByV1" - }, - "title": "Order By", - "type": "array" + "default": 500, + "description": "Limit of logs to fetch in a single request", + "title": "Limit" } }, - "title": "SearchTrainingJobsRequestV1", + "title": "GetLoopsDeploymentLogsRequestV1", "type": "object" }, - "SearchTrainingJobsResponseV1": { - "description": "A response to search training jobs.", + "TrainingGpuCapacityItemV1": { + "description": "GPU capacity and current usage for one GPU type.", "properties": { - "training_jobs": { - "description": "List of training jobs.", - "items": { - "$ref": "#/components/schemas/TrainingJobV1" - }, - "title": "Training Jobs", - "type": "array" + "gpu_type": { + "description": "GPU type identifier (e.g. H100, A100-40GB)", + "title": "Gpu Type", + "type": "string" + }, + "baseline": { + "description": "Baseline GPU allocation; jobs below this threshold are expected to run immediately. 0 if not configured.", + "title": "Baseline", + "type": "integer" + }, + "limit": { + "description": "Maximum concurrent GPUs of this type for this org", + "title": "Limit", + "type": "integer" + }, + "usage_count": { + "description": "GPUs currently in use by active training jobs", + "title": "Usage Count", + "type": "integer" } }, "required": [ - "training_jobs" + "gpu_type", + "baseline", + "limit", + "usage_count" ], - "title": "SearchTrainingJobsResponseV1", + "title": "TrainingGpuCapacityItemV1", "type": "object" }, - "AWSCredentialsV1": { - "description": "AWS credentials", + "GetTrainingGpuCapacityResponseV1": { + "description": "Response for the training GPU capacity endpoint.", "properties": { - "aws_access_key_id": { - "description": "The AWS access key ID", - "title": "Aws Access Key Id", - "type": "string" - }, - "aws_secret_access_key": { - "description": "The AWS secret access key", - "title": "Aws Secret Access Key", - "type": "string" - }, - "aws_session_token": { - "description": "The AWS 
session token", - "title": "Aws Session Token", - "type": "string" + "gpu_capacities": { + "description": "GPU capacity limits and current usage per GPU type", + "items": { + "$ref": "#/components/schemas/TrainingGpuCapacityItemV1" + }, + "title": "Gpu Capacities", + "type": "array" } }, "required": [ - "aws_access_key_id", - "aws_secret_access_key", - "aws_session_token" + "gpu_capacities" ], - "title": "AWSCredentialsV1", + "title": "GetTrainingGpuCapacityResponseV1", "type": "object" }, "GetBlobCredentialsResponseV1": { @@ -8710,6 +13367,20 @@ "title": "Environment Variables", "type": "object" }, + "model_metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Model metadata persisted into model_config", + "title": "Model Metadata" + }, "autoscaling_settings": { "anyOf": [ { @@ -8726,6 +13397,7 @@ "autoscaling_window": 600, "concurrency_target": null, "max_replica": 5, + "max_scale_down_rate": null, "min_replica": 1, "scale_down_delay": 300, "target_in_flight_tokens": null, @@ -8777,6 +13449,31 @@ ], "title": "Metadata" }, + "weights": { + "anyOf": [ + { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Weight configurations for BDN model weight distribution", + "examples": [ + [ + { + "mount_location": "/models/base", + "source": "hf://meta-llama/Llama-3-8B" + } + ] + ], + "title": "Weights" + }, "name": { "description": "Name of the model", "title": "Name", @@ -8790,32 +13487,44 @@ "title": "CreateLLMModelRequestV1", "type": "object" }, - "LLMModelV1": { - "description": "A BIS LLM model", + "LLMModelHandleV1": { + "description": "Handle for a BIS LLM model deployment.", "properties": { - "id": { + "model_id": { "description": "Unique identifier of the model", - "title": "Id", + "title": "Model Id", "type": "string" }, - "created_at": { - "description": 
"Time the model was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", + "version_id": { + "description": "Unique identifier of the model version", + "title": "Version Id", "type": "string" }, - "name": { - "description": "Name of the model", - "title": "Name", + "hostname": { + "description": "Hostname used to invoke the model", + "title": "Hostname", "type": "string" + }, + "instance_type_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Name of the instance type the model deployment is running on", + "title": "Instance Type Name" } }, "required": [ - "id", - "created_at", - "name" + "model_id", + "version_id", + "hostname" ], - "title": "LLMModelV1", + "title": "LLMModelHandleV1", "type": "object" }, "CreateLLMModelVersionRequestV1": { @@ -8845,6 +13554,20 @@ "title": "Environment Variables", "type": "object" }, + "model_metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Model metadata persisted into model_config", + "title": "Model Metadata" + }, "autoscaling_settings": { "anyOf": [ { @@ -8861,6 +13584,7 @@ "autoscaling_window": 600, "concurrency_target": null, "max_replica": 5, + "max_scale_down_rate": null, "min_replica": 1, "scale_down_delay": 300, "target_in_flight_tokens": null, @@ -8911,6 +13635,31 @@ } ], "title": "Metadata" + }, + "weights": { + "anyOf": [ + { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Weight configurations for BDN model weight distribution", + "examples": [ + [ + { + "mount_location": "/models/base", + "source": "hf://meta-llama/Llama-3-8B" + } + ] + ], + "title": "Weights" } }, "required": [ @@ -8919,34 +13668,6 @@ "title": "CreateLLMModelVersionRequestV1", "type": "object" }, - "LLMModelVersionV1": { - "description": "A BIS 
LLM model version", - "properties": { - "id": { - "description": "Unique identifier of the model version", - "title": "Id", - "type": "string" - }, - "created_at": { - "description": "Time the model was created in ISO 8601 format", - "format": "date-time", - "title": "Created At", - "type": "string" - }, - "name": { - "description": "Name of the model version", - "title": "Name", - "type": "string" - } - }, - "required": [ - "id", - "created_at", - "name" - ], - "title": "LLMModelVersionV1", - "type": "object" - }, "LibraryListingV1": { "description": "A library listing.", "properties": { @@ -9274,6 +13995,19 @@ "description": "Name of the resource", "title": "Name" }, + "model_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Name of the parent resource (e.g., model name for model deployments, training project name for training jobs)", + "title": "Model Name" + }, "is_deleted": { "description": "Indicates if the resource has been deleted", "title": "Is Deleted", @@ -9392,22 +14126,227 @@ } }, "required": [ - "date", + "date", + "subtotal", + "minutes", + "inference_requests" + ], + "title": "DailyDedicatedUsageV1", + "type": "object" + }, + "DailyModelApiUsageV1": { + "properties": { + "date": { + "description": "Date of the usage", + "format": "date", + "title": "Date", + "type": "string" + }, + "subtotal": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Subtotal cost incurred on this date in dollars", + "title": "Subtotal" + }, + "input_tokens": { + "description": "Number of input tokens on this date", + "title": "Input Tokens", + "type": "integer" + }, + "output_tokens": { + "description": "Number of output tokens on this date", + "title": "Output Tokens", + "type": "integer" + }, + "cached_input_tokens": { + "description": "Number of cached input tokens on this date", + "title": "Cached Input Tokens", 
+ "type": "integer" + } + }, + "required": [ + "date", + "subtotal", + "input_tokens", + "output_tokens", + "cached_input_tokens" + ], + "title": "DailyModelApiUsageV1", + "type": "object" + }, + "DailyTrainingUsageV1": { + "properties": { + "date": { + "description": "Date of the usage", + "format": "date", + "title": "Date", + "type": "string" + }, + "subtotal": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Subtotal cost incurred on this date in dollars", + "title": "Subtotal" + }, + "minutes": { + "description": "Minutes used on this date", + "title": "Minutes", + "type": "integer" + } + }, + "required": [ + "date", + "subtotal", + "minutes" + ], + "title": "DailyTrainingUsageV1", + "type": "object" + }, + "DedicatedItemV1": { + "properties": { + "billable_resource": { + "$ref": "#/components/schemas/BillableResourceV1", + "description": "The model deployment resource" + }, + "subtotal": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Subtotal cost in dollars for this billable resource", + "title": "Subtotal" + }, + "minutes": { + "description": "Total minutes used for this billable resource", + "title": "Minutes", + "type": "integer" + }, + "inference_requests": { + "description": "Total inference requests for this billable resource", + "title": "Inference Requests", + "type": "integer" + }, + "daily": { + "description": "Daily usage breakdown", + "items": { + "$ref": "#/components/schemas/DailyDedicatedUsageV1" + }, + "title": "Daily", + "type": "array" + } + }, + "required": [ + "billable_resource", + "subtotal", + "minutes", + "inference_requests" + ], + "title": "DedicatedItemV1", + "type": "object" + }, + "DedicatedUsageV1": { + "properties": { + "subtotal": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": 
"string" + } + ], + "description": "Subtotal cost in dollars after applying credits used", + "title": "Subtotal" + }, + "credits_used": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Credits applied in dollars", + "title": "Credits Used" + }, + "total": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Total cost in dollars", + "title": "Total" + }, + "minutes": { + "description": "Total minutes used", + "title": "Minutes", + "type": "integer" + }, + "breakdown": { + "description": "Per-deployment usage breakdown", + "items": { + "$ref": "#/components/schemas/DedicatedItemV1" + }, + "title": "Breakdown", + "type": "array" + } + }, + "required": [ "subtotal", - "minutes", - "inference_requests" + "credits_used", + "total", + "minutes" ], - "title": "DailyDedicatedUsageV1", + "title": "DedicatedUsageV1", "type": "object" }, - "DailyModelApiUsageV1": { + "ModelApiItemV1": { "properties": { - "date": { - "description": "Date of the usage", - "format": "date", - "title": "Date", + "model_name": { + "description": "Model name", + "title": "Model Name", "type": "string" }, + "model_family": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Model family (e.g., llama, mistral)", + "title": "Model Family" + }, "subtotal": { "anyOf": [ { @@ -9418,43 +14357,45 @@ "type": "string" } ], - "description": "Subtotal cost incurred on this date in dollars", + "description": "Subtotal cost in dollars for this model", "title": "Subtotal" }, "input_tokens": { - "description": "Number of input tokens on this date", + "description": "Total input tokens for this model", "title": "Input Tokens", "type": "integer" }, "output_tokens": { - "description": "Number of output tokens on this date", + "description": "Total output tokens for this model", 
"title": "Output Tokens", "type": "integer" }, "cached_input_tokens": { - "description": "Number of cached input tokens on this date", + "description": "Total cached input tokens for this model", "title": "Cached Input Tokens", "type": "integer" + }, + "daily": { + "description": "Daily usage breakdown", + "items": { + "$ref": "#/components/schemas/DailyModelApiUsageV1" + }, + "title": "Daily", + "type": "array" } }, "required": [ - "date", + "model_name", "subtotal", "input_tokens", "output_tokens", "cached_input_tokens" ], - "title": "DailyModelApiUsageV1", + "title": "ModelApiItemV1", "type": "object" }, - "DailyTrainingUsageV1": { + "ModelApisUsageV1": { "properties": { - "date": { - "description": "Date of the usage", - "format": "date", - "title": "Date", - "type": "string" - }, "subtotal": { "anyOf": [ { @@ -9465,28 +14406,66 @@ "type": "string" } ], - "description": "Subtotal cost incurred on this date in dollars", + "description": "Subtotal cost in dollars after applying credits used", "title": "Subtotal" }, - "minutes": { - "description": "Minutes used on this date", - "title": "Minutes", - "type": "integer" + "credits_used": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Credits applied in dollars", + "title": "Credits Used" + }, + "total": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Total cost in dollars", + "title": "Total" + }, + "breakdown": { + "description": "Per-model usage breakdown", + "items": { + "$ref": "#/components/schemas/ModelApiItemV1" + }, + "title": "Breakdown", + "type": "array" } }, "required": [ - "date", "subtotal", - "minutes" + "credits_used", + "total" ], - "title": "DailyTrainingUsageV1", + "title": "ModelApisUsageV1", "type": "object" }, - "DedicatedItemV1": { + "ResourceKind": { + "enum": [ + "MODEL_DEPLOYMENT", + "TRAINING_JOB", + 
"CHAINLET" + ], + "title": "ResourceKind", + "type": "string" + }, + "TrainingItemV1": { "properties": { "billable_resource": { "$ref": "#/components/schemas/BillableResourceV1", - "description": "The model deployment resource" + "description": "The training job resource" }, "subtotal": { "anyOf": [ @@ -9506,15 +14485,10 @@ "title": "Minutes", "type": "integer" }, - "inference_requests": { - "description": "Total inference requests for this billable resource", - "title": "Inference Requests", - "type": "integer" - }, "daily": { "description": "Daily usage breakdown", "items": { - "$ref": "#/components/schemas/DailyDedicatedUsageV1" + "$ref": "#/components/schemas/DailyTrainingUsageV1" }, "title": "Daily", "type": "array" @@ -9523,13 +14497,12 @@ "required": [ "billable_resource", "subtotal", - "minutes", - "inference_requests" + "minutes" ], - "title": "DedicatedItemV1", + "title": "TrainingItemV1", "type": "object" }, - "DedicatedUsageV1": { + "TrainingUsageV1": { "properties": { "subtotal": { "anyOf": [ @@ -9547,60 +14520,300 @@ "credits_used": { "anyOf": [ { - "type": "number" + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Credits applied in dollars", + "title": "Credits Used" + }, + "total": { + "anyOf": [ + { + "type": "number" + }, + { + "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", + "type": "string" + } + ], + "description": "Total cost in dollars", + "title": "Total" + }, + "minutes": { + "description": "Total minutes used", + "title": "Minutes", + "type": "integer" + }, + "breakdown": { + "description": "Per-job usage breakdown", + "items": { + "$ref": "#/components/schemas/TrainingItemV1" + }, + "title": "Breakdown", + "type": "array" + } + }, + "required": [ + "subtotal", + "credits_used", + "total", + "minutes" + ], + "title": "TrainingUsageV1", + "type": "object" + }, + "UsageSummaryV1": { + "description": "Billing usage summary for the requested date range.", + 
"properties": { + "dedicated_usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/DedicatedUsageV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Dedicated model serving usage" + }, + "training_usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/TrainingUsageV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Training usage" + }, + "model_apis_usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/ModelApisUsageV1" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Model APIs usage" + } + }, + "title": "UsageSummaryV1", + "type": "object" + }, + "UsageSummaryRequestV1": { + "properties": { + "start_date": { + "description": "Start date (ISO 8601, UTC). Earliest queryable: 2026-01-01.", + "format": "date-time", + "title": "Start Date", + "type": "string" + }, + "end_date": { + "description": "End date in ISO 8601 format (UTC). Date range cannot exceed 31 days.", + "format": "date-time", + "title": "End Date", + "type": "string" + } + }, + "required": [ + "start_date", + "end_date" + ], + "title": "UsageSummaryRequestV1", + "type": "object" + }, + "UserInfoV1": { + "description": "A Baseten user.", + "properties": { + "user_id": { + "description": "Unique identifier for the user", + "title": "User Id", + "type": "string" + }, + "email": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Email address of the user", + "title": "Email" + }, + "name": { + "anyOf": [ + { + "type": "string" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": "Credits applied in dollars", - "title": "Credits Used" + "default": null, + "description": "Display name of the user", + "title": "Name" }, - "total": { + "workspace_name": { "anyOf": [ { - "type": "number" + "type": "string" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - 
"description": "Total cost in dollars", - "title": "Total" + "default": null, + "description": "Name of the user's workspace", + "title": "Workspace Name" + } + }, + "required": [ + "user_id" + ], + "title": "UserInfoV1", + "type": "object" + }, + "EffectiveModelConfigV1": { + "properties": { + "slug": { + "description": "Shared endpoint slug.", + "title": "Slug", + "type": "string" }, - "minutes": { - "description": "Total minutes used", - "title": "Minutes", - "type": "integer" + "rate_limits": { + "items": { + "$ref": "#/components/schemas/EffectiveRateLimitV1" + }, + "title": "Rate Limits", + "type": "array" }, - "breakdown": { - "description": "Per-deployment usage breakdown", + "usage_limits": { "items": { - "$ref": "#/components/schemas/DedicatedItemV1" + "$ref": "#/components/schemas/EffectiveUsageLimitV1" }, - "title": "Breakdown", + "title": "Usage Limits", "type": "array" } }, "required": [ - "subtotal", - "credits_used", - "total", - "minutes" + "slug" ], - "title": "DedicatedUsageV1", + "title": "EffectiveModelConfigV1", "type": "object" }, - "ModelApiItemV1": { + "EffectiveRateLimitV1": { "properties": { - "model_name": { - "description": "Model name", - "title": "Model Name", + "type": { + "$ref": "#/components/schemas/LimitTypeV1", + "description": "The type of the rate limit", + "examples": [ + "TOKEN", + "REQUEST" + ] + }, + "unit": { + "$ref": "#/components/schemas/RateLimitUnitV1", + "description": "The unit of the rate limit", + "examples": [ + "SECOND", + "MINUTE" + ] + }, + "threshold": { + "description": "The threshold for the rate limit", + "examples": [ + 1000, + 50000 + ], + "minimum": 1, + "title": "Threshold", + "type": "integer" + }, + "source_group": { + "description": "ID of the group in the hierarchy this limit is anchored to.", + "examples": [ + "abc123" + ], + "title": "Source Group", + "type": "string" + } + }, + "required": [ + "type", + "unit", + "threshold", + "source_group" + ], + "title": "EffectiveRateLimitV1", + "type": 
"object" + }, + "EffectiveUsageLimitV1": { + "properties": { + "type": { + "$ref": "#/components/schemas/LimitTypeV1", + "description": "The type of the usage limit", + "examples": [ + "REQUEST", + "TOKEN" + ] + }, + "unit": { + "$ref": "#/components/schemas/UsageLimitUnitV1", + "description": "The unit of the usage limit", + "examples": [ + "DAY" + ] + }, + "threshold": { + "description": "The threshold for the usage limit", + "examples": [ + 10000000 + ], + "minimum": 1, + "title": "Threshold", + "type": "integer" + }, + "source_group": { + "description": "ID of the group in the hierarchy this limit is anchored to.", + "examples": [ + "abc123" + ], + "title": "Source Group", "type": "string" + } + }, + "required": [ + "type", + "unit", + "threshold", + "source_group" + ], + "title": "EffectiveUsageLimitV1", + "type": "object" + }, + "GroupHierarchyV1": { + "properties": { + "limit_enforcement": { + "$ref": "#/components/schemas/LimitEnforcementV1", + "default": "INDEPENDENT", + "examples": [ + "CASCADING", + "INDEPENDENT" + ] }, - "model_family": { + "parent_group_id": { "anyOf": [ { "type": "string" @@ -9610,273 +14823,527 @@ } ], "default": null, - "description": "Model family (e.g., llama, mistral)", - "title": "Model Family" - }, - "subtotal": { + "examples": [ + "abc123" + ], + "title": "Parent Group Id" + } + }, + "title": "GroupHierarchyV1", + "type": "object" + }, + "GroupMetadataV1": { + "properties": { + "name": { "anyOf": [ { - "type": "number" + "maxLength": 255, + "type": "string" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": "Subtotal cost in dollars for this model", - "title": "Subtotal" + "default": null, + "description": "Optional display name for the group.", + "examples": [ + "Acme prod" + ], + "title": "Name" }, - "input_tokens": { - "description": "Total input tokens for this model", - "title": "Input Tokens", - "type": "integer" + "external_entity_id": { + "description": 
"External-system identifier for this group. Unique within the caller's org.", + "examples": [ + "cust_42" + ], + "maxLength": 255, + "minLength": 1, + "title": "External Entity Id", + "type": "string" + } + }, + "required": [ + "external_entity_id" + ], + "title": "GroupMetadataV1", + "type": "object" + }, + "GroupV1": { + "properties": { + "id": { + "description": "Internal Baseten ID for the group.", + "title": "Id", + "type": "string" }, - "output_tokens": { - "description": "Total output tokens for this model", - "title": "Output Tokens", - "type": "integer" + "metadata": { + "$ref": "#/components/schemas/GroupMetadataV1", + "description": "Group identity + display metadata." }, - "cached_input_tokens": { - "description": "Total cached input tokens for this model", - "title": "Cached Input Tokens", - "type": "integer" + "models": { + "items": { + "$ref": "#/components/schemas/ModelConfigV1" + }, + "title": "Models", + "type": "array" }, - "daily": { - "description": "Daily usage breakdown", + "effective_models": { "items": { - "$ref": "#/components/schemas/DailyModelApiUsageV1" + "$ref": "#/components/schemas/EffectiveModelConfigV1" }, - "title": "Daily", + "title": "Effective Models", "type": "array" + }, + "hierarchy": { + "$ref": "#/components/schemas/GroupHierarchyV1", + "description": "Parent linkage and limit enforcement mode. Parent is null for root groups." 
+ }, + "created_at": { + "description": "When this group was created.", + "format": "date-time", + "title": "Created At", + "type": "string" } }, "required": [ - "model_name", - "subtotal", - "input_tokens", - "output_tokens", - "cached_input_tokens" + "id", + "metadata", + "hierarchy", + "created_at" ], - "title": "ModelApiItemV1", + "title": "GroupV1", "type": "object" }, - "ModelApisUsageV1": { + "LimitEnforcementV1": { + "enum": [ + "CASCADING", + "INDEPENDENT" + ], + "title": "LimitEnforcementV1", + "type": "string" + }, + "LimitTypeV1": { + "enum": [ + "REQUEST", + "TOKEN" + ], + "title": "LimitTypeV1", + "type": "string" + }, + "ModelConfigV1": { "properties": { - "subtotal": { - "anyOf": [ - { - "type": "number" - }, - { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" - } - ], - "description": "Subtotal cost in dollars after applying credits used", - "title": "Subtotal" + "slug": { + "description": "Shared endpoint slug.", + "title": "Slug", + "type": "string" }, - "credits_used": { + "rate_limits": { + "items": { + "$ref": "#/components/schemas/RateLimitV1" + }, + "title": "Rate Limits", + "type": "array" + }, + "usage_limits": { + "items": { + "$ref": "#/components/schemas/UsageLimitV1" + }, + "title": "Usage Limits", + "type": "array" + } + }, + "required": [ + "slug" + ], + "title": "ModelConfigV1", + "type": "object" + }, + "PaginationResponseV1": { + "properties": { + "has_more": { + "description": "Whether more items exist after this page.", + "title": "Has More", + "type": "boolean" + }, + "cursor": { "anyOf": [ { - "type": "number" + "type": "string" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": "Credits applied in dollars", - "title": "Credits Used" + "default": null, + "description": "Opaque cursor to pass into the next request. 
Null when there is no next page.", + "title": "Cursor" + } + }, + "required": [ + "has_more" + ], + "title": "PaginationResponseV1", + "type": "object" + }, + "RateLimitUnitV1": { + "enum": [ + "SECOND", + "MINUTE" + ], + "title": "RateLimitUnitV1", + "type": "string" + }, + "RateLimitV1": { + "properties": { + "type": { + "$ref": "#/components/schemas/LimitTypeV1", + "description": "The type of the rate limit", + "examples": [ + "TOKEN", + "REQUEST" + ] }, - "total": { - "anyOf": [ - { - "type": "number" - }, - { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" - } + "unit": { + "$ref": "#/components/schemas/RateLimitUnitV1", + "description": "The unit of the rate limit", + "examples": [ + "SECOND", + "MINUTE" + ] + }, + "threshold": { + "description": "The threshold for the rate limit", + "examples": [ + 1000, + 50000 ], - "description": "Total cost in dollars", - "title": "Total" + "minimum": 1, + "title": "Threshold", + "type": "integer" + } + }, + "required": [ + "type", + "unit", + "threshold" + ], + "title": "RateLimitV1", + "type": "object" + }, + "UsageLimitUnitV1": { + "enum": [ + "DAY" + ], + "title": "UsageLimitUnitV1", + "type": "string" + }, + "UsageLimitV1": { + "properties": { + "type": { + "$ref": "#/components/schemas/LimitTypeV1", + "description": "The type of the usage limit", + "examples": [ + "REQUEST", + "TOKEN" + ] + }, + "unit": { + "$ref": "#/components/schemas/UsageLimitUnitV1", + "description": "The unit of the usage limit", + "examples": [ + "DAY" + ] }, - "breakdown": { - "description": "Per-model usage breakdown", + "threshold": { + "description": "The threshold for the usage limit", + "examples": [ + 10000000 + ], + "minimum": 1, + "title": "Threshold", + "type": "integer" + } + }, + "required": [ + "type", + "unit", + "threshold" + ], + "title": "UsageLimitV1", + "type": "object" + }, + "GroupsResponseV1": { + "properties": { + "items": { + "description": "Items in this page.", "items": { - "$ref": 
"#/components/schemas/ModelApiItemV1" + "$ref": "#/components/schemas/GroupV1" }, - "title": "Breakdown", + "title": "Items", "type": "array" + }, + "pagination": { + "$ref": "#/components/schemas/PaginationResponseV1", + "description": "Pagination metadata for the page." } }, "required": [ - "subtotal", - "credits_used", - "total" + "items", + "pagination" ], - "title": "ModelApisUsageV1", + "title": "GroupsResponseV1", "type": "object" }, - "ResourceKind": { - "enum": [ - "MODEL_DEPLOYMENT", - "TRAINING_JOB", - "CHAINLET" + "CreateGroupRequestV1": { + "properties": { + "metadata": { + "$ref": "#/components/schemas/GroupMetadataV1", + "description": "Group identity + display metadata." + }, + "models": { + "description": "Per-model rate and usage limit configuration. Defines the group's complete model set. Must be non-empty.", + "items": { + "$ref": "#/components/schemas/ModelConfigV1" + }, + "title": "Models", + "type": "array" + }, + "hierarchy": { + "$ref": "#/components/schemas/GroupHierarchyV1", + "description": "Parent linkage and limit enforcement mode. Immutable after creation." 
+ } + }, + "required": [ + "metadata", + "models", + "hierarchy" ], - "title": "ResourceKind", - "type": "string" + "title": "CreateGroupRequestV1", + "type": "object" }, - "TrainingItemV1": { + "UpdateGroupMetadataV1": { "properties": { - "billable_resource": { - "$ref": "#/components/schemas/BillableResourceV1", - "description": "The training job resource" - }, - "subtotal": { + "name": { "anyOf": [ { - "type": "number" + "maxLength": 255, + "type": "string" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": "Subtotal cost in dollars for this billable resource", - "title": "Subtotal" - }, - "minutes": { - "description": "Total minutes used for this billable resource", - "title": "Minutes", - "type": "integer" - }, - "daily": { - "description": "Daily usage breakdown", - "items": { - "$ref": "#/components/schemas/DailyTrainingUsageV1" - }, - "title": "Daily", - "type": "array" + "default": null, + "description": "Optional display name for the group.", + "examples": [ + "Acme prod" + ], + "title": "Name" } }, - "required": [ - "billable_resource", - "subtotal", - "minutes" - ], - "title": "TrainingItemV1", + "title": "UpdateGroupMetadataV1", "type": "object" }, - "TrainingUsageV1": { + "UpdateGroupRequestV1": { "properties": { - "subtotal": { + "metadata": { "anyOf": [ { - "type": "number" + "$ref": "#/components/schemas/UpdateGroupMetadataV1" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": "Subtotal cost in dollars after applying credits used", - "title": "Subtotal" + "default": null, + "description": "Mutable group metadata.", + "examples": [ + { + "name": "Acme Prod" + } + ] }, - "credits_used": { + "models": { "anyOf": [ { - "type": "number" + "items": { + "$ref": "#/components/schemas/ModelConfigV1" + }, + "type": "array" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": 
"Credits applied in dollars", - "title": "Credits Used" + "default": null, + "description": "Per-model rate and usage limit configuration.", + "title": "Models" + } + }, + "title": "UpdateGroupRequestV1", + "type": "object" + }, + "GatewayKeyInfoV1": { + "properties": { + "prefix": { + "description": "The prefix of the Model API key.", + "title": "Prefix", + "type": "string" }, - "total": { + "name": { "anyOf": [ { - "type": "number" + "type": "string" }, { - "pattern": "^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$", - "type": "string" + "type": "null" } ], - "description": "Total cost in dollars", - "title": "Total" - }, - "minutes": { - "description": "Total minutes used", - "title": "Minutes", - "type": "integer" - }, - "breakdown": { - "description": "Per-job usage breakdown", + "default": null, + "description": "Optional display name.", + "title": "Name" + } + }, + "required": [ + "prefix" + ], + "title": "GatewayKeyInfoV1", + "type": "object" + }, + "KeysForGroupResponseV1": { + "properties": { + "items": { + "description": "Items in this page.", "items": { - "$ref": "#/components/schemas/TrainingItemV1" + "$ref": "#/components/schemas/GatewayKeyInfoV1" }, - "title": "Breakdown", + "title": "Items", "type": "array" + }, + "pagination": { + "$ref": "#/components/schemas/PaginationResponseV1", + "description": "Pagination metadata for the page." 
} }, "required": [ - "subtotal", - "credits_used", - "total", - "minutes" + "items", + "pagination" ], - "title": "TrainingUsageV1", + "title": "KeysForGroupResponseV1", "type": "object" }, - "UsageSummaryV1": { - "description": "Billing usage summary for the requested date range.", + "CreateApiKeyForGroupRequestV1": { "properties": { - "dedicated_usage": { + "name": { "anyOf": [ { - "$ref": "#/components/schemas/DedicatedUsageV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Dedicated model serving usage" + "description": "Optional display name for the new key.", + "examples": [ + "prod-key-1" + ], + "title": "Name" + } + }, + "title": "CreateApiKeyForGroupRequestV1", + "type": "object" + }, + "CreateApiKeyForGroupResponseV1": { + "properties": { + "api_key": { + "description": "Plaintext key string, returned exactly once.", + "title": "Api Key", + "type": "string" }, - "training_usage": { + "prefix": { + "description": "Key prefix (the part before the dot).", + "title": "Prefix", + "type": "string" + }, + "name": { "anyOf": [ { - "$ref": "#/components/schemas/TrainingUsageV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Training usage" - }, - "model_apis_usage": { + "description": "Display name of the key.", + "title": "Name" + } + }, + "required": [ + "api_key", + "prefix" + ], + "title": "CreateApiKeyForGroupResponseV1", + "type": "object" + }, + "RegisterAPIKeyRequestV1": { + "description": "Request to register a caller-supplied API key against an existing FederatedGroup.", + "properties": { + "name": { "anyOf": [ { - "$ref": "#/components/schemas/ModelApisUsageV1" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Model APIs usage" + "description": "Optional name for the Model API key", + "examples": [ + "my-model-api-key" + ], + "title": "Name" + }, + "key": { + "description": "Value of the API key to register", + "examples": [ + "my-secure-api-key-value" + ], + 
"title": "Key", + "type": "string" } }, - "title": "UsageSummaryV1", + "required": [ + "key" + ], + "title": "RegisterAPIKeyRequestV1", + "type": "object" + }, + "RegisterAPIKeyResponseV1": { + "properties": { + "ok": { + "description": "Whether the registration was successful", + "title": "Ok", + "type": "boolean" + } + }, + "required": [ + "ok" + ], + "title": "RegisterAPIKeyResponseV1", "type": "object" } }, "parameters": { + "secret_name": { + "schema": { + "type": "string" + }, + "name": "secret_name", + "in": "path", + "required": true + }, "team_id": { "schema": { "type": "string" @@ -9933,6 +15400,14 @@ "in": "path", "required": true }, + "chainlet_id": { + "schema": { + "type": "string" + }, + "name": "chainlet_id", + "in": "path", + "required": true + }, "training_project_id": { "schema": { "type": "string" @@ -9957,6 +15432,38 @@ "in": "path", "required": true }, + "trainer_id": { + "schema": { + "type": "string" + }, + "name": "trainer_id", + "in": "path", + "required": true + }, + "checkpoint_id": { + "schema": { + "type": "string" + }, + "name": "checkpoint_id", + "in": "path", + "required": true + }, + "run_id": { + "schema": { + "type": "string" + }, + "name": "run_id", + "in": "path", + "required": true + }, + "sampler_id": { + "schema": { + "type": "string" + }, + "name": "sampler_id", + "in": "path", + "required": true + }, "api_key_prefix": { "schema": { "type": "string" @@ -9980,6 +15487,22 @@ "name": "version_tag", "in": "path", "required": true + }, + "user_id": { + "schema": { + "type": "string" + }, + "name": "user_id", + "in": "path", + "required": true + }, + "group_id": { + "schema": { + "type": "string" + }, + "name": "group_id", + "in": "path", + "required": true } }, "securitySchemes": { diff --git a/tests/client/test_inference.py b/tests/client/test_inference.py index eced040..e729f54 100644 --- a/tests/client/test_inference.py +++ b/tests/client/test_inference.py @@ -30,7 +30,7 @@ def test_predict_production_sync() -> None: assert 
resp.root["result"] == 42 assert fake.capture.method == "POST" assert fake.capture.path == "/production/predict" - assert fake.capture.headers["authorization"] == "Api-Key test-key" + assert fake.capture.headers["authorization"] == "Bearer test-key" body = json.loads(fake.capture.body) assert body == {"input": "hello"} client.close() diff --git a/tests/client/test_management.py b/tests/client/test_management.py index 98df35d..ee9b390 100644 --- a/tests/client/test_management.py +++ b/tests/client/test_management.py @@ -48,7 +48,7 @@ def test_get_models_sync() -> None: assert resp.models[0].name == "my-model" assert fake.capture.method == "GET" assert fake.capture.path == "/v1/models" - assert fake.capture.headers["authorization"] == "Api-Key test-key" + assert fake.capture.headers["authorization"] == "Bearer test-key" client.close() @@ -74,7 +74,7 @@ async def test_get_models() -> None: assert resp.models[0].name == "my-model" assert fake.capture.method == "GET" assert fake.capture.path == "/v1/models" - assert fake.capture.headers["authorization"] == "Api-Key test-key" + assert fake.capture.headers["authorization"] == "Bearer test-key" await client.close() diff --git a/uv.lock b/uv.lock index 8755312..9edc5aa 100644 --- a/uv.lock +++ b/uv.lock @@ -75,13 +75,13 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "datamodel-code-generator", specifier = ">=0.55.0" }, + { name = "datamodel-code-generator", specifier = ">=0.55.0,<0.56" }, { name = "poethepoet", specifier = ">=0.35" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "pytest-asyncio", specifier = ">=1.3.0" }, { name = "pyyaml", specifier = ">=6.0" }, - { name = "ruff", specifier = ">=0.15.8" }, - { name = "ty", specifier = ">=0.0.33" }, + { name = "ruff", specifier = ">=0.15.16" }, + { name = "ty", specifier = ">=0.0.43" }, ] [[package]] @@ -729,27 +729,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url 
= "https://files.pythonhosted.org/packages/14/b0/73cf7550861e2b4824950b8b52eebdcc5adc792a00c514406556c5b80817/ruff-0.15.8.tar.gz", hash = "sha256:995f11f63597ee362130d1d5a327a87cb6f3f5eae3094c620bcc632329a4d26e", size = 4610921, upload-time = "2026-03-26T18:39:38.675Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/92/c445b0cd6da6e7ae51e954939cb69f97e008dbe750cfca89b8cedc081be7/ruff-0.15.8-py3-none-linux_armv6l.whl", hash = "sha256:cbe05adeba76d58162762d6b239c9056f1a15a55bd4b346cfd21e26cd6ad7bc7", size = 10527394, upload-time = "2026-03-26T18:39:41.566Z" }, - { url = "https://files.pythonhosted.org/packages/eb/92/f1c662784d149ad1414cae450b082cf736430c12ca78367f20f5ed569d65/ruff-0.15.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d3e3d0b6ba8dca1b7ef9ab80a28e840a20070c4b62e56d675c24f366ef330570", size = 10905693, upload-time = "2026-03-26T18:39:30.364Z" }, - { url = "https://files.pythonhosted.org/packages/ca/f2/7a631a8af6d88bcef997eb1bf87cc3da158294c57044aafd3e17030613de/ruff-0.15.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6ee3ae5c65a42f273f126686353f2e08ff29927b7b7e203b711514370d500de3", size = 10323044, upload-time = "2026-03-26T18:39:33.37Z" }, - { url = "https://files.pythonhosted.org/packages/67/18/1bf38e20914a05e72ef3b9569b1d5c70a7ef26cd188d69e9ca8ef588d5bf/ruff-0.15.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdce027ada77baa448077ccc6ebb2fa9c3c62fd110d8659d601cf2f475858d94", size = 10629135, upload-time = "2026-03-26T18:39:44.142Z" }, - { url = "https://files.pythonhosted.org/packages/d2/e9/138c150ff9af60556121623d41aba18b7b57d95ac032e177b6a53789d279/ruff-0.15.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12e617fc01a95e5821648a6df341d80456bd627bfab8a829f7cfc26a14a4b4a3", size = 10348041, upload-time = "2026-03-26T18:39:52.178Z" }, - { url = 
"https://files.pythonhosted.org/packages/02/f1/5bfb9298d9c323f842c5ddeb85f1f10ef51516ac7a34ba446c9347d898df/ruff-0.15.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:432701303b26416d22ba696c39f2c6f12499b89093b61360abc34bcc9bf07762", size = 11121987, upload-time = "2026-03-26T18:39:55.195Z" }, - { url = "https://files.pythonhosted.org/packages/10/11/6da2e538704e753c04e8d86b1fc55712fdbdcc266af1a1ece7a51fff0d10/ruff-0.15.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d910ae974b7a06a33a057cb87d2a10792a3b2b3b35e33d2699fdf63ec8f6b17a", size = 11951057, upload-time = "2026-03-26T18:39:19.18Z" }, - { url = "https://files.pythonhosted.org/packages/83/f0/c9208c5fd5101bf87002fed774ff25a96eea313d305f1e5d5744698dc314/ruff-0.15.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2033f963c43949d51e6fdccd3946633c6b37c484f5f98c3035f49c27395a8ab8", size = 11464613, upload-time = "2026-03-26T18:40:06.301Z" }, - { url = "https://files.pythonhosted.org/packages/f8/22/d7f2fabdba4fae9f3b570e5605d5eb4500dcb7b770d3217dca4428484b17/ruff-0.15.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f29b989a55572fb885b77464cf24af05500806ab4edf9a0fd8977f9759d85b1", size = 11257557, upload-time = "2026-03-26T18:39:57.972Z" }, - { url = "https://files.pythonhosted.org/packages/71/8c/382a9620038cf6906446b23ce8632ab8c0811b8f9d3e764f58bedd0c9a6f/ruff-0.15.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:ac51d486bf457cdc985a412fb1801b2dfd1bd8838372fc55de64b1510eff4bec", size = 11169440, upload-time = "2026-03-26T18:39:22.205Z" }, - { url = "https://files.pythonhosted.org/packages/4d/0d/0994c802a7eaaf99380085e4e40c845f8e32a562e20a38ec06174b52ef24/ruff-0.15.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c9861eb959edab053c10ad62c278835ee69ca527b6dcd72b47d5c1e5648964f6", size = 10605963, upload-time = "2026-03-26T18:39:46.682Z" }, - { url = 
"https://files.pythonhosted.org/packages/19/aa/d624b86f5b0aad7cef6bbf9cd47a6a02dfdc4f72c92a337d724e39c9d14b/ruff-0.15.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8d9a5b8ea13f26ae90838afc33f91b547e61b794865374f114f349e9036835fb", size = 10357484, upload-time = "2026-03-26T18:39:49.176Z" }, - { url = "https://files.pythonhosted.org/packages/35/c3/e0b7835d23001f7d999f3895c6b569927c4d39912286897f625736e1fd04/ruff-0.15.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c2a33a529fb3cbc23a7124b5c6ff121e4d6228029cba374777bd7649cc8598b8", size = 10830426, upload-time = "2026-03-26T18:40:03.702Z" }, - { url = "https://files.pythonhosted.org/packages/f0/51/ab20b322f637b369383adc341d761eaaa0f0203d6b9a7421cd6e783d81b9/ruff-0.15.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:75e5cd06b1cf3f47a3996cfc999226b19aa92e7cce682dcd62f80d7035f98f49", size = 11345125, upload-time = "2026-03-26T18:39:27.799Z" }, - { url = "https://files.pythonhosted.org/packages/37/e6/90b2b33419f59d0f2c4c8a48a4b74b460709a557e8e0064cf33ad894f983/ruff-0.15.8-py3-none-win32.whl", hash = "sha256:bc1f0a51254ba21767bfa9a8b5013ca8149dcf38092e6a9eb704d876de94dc34", size = 10571959, upload-time = "2026-03-26T18:39:36.117Z" }, - { url = "https://files.pythonhosted.org/packages/1f/a2/ef467cb77099062317154c63f234b8a7baf7cb690b99af760c5b68b9ee7f/ruff-0.15.8-py3-none-win_amd64.whl", hash = "sha256:04f79eff02a72db209d47d665ba7ebcad609d8918a134f86cb13dd132159fc89", size = 11743893, upload-time = "2026-03-26T18:39:25.01Z" }, - { url = "https://files.pythonhosted.org/packages/15/e2/77be4fff062fa78d9b2a4dea85d14785dac5f1d0c1fb58ed52331f0ebe28/ruff-0.15.8-py3-none-win_arm64.whl", hash = "sha256:cf891fa8e3bb430c0e7fac93851a5978fc99c8fa2c053b57b118972866f8e5f2", size = 11048175, upload-time = "2026-03-26T18:40:01.06Z" }, +version = "0.15.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a6/bd/5f7ec371001337d8fa61701c186ff8b613ecac1651848c5950f4c4d5f2e9/ruff-0.15.16.tar.gz", hash = "sha256:d05e78d38c78caf020b03789e25106c93017db5a0cb6e2819885018c61343b78", size = 4714267, upload-time = "2026-06-04T16:33:09.974Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/42/53ef1c3953f157956db9bf7861e3bc50b9b887ce93300aa48cdba8336fe6/ruff-0.15.16-py3-none-linux_armv6l.whl", hash = "sha256:6ac3c0b3969cc6cf6b158c4e2f8f682acb58e7d700d8a44b65ecdc72d66ab0b2", size = 10709025, upload-time = "2026-06-04T16:32:51.935Z" }, + { url = "https://files.pythonhosted.org/packages/93/9a/a79159346f19134a956607754e57d8d128f7a4c00f4ad2f7514d224c172c/ruff-0.15.16-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:197c207ed75ffba54a0dec23db4aa939a27a3053073e085e0042433cbdc58e4a", size = 11063550, upload-time = "2026-06-04T16:32:42.24Z" }, + { url = "https://files.pythonhosted.org/packages/bc/72/3ce2ac000a5299ec238e01f51397b3b653c93b077d9b1bfe8715bb895f20/ruff-0.15.16-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3a39fec45ab316cc23e7558f23fea4a70403ddb5648ea9a4a3854a16973d0071", size = 10421345, upload-time = "2026-06-04T16:32:37.251Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c2/cc7fad3ec9169373f5b6a18f1917b91080feec40c3f9658334a1d28e2f03/ruff-0.15.16-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba93191d79003116b95128c9d306e045200fdbd0bccb782b110f3cd1d4abc5cf", size = 10757217, upload-time = "2026-06-04T16:32:54.722Z" }, + { url = "https://files.pythonhosted.org/packages/69/d2/3474009eaa0a65b31fa7152a2fad5e2f050c640ceb1e6b02ee6922e94c82/ruff-0.15.16-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6ee4b90520630120ef032aa5cc10db483852dff950e78b1d717e2993a61ac8d", size = 10507035, upload-time = "2026-06-04T16:33:05.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/81/b7ae6ccbd11f0c8dc3d5d67fc4be9b57ff57ca86ba56152021378e1277f2/ruff-0.15.16-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e4215bc938bc3c8215c1472c1aa437e310fee20cd427335fec9d7e609563628", size = 11255291, upload-time = "2026-06-04T16:32:49.49Z" }, + { url = "https://files.pythonhosted.org/packages/d9/e1/46e526f1a7cc90857ce6ddf25fbb77eb6568651ac38d71b033af07076dd5/ruff-0.15.16-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c8d26be963b090f10e29abc8b3e74a2a321f6fa34e02424e30b5af89350ecbb", size = 12124922, upload-time = "2026-06-04T16:33:07.821Z" }, + { url = "https://files.pythonhosted.org/packages/1a/da/5c791b088b596b24d0deb967fa28ae02ad751a140c0b9ea81c5ab915d6c0/ruff-0.15.16-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f198cf4123602a2280ed46c307bcbafe41758d6fee5b456b6b6058ca1514b3b4", size = 11332186, upload-time = "2026-06-04T16:33:02.971Z" }, + { url = "https://files.pythonhosted.org/packages/72/11/5da87abe20047c8962361473923ebb2f62b595250126aadfad8c20649c1e/ruff-0.15.16-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb27515fa6240fb586ae82b901a59e67d24acff86f2190b433dc542fe0435aeb", size = 11373541, upload-time = "2026-06-04T16:32:47.007Z" }, + { url = "https://files.pythonhosted.org/packages/fe/2a/8554754c23a854ae3fd6b507e36ad61ddb121e298c6d5d617dec94ed0f14/ruff-0.15.16-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a267c46ba1593fc26b8eecbea050b39d40c0b6bb7781ee11c90a02cd10032951", size = 11353014, upload-time = "2026-06-04T16:32:34.795Z" }, + { url = "https://files.pythonhosted.org/packages/62/25/62ea41529ec89f742ea3fed9cb1059c72877ec7cf9b9e99ac9cf3294d1d9/ruff-0.15.16-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:528c68f39a91498a8d50e91ff5985df3d105782bab49cc378e73ac26bff083e8", size = 10737467, upload-time = "2026-06-04T16:32:26.348Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/17/334d3ad9de4d40f9dd58fdd09e35ce64553bb501e2f19a839e2fb6be14fc/ruff-0.15.16-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7ed55c58950df60589a9a7a5d2f8fa5f54ebd287163be805adfe6ee95a9de123", size = 10521910, upload-time = "2026-06-04T16:32:32.54Z" }, + { url = "https://files.pythonhosted.org/packages/4d/bd/3ac7c6ae77a885c1004b3dda2446ea401768d24f851c14b4ad4b24f6639c/ruff-0.15.16-py3-none-musllinux_1_2_i686.whl", hash = "sha256:d482feaf51512b50f9790ceb417a56a61dd1e9d9bf967662b9ed27c01b34f53a", size = 10979190, upload-time = "2026-06-04T16:32:57.492Z" }, + { url = "https://files.pythonhosted.org/packages/33/d7/609546e6a413c3f216fbf2a50c928f97c80939154f6a0503114094a86191/ruff-0.15.16-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1e15bc8c94513dae2a40cc9ef07c94fdd4ecc9e29dabebeebe170f952322c9e3", size = 11477014, upload-time = "2026-06-04T16:32:44.687Z" }, + { url = "https://files.pythonhosted.org/packages/74/0d/f2cd247ad32633a5c36e97141a2c21b11c6279f7957bc2ff360b1e08fddd/ruff-0.15.16-py3-none-win32.whl", hash = "sha256:580378f7bd4aa25f72e74aa54948a9622f142b1e509521dd10902e886681cc1e", size = 10735541, upload-time = "2026-06-04T16:32:30.145Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9e/02e845ef151b1dee585e55c4739f8e1734ae1d9f1221dff65761c162208b/ruff-0.15.16-py3-none-win_amd64.whl", hash = "sha256:408256017284eddf98fff77b29aa4fb30f586042d535b2d9befc6512f400aaec", size = 11843403, upload-time = "2026-06-04T16:32:39.76Z" }, + { url = "https://files.pythonhosted.org/packages/15/19/016553f86f207450aebebc2b2b5088d086b901cc8186c02ac4284db3bd88/ruff-0.15.16-py3-none-win_arm64.whl", hash = "sha256:8cd61783afb39638a7133ef0d2dfb1e91277593962f81b5a8423eb0b888a6121", size = 11134555, upload-time = "2026-06-04T16:33:00.136Z" }, ] [[package]] @@ -808,26 +808,27 @@ wheels = [ [[package]] name = "ty" -version = "0.0.33" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/84/44/9478c50c266826c1bf30d1692e589755bffa8f1c0a3eb7af8a346c255991/ty-0.0.33.tar.gz", hash = "sha256:46d63bda07403322cb6c28ccfdd5536be916e13df725c29f7ccd0a21f06bd9e8", size = 5559373, upload-time = "2026-04-28T10:45:13.18Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/24/e287388c63a19191be26b32ff4dbd06029834068150ebe2532939bc4c851/ty-0.0.33-py3-none-linux_armv6l.whl", hash = "sha256:94d0a9d2234261a8911396d59e506b5923fe0971dbda43b9dcea287936887fcc", size = 11021308, upload-time = "2026-04-28T10:45:43.34Z" }, - { url = "https://files.pythonhosted.org/packages/00/ca/ba1eed819895bd239fba8ee35dfcd5fcb266c203b0914a17a59579096bb5/ty-0.0.33-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e4a2b5ba078f90de342f56b5f7979bb77c9b9b1d8625a041352ffc6ee93c4073", size = 10777272, upload-time = "2026-04-28T10:45:32.905Z" }, - { url = "https://files.pythonhosted.org/packages/25/a8/c3131d37b44b3fea1d6654a1c929a0cd0873822f77a90482b8ec28f6fbbd/ty-0.0.33-py3-none-macosx_11_0_arm64.whl", hash = "sha256:84ff5707825e9af9668d2bcf66975f93e520a63b524ab494e3a8265735be2563", size = 10201078, upload-time = "2026-04-28T10:45:23.374Z" }, - { url = "https://files.pythonhosted.org/packages/7b/db/d8e37ff0045810cc65e1ff36aa0da0a2253c05659787ac987df8a16c7897/ty-0.0.33-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e375285736f57886868e7af0b11c7b0ec5b6543fa15e7ad2a714fed9f077d4e0", size = 10732347, upload-time = "2026-04-28T10:45:21.444Z" }, - { url = "https://files.pythonhosted.org/packages/e0/1a/20e83a412506a918e4684fc67b567cf7cc13b105470b3428cb23c3d5aa13/ty-0.0.33-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5680f6350c3b4e46b8bff6d7bb132366ea239463d6cad4892725d06046e65464", size = 10808238, upload-time = "2026-04-28T10:45:38.565Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/4b/d0a39f4464dc6cb4cc2c159473ce216bd1846bfb684c0323a3cb36dce5c6/ty-0.0.33-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5535538bad8d0f7e62bcdff02197cdb30e41451d80b35d27e17d128f2e1dc5d", size = 11288348, upload-time = "2026-04-28T10:45:08.419Z" }, - { url = "https://files.pythonhosted.org/packages/35/7e/f1745e0f9583363d7a83d9a4990fc244f76ecc30840ddad83dc16a33c52d/ty-0.0.33-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:da196c42bbbc069e1e21e3e52107c061aa9660352dae57a41930690b56e2c02d", size = 11789907, upload-time = "2026-04-28T10:45:19.064Z" }, - { url = "https://files.pythonhosted.org/packages/a5/71/25f39f46a12d662859d45bc648555d0661044eb43db6b5648c9947487da9/ty-0.0.33-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9281672921ef6d4460e03146b5e6c18cb1a3e3a3b8a1a88f6f33226d05a469b7", size = 11500774, upload-time = "2026-04-28T10:45:48.012Z" }, - { url = "https://files.pythonhosted.org/packages/94/ec/136959ecbb7c71cb90537f5aea441c73f4ab24612868a6ecdc9d7444d32d/ty-0.0.33-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82c1b8f303f82da64e878108e764be3ecbcd7c9903ac0a7f7031614ed00b97ab", size = 11360314, upload-time = "2026-04-28T10:45:05.402Z" }, - { url = "https://files.pythonhosted.org/packages/cf/95/32809575c222f00beed498cb728e9290a0f5009f930025381bb7253b2206/ty-0.0.33-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:efe3af412c9ff67bce5fa37d0a2b0d8555c24072b145a5bac6c79637f1c83abe", size = 10707785, upload-time = "2026-04-28T10:45:10.836Z" }, - { url = "https://files.pythonhosted.org/packages/13/89/c8e9531f7aa4a093359e15fa32c8e1277fbbe90d16894d7c6032d29f4b34/ty-0.0.33-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aeec29c91ea768601747da546c3efc20b72c2fb1bd52bcc786a5c6eeff51d27b", size = 10834987, upload-time = "2026-04-28T10:45:40.738Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/16/9835fbcf5338af1a1917bd28fdb8a7193c210b83f243aa286fa9f79cb3ad/ty-0.0.33-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a535977c52bbb5f7e96b8b70a6ad375ad077f4a9ff2492508ea3816a2b403819", size = 10968968, upload-time = "2026-04-28T10:45:30.26Z" }, - { url = "https://files.pythonhosted.org/packages/36/69/64c76aabc1bc70c7f24b686cd93c3407f8ea430905e395f59bf9603ef571/ty-0.0.33-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1d732facf39fcb221ba279d469c5040d37883e964f123b1563888efd34818180", size = 11458077, upload-time = "2026-04-28T10:45:45.971Z" }, - { url = "https://files.pythonhosted.org/packages/91/84/fae27b0c4718776a298690d31ca4cc1995f2e3e1c63a7b59e84c41498e9a/ty-0.0.33-py3-none-win32.whl", hash = "sha256:d90960b574428dc252f85e8598ec5fcb7f619794196b2fc95a90da075ed4681c", size = 10345364, upload-time = "2026-04-28T10:45:16.836Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a0/a2938b23ae3e1a09a2d7c189e2ac5f7113676bae4e0e23948b568e18e5f8/ty-0.0.33-py3-none-win_amd64.whl", hash = "sha256:c1c3aec62c44de610c6e95f0a4e97ac3dbc07934bfdbf1fd90d758c9ff72f48e", size = 11342470, upload-time = "2026-04-28T10:45:26.455Z" }, - { url = "https://files.pythonhosted.org/packages/ab/62/7fb948aace38d2f6329261bb33c035a8484549c74f1db28649c7a4c6fed9/ty-0.0.33-py3-none-win_arm64.whl", hash = "sha256:0d44f99ba1b441e55e2aa301b2ac0a21112784931b46a5f66f4ea9efe5620d97", size = 10742673, upload-time = "2026-04-28T10:45:35.555Z" }, +version = "0.0.43" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/37/4ec04de0659b93be37d956dfceca13b1ecab9c959f28d8a1d5e514603f36/ty-0.0.43.tar.gz", hash = "sha256:ea4cff50548f2a1877e848d3abe9e293cde8ab94757a7eb93fc0d4013f98be8e", size = 5798429, upload-time = "2026-06-04T00:52:10.013Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/db/74/1916026a78f20019a2f03adbd6fb4430ddb7ce1e52c2e17a90856a6d192e/ty-0.0.43-py3-none-linux_armv6l.whl", hash = "sha256:3bf70f5446480562bf6c9f639df4b5cb60716b8f8d1a6b8e5811d5c7eccd8bf2", size = 11598153, upload-time = "2026-06-04T00:52:20.646Z" }, + { url = "https://files.pythonhosted.org/packages/b9/af/58bb0089d2635216c8fa6612dd486a3f986d0ab1c46a41527ab95e57f0e3/ty-0.0.43-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7184741f8b15425a1bc64b950ad005cb353573288ac0e8a04f5481ceb3832596", size = 11357811, upload-time = "2026-06-04T00:52:24.683Z" }, + { url = "https://files.pythonhosted.org/packages/d6/9c/32c6b14f3feddf87b59c7a50709e2b3da408258f2f583f05575f77bc8f7b/ty-0.0.43-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8c306379ca9a35f6ae5270fe9bda7af4b46d91822725a2586d78c8b9b5493b62", size = 10772024, upload-time = "2026-06-04T00:52:14.312Z" }, + { url = "https://files.pythonhosted.org/packages/09/fa/98aa4a74bd00cd5efc424923cd1daffbf1e40a0338041cafb203379d746f/ty-0.0.43-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d624b884c9c1fd244ad2a5f026364e7162a22b3f537025941ada2e363e676414", size = 11291034, upload-time = "2026-06-04T00:52:37.249Z" }, + { url = "https://files.pythonhosted.org/packages/b5/db/4de086c38ce96dcada2bd451f43171d2c237f96d8ed19a1ea8fe51bb8ef4/ty-0.0.43-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:281fc4c00fbc196045141faa085055bddc58846b04a2800204701415a1b9c6aa", size = 11364724, upload-time = "2026-06-04T00:52:33.138Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d3/e3cd8e3233a6fd8362a49aa025b79e9f40151a2a86d811ace154c6eb7445/ty-0.0.43-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f57d6cc28de89024b48d1788e4758c05299d5749d4a51c02e71ac655ec23d9a5", size = 11890555, upload-time = "2026-06-04T00:52:22.711Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/7b/6f46d444e8241606bbde098df3dca93f2ec0b834a42055db85ee7d33646f/ty-0.0.43-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a1d6ad6c5e7792c7eac0a01e550f2c2004462e01a64a91ea1636aba6fef6e71", size = 12450968, upload-time = "2026-06-04T00:52:28.94Z" }, + { url = "https://files.pythonhosted.org/packages/4a/e1/79fbe51f2e4b9d8347f2013cd7ed0b63f3b499038c02dc0357e9b28a3a47/ty-0.0.43-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:66d474395d7635fb618bdbb58b4e3360259a2056d0a5621b82754b9da2cd8a04", size = 12064187, upload-time = "2026-06-04T00:52:12.039Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3f/c758a3a8df5b90d331f2b60c8f16021ee64d75e78f99d67cc4efc9bf5f4b/ty-0.0.43-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2663a0003a8b60fb98db7f6f6e673df80b21d0fe3a9868a26fb06b4e049b6fc4", size = 11943208, upload-time = "2026-06-04T00:52:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/54/5f/f516442749cf1b45ca6720a5d41df2738a486ed9ace774c03d515db89084/ty-0.0.43-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:d5a6c352d374d889189d5ec82b54b26a5885f769f7b7787f7f875500dcb8673e", size = 12143572, upload-time = "2026-06-04T00:52:18.457Z" }, + { url = "https://files.pythonhosted.org/packages/b7/bf/0d83c7f43bf4c10f3678bfe7d938e51c445298c7b923f155c5204730c2df/ty-0.0.43-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e7dbbeedfad3ca250d74fcc355fa9ab6b38d2a17f22d6304f615716939dbbb27", size = 11279355, upload-time = "2026-06-04T00:52:26.726Z" }, + { url = "https://files.pythonhosted.org/packages/3e/de/a6c978bef6d9e949f79f4782d9e4ee4df0893713e73b055d84c1a5116b9a/ty-0.0.43-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:24b18a0273ee46154996cfcfa27438f851f440c925587ec200df6f98dffe67d3", size = 11408412, upload-time = "2026-06-04T00:52:35.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/b1/d13857c23867f0f76b92e38e5841c64ca5e76dc5d4bf27f52cb81d8ab685/ty-0.0.43-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2ef681951520d692b7e9c0b5e56aacf4f98ccae47cf6ffccaf2c7b6b33dc226e", size = 11541709, upload-time = "2026-06-04T00:52:16.451Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f1/cd6afc6f6a687e238bf5e12189f7920e81a0bdef6c3dba4c784ef140f7d9/ty-0.0.43-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2af105de7437143aa4676b28016b5bee661aaaa4eff52be5867fb25119641ceb", size = 12041266, upload-time = "2026-06-04T00:52:43.541Z" }, + { url = "https://files.pythonhosted.org/packages/bd/ba/51ca7c3335da2b8d0a3e477fa4986be9f4a53b05bfab862967d8d2e6ca60/ty-0.0.43-py3-none-win32.whl", hash = "sha256:e4773115b0d6486ee30f1657fc8bdffe7e3a3f5300ab77ef2495da6e83e4694f", size = 10858724, upload-time = "2026-06-04T00:52:07.843Z" }, + { url = "https://files.pythonhosted.org/packages/9f/29/5d80453e5f7c520145fa058851da87230dbd7ca761a7675447a9fe504e0b/ty-0.0.43-py3-none-win_amd64.whl", hash = "sha256:48d3545094a4ae6395492c7e6ac90550fce969e0ed2815fbf8c5da9756676b7d", size = 11976157, upload-time = "2026-06-04T00:52:41.438Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ed/befe5a543e5b95e754ed38ee95239e44efda9bc5f578db4ac1bc8dd758d6/ty-0.0.43-py3-none-win_arm64.whl", hash = "sha256:740ca33d7f75f655a4e7d475bc42dfb825c13219bb073fad30fcc04d35790c74", size = 11308680, upload-time = "2026-06-04T00:52:39.233Z" }, ] [[package]]