diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 18141129d9f..e30d118ef4e 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,4 +1,4 @@
contact_links:
- - name: "Join MindsDB Community"
- url: https://mindsdb.com/joincommunity
- about: Join our community on Slack for other questions and general chat
\ No newline at end of file
+ - name: "Join the MindsDB Discord"
+ url: https://mindshub.ai/discord
+ about: Join our Discord for questions and general chat
diff --git a/.github/ISSUE_TEMPLATE/integrations_contest.yaml b/.github/ISSUE_TEMPLATE/integrations_contest.yaml
deleted file mode 100644
index 8318873d296..00000000000
--- a/.github/ISSUE_TEMPLATE/integrations_contest.yaml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: π§βπ§ Propose a new integration
-description: Share an idea for a new datasource or machine learning integration
-title: "[Integration]: "
-labels: [roadmap, integration]
-assignees:
--
-body:
-- type: markdown
- attributes:
- value: |
- Thanks for taking the time to share the new integration! Please fill out the form in English!
-- type: checkboxes
- attributes:
- label: Is there an existing integration?
- description: Please search to see if MindsDB already supports this integration.A list with supported integrations can be found [here](https://github.com/mindsdb/mindsdb#database-integrations).
- options:
- - label: I have searched the existing integrations.
- required: true
-- type: textarea
- attributes:
- label: Use Case
- description: Which use-cases does this solve?
- placeholder: |
- Why this integration will be usefull to users? What is the value of having this integration?
- validations:
- required: true
-- type: textarea
- attributes:
- label: Motivation
- description: How will we know that this has succeeded?
- placeholder: |
- Explain the proposed integration as though it was already implemented and you were explaining it to a user.
- validations:
- required: true
-- type: textarea
- attributes:
- label: Implementation
- description: Describe how this integration will work, with code, pseudo-code, mock-ups, text, or add diagrams
- validations:
- required: false
-- type: textarea
- attributes:
- label: Anything else?
- description: |
- Links? References? Anything that will give more context about this integration!
- validations:
- required: false
diff --git a/.github/workflows/build_deploy_dev.yml b/.github/workflows/build_deploy_dev.yml
index 78701c7b6da..cc9d25f7edd 100644
--- a/.github/workflows/build_deploy_dev.yml
+++ b/.github/workflows/build_deploy_dev.yml
@@ -77,6 +77,18 @@ jobs:
platforms: linux/amd64
push-cache: false
+ scan-keycloak:
+ runs-on: mdb-dev
+ needs: [ build ]
+ name: Scan cloud-cpu image
+ steps:
+ - uses: actions/checkout@v4
+ - uses: mindsdb/github-actions/snyk-docker-scan@main
+ with:
+ image: 168681354662.dkr.ecr.us-east-1.amazonaws.com/mindsdb:${{ github.event.pull_request.head.sha }}-cloud-cpu
+ snyk-token: ${{ secrets.SNYK_TOKEN }}
+ dockerfile: docker/mindsdb.Dockerfile
+
# Push cache layers to docker registry
# This is separate to the build step so we can do other stuff in parallel
build-cache:
diff --git a/.github/workflows/build_deploy_staging.yml b/.github/workflows/build_deploy_staging.yml
index d0f3b0c27b9..f580f8baa5c 100644
--- a/.github/workflows/build_deploy_staging.yml
+++ b/.github/workflows/build_deploy_staging.yml
@@ -2,6 +2,9 @@ name: Build and deploy to staging
permissions:
contents: read
+ pull-requests: write
+ pages: write
+ id-token: write
on:
# Using pull_request instead of push on main because we want access to the pull request's details via 'github.event'
@@ -10,7 +13,7 @@ on:
types:
- closed
branches:
- - 'develop'
+ - 'main'
- 'releases/*'
concurrency:
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 00000000000..2308aa164aa
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,38 @@
+name: Deploy SQL Docs to GitHub Pages
+
+on:
+ push:
+ branches: [main]
+ paths:
+ - 'docs/index.html'
+ workflow_dispatch:
+
+permissions:
+ contents: read
+ pages: write
+ id-token: write
+
+concurrency:
+ group: github-pages
+ cancel-in-progress: true
+
+jobs:
+ deploy:
+ environment:
+ name: github-pages
+ url: ${{ steps.deployment.outputs.page_url }}
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Configure Pages
+ uses: actions/configure-pages@v5
+
+ - name: Upload Pages artifact
+ uses: actions/upload-pages-artifact@v3
+ with:
+ path: docs/
+
+ - name: Deploy to GitHub Pages
+ id: deployment
+ uses: actions/deploy-pages@v4
diff --git a/.github/workflows/tests_unit.yml b/.github/workflows/tests_unit.yml
index 3a9455a7f76..876972852f7 100644
--- a/.github/workflows/tests_unit.yml
+++ b/.github/workflows/tests_unit.yml
@@ -23,17 +23,12 @@ env:
timescaledb
mssql
oracle
- slack
redshift
bigquery
- clickhouse
web
databricks
- github
- ms_teams
- statsforecast
- chromadb
- confluence
+ duckdb_faiss
+ openai
# We measure 80% on this handlers, as they are the verified
HANDLERS_TO_VERIFY: |
mysql
@@ -43,11 +38,9 @@ env:
timescaledb
mssql
oracle
- slack
file
redshift
bigquery
- confluence
COVERAGE_FAIL_UNDER: "80"
jobs:
@@ -163,8 +156,6 @@ jobs:
uv pip install ".[agents,kb]" \
-r requirements/requirements-test.txt \
"${HANDLER_EXTRAS[@]}"
- # Onuxruntime is required for ChromaDB, once we have default pgvector we can remove it
- uv pip install --force-reinstall onnxruntime==1.20.1
git clone --branch v$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests
- name: Run unit tests
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a22cc2c206b..bc20563dfb7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@
Being part of the core MindsDB team is accessible to anyone who is motivated and wants to be part of that journey!
-Please see below how to contribute to the project, also refer to the contributing documentation.
+Please see below how to contribute to the project.
## How can you help us?
@@ -27,7 +27,7 @@ In general, we follow the "fork-and-pull" Git workflow.
> NOTE: Be sure to merge the latest from "upstream" before making a pull request! Also, make the PR to the `main` branch.
## Feature and Bug reports
-We use GitHub issues to track bugs and features. Report them by opening a [new issue](https://github.com/mindsdb/mindsdb/issues/new/choose) and fill out all of the required inputs.
+We use GitHub issues to track bugs and features. Report them by opening a [new issue](https://github.com/mindsdb/engine/issues) and fill out all of the required inputs.
## Code review process
@@ -35,12 +35,10 @@ The Pull Request reviews are done on a regular basis. Please, make sure you resp
## Community
-If you have additional questions or you want to chat with the MindsDB core team, please join our [Slack community](https://mindsdb.com/joincommunity) or post at [Github Discussions](https://github.com/mindsdb/mindsdb/discussions).
-
-To get updates on MindsDBβs latest announcements, releases, and events, sign up for our [Monthly Community Newsletter](https://mindsdb.com/newsletter/?utm_medium=community&utm_source=github&utm_campaign=mindsdb%20repo).
+If you have additional questions or you want to chat with the MindsDB core team, please join our [Discord](https://mindshub.ai/discord) or open a [GitHub issue](https://github.com/mindsdb/engine/issues).
-Join our mission of democratizing machine learning!
+Join our mission of making semantic search accessible to everyone who knows SQL!
## Contributor Code of Conduct
-Please note that this project is released with a [Contributor Code of Conduct](https://github.com/mindsdb/mindsdb/blob/main/CODE_OF_CONDUCT.md). By participating in this project, you agree to abide by its terms.
+Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project, you agree to abide by its terms.
diff --git a/Makefile b/Makefile
index 05cea89b906..75c05b7c647 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-PYTEST_ARGS = -v -rs --disable-warnings -n auto --dist loadfile
+PYTEST_ARGS = -v -xrs --disable-warnings -n 1 --dist loadfile
PYTEST_ARGS_DEBUG = --runslow -vs -rs
DSI_PYTEST_ARGS = --run-dsi-tests
DSI_REPORT_ARGS = --json-report --json-report-file=reports/report.json
@@ -87,4 +87,8 @@ unit_tests_debug:
env PYTHONPATH=./ pytest $(PYTEST_ARGS_DEBUG) tests/unit/executor/
pytest $(PYTEST_ARGS_DEBUG) --ignore=tests/unit/executor tests/unit/
+.PHONY: tests-artifacts
+tests-artifacts:
+ ./scripts/test-artifacts.sh
+
.PHONY: install_mindsdb install_handler precommit format run_mindsdb check build_docker run_docker integration_tests integration_tests_slow integration_tests_debug datasource_integration_tests datasource_integration_tests_debug unit_tests unit_tests_slow unit_tests_debug
diff --git a/README.md b/README.md
index 3ce7bd5bd15..6b455275e73 100644
--- a/README.md
+++ b/README.md
@@ -1,238 +1,189 @@
-
+
---
-MindsDB is a popular open-source query engine for AI analytics, powering AI agents that need to answer questions directly from databases, data warehouses, and applications, with no ETL required.
+MindsDB Query Engine connects to 200+ data sources β databases, warehouses, applications, files β and lets you query them live in one SQL dialect, with no ETL. Index unstructured content into [knowledge bases](https://mindsdb.github.io/engine#kb-overview), then search it by meaning, by keyword, or both at once, with plain SQL filters on top. Everything is reachable from any MySQL- or PostgreSQL-compatible client.
-
+> **Where this fits:** MindsDB now builds [MindsHub](https://mindshub.ai) β a hub for open AI agents. The Query Engine remains a standalone open-source project, and it pairs well with MindsHub agents: connect it to give an agent live, SQL-queryable access to your data and semantic search. The full story: [MindsHub vs MindsDB](https://mindshub.ai/mindshub-vs-mindsdb).
-## What you can build with MindsDB Query Engine
+## How it works
+
+```
+ MySQL clients Β· PostgreSQL clients Β· BI tools Β· ORMs Β· HTTP API
+ β
+ ββββββββββββββββΌββββββββββββββββ
+ β MindsDB Query Engine β
+ β one SQL dialect over β
+ β a federated query planner β
+ ββββββββββββββββ¬ββββββββββββββββ
+ β
+ βββββββββββββββββββββββΌββββββββββββββββββββββ
+ β β β
+ βββββββββββΌββββββββββ βββββββββββΌββββββββββ βββββββββββΌββββββββββ
+ β Databases β β Apps & files β β Knowledge bases β
+ β Postgres, MySQL, β β Slack, web crawlerβ β embeddings + β
+ β MongoDB, Snowflakeβ β docs, sheets, β β vector store + β
+ β BigQuery, S3, β¦ β β email, calendarsβ¦ β β BM25 index β
+ βββββββββββββββββββββ βββββββββββββββββββββ βββββββββββββββββββββ
+ queried live, in place β data is never copied
+```
+
+- **One server, three interfaces.** The engine ships a built-in SQL editor on HTTP (`:47334`) and speaks the MySQL (`:47335`) and PostgreSQL (`:47336`) wire protocols β so `mysql`, `psql`, DBeaver, SQLAlchemy, or any BI tool [connects directly](https://mindsdb.github.io/engine#setup-clients).
+- **Federated queries, no pipelines.** [`CREATE DATABASE`](https://mindsdb.github.io/engine#db-create) attaches a live data source through an integration handler. The planner translates each query, pushes work down to the source, and streams results back β your data stays where it is. Source-specific syntax is still available via [native queries](https://mindsdb.github.io/engine#native-queries).
+- **Knowledge bases are the semantic layer.** A [knowledge base](https://mindsdb.github.io/engine#kb-overview) combines an embedding model, an optional reranking model, and a vector store (e.g. pgvector). `INSERT INTO` it to chunk, embed, and index content; `SELECT` from it to retrieve by meaning, filtered by metadata columns like any other table.
+- **Hybrid retrieval.** [Hybrid search](https://mindsdb.github.io/engine#kb-hybrid) runs vector similarity and BM25 keyword matching in parallel and merges the results β for queries that mix natural language with exact identifiers, codes, or acronyms.
+- **Organize and automate.** [Projects](https://mindsdb.github.io/engine#proj-create) namespace your work, [views](https://mindsdb.github.io/engine#view-create) save cross-source transformations, and [jobs](https://mindsdb.github.io/engine#job-create) schedule any SQL to run on an interval β e.g. to keep knowledge bases fresh.
+
+## Quick start
+
+Run with [Docker](https://mindsdb.github.io/engine#setup-docker):
-| CONVERSATIONAL ANALYTICS AGENTS | SEMANTIC SEARCH AGENTS |
-| --- | --- |
-| Get precise, data-driven answers using natural language.
Unify and query data across sources (MySQL, Salesforce, Shopify, etc.), without ETL.
Watch video | Ground LLM responses in your most relevant internal knowledge.
Search across unstructured sources like documents, support tickets, Google Drive, and more.
Watch video |
-
-## How MindsDB works
-
-MindsDB follows a simple workflow: **Connect β Unify β Respond**. At the center is an SQL-compatible data language with additional constructs for searching unstructured data, managing workflows (jobs/triggers), and building agents.
-
-
- Autonomous reasoning: Deploy agents that blend and retrieve data points across your stack to produce grounded answers.
-
-
-
-
-## Setup
-
-Users can install MindsDB via Docker, Docker Extension, or PyPI.
-
-Here is how to pull and run MindsDB via Docker:
```bash
docker run --name mindsdb_container \
--e MINDSDB_APIS=http,mysql \
--p 47334:47334 -p 47335:47335 \
-mindsdb/mindsdb:latest
+ -e MINDSDB_APIS=http,mysql \
+ -p 47334:47334 -p 47335:47335 \
+ mindsdb/mindsdb
```
-## Usage
+Or install from [PyPI](https://mindsdb.github.io/engine#setup-pip):
+
+```bash
+pip install mindsdb # add extras as needed, e.g. mindsdb[pgvector,openai,postgres]
+python -m mindsdb
+```
-**Follow the quickstart guide to get started with MindsDB using our demo data.**
+Then open the editor at `http://127.0.0.1:47334`, or connect any MySQL client to port `47335`. The [quickstart](https://mindsdb.github.io/engine#quickstart) walks through the rest.
+
+## From zero to semantic search
+
+Six SQL statements, start to finish. Full syntax for every statement is in the [SQL reference](https://mindsdb.github.io/engine).
+
+**1. Attach your data sources** ([docs](https://mindsdb.github.io/engine#db-create)) β they are queried live, nothing is imported:
-Retrieve and analyze data from over 200 data sources in one SQL dialect. For AI agents, this means faster response time, better accuracy, and lower token consumption.
```sql
---use SQL to aggregate pipeline data from Salesforce
-SELECT SUM(ExpectedRevenue) AS open_pipeline
-FROM salesforce.opportunities
-WHERE close_date >= CURDATE()
-
---use the same dialect to retrieve even from a non-SQL database, like MondoDB
-SELECT COUNT(*) AS negative_emails_last_30_days
-FROM mongodb.support_tickets
-WHERE sentiment = 'negative'
- AND created_at >= CURRENT_DATE - INTERVAL '30 days';
+CREATE DATABASE my_pg
+WITH ENGINE = 'postgres',
+PARAMETERS = {
+ "host": "localhost", "port": 5432,
+ "user": "user", "password": "pass",
+ "database": "mydb"
+};
+
+CREATE DATABASE my_mongo
+WITH ENGINE = 'mongodb',
+PARAMETERS = {
+ "host": "mongodb+srv://user:pass@cluster.example.net",
+ "database": "support"
+};
```
-Create views and join data even from different types of data systems.
+**2. Query across sources in one dialect** ([docs](https://mindsdb.github.io/engine#sql-join)) β even non-SQL stores like MongoDB, and save the result as a [view](https://mindsdb.github.io/engine#view-create):
+
```sql
---join MongoDB and Salesforce data
-CREATE VIEW risky_renewals AS (
-SELECT *
-FROM mongodb.support_tickets AS reviews
-JOIN salesforce.opportunities AS deals
- ON reviews.customer_domain = deals.customer_domain
-WHERE deals.type = "renewal"
- AND reviews.sentiment = "negative"
+CREATE VIEW open_tickets_by_product AS (
+ SELECT p.name, COUNT(t.ticket_id) AS open_tickets
+ FROM my_mongo.support_tickets AS t
+ JOIN my_pg.products AS p
+ ON t.product_id = p.id
+ WHERE t.status = 'open'
+ GROUP BY p.name
);
```
-Join vectorized and structured data inside a knowledge base. Combine semantic search with precise metadata criteria in a single SQL query.
+**3. Create a knowledge base** ([docs](https://mindsdb.github.io/engine#kb-create)) β an embedding model plus a vector store, addressable as a table:
+
```sql
---create a knowledge base for customer issues
-CREATE KNOWLEDGE_BASE customers_issues
+CREATE KNOWLEDGE_BASE support_kb
USING
- storage = my_vector.db,
- content_columns = ['ticket_description'];
- metadata_columns = ['customer_name', 'segment', 'revenue', 'is_pending_renewal'];
-
---find large customers who submitted ticket related to data security topics
-SELECT * FROM customers_issues
-WHERE content = 'data security'
-AND
- is_pending_renewal = 'true'.
- revenue > 1000000;
+ embedding_model = {
+ "provider": "openai",
+ "model_name": "text-embedding-3-large",
+ "api_key": "sk-..."
+ },
+ storage = my_pgvector.support_kb_store, -- a pgvector connection
+ content_columns = ['subject', 'body'],
+ metadata_columns = ['product_name', 'priority', 'created_at'],
+ id_column = 'ticket_id';
```
-Use MindsDB pre-packaged data agents and connect them with your own. See how to use MindsDB via API or MCP.
+**4. Index your content** ([docs](https://mindsdb.github.io/engine#kb-insert)) β rows are chunked, embedded, and upserted:
+
```sql
-CREATE AGENT my_agent
-USING
- model = {
- "provider": "openai",
- "model_name" : "gpt-xx",
- "api_key": "sk-..."
- },
- data = {
- "knowledge_bases": ["mindsdb.customer_issues"],
- "tables": ["salesforce.opportunities", "postgres.sales", "mongodb.support_tickets"]
- },
- prompt_template = 'my prompt template and agent guidance';
+INSERT INTO support_kb
+ SELECT ticket_id, subject, body, product_name, priority, created_at
+ FROM my_mongo.support_tickets;
```
-See MindsDBβs recommended usage of agents here and how to automate workflows with jobs.
-
-## π Tutorials
-- Enterprise Knowledge Search (example)
-- Advanced Semantic Search (example)
-- Customer Support Automation (example1, example2)
-- Intelligent Content Discovery (example)
-- Financial Analysis Agents (example)
-- Real-time AI-powered analytics (example)
-- Conversational Data Assistants (example)
-- CRM Intelligence (example)
-- Compliance & Customer Intelligence (example)
-- Conversation Intelligence (example)
-Subscribe to our (blog) for more
-
-## π«΄ Help and support
-
-Stuck on a query? Found a bug? Weβre here to help.
-
- Open a GitHub Issue. Please include reproduction steps!
-
-
-
-
- Get commercial support
-
-
- Contact the MindsDB Team for enterprise SLAs and custom solutions.
-
-
-
-
-**Security Note:** If you find a security vulnerability, please do not open a public issue. Refer to our security policy for reporting instructions.
-
-## π€ Contribute to MindsDB
-
-MindsDB is open source and contributions are welcome! You can submit code changes through pull requests or by opening issues to report bugs, suggest new features, or enhancements.
-
-**Ways you can help:**
-- Develop a database integration
-- Develop an app integration
-- Identify and fix bugs
-
-**How to contribute**
-
-- Read the contribution guide to get set up.
-- Browse open issues.
-- Join the #contributors channel in Slack.
-- Explore community rewards and programs.
-
+**5. Search by meaning, filter by metadata** ([docs](https://mindsdb.github.io/engine#kb-query)):
-Our top 100 contributors
+```sql
+SELECT chunk_content, product_name, relevance
+FROM support_kb
+WHERE content = 'cannot connect after the latest update'
+ AND priority <= 2
+ AND relevance >= 0.5
+LIMIT 10;
+
+-- hybrid search: blend vector similarity with BM25 keyword matching
+SELECT *
+FROM support_kb
+WHERE content = 'error ERR-4421'
+ AND hybrid_search = true;
+```
-
-
-
-
-Made with [contrib.rocks](https://contrib.rocks)
-
+βΆ [How to use semantic search with metadata filters](https://www.youtube.com/watch?v=HN4fHtS4mvo) β a good explainer of this feature.
+
+**6. Keep the index fresh with a job** ([docs](https://mindsdb.github.io/engine#job-create)):
+
+```sql
+CREATE JOB refresh_support_kb (
+ INSERT INTO support_kb
+ SELECT ticket_id, subject, body, product_name, priority, created_at
+ FROM my_mongo.support_tickets
+ WHERE created_at > LAST
+)
+EVERY hour;
+```
+
+## Help and support
+
+| You need | Go to |
+| --- | --- |
+| Ask a question | [Discord](https://mindshub.ai/discord) |
+| Report a bug | [GitHub Issues](https://github.com/mindsdb/engine/issues) β please include reproduction steps |
+| Commercial support | [Contact the team](https://mindshub.ai/contact) |
+
+**Security note:** if you find a vulnerability, please do not open a public issue β follow our [security policy](https://github.com/mindsdb/engine/security) instead.
+
+## Contributing
+
+Contributions are welcome β code, integrations, docs, and bug reports alike. We follow the fork-and-pull workflow: see the [contribution guide](CONTRIBUTING.md) to get set up, and browse the [open issues](https://github.com/mindsdb/engine/issues) for somewhere to start. Good first areas are new integration handlers, bug fixes, and documentation improvements.
+
+## Resources
+
+- [Documentation](https://mindsdb.github.io/engine)
+- [MindsHub β open AI agents, from the same team](https://mindshub.ai)
+- [MindsHub vs MindsDB β how the product evolved](https://mindshub.ai/mindshub-vs-mindsdb)
+- [Discord](https://mindshub.ai/discord)
+- [Contact](https://mindshub.ai/contact)
+
+## License
-## π Resources
-- Documentation
-- Blog
-- Events
-- Community Slack
-- Brand guidelines
-- Contact form
+MindsDB Core is licensed under the [Elastic License 2.0](LICENSE); some directories carry their own license β see the [LICENSE](LICENSE) file for the full structure.
diff --git a/assets/contributions-agreement/signatures/cla.json b/assets/contributions-agreement/signatures/cla.json
index dc0e2328551..b3dabe85607 100644
--- a/assets/contributions-agreement/signatures/cla.json
+++ b/assets/contributions-agreement/signatures/cla.json
@@ -5831,6 +5831,1054 @@
"created_at": "2023-10-30T12:46:04Z",
"repoId": 143328315,
"pullRequestNo": 8163
+ },
+ {
+ "name": "minakshisharma197",
+ "id": 184736207,
+ "comment_id": 2413433683,
+ "created_at": "2024-10-15T09:55:40Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9865
+ },
+ {
+ "name": "divyakhatiyan",
+ "id": 141419850,
+ "comment_id": 2417330560,
+ "created_at": "2024-10-16T16:28:34Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9899
+ },
+ {
+ "name": "Sekhar-Kumar-Dash",
+ "id": 119131588,
+ "comment_id": 2419495274,
+ "created_at": "2024-10-17T13:05:15Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9914
+ },
+ {
+ "name": "kom-senapati",
+ "id": 92045934,
+ "comment_id": 2423485137,
+ "created_at": "2024-10-19T02:28:49Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9807
+ },
+ {
+ "name": "RiyanaD",
+ "id": 117534139,
+ "comment_id": 2420766574,
+ "created_at": "2024-10-17T22:54:53Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9427
+ },
+ {
+ "name": "narengogi",
+ "id": 47327611,
+ "comment_id": 2296396377,
+ "created_at": "2024-08-19T11:55:54Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9641
+ },
+ {
+ "name": "PatLittle",
+ "id": 31454591,
+ "comment_id": 2425743649,
+ "created_at": "2024-10-21T06:49:47Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9962
+ },
+ {
+ "name": "panoskyriakis",
+ "id": 134383572,
+ "comment_id": 2317914456,
+ "created_at": "2024-08-29T14:39:56Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9654
+ },
+ {
+ "name": "lucas-koontz",
+ "id": 7515210,
+ "comment_id": 2428585608,
+ "created_at": "2024-10-22T08:19:54Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9976
+ },
+ {
+ "name": "Tryxns",
+ "id": 10586708,
+ "comment_id": 2433530462,
+ "created_at": "2024-10-23T21:51:00Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9975
+ },
+ {
+ "name": "DhanushNehru",
+ "id": 22955675,
+ "comment_id": 2438155935,
+ "created_at": "2024-10-25T15:40:09Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10047
+ },
+ {
+ "name": "TalaatHasanin",
+ "id": 105648065,
+ "comment_id": 2439488990,
+ "created_at": "2024-10-26T10:54:04Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9726
+ },
+ {
+ "name": "AkashJana18",
+ "id": 103350981,
+ "comment_id": 2442254462,
+ "created_at": "2024-10-28T17:52:47Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10073
+ },
+ {
+ "name": "prajwal-pai77",
+ "id": 108796209,
+ "comment_id": 2445980761,
+ "created_at": "2024-10-30T06:33:47Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10039
+ },
+ {
+ "name": "JanumalaAkhilendra",
+ "id": 82641474,
+ "comment_id": 2446791257,
+ "created_at": "2024-10-30T11:43:16Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10051
+ },
+ {
+ "name": "herjanice",
+ "id": 72483795,
+ "comment_id": 2370891577,
+ "created_at": "2024-09-24T10:33:26Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9727
+ },
+ {
+ "name": "mabderrahim",
+ "id": 20402768,
+ "comment_id": 2377340466,
+ "created_at": "2024-09-26T15:48:00Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9727
+ },
+ {
+ "name": "mohamed-abderrahim3",
+ "id": 183199390,
+ "comment_id": 2380593605,
+ "created_at": "2024-09-28T10:19:38Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9727
+ },
+ {
+ "name": "chuangyeshuo",
+ "id": 14370480,
+ "comment_id": 2449017804,
+ "created_at": "2024-10-31T05:05:38Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10099
+ },
+ {
+ "name": "md-abid-hussain",
+ "id": 101964499,
+ "comment_id": 2449303679,
+ "created_at": "2024-10-31T08:26:54Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10100
+ },
+ {
+ "name": "poisonvine",
+ "id": 179939949,
+ "comment_id": 2408223847,
+ "created_at": "2024-10-11T23:08:39Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9833
+ },
+ {
+ "name": "code-vine",
+ "id": 95056519,
+ "comment_id": 2408235943,
+ "created_at": "2024-10-11T23:31:03Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9833
+ },
+ {
+ "name": "poisonvine",
+ "id": 179939949,
+ "comment_id": 2463687190,
+ "created_at": "2024-11-08T03:30:34Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9833
+ },
+ {
+ "name": "vishwamartur",
+ "id": 64204611,
+ "comment_id": 2480506920,
+ "created_at": "2024-11-16T10:24:05Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10176
+ },
+ {
+ "name": "UTSAVS26",
+ "id": 119779889,
+ "comment_id": 2482548112,
+ "created_at": "2024-11-18T10:15:35Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10182
+ },
+ {
+ "name": "fshabashev",
+ "id": 6548211,
+ "comment_id": 2482924022,
+ "created_at": "2024-11-18T12:36:59Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10153
+ },
+ {
+ "name": "GTgyani206",
+ "id": 128274569,
+ "comment_id": 2407637789,
+ "created_at": "2024-10-11T15:20:07Z",
+ "repoId": 143328315,
+ "pullRequestNo": 9832
+ },
+ {
+ "name": "QuantumPlumber",
+ "id": 44450703,
+ "comment_id": 2521508302,
+ "created_at": "2024-12-05T21:39:15Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10243
+ },
+ {
+ "name": "Abdusshh",
+ "id": 101020733,
+ "comment_id": 2525127867,
+ "created_at": "2024-12-07T13:40:48Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10253
+ },
+ {
+ "name": "cliffordp",
+ "id": 1812179,
+ "comment_id": 2540449382,
+ "created_at": "2024-12-13T03:21:48Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10285
+ },
+ {
+ "name": "abhirajadhikary06",
+ "id": 171187625,
+ "comment_id": 2563775672,
+ "created_at": "2024-12-27T14:55:52Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10331
+ },
+ {
+ "name": "jbrass",
+ "id": 125982,
+ "comment_id": 2587312474,
+ "created_at": "2025-01-13T14:50:21Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10355
+ },
+ {
+ "name": "dj013",
+ "id": 47425755,
+ "comment_id": 2593267189,
+ "created_at": "2025-01-15T15:43:10Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10371
+ },
+ {
+ "name": "juliette0704",
+ "id": 91728573,
+ "comment_id": 2609377887,
+ "created_at": "2025-01-23T10:01:31Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10395
+ },
+ {
+ "name": "ivancastanop",
+ "id": 107499323,
+ "comment_id": 2598203208,
+ "created_at": "2025-01-17T11:55:12Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10379
+ },
+ {
+ "name": "rdonato",
+ "id": 128521,
+ "comment_id": 2643683251,
+ "created_at": "2025-02-07T18:22:51Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10444
+ },
+ {
+ "name": "SoNiC-HeRE",
+ "id": 96797205,
+ "comment_id": 2654003700,
+ "created_at": "2025-02-12T15:10:05Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10460
+ },
+ {
+ "name": "guspan-tanadi",
+ "id": 36249910,
+ "comment_id": 2675814807,
+ "created_at": "2025-02-21T23:28:45Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10465
+ },
+ {
+ "name": "arashaomrani",
+ "id": 20032520,
+ "comment_id": 2705110135,
+ "created_at": "2025-03-06T22:46:52Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10544
+ },
+ {
+ "name": "kevinrawal",
+ "id": 84058124,
+ "comment_id": 2708288010,
+ "created_at": "2025-03-08T13:33:56Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10550
+ },
+ {
+ "name": "MR901",
+ "id": 20877166,
+ "comment_id": 2788354723,
+ "created_at": "2025-04-09T05:54:32Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10681
+ },
+ {
+ "name": "pnewsam",
+ "id": 22651415,
+ "comment_id": 2813745881,
+ "created_at": "2025-04-17T18:35:55Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10736
+ },
+ {
+ "name": "emmanuel-ferdman",
+ "id": 35470921,
+ "comment_id": 2816053850,
+ "created_at": "2025-04-18T19:17:39Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10739
+ },
+ {
+ "name": "Konstantinos-10",
+ "id": 161840728,
+ "comment_id": 2833463268,
+ "created_at": "2025-04-27T13:35:09Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10761
+ },
+ {
+ "name": "NikosLaspias",
+ "id": 148558723,
+ "comment_id": 2834255670,
+ "created_at": "2025-04-28T07:38:11Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10760
+ },
+ {
+ "name": "jzs1997",
+ "id": 29564670,
+ "comment_id": 2840686847,
+ "created_at": "2025-04-30T03:07:12Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10776
+ },
+ {
+ "name": "HarshaVardhanMannem",
+ "id": 144146034,
+ "comment_id": 2896453670,
+ "created_at": "2025-05-21T03:28:49Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10861
+ },
+ {
+ "name": "arun-prasath2005",
+ "id": 84761066,
+ "comment_id": 2906488930,
+ "created_at": "2025-05-24T06:10:22Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10882
+ },
+ {
+ "name": "vmanikanta07",
+ "id": 117996904,
+ "comment_id": 2906811274,
+ "created_at": "2025-05-24T12:37:42Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10885
+ },
+ {
+ "name": "omerc7",
+ "id": 32813109,
+ "comment_id": 2908711653,
+ "created_at": "2025-05-26T06:34:46Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10895
+ },
+ {
+ "name": "trickster026",
+ "id": 212937700,
+ "comment_id": 2910591816,
+ "created_at": "2025-05-26T20:34:08Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10903
+ },
+ {
+ "name": "ivanvza",
+ "id": 8543825,
+ "comment_id": 2911844022,
+ "created_at": "2025-05-27T09:31:36Z",
+ "repoId": 143328315,
+ "pullRequestNo": 10900
+ },
+ {
+ "name": "Joystonm",
+ "id": 116254639,
+ "comment_id": 2965183033,
+ "created_at": "2025-06-12T05:37:40Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11070
+ },
+ {
+ "name": "noname4life",
+ "id": 77653287,
+ "comment_id": 2983573198,
+ "created_at": "2025-06-18T10:07:09Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11117
+ },
+ {
+ "name": "D1m7asis",
+ "id": 80602676,
+ "comment_id": 2985345244,
+ "created_at": "2025-06-18T18:42:15Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11124
+ },
+ {
+ "name": "Alex-xd",
+ "id": 11256006,
+ "comment_id": 2999207900,
+ "created_at": "2025-06-24T07:50:23Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11160
+ },
+ {
+ "name": "PriyanshuPz",
+ "id": 112266318,
+ "comment_id": 3000590454,
+ "created_at": "2025-06-24T13:51:44Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11163
+ },
+ {
+ "name": "rawathemant246",
+ "id": 99639231,
+ "comment_id": 2999067598,
+ "created_at": "2025-06-24T06:59:35Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11159
+ },
+ {
+ "name": "aryanmalik-iet",
+ "id": 187411120,
+ "comment_id": 3007270696,
+ "created_at": "2025-06-26T06:24:35Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11186
+ },
+ {
+ "name": "iabhi4",
+ "id": 61010675,
+ "comment_id": 3017197726,
+ "created_at": "2025-06-29T22:22:14Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11212
+ },
+ {
+ "name": "dotWee",
+ "id": 8060356,
+ "comment_id": 3072932250,
+ "created_at": "2025-07-15T09:45:05Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11300
+ },
+ {
+ "name": "buallen",
+ "id": 54055907,
+ "comment_id": 3078683990,
+ "created_at": "2025-07-16T13:40:32Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11234
+ },
+ {
+ "name": "Raahim-Lone",
+ "id": 175012415,
+ "comment_id": 3120439531,
+ "created_at": "2025-07-25T21:35:44Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11365
+ },
+ {
+ "name": "kaizenjinco",
+ "id": 78314961,
+ "comment_id": 3124537097,
+ "created_at": "2025-07-27T16:53:30Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11367
+ },
+ {
+ "name": "huang-x-h",
+ "id": 381860,
+ "comment_id": 3132498852,
+ "created_at": "2025-07-29T13:16:29Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11126
+ },
+ {
+ "name": "aperepel",
+ "id": 119367,
+ "comment_id": 3137657308,
+ "created_at": "2025-07-30T20:03:35Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11385
+ },
+ {
+ "name": "abhayasr",
+ "id": 108477628,
+ "comment_id": 3164476409,
+ "created_at": "2025-08-07T14:39:49Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11291
+ },
+ {
+ "name": "logan-mo",
+ "id": 63550599,
+ "comment_id": 3167373652,
+ "created_at": "2025-08-08T10:27:53Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11414
+ },
+ {
+ "name": "kylediaz",
+ "id": 35979917,
+ "comment_id": 3180690963,
+ "created_at": "2025-08-12T19:21:02Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11427
+ },
+ {
+ "name": "Kenxpx",
+ "id": 155082290,
+ "comment_id": 3194287003,
+ "created_at": "2025-08-17T10:15:06Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11450
+ },
+ {
+ "name": "Nancy9ice",
+ "id": 103530451,
+ "comment_id": 3197557060,
+ "created_at": "2025-08-18T16:11:20Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11453
+ },
+ {
+ "name": "Matvey-Kuk",
+ "id": 3284841,
+ "comment_id": 3197947416,
+ "created_at": "2025-08-18T18:18:26Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11452
+ },
+ {
+ "name": "louisneal",
+ "id": 47094728,
+ "comment_id": 3222541351,
+ "created_at": "2025-08-26T04:06:55Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11478
+ },
+ {
+ "name": "sejubar",
+ "id": 154475559,
+ "comment_id": 3240009269,
+ "created_at": "2025-08-31T09:59:19Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11495
+ },
+ {
+ "name": "sudsmenon",
+ "id": 11342520,
+ "comment_id": 3250743797,
+ "created_at": "2025-09-03T20:48:18Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11510
+ },
+ {
+ "name": "TaniyaKatigar",
+ "id": 214086943,
+ "comment_id": 3262560837,
+ "created_at": "2025-09-06T16:30:40Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11530
+ },
+ {
+ "name": "GeorgeGithiri5",
+ "id": 46107866,
+ "comment_id": 3269367783,
+ "created_at": "2025-09-09T07:49:06Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11541
+ },
+ {
+ "name": "gauiPPP",
+ "id": 43440362,
+ "comment_id": 3284159007,
+ "created_at": "2025-09-12T07:46:21Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11554
+ },
+ {
+ "name": "morningman",
+ "id": 2899462,
+ "comment_id": 3293544413,
+ "created_at": "2025-09-15T19:07:52Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11574
+ },
+ {
+ "name": "sadiqkhzn",
+ "id": 24961132,
+ "comment_id": 3312201690,
+ "created_at": "2025-09-19T13:26:49Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11596
+ },
+ {
+ "name": "yumosx",
+ "id": 141902143,
+ "comment_id": 3322908961,
+ "created_at": "2025-09-23T08:21:07Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11605
+ },
+ {
+ "name": "aimurphy",
+ "id": 36110273,
+ "comment_id": 3335211124,
+ "created_at": "2025-09-25T17:38:00Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11618
+ },
+ {
+ "name": "richardokonicha",
+ "id": 48168290,
+ "comment_id": 3346750889,
+ "created_at": "2025-09-29T12:48:00Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11552
+ },
+ {
+ "name": "vigbav36",
+ "id": 90998381,
+ "comment_id": 3361788337,
+ "created_at": "2025-10-02T15:24:35Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11666
+ },
+ {
+ "name": "yashisthebatman",
+ "id": 149709821,
+ "comment_id": 3364470461,
+ "created_at": "2025-10-03T06:48:03Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11676
+ },
+ {
+ "name": "survivant",
+ "id": 191879,
+ "comment_id": 3369115643,
+ "created_at": "2025-10-05T15:02:15Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11684
+ },
+ {
+ "name": "Sai-Sravya-Thumati",
+ "id": 64857617,
+ "comment_id": 3370705793,
+ "created_at": "2025-10-06T09:31:16Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11686
+ },
+ {
+ "name": "cclauss",
+ "id": 3709715,
+ "comment_id": 3364277206,
+ "created_at": "2025-10-03T05:08:38Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11673
+ },
+ {
+ "name": "ParasNingune",
+ "id": 153178176,
+ "comment_id": 3388187853,
+ "created_at": "2025-10-10T03:48:32Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11703
+ },
+ {
+ "name": "HarshitR2004",
+ "id": 159914116,
+ "comment_id": 3388359328,
+ "created_at": "2025-10-10T05:37:12Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11704
+ },
+ {
+ "name": "Nirzak",
+ "id": 11460645,
+ "comment_id": 3393522813,
+ "created_at": "2025-10-11T17:20:41Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11726
+ },
+ {
+ "name": "faizan842",
+ "id": 91795555,
+ "comment_id": 3407632893,
+ "created_at": "2025-10-15T17:55:57Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11748
+ },
+ {
+ "name": "AhmadYasser1",
+ "id": 77586860,
+ "comment_id": 3419161297,
+ "created_at": "2025-10-19T02:48:49Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11766
+ },
+ {
+ "name": "Nikhil172913832",
+ "id": 140622713,
+ "comment_id": 3443931056,
+ "created_at": "2025-10-24T16:13:14Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11786
+ },
+ {
+ "name": "jiaqicheng1998",
+ "id": 65794980,
+ "comment_id": 3459506446,
+ "created_at": "2025-10-29T03:48:36Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11793
+ },
+ {
+ "name": "Aashish079",
+ "id": 106550372,
+ "comment_id": 3461223031,
+ "created_at": "2025-10-29T12:19:16Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11812
+ },
+ {
+ "name": "guddu-debasis",
+ "id": 167549811,
+ "comment_id": 3463419567,
+ "created_at": "2025-10-29T19:15:44Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11821
+ },
+ {
+ "name": "jeis4wpi",
+ "id": 42679190,
+ "comment_id": 3467642515,
+ "created_at": "2025-10-30T11:55:54Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11822
+ },
+ {
+ "name": "ak4shravikumar",
+ "id": 189372043,
+ "comment_id": 3469119609,
+ "created_at": "2025-10-30T17:15:30Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11828
+ },
+ {
+ "name": "rajesh-adk-137",
+ "id": 89499267,
+ "comment_id": 3470873094,
+ "created_at": "2025-10-31T00:51:14Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11835
+ },
+ {
+ "name": "KrishThakur23",
+ "id": 214495511,
+ "comment_id": 3475330781,
+ "created_at": "2025-11-01T01:05:56Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11841
+ },
+ {
+ "name": "ritoban23",
+ "id": 124308320,
+ "comment_id": 3476917215,
+ "created_at": "2025-11-01T22:16:42Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11843
+ },
+ {
+ "name": "bala-ceg",
+ "id": 70808619,
+ "comment_id": 3478836423,
+ "created_at": "2025-11-03T04:05:40Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11844
+ },
+ {
+ "name": "HamoonDBA",
+ "id": 3939424,
+ "comment_id": 3499521731,
+ "created_at": "2025-11-06T21:49:51Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11858
+ },
+ {
+ "name": "md-ziauddin",
+ "id": 29926473,
+ "comment_id": 3533762471,
+ "created_at": "2025-11-14T17:15:19Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11888
+ },
+ {
+ "name": "suman-X",
+ "id": 137594910,
+ "comment_id": 3534136586,
+ "created_at": "2025-11-14T18:54:22Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11890
+ },
+ {
+ "name": "suman-X",
+ "id": 137594910,
+ "comment_id": 3534230691,
+ "created_at": "2025-11-14T19:21:59Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11890
+ },
+ {
+ "name": "SyedaAnshrahGillani",
+ "id": 90501474,
+ "comment_id": 3616952272,
+ "created_at": "2025-12-05T13:33:42Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11973
+ },
+ {
+ "name": "neversettle17-101",
+ "id": 41864816,
+ "comment_id": 3620426556,
+ "created_at": "2025-12-06T13:56:57Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11975
+ },
+ {
+ "name": "duskobogdanovski",
+ "id": 21080468,
+ "comment_id": 3656079267,
+ "created_at": "2025-12-15T14:55:07Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12013
+ },
+ {
+ "name": "kelvinvelasquez-SDE",
+ "id": 112011775,
+ "comment_id": 3675658408,
+ "created_at": "2025-12-19T16:19:32Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12029
+ },
+ {
+ "name": "PPeitsch",
+ "id": 88450637,
+ "comment_id": 3704693294,
+ "created_at": "2026-01-02T07:50:33Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12048
+ },
+ {
+ "name": "SachinMyadam",
+ "id": 110909093,
+ "comment_id": 3716118688,
+ "created_at": "2026-01-06T20:02:10Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12054
+ },
+ {
+ "name": "xuwei95",
+ "id": 18109811,
+ "comment_id": 3723114411,
+ "created_at": "2026-01-08T10:01:14Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12063
+ },
+ {
+ "name": "Nandha-kumar-S",
+ "id": 85221220,
+ "comment_id": 3727602927,
+ "created_at": "2026-01-09T07:52:10Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12082
+ },
+ {
+ "name": "Sweetdevil144",
+ "id": 117591942,
+ "comment_id": 3761427133,
+ "created_at": "2026-01-16T19:12:39Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12110
+ },
+ {
+ "name": "Sriram-B-Srivatsa",
+ "id": 144884365,
+ "comment_id": 3765374596,
+ "created_at": "2026-01-18T14:51:54Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12113
+ },
+ {
+ "name": "zhaojinxin409",
+ "id": 5874804,
+ "comment_id": 3771260955,
+ "created_at": "2026-01-20T06:34:45Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12122
+ },
+ {
+ "name": "murataslan1",
+ "id": 78961478,
+ "comment_id": 3784602307,
+ "created_at": "2026-01-22T14:06:33Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12004
+ },
+ {
+ "name": "C1ARKGABLE",
+ "id": 13039858,
+ "comment_id": 3792661007,
+ "created_at": "2026-01-23T21:53:19Z",
+ "repoId": 143328315,
+ "pullRequestNo": 11988
+ },
+ {
+ "name": "AndrewFarley",
+ "id": 470163,
+ "comment_id": 3801391357,
+ "created_at": "2026-01-26T19:40:00Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12123
+ },
+ {
+ "name": "007slm",
+ "id": 1670036,
+ "comment_id": 3803635367,
+ "created_at": "2026-01-27T07:48:21Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12155
+ },
+ {
+ "name": "C0staTin",
+ "id": 12409467,
+ "comment_id": 3812795861,
+ "created_at": "2026-01-28T17:36:00Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12151
+ },
+ {
+ "name": "Amogh-2404",
+ "id": 114862749,
+ "comment_id": 3814926744,
+ "created_at": "2026-01-29T02:00:24Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12167
+ },
+ {
+ "name": "themavik",
+ "id": 179817126,
+ "comment_id": 3936291923,
+ "created_at": "2026-02-20T17:50:39Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12213
+ },
+ {
+ "name": "ianu82",
+ "id": 86010258,
+ "comment_id": 3973995110,
+ "created_at": "2026-02-27T16:55:27Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12251
+ },
+ {
+ "name": "Mirza-Samad-Ahmed-Baig",
+ "id": 89132160,
+ "comment_id": 4054729064,
+ "created_at": "2026-03-13T12:24:17Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12290
+ },
+ {
+ "name": "Krishnav1237",
+ "id": 147693159,
+ "comment_id": 4061239564,
+ "created_at": "2026-03-14T19:45:50Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12294
+ },
+ {
+ "name": "StefanTrsunov",
+ "id": 91495981,
+ "comment_id": 4070493719,
+ "created_at": "2026-03-16T20:45:48Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12297
+ },
+ {
+ "name": "Tzsapphire",
+ "id": 209363831,
+ "comment_id": 4106737895,
+ "created_at": "2026-03-22T18:27:23Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12317
+ },
+ {
+ "name": "jnMetaCode",
+ "id": 12096460,
+ "comment_id": 4111619407,
+ "created_at": "2026-03-23T15:43:15Z",
+ "repoId": 143328315,
+ "pullRequestNo": 12279
}
]
}
\ No newline at end of file
diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl
index d7ad61ed0c8..852a82f7839 100644
--- a/docker/docker-bake.hcl
+++ b/docker/docker-bake.hcl
@@ -105,23 +105,23 @@ target "images" {
item = [
{
name = "bare"
- extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss]"
+ extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss,pgvector]"
target = ""
},
{
name = "devel"
- extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss]" # Required for running integration tests
+ extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss,pgvector]" # Required for running integration tests
target = "dev"
},
{
# If you make any changes here, make them to cloud-cpu as well
name = "cloud"
- extras = ".[mysql,statsforecast-extra,neuralforecast-extra,timegpt,mssql,mssql-odbc,gmail,snowflake,clickhouse,bigquery,elasticsearch,s3,databricks,oracle,opentelemetry,langfuse,jira,salesforce,gong,hubspot,netsuite,shopify,agents,kb] darts datasetsforecast transformers"
+ extras = ".[mysql,mssql,mssql-odbc,snowflake,bigquery,databricks,oracle,opentelemetry,langfuse,salesforce,hubspot,netsuite,shopify,agents,kb,pgvector] darts datasetsforecast transformers"
target = ""
},
{
name = "cloud-cpu"
- extras = ".[mysql,statsforecast-extra,neuralforecast-extra,timegpt,mssql,mssql-odbc,gmail,snowflake,clickhouse,bigquery,elasticsearch,s3,databricks,oracle,opentelemetry,langfuse,jira,salesforce,gong,hubspot,netsuite,shopify,agents,kb] darts datasetsforecast transformers"
+ extras = ".[mysql,mssql,mssql-odbc,snowflake,bigquery,databricks,oracle,opentelemetry,langfuse,salesforce,hubspot,netsuite,shopify,agents,kb,pgvector] darts datasetsforecast transformers"
target = ""
},
]
diff --git a/docker/mindsdb.Dockerfile b/docker/mindsdb.Dockerfile
index ae706ed02df..70f40751f58 100644
--- a/docker/mindsdb.Dockerfile
+++ b/docker/mindsdb.Dockerfile
@@ -18,8 +18,6 @@ COPY mindsdb/__about__.py mindsdb/
# Which will mean the next stage can be cached, even if the cache for the above stage was invalidated.
-
-
# Use the stage from above to install our deps with as much caching as possible
FROM python:3.10-slim AS build
WORKDIR /mindsdb
@@ -56,7 +54,7 @@ COPY --from=deps /mindsdb .
# - and finally declare `/mindsdb` as the target dir.
ENV UV_LINK_MODE=copy \
UV_PYTHON_DOWNLOADS=never \
- UV_PYTHON=python3.10 \
+ UV_PYTHON=python3.10.20 \
UV_PROJECT_ENVIRONMENT=/mindsdb \
VIRTUAL_ENV=/venv \
PATH=/venv/bin:$PATH
@@ -71,6 +69,13 @@ RUN --mount=type=cache,target=/root/.cache \
FROM build AS extras
+
+# Apply latest security patches so the final image picks up fixes
+# even when the build stage layers are cached
+RUN --mount=target=/var/lib/apt,type=cache,sharing=locked \
+ --mount=target=/var/cache/apt,type=cache,sharing=locked \
+ apt-get update -qy && apt-get upgrade -qy
+
ARG EXTRAS
# Install extras on top of the bare mindsdb
# The torch index is provided for "-cpu" images which install the cpu-only version of torch
@@ -93,8 +98,10 @@ ENV PATH=/venv/bin:$PATH
EXPOSE 47334/tcp
EXPOSE 47335/tcp
-# Pre-load tokenizer from Huggingface, and UI
-RUN python -m mindsdb --config=/root/mindsdb_config.json --load-tokenizer --update-gui
+HEALTHCHECK --interval=30s --timeout=10s --retries=5 --start-period=60s CMD curl -fsS "http://localhost:47334/api/status"
+
+# Pre-load web GUI
+RUN python -m mindsdb --config=/root/mindsdb_config.json --update-gui
# Same as extras image, but with dev dependencies installed.
# This image is used in our docker-compose and for local development with volume mounting
diff --git a/docs/README.md b/docs 2/README.md
similarity index 100%
rename from docs/README.md
rename to docs 2/README.md
diff --git a/docs/assets/SLBot-Hero-Whizfizz.png b/docs 2/assets/SLBot-Hero-Whizfizz.png
similarity index 100%
rename from docs/assets/SLBot-Hero-Whizfizz.png
rename to docs 2/assets/SLBot-Hero-Whizfizz.png
diff --git a/docs/assets/SLBot-response1.png b/docs 2/assets/SLBot-response1.png
similarity index 100%
rename from docs/assets/SLBot-response1.png
rename to docs 2/assets/SLBot-response1.png
diff --git a/docs/assets/SLBot-response2.png b/docs 2/assets/SLBot-response2.png
similarity index 100%
rename from docs/assets/SLBot-response2.png
rename to docs 2/assets/SLBot-response2.png
diff --git a/docs/assets/SLBot-response3.png b/docs 2/assets/SLBot-response3.png
similarity index 100%
rename from docs/assets/SLBot-response3.png
rename to docs 2/assets/SLBot-response3.png
diff --git a/docs/assets/SLBot-response4.png b/docs 2/assets/SLBot-response4.png
similarity index 100%
rename from docs/assets/SLBot-response4.png
rename to docs 2/assets/SLBot-response4.png
diff --git a/docs/assets/ai-integrations.png b/docs 2/assets/ai-integrations.png
similarity index 100%
rename from docs/assets/ai-integrations.png
rename to docs 2/assets/ai-integrations.png
diff --git a/docs/assets/automation.png b/docs 2/assets/automation.png
similarity index 100%
rename from docs/assets/automation.png
rename to docs 2/assets/automation.png
diff --git a/docs/assets/byom_diagram.png b/docs 2/assets/byom_diagram.png
similarity index 100%
rename from docs/assets/byom_diagram.png
rename to docs 2/assets/byom_diagram.png
diff --git a/docs/assets/byom_empty_form.png b/docs 2/assets/byom_empty_form.png
similarity index 100%
rename from docs/assets/byom_empty_form.png
rename to docs 2/assets/byom_empty_form.png
diff --git a/docs/assets/byom_form.png b/docs 2/assets/byom_form.png
similarity index 100%
rename from docs/assets/byom_form.png
rename to docs 2/assets/byom_form.png
diff --git a/docs/assets/byom_upload_custom_model.png b/docs 2/assets/byom_upload_custom_model.png
similarity index 100%
rename from docs/assets/byom_upload_custom_model.png
rename to docs 2/assets/byom_upload_custom_model.png
diff --git a/docs/assets/chatbot_diagram.png b/docs 2/assets/chatbot_diagram.png
similarity index 100%
rename from docs/assets/chatbot_diagram.png
rename to docs 2/assets/chatbot_diagram.png
diff --git a/docs/assets/cloud/gui_query.png b/docs 2/assets/cloud/gui_query.png
similarity index 100%
rename from docs/assets/cloud/gui_query.png
rename to docs 2/assets/cloud/gui_query.png
diff --git a/docs/assets/cloud/main_mdb.png b/docs 2/assets/cloud/main_mdb.png
similarity index 100%
rename from docs/assets/cloud/main_mdb.png
rename to docs 2/assets/cloud/main_mdb.png
diff --git a/docs/assets/connect_tableau.png b/docs 2/assets/connect_tableau.png
similarity index 100%
rename from docs/assets/connect_tableau.png
rename to docs 2/assets/connect_tableau.png
diff --git a/docs/assets/connect_tableau_2.png b/docs 2/assets/connect_tableau_2.png
similarity index 100%
rename from docs/assets/connect_tableau_2.png
rename to docs 2/assets/connect_tableau_2.png
diff --git a/docs/assets/connect_tableau_3.png b/docs 2/assets/connect_tableau_3.png
similarity index 100%
rename from docs/assets/connect_tableau_3.png
rename to docs 2/assets/connect_tableau_3.png
diff --git a/docs/assets/connect_tableau_4.png b/docs 2/assets/connect_tableau_4.png
similarity index 100%
rename from docs/assets/connect_tableau_4.png
rename to docs 2/assets/connect_tableau_4.png
diff --git a/docs/assets/connect_tableau_5.png b/docs 2/assets/connect_tableau_5.png
similarity index 100%
rename from docs/assets/connect_tableau_5.png
rename to docs 2/assets/connect_tableau_5.png
diff --git a/docs/assets/connect_tableau_6.png b/docs 2/assets/connect_tableau_6.png
similarity index 100%
rename from docs/assets/connect_tableau_6.png
rename to docs 2/assets/connect_tableau_6.png
diff --git a/docs/assets/connect_tableau_7.png b/docs 2/assets/connect_tableau_7.png
similarity index 100%
rename from docs/assets/connect_tableau_7.png
rename to docs 2/assets/connect_tableau_7.png
diff --git a/docs/assets/docker/docker_desktop/containers-running-extension.png b/docs 2/assets/docker/docker_desktop/containers-running-extension.png
similarity index 100%
rename from docs/assets/docker/docker_desktop/containers-running-extension.png
rename to docs 2/assets/docker/docker_desktop/containers-running-extension.png
diff --git a/docs/assets/docker/docker_desktop/enable-extension-containers.png b/docs 2/assets/docker/docker_desktop/enable-extension-containers.png
similarity index 100%
rename from docs/assets/docker/docker_desktop/enable-extension-containers.png
rename to docs 2/assets/docker/docker_desktop/enable-extension-containers.png
diff --git a/docs/assets/docker/docker_desktop/enable-win-dev-mode.png b/docs 2/assets/docker/docker_desktop/enable-win-dev-mode.png
similarity index 100%
rename from docs/assets/docker/docker_desktop/enable-win-dev-mode.png
rename to docs 2/assets/docker/docker_desktop/enable-win-dev-mode.png
diff --git a/docs/assets/docker/docker_desktop/mindsdb-container-logs.png b/docs 2/assets/docker/docker_desktop/mindsdb-container-logs.png
similarity index 100%
rename from docs/assets/docker/docker_desktop/mindsdb-container-logs.png
rename to docs 2/assets/docker/docker_desktop/mindsdb-container-logs.png
diff --git a/docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png b/docs 2/assets/docker/docker_desktop/mindsdb_docker_desktop.png
similarity index 100%
rename from docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png
rename to docs 2/assets/docker/docker_desktop/mindsdb_docker_desktop.png
diff --git a/docs/assets/docker/docker_desktop/pull-latest-image.png b/docs 2/assets/docker/docker_desktop/pull-latest-image.png
similarity index 100%
rename from docs/assets/docker/docker_desktop/pull-latest-image.png
rename to docs 2/assets/docker/docker_desktop/pull-latest-image.png
diff --git a/docs/assets/faqs_download.csv.png b/docs 2/assets/faqs_download.csv.png
similarity index 100%
rename from docs/assets/faqs_download.csv.png
rename to docs 2/assets/faqs_download.csv.png
diff --git a/docs/assets/files/upload_file.png b/docs 2/assets/files/upload_file.png
similarity index 100%
rename from docs/assets/files/upload_file.png
rename to docs 2/assets/files/upload_file.png
diff --git a/docs/assets/files/upload_file_from_computer.png b/docs 2/assets/files/upload_file_from_computer.png
similarity index 100%
rename from docs/assets/files/upload_file_from_computer.png
rename to docs 2/assets/files/upload_file_from_computer.png
diff --git a/docs/assets/files/upload_file_from_url.png b/docs 2/assets/files/upload_file_from_url.png
similarity index 100%
rename from docs/assets/files/upload_file_from_url.png
rename to docs 2/assets/files/upload_file_from_url.png
diff --git a/docs/assets/install-dependencies-gui.png b/docs 2/assets/install-dependencies-gui.png
similarity index 100%
rename from docs/assets/install-dependencies-gui.png
rename to docs 2/assets/install-dependencies-gui.png
diff --git a/docs/assets/integrations/Arjuna.png b/docs 2/assets/integrations/Arjuna.png
similarity index 100%
rename from docs/assets/integrations/Arjuna.png
rename to docs 2/assets/integrations/Arjuna.png
diff --git a/docs/assets/jssdk_install_output.png b/docs 2/assets/jssdk_install_output.png
similarity index 100%
rename from docs/assets/jssdk_install_output.png
rename to docs 2/assets/jssdk_install_output.png
diff --git a/docs/assets/kb_data_insertion.png b/docs 2/assets/kb_data_insertion.png
similarity index 100%
rename from docs/assets/kb_data_insertion.png
rename to docs 2/assets/kb_data_insertion.png
diff --git a/docs/assets/kb_hybrid_search.jpg b/docs 2/assets/kb_hybrid_search.jpg
similarity index 100%
rename from docs/assets/kb_hybrid_search.jpg
rename to docs 2/assets/kb_hybrid_search.jpg
diff --git a/docs/assets/mcp.png b/docs 2/assets/mcp.png
similarity index 100%
rename from docs/assets/mcp.png
rename to docs 2/assets/mcp.png
diff --git a/docs/assets/mcp_cursor_chat.png b/docs 2/assets/mcp_cursor_chat.png
similarity index 100%
rename from docs/assets/mcp_cursor_chat.png
rename to docs 2/assets/mcp_cursor_chat.png
diff --git a/docs/assets/mcp_cursor_chat_mode.png b/docs 2/assets/mcp_cursor_chat_mode.png
similarity index 100%
rename from docs/assets/mcp_cursor_chat_mode.png
rename to docs 2/assets/mcp_cursor_chat_mode.png
diff --git a/docs/assets/mcp_cursor_chat_tool.png b/docs 2/assets/mcp_cursor_chat_tool.png
similarity index 100%
rename from docs/assets/mcp_cursor_chat_tool.png
rename to docs 2/assets/mcp_cursor_chat_tool.png
diff --git a/docs/assets/mcp_cursor_mcp_server.png b/docs 2/assets/mcp_cursor_mcp_server.png
similarity index 100%
rename from docs/assets/mcp_cursor_mcp_server.png
rename to docs 2/assets/mcp_cursor_mcp_server.png
diff --git a/docs/assets/mcp_cursor_settings.png b/docs 2/assets/mcp_cursor_settings.png
similarity index 100%
rename from docs/assets/mcp_cursor_settings.png
rename to docs 2/assets/mcp_cursor_settings.png
diff --git a/docs/assets/metabase_add_database.png b/docs 2/assets/metabase_add_database.png
similarity index 100%
rename from docs/assets/metabase_add_database.png
rename to docs 2/assets/metabase_add_database.png
diff --git a/docs/assets/metabase_connected.png b/docs 2/assets/metabase_connected.png
similarity index 100%
rename from docs/assets/metabase_connected.png
rename to docs 2/assets/metabase_connected.png
diff --git a/docs/assets/metabase_run_query_failure.png b/docs 2/assets/metabase_run_query_failure.png
similarity index 100%
rename from docs/assets/metabase_run_query_failure.png
rename to docs 2/assets/metabase_run_query_failure.png
diff --git a/docs/assets/metabase_run_query_home_rentals.png b/docs 2/assets/metabase_run_query_home_rentals.png
similarity index 100%
rename from docs/assets/metabase_run_query_home_rentals.png
rename to docs 2/assets/metabase_run_query_home_rentals.png
diff --git a/docs/assets/metabase_run_query_show_tables.png b/docs 2/assets/metabase_run_query_show_tables.png
similarity index 100%
rename from docs/assets/metabase_run_query_show_tables.png
rename to docs 2/assets/metabase_run_query_show_tables.png
diff --git a/docs/assets/minds/Dashboard_Minds.png b/docs 2/assets/minds/Dashboard_Minds.png
similarity index 100%
rename from docs/assets/minds/Dashboard_Minds.png
rename to docs 2/assets/minds/Dashboard_Minds.png
diff --git a/docs/assets/minds/DatasourcesConn_Minds.png b/docs 2/assets/minds/DatasourcesConn_Minds.png
similarity index 100%
rename from docs/assets/minds/DatasourcesConn_Minds.png
rename to docs 2/assets/minds/DatasourcesConn_Minds.png
diff --git a/docs/assets/minds/DatasourcesTab_Minds.png b/docs 2/assets/minds/DatasourcesTab_Minds.png
similarity index 100%
rename from docs/assets/minds/DatasourcesTab_Minds.png
rename to docs 2/assets/minds/DatasourcesTab_Minds.png
diff --git a/docs/assets/minds/DatasourcesType_Minds.png b/docs 2/assets/minds/DatasourcesType_Minds.png
similarity index 100%
rename from docs/assets/minds/DatasourcesType_Minds.png
rename to docs 2/assets/minds/DatasourcesType_Minds.png
diff --git a/docs/assets/minds/MindChat_Minds.png b/docs 2/assets/minds/MindChat_Minds.png
similarity index 100%
rename from docs/assets/minds/MindChat_Minds.png
rename to docs 2/assets/minds/MindChat_Minds.png
diff --git a/docs/assets/minds/MindsTab_Minds.png b/docs 2/assets/minds/MindsTab_Minds.png
similarity index 100%
rename from docs/assets/minds/MindsTab_Minds.png
rename to docs 2/assets/minds/MindsTab_Minds.png
diff --git a/docs/assets/minds/MindsWorkflow.png b/docs 2/assets/minds/MindsWorkflow.png
similarity index 100%
rename from docs/assets/minds/MindsWorkflow.png
rename to docs 2/assets/minds/MindsWorkflow.png
diff --git a/docs/assets/minds/NewMind_Minds.png b/docs 2/assets/minds/NewMind_Minds.png
similarity index 100%
rename from docs/assets/minds/NewMind_Minds.png
rename to docs 2/assets/minds/NewMind_Minds.png
diff --git a/docs/assets/minds/Playground_Mind.png b/docs 2/assets/minds/Playground_Mind.png
similarity index 100%
rename from docs/assets/minds/Playground_Mind.png
rename to docs 2/assets/minds/Playground_Mind.png
diff --git a/docs/assets/minds/PreviewData_Minds.png b/docs 2/assets/minds/PreviewData_Minds.png
similarity index 100%
rename from docs/assets/minds/PreviewData_Minds.png
rename to docs 2/assets/minds/PreviewData_Minds.png
diff --git a/docs/assets/mindsdb-editor.png b/docs 2/assets/mindsdb-editor.png
similarity index 100%
rename from docs/assets/mindsdb-editor.png
rename to docs 2/assets/mindsdb-editor.png
diff --git a/docs/assets/mindsdb-fqe.png b/docs 2/assets/mindsdb-fqe.png
similarity index 100%
rename from docs/assets/mindsdb-fqe.png
rename to docs 2/assets/mindsdb-fqe.png
diff --git a/docs/assets/mindsdb_gui_editor/create_model_1.png b/docs 2/assets/mindsdb_gui_editor/create_model_1.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/create_model_1.png
rename to docs 2/assets/mindsdb_gui_editor/create_model_1.png
diff --git a/docs/assets/mindsdb_gui_editor/create_model_2.png b/docs 2/assets/mindsdb_gui_editor/create_model_2.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/create_model_2.png
rename to docs 2/assets/mindsdb_gui_editor/create_model_2.png
diff --git a/docs/assets/mindsdb_gui_editor/mindsdb_editor.png b/docs 2/assets/mindsdb_gui_editor/mindsdb_editor.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/mindsdb_editor.png
rename to docs 2/assets/mindsdb_gui_editor/mindsdb_editor.png
diff --git a/docs/assets/mindsdb_gui_editor/multiple_query_editor.png b/docs 2/assets/mindsdb_gui_editor/multiple_query_editor.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/multiple_query_editor.png
rename to docs 2/assets/mindsdb_gui_editor/multiple_query_editor.png
diff --git a/docs/assets/mindsdb_gui_editor/object_explorer.png b/docs 2/assets/mindsdb_gui_editor/object_explorer.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/object_explorer.png
rename to docs 2/assets/mindsdb_gui_editor/object_explorer.png
diff --git a/docs/assets/mindsdb_gui_editor/object_explorer_query.png b/docs 2/assets/mindsdb_gui_editor/object_explorer_query.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/object_explorer_query.png
rename to docs 2/assets/mindsdb_gui_editor/object_explorer_query.png
diff --git a/docs/assets/mindsdb_gui_editor/query_editor.png b/docs 2/assets/mindsdb_gui_editor/query_editor.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/query_editor.png
rename to docs 2/assets/mindsdb_gui_editor/query_editor.png
diff --git a/docs/assets/mindsdb_gui_editor/results_viewer.png b/docs 2/assets/mindsdb_gui_editor/results_viewer.png
similarity index 100%
rename from docs/assets/mindsdb_gui_editor/results_viewer.png
rename to docs 2/assets/mindsdb_gui_editor/results_viewer.png
diff --git a/docs/assets/mindsdb_gui_respond.png b/docs 2/assets/mindsdb_gui_respond.png
similarity index 100%
rename from docs/assets/mindsdb_gui_respond.png
rename to docs 2/assets/mindsdb_gui_respond.png
diff --git a/docs/assets/mindsdb_gui_respond_agents.png b/docs 2/assets/mindsdb_gui_respond_agents.png
similarity index 100%
rename from docs/assets/mindsdb_gui_respond_agents.png
rename to docs 2/assets/mindsdb_gui_respond_agents.png
diff --git a/docs/assets/mindsdb_gui_respond_chat.png b/docs 2/assets/mindsdb_gui_respond_chat.png
similarity index 100%
rename from docs/assets/mindsdb_gui_respond_chat.png
rename to docs 2/assets/mindsdb_gui_respond_chat.png
diff --git a/docs/assets/model-management.png b/docs 2/assets/model-management.png
similarity index 100%
rename from docs/assets/model-management.png
rename to docs 2/assets/model-management.png
diff --git a/docs/assets/pythonsdk_install_output.png b/docs 2/assets/pythonsdk_install_output.png
similarity index 100%
rename from docs/assets/pythonsdk_install_output.png
rename to docs 2/assets/pythonsdk_install_output.png
diff --git a/docs/assets/sql/data-insights-1.png b/docs 2/assets/sql/data-insights-1.png
similarity index 100%
rename from docs/assets/sql/data-insights-1.png
rename to docs 2/assets/sql/data-insights-1.png
diff --git a/docs/assets/sql/data-insights-2.png b/docs 2/assets/sql/data-insights-2.png
similarity index 100%
rename from docs/assets/sql/data-insights-2.png
rename to docs 2/assets/sql/data-insights-2.png
diff --git a/docs/assets/sql/data-insights-3.png b/docs 2/assets/sql/data-insights-3.png
similarity index 100%
rename from docs/assets/sql/data-insights-3.png
rename to docs 2/assets/sql/data-insights-3.png
diff --git a/docs/assets/sql/data-insights-4.png b/docs 2/assets/sql/data-insights-4.png
similarity index 100%
rename from docs/assets/sql/data-insights-4.png
rename to docs 2/assets/sql/data-insights-4.png
diff --git a/docs/assets/sql/data-insights-5.png b/docs 2/assets/sql/data-insights-5.png
similarity index 100%
rename from docs/assets/sql/data-insights-5.png
rename to docs 2/assets/sql/data-insights-5.png
diff --git a/docs/assets/sql/data-insights-6.png b/docs 2/assets/sql/data-insights-6.png
similarity index 100%
rename from docs/assets/sql/data-insights-6.png
rename to docs 2/assets/sql/data-insights-6.png
diff --git a/docs/assets/sql/dbeaver_1.png b/docs 2/assets/sql/dbeaver_1.png
similarity index 100%
rename from docs/assets/sql/dbeaver_1.png
rename to docs 2/assets/sql/dbeaver_1.png
diff --git a/docs/assets/sql/dbeaver_2.png b/docs 2/assets/sql/dbeaver_2.png
similarity index 100%
rename from docs/assets/sql/dbeaver_2.png
rename to docs 2/assets/sql/dbeaver_2.png
diff --git a/docs/assets/sql/dbeaver_3.png b/docs 2/assets/sql/dbeaver_3.png
similarity index 100%
rename from docs/assets/sql/dbeaver_3.png
rename to docs 2/assets/sql/dbeaver_3.png
diff --git a/docs/assets/sql/dbeaver_4.png b/docs 2/assets/sql/dbeaver_4.png
similarity index 100%
rename from docs/assets/sql/dbeaver_4.png
rename to docs 2/assets/sql/dbeaver_4.png
diff --git a/docs/assets/sql/dbeaver_5.png b/docs 2/assets/sql/dbeaver_5.png
similarity index 100%
rename from docs/assets/sql/dbeaver_5.png
rename to docs 2/assets/sql/dbeaver_5.png
diff --git a/docs/assets/sql/grafana_1.png b/docs 2/assets/sql/grafana_1.png
similarity index 100%
rename from docs/assets/sql/grafana_1.png
rename to docs 2/assets/sql/grafana_1.png
diff --git a/docs/assets/sql/grafana_2.png b/docs 2/assets/sql/grafana_2.png
similarity index 100%
rename from docs/assets/sql/grafana_2.png
rename to docs 2/assets/sql/grafana_2.png
diff --git a/docs/assets/sql/grafana_3.png b/docs 2/assets/sql/grafana_3.png
similarity index 100%
rename from docs/assets/sql/grafana_3.png
rename to docs 2/assets/sql/grafana_3.png
diff --git a/docs/assets/sql/grafana_4.png b/docs 2/assets/sql/grafana_4.png
similarity index 100%
rename from docs/assets/sql/grafana_4.png
rename to docs 2/assets/sql/grafana_4.png
diff --git a/docs/assets/sql/grafana_5.png b/docs 2/assets/sql/grafana_5.png
similarity index 100%
rename from docs/assets/sql/grafana_5.png
rename to docs 2/assets/sql/grafana_5.png
diff --git a/docs/assets/sql/income_vs_debt.png b/docs 2/assets/sql/income_vs_debt.png
similarity index 100%
rename from docs/assets/sql/income_vs_debt.png
rename to docs 2/assets/sql/income_vs_debt.png
diff --git a/docs/assets/sql/income_vs_debt_known_value.png b/docs 2/assets/sql/income_vs_debt_known_value.png
similarity index 100%
rename from docs/assets/sql/income_vs_debt_known_value.png
rename to docs 2/assets/sql/income_vs_debt_known_value.png
diff --git a/docs/assets/sql/income_vs_debt_prediction.png b/docs 2/assets/sql/income_vs_debt_prediction.png
similarity index 100%
rename from docs/assets/sql/income_vs_debt_prediction.png
rename to docs 2/assets/sql/income_vs_debt_prediction.png
diff --git a/docs/assets/sql/income_vs_debt_predictor.png b/docs 2/assets/sql/income_vs_debt_predictor.png
similarity index 100%
rename from docs/assets/sql/income_vs_debt_predictor.png
rename to docs 2/assets/sql/income_vs_debt_predictor.png
diff --git a/docs/assets/sql/income_vs_debt_unknown_value.png b/docs 2/assets/sql/income_vs_debt_unknown_value.png
similarity index 100%
rename from docs/assets/sql/income_vs_debt_unknown_value.png
rename to docs 2/assets/sql/income_vs_debt_unknown_value.png
diff --git a/docs/assets/sql/kb_retrieval_example1.png b/docs 2/assets/sql/kb_retrieval_example1.png
similarity index 100%
rename from docs/assets/sql/kb_retrieval_example1.png
rename to docs 2/assets/sql/kb_retrieval_example1.png
diff --git a/docs/assets/sql/kb_retrieval_example2.png b/docs 2/assets/sql/kb_retrieval_example2.png
similarity index 100%
rename from docs/assets/sql/kb_retrieval_example2.png
rename to docs 2/assets/sql/kb_retrieval_example2.png
diff --git a/docs/assets/sql/kb_retrieval_example3.png b/docs 2/assets/sql/kb_retrieval_example3.png
similarity index 100%
rename from docs/assets/sql/kb_retrieval_example3.png
rename to docs 2/assets/sql/kb_retrieval_example3.png
diff --git a/docs/assets/sql/tutorials/customer_churn/.gitkeep b/docs 2/assets/sql/tutorials/customer_churn/.gitkeep
similarity index 100%
rename from docs/assets/sql/tutorials/customer_churn/.gitkeep
rename to docs 2/assets/sql/tutorials/customer_churn/.gitkeep
diff --git a/docs/assets/sql/tutorials/generating_images_1.png b/docs 2/assets/sql/tutorials/generating_images_1.png
similarity index 100%
rename from docs/assets/sql/tutorials/generating_images_1.png
rename to docs 2/assets/sql/tutorials/generating_images_1.png
diff --git a/docs/assets/sql/tutorials/generating_images_2.png b/docs 2/assets/sql/tutorials/generating_images_2.png
similarity index 100%
rename from docs/assets/sql/tutorials/generating_images_2.png
rename to docs 2/assets/sql/tutorials/generating_images_2.png
diff --git a/docs/assets/sql/upload_file1.png b/docs 2/assets/sql/upload_file1.png
similarity index 100%
rename from docs/assets/sql/upload_file1.png
rename to docs 2/assets/sql/upload_file1.png
diff --git a/docs/assets/sql/upload_file2.png b/docs 2/assets/sql/upload_file2.png
similarity index 100%
rename from docs/assets/sql/upload_file2.png
rename to docs 2/assets/sql/upload_file2.png
diff --git a/docs/assets/sql/use.png b/docs 2/assets/sql/use.png
similarity index 100%
rename from docs/assets/sql/use.png
rename to docs 2/assets/sql/use.png
diff --git a/docs/assets/supported_integrations.png b/docs 2/assets/supported_integrations.png
similarity index 100%
rename from docs/assets/supported_integrations.png
rename to docs 2/assets/supported_integrations.png
diff --git a/docs/assets/tutorials/crops/.gitkeep b/docs 2/assets/tutorials/crops/.gitkeep
similarity index 100%
rename from docs/assets/tutorials/crops/.gitkeep
rename to docs 2/assets/tutorials/crops/.gitkeep
diff --git a/docs/assets/tutorials/llm-chatbot-ui/chat.png b/docs 2/assets/tutorials/llm-chatbot-ui/chat.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/chat.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/chat.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/prompt.png b/docs 2/assets/tutorials/llm-chatbot-ui/prompt.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/prompt.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/prompt.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/publish.png b/docs 2/assets/tutorials/llm-chatbot-ui/publish.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/publish.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/publish.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/settings.png b/docs 2/assets/tutorials/llm-chatbot-ui/settings.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/settings.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/settings.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/settings2.png b/docs 2/assets/tutorials/llm-chatbot-ui/settings2.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/settings2.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/settings2.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/slack-chat.png b/docs 2/assets/tutorials/llm-chatbot-ui/slack-chat.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/slack-chat.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/slack-chat.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/slack.png b/docs 2/assets/tutorials/llm-chatbot-ui/slack.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/slack.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/slack.png
diff --git a/docs/assets/tutorials/llm-chatbot-ui/welcome.png b/docs 2/assets/tutorials/llm-chatbot-ui/welcome.png
similarity index 100%
rename from docs/assets/tutorials/llm-chatbot-ui/welcome.png
rename to docs 2/assets/tutorials/llm-chatbot-ui/welcome.png
diff --git a/docs/assets/tutorials/monkeylearn/.gitkeep b/docs 2/assets/tutorials/monkeylearn/.gitkeep
similarity index 100%
rename from docs/assets/tutorials/monkeylearn/.gitkeep
rename to docs 2/assets/tutorials/monkeylearn/.gitkeep
diff --git a/docs/assets/tutorials/monkeylearn/1.create_ml.png b/docs 2/assets/tutorials/monkeylearn/1.create_ml.png
similarity index 100%
rename from docs/assets/tutorials/monkeylearn/1.create_ml.png
rename to docs 2/assets/tutorials/monkeylearn/1.create_ml.png
diff --git a/docs/assets/tutorials/monkeylearn/10.select_prediction.png b/docs 2/assets/tutorials/monkeylearn/10.select_prediction.png
similarity index 100%
rename from docs/assets/tutorials/monkeylearn/10.select_prediction.png
rename to docs 2/assets/tutorials/monkeylearn/10.select_prediction.png
diff --git a/docs/assets/tutorials/monkeylearn/4.describe.png b/docs 2/assets/tutorials/monkeylearn/4.describe.png
similarity index 100%
rename from docs/assets/tutorials/monkeylearn/4.describe.png
rename to docs 2/assets/tutorials/monkeylearn/4.describe.png
diff --git a/docs/assets/tutorials/monkeylearn/5.select_prediction.png b/docs 2/assets/tutorials/monkeylearn/5.select_prediction.png
similarity index 100%
rename from docs/assets/tutorials/monkeylearn/5.select_prediction.png
rename to docs 2/assets/tutorials/monkeylearn/5.select_prediction.png
diff --git a/docs/assets/tutorials/monkeylearn/createmodel1.png b/docs 2/assets/tutorials/monkeylearn/createmodel1.png
similarity index 100%
rename from docs/assets/tutorials/monkeylearn/createmodel1.png
rename to docs 2/assets/tutorials/monkeylearn/createmodel1.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png
rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png
diff --git a/docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif b/docs 2/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif
rename to docs 2/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif
diff --git a/docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif b/docs 2/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif
similarity index 100%
rename from docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif
rename to docs 2/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif
diff --git a/docs/assets/twilio-chatbot-diagram.png b/docs 2/assets/twilio-chatbot-diagram.png
similarity index 100%
rename from docs/assets/twilio-chatbot-diagram.png
rename to docs 2/assets/twilio-chatbot-diagram.png
diff --git a/docs/assets/twilio-chatbot-response.png b/docs 2/assets/twilio-chatbot-response.png
similarity index 100%
rename from docs/assets/twilio-chatbot-response.png
rename to docs 2/assets/twilio-chatbot-response.png
diff --git a/docs/assets/twilio-image-model-image.png b/docs 2/assets/twilio-image-model-image.png
similarity index 100%
rename from docs/assets/twilio-image-model-image.png
rename to docs 2/assets/twilio-image-model-image.png
diff --git a/docs/assets/twilio-image-model-response.png b/docs 2/assets/twilio-image-model-response.png
similarity index 100%
rename from docs/assets/twilio-image-model-response.png
rename to docs 2/assets/twilio-image-model-response.png
diff --git a/docs/assets/twilio-text-model-response.png b/docs 2/assets/twilio-text-model-response.png
similarity index 100%
rename from docs/assets/twilio-text-model-response.png
rename to docs 2/assets/twilio-text-model-response.png
diff --git a/docs/assets/upload_custom_function.png b/docs 2/assets/upload_custom_function.png
similarity index 100%
rename from docs/assets/upload_custom_function.png
rename to docs 2/assets/upload_custom_function.png
diff --git a/docs/assets/upload_custom_function2.png b/docs 2/assets/upload_custom_function2.png
similarity index 100%
rename from docs/assets/upload_custom_function2.png
rename to docs 2/assets/upload_custom_function2.png
diff --git a/docs/assets/upload_custom_function_empty_form.png b/docs 2/assets/upload_custom_function_empty_form.png
similarity index 100%
rename from docs/assets/upload_custom_function_empty_form.png
rename to docs 2/assets/upload_custom_function_empty_form.png
diff --git a/docs/assets/use_cases/ai_agents.jpg b/docs 2/assets/use_cases/ai_agents.jpg
similarity index 100%
rename from docs/assets/use_cases/ai_agents.jpg
rename to docs 2/assets/use_cases/ai_agents.jpg
diff --git a/docs/assets/use_cases/ai_workflow_automation.jpg b/docs 2/assets/use_cases/ai_workflow_automation.jpg
similarity index 100%
rename from docs/assets/use_cases/ai_workflow_automation.jpg
rename to docs 2/assets/use_cases/ai_workflow_automation.jpg
diff --git a/docs/assets/use_cases/aipowered_data_retrieval.jpg b/docs 2/assets/use_cases/aipowered_data_retrieval.jpg
similarity index 100%
rename from docs/assets/use_cases/aipowered_data_retrieval.jpg
rename to docs 2/assets/use_cases/aipowered_data_retrieval.jpg
diff --git a/docs/assets/use_cases/automated_finetuning.jpg b/docs 2/assets/use_cases/automated_finetuning.jpg
similarity index 100%
rename from docs/assets/use_cases/automated_finetuning.jpg
rename to docs 2/assets/use_cases/automated_finetuning.jpg
diff --git a/docs/assets/use_cases/data_enrichment.jpg b/docs 2/assets/use_cases/data_enrichment.jpg
similarity index 100%
rename from docs/assets/use_cases/data_enrichment.jpg
rename to docs 2/assets/use_cases/data_enrichment.jpg
diff --git a/docs/assets/use_cases/indatabase_ml.jpg b/docs 2/assets/use_cases/indatabase_ml.jpg
similarity index 100%
rename from docs/assets/use_cases/indatabase_ml.jpg
rename to docs 2/assets/use_cases/indatabase_ml.jpg
diff --git a/docs/assets/use_cases/predictive_analytics.jpg b/docs 2/assets/use_cases/predictive_analytics.jpg
similarity index 100%
rename from docs/assets/use_cases/predictive_analytics.jpg
rename to docs 2/assets/use_cases/predictive_analytics.jpg
diff --git a/docs/callbacks.mdx b/docs 2/callbacks.mdx
similarity index 100%
rename from docs/callbacks.mdx
rename to docs 2/callbacks.mdx
diff --git a/docs/contribute/app-handlers.mdx b/docs 2/contribute/app-handlers.mdx
similarity index 98%
rename from docs/contribute/app-handlers.mdx
rename to docs 2/contribute/app-handlers.mdx
index 0c0a24639e1..040d3e2bc37 100644
--- a/docs/contribute/app-handlers.mdx
+++ b/docs 2/contribute/app-handlers.mdx
@@ -118,13 +118,13 @@ Here is a step-by-step guide:
The `native_query()` method runs commands of the native API syntax.
```py
- def native_query(self, query: Any) -> HandlerResponse:
+ def native_query(self, query: Any) -> TableResponse | OkResponse | ErrorResponse:
"""Receive raw query and act upon it somehow.
Args:
query (Any): query in native format (str for sql databases,
api's json etc)
Returns:
- HandlerResponse
+ TableResponse | OkResponse | ErrorResponse
"""
```
diff --git a/docs/contribute/community.mdx b/docs 2/contribute/community.mdx
similarity index 100%
rename from docs/contribute/community.mdx
rename to docs 2/contribute/community.mdx
diff --git a/docs/contribute/contribute.mdx b/docs 2/contribute/contribute.mdx
similarity index 100%
rename from docs/contribute/contribute.mdx
rename to docs 2/contribute/contribute.mdx
diff --git a/docs/contribute/data-handlers.mdx b/docs 2/contribute/data-handlers.mdx
similarity index 72%
rename from docs/contribute/data-handlers.mdx
rename to docs 2/contribute/data-handlers.mdx
index ca796627a7c..cb13aa0621d 100644
--- a/docs/contribute/data-handlers.mdx
+++ b/docs 2/contribute/data-handlers.mdx
@@ -45,7 +45,15 @@ Authors can opt for adding private methods, new files and folders, or any combin
Under the `mindsdb.integrations.libs.utils` library, contributors can find various methods that may be useful while implementing new handlers.
- Also, there are wrapper classes for the `DatabaseHandler` instances called [HandlerResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L7) and [HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L32). You should use them to ensure proper output formatting.
+ For response formatting, use the following classes from `mindsdb.integrations.libs.response`:
+ - [TableResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for queries returning data (SELECT, SHOW, etc.)
+ - [OkResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for successful operations without data (CREATE, DROP, INSERT, etc.)
+ - [ErrorResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for error cases
+ - [HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for connection status checks
+
+
+ The legacy `HandlerResponse` class is deprecated. Use `TableResponse`, `OkResponse`, or `ErrorResponse` instead.
+
### Implementation
@@ -124,13 +132,13 @@ Here is a step-by-step guide:
The `native_query()` method runs commands of the native database language.
```py
- def native_query(self, query: Any) -> HandlerResponse:
+ def native_query(self, query: Any) -> TableResponse | OkResponse | ErrorResponse:
"""Receive raw query and act upon it somehow.
Args:
query (Any): query in native format (str for sql databases,
etc)
Returns:
- HandlerResponse
+ TableResponse | OkResponse | ErrorResponse
"""
```
@@ -139,13 +147,13 @@ Here is a step-by-step guide:
The query method runs parsed SQL commands.
```py
- def query(self, query: ASTNode) -> HandlerResponse:
+ def query(self, query: ASTNode) -> TableResponse | OkResponse | ErrorResponse:
"""Receive query as AST (abstract syntax tree) and act upon it somehow.
Args:
query (ASTNode): sql query represented as AST. May be any kind
of query: SELECT, INSERT, DELETE, etc
Returns:
- HandlerResponse
+ TableResponse | OkResponse | ErrorResponse
"""
```
@@ -154,11 +162,11 @@ Here is a step-by-step guide:
The `get_tables()` method lists all the available tables.
```py
- def get_tables(self) -> HandlerResponse:
+ def get_tables(self) -> TableResponse | ErrorResponse:
""" Return list of entities
Return a list of entities that will be accessible as tables.
Returns:
- HandlerResponse: should have the same columns as information_schema.tables
+ TableResponse | ErrorResponse: should have the same columns as information_schema.tables
(https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html)
Column 'TABLE_NAME' is mandatory, other is optional.
"""
@@ -169,12 +177,12 @@ Here is a step-by-step guide:
The `get_columns()` method lists all columns of a specified table.
```py
- def get_columns(self, table_name: str) -> HandlerResponse:
+ def get_columns(self, table_name: str) -> TableResponse | ErrorResponse:
""" Returns a list of entity columns
Args:
table_name (str): name of one of tables returned by self.get_tables()
Returns:
- HandlerResponse: should have the same columns as information_schema.columns
+ TableResponse | ErrorResponse: data should have the same columns as information_schema.columns
(https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html)
Column 'COLUMN_NAME' is mandatory, other is optional. Highly
recommended to define also 'DATA_TYPE': it should be one of
@@ -182,6 +190,112 @@ Here is a step-by-step guide:
"""
```
+### Response Classes
+
+The data-returning methods (`native_query()`, `query()`, `get_tables()`, `get_columns()`) should return one of the following response classes from `mindsdb.integrations.libs.response`:
+
+| Response Class | Use Case | Key Attributes |
+|---------------|----------|----------------|
+| `TableResponse` | Queries that return data (SELECT, SHOW, etc.) | `data`, `data_generator`, `columns`, `affected_rows` |
+| `OkResponse` | Successful operations without data (CREATE, DROP, INSERT, UPDATE, DELETE) | `affected_rows` |
+| `ErrorResponse` | Error cases | `error_code`, `error_message`, `is_expected_error` |
+
+#### TableResponse
+
+`TableResponse` is used when returning data from queries. It supports two modes of data delivery:
+
+1. **Immediate data**: Pass all data at once via the `data` parameter (pandas DataFrame)
+2. **Streaming data**: Pass a generator via the `data_generator` parameter for lazy loading
+
+```py
+from mindsdb.integrations.libs.response import TableResponse, OkResponse, ErrorResponse
+
+# Immediate data response
+def native_query(self, query: str) -> TableResponse:
+ result = self.execute_query(query)
+ df = pd.DataFrame(result)
+ return TableResponse(data=df)
+
+# Streaming data response (for large datasets)
+def native_query(self, query: str) -> TableResponse:
+ def data_generator():
+ cursor = self.connection.cursor()
+ cursor.execute(query)
+ while batch := cursor.fetchmany(size=1000):
+ yield pd.DataFrame(batch)
+
+ return TableResponse(data_generator=data_generator())
+```
+
+#### OkResponse
+
+`OkResponse` is used for operations that don't return data:
+
+```py
+def native_query(self, query: str) -> OkResponse:
+ cursor = self.connection.cursor()
+ cursor.execute(query)
+ self.connection.commit()
+ return OkResponse(affected_rows=cursor.rowcount)
+```
+
+#### ErrorResponse
+
+`ErrorResponse` is used to report errors:
+
+```py
+def native_query(self, query: str) -> ErrorResponse:
+ try:
+ # ... execute query
+ except DatabaseError as e:
+ return ErrorResponse(
+ error_code=e.code,
+ error_message=str(e),
+ is_expected_error=True # Set to True for user errors (syntax, permissions, etc.)
+ )
+```
+
+### Streaming Support
+
+For handlers that deal with large datasets, implementing streaming support is recommended. This allows data to be returned in chunks rather than loading everything into memory at once.
+
+To enable streaming:
+
+1. Set the `stream_response` class attribute to `True`:
+
+ ```py
+ class MyDatabaseHandler(DatabaseHandler):
+ name = "mydatabase"
+ stream_response = True # Indicates that handler can return data as a generator
+ ```
+
+2. Implement `native_query()` to return a `TableResponse` with a `data_generator`:
+
+ ```py
+ def native_query(self, query: str, stream: bool = True) -> TableResponse | OkResponse | ErrorResponse:
+ if stream:
+ return self._execute_streaming(query)
+ else:
+ return self._execute_immediate(query)
+
+ def _execute_streaming(self, query: str) -> TableResponse:
+ """Execute query and return results as a stream."""
+ cursor = self.connection.cursor(name="server_side_cursor")
+ cursor.execute(query)
+
+ columns = [Column(name=col.name, type=col.type) for col in cursor.description]
+
+ def generate_data():
+ while batch := cursor.fetchmany(size=1000):
+ yield pd.DataFrame(batch, columns=[c.name for c in columns])
+
+ return TableResponse(columns=columns, data_generator=generate_data())
+ ```
+
+
+For a complete example of streaming implementation, see the [PostgreSQL handler](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py).
+
+
### Exporting the `connection_args` Dictionary
The `connection_args` dictionary contains all of the arguments used to establish the connection along with their descriptions, types, labels, and whether they are required or not.
diff --git a/docs/contribute/docs.mdx b/docs 2/contribute/docs.mdx
similarity index 100%
rename from docs/contribute/docs.mdx
rename to docs 2/contribute/docs.mdx
diff --git a/docs/contribute/install.mdx b/docs 2/contribute/install.mdx
similarity index 100%
rename from docs/contribute/install.mdx
rename to docs 2/contribute/install.mdx
diff --git a/docs/contribute/integrations-readme.mdx b/docs 2/contribute/integrations-readme.mdx
similarity index 100%
rename from docs/contribute/integrations-readme.mdx
rename to docs 2/contribute/integrations-readme.mdx
diff --git a/docs/contribute/ml-handlers.mdx b/docs 2/contribute/ml-handlers.mdx
similarity index 100%
rename from docs/contribute/ml-handlers.mdx
rename to docs 2/contribute/ml-handlers.mdx
diff --git a/docs/contribute/python-coding-standards.mdx b/docs 2/contribute/python-coding-standards.mdx
similarity index 100%
rename from docs/contribute/python-coding-standards.mdx
rename to docs 2/contribute/python-coding-standards.mdx
diff --git a/docs/data_catalog/integrations/overview.mdx b/docs 2/data_catalog/integrations/overview.mdx
similarity index 100%
rename from docs/data_catalog/integrations/overview.mdx
rename to docs 2/data_catalog/integrations/overview.mdx
diff --git a/docs/data_catalog/integrations/query.mdx b/docs 2/data_catalog/integrations/query.mdx
similarity index 100%
rename from docs/data_catalog/integrations/query.mdx
rename to docs 2/data_catalog/integrations/query.mdx
diff --git a/docs/data_catalog/overview.mdx b/docs 2/data_catalog/overview.mdx
similarity index 100%
rename from docs/data_catalog/overview.mdx
rename to docs 2/data_catalog/overview.mdx
diff --git a/docs/docs.json b/docs 2/docs.json
similarity index 99%
rename from docs/docs.json
rename to docs 2/docs.json
index f0a1b49cb87..ff5a1352eb2 100644
--- a/docs/docs.json
+++ b/docs 2/docs.json
@@ -1,6 +1,6 @@
{
"$schema": "https://mintlify.com/docs.json",
- "theme": "willow",
+ "theme": "almond",
"name": "MindsDB",
"colors": {
"primary": "#3f8394",
@@ -13,6 +13,11 @@
{
"tab": "Documentation",
"groups": [
+ {
+ "group": "Home",
+ "hidden": true,
+ "pages": ["homepage"]
+ },
{
"group": "Get Started",
"pages": [
@@ -726,7 +731,8 @@
},
"logo": {
"light": "/logo/light.svg",
- "dark": "/logo/dark.svg"
+ "dark": "/logo/dark.svg",
+ "href": "/homepage"
},
"api": {
"mdx": {
@@ -1079,14 +1085,6 @@
"source": "/custom-model/mlflow",
"destination": "/integrations/ai-engines/mlflow"
},
- {
- "source": "/nixtla/statsforecast",
- "destination": "/integrations/ai-engines/statsforecast"
- },
- {
- "source": "/nixtla/house-sales-statsforecast",
- "destination": "/sql/tutorials/house-sales-statsforecast"
- },
{
"source": "/connect/mindsdb_editor",
"destination": "/mindsdb_sql/connect/mindsdb_editor"
@@ -1371,10 +1369,6 @@
"source": "/sql/tutorials/house-sales-forecasting",
"destination": "/use-cases/predictive_analytics/house-sales-forecasting"
},
- {
- "source": "/sql/tutorials/expenditures-statsforecast",
- "destination": "/use-cases/predictive_analytics/expenditures-statsforecast"
- },
{
"source": "/sql/tutorials/eeg-forecasting",
"destination": "/use-cases/predictive_analytics/eeg-forecasting"
@@ -1462,6 +1456,14 @@
{
"source": "/mcp/cursor_usage",
"destination": "/model-context-protocol/cursor_usage"
+ },
+ {
+ "source": "/use-cases/overview",
+ "destination": "/homepage"
+ },
+ {
+ "source": "/",
+ "destination": "/homepage"
}
]
}
diff --git a/docs/faqs/benefits.mdx b/docs 2/faqs/benefits.mdx
similarity index 100%
rename from docs/faqs/benefits.mdx
rename to docs 2/faqs/benefits.mdx
diff --git a/docs/faqs/disposable-email-doman-and-openai.mdx b/docs 2/faqs/disposable-email-doman-and-openai.mdx
similarity index 100%
rename from docs/faqs/disposable-email-doman-and-openai.mdx
rename to docs 2/faqs/disposable-email-doman-and-openai.mdx
diff --git a/docs/faqs/mindsdb-with-php.mdx b/docs 2/faqs/mindsdb-with-php.mdx
similarity index 100%
rename from docs/faqs/mindsdb-with-php.mdx
rename to docs 2/faqs/mindsdb-with-php.mdx
diff --git a/docs/faqs/missing-required-cpu-features.mdx b/docs 2/faqs/missing-required-cpu-features.mdx
similarity index 100%
rename from docs/faqs/missing-required-cpu-features.mdx
rename to docs 2/faqs/missing-required-cpu-features.mdx
diff --git a/docs/faqs/persist-predictions.mdx b/docs 2/faqs/persist-predictions.mdx
similarity index 100%
rename from docs/faqs/persist-predictions.mdx
rename to docs 2/faqs/persist-predictions.mdx
diff --git a/docs/favicon-dark.png b/docs 2/favicon-dark.png
similarity index 100%
rename from docs/favicon-dark.png
rename to docs 2/favicon-dark.png
diff --git a/docs/favicon.png b/docs 2/favicon.png
similarity index 100%
rename from docs/favicon.png
rename to docs 2/favicon.png
diff --git a/docs/favicon_old.png b/docs 2/favicon_old.png
similarity index 100%
rename from docs/favicon_old.png
rename to docs 2/favicon_old.png
diff --git a/docs/features/ai-integrations.mdx b/docs 2/features/ai-integrations.mdx
similarity index 100%
rename from docs/features/ai-integrations.mdx
rename to docs 2/features/ai-integrations.mdx
diff --git a/docs/features/automation.mdx b/docs 2/features/automation.mdx
similarity index 100%
rename from docs/features/automation.mdx
rename to docs 2/features/automation.mdx
diff --git a/docs/features/data-integrations.mdx b/docs 2/features/data-integrations.mdx
similarity index 100%
rename from docs/features/data-integrations.mdx
rename to docs 2/features/data-integrations.mdx
diff --git a/docs/features/model-management.mdx b/docs 2/features/model-management.mdx
similarity index 100%
rename from docs/features/model-management.mdx
rename to docs 2/features/model-management.mdx
diff --git a/docs/generative-ai-tables.mdx b/docs 2/generative-ai-tables.mdx
similarity index 100%
rename from docs/generative-ai-tables.mdx
rename to docs 2/generative-ai-tables.mdx
diff --git a/docs 2/homepage.mdx b/docs 2/homepage.mdx
new file mode 100644
index 00000000000..65a14624dc1
--- /dev/null
+++ b/docs 2/homepage.mdx
@@ -0,0 +1,29 @@
+---
+mode: "custom"
+---
+
+
+
+ Documentation
+
+
+ Everything you need to get up and running. Choose a path below to get started.
+
+
+
+
+
+
+ I want to use the Query Engine to build AI features that learn from my data and answer questions intelligently.
+
+ Go to MindsDB Documentation.
+
+
+
+
+ I want to use MindsDB Anton, an AI coworker, to explore my data and uncover insights β no code required.
+
+ Go to Anton GitHub repository.
+
+
+
diff --git a/docs/integrations/ai-engines/amazon-bedrock.mdx b/docs 2/integrations/ai-engines/amazon-bedrock.mdx
similarity index 100%
rename from docs/integrations/ai-engines/amazon-bedrock.mdx
rename to docs 2/integrations/ai-engines/amazon-bedrock.mdx
diff --git a/docs/integrations/ai-engines/anomaly.mdx b/docs 2/integrations/ai-engines/anomaly.mdx
similarity index 100%
rename from docs/integrations/ai-engines/anomaly.mdx
rename to docs 2/integrations/ai-engines/anomaly.mdx
diff --git a/docs/integrations/ai-engines/anthropic.mdx b/docs 2/integrations/ai-engines/anthropic.mdx
similarity index 100%
rename from docs/integrations/ai-engines/anthropic.mdx
rename to docs 2/integrations/ai-engines/anthropic.mdx
diff --git a/docs/integrations/ai-engines/autokeras.mdx b/docs 2/integrations/ai-engines/autokeras.mdx
similarity index 100%
rename from docs/integrations/ai-engines/autokeras.mdx
rename to docs 2/integrations/ai-engines/autokeras.mdx
diff --git a/docs/integrations/ai-engines/autosklearn.mdx b/docs 2/integrations/ai-engines/autosklearn.mdx
similarity index 100%
rename from docs/integrations/ai-engines/autosklearn.mdx
rename to docs 2/integrations/ai-engines/autosklearn.mdx
diff --git a/docs/integrations/ai-engines/byom.mdx b/docs 2/integrations/ai-engines/byom.mdx
similarity index 91%
rename from docs/integrations/ai-engines/byom.mdx
rename to docs 2/integrations/ai-engines/byom.mdx
index 736af426317..ffdc564ff57 100644
--- a/docs/integrations/ai-engines/byom.mdx
+++ b/docs 2/integrations/ai-engines/byom.mdx
@@ -25,7 +25,7 @@ Let's briefly go over the files that need to be uploaded:
```py
class CustomPredictor():
- β
+
def train(self, df, target_col, args=None):
return ''
@@ -39,38 +39,41 @@ Let's briefly go over the files that need to be uploaded:
```py
import os
import pandas as pd
- β
+
from sklearn.cross_decomposition import PLSRegression
from sklearn import preprocessing
- β
+
class CustomPredictor():
- β
+
def train(self, df, target_col, args=None):
print(args, '1111')
- β
+
self.target_col = target_col
y = df[self.target_col]
x = df.drop(columns=self.target_col)
x_cols = list(x.columns)
- β
+
x_scaler = preprocessing.StandardScaler().fit(x)
y_scaler = preprocessing.StandardScaler().fit(y.values.reshape(-1, 1))
- β
+
xs = x_scaler.transform(x)
ys = y_scaler.transform(y.values.reshape(-1, 1))
- β
+
pls = PLSRegression(n_components=1)
pls.fit(xs, ys)
- β
+
+ self.pls = pls
+ self.y_scaler = y_scaler
+
T = pls.x_scores_
W = pls.x_weights_
P = pls.x_loadings_
R = pls.x_rotations_
- β
+
self.x_cols = x_cols
self.x_scaler = x_scaler
self.P = P
- β
+
def calc_limit(df):
res = None
for column in df.columns:
@@ -89,32 +92,32 @@ Let's briefly go over the files that need to be uploaded:
except:
res = tbl
return res
- β
+
trdf = pd.DataFrame()
trdf[self.target_col] = y.values
trdf['T1'] = T.squeeze()
limit = calc_limit(trdf).reset_index()
- β
+
self.limit = limit
- β
+
return "Trained predictor ready to be stored"
- β
+
def predict(self, df):
- β
- yt = df[self.target_col].values
+
+
xt = df[self.x_cols]
- β
+
xt = self.x_scaler.transform(xt)
- β
+
excess_cols = list(set(df.columns) - set(self.x_cols))
- β
+
pred_df = df[excess_cols].copy()
- β
- pred_df[self.target_col] = yt
+
+ ys_pred = self.pls.predict(xt)
+ y_pred = self.y_scaler.inverse_transform(ys_pred).ravel()
+ pred_df[self.target_col] = y_pred
+
pred_df['T1'] = (xt @ self.P).squeeze()
- β
- pred_df = pd.merge(pred_df, self.limit[[self.target_col, 'lower', 'upper']], how='left', on=self.target_col)
- β
return pred_df
```
@@ -195,12 +198,14 @@ USING
ENGINE = 'custom_model_engine';
```
-Let's query for predictions by joining the custom model with the data table.
+Let's query for predictions by joining the custom model with the data table. Please note that when querying for predictions, do not include the target column in the `input` data selection.
```sql
-SELECT input.feature_column, model_target_column
-FROM my_integration.my_table as input
-JOIN custom_model as model;
+SELECT
+ input.feature_column,
+ model.target AS predicted_target
+FROM my_integration.my_table AS input
+JOIN custom_model AS model;
```
diff --git a/docs/integrations/ai-engines/clipdrop.mdx b/docs 2/integrations/ai-engines/clipdrop.mdx
similarity index 100%
rename from docs/integrations/ai-engines/clipdrop.mdx
rename to docs 2/integrations/ai-engines/clipdrop.mdx
diff --git a/docs/integrations/ai-engines/cohere.mdx b/docs 2/integrations/ai-engines/cohere.mdx
similarity index 100%
rename from docs/integrations/ai-engines/cohere.mdx
rename to docs 2/integrations/ai-engines/cohere.mdx
diff --git a/docs/integrations/ai-engines/google_gemini.mdx b/docs 2/integrations/ai-engines/google_gemini.mdx
similarity index 100%
rename from docs/integrations/ai-engines/google_gemini.mdx
rename to docs 2/integrations/ai-engines/google_gemini.mdx
diff --git a/docs/integrations/ai-engines/huggingface.mdx b/docs 2/integrations/ai-engines/huggingface.mdx
similarity index 100%
rename from docs/integrations/ai-engines/huggingface.mdx
rename to docs 2/integrations/ai-engines/huggingface.mdx
diff --git a/docs/integrations/ai-engines/huggingface_inference_api.mdx b/docs 2/integrations/ai-engines/huggingface_inference_api.mdx
similarity index 100%
rename from docs/integrations/ai-engines/huggingface_inference_api.mdx
rename to docs 2/integrations/ai-engines/huggingface_inference_api.mdx
diff --git a/docs/integrations/ai-engines/langchain.mdx b/docs 2/integrations/ai-engines/langchain.mdx
similarity index 100%
rename from docs/integrations/ai-engines/langchain.mdx
rename to docs 2/integrations/ai-engines/langchain.mdx
diff --git a/docs/integrations/ai-engines/langchain_embedding.mdx b/docs 2/integrations/ai-engines/langchain_embedding.mdx
similarity index 100%
rename from docs/integrations/ai-engines/langchain_embedding.mdx
rename to docs 2/integrations/ai-engines/langchain_embedding.mdx
diff --git a/docs/integrations/ai-engines/lightfm.mdx b/docs 2/integrations/ai-engines/lightfm.mdx
similarity index 100%
rename from docs/integrations/ai-engines/lightfm.mdx
rename to docs 2/integrations/ai-engines/lightfm.mdx
diff --git a/docs/integrations/ai-engines/litellm.mdx b/docs 2/integrations/ai-engines/litellm.mdx
similarity index 100%
rename from docs/integrations/ai-engines/litellm.mdx
rename to docs 2/integrations/ai-engines/litellm.mdx
diff --git a/docs/integrations/ai-engines/llamaindex.mdx b/docs 2/integrations/ai-engines/llamaindex.mdx
similarity index 100%
rename from docs/integrations/ai-engines/llamaindex.mdx
rename to docs 2/integrations/ai-engines/llamaindex.mdx
diff --git a/docs/integrations/ai-engines/merlion.mdx b/docs 2/integrations/ai-engines/merlion.mdx
similarity index 100%
rename from docs/integrations/ai-engines/merlion.mdx
rename to docs 2/integrations/ai-engines/merlion.mdx
diff --git a/docs/integrations/ai-engines/mlflow.mdx b/docs 2/integrations/ai-engines/mlflow.mdx
similarity index 100%
rename from docs/integrations/ai-engines/mlflow.mdx
rename to docs 2/integrations/ai-engines/mlflow.mdx
diff --git a/docs/integrations/ai-engines/monkeylearn.mdx b/docs 2/integrations/ai-engines/monkeylearn.mdx
similarity index 100%
rename from docs/integrations/ai-engines/monkeylearn.mdx
rename to docs 2/integrations/ai-engines/monkeylearn.mdx
diff --git a/docs/integrations/ai-engines/ollama.mdx b/docs 2/integrations/ai-engines/ollama.mdx
similarity index 93%
rename from docs/integrations/ai-engines/ollama.mdx
rename to docs 2/integrations/ai-engines/ollama.mdx
index 287738d54b7..9cca48ce604 100644
--- a/docs/integrations/ai-engines/ollama.mdx
+++ b/docs 2/integrations/ai-engines/ollama.mdx
@@ -28,17 +28,17 @@ Here are the recommended system specifications:
Create an AI engine from the [Ollama handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ollama_handler).
```sql
-CREATE ML_ENGINE ollama_engine
+CREATE ML_ENGINE ollama
FROM ollama;
```
-Create a model using `ollama_engine` as an engine.
+Create a model using `ollama` as an engine.
```sql
CREATE MODEL ollama_model
PREDICT completion
USING
- engine = 'ollama_engine', -- engine name as created via CREATE ML_ENGINE
+ engine = 'ollama', -- engine name as created via CREATE ML_ENGINE
model_name = 'model-name', -- model run with 'ollama run model-name'
ollama_serve_url = 'http://localhost:11434';
```
@@ -51,7 +51,7 @@ You can find [available models here](https://github.com/ollama/ollama?tab=readme
## Usage
-The following usage examples utilize `ollama_engine` to create a model with the `CREATE MODEL` statement.
+The following usage examples utilize `ollama` to create a model with the `CREATE MODEL` statement.
Deploy and use the `llama3` model.
@@ -63,7 +63,7 @@ Now deploy this model within MindsDB.
CREATE MODEL llama3_model
PREDICT completion
USING
- engine = 'ollama_engine',
+ engine = 'ollama',
model_name = 'llama3';
```
diff --git a/docs/integrations/ai-engines/openai.mdx b/docs 2/integrations/ai-engines/openai.mdx
similarity index 100%
rename from docs/integrations/ai-engines/openai.mdx
rename to docs 2/integrations/ai-engines/openai.mdx
diff --git a/docs/integrations/ai-engines/popularity-recommender.mdx b/docs 2/integrations/ai-engines/popularity-recommender.mdx
similarity index 100%
rename from docs/integrations/ai-engines/popularity-recommender.mdx
rename to docs 2/integrations/ai-engines/popularity-recommender.mdx
diff --git a/docs/integrations/ai-engines/portkey.mdx b/docs 2/integrations/ai-engines/portkey.mdx
similarity index 100%
rename from docs/integrations/ai-engines/portkey.mdx
rename to docs 2/integrations/ai-engines/portkey.mdx
diff --git a/docs/integrations/ai-engines/pycaret.mdx b/docs 2/integrations/ai-engines/pycaret.mdx
similarity index 100%
rename from docs/integrations/ai-engines/pycaret.mdx
rename to docs 2/integrations/ai-engines/pycaret.mdx
diff --git a/docs/integrations/ai-engines/ray-serve.mdx b/docs 2/integrations/ai-engines/ray-serve.mdx
similarity index 100%
rename from docs/integrations/ai-engines/ray-serve.mdx
rename to docs 2/integrations/ai-engines/ray-serve.mdx
diff --git a/docs/integrations/ai-engines/replicate-audio.mdx b/docs 2/integrations/ai-engines/replicate-audio.mdx
similarity index 100%
rename from docs/integrations/ai-engines/replicate-audio.mdx
rename to docs 2/integrations/ai-engines/replicate-audio.mdx
diff --git a/docs/integrations/ai-engines/replicate-img2text.mdx b/docs 2/integrations/ai-engines/replicate-img2text.mdx
similarity index 100%
rename from docs/integrations/ai-engines/replicate-img2text.mdx
rename to docs 2/integrations/ai-engines/replicate-img2text.mdx
diff --git a/docs/integrations/ai-engines/replicate-llm.mdx b/docs 2/integrations/ai-engines/replicate-llm.mdx
similarity index 100%
rename from docs/integrations/ai-engines/replicate-llm.mdx
rename to docs 2/integrations/ai-engines/replicate-llm.mdx
diff --git a/docs/integrations/ai-engines/replicate-text2img.mdx b/docs 2/integrations/ai-engines/replicate-text2img.mdx
similarity index 100%
rename from docs/integrations/ai-engines/replicate-text2img.mdx
rename to docs 2/integrations/ai-engines/replicate-text2img.mdx
diff --git a/docs/integrations/ai-engines/replicate-text2video.mdx b/docs 2/integrations/ai-engines/replicate-text2video.mdx
similarity index 100%
rename from docs/integrations/ai-engines/replicate-text2video.mdx
rename to docs 2/integrations/ai-engines/replicate-text2video.mdx
diff --git a/docs/integrations/ai-engines/twelvelabs.mdx b/docs 2/integrations/ai-engines/twelvelabs.mdx
similarity index 100%
rename from docs/integrations/ai-engines/twelvelabs.mdx
rename to docs 2/integrations/ai-engines/twelvelabs.mdx
diff --git a/docs/integrations/ai-engines/vertex.mdx b/docs 2/integrations/ai-engines/vertex.mdx
similarity index 100%
rename from docs/integrations/ai-engines/vertex.mdx
rename to docs 2/integrations/ai-engines/vertex.mdx
diff --git a/docs/integrations/ai-engines/xgboost.mdx b/docs 2/integrations/ai-engines/xgboost.mdx
similarity index 100%
rename from docs/integrations/ai-engines/xgboost.mdx
rename to docs 2/integrations/ai-engines/xgboost.mdx
diff --git a/docs/integrations/ai-overview.mdx b/docs 2/integrations/ai-overview.mdx
similarity index 100%
rename from docs/integrations/ai-overview.mdx
rename to docs 2/integrations/ai-overview.mdx
diff --git a/docs/integrations/app-integrations/binance.mdx b/docs 2/integrations/app-integrations/binance.mdx
similarity index 77%
rename from docs/integrations/app-integrations/binance.mdx
rename to docs 2/integrations/app-integrations/binance.mdx
index 6e622db5c3e..596e8a32531 100644
--- a/docs/integrations/app-integrations/binance.mdx
+++ b/docs 2/integrations/app-integrations/binance.mdx
@@ -69,51 +69,3 @@ LIMIT 10000;
Supported intervals are [listed here](https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data)
-
-### Train a Model
-
-Here is how to create a time series model using 10000 trading intervals in the past with a duration of 1m.
-
-```sql
-CREATE MODEL mindsdb.btc_forecast_model
-FROM my_binance
-(
- SELECT * FROM aggregated_trade_data
- WHERE symbol = 'BTCUSDT'
- AND close_time < '2023-01-01'
- AND interval = '1m'
- LIMIT 10000;
-)
-
-PREDICT open_price
-
-ORDER BY open_time
-WINDOW 100
-HORIZON 10;
-```
-
-
-For more accuracy, the limit can be set to a higher value (e.g. 100,000)
-
-
-### Making Predictions
-
-First, let's create a view for the most recent BTCUSDT aggregate trade data:
-
-```sql
-CREATE VIEW recent_btcusdt_data AS (
- SELECT * FROM my_binance.aggregated_trade_data
- WHERE symbol = 'BTCUSDT'
-)
-```
-
-Now let's predict the future price of BTC:
-
-```sql
-SELECT m.*
-FROM recent_btcusdt_data AS t
-JOIN mindsdb.btc_forecast_model AS m
-WHERE m.open_time > LATEST
-```
-
-This will give the predicted BTC price for the next 10 minutes (as the horizon is set to 10) in terms of USDT.
diff --git a/docs/integrations/app-integrations/confluence.mdx b/docs 2/integrations/app-integrations/confluence.mdx
similarity index 100%
rename from docs/integrations/app-integrations/confluence.mdx
rename to docs 2/integrations/app-integrations/confluence.mdx
diff --git a/docs/integrations/app-integrations/dockerhub.mdx b/docs 2/integrations/app-integrations/dockerhub.mdx
similarity index 100%
rename from docs/integrations/app-integrations/dockerhub.mdx
rename to docs 2/integrations/app-integrations/dockerhub.mdx
diff --git a/docs/integrations/app-integrations/email.mdx b/docs 2/integrations/app-integrations/email.mdx
similarity index 100%
rename from docs/integrations/app-integrations/email.mdx
rename to docs 2/integrations/app-integrations/email.mdx
diff --git a/docs/integrations/app-integrations/github.mdx b/docs 2/integrations/app-integrations/github.mdx
similarity index 100%
rename from docs/integrations/app-integrations/github.mdx
rename to docs 2/integrations/app-integrations/github.mdx
diff --git a/docs/integrations/app-integrations/gitlab.mdx b/docs 2/integrations/app-integrations/gitlab.mdx
similarity index 100%
rename from docs/integrations/app-integrations/gitlab.mdx
rename to docs 2/integrations/app-integrations/gitlab.mdx
diff --git a/docs/integrations/app-integrations/gmail.mdx b/docs 2/integrations/app-integrations/gmail.mdx
similarity index 100%
rename from docs/integrations/app-integrations/gmail.mdx
rename to docs 2/integrations/app-integrations/gmail.mdx
diff --git a/docs/integrations/app-integrations/gong.mdx b/docs 2/integrations/app-integrations/gong.mdx
similarity index 100%
rename from docs/integrations/app-integrations/gong.mdx
rename to docs 2/integrations/app-integrations/gong.mdx
diff --git a/docs/integrations/app-integrations/google-analytics.mdx b/docs 2/integrations/app-integrations/google-analytics.mdx
similarity index 100%
rename from docs/integrations/app-integrations/google-analytics.mdx
rename to docs 2/integrations/app-integrations/google-analytics.mdx
diff --git a/docs/integrations/app-integrations/google-calendar.mdx b/docs 2/integrations/app-integrations/google-calendar.mdx
similarity index 100%
rename from docs/integrations/app-integrations/google-calendar.mdx
rename to docs 2/integrations/app-integrations/google-calendar.mdx
diff --git a/docs/integrations/app-integrations/hackernews.mdx b/docs 2/integrations/app-integrations/hackernews.mdx
similarity index 100%
rename from docs/integrations/app-integrations/hackernews.mdx
rename to docs 2/integrations/app-integrations/hackernews.mdx
diff --git a/docs/integrations/app-integrations/hubspot.mdx b/docs 2/integrations/app-integrations/hubspot.mdx
similarity index 100%
rename from docs/integrations/app-integrations/hubspot.mdx
rename to docs 2/integrations/app-integrations/hubspot.mdx
diff --git a/docs/integrations/app-integrations/instatus.mdx b/docs 2/integrations/app-integrations/instatus.mdx
similarity index 90%
rename from docs/integrations/app-integrations/instatus.mdx
rename to docs 2/integrations/app-integrations/instatus.mdx
index 93624974bd7..65f97e68fc1 100644
--- a/docs/integrations/app-integrations/instatus.mdx
+++ b/docs 2/integrations/app-integrations/instatus.mdx
@@ -41,7 +41,7 @@ To create a new status page, use the `INSERT` statement:
```sql
INSERT INTO mindsdb_instatus.status_pages (email, name, subdomain, components, logoUrl, faviconUrl, websiteUrl, language, useLargeHeader, brandColor, okColor, disruptedColor, degradedColor, downColor, noticeColor, unknownColor, googleAnalytics, subscribeBySms, smsService, twilioSid, twilioToken, twilioSender, nexmoKey, nexmoSecret, nexmoSender, htmlInMeta, htmlAboveHeader, htmlBelowHeader, htmlAboveFooter, htmlBelowFooter, htmlBelowSummary, cssGlobal, launchDate, dateFormat, dateFormatShort, timeFormat)
-VALUES ('yourname@gmail.com', 'mindsdb', 'mindsdb-instatus', '["Website", "App", "API"]', 'https://instatus.com/sample.png', 'https://instatus.com/favicon-32x32.png', 'https://instatus.com', 'en', true, '#111', '#33B17E', '#FF8C03', '#ECC94B', '#DC123D', '#70808F', '#DFE0E1', 'UA-00000000-1', true, 'twilio', 'YOUR_TWILIO_SID', 'YOUR_TWILIO_TOKEN', 'YOUR_TWILIO_SENDER', null, null, null, null, null, null, null, null, null, null, 'MMMMMM d, yyyy', 'MMM yyyy', 'p');
+VALUES ('yourname@gmail.com', 'mindsdb', 'mindsdb-instatus', '["Website", "App", "API"]', 'https://instatus.com/sample.png', 'https://instatus.com/favicon-32x32.png', 'https://instatus.com', 'en', 'true', '#111', '#33B17E', '#FF8C03', '#ECC94B', '#DC123D', '#70808F', '#DFE0E1', 'UA-00000000-1', 'true', 'twilio', 'YOUR_TWILIO_SID', 'YOUR_TWILIO_TOKEN', 'YOUR_TWILIO_SENDER', null, null, null, null, null, null, null, null, null, null, null, 'MMMMMM d, yyyy', 'MMM yyyy', 'p');
```
diff --git a/docs/integrations/app-integrations/intercom.mdx b/docs 2/integrations/app-integrations/intercom.mdx
similarity index 100%
rename from docs/integrations/app-integrations/intercom.mdx
rename to docs 2/integrations/app-integrations/intercom.mdx
diff --git a/docs 2/integrations/app-integrations/jira.mdx b/docs 2/integrations/app-integrations/jira.mdx
new file mode 100644
index 00000000000..3671aa31af4
--- /dev/null
+++ b/docs 2/integrations/app-integrations/jira.mdx
@@ -0,0 +1,100 @@
+---
+title: Jira
+sidebarTitle: Jira
+---
+
+This documentation describes the integration of MindsDB with [Jira](https://www.atlassian.com/software/jira/guides/getting-started/introduction), the #1 agile project management tool used by teams to plan, track, release and support world-class software with confidence.
+The integration allows MindsDB to access data from Jira and enhance it with AI capabilities.
+
+## Prerequisites
+
+Before proceeding, ensure the following prerequisites are met:
+
+1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
+2. To connect Jira to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies).
+
+## Connection
+
+Establish a connection to Jira from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/jira_handler) as an engine.
+
+```sql
+CREATE DATABASE jira_datasource
+WITH
+ ENGINE = 'jira',
+ PARAMETERS = {
+ "jira_url": "https://example.atlassian.net",
+ "jira_username": "john.doe@example.com",
+ "jira_api_token": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6",
+ "cloud": true
+ };
+```
+
+Required connection parameters include the following:
+
+- `jira_url`: The base URL for your Jira instance/server.
+- `cloud` (optional): Set `true` for Jira Cloud or `false` for Jira Server. Defaults to `true`.
+- Jira Cloud credentials:
+ - `jira_username`
+ - `jira_api_token`
+- Jira Server credentials (set `cloud: false`):
+ - Either `jira_personal_access_token`, **or**
+ - `jira_username` and `jira_password`
+
+
+Refer this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account.
+
+
+## Usage
+
+Retrieve data from a specified table by providing the integration and table names:
+
+```sql
+SELECT *
+FROM jira_datasource.table_name
+LIMIT 10;
+```
+
+
+The above example utilize `jira_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command.
+
+
+## Available tables
+
+The handler registers the following tables:
+
+- `projects`: Basic project metadata.
+- `issues`: Normalized issue fields (project, summary, description, priority, status, labels, components, creator/reporter/assignee, timestamps).
+- `attachments`: Attachments derived from issues.
+- `comments`: Comments derived from issues.
+- `users`: Users available to the current Jira context. Column set depends on `cloud`:
+ - Cloud columns: `accountId, accountType, emailAddress, displayName, active, timeZone, locale, applicationRoles, avatarUrls, groups`
+ - Server columns: `key, name, emailAddress, displayName, active, timeZone, locale, lastLoginTime, applicationRoles, avatarUrls, groups, deleted, expand`
+- `groups`: User groups (`groupId, name, html`).
+
+Attachments and comments are fetched by first loading issues. Use `LIMIT` whenever possible to reduce API calls.
+
+## Query examples
+
+List projects:
+
+```sql
+SELECT id, key, name
+FROM jira_datasource.projects;
+```
+
+Fetch recent issues for a project:
+
+```sql
+SELECT key, summary, status, assignee, created
+FROM jira_datasource.issues
+WHERE project_key = 'ENG'
+LIMIT 50;
+```
+
+Retrieve comments for a specific issue:
+
+```sql
+SELECT body, author, created
+FROM jira_datasource.comments
+WHERE issue_key = 'ENG-123';
+```
diff --git a/docs/integrations/app-integrations/mediawiki.mdx b/docs 2/integrations/app-integrations/mediawiki.mdx
similarity index 100%
rename from docs/integrations/app-integrations/mediawiki.mdx
rename to docs 2/integrations/app-integrations/mediawiki.mdx
diff --git a/docs/integrations/app-integrations/microsoft-onedrive.mdx b/docs 2/integrations/app-integrations/microsoft-onedrive.mdx
similarity index 100%
rename from docs/integrations/app-integrations/microsoft-onedrive.mdx
rename to docs 2/integrations/app-integrations/microsoft-onedrive.mdx
diff --git a/docs/integrations/app-integrations/microsoft-teams.mdx b/docs 2/integrations/app-integrations/microsoft-teams.mdx
similarity index 100%
rename from docs/integrations/app-integrations/microsoft-teams.mdx
rename to docs 2/integrations/app-integrations/microsoft-teams.mdx
diff --git a/docs/integrations/app-integrations/netsuite.mdx b/docs 2/integrations/app-integrations/netsuite.mdx
similarity index 100%
rename from docs/integrations/app-integrations/netsuite.mdx
rename to docs 2/integrations/app-integrations/netsuite.mdx
diff --git a/docs/integrations/app-integrations/newsapi.mdx b/docs 2/integrations/app-integrations/newsapi.mdx
similarity index 100%
rename from docs/integrations/app-integrations/newsapi.mdx
rename to docs 2/integrations/app-integrations/newsapi.mdx
diff --git a/docs/integrations/app-integrations/paypal.mdx b/docs 2/integrations/app-integrations/paypal.mdx
similarity index 100%
rename from docs/integrations/app-integrations/paypal.mdx
rename to docs 2/integrations/app-integrations/paypal.mdx
diff --git a/docs/integrations/app-integrations/plaid.mdx b/docs 2/integrations/app-integrations/plaid.mdx
similarity index 100%
rename from docs/integrations/app-integrations/plaid.mdx
rename to docs 2/integrations/app-integrations/plaid.mdx
diff --git a/docs/integrations/app-integrations/pypi.mdx b/docs 2/integrations/app-integrations/pypi.mdx
similarity index 100%
rename from docs/integrations/app-integrations/pypi.mdx
rename to docs 2/integrations/app-integrations/pypi.mdx
diff --git a/docs/integrations/app-integrations/reddit.mdx b/docs 2/integrations/app-integrations/reddit.mdx
similarity index 100%
rename from docs/integrations/app-integrations/reddit.mdx
rename to docs 2/integrations/app-integrations/reddit.mdx
diff --git a/docs 2/integrations/app-integrations/rest-api.mdx b/docs 2/integrations/app-integrations/rest-api.mdx
new file mode 100644
index 00000000000..1ead2f519ae
--- /dev/null
+++ b/docs 2/integrations/app-integrations/rest-api.mdx
@@ -0,0 +1,148 @@
+---
+title: REST API
+sidebarTitle: REST API
+---
+
+In this section, we present how to connect any REST API to MindsDB using bearer-token authentication.
+
+The REST API handler is a generic integration that lets you forward HTTP requests to any API through MindsDB using stored credentials. Unlike named integrations (HubSpot, Shopify, etc.), it requires no handler-specific knowledge β just a base URL and a bearer token.
+
+This is useful for APIs that MindsDB doesn't have a dedicated handler for, or when you only need direct HTTP access without SQL table mapping.
+
+## Connection
+
+The required arguments to establish a connection are as follows:
+
+- `base_url`: the base URL of the REST API (e.g. `https://api.example.com`). All request paths are appended to this URL.
+- `bearer_token`: the token used for authentication. Injected as `Authorization: Bearer ` on every request.
+
+Optional arguments:
+
+- `default_headers`: a JSON object of static headers added to every request (e.g. `{"Accept": "application/json"}`).
+- `allowed_hosts`: a list of allowed hostnames for requests. Defaults to the hostname of `base_url`. Use `["*"]` to disable host containment.
+- `test_path`: the path used by the test endpoint to verify connectivity. Defaults to `/`.
+
+To connect a REST API to MindsDB, create a new database:
+
+```sql
+CREATE DATABASE my_api
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://api.example.com",
+ "bearer_token": "your_token_here"
+};
+```
+
+### Example: Connect to HubSpot
+
+```sql
+CREATE DATABASE my_hubspot
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://api.hubapi.com",
+ "bearer_token": "pat-eu1-..."
+};
+```
+
+### Example: Connect with default headers and a custom test path
+
+```sql
+CREATE DATABASE my_internal_api
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://internal.example.com/api/v2",
+ "bearer_token": "sk-...",
+ "default_headers": {"Accept": "application/json"},
+ "test_path": "/health"
+};
+```
+
+### Example: Multiple allowed hosts
+
+```sql
+CREATE DATABASE my_multi_region_api
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://api.example.com",
+ "bearer_token": "your_token",
+ "allowed_hosts": ["api.example.com", "api.eu.example.com"]
+};
+```
+
+## Usage
+
+This handler is **passthrough-only** β it does not expose SQL tables. All interaction is through the REST passthrough endpoint.
+
+### Sending requests
+
+Forward HTTP requests to the upstream API:
+
+```
+POST /api/integrations/my_api/passthrough
+```
+
+```json
+{
+ "method": "GET",
+ "path": "/v1/users",
+ "query": {"limit": "10"},
+ "headers": {"Accept": "application/json"}
+}
+```
+
+The response wraps the upstream HTTP response:
+
+```json
+{
+ "status_code": 200,
+ "headers": {"content-type": "application/json"},
+ "body": {"results": [...]},
+ "content_type": "application/json"
+}
+```
+
+Supported HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`.
+
+### Testing the connection
+
+Verify that the base URL, token, and host allowlist are configured correctly:
+
+```
+POST /api/integrations/my_api/passthrough/test
+```
+
+A successful response:
+
+```json
+{"ok": true, "status_code": 200, "host": "api.example.com", "latency_ms": 140}
+```
+
+A failed response:
+
+```json
+{"ok": false, "error_code": "auth_failed", "message": "upstream rejected credentials; base URL and allowlist look correct"}
+```
+
+## Security
+
+- Credentials are stored in MindsDB and never exposed to the caller.
+- Requests are restricted to hostnames in the allowlist. Private and loopback IP addresses are rejected by default.
+- Callers cannot override `Authorization`, `Host`, `Cookie`, or `Proxy-*` headers.
+- If the upstream API echoes the token in responses, it is replaced with `[REDACTED_API_KEY]`.
+- Request bodies are capped at 1 MB, response bodies at 10 MB.
+
+
+**`host 'X' is not in the datasource allowlist`**
+
+The request path resolved to a different hostname than `base_url`. Add the hostname to `allowed_hosts`, or use `["*"]` to disable host containment (not recommended for production).
+
+
+
+**`upstream rejected credentials (401/403)`**
+
+The token is invalid, expired, or missing required scopes. Verify the token with the upstream API provider.
+
+
+
+For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/rest_api_handler/README.md).
+
diff --git a/docs/integrations/app-integrations/salesforce.mdx b/docs 2/integrations/app-integrations/salesforce.mdx
similarity index 100%
rename from docs/integrations/app-integrations/salesforce.mdx
rename to docs 2/integrations/app-integrations/salesforce.mdx
diff --git a/docs/integrations/app-integrations/sendinblue.mdx b/docs 2/integrations/app-integrations/sendinblue.mdx
similarity index 100%
rename from docs/integrations/app-integrations/sendinblue.mdx
rename to docs 2/integrations/app-integrations/sendinblue.mdx
diff --git a/docs/integrations/app-integrations/shopify.mdx b/docs 2/integrations/app-integrations/shopify.mdx
similarity index 100%
rename from docs/integrations/app-integrations/shopify.mdx
rename to docs 2/integrations/app-integrations/shopify.mdx
diff --git a/docs/integrations/app-integrations/slack.mdx b/docs 2/integrations/app-integrations/slack.mdx
similarity index 100%
rename from docs/integrations/app-integrations/slack.mdx
rename to docs 2/integrations/app-integrations/slack.mdx
diff --git a/docs/integrations/app-integrations/strapi.mdx b/docs 2/integrations/app-integrations/strapi.mdx
similarity index 90%
rename from docs/integrations/app-integrations/strapi.mdx
rename to docs 2/integrations/app-integrations/strapi.mdx
index cdb66e063e4..e92d560d632 100644
--- a/docs/integrations/app-integrations/strapi.mdx
+++ b/docs 2/integrations/app-integrations/strapi.mdx
@@ -14,7 +14,7 @@ To use the Strapi Handler, initialize it with the following parameters:
- `host`: Strapi server host.
- `port`: Strapi server port (typically 1337).
- `api_token`: Strapi server API token for authentication.
-- `plural_api_ids`: List of plural API IDs for the collections.
+- `endpoints`: List of collection endpoints.
To get started, create a Strapi engine database with the following SQL command:
@@ -25,7 +25,7 @@ PARAMETERS = {
"host" : "", --- Host (can be an IP address or URL).
"port" : "", --- Common port is 1337.
"api_token": "", --- API token of the Strapi server.
- "plural_api_ids" : [""] --- Plural API IDs of the collections.
+ "endpoints" : [""] --- Collection endpoints.
};
```
@@ -43,7 +43,7 @@ Filter data based on specific criteria:
```sql
SELECT *
FROM myshop.
-WHERE id =
+WHERE documentId = '';
```
Insert new data into a collection:
@@ -64,7 +64,7 @@ Modify existing data in a collection:
```sql
UPDATE myshop.
SET = , = , ...
-WHERE id = ;
+WHERE documentId = '';
```
diff --git a/docs/integrations/app-integrations/stripe.mdx b/docs 2/integrations/app-integrations/stripe.mdx
similarity index 100%
rename from docs/integrations/app-integrations/stripe.mdx
rename to docs 2/integrations/app-integrations/stripe.mdx
diff --git a/docs/integrations/app-integrations/symbl.mdx b/docs 2/integrations/app-integrations/symbl.mdx
similarity index 100%
rename from docs/integrations/app-integrations/symbl.mdx
rename to docs 2/integrations/app-integrations/symbl.mdx
diff --git a/docs/integrations/app-integrations/twitter.mdx b/docs 2/integrations/app-integrations/twitter.mdx
similarity index 100%
rename from docs/integrations/app-integrations/twitter.mdx
rename to docs 2/integrations/app-integrations/twitter.mdx
diff --git a/docs/integrations/app-integrations/web-crawler.mdx b/docs 2/integrations/app-integrations/web-crawler.mdx
similarity index 100%
rename from docs/integrations/app-integrations/web-crawler.mdx
rename to docs 2/integrations/app-integrations/web-crawler.mdx
diff --git a/docs/integrations/app-integrations/youtube.mdx b/docs 2/integrations/app-integrations/youtube.mdx
similarity index 100%
rename from docs/integrations/app-integrations/youtube.mdx
rename to docs 2/integrations/app-integrations/youtube.mdx
diff --git a/docs/integrations/data-integrations/airtable.mdx b/docs 2/integrations/data-integrations/airtable.mdx
similarity index 100%
rename from docs/integrations/data-integrations/airtable.mdx
rename to docs 2/integrations/data-integrations/airtable.mdx
diff --git a/docs/integrations/data-integrations/all-data-integrations.mdx b/docs 2/integrations/data-integrations/all-data-integrations.mdx
similarity index 100%
rename from docs/integrations/data-integrations/all-data-integrations.mdx
rename to docs 2/integrations/data-integrations/all-data-integrations.mdx
diff --git a/docs/integrations/data-integrations/amazon-aurora.mdx b/docs 2/integrations/data-integrations/amazon-aurora.mdx
similarity index 100%
rename from docs/integrations/data-integrations/amazon-aurora.mdx
rename to docs 2/integrations/data-integrations/amazon-aurora.mdx
diff --git a/docs/integrations/data-integrations/amazon-dynamodb.mdx b/docs 2/integrations/data-integrations/amazon-dynamodb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/amazon-dynamodb.mdx
rename to docs 2/integrations/data-integrations/amazon-dynamodb.mdx
diff --git a/docs/integrations/data-integrations/amazon-redshift.mdx b/docs 2/integrations/data-integrations/amazon-redshift.mdx
similarity index 100%
rename from docs/integrations/data-integrations/amazon-redshift.mdx
rename to docs 2/integrations/data-integrations/amazon-redshift.mdx
diff --git a/docs/integrations/data-integrations/amazon-s3.mdx b/docs 2/integrations/data-integrations/amazon-s3.mdx
similarity index 100%
rename from docs/integrations/data-integrations/amazon-s3.mdx
rename to docs 2/integrations/data-integrations/amazon-s3.mdx
diff --git a/docs/integrations/data-integrations/apache-cassandra.mdx b/docs 2/integrations/data-integrations/apache-cassandra.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-cassandra.mdx
rename to docs 2/integrations/data-integrations/apache-cassandra.mdx
diff --git a/docs/integrations/data-integrations/apache-druid.mdx b/docs 2/integrations/data-integrations/apache-druid.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-druid.mdx
rename to docs 2/integrations/data-integrations/apache-druid.mdx
diff --git a/docs/integrations/data-integrations/apache-hive.mdx b/docs 2/integrations/data-integrations/apache-hive.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-hive.mdx
rename to docs 2/integrations/data-integrations/apache-hive.mdx
diff --git a/docs/integrations/data-integrations/apache-ignite.mdx b/docs 2/integrations/data-integrations/apache-ignite.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-ignite.mdx
rename to docs 2/integrations/data-integrations/apache-ignite.mdx
diff --git a/docs/integrations/data-integrations/apache-impala.mdx b/docs 2/integrations/data-integrations/apache-impala.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-impala.mdx
rename to docs 2/integrations/data-integrations/apache-impala.mdx
diff --git a/docs/integrations/data-integrations/apache-pinot.mdx b/docs 2/integrations/data-integrations/apache-pinot.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-pinot.mdx
rename to docs 2/integrations/data-integrations/apache-pinot.mdx
diff --git a/docs/integrations/data-integrations/apache-solr.mdx b/docs 2/integrations/data-integrations/apache-solr.mdx
similarity index 100%
rename from docs/integrations/data-integrations/apache-solr.mdx
rename to docs 2/integrations/data-integrations/apache-solr.mdx
diff --git a/docs/integrations/data-integrations/ckan.mdx b/docs 2/integrations/data-integrations/ckan.mdx
similarity index 100%
rename from docs/integrations/data-integrations/ckan.mdx
rename to docs 2/integrations/data-integrations/ckan.mdx
diff --git a/docs/integrations/data-integrations/clickhouse.mdx b/docs 2/integrations/data-integrations/clickhouse.mdx
similarity index 100%
rename from docs/integrations/data-integrations/clickhouse.mdx
rename to docs 2/integrations/data-integrations/clickhouse.mdx
diff --git a/docs/integrations/data-integrations/cloud-spanner.mdx b/docs 2/integrations/data-integrations/cloud-spanner.mdx
similarity index 100%
rename from docs/integrations/data-integrations/cloud-spanner.mdx
rename to docs 2/integrations/data-integrations/cloud-spanner.mdx
diff --git a/docs/integrations/data-integrations/cockroachdb.mdx b/docs 2/integrations/data-integrations/cockroachdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/cockroachdb.mdx
rename to docs 2/integrations/data-integrations/cockroachdb.mdx
diff --git a/docs/integrations/data-integrations/couchbase.mdx b/docs 2/integrations/data-integrations/couchbase.mdx
similarity index 100%
rename from docs/integrations/data-integrations/couchbase.mdx
rename to docs 2/integrations/data-integrations/couchbase.mdx
diff --git a/docs/integrations/data-integrations/cratedb.mdx b/docs 2/integrations/data-integrations/cratedb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/cratedb.mdx
rename to docs 2/integrations/data-integrations/cratedb.mdx
diff --git a/docs/integrations/data-integrations/d0lt.mdx b/docs 2/integrations/data-integrations/d0lt.mdx
similarity index 100%
rename from docs/integrations/data-integrations/d0lt.mdx
rename to docs 2/integrations/data-integrations/d0lt.mdx
diff --git a/docs/integrations/data-integrations/databend.mdx b/docs 2/integrations/data-integrations/databend.mdx
similarity index 100%
rename from docs/integrations/data-integrations/databend.mdx
rename to docs 2/integrations/data-integrations/databend.mdx
diff --git a/docs/integrations/data-integrations/databricks.mdx b/docs 2/integrations/data-integrations/databricks.mdx
similarity index 100%
rename from docs/integrations/data-integrations/databricks.mdx
rename to docs 2/integrations/data-integrations/databricks.mdx
diff --git a/docs/integrations/data-integrations/datastax.mdx b/docs 2/integrations/data-integrations/datastax.mdx
similarity index 100%
rename from docs/integrations/data-integrations/datastax.mdx
rename to docs 2/integrations/data-integrations/datastax.mdx
diff --git a/docs/integrations/data-integrations/duckdb.mdx b/docs 2/integrations/data-integrations/duckdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/duckdb.mdx
rename to docs 2/integrations/data-integrations/duckdb.mdx
diff --git a/docs/integrations/data-integrations/edgelessdb.mdx b/docs 2/integrations/data-integrations/edgelessdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/edgelessdb.mdx
rename to docs 2/integrations/data-integrations/edgelessdb.mdx
diff --git a/docs/integrations/data-integrations/elasticsearch.mdx b/docs 2/integrations/data-integrations/elasticsearch.mdx
similarity index 100%
rename from docs/integrations/data-integrations/elasticsearch.mdx
rename to docs 2/integrations/data-integrations/elasticsearch.mdx
diff --git a/docs/integrations/data-integrations/firebird.mdx b/docs 2/integrations/data-integrations/firebird.mdx
similarity index 100%
rename from docs/integrations/data-integrations/firebird.mdx
rename to docs 2/integrations/data-integrations/firebird.mdx
diff --git a/docs/integrations/data-integrations/google-bigquery.mdx b/docs 2/integrations/data-integrations/google-bigquery.mdx
similarity index 100%
rename from docs/integrations/data-integrations/google-bigquery.mdx
rename to docs 2/integrations/data-integrations/google-bigquery.mdx
diff --git a/docs/integrations/data-integrations/google-cloud-sql.mdx b/docs 2/integrations/data-integrations/google-cloud-sql.mdx
similarity index 100%
rename from docs/integrations/data-integrations/google-cloud-sql.mdx
rename to docs 2/integrations/data-integrations/google-cloud-sql.mdx
diff --git a/docs/integrations/data-integrations/google-sheets.mdx b/docs 2/integrations/data-integrations/google-sheets.mdx
similarity index 100%
rename from docs/integrations/data-integrations/google-sheets.mdx
rename to docs 2/integrations/data-integrations/google-sheets.mdx
diff --git a/docs/integrations/data-integrations/greptimedb.mdx b/docs 2/integrations/data-integrations/greptimedb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/greptimedb.mdx
rename to docs 2/integrations/data-integrations/greptimedb.mdx
diff --git a/docs/integrations/data-integrations/ibm-db2.mdx b/docs 2/integrations/data-integrations/ibm-db2.mdx
similarity index 100%
rename from docs/integrations/data-integrations/ibm-db2.mdx
rename to docs 2/integrations/data-integrations/ibm-db2.mdx
diff --git a/docs/integrations/data-integrations/ibm-informix.mdx b/docs 2/integrations/data-integrations/ibm-informix.mdx
similarity index 100%
rename from docs/integrations/data-integrations/ibm-informix.mdx
rename to docs 2/integrations/data-integrations/ibm-informix.mdx
diff --git a/docs/integrations/data-integrations/influxdb.mdx b/docs 2/integrations/data-integrations/influxdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/influxdb.mdx
rename to docs 2/integrations/data-integrations/influxdb.mdx
diff --git a/docs/integrations/data-integrations/mariadb.mdx b/docs 2/integrations/data-integrations/mariadb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/mariadb.mdx
rename to docs 2/integrations/data-integrations/mariadb.mdx
diff --git a/docs/integrations/data-integrations/matrixone.mdx b/docs 2/integrations/data-integrations/matrixone.mdx
similarity index 100%
rename from docs/integrations/data-integrations/matrixone.mdx
rename to docs 2/integrations/data-integrations/matrixone.mdx
diff --git a/docs/integrations/data-integrations/microsoft-access.mdx b/docs 2/integrations/data-integrations/microsoft-access.mdx
similarity index 100%
rename from docs/integrations/data-integrations/microsoft-access.mdx
rename to docs 2/integrations/data-integrations/microsoft-access.mdx
diff --git a/docs/integrations/data-integrations/microsoft-sql-server.mdx b/docs 2/integrations/data-integrations/microsoft-sql-server.mdx
similarity index 100%
rename from docs/integrations/data-integrations/microsoft-sql-server.mdx
rename to docs 2/integrations/data-integrations/microsoft-sql-server.mdx
diff --git a/docs/integrations/data-integrations/monetdb.mdx b/docs 2/integrations/data-integrations/monetdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/monetdb.mdx
rename to docs 2/integrations/data-integrations/monetdb.mdx
diff --git a/docs/integrations/data-integrations/mongodb.mdx b/docs 2/integrations/data-integrations/mongodb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/mongodb.mdx
rename to docs 2/integrations/data-integrations/mongodb.mdx
diff --git a/docs/integrations/data-integrations/mysql.mdx b/docs 2/integrations/data-integrations/mysql.mdx
similarity index 100%
rename from docs/integrations/data-integrations/mysql.mdx
rename to docs 2/integrations/data-integrations/mysql.mdx
diff --git a/docs/integrations/data-integrations/oceanbase.mdx b/docs 2/integrations/data-integrations/oceanbase.mdx
similarity index 100%
rename from docs/integrations/data-integrations/oceanbase.mdx
rename to docs 2/integrations/data-integrations/oceanbase.mdx
diff --git a/docs/integrations/data-integrations/opengauss.mdx b/docs 2/integrations/data-integrations/opengauss.mdx
similarity index 100%
rename from docs/integrations/data-integrations/opengauss.mdx
rename to docs 2/integrations/data-integrations/opengauss.mdx
diff --git a/docs/integrations/data-integrations/oracle.mdx b/docs 2/integrations/data-integrations/oracle.mdx
similarity index 100%
rename from docs/integrations/data-integrations/oracle.mdx
rename to docs 2/integrations/data-integrations/oracle.mdx
diff --git a/docs/integrations/data-integrations/orioledb.mdx b/docs 2/integrations/data-integrations/orioledb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/orioledb.mdx
rename to docs 2/integrations/data-integrations/orioledb.mdx
diff --git a/docs/integrations/data-integrations/planetscale.mdx b/docs 2/integrations/data-integrations/planetscale.mdx
similarity index 100%
rename from docs/integrations/data-integrations/planetscale.mdx
rename to docs 2/integrations/data-integrations/planetscale.mdx
diff --git a/docs/integrations/data-integrations/postgresql.mdx b/docs 2/integrations/data-integrations/postgresql.mdx
similarity index 100%
rename from docs/integrations/data-integrations/postgresql.mdx
rename to docs 2/integrations/data-integrations/postgresql.mdx
diff --git a/docs/integrations/data-integrations/questdb.mdx b/docs 2/integrations/data-integrations/questdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/questdb.mdx
rename to docs 2/integrations/data-integrations/questdb.mdx
diff --git a/docs/integrations/data-integrations/sap-hana.mdx b/docs 2/integrations/data-integrations/sap-hana.mdx
similarity index 100%
rename from docs/integrations/data-integrations/sap-hana.mdx
rename to docs 2/integrations/data-integrations/sap-hana.mdx
diff --git a/docs/integrations/data-integrations/sap-sql-anywhere.mdx b/docs 2/integrations/data-integrations/sap-sql-anywhere.mdx
similarity index 100%
rename from docs/integrations/data-integrations/sap-sql-anywhere.mdx
rename to docs 2/integrations/data-integrations/sap-sql-anywhere.mdx
diff --git a/docs/integrations/data-integrations/scylladb.mdx b/docs 2/integrations/data-integrations/scylladb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/scylladb.mdx
rename to docs 2/integrations/data-integrations/scylladb.mdx
diff --git a/docs/integrations/data-integrations/singlestore.mdx b/docs 2/integrations/data-integrations/singlestore.mdx
similarity index 100%
rename from docs/integrations/data-integrations/singlestore.mdx
rename to docs 2/integrations/data-integrations/singlestore.mdx
diff --git a/docs/integrations/data-integrations/snowflake.mdx b/docs 2/integrations/data-integrations/snowflake.mdx
similarity index 100%
rename from docs/integrations/data-integrations/snowflake.mdx
rename to docs 2/integrations/data-integrations/snowflake.mdx
diff --git a/docs/integrations/data-integrations/sqlite.mdx b/docs 2/integrations/data-integrations/sqlite.mdx
similarity index 100%
rename from docs/integrations/data-integrations/sqlite.mdx
rename to docs 2/integrations/data-integrations/sqlite.mdx
diff --git a/docs/integrations/data-integrations/starrocks.mdx b/docs 2/integrations/data-integrations/starrocks.mdx
similarity index 100%
rename from docs/integrations/data-integrations/starrocks.mdx
rename to docs 2/integrations/data-integrations/starrocks.mdx
diff --git a/docs/integrations/data-integrations/supabase.mdx b/docs 2/integrations/data-integrations/supabase.mdx
similarity index 100%
rename from docs/integrations/data-integrations/supabase.mdx
rename to docs 2/integrations/data-integrations/supabase.mdx
diff --git a/docs/integrations/data-integrations/surrealdb.mdx b/docs 2/integrations/data-integrations/surrealdb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/surrealdb.mdx
rename to docs 2/integrations/data-integrations/surrealdb.mdx
diff --git a/docs/integrations/data-integrations/tdengine.mdx b/docs 2/integrations/data-integrations/tdengine.mdx
similarity index 100%
rename from docs/integrations/data-integrations/tdengine.mdx
rename to docs 2/integrations/data-integrations/tdengine.mdx
diff --git a/docs/integrations/data-integrations/teradata.mdx b/docs 2/integrations/data-integrations/teradata.mdx
similarity index 100%
rename from docs/integrations/data-integrations/teradata.mdx
rename to docs 2/integrations/data-integrations/teradata.mdx
diff --git a/docs/integrations/data-integrations/tidb.mdx b/docs 2/integrations/data-integrations/tidb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/tidb.mdx
rename to docs 2/integrations/data-integrations/tidb.mdx
diff --git a/docs/integrations/data-integrations/timescaledb.mdx b/docs 2/integrations/data-integrations/timescaledb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/timescaledb.mdx
rename to docs 2/integrations/data-integrations/timescaledb.mdx
diff --git a/docs/integrations/data-integrations/trino.mdx b/docs 2/integrations/data-integrations/trino.mdx
similarity index 100%
rename from docs/integrations/data-integrations/trino.mdx
rename to docs 2/integrations/data-integrations/trino.mdx
diff --git a/docs/integrations/data-integrations/vertica.mdx b/docs 2/integrations/data-integrations/vertica.mdx
similarity index 100%
rename from docs/integrations/data-integrations/vertica.mdx
rename to docs 2/integrations/data-integrations/vertica.mdx
diff --git a/docs/integrations/data-integrations/vitess.mdx b/docs 2/integrations/data-integrations/vitess.mdx
similarity index 100%
rename from docs/integrations/data-integrations/vitess.mdx
rename to docs 2/integrations/data-integrations/vitess.mdx
diff --git a/docs/integrations/data-integrations/yugabytedb.mdx b/docs 2/integrations/data-integrations/yugabytedb.mdx
similarity index 100%
rename from docs/integrations/data-integrations/yugabytedb.mdx
rename to docs 2/integrations/data-integrations/yugabytedb.mdx
diff --git a/docs/integrations/data-overview.mdx b/docs 2/integrations/data-overview.mdx
similarity index 100%
rename from docs/integrations/data-overview.mdx
rename to docs 2/integrations/data-overview.mdx
diff --git a/docs/integrations/files/csv-xlsx-xls.mdx b/docs 2/integrations/files/csv-xlsx-xls.mdx
similarity index 100%
rename from docs/integrations/files/csv-xlsx-xls.mdx
rename to docs 2/integrations/files/csv-xlsx-xls.mdx
diff --git a/docs/integrations/files/json.mdx b/docs 2/integrations/files/json.mdx
similarity index 100%
rename from docs/integrations/files/json.mdx
rename to docs 2/integrations/files/json.mdx
diff --git a/docs/integrations/files/parquet.mdx b/docs 2/integrations/files/parquet.mdx
similarity index 100%
rename from docs/integrations/files/parquet.mdx
rename to docs 2/integrations/files/parquet.mdx
diff --git a/docs/integrations/files/pdf.mdx b/docs 2/integrations/files/pdf.mdx
similarity index 100%
rename from docs/integrations/files/pdf.mdx
rename to docs 2/integrations/files/pdf.mdx
diff --git a/docs/integrations/files/txt.mdx b/docs 2/integrations/files/txt.mdx
similarity index 100%
rename from docs/integrations/files/txt.mdx
rename to docs 2/integrations/files/txt.mdx
diff --git a/docs/integrations/integrations.mdx b/docs 2/integrations/integrations.mdx
similarity index 100%
rename from docs/integrations/integrations.mdx
rename to docs 2/integrations/integrations.mdx
diff --git a/docs/integrations/sample-database.mdx b/docs 2/integrations/sample-database.mdx
similarity index 100%
rename from docs/integrations/sample-database.mdx
rename to docs 2/integrations/sample-database.mdx
diff --git a/docs/integrations/support.mdx b/docs 2/integrations/support.mdx
similarity index 98%
rename from docs/integrations/support.mdx
rename to docs 2/integrations/support.mdx
index 4f52b81fdf3..d76df9af518 100644
--- a/docs/integrations/support.mdx
+++ b/docs 2/integrations/support.mdx
@@ -51,7 +51,6 @@ Below is the list of all community integrations.
| Llama Index | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/llama_index_handler) |
| Anthropic | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/anthropic_handler) |
| MariaDB | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mariadb_handler) |
-| TimeGPT | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/timegpt_handler) |
| X (Twitter) | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/twitter_handler) |
| GitHub | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler) |
| Hugging Face Inference API | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/huggingface_api_handler) |
@@ -61,7 +60,6 @@ Below is the list of all community integrations.
| Confluence | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/confluence_handler) |
| Gmail | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/gmail_handler) |
| Couchbase | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/couchbase_handler) |
-| StatsForecast | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/statsforecast_handler) |
| Twelve Labs | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/twelve_labs_handler) |
| Anomaly Detection | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/anomaly_detection_handler) |
| YouTube | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/youtube_handler) |
@@ -147,7 +145,6 @@ Below is the list of all community integrations.
| MonetDB | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/monetdb_handler) |
| MonkeyLearn | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/monkeylearn_handler) |
| Microsoft Teams | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ms_teams_handler) |
-| NeuralForecast | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/neuralforecast_handler) |
| NewsAPI | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/newsapi_handler) |
| Notion | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/notion_handler) |
| npm | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/npm_handler) |
diff --git a/docs/integrations/vector-db-integrations/chromadb.mdx b/docs 2/integrations/vector-db-integrations/chromadb.mdx
similarity index 100%
rename from docs/integrations/vector-db-integrations/chromadb.mdx
rename to docs 2/integrations/vector-db-integrations/chromadb.mdx
diff --git a/docs/integrations/vector-db-integrations/couchbase.mdx b/docs 2/integrations/vector-db-integrations/couchbase.mdx
similarity index 100%
rename from docs/integrations/vector-db-integrations/couchbase.mdx
rename to docs 2/integrations/vector-db-integrations/couchbase.mdx
diff --git a/docs/integrations/vector-db-integrations/milvus.mdx b/docs 2/integrations/vector-db-integrations/milvus.mdx
similarity index 100%
rename from docs/integrations/vector-db-integrations/milvus.mdx
rename to docs 2/integrations/vector-db-integrations/milvus.mdx
diff --git a/docs/integrations/vector-db-integrations/pgvector.mdx b/docs 2/integrations/vector-db-integrations/pgvector.mdx
similarity index 100%
rename from docs/integrations/vector-db-integrations/pgvector.mdx
rename to docs 2/integrations/vector-db-integrations/pgvector.mdx
diff --git a/docs/integrations/vector-db-integrations/pinecone.mdx b/docs 2/integrations/vector-db-integrations/pinecone.mdx
similarity index 100%
rename from docs/integrations/vector-db-integrations/pinecone.mdx
rename to docs 2/integrations/vector-db-integrations/pinecone.mdx
diff --git a/docs/integrations/vector-db-integrations/weaviate.mdx b/docs 2/integrations/vector-db-integrations/weaviate.mdx
similarity index 100%
rename from docs/integrations/vector-db-integrations/weaviate.mdx
rename to docs 2/integrations/vector-db-integrations/weaviate.mdx
diff --git a/docs/logo/dark.svg b/docs 2/logo/dark.svg
similarity index 100%
rename from docs/logo/dark.svg
rename to docs 2/logo/dark.svg
diff --git a/docs/logo/light.svg b/docs 2/logo/light.svg
similarity index 100%
rename from docs/logo/light.svg
rename to docs 2/logo/light.svg
diff --git a/docs/minds.mdx b/docs 2/minds.mdx
similarity index 100%
rename from docs/minds.mdx
rename to docs 2/minds.mdx
diff --git a/docs/minds/authentication.mdx b/docs 2/minds/authentication.mdx
similarity index 100%
rename from docs/minds/authentication.mdx
rename to docs 2/minds/authentication.mdx
diff --git a/docs/minds/data_sources.mdx b/docs 2/minds/data_sources.mdx
similarity index 100%
rename from docs/minds/data_sources.mdx
rename to docs 2/minds/data_sources.mdx
diff --git a/docs/minds/minds_ips.mdx b/docs 2/minds/minds_ips.mdx
similarity index 100%
rename from docs/minds/minds_ips.mdx
rename to docs 2/minds/minds_ips.mdx
diff --git a/docs/minds/minds_overview.mdx b/docs 2/minds/minds_overview.mdx
similarity index 100%
rename from docs/minds/minds_overview.mdx
rename to docs 2/minds/minds_overview.mdx
diff --git a/docs/minds/python_sdk/assistants_api.mdx b/docs 2/minds/python_sdk/assistants_api.mdx
similarity index 100%
rename from docs/minds/python_sdk/assistants_api.mdx
rename to docs 2/minds/python_sdk/assistants_api.mdx
diff --git a/docs/minds/python_sdk/completions_api.mdx b/docs 2/minds/python_sdk/completions_api.mdx
similarity index 100%
rename from docs/minds/python_sdk/completions_api.mdx
rename to docs 2/minds/python_sdk/completions_api.mdx
diff --git a/docs/minds/python_sdk/mind_setup.mdx b/docs 2/minds/python_sdk/mind_setup.mdx
similarity index 100%
rename from docs/minds/python_sdk/mind_setup.mdx
rename to docs 2/minds/python_sdk/mind_setup.mdx
diff --git a/docs/minds/python_sdk/overview.mdx b/docs 2/minds/python_sdk/overview.mdx
similarity index 100%
rename from docs/minds/python_sdk/overview.mdx
rename to docs 2/minds/python_sdk/overview.mdx
diff --git a/docs/minds/quickstart.mdx b/docs 2/minds/quickstart.mdx
similarity index 100%
rename from docs/minds/quickstart.mdx
rename to docs 2/minds/quickstart.mdx
diff --git a/docs/minds/quickstart_best_practices.mdx b/docs 2/minds/quickstart_best_practices.mdx
similarity index 100%
rename from docs/minds/quickstart_best_practices.mdx
rename to docs 2/minds/quickstart_best_practices.mdx
diff --git a/docs/minds/quickstart_custom_mind.mdx b/docs 2/minds/quickstart_custom_mind.mdx
similarity index 100%
rename from docs/minds/quickstart_custom_mind.mdx
rename to docs 2/minds/quickstart_custom_mind.mdx
diff --git a/docs/minds/rest_api/add_data_to_mind.mdx b/docs 2/minds/rest_api/add_data_to_mind.mdx
similarity index 100%
rename from docs/minds/rest_api/add_data_to_mind.mdx
rename to docs 2/minds/rest_api/add_data_to_mind.mdx
diff --git a/docs/minds/rest_api/chat.mdx b/docs 2/minds/rest_api/chat.mdx
similarity index 100%
rename from docs/minds/rest_api/chat.mdx
rename to docs 2/minds/rest_api/chat.mdx
diff --git a/docs/minds/rest_api/create_datasource.mdx b/docs 2/minds/rest_api/create_datasource.mdx
similarity index 100%
rename from docs/minds/rest_api/create_datasource.mdx
rename to docs 2/minds/rest_api/create_datasource.mdx
diff --git a/docs/minds/rest_api/create_message.mdx b/docs 2/minds/rest_api/create_message.mdx
similarity index 100%
rename from docs/minds/rest_api/create_message.mdx
rename to docs 2/minds/rest_api/create_message.mdx
diff --git a/docs/minds/rest_api/create_mind.mdx b/docs 2/minds/rest_api/create_mind.mdx
similarity index 100%
rename from docs/minds/rest_api/create_mind.mdx
rename to docs 2/minds/rest_api/create_mind.mdx
diff --git a/docs/minds/rest_api/create_run.mdx b/docs 2/minds/rest_api/create_run.mdx
similarity index 100%
rename from docs/minds/rest_api/create_run.mdx
rename to docs 2/minds/rest_api/create_run.mdx
diff --git a/docs/minds/rest_api/create_thread.mdx b/docs 2/minds/rest_api/create_thread.mdx
similarity index 100%
rename from docs/minds/rest_api/create_thread.mdx
rename to docs 2/minds/rest_api/create_thread.mdx
diff --git a/docs/minds/rest_api/delete_datasource.mdx b/docs 2/minds/rest_api/delete_datasource.mdx
similarity index 100%
rename from docs/minds/rest_api/delete_datasource.mdx
rename to docs 2/minds/rest_api/delete_datasource.mdx
diff --git a/docs/minds/rest_api/delete_mind.mdx b/docs 2/minds/rest_api/delete_mind.mdx
similarity index 100%
rename from docs/minds/rest_api/delete_mind.mdx
rename to docs 2/minds/rest_api/delete_mind.mdx
diff --git a/docs/minds/rest_api/delete_thread.mdx b/docs 2/minds/rest_api/delete_thread.mdx
similarity index 100%
rename from docs/minds/rest_api/delete_thread.mdx
rename to docs 2/minds/rest_api/delete_thread.mdx
diff --git a/docs/minds/rest_api/get_datasource.mdx b/docs 2/minds/rest_api/get_datasource.mdx
similarity index 100%
rename from docs/minds/rest_api/get_datasource.mdx
rename to docs 2/minds/rest_api/get_datasource.mdx
diff --git a/docs/minds/rest_api/get_mind.mdx b/docs 2/minds/rest_api/get_mind.mdx
similarity index 100%
rename from docs/minds/rest_api/get_mind.mdx
rename to docs 2/minds/rest_api/get_mind.mdx
diff --git a/docs/minds/rest_api/list_datasources.mdx b/docs 2/minds/rest_api/list_datasources.mdx
similarity index 100%
rename from docs/minds/rest_api/list_datasources.mdx
rename to docs 2/minds/rest_api/list_datasources.mdx
diff --git a/docs/minds/rest_api/list_messages.mdx b/docs 2/minds/rest_api/list_messages.mdx
similarity index 100%
rename from docs/minds/rest_api/list_messages.mdx
rename to docs 2/minds/rest_api/list_messages.mdx
diff --git a/docs/minds/rest_api/list_minds.mdx b/docs 2/minds/rest_api/list_minds.mdx
similarity index 100%
rename from docs/minds/rest_api/list_minds.mdx
rename to docs 2/minds/rest_api/list_minds.mdx
diff --git a/docs/minds/rest_api/overview.mdx b/docs 2/minds/rest_api/overview.mdx
similarity index 100%
rename from docs/minds/rest_api/overview.mdx
rename to docs 2/minds/rest_api/overview.mdx
diff --git a/docs/minds/rest_api/overview_assistants.mdx b/docs 2/minds/rest_api/overview_assistants.mdx
similarity index 100%
rename from docs/minds/rest_api/overview_assistants.mdx
rename to docs 2/minds/rest_api/overview_assistants.mdx
diff --git a/docs/minds/rest_api/retrieve_run.mdx b/docs 2/minds/rest_api/retrieve_run.mdx
similarity index 100%
rename from docs/minds/rest_api/retrieve_run.mdx
rename to docs 2/minds/rest_api/retrieve_run.mdx
diff --git a/docs/minds/rest_api/update_datasource.mdx b/docs 2/minds/rest_api/update_datasource.mdx
similarity index 100%
rename from docs/minds/rest_api/update_datasource.mdx
rename to docs 2/minds/rest_api/update_datasource.mdx
diff --git a/docs/minds/rest_api/update_mind.mdx b/docs 2/minds/rest_api/update_mind.mdx
similarity index 100%
rename from docs/minds/rest_api/update_mind.mdx
rename to docs 2/minds/rest_api/update_mind.mdx
diff --git a/docs/minds/sample_data.mdx b/docs 2/minds/sample_data.mdx
similarity index 100%
rename from docs/minds/sample_data.mdx
rename to docs 2/minds/sample_data.mdx
diff --git a/docs/mindsdb-connect.mdx b/docs 2/mindsdb-connect.mdx
similarity index 100%
rename from docs/mindsdb-connect.mdx
rename to docs 2/mindsdb-connect.mdx
diff --git a/docs/mindsdb-fqe.mdx b/docs 2/mindsdb-fqe.mdx
similarity index 100%
rename from docs/mindsdb-fqe.mdx
rename to docs 2/mindsdb-fqe.mdx
diff --git a/docs/mindsdb-gui.mdx b/docs 2/mindsdb-gui.mdx
similarity index 100%
rename from docs/mindsdb-gui.mdx
rename to docs 2/mindsdb-gui.mdx
diff --git a/docs/mindsdb-handlers.mdx b/docs 2/mindsdb-handlers.mdx
similarity index 92%
rename from docs/mindsdb-handlers.mdx
rename to docs 2/mindsdb-handlers.mdx
index c69e09cee4c..0d9a1aaac36 100644
--- a/docs/mindsdb-handlers.mdx
+++ b/docs 2/mindsdb-handlers.mdx
@@ -76,7 +76,7 @@ Whenever you want to parse a string that contains SQL, we strongly recommend usi
### Formatting Output
-In the case of data handlers, when it comes to building the response of the public methods, the output should be wrapped by the [mindsdb.integrations.libs.response.HandlerResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L7) or [mindsdb.integrations.libs.response.HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L32) class. These classes are used by the MindsDB executioner to orchestrate and coordinate multiple handler instances in parallel.
+In the case of data handlers, the data-returning methods (`native_query()`, `query()`, `get_tables()`, `get_columns()`) should return one of the response classes from [mindsdb.integrations.libs.response](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py)
And in the case of ML handlers, output wrapping is automatically done by an intermediate wrapper, the `BaseMLEngineExec` class, so the contributor wouldn't need to worry about it.
diff --git a/docs/mindsdb-objects.mdx b/docs 2/mindsdb-objects.mdx
similarity index 100%
rename from docs/mindsdb-objects.mdx
rename to docs 2/mindsdb-objects.mdx
diff --git a/docs/mindsdb-respond.mdx b/docs 2/mindsdb-respond.mdx
similarity index 100%
rename from docs/mindsdb-respond.mdx
rename to docs 2/mindsdb-respond.mdx
diff --git a/docs/mindsdb-unify.mdx b/docs 2/mindsdb-unify.mdx
similarity index 100%
rename from docs/mindsdb-unify.mdx
rename to docs 2/mindsdb-unify.mdx
diff --git a/docs/mindsdb.mdx b/docs 2/mindsdb.mdx
similarity index 100%
rename from docs/mindsdb.mdx
rename to docs 2/mindsdb.mdx
diff --git a/docs/mindsdb_sql/agents/agent.mdx b/docs 2/mindsdb_sql/agents/agent.mdx
similarity index 100%
rename from docs/mindsdb_sql/agents/agent.mdx
rename to docs 2/mindsdb_sql/agents/agent.mdx
diff --git a/docs/mindsdb_sql/agents/agent_gui.mdx b/docs 2/mindsdb_sql/agents/agent_gui.mdx
similarity index 100%
rename from docs/mindsdb_sql/agents/agent_gui.mdx
rename to docs 2/mindsdb_sql/agents/agent_gui.mdx
diff --git a/docs/mindsdb_sql/agents/agent_syntax.mdx b/docs 2/mindsdb_sql/agents/agent_syntax.mdx
similarity index 96%
rename from docs/mindsdb_sql/agents/agent_syntax.mdx
rename to docs 2/mindsdb_sql/agents/agent_syntax.mdx
index 9c1d3c01563..42597689170 100644
--- a/docs/mindsdb_sql/agents/agent_syntax.mdx
+++ b/docs 2/mindsdb_sql/agents/agent_syntax.mdx
@@ -24,7 +24,8 @@ USING
"tables": ["datasource_conn_name.table_name", ...]
},
prompt_template='describe data',
- timeout=10;
+ timeout=10,
+ mode='text';
```
It creates an agent that uses the defined model and has access to the connected data.
@@ -315,6 +316,12 @@ This parameter defines the time the agent can take to come back with an answer.
For example, when the `timeout` parameter is set to 10, the agent has 10 seconds to return an answer. If the agent takes longer than 10 seconds, it aborts the process and comes back with an answer indicating its failure to return an answer within the defined time interval.
+### `mode`
+
+This parameter defines the agent's response style, allowing users to partially control the output format. Supported values include `text` and `sql`.
+
+When set, the agent will tailor its responses to match the specified format. Note that the agent may still adapt its output when necessary to ensure clarity or correctness.
+
## `SELECT FROM AGENT` Syntax
Query an agent to generate responses to questions.
diff --git a/docs/mindsdb_sql/agents/chatbot.mdx b/docs 2/mindsdb_sql/agents/chatbot.mdx
similarity index 100%
rename from docs/mindsdb_sql/agents/chatbot.mdx
rename to docs 2/mindsdb_sql/agents/chatbot.mdx
diff --git a/docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx b/docs 2/mindsdb_sql/connect/connect-mariadb-skysql.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx
rename to docs 2/mindsdb_sql/connect/connect-mariadb-skysql.mdx
diff --git a/docs/mindsdb_sql/connect/dbeaver.mdx b/docs 2/mindsdb_sql/connect/dbeaver.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/dbeaver.mdx
rename to docs 2/mindsdb_sql/connect/dbeaver.mdx
diff --git a/docs/mindsdb_sql/connect/deepnote.mdx b/docs 2/mindsdb_sql/connect/deepnote.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/deepnote.mdx
rename to docs 2/mindsdb_sql/connect/deepnote.mdx
diff --git a/docs/mindsdb_sql/connect/grafana.mdx b/docs 2/mindsdb_sql/connect/grafana.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/grafana.mdx
rename to docs 2/mindsdb_sql/connect/grafana.mdx
diff --git a/docs/mindsdb_sql/connect/jupysql.mdx b/docs 2/mindsdb_sql/connect/jupysql.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/jupysql.mdx
rename to docs 2/mindsdb_sql/connect/jupysql.mdx
diff --git a/docs/mindsdb_sql/connect/metabase.mdx b/docs 2/mindsdb_sql/connect/metabase.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/metabase.mdx
rename to docs 2/mindsdb_sql/connect/metabase.mdx
diff --git a/docs/mindsdb_sql/connect/mindsdb_editor.mdx b/docs 2/mindsdb_sql/connect/mindsdb_editor.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/mindsdb_editor.mdx
rename to docs 2/mindsdb_sql/connect/mindsdb_editor.mdx
diff --git a/docs/mindsdb_sql/connect/mysql-client.mdx b/docs 2/mindsdb_sql/connect/mysql-client.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/mysql-client.mdx
rename to docs 2/mindsdb_sql/connect/mysql-client.mdx
diff --git a/docs/mindsdb_sql/connect/sql-alchemy.mdx b/docs 2/mindsdb_sql/connect/sql-alchemy.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/sql-alchemy.mdx
rename to docs 2/mindsdb_sql/connect/sql-alchemy.mdx
diff --git a/docs/mindsdb_sql/connect/tableau.mdx b/docs 2/mindsdb_sql/connect/tableau.mdx
similarity index 100%
rename from docs/mindsdb_sql/connect/tableau.mdx
rename to docs 2/mindsdb_sql/connect/tableau.mdx
diff --git a/docs/mindsdb_sql/functions/custom_functions.mdx b/docs 2/mindsdb_sql/functions/custom_functions.mdx
similarity index 100%
rename from docs/mindsdb_sql/functions/custom_functions.mdx
rename to docs 2/mindsdb_sql/functions/custom_functions.mdx
diff --git a/docs/mindsdb_sql/functions/from_env.mdx b/docs 2/mindsdb_sql/functions/from_env.mdx
similarity index 100%
rename from docs/mindsdb_sql/functions/from_env.mdx
rename to docs 2/mindsdb_sql/functions/from_env.mdx
diff --git a/docs/mindsdb_sql/functions/llm_function.mdx b/docs 2/mindsdb_sql/functions/llm_function.mdx
similarity index 100%
rename from docs/mindsdb_sql/functions/llm_function.mdx
rename to docs 2/mindsdb_sql/functions/llm_function.mdx
diff --git a/docs/mindsdb_sql/functions/standard-functions.mdx b/docs 2/mindsdb_sql/functions/standard-functions.mdx
similarity index 100%
rename from docs/mindsdb_sql/functions/standard-functions.mdx
rename to docs 2/mindsdb_sql/functions/standard-functions.mdx
diff --git a/docs/mindsdb_sql/functions/to_markdown_function.mdx b/docs 2/mindsdb_sql/functions/to_markdown_function.mdx
similarity index 100%
rename from docs/mindsdb_sql/functions/to_markdown_function.mdx
rename to docs 2/mindsdb_sql/functions/to_markdown_function.mdx
diff --git a/docs/mindsdb_sql/functions/variables.mdx b/docs 2/mindsdb_sql/functions/variables.mdx
similarity index 100%
rename from docs/mindsdb_sql/functions/variables.mdx
rename to docs 2/mindsdb_sql/functions/variables.mdx
diff --git a/docs/mindsdb_sql/knowledge-bases.mdx b/docs 2/mindsdb_sql/knowledge-bases.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge-bases.mdx
rename to docs 2/mindsdb_sql/knowledge-bases.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/alter.mdx b/docs 2/mindsdb_sql/knowledge_bases/alter.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/alter.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/alter.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/create.mdx b/docs 2/mindsdb_sql/knowledge_bases/create.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/create.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/create.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/evaluate.mdx b/docs 2/mindsdb_sql/knowledge_bases/evaluate.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/evaluate.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/evaluate.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/examples.mdx b/docs 2/mindsdb_sql/knowledge_bases/examples.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/examples.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/examples.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx b/docs 2/mindsdb_sql/knowledge_bases/hybrid_search.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/hybrid_search.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/insert_data.mdx b/docs 2/mindsdb_sql/knowledge_bases/insert_data.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/insert_data.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/insert_data.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/overview.mdx b/docs 2/mindsdb_sql/knowledge_bases/overview.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/overview.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/overview.mdx
diff --git a/docs/mindsdb_sql/knowledge_bases/query.mdx b/docs 2/mindsdb_sql/knowledge_bases/query.mdx
similarity index 100%
rename from docs/mindsdb_sql/knowledge_bases/query.mdx
rename to docs 2/mindsdb_sql/knowledge_bases/query.mdx
diff --git a/docs/mindsdb_sql/overview.mdx b/docs 2/mindsdb_sql/overview.mdx
similarity index 100%
rename from docs/mindsdb_sql/overview.mdx
rename to docs 2/mindsdb_sql/overview.mdx
diff --git a/docs/mindsdb_sql/sql/api/alter-view.mdx b/docs 2/mindsdb_sql/sql/api/alter-view.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/alter-view.mdx
rename to docs 2/mindsdb_sql/sql/api/alter-view.mdx
diff --git a/docs/mindsdb_sql/sql/api/delete.mdx b/docs 2/mindsdb_sql/sql/api/delete.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/delete.mdx
rename to docs 2/mindsdb_sql/sql/api/delete.mdx
diff --git a/docs/mindsdb_sql/sql/api/describe.mdx b/docs 2/mindsdb_sql/sql/api/describe.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/describe.mdx
rename to docs 2/mindsdb_sql/sql/api/describe.mdx
diff --git a/docs/mindsdb_sql/sql/api/evaluate.mdx b/docs 2/mindsdb_sql/sql/api/evaluate.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/evaluate.mdx
rename to docs 2/mindsdb_sql/sql/api/evaluate.mdx
diff --git a/docs/mindsdb_sql/sql/api/finetune.mdx b/docs 2/mindsdb_sql/sql/api/finetune.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/finetune.mdx
rename to docs 2/mindsdb_sql/sql/api/finetune.mdx
diff --git a/docs/mindsdb_sql/sql/api/insert.mdx b/docs 2/mindsdb_sql/sql/api/insert.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/insert.mdx
rename to docs 2/mindsdb_sql/sql/api/insert.mdx
diff --git a/docs/mindsdb_sql/sql/api/join-on.mdx b/docs 2/mindsdb_sql/sql/api/join-on.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/join-on.mdx
rename to docs 2/mindsdb_sql/sql/api/join-on.mdx
diff --git a/docs/mindsdb_sql/sql/api/join.mdx b/docs 2/mindsdb_sql/sql/api/join.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/join.mdx
rename to docs 2/mindsdb_sql/sql/api/join.mdx
diff --git a/docs/mindsdb_sql/sql/api/manage-models-versions.mdx b/docs 2/mindsdb_sql/sql/api/manage-models-versions.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/manage-models-versions.mdx
rename to docs 2/mindsdb_sql/sql/api/manage-models-versions.mdx
diff --git a/docs/mindsdb_sql/sql/api/retrain.mdx b/docs 2/mindsdb_sql/sql/api/retrain.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/retrain.mdx
rename to docs 2/mindsdb_sql/sql/api/retrain.mdx
diff --git a/docs/mindsdb_sql/sql/api/select-files.mdx b/docs 2/mindsdb_sql/sql/api/select-files.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/select-files.mdx
rename to docs 2/mindsdb_sql/sql/api/select-files.mdx
diff --git a/docs/mindsdb_sql/sql/api/select-view.mdx b/docs 2/mindsdb_sql/sql/api/select-view.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/select-view.mdx
rename to docs 2/mindsdb_sql/sql/api/select-view.mdx
diff --git a/docs/mindsdb_sql/sql/api/select.mdx b/docs 2/mindsdb_sql/sql/api/select.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/select.mdx
rename to docs 2/mindsdb_sql/sql/api/select.mdx
diff --git a/docs/mindsdb_sql/sql/api/update.mdx b/docs 2/mindsdb_sql/sql/api/update.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/update.mdx
rename to docs 2/mindsdb_sql/sql/api/update.mdx
diff --git a/docs/mindsdb_sql/sql/api/use.mdx b/docs 2/mindsdb_sql/sql/api/use.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/api/use.mdx
rename to docs 2/mindsdb_sql/sql/api/use.mdx
diff --git a/docs/mindsdb_sql/sql/create/database.mdx b/docs 2/mindsdb_sql/sql/create/database.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/database.mdx
rename to docs 2/mindsdb_sql/sql/create/database.mdx
diff --git a/docs/mindsdb_sql/sql/create/file.mdx b/docs 2/mindsdb_sql/sql/create/file.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/file.mdx
rename to docs 2/mindsdb_sql/sql/create/file.mdx
diff --git a/docs/mindsdb_sql/sql/create/jobs.mdx b/docs 2/mindsdb_sql/sql/create/jobs.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/jobs.mdx
rename to docs 2/mindsdb_sql/sql/create/jobs.mdx
diff --git a/docs/mindsdb_sql/sql/create/ml-engine.mdx b/docs 2/mindsdb_sql/sql/create/ml-engine.mdx
similarity index 92%
rename from docs/mindsdb_sql/sql/create/ml-engine.mdx
rename to docs 2/mindsdb_sql/sql/create/ml-engine.mdx
index 1e10bba6c63..ece54f7916e 100644
--- a/docs/mindsdb_sql/sql/create/ml-engine.mdx
+++ b/docs 2/mindsdb_sql/sql/create/ml-engine.mdx
@@ -70,14 +70,12 @@ On execution, we get:
| NAME | TITLE | DESCRIPTION | VERSION | CONNECTION_ARGS | IMPORT_SUCCESS | IMPORT_ERROR |
+-------------------+--------------------+-------------------------------------------------------+---------+-----------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------------------------------------------------------------------------+
| "ray_serve" | "RayServe" | "MindsDB handler for Ray Serve" | "0.0.1" | "[NULL]" | "true" | "[NULL]" |
-| "neuralforecast" | "NeuralForecast" | "MindsDB handler for Nixtla's NeuralForecast package" | "0.0.1" | "[NULL]" | "true" | "[NULL]" |
| "autosklearn" | "Auto-Sklearn" | "MindsDB handler for Auto-Sklearn" | "0.0.2" | "[NULL]" | "false" | "No module named 'autosklearn'" |
| "mlflow" | "MLFlow" | "MindsDB handler for MLflow" | "0.0.2" | "[NULL]" | "false" | "No module named 'mlflow'" |
| "openai" | "OpenAI" | "MindsDB handler for OpenAI" | "0.0.1" | "[NULL]" | "true" | "[NULL]" |
| "merlion" | "Merlion" | "MindsDB handler for Merlion" | "0.0.1" | "[NULL]" | "false" | "object.__init__() takes exactly one argument (the instance to initialize)" |
| "byom" | "BYOM" | "MindsDB handler for BYOM" | "0.0.1" | "{'code': {'type': 'path', 'description': 'The path to model code'}, 'modules': {'type': 'path', 'description': 'The path to model requirements'}}" | "true" | "[NULL]" |
| "huggingface_api" | "Hugging Face API" | "MindsDB handler for Auto-Sklearn" | "0.0.2" | "[NULL]" | "false" | "No module named 'hugging_py_face'" |
-| "statsforecast" | "StatsForecast" | "MindsDB handler for Nixtla's StatsForecast package" | "0.0.0" | "[NULL]" | "true" | "[NULL]" |
| "huggingface" | "Hugging Face" | "MindsDB handler for Higging Face" | "0.0.1" | "[NULL]" | "true" | "[NULL]" |
| "TPOT" | "Tpot" | "MindsDB handler for TPOT " | "0.0.2" | "[NULL]" | "false" | "No module named 'tpot'" |
| "langchain" | "LangChain" | "MindsDB handler for LangChain" | "0.0.1" | "[NULL]" | "true" | "[NULL]" |
diff --git a/docs/mindsdb_sql/sql/create/model.mdx b/docs 2/mindsdb_sql/sql/create/model.mdx
similarity index 98%
rename from docs/mindsdb_sql/sql/create/model.mdx
rename to docs 2/mindsdb_sql/sql/create/model.mdx
index 3fa60ccfe3c..a68ed13da10 100644
--- a/docs/mindsdb_sql/sql/create/model.mdx
+++ b/docs 2/mindsdb_sql/sql/create/model.mdx
@@ -8,8 +8,8 @@ sidebarTitle: Create, Train, and Deploy a Model
The `CREATE MODEL` statement creates and trains a machine learning (ML) model.
- Please note that the `CREATE MODEL` statement is equivalent to the `CREATE MODEL` statement.
- We are transitioning to the `CREATE MODEL` statement, but the `CREATE MODEL` statement still works.
+ Please note that the `CREATE PREDICTOR` statement is equivalent to the `CREATE MODEL` statement.
+ We are transitioning to the `CREATE MODEL` statement, but the `CREATE PREDICTOR` statement still works.
## Syntax
diff --git a/docs/mindsdb_sql/sql/create/project.mdx b/docs 2/mindsdb_sql/sql/create/project.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/project.mdx
rename to docs 2/mindsdb_sql/sql/create/project.mdx
diff --git a/docs/mindsdb_sql/sql/create/table.mdx b/docs 2/mindsdb_sql/sql/create/table.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/table.mdx
rename to docs 2/mindsdb_sql/sql/create/table.mdx
diff --git a/docs/mindsdb_sql/sql/create/trigger.mdx b/docs 2/mindsdb_sql/sql/create/trigger.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/trigger.mdx
rename to docs 2/mindsdb_sql/sql/create/trigger.mdx
diff --git a/docs/mindsdb_sql/sql/create/view.mdx b/docs 2/mindsdb_sql/sql/create/view.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/create/view.mdx
rename to docs 2/mindsdb_sql/sql/create/view.mdx
diff --git a/docs/mindsdb_sql/sql/drop/database.mdx b/docs 2/mindsdb_sql/sql/drop/database.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/database.mdx
rename to docs 2/mindsdb_sql/sql/drop/database.mdx
diff --git a/docs/mindsdb_sql/sql/drop/file.mdx b/docs 2/mindsdb_sql/sql/drop/file.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/file.mdx
rename to docs 2/mindsdb_sql/sql/drop/file.mdx
diff --git a/docs/mindsdb_sql/sql/drop/jobs.mdx b/docs 2/mindsdb_sql/sql/drop/jobs.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/jobs.mdx
rename to docs 2/mindsdb_sql/sql/drop/jobs.mdx
diff --git a/docs/mindsdb_sql/sql/drop/ml-engine.mdx b/docs 2/mindsdb_sql/sql/drop/ml-engine.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/ml-engine.mdx
rename to docs 2/mindsdb_sql/sql/drop/ml-engine.mdx
diff --git a/docs/mindsdb_sql/sql/drop/model.mdx b/docs 2/mindsdb_sql/sql/drop/model.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/model.mdx
rename to docs 2/mindsdb_sql/sql/drop/model.mdx
diff --git a/docs/mindsdb_sql/sql/drop/project.mdx b/docs 2/mindsdb_sql/sql/drop/project.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/project.mdx
rename to docs 2/mindsdb_sql/sql/drop/project.mdx
diff --git a/docs/mindsdb_sql/sql/drop/table.mdx b/docs 2/mindsdb_sql/sql/drop/table.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/table.mdx
rename to docs 2/mindsdb_sql/sql/drop/table.mdx
diff --git a/docs/mindsdb_sql/sql/drop/trigger.mdx b/docs 2/mindsdb_sql/sql/drop/trigger.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/trigger.mdx
rename to docs 2/mindsdb_sql/sql/drop/trigger.mdx
diff --git a/docs/mindsdb_sql/sql/drop/view.mdx b/docs 2/mindsdb_sql/sql/drop/view.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/drop/view.mdx
rename to docs 2/mindsdb_sql/sql/drop/view.mdx
diff --git a/docs/mindsdb_sql/sql/get-batch-predictions.mdx b/docs 2/mindsdb_sql/sql/get-batch-predictions.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/get-batch-predictions.mdx
rename to docs 2/mindsdb_sql/sql/get-batch-predictions.mdx
diff --git a/docs/mindsdb_sql/sql/get-single-prediction.mdx b/docs 2/mindsdb_sql/sql/get-single-prediction.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/get-single-prediction.mdx
rename to docs 2/mindsdb_sql/sql/get-single-prediction.mdx
diff --git a/docs/mindsdb_sql/sql/list-data-handlers.mdx b/docs 2/mindsdb_sql/sql/list-data-handlers.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/list-data-handlers.mdx
rename to docs 2/mindsdb_sql/sql/list-data-handlers.mdx
diff --git a/docs/mindsdb_sql/sql/list-ml-handlers.mdx b/docs 2/mindsdb_sql/sql/list-ml-handlers.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/list-ml-handlers.mdx
rename to docs 2/mindsdb_sql/sql/list-ml-handlers.mdx
diff --git a/docs/mindsdb_sql/sql/list-projects.mdx b/docs 2/mindsdb_sql/sql/list-projects.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/list-projects.mdx
rename to docs 2/mindsdb_sql/sql/list-projects.mdx
diff --git a/docs/mindsdb_sql/sql/native-queries.mdx b/docs 2/mindsdb_sql/sql/native-queries.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/native-queries.mdx
rename to docs 2/mindsdb_sql/sql/native-queries.mdx
diff --git a/docs/mindsdb_sql/sql/query-jobs.mdx b/docs 2/mindsdb_sql/sql/query-jobs.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/query-jobs.mdx
rename to docs 2/mindsdb_sql/sql/query-jobs.mdx
diff --git a/docs/mindsdb_sql/sql/query-triggers.mdx b/docs 2/mindsdb_sql/sql/query-triggers.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/query-triggers.mdx
rename to docs 2/mindsdb_sql/sql/query-triggers.mdx
diff --git a/docs/mindsdb_sql/sql/show-databases.mdx b/docs 2/mindsdb_sql/sql/show-databases.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/show-databases.mdx
rename to docs 2/mindsdb_sql/sql/show-databases.mdx
diff --git a/docs/mindsdb_sql/sql/show-ml-engines.mdx b/docs 2/mindsdb_sql/sql/show-ml-engines.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/show-ml-engines.mdx
rename to docs 2/mindsdb_sql/sql/show-ml-engines.mdx
diff --git a/docs/mindsdb_sql/sql/show-models.mdx b/docs 2/mindsdb_sql/sql/show-models.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/show-models.mdx
rename to docs 2/mindsdb_sql/sql/show-models.mdx
diff --git a/docs/mindsdb_sql/sql/use/project.mdx b/docs 2/mindsdb_sql/sql/use/project.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql/use/project.mdx
rename to docs 2/mindsdb_sql/sql/use/project.mdx
diff --git a/docs/mindsdb_sql/sql_support/case-when.mdx b/docs 2/mindsdb_sql/sql_support/case-when.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql_support/case-when.mdx
rename to docs 2/mindsdb_sql/sql_support/case-when.mdx
diff --git a/docs/mindsdb_sql/sql_support/cte.mdx b/docs 2/mindsdb_sql/sql_support/cte.mdx
similarity index 100%
rename from docs/mindsdb_sql/sql_support/cte.mdx
rename to docs 2/mindsdb_sql/sql_support/cte.mdx
diff --git a/docs/mindsdb_sql/syntax.mdx b/docs 2/mindsdb_sql/syntax.mdx
similarity index 100%
rename from docs/mindsdb_sql/syntax.mdx
rename to docs 2/mindsdb_sql/syntax.mdx
diff --git a/docs/model-context-protocol/anthropic.mdx b/docs 2/model-context-protocol/anthropic.mdx
similarity index 97%
rename from docs/model-context-protocol/anthropic.mdx
rename to docs 2/model-context-protocol/anthropic.mdx
index ba8609f5b23..0b594db756a 100644
--- a/docs/model-context-protocol/anthropic.mdx
+++ b/docs 2/model-context-protocol/anthropic.mdx
@@ -35,7 +35,7 @@ response = client.beta.messages.create(
mcp_servers = [
{
"type": "url",
- "url": "https://5a52-88-203-84-191.ngrok-free.app/mcp/sse",
+ "url": "https:///mcp/sse",
"name": "mindsdb-mcp",
"authorization_token": ""
}
diff --git a/docs/model-context-protocol/cursor_usage.mdx b/docs 2/model-context-protocol/cursor_usage.mdx
similarity index 100%
rename from docs/model-context-protocol/cursor_usage.mdx
rename to docs 2/model-context-protocol/cursor_usage.mdx
diff --git a/docs/model-context-protocol/openai.mdx b/docs 2/model-context-protocol/openai.mdx
similarity index 98%
rename from docs/model-context-protocol/openai.mdx
rename to docs 2/model-context-protocol/openai.mdx
index c3d8ea2df54..3d9736dde9e 100644
--- a/docs/model-context-protocol/openai.mdx
+++ b/docs 2/model-context-protocol/openai.mdx
@@ -32,7 +32,7 @@ response = client.responses.create(
{
"type": "mcp",
"server_label": "mdb",
- "server_url": "https://5a52-88-203-84-191.ngrok-free.app/mcp/sse",
+ "server_url": "https:///mcp/sse",
"headers": { "Authorization": "Bearer " },
"require_approval": "never",
}
diff --git a/docs/model-context-protocol/overview.mdx b/docs 2/model-context-protocol/overview.mdx
similarity index 100%
rename from docs/model-context-protocol/overview.mdx
rename to docs 2/model-context-protocol/overview.mdx
diff --git a/docs 2/model-context-protocol/usage.mdx b/docs 2/model-context-protocol/usage.mdx
new file mode 100644
index 00000000000..43ed653b8d8
--- /dev/null
+++ b/docs 2/model-context-protocol/usage.mdx
@@ -0,0 +1,95 @@
+---
+title: MindsDB's MCP Server Usage and Tools
+sidebarTitle: Usage
+---
+
+**MindsDB** is an MCP server that enables your MCP applications to answer questions over large-scale federated data spanning databases, data warehouses, and SaaS applications.
+
+## Start MindsDB as an MCP Server
+
+Follow the steps below to use MindsDB as an MCP server.
+
+1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
+
+2. [Connect your data source](/mindsdb_sql/sql/create/database) and/or [upload files](/mindsdb_sql/sql/create/file) to MindsDB in order to ask questions over your data.
+
+
+ You can use our sample dataset that stores the sales manager data.
+
+ ```sql
+ CREATE DATABASE sales_manager_data
+ WITH ENGINE = "postgres",
+ PARAMETERS = {
+ "user": "demo_user",
+ "password": "demo_password",
+ "host": "samples.mindsdb.com",
+ "port": "5432",
+ "database": "sales_manager_data"
+ };
+ ```
+
+
+3. Start MindsDB MCP server.
+
+ * **Without authentication** (suitable for local tools):
+
+ ```bash
+ docker run --name mindsdb_container -p 47334:47334 mindsdb/mindsdb
+ ```
+
+ * **With PAT authentication** (suitable for remote):
+
+ ```bash
+ docker run --name mindsdb_container -p 47334:47334 -e MINDSDB_USERNAME=admin -e MINDSDB_PASSWORD=password123 mindsdb/mindsdb
+ ```
+
+ Get a Bearer token:
+ ```bash
+ curl -X POST -d '{"username":"admin","password":"password123"}' -H "Content-Type: application/json" http://localhost:47334/api/login
+ ```
+ Use this token as `Authorization: Bearer ` in your MCP client.
+
+ * **With OAuth 2.0** (for enterprise deployments): configure `MINDSDB_MCP_OAUTH_ENABLED=true` along with `MINDSDB_MCP_OAUTH_ISSUER_URL`, `MINDSDB_MCP_OAUTH_CLIENT_ID`, and `MINDSDB_MCP_OAUTH_CLIENT_SECRET`.
+
+4. To confirm the MindsDB MCP server is running use `http://127.0.0.1:47334/mcp/status`. A successful response means your MCP environment is ready.
+
+
+## MCP Capabilities
+
+### Tools
+
+**`query`** β Executes SQL queries against MindsDB using MySQL syntax.
+
+Parameters:
+- `query` (required): SQL query string
+- `context` (optional): Dict with default database, e.g. `{"db": "my_postgres"}`
+
+Returns one of:
+- `{"type": "table", "column_names": [...], "data": [...]}` β for SELECT results
+- `{"type": "ok", "affected_rows": N}` β for INSERT/UPDATE/DELETE
+- `{"type": "error", "error_code": N, "error_message": "..."}` β on failure
+
+### Resources
+
+MCP resources expose schema information for discovery:
+
+| Resource URI | Description |
+|---|---|
+| `schema://databases` | Lists all connected data sources |
+| `schema://databases/{db}/tables` | Lists tables in a database |
+| `schema://databases/{db}/tables/{table}/columns` | Lists columns with types |
+| `schema://knowledge_bases` | Lists knowledge bases |
+
+### Prompts
+
+**`sample_table`** β Generates instructions to fetch 5 sample rows and describe a table's structure.
+
+## Transport Modes
+
+- **HTTP (SSE)**: `http://127.0.0.1:47334/mcp/sse`
+- **HTTP (Streamable)**: `http://127.0.0.1:47334/mcp/streamable`
+- **Stdio**: run with `--mcp-stdio` flag for local stdio-based transport
+
+## Configuration
+
+CORS, rate limiting, DNS rebinding protection, and OAuth settings for the MCP server are configured via the `api.mcp` section of `config.json` or the corresponding environment variables. See [Extend the Default MindsDB Configuration](/setup/custom-config#mcp-api) for the full parameter reference.
diff --git a/docs/openapi.yml b/docs 2/openapi.yml
similarity index 100%
rename from docs/openapi.yml
rename to docs 2/openapi.yml
diff --git a/docs/overview_sdks_apis.mdx b/docs 2/overview_sdks_apis.mdx
similarity index 100%
rename from docs/overview_sdks_apis.mdx
rename to docs 2/overview_sdks_apis.mdx
diff --git a/docs/package-lock.json b/docs 2/package-lock.json
similarity index 89%
rename from docs/package-lock.json
rename to docs 2/package-lock.json
index 5d303f5c439..551eba76093 100644
--- a/docs/package-lock.json
+++ b/docs 2/package-lock.json
@@ -1,18 +1,18 @@
{
- "name": "relock-npm-lock-v2-Lu0TRg",
+ "name": "docs",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
- "mintlify": "^4.2.296",
+ "mintlify": "^4.2.500",
"sharp": "^0.34.4"
}
},
"node_modules/@alcalzone/ansi-tokenize": {
- "version": "0.2.3",
- "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.3.tgz",
- "integrity": "sha512-jsElTJ0sQ4wHRz+C45tfect76BwbTbgkgKByOzpCN9xG61N5V6u/glvg1CsNJhq2xJIFpKHSwG3D2wPPuEYOrQ==",
+ "version": "0.2.5",
+ "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.5.tgz",
+ "integrity": "sha512-3NX/MpTdroi0aKz134A6RC2Gb2iXVECN4QaAXnvCIxxIm3C3AVB1mkUe8NaaiyvOpDfsrqWhYtj+Q6a62RrTsw==",
"license": "MIT",
"dependencies": {
"ansi-styles": "^6.2.1",
@@ -77,18 +77,18 @@
}
},
"node_modules/@asyncapi/specs": {
- "version": "6.10.0",
- "resolved": "https://registry.npmjs.org/@asyncapi/specs/-/specs-6.10.0.tgz",
- "integrity": "sha512-vB5oKLsdrLUORIZ5BXortZTlVyGWWMC1Nud/0LtgxQ3Yn2738HigAD6EVqScvpPsDUI/bcLVsYEXN4dtXQHVng==",
+ "version": "6.8.1",
+ "resolved": "https://registry.npmjs.org/@asyncapi/specs/-/specs-6.8.1.tgz",
+ "integrity": "sha512-czHoAk3PeXTLR+X8IUaD+IpT+g+zUvkcgMDJVothBsan+oHN3jfcFcFUNdOPAAFoUCQN1hXF1dWuphWy05THlA==",
"license": "Apache-2.0",
"dependencies": {
"@types/json-schema": "^7.0.11"
}
},
"node_modules/@babel/code-frame": {
- "version": "7.28.6",
- "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz",
- "integrity": "sha512-JYgintcMjRiCvS8mMECzaEn+m3PfoQiyqukOMCCVQtoJGYJw8j/8LBJEiqkHLkfwCcs74E3pbAUFNg7d9VNJ+Q==",
+ "version": "7.29.0",
+ "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
+ "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==",
"license": "MIT",
"dependencies": {
"@babel/helper-validator-identifier": "^7.28.5",
@@ -115,9 +115,9 @@
"license": "MIT"
},
"node_modules/@emnapi/runtime": {
- "version": "1.7.1",
- "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.7.1.tgz",
- "integrity": "sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==",
+ "version": "1.9.2",
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz",
+ "integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==",
"license": "MIT",
"optional": true,
"dependencies": {
@@ -132,51 +132,37 @@
"optional": true
},
"node_modules/@floating-ui/core": {
- "version": "1.7.3",
- "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz",
- "integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==",
+ "version": "1.7.5",
+ "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.5.tgz",
+ "integrity": "sha512-1Ih4WTWyw0+lKyFMcBHGbb5U5FtuHJuujoyyr5zTaWS5EYMeT6Jb2AuDeftsCsEuchO+mM2ij5+q9crhydzLhQ==",
"license": "MIT",
"peer": true,
"dependencies": {
- "@floating-ui/utils": "^0.2.10"
+ "@floating-ui/utils": "^0.2.11"
}
},
"node_modules/@floating-ui/dom": {
- "version": "1.7.4",
- "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz",
- "integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@floating-ui/core": "^1.7.3",
- "@floating-ui/utils": "^0.2.10"
- }
- },
- "node_modules/@floating-ui/react-dom": {
- "version": "2.1.6",
- "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz",
- "integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==",
+ "version": "1.7.6",
+ "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.6.tgz",
+ "integrity": "sha512-9gZSAI5XM36880PPMm//9dfiEngYoC6Am2izES1FF406YFsjvyBMmeJ2g4SAju3xWwtuynNRFL2s9hgxpLI5SQ==",
"license": "MIT",
"peer": true,
"dependencies": {
- "@floating-ui/dom": "^1.7.4"
- },
- "peerDependencies": {
- "react": ">=16.8.0",
- "react-dom": ">=16.8.0"
+ "@floating-ui/core": "^1.7.5",
+ "@floating-ui/utils": "^0.2.11"
}
},
"node_modules/@floating-ui/utils": {
- "version": "0.2.10",
- "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz",
- "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==",
+ "version": "0.2.11",
+ "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.11.tgz",
+ "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==",
"license": "MIT",
"peer": true
},
"node_modules/@img/colour": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz",
- "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==",
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
+ "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
"license": "MIT",
"engines": {
"node": ">=18"
@@ -265,6 +251,9 @@
"cpu": [
"arm"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -281,6 +270,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -297,6 +289,9 @@
"cpu": [
"ppc64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -313,6 +308,9 @@
"cpu": [
"riscv64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -329,6 +327,9 @@
"cpu": [
"s390x"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -345,6 +346,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -361,6 +365,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "musl"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -377,6 +384,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "musl"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -393,6 +403,9 @@
"cpu": [
"arm"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -415,6 +428,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -437,6 +453,9 @@
"cpu": [
"ppc64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -459,6 +478,9 @@
"cpu": [
"riscv64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -481,6 +503,9 @@
"cpu": [
"s390x"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -503,6 +528,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -525,6 +553,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "musl"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -547,6 +578,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "musl"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1104,18 +1138,17 @@
}
},
"node_modules/@mintlify/cli": {
- "version": "4.0.900",
- "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.900.tgz",
- "integrity": "sha512-2Do87Ip/uh1kYQG40JY/teAJlybylwuUKsE+M8CF8Og0UNinaOch/xBqhJ2NUYiarLfTXoMOMnxb/t1nWt7fCg==",
+ "version": "4.0.1103",
+ "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.1103.tgz",
+ "integrity": "sha512-/Tz4ydJp0eY4I5oKv4D4FYK0xPm9fpwCfnSye4UzjRU7bVUv34Qzi6px/1PQJbQtpUiISwF7tuWH6tyB5AWknw==",
"license": "Elastic-2.0",
"dependencies": {
"@inquirer/prompts": "7.9.0",
- "@mintlify/common": "1.0.682",
- "@mintlify/link-rot": "3.0.839",
- "@mintlify/models": "0.0.259",
- "@mintlify/prebuild": "1.0.817",
- "@mintlify/previewing": "4.0.873",
- "@mintlify/validation": "0.1.568",
+ "@mintlify/common": "1.0.844",
+ "@mintlify/link-rot": "3.0.1019",
+ "@mintlify/prebuild": "1.0.986",
+ "@mintlify/previewing": "4.0.1047",
+ "@mintlify/validation": "0.1.660",
"adm-zip": "0.5.16",
"chalk": "5.2.0",
"color": "4.2.3",
@@ -1126,10 +1159,14 @@
"inquirer": "12.3.0",
"js-yaml": "4.1.0",
"mdast-util-mdx-jsx": "3.2.0",
+ "open": "^8.4.2",
+ "openid-client": "^6.8.2",
+ "posthog-node": "5.17.2",
"react": "19.2.3",
"semver": "7.7.2",
"unist-util-visit": "5.0.0",
- "yargs": "17.7.1"
+ "yargs": "17.7.1",
+ "zod": "^4.3.6"
},
"bin": {
"mint": "bin/index.js",
@@ -1137,19 +1174,23 @@
},
"engines": {
"node": ">=18.0.0"
+ },
+ "optionalDependencies": {
+ "keytar": "^7.9.0"
}
},
"node_modules/@mintlify/common": {
- "version": "1.0.682",
- "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.682.tgz",
- "integrity": "sha512-pa08wRQ1BHJboGLZUhV30P6MWpUawxhSAL3YyudAsmhG4KGjoTO1hlOZid7C61NyHR4K2JQo7GZvKUPDSvuRvg==",
+ "version": "1.0.844",
+ "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.844.tgz",
+ "integrity": "sha512-uTQ5yGFNvP4wpc5FHvBEkJubg5VNW9R2LL9+IcSg/KraDzRn0vCD9YIdq2f2RdwYDYl6sWGMmYjDxUqrOOZVFg==",
"license": "ISC",
"dependencies": {
"@asyncapi/parser": "3.4.0",
+ "@asyncapi/specs": "6.8.1",
"@mintlify/mdx": "^3.0.4",
- "@mintlify/models": "0.0.259",
+ "@mintlify/models": "0.0.290",
"@mintlify/openapi-parser": "^0.0.8",
- "@mintlify/validation": "0.1.568",
+ "@mintlify/validation": "0.1.660",
"@sindresorhus/slugify": "2.2.0",
"@types/mdast": "4.0.4",
"acorn": "8.11.2",
@@ -1183,7 +1224,8 @@
"remark-parse": "11.0.0",
"remark-rehype": "11.1.1",
"remark-stringify": "11.0.0",
- "tailwindcss": "3.4.4",
+ "sucrase": "^3.34.0",
+ "tailwindcss": "^3.4.17",
"unified": "11.0.5",
"unist-builder": "4.0.0",
"unist-util-map": "4.0.0",
@@ -1191,7 +1233,22 @@
"unist-util-remove-position": "5.0.0",
"unist-util-visit": "5.0.0",
"unist-util-visit-parents": "6.0.1",
- "vfile": "6.0.3"
+ "vfile": "6.0.3",
+ "xss": "1.0.15"
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/@floating-ui/react-dom": {
+ "version": "2.1.8",
+ "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz",
+ "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@floating-ui/dom": "^1.7.6"
+ },
+ "peerDependencies": {
+ "react": ">=16.8.0",
+ "react-dom": ">=16.8.0"
}
},
"node_modules/@mintlify/common/node_modules/@mintlify/mdx": {
@@ -1266,124 +1323,347 @@
"url": "https://opencollective.com/unified"
}
},
- "node_modules/@mintlify/common/node_modules/mdast-util-mdx-jsx": {
- "version": "3.1.3",
- "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz",
- "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-arrow": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
+ "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
"license": "MIT",
+ "peer": true,
"dependencies": {
- "@types/estree-jsx": "^1.0.0",
- "@types/hast": "^3.0.0",
- "@types/mdast": "^4.0.0",
- "@types/unist": "^3.0.0",
- "ccount": "^2.0.0",
- "devlop": "^1.1.0",
- "mdast-util-from-markdown": "^2.0.0",
- "mdast-util-to-markdown": "^2.0.0",
- "parse-entities": "^4.0.0",
- "stringify-entities": "^4.0.0",
- "unist-util-stringify-position": "^4.0.0",
- "vfile-message": "^4.0.0"
+ "@radix-ui/react-primitive": "2.1.3"
},
- "funding": {
- "type": "opencollective",
- "url": "https://opencollective.com/unified"
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/common/node_modules/next-mdx-remote-client": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.4.tgz",
- "integrity": "sha512-psCMdO50tfoT1kAH7OGXZvhyRfiHVK6IqwjmWFV5gtLo4dnqjAgcjcLNeJ92iI26UNlKShxYrBs1GQ6UXxk97A==",
- "license": "MPL 2.0",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-dismissable-layer": {
+ "version": "1.1.11",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
+ "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
+ "license": "MIT",
+ "peer": true,
"dependencies": {
- "@babel/code-frame": "^7.27.1",
- "@mdx-js/mdx": "^3.1.1",
- "@mdx-js/react": "^3.1.1",
- "remark-mdx-remove-esm": "^1.2.1",
- "serialize-error": "^12.0.0",
- "vfile": "^6.0.3",
- "vfile-matter": "^5.0.1"
- },
- "engines": {
- "node": ">=18.18.0"
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1",
+ "@radix-ui/react-use-escape-keydown": "1.1.1"
},
"peerDependencies": {
- "react": ">= 18.3.0 < 19.0.0",
- "react-dom": ">= 18.3.0 < 19.0.0"
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/common/node_modules/react": {
- "version": "18.3.1",
- "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
- "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-focus-scope": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
+ "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
"license": "MIT",
"peer": true,
"dependencies": {
- "loose-envify": "^1.1.0"
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1"
},
- "engines": {
- "node": ">=0.10.0"
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/common/node_modules/react-dom": {
- "version": "18.3.1",
- "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
- "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-popover": {
+ "version": "1.1.15",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz",
+ "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==",
"license": "MIT",
"peer": true,
"dependencies": {
- "loose-envify": "^1.1.0",
- "scheduler": "^0.23.2"
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-dismissable-layer": "1.1.11",
+ "@radix-ui/react-focus-guards": "1.1.3",
+ "@radix-ui/react-focus-scope": "1.1.7",
+ "@radix-ui/react-id": "1.1.1",
+ "@radix-ui/react-popper": "1.2.8",
+ "@radix-ui/react-portal": "1.1.9",
+ "@radix-ui/react-presence": "1.1.5",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-slot": "1.2.3",
+ "@radix-ui/react-use-controllable-state": "1.2.2",
+ "aria-hidden": "^1.2.4",
+ "react-remove-scroll": "^2.6.3"
},
"peerDependencies": {
- "react": "^18.3.1"
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/common/node_modules/scheduler": {
- "version": "0.23.2",
- "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
- "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-popper": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
+ "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
"license": "MIT",
"peer": true,
"dependencies": {
- "loose-envify": "^1.1.0"
+ "@floating-ui/react-dom": "^2.0.0",
+ "@radix-ui/react-arrow": "1.1.7",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1",
+ "@radix-ui/react-use-layout-effect": "1.1.1",
+ "@radix-ui/react-use-rect": "1.1.1",
+ "@radix-ui/react-use-size": "1.1.1",
+ "@radix-ui/rect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/link-rot": {
- "version": "3.0.839",
- "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.839.tgz",
- "integrity": "sha512-t9dJDEUWwPa9H9fzGDVtMVQaK9YYyxS6rFkSTe1U2AahglyHBoB4hLF3qOGtr6894x2f2q7JxKkOawBcQ1ERmw==",
- "license": "Elastic-2.0",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-portal": {
+ "version": "1.1.9",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+ "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+ "license": "MIT",
+ "peer": true,
"dependencies": {
- "@mintlify/common": "1.0.682",
- "@mintlify/prebuild": "1.0.817",
- "@mintlify/previewing": "4.0.873",
- "@mintlify/scraping": "4.0.522",
- "@mintlify/validation": "0.1.568",
- "fs-extra": "11.1.0",
- "unist-util-visit": "4.1.2"
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-layout-effect": "1.1.1"
},
- "engines": {
- "node": ">=18.0.0"
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/link-rot/node_modules/@types/unist": {
- "version": "2.0.11",
- "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz",
- "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==",
- "license": "MIT"
- },
- "node_modules/@mintlify/link-rot/node_modules/fs-extra": {
- "version": "11.1.0",
- "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz",
- "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==",
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-presence": {
+ "version": "1.1.5",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
+ "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
"license": "MIT",
+ "peer": true,
"dependencies": {
- "graceful-fs": "^4.2.0",
- "jsonfile": "^6.0.1",
- "universalify": "^2.0.0"
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-use-layout-effect": "1.1.1"
},
- "engines": {
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/@radix-ui/react-primitive": {
+ "version": "2.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+ "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-slot": "1.2.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/mdast-util-mdx-jsx": {
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz",
+ "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==",
+ "license": "MIT",
+ "dependencies": {
+ "@types/estree-jsx": "^1.0.0",
+ "@types/hast": "^3.0.0",
+ "@types/mdast": "^4.0.0",
+ "@types/unist": "^3.0.0",
+ "ccount": "^2.0.0",
+ "devlop": "^1.1.0",
+ "mdast-util-from-markdown": "^2.0.0",
+ "mdast-util-to-markdown": "^2.0.0",
+ "parse-entities": "^4.0.0",
+ "stringify-entities": "^4.0.0",
+ "unist-util-stringify-position": "^4.0.0",
+ "vfile-message": "^4.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/unified"
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/next-mdx-remote-client": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz",
+ "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==",
+ "license": "MPL 2.0",
+ "dependencies": {
+ "@babel/code-frame": "^7.29.0",
+ "@mdx-js/mdx": "^3.1.1",
+ "@mdx-js/react": "^3.1.1",
+ "remark-mdx-remove-esm": "^1.3.1",
+ "serialize-error": "^13.0.1",
+ "vfile": "^6.0.3",
+ "vfile-matter": "^5.0.1"
+ },
+ "engines": {
+ "node": ">=20.9.0"
+ },
+ "peerDependencies": {
+ "react": ">= 18.3.0 < 19.0.0",
+ "react-dom": ">= 18.3.0 < 19.0.0"
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/react": {
+ "version": "18.3.1",
+ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
+ "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "loose-envify": "^1.1.0"
+ },
+ "engines": {
+ "node": ">=0.10.0"
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/react-dom": {
+ "version": "18.3.1",
+ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
+ "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "loose-envify": "^1.1.0",
+ "scheduler": "^0.23.2"
+ },
+ "peerDependencies": {
+ "react": "^18.3.1"
+ }
+ },
+ "node_modules/@mintlify/common/node_modules/scheduler": {
+ "version": "0.23.2",
+ "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
+ "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "loose-envify": "^1.1.0"
+ }
+ },
+ "node_modules/@mintlify/link-rot": {
+ "version": "3.0.1019",
+ "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.1019.tgz",
+ "integrity": "sha512-moUkUUcdfm/ivgavmrcgcnxhJ4XCDAbYPABhQbwo6hP3FHXyTB8jJdbjG/wJLZSzjH3KQpq/+DglMH5cCmSNJQ==",
+ "license": "Elastic-2.0",
+ "dependencies": {
+ "@mintlify/common": "1.0.844",
+ "@mintlify/prebuild": "1.0.986",
+ "@mintlify/previewing": "4.0.1047",
+ "@mintlify/scraping": "4.0.522",
+ "@mintlify/validation": "0.1.660",
+ "fs-extra": "11.1.0",
+ "unist-util-visit": "4.1.2"
+ },
+ "engines": {
+ "node": ">=18.0.0"
+ }
+ },
+ "node_modules/@mintlify/link-rot/node_modules/@types/unist": {
+ "version": "2.0.11",
+ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz",
+ "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==",
+ "license": "MIT"
+ },
+ "node_modules/@mintlify/link-rot/node_modules/fs-extra": {
+ "version": "11.1.0",
+ "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz",
+ "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==",
+ "license": "MIT",
+ "dependencies": {
+ "graceful-fs": "^4.2.0",
+ "jsonfile": "^6.0.1",
+ "universalify": "^2.0.0"
+ },
+ "engines": {
"node": ">=14.14"
}
},
@@ -1430,9 +1710,9 @@
}
},
"node_modules/@mintlify/models": {
- "version": "0.0.259",
- "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.259.tgz",
- "integrity": "sha512-KOpOeh8e7fVQMA4ex3shGgZ8BLYz8RG/ajRAGDo837/zC4sZQ6l/pPom9VuoN8glE8d4X1FtfOSu8YuER35koA==",
+ "version": "0.0.290",
+ "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.290.tgz",
+ "integrity": "sha512-dkUIepQOpyZmgdapL22wdQi7MXupLyqFWP/ebiP0NYLcRRYBLWFVcpHHfIDGC2mWOZxNCVVZDvg2rTzfccpj6A==",
"license": "Elastic-2.0",
"dependencies": {
"axios": "1.13.2",
@@ -1477,15 +1757,15 @@
}
},
"node_modules/@mintlify/prebuild": {
- "version": "1.0.817",
- "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.817.tgz",
- "integrity": "sha512-+NQzmGRg71kHFThtnlJsK75TefKTbQNXSHrZuzs0shSFzI/uJIBDstrjFWhqRdUSyTAUTfQLF5zV0kTsrPz08A==",
+ "version": "1.0.986",
+ "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.986.tgz",
+ "integrity": "sha512-HGQwegpiP0ZwAg/kpISdtad6t5om32HZ/OCWQGHh2G3+gv2Fjg3hGRttagU88oBT9oKC1N7lJPjhxK8FrvwX3w==",
"license": "Elastic-2.0",
"dependencies": {
- "@mintlify/common": "1.0.682",
+ "@mintlify/common": "1.0.844",
"@mintlify/openapi-parser": "^0.0.8",
- "@mintlify/scraping": "4.0.543",
- "@mintlify/validation": "0.1.568",
+ "@mintlify/scraping": "4.0.708",
+ "@mintlify/validation": "0.1.660",
"chalk": "5.3.0",
"favicons": "7.2.0",
"front-matter": "4.0.2",
@@ -1581,6 +1861,9 @@
"cpu": [
"arm"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -1597,6 +1880,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -1613,6 +1899,9 @@
"cpu": [
"s390x"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -1629,6 +1918,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -1645,6 +1937,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "musl"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -1661,6 +1956,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "musl"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -1677,6 +1975,9 @@
"cpu": [
"arm"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1699,6 +2000,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1721,6 +2025,9 @@
"cpu": [
"s390x"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1743,6 +2050,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1765,6 +2075,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "musl"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1787,6 +2100,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "musl"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -1860,12 +2176,12 @@
}
},
"node_modules/@mintlify/prebuild/node_modules/@mintlify/scraping": {
- "version": "4.0.543",
- "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.543.tgz",
- "integrity": "sha512-SrVZabI7OBbMBQ8qhtnvS8Nv+zCQeSrM2HfWDWbUzBxBQhVAW8hJXLVTg6J3ZpTyQOgOBAw5z8rPrBlblEqEIA==",
+ "version": "4.0.708",
+ "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.708.tgz",
+ "integrity": "sha512-6GDxVKM7B0NqxXvg4Mm8nVhtybAzkVRZcMGtsp5OoHZrnATZ/C4wv2B82ZnwZvdhzLDATWoSoe3W14IXgYYcCQ==",
"license": "Elastic-2.0",
"dependencies": {
- "@mintlify/common": "1.0.682",
+ "@mintlify/common": "1.0.844",
"@mintlify/openapi-parser": "^0.0.8",
"fs-extra": "11.1.1",
"hast-util-to-mdast": "10.1.0",
@@ -1881,7 +2197,7 @@
"unified": "11.0.5",
"unist-util-visit": "5.0.0",
"yargs": "17.7.1",
- "zod": "3.21.4"
+ "zod": "3.24.0"
},
"bin": {
"mintlify-scrape": "bin/cli.js"
@@ -2070,15 +2386,25 @@
"url": "https://opencollective.com/unified"
}
},
+ "node_modules/@mintlify/prebuild/node_modules/zod": {
+ "version": "3.24.0",
+ "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.0.tgz",
+ "integrity": "sha512-Hz+wiY8yD0VLA2k/+nsg2Abez674dDGTai33SwNvMPuf9uIrBC9eFgIMQxBBbHFxVXi8W+5nX9DcAh9YNSQm/w==",
+ "license": "MIT",
+ "funding": {
+ "url": "https://github.com/sponsors/colinhacks"
+ }
+ },
"node_modules/@mintlify/previewing": {
- "version": "4.0.873",
- "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.873.tgz",
- "integrity": "sha512-uq5d5q7LybSR/rB4Wzae6zFUqrtV8TNIcZAz8+nzD7bn5hLoYlGADk4HPg3a9Z6tZXSoGumQFWzneDuO7b4q3w==",
+ "version": "4.0.1047",
+ "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.1047.tgz",
+ "integrity": "sha512-4/k7a/kXkD8LK7nHvRGEPCvigpeunFk2Ku07wlXLR4tB8OEG6v5ZjLFKVHArd+UuRmjHB/oBcCht3DARaizPOw==",
"license": "Elastic-2.0",
"dependencies": {
- "@mintlify/common": "1.0.682",
- "@mintlify/prebuild": "1.0.817",
- "@mintlify/validation": "0.1.568",
+ "@mintlify/common": "1.0.844",
+ "@mintlify/prebuild": "1.0.986",
+ "@mintlify/validation": "0.1.660",
+ "adm-zip": "0.5.16",
"better-opn": "3.0.2",
"chalk": "5.2.0",
"chokidar": "3.5.3",
@@ -2194,6 +2520,20 @@
"node": ">=18.0.0"
}
},
+ "node_modules/@mintlify/scraping/node_modules/@floating-ui/react-dom": {
+ "version": "2.1.8",
+ "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz",
+ "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@floating-ui/dom": "^1.7.6"
+ },
+ "peerDependencies": {
+ "react": ">=16.8.0",
+ "react-dom": ">=16.8.0"
+ }
+ },
"node_modules/@mintlify/scraping/node_modules/@mintlify/common": {
"version": "1.0.661",
"resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.661.tgz",
@@ -2367,71 +2707,315 @@
"zod-to-json-schema": "3.20.4"
}
},
- "node_modules/@mintlify/scraping/node_modules/axios": {
- "version": "1.10.0",
- "resolved": "https://registry.npmjs.org/axios/-/axios-1.10.0.tgz",
- "integrity": "sha512-/1xYAC4MP/HEG+3duIhFr4ZQXR4sQXOIe+o6sdqzeykGLx6Upp/1p8MHqhINOvGeP7xyNHe7tsiJByc4SSVUxw==",
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-arrow": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
+ "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
"license": "MIT",
+ "peer": true,
"dependencies": {
- "follow-redirects": "^1.15.6",
- "form-data": "^4.0.0",
- "proxy-from-env": "^1.1.0"
+ "@radix-ui/react-primitive": "2.1.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/scraping/node_modules/fs-extra": {
- "version": "11.1.1",
- "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz",
- "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==",
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-dismissable-layer": {
+ "version": "1.1.11",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
+ "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
"license": "MIT",
+ "peer": true,
"dependencies": {
- "graceful-fs": "^4.2.0",
- "jsonfile": "^6.0.1",
- "universalify": "^2.0.0"
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1",
+ "@radix-ui/react-use-escape-keydown": "1.1.1"
},
- "engines": {
- "node": ">=14.14"
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
- "node_modules/@mintlify/scraping/node_modules/mdast-util-mdx-jsx": {
- "version": "3.1.3",
- "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz",
- "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==",
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-focus-scope": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
+ "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
"license": "MIT",
+ "peer": true,
"dependencies": {
- "@types/estree-jsx": "^1.0.0",
- "@types/hast": "^3.0.0",
- "@types/mdast": "^4.0.0",
- "@types/unist": "^3.0.0",
- "ccount": "^2.0.0",
- "devlop": "^1.1.0",
- "mdast-util-from-markdown": "^2.0.0",
- "mdast-util-to-markdown": "^2.0.0",
- "parse-entities": "^4.0.0",
- "stringify-entities": "^4.0.0",
- "unist-util-stringify-position": "^4.0.0",
- "vfile-message": "^4.0.0"
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1"
},
- "funding": {
- "type": "opencollective",
- "url": "https://opencollective.com/unified"
- }
- },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-popover": {
+ "version": "1.1.15",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz",
+ "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-dismissable-layer": "1.1.11",
+ "@radix-ui/react-focus-guards": "1.1.3",
+ "@radix-ui/react-focus-scope": "1.1.7",
+ "@radix-ui/react-id": "1.1.1",
+ "@radix-ui/react-popper": "1.2.8",
+ "@radix-ui/react-portal": "1.1.9",
+ "@radix-ui/react-presence": "1.1.5",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-slot": "1.2.3",
+ "@radix-ui/react-use-controllable-state": "1.2.2",
+ "aria-hidden": "^1.2.4",
+ "react-remove-scroll": "^2.6.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-popper": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
+ "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@floating-ui/react-dom": "^2.0.0",
+ "@radix-ui/react-arrow": "1.1.7",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1",
+ "@radix-ui/react-use-layout-effect": "1.1.1",
+ "@radix-ui/react-use-rect": "1.1.1",
+ "@radix-ui/react-use-size": "1.1.1",
+ "@radix-ui/rect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-portal": {
+ "version": "1.1.9",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+ "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-layout-effect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-presence": {
+ "version": "1.1.5",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
+ "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-use-layout-effect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-primitive": {
+ "version": "2.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+ "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-slot": "1.2.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/axios": {
+ "version": "1.10.0",
+ "resolved": "https://registry.npmjs.org/axios/-/axios-1.10.0.tgz",
+ "integrity": "sha512-/1xYAC4MP/HEG+3duIhFr4ZQXR4sQXOIe+o6sdqzeykGLx6Upp/1p8MHqhINOvGeP7xyNHe7tsiJByc4SSVUxw==",
+ "license": "MIT",
+ "dependencies": {
+ "follow-redirects": "^1.15.6",
+ "form-data": "^4.0.0",
+ "proxy-from-env": "^1.1.0"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/fs-extra": {
+ "version": "11.1.1",
+ "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz",
+ "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==",
+ "license": "MIT",
+ "dependencies": {
+ "graceful-fs": "^4.2.0",
+ "jsonfile": "^6.0.1",
+ "universalify": "^2.0.0"
+ },
+ "engines": {
+ "node": ">=14.14"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/glob-parent": {
+ "version": "6.0.2",
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
+ "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==",
+ "license": "ISC",
+ "dependencies": {
+ "is-glob": "^4.0.3"
+ },
+ "engines": {
+ "node": ">=10.13.0"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/lilconfig": {
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz",
+ "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=10"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/mdast-util-mdx-jsx": {
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz",
+ "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==",
+ "license": "MIT",
+ "dependencies": {
+ "@types/estree-jsx": "^1.0.0",
+ "@types/hast": "^3.0.0",
+ "@types/mdast": "^4.0.0",
+ "@types/unist": "^3.0.0",
+ "ccount": "^2.0.0",
+ "devlop": "^1.1.0",
+ "mdast-util-from-markdown": "^2.0.0",
+ "mdast-util-to-markdown": "^2.0.0",
+ "parse-entities": "^4.0.0",
+ "stringify-entities": "^4.0.0",
+ "unist-util-stringify-position": "^4.0.0",
+ "vfile-message": "^4.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/unified"
+ }
+ },
"node_modules/@mintlify/scraping/node_modules/next-mdx-remote-client": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.4.tgz",
- "integrity": "sha512-psCMdO50tfoT1kAH7OGXZvhyRfiHVK6IqwjmWFV5gtLo4dnqjAgcjcLNeJ92iI26UNlKShxYrBs1GQ6UXxk97A==",
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz",
+ "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==",
"license": "MPL 2.0",
"dependencies": {
- "@babel/code-frame": "^7.27.1",
+ "@babel/code-frame": "^7.29.0",
"@mdx-js/mdx": "^3.1.1",
"@mdx-js/react": "^3.1.1",
- "remark-mdx-remove-esm": "^1.2.1",
- "serialize-error": "^12.0.0",
+ "remark-mdx-remove-esm": "^1.3.1",
+ "serialize-error": "^13.0.1",
"vfile": "^6.0.3",
"vfile-matter": "^5.0.1"
},
"engines": {
- "node": ">=18.18.0"
+ "node": ">=20.9.0"
},
"peerDependencies": {
"react": ">= 18.3.0 < 19.0.0",
@@ -2489,40 +3073,157 @@
"loose-envify": "^1.1.0"
}
},
- "node_modules/@mintlify/validation": {
- "version": "0.1.568",
- "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.568.tgz",
- "integrity": "sha512-TsVBQKDU9okr9zncqN33hYAERMdF3a7kgvBNsJCkSqQmhSgtdZw7ZfJV0v1UBnBUZ96yhN4XUF7l4azgHVBLDw==",
- "license": "Elastic-2.0",
- "dependencies": {
- "@mintlify/mdx": "^3.0.4",
- "@mintlify/models": "0.0.259",
- "arktype": "2.1.27",
- "js-yaml": "4.1.0",
- "lcm": "0.0.3",
- "lodash": "4.17.21",
- "object-hash": "3.0.0",
- "openapi-types": "12.1.3",
- "uuid": "11.1.0",
- "zod": "3.21.4",
- "zod-to-json-schema": "3.20.4"
- }
- },
- "node_modules/@mintlify/validation/node_modules/@mintlify/mdx": {
- "version": "3.0.4",
- "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz",
- "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==",
+ "node_modules/@mintlify/scraping/node_modules/tailwindcss": {
+ "version": "3.4.4",
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.4.tgz",
+ "integrity": "sha512-ZoyXOdJjISB7/BcLTR6SEsLgKtDStYyYZVLsUtWChO4Ps20CBad7lfJKVDiejocV4ME1hLmyY0WJE3hSDcmQ2A==",
"license": "MIT",
"dependencies": {
- "@shikijs/transformers": "^3.11.0",
- "@shikijs/twoslash": "^3.12.2",
- "arktype": "^2.1.26",
- "hast-util-to-string": "^3.0.1",
- "mdast-util-from-markdown": "^2.0.2",
- "mdast-util-gfm": "^3.1.0",
- "mdast-util-mdx-jsx": "^3.2.0",
- "mdast-util-to-hast": "^13.2.0",
- "next-mdx-remote-client": "^1.0.3",
+ "@alloc/quick-lru": "^5.2.0",
+ "arg": "^5.0.2",
+ "chokidar": "^3.5.3",
+ "didyoumean": "^1.2.2",
+ "dlv": "^1.1.3",
+ "fast-glob": "^3.3.0",
+ "glob-parent": "^6.0.2",
+ "is-glob": "^4.0.3",
+ "jiti": "^1.21.0",
+ "lilconfig": "^2.1.0",
+ "micromatch": "^4.0.5",
+ "normalize-path": "^3.0.0",
+ "object-hash": "^3.0.0",
+ "picocolors": "^1.0.0",
+ "postcss": "^8.4.23",
+ "postcss-import": "^15.1.0",
+ "postcss-js": "^4.0.1",
+ "postcss-load-config": "^4.0.1",
+ "postcss-nested": "^6.0.1",
+ "postcss-selector-parser": "^6.0.11",
+ "resolve": "^1.22.2",
+ "sucrase": "^3.32.0"
+ },
+ "bin": {
+ "tailwind": "lib/cli.js",
+ "tailwindcss": "lib/cli.js"
+ },
+ "engines": {
+ "node": ">=14.0.0"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/tailwindcss/node_modules/postcss-load-config": {
+ "version": "4.0.2",
+ "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz",
+ "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==",
+ "funding": [
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/postcss/"
+ },
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/ai"
+ }
+ ],
+ "license": "MIT",
+ "dependencies": {
+ "lilconfig": "^3.0.0",
+ "yaml": "^2.3.4"
+ },
+ "engines": {
+ "node": ">= 14"
+ },
+ "peerDependencies": {
+ "postcss": ">=8.0.9",
+ "ts-node": ">=9.0.0"
+ },
+ "peerDependenciesMeta": {
+ "postcss": {
+ "optional": true
+ },
+ "ts-node": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/tailwindcss/node_modules/postcss-load-config/node_modules/lilconfig": {
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
+ "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=14"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/antonk52"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/zod": {
+ "version": "3.21.4",
+ "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz",
+ "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==",
+ "license": "MIT",
+ "funding": {
+ "url": "https://github.com/sponsors/colinhacks"
+ }
+ },
+ "node_modules/@mintlify/scraping/node_modules/zod-to-json-schema": {
+ "version": "3.20.4",
+ "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz",
+ "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==",
+ "license": "ISC",
+ "peerDependencies": {
+ "zod": "^3.20.0"
+ }
+ },
+ "node_modules/@mintlify/validation": {
+ "version": "0.1.660",
+ "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.660.tgz",
+ "integrity": "sha512-IHlea3t9ZZcQMOfext3fZuG6/hXXTZPBFJkgeHA9lbG2OkdAVRbSMDY9FvC07sEEX1VQJX+bPimRaXUz/ujyYg==",
+ "license": "Elastic-2.0",
+ "dependencies": {
+ "@mintlify/mdx": "^3.0.4",
+ "@mintlify/models": "0.0.290",
+ "arktype": "2.1.27",
+ "js-yaml": "4.1.0",
+ "lcm": "0.0.3",
+ "lodash": "4.17.21",
+ "neotraverse": "0.6.18",
+ "object-hash": "3.0.0",
+ "openapi-types": "12.1.3",
+ "uuid": "11.1.0",
+ "zod": "3.24.0",
+ "zod-to-json-schema": "3.20.4"
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@floating-ui/react-dom": {
+ "version": "2.1.8",
+ "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz",
+ "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@floating-ui/dom": "^1.7.6"
+ },
+ "peerDependencies": {
+ "react": ">=16.8.0",
+ "react-dom": ">=16.8.0"
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@mintlify/mdx": {
+ "version": "3.0.4",
+ "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz",
+ "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==",
+ "license": "MIT",
+ "dependencies": {
+ "@shikijs/transformers": "^3.11.0",
+ "@shikijs/twoslash": "^3.12.2",
+ "arktype": "^2.1.26",
+ "hast-util-to-string": "^3.0.1",
+ "mdast-util-from-markdown": "^2.0.2",
+ "mdast-util-gfm": "^3.1.0",
+ "mdast-util-mdx-jsx": "^3.2.0",
+ "mdast-util-to-hast": "^13.2.0",
+ "next-mdx-remote-client": "^1.0.3",
"rehype-katex": "^7.0.1",
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0",
@@ -2532,9 +3233,232 @@
"unist-util-visit": "^5.0.0"
},
"peerDependencies": {
- "@radix-ui/react-popover": "^1.1.15",
- "react": "^18.3.1",
- "react-dom": "^18.3.1"
+ "@radix-ui/react-popover": "^1.1.15",
+ "react": "^18.3.1",
+ "react-dom": "^18.3.1"
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-arrow": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
+ "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-primitive": "2.1.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-dismissable-layer": {
+ "version": "1.1.11",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
+ "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1",
+ "@radix-ui/react-use-escape-keydown": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-focus-scope": {
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
+ "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-popover": {
+ "version": "1.1.15",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz",
+ "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-dismissable-layer": "1.1.11",
+ "@radix-ui/react-focus-guards": "1.1.3",
+ "@radix-ui/react-focus-scope": "1.1.7",
+ "@radix-ui/react-id": "1.1.1",
+ "@radix-ui/react-popper": "1.2.8",
+ "@radix-ui/react-portal": "1.1.9",
+ "@radix-ui/react-presence": "1.1.5",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-slot": "1.2.3",
+ "@radix-ui/react-use-controllable-state": "1.2.2",
+ "aria-hidden": "^1.2.4",
+ "react-remove-scroll": "^2.6.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-popper": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
+ "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@floating-ui/react-dom": "^2.0.0",
+ "@radix-ui/react-arrow": "1.1.7",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-callback-ref": "1.1.1",
+ "@radix-ui/react-use-layout-effect": "1.1.1",
+ "@radix-ui/react-use-rect": "1.1.1",
+ "@radix-ui/react-use-size": "1.1.1",
+ "@radix-ui/rect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-portal": {
+ "version": "1.1.9",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+ "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-use-layout-effect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-presence": {
+ "version": "1.1.5",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
+ "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-use-layout-effect": "1.1.1"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/@radix-ui/react-primitive": {
+ "version": "2.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+ "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@radix-ui/react-slot": "1.2.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
}
},
"node_modules/@mintlify/validation/node_modules/mdast-util-gfm": {
@@ -2557,21 +3481,21 @@
}
},
"node_modules/@mintlify/validation/node_modules/next-mdx-remote-client": {
- "version": "1.1.4",
- "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.4.tgz",
- "integrity": "sha512-psCMdO50tfoT1kAH7OGXZvhyRfiHVK6IqwjmWFV5gtLo4dnqjAgcjcLNeJ92iI26UNlKShxYrBs1GQ6UXxk97A==",
+ "version": "1.1.7",
+ "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz",
+ "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==",
"license": "MPL 2.0",
"dependencies": {
- "@babel/code-frame": "^7.27.1",
+ "@babel/code-frame": "^7.29.0",
"@mdx-js/mdx": "^3.1.1",
"@mdx-js/react": "^3.1.1",
- "remark-mdx-remove-esm": "^1.2.1",
- "serialize-error": "^12.0.0",
+ "remark-mdx-remove-esm": "^1.3.1",
+ "serialize-error": "^13.0.1",
"vfile": "^6.0.3",
"vfile-matter": "^5.0.1"
},
"engines": {
- "node": ">=18.18.0"
+ "node": ">=20.9.0"
},
"peerDependencies": {
"react": ">= 18.3.0 < 19.0.0",
@@ -2615,6 +3539,24 @@
"loose-envify": "^1.1.0"
}
},
+ "node_modules/@mintlify/validation/node_modules/zod": {
+ "version": "3.24.0",
+ "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.0.tgz",
+ "integrity": "sha512-Hz+wiY8yD0VLA2k/+nsg2Abez674dDGTai33SwNvMPuf9uIrBC9eFgIMQxBBbHFxVXi8W+5nX9DcAh9YNSQm/w==",
+ "license": "MIT",
+ "funding": {
+ "url": "https://github.com/sponsors/colinhacks"
+ }
+ },
+ "node_modules/@mintlify/validation/node_modules/zod-to-json-schema": {
+ "version": "3.20.4",
+ "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz",
+ "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==",
+ "license": "ISC",
+ "peerDependencies": {
+ "zod": "^3.20.0"
+ }
+ },
"node_modules/@nodelib/fs.scandir": {
"version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -2659,6 +3601,15 @@
"fast-deep-equal": "^3.1.3"
}
},
+ "node_modules/@posthog/core": {
+ "version": "1.7.1",
+ "resolved": "https://registry.npmjs.org/@posthog/core/-/core-1.7.1.tgz",
+ "integrity": "sha512-kjK0eFMIpKo9GXIbts8VtAknsoZ18oZorANdtuTj1CbgS28t4ZVq//HAWhnxEuXRTrtkd+SUJ6Ux3j2Af8NCuA==",
+ "license": "MIT",
+ "dependencies": {
+ "cross-spawn": "^7.0.6"
+ }
+ },
"node_modules/@puppeteer/browsers": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.0.tgz",
@@ -2731,6 +3682,32 @@
"node": ">=8"
}
},
+ "node_modules/@puppeteer/browsers/node_modules/tar-fs": {
+ "version": "3.1.2",
+ "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz",
+ "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==",
+ "license": "MIT",
+ "dependencies": {
+ "pump": "^3.0.0",
+ "tar-stream": "^3.1.5"
+ },
+ "optionalDependencies": {
+ "bare-fs": "^4.0.1",
+ "bare-path": "^3.0.0"
+ }
+ },
+ "node_modules/@puppeteer/browsers/node_modules/tar-stream": {
+ "version": "3.1.8",
+ "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz",
+ "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==",
+ "license": "MIT",
+ "dependencies": {
+ "b4a": "^1.6.4",
+ "bare-fs": "^4.5.5",
+ "fast-fifo": "^1.2.0",
+ "streamx": "^2.15.0"
+ }
+ },
"node_modules/@puppeteer/browsers/node_modules/yargs": {
"version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
@@ -2756,30 +3733,6 @@
"license": "MIT",
"peer": true
},
- "node_modules/@radix-ui/react-arrow": {
- "version": "1.1.7",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
- "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@radix-ui/react-primitive": "2.1.3"
- },
- "peerDependencies": {
- "@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- },
- "@types/react-dom": {
- "optional": true
- }
- }
- },
"node_modules/@radix-ui/react-compose-refs": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz",
@@ -2796,253 +3749,54 @@
}
}
},
- "node_modules/@radix-ui/react-context": {
- "version": "1.1.2",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz",
- "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==",
- "license": "MIT",
- "peer": true,
- "peerDependencies": {
- "@types/react": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-dismissable-layer": {
- "version": "1.1.11",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
- "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@radix-ui/primitive": "1.1.3",
- "@radix-ui/react-compose-refs": "1.1.2",
- "@radix-ui/react-primitive": "2.1.3",
- "@radix-ui/react-use-callback-ref": "1.1.1",
- "@radix-ui/react-use-escape-keydown": "1.1.1"
- },
- "peerDependencies": {
- "@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- },
- "@types/react-dom": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-focus-guards": {
- "version": "1.1.3",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
- "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
- "license": "MIT",
- "peer": true,
- "peerDependencies": {
- "@types/react": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-focus-scope": {
- "version": "1.1.7",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
- "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@radix-ui/react-compose-refs": "1.1.2",
- "@radix-ui/react-primitive": "2.1.3",
- "@radix-ui/react-use-callback-ref": "1.1.1"
- },
- "peerDependencies": {
- "@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- },
- "@types/react-dom": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-id": {
- "version": "1.1.1",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz",
- "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@radix-ui/react-use-layout-effect": "1.1.1"
- },
- "peerDependencies": {
- "@types/react": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-popover": {
- "version": "1.1.15",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz",
- "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@radix-ui/primitive": "1.1.3",
- "@radix-ui/react-compose-refs": "1.1.2",
- "@radix-ui/react-context": "1.1.2",
- "@radix-ui/react-dismissable-layer": "1.1.11",
- "@radix-ui/react-focus-guards": "1.1.3",
- "@radix-ui/react-focus-scope": "1.1.7",
- "@radix-ui/react-id": "1.1.1",
- "@radix-ui/react-popper": "1.2.8",
- "@radix-ui/react-portal": "1.1.9",
- "@radix-ui/react-presence": "1.1.5",
- "@radix-ui/react-primitive": "2.1.3",
- "@radix-ui/react-slot": "1.2.3",
- "@radix-ui/react-use-controllable-state": "1.2.2",
- "aria-hidden": "^1.2.4",
- "react-remove-scroll": "^2.6.3"
- },
- "peerDependencies": {
- "@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- },
- "@types/react-dom": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-popper": {
- "version": "1.2.8",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
- "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "@floating-ui/react-dom": "^2.0.0",
- "@radix-ui/react-arrow": "1.1.7",
- "@radix-ui/react-compose-refs": "1.1.2",
- "@radix-ui/react-context": "1.1.2",
- "@radix-ui/react-primitive": "2.1.3",
- "@radix-ui/react-use-callback-ref": "1.1.1",
- "@radix-ui/react-use-layout-effect": "1.1.1",
- "@radix-ui/react-use-rect": "1.1.1",
- "@radix-ui/react-use-size": "1.1.1",
- "@radix-ui/rect": "1.1.1"
- },
- "peerDependencies": {
- "@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- },
- "@types/react-dom": {
- "optional": true
- }
- }
- },
- "node_modules/@radix-ui/react-portal": {
- "version": "1.1.9",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
- "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+ "node_modules/@radix-ui/react-context": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz",
+ "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==",
"license": "MIT",
"peer": true,
- "dependencies": {
- "@radix-ui/react-primitive": "2.1.3",
- "@radix-ui/react-use-layout-effect": "1.1.1"
- },
"peerDependencies": {
"@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
- },
- "@types/react-dom": {
- "optional": true
}
}
},
- "node_modules/@radix-ui/react-presence": {
- "version": "1.1.5",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
- "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
+ "node_modules/@radix-ui/react-focus-guards": {
+ "version": "1.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
+ "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
"license": "MIT",
"peer": true,
- "dependencies": {
- "@radix-ui/react-compose-refs": "1.1.2",
- "@radix-ui/react-use-layout-effect": "1.1.1"
- },
"peerDependencies": {
"@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
- },
- "@types/react-dom": {
- "optional": true
}
}
},
- "node_modules/@radix-ui/react-primitive": {
- "version": "2.1.3",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
- "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+ "node_modules/@radix-ui/react-id": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz",
+ "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==",
"license": "MIT",
"peer": true,
"dependencies": {
- "@radix-ui/react-slot": "1.2.3"
+ "@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
- },
- "@types/react-dom": {
- "optional": true
}
}
},
@@ -3201,12 +3955,12 @@
"peer": true
},
"node_modules/@shikijs/core": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.21.0.tgz",
- "integrity": "sha512-AXSQu/2n1UIQekY8euBJlvFYZIw0PHY63jUzGbrOma4wPxzznJXTXkri+QcHeBNaFxiiOljKxxJkVSoB3PjbyA==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.23.0.tgz",
+ "integrity": "sha512-NSWQz0riNb67xthdm5br6lAkvpDJRTgB36fxlo37ZzM2yq0PQFFzbd8psqC2XMPgCzo1fW6cVi18+ArJ44wqgA==",
"license": "MIT",
"dependencies": {
- "@shikijs/types": "3.21.0",
+ "@shikijs/types": "3.23.0",
"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4",
"hast-util-to-html": "^9.0.5"
@@ -3236,62 +3990,62 @@
}
},
"node_modules/@shikijs/engine-javascript": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.21.0.tgz",
- "integrity": "sha512-ATwv86xlbmfD9n9gKRiwuPpWgPENAWCLwYCGz9ugTJlsO2kOzhOkvoyV/UD+tJ0uT7YRyD530x6ugNSffmvIiQ==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.23.0.tgz",
+ "integrity": "sha512-aHt9eiGFobmWR5uqJUViySI1bHMqrAgamWE1TYSUoftkAeCCAiGawPMwM+VCadylQtF4V3VNOZ5LmfItH5f3yA==",
"license": "MIT",
"dependencies": {
- "@shikijs/types": "3.21.0",
+ "@shikijs/types": "3.23.0",
"@shikijs/vscode-textmate": "^10.0.2",
"oniguruma-to-es": "^4.3.4"
}
},
"node_modules/@shikijs/engine-oniguruma": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.21.0.tgz",
- "integrity": "sha512-OYknTCct6qiwpQDqDdf3iedRdzj6hFlOPv5hMvI+hkWfCKs5mlJ4TXziBG9nyabLwGulrUjHiCq3xCspSzErYQ==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.23.0.tgz",
+ "integrity": "sha512-1nWINwKXxKKLqPibT5f4pAFLej9oZzQTsby8942OTlsJzOBZ0MWKiwzMsd+jhzu8YPCHAswGnnN1YtQfirL35g==",
"license": "MIT",
"dependencies": {
- "@shikijs/types": "3.21.0",
+ "@shikijs/types": "3.23.0",
"@shikijs/vscode-textmate": "^10.0.2"
}
},
"node_modules/@shikijs/langs": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.21.0.tgz",
- "integrity": "sha512-g6mn5m+Y6GBJ4wxmBYqalK9Sp0CFkUqfNzUy2pJglUginz6ZpWbaWjDB4fbQ/8SHzFjYbtU6Ddlp1pc+PPNDVA==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.23.0.tgz",
+ "integrity": "sha512-2Ep4W3Re5aB1/62RSYQInK9mM3HsLeB91cHqznAJMuylqjzNVAVCMnNWRHFtcNHXsoNRayP9z1qj4Sq3nMqYXg==",
"license": "MIT",
"dependencies": {
- "@shikijs/types": "3.21.0"
+ "@shikijs/types": "3.23.0"
}
},
"node_modules/@shikijs/themes": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.21.0.tgz",
- "integrity": "sha512-BAE4cr9EDiZyYzwIHEk7JTBJ9CzlPuM4PchfcA5ao1dWXb25nv6hYsoDiBq2aZK9E3dlt3WB78uI96UESD+8Mw==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.23.0.tgz",
+ "integrity": "sha512-5qySYa1ZgAT18HR/ypENL9cUSGOeI2x+4IvYJu4JgVJdizn6kG4ia5Q1jDEOi7gTbN4RbuYtmHh0W3eccOrjMA==",
"license": "MIT",
"dependencies": {
- "@shikijs/types": "3.21.0"
+ "@shikijs/types": "3.23.0"
}
},
"node_modules/@shikijs/transformers": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/transformers/-/transformers-3.21.0.tgz",
- "integrity": "sha512-CZwvCWWIiRRiFk9/JKzdEooakAP8mQDtBOQ1TKiCaS2E1bYtyBCOkUzS8akO34/7ufICQ29oeSfkb3tT5KtrhA==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/transformers/-/transformers-3.23.0.tgz",
+ "integrity": "sha512-F9msZVxdF+krQNSdQ4V+Ja5QemeAoTQ2jxt7nJCwhDsdF1JWS3KxIQXA3lQbyKwS3J61oHRUSv4jYWv3CkaKTQ==",
"license": "MIT",
"dependencies": {
- "@shikijs/core": "3.21.0",
- "@shikijs/types": "3.21.0"
+ "@shikijs/core": "3.23.0",
+ "@shikijs/types": "3.23.0"
}
},
"node_modules/@shikijs/twoslash": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/twoslash/-/twoslash-3.21.0.tgz",
- "integrity": "sha512-iH360udAYON2JwfIldoCiMZr9MljuQA5QRBivKLpEuEpmVCSwrR+0WTQ0eS1ptgGBdH9weFiIsA5wJDzsEzTYg==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/twoslash/-/twoslash-3.23.0.tgz",
+ "integrity": "sha512-pNaLJWMA3LU7PhT8tm9OQBZ1epy0jmdgeJzntBtr1EVXLbHxGzTj3mnf9vOdcl84l96qnlJXkJ/NGXZYBpXl5g==",
"license": "MIT",
"dependencies": {
- "@shikijs/core": "3.21.0",
- "@shikijs/types": "3.21.0",
+ "@shikijs/core": "3.23.0",
+ "@shikijs/types": "3.23.0",
"twoslash": "^0.3.6"
},
"peerDependencies": {
@@ -3299,9 +4053,9 @@
}
},
"node_modules/@shikijs/types": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.21.0.tgz",
- "integrity": "sha512-zGrWOxZ0/+0ovPY7PvBU2gIS9tmhSUUt30jAcNV0Bq0gb2S98gwfjIs1vxlmH5zM7/4YxLamT6ChlqqAJmPPjA==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.23.0.tgz",
+ "integrity": "sha512-3JZ5HXOZfYjsYSk0yPwBrkupyYSLpAE26Qc0HLghhZNGTZg/SKxXIIgoxOpmmeQP0RRSDJTk1/vPfw9tbw+jSQ==",
"license": "MIT",
"dependencies": {
"@shikijs/vscode-textmate": "^10.0.2",
@@ -3464,9 +4218,9 @@
}
},
"node_modules/@stoplight/spectral-core": {
- "version": "1.20.0",
- "resolved": "https://registry.npmjs.org/@stoplight/spectral-core/-/spectral-core-1.20.0.tgz",
- "integrity": "sha512-5hBP81nCC1zn1hJXL/uxPNRKNcB+/pEIHgCjPRpl/w/qy9yC9ver04tw1W0l/PMiv0UeB5dYgozXVQ4j5a6QQQ==",
+ "version": "1.21.0",
+ "resolved": "https://registry.npmjs.org/@stoplight/spectral-core/-/spectral-core-1.21.0.tgz",
+ "integrity": "sha512-oj4e/FrDLUhBRocIW+lRMKlJ/q/rDZw61HkLbTFsdMd+f/FTkli2xHNB1YC6n1mrMKjjvy7XlUuFkC7XxtgbWw==",
"license": "Apache-2.0",
"dependencies": {
"@stoplight/better-ajv-errors": "1.0.3",
@@ -3483,7 +4237,7 @@
"ajv-formats": "~2.1.1",
"es-aggregate-error": "^1.0.7",
"jsonpath-plus": "^10.3.0",
- "lodash": "~4.17.21",
+ "lodash": "~4.17.23",
"lodash.topath": "^4.5.2",
"minimatch": "3.1.2",
"nimma": "0.2.3",
@@ -3508,6 +4262,12 @@
"node": "^12.20 || >=14.13"
}
},
+ "node_modules/@stoplight/spectral-core/node_modules/lodash": {
+ "version": "4.17.23",
+ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
+ "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
+ "license": "MIT"
+ },
"node_modules/@stoplight/spectral-core/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
@@ -3759,9 +4519,9 @@
}
},
"node_modules/@types/debug": {
- "version": "4.1.12",
- "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
- "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==",
+ "version": "4.1.13",
+ "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz",
+ "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==",
"license": "MIT",
"dependencies": {
"@types/ms": "*"
@@ -3801,9 +4561,9 @@
}
},
"node_modules/@types/http-cache-semantics": {
- "version": "4.0.4",
- "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz",
- "integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==",
+ "version": "4.2.0",
+ "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz",
+ "integrity": "sha512-L3LgimLHXtGkWikKnsPg0/VFx9OGZaC+eN1u4r+OB1XRqH3meBIAVC2zr1WdMH+RHmnRkqliQAOHNJ/E0j/e0Q==",
"license": "MIT"
},
"node_modules/@types/json-schema": {
@@ -3849,18 +4609,18 @@
}
},
"node_modules/@types/node": {
- "version": "25.0.10",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.10.tgz",
- "integrity": "sha512-zWW5KPngR/yvakJgGOmZ5vTBemDoSqF3AcV/LrO5u5wTWyEAVVh+IT39G4gtyAkh3CtTZs8aX/yRM82OfzHJRg==",
+ "version": "25.5.2",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.2.tgz",
+ "integrity": "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg==",
"license": "MIT",
"dependencies": {
- "undici-types": "~7.16.0"
+ "undici-types": "~7.18.0"
}
},
"node_modules/@types/react": {
- "version": "19.2.9",
- "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.9.tgz",
- "integrity": "sha512-Lpo8kgb/igvMIPeNV2rsYKTgaORYdO1XGVZ4Qz3akwOj0ySGYMPlQWa8BaLn0G63D1aSaAQ5ldR06wCpChQCjA==",
+ "version": "19.2.14",
+ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz",
+ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==",
"license": "MIT",
"peer": true,
"dependencies": {
@@ -3890,12 +4650,12 @@
}
},
"node_modules/@typescript/vfs": {
- "version": "1.6.2",
- "resolved": "https://registry.npmjs.org/@typescript/vfs/-/vfs-1.6.2.tgz",
- "integrity": "sha512-hoBwJwcbKHmvd2QVebiytN1aELvpk9B74B4L1mFm/XT1Q/VOYAWl2vQ9AWRFtQq8zmz6enTpfTV8WRc4ATjW/g==",
+ "version": "1.6.4",
+ "resolved": "https://registry.npmjs.org/@typescript/vfs/-/vfs-1.6.4.tgz",
+ "integrity": "sha512-PJFXFS4ZJKiJ9Qiuix6Dz/OwEIqHD7Dme1UwZhTK11vR+5dqW2ACbdndWQexBzCx+CPuMe5WBYQWCsFyGlQLlQ==",
"license": "MIT",
"dependencies": {
- "debug": "^4.1.1"
+ "debug": "^4.4.3"
},
"peerDependencies": {
"typescript": "*"
@@ -3997,9 +4757,9 @@
}
},
"node_modules/ajv": {
- "version": "8.17.1",
- "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
- "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
+ "version": "8.18.0",
+ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz",
+ "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==",
"license": "MIT",
"dependencies": {
"fast-deep-equal": "^3.1.3",
@@ -4053,9 +4813,9 @@
}
},
"node_modules/ansi-escapes": {
- "version": "7.2.0",
- "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.2.0.tgz",
- "integrity": "sha512-g6LhBsl+GBPRWGWsBtutpzBYuIIdBkLEvad5C/va/74Db018+5TZiyA26cZJAr3Rft5lprVqOIPxf5Vid6tqAw==",
+ "version": "7.3.0",
+ "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.3.0.tgz",
+ "integrity": "sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg==",
"license": "MIT",
"dependencies": {
"environment": "^1.0.0"
@@ -4305,9 +5065,9 @@
}
},
"node_modules/b4a": {
- "version": "1.7.3",
- "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz",
- "integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==",
+ "version": "1.8.0",
+ "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz",
+ "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==",
"license": "Apache-2.0",
"peerDependencies": {
"react-native-b4a": "*"
@@ -4349,11 +5109,10 @@
}
},
"node_modules/bare-fs": {
- "version": "4.5.2",
- "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
- "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
+ "version": "4.6.0",
+ "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.6.0.tgz",
+ "integrity": "sha512-2YkS7NuiJceSEbyEOdSNLE9tsGd+f4+f7C+Nik/MCk27SYdwIMPT/yRKvg++FZhQXgk0KWJKJyXX9RhVV0RGqA==",
"license": "Apache-2.0",
- "optional": true,
"dependencies": {
"bare-events": "^2.5.4",
"bare-path": "^3.0.0",
@@ -4374,11 +5133,10 @@
}
},
"node_modules/bare-os": {
- "version": "3.6.2",
- "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
- "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
+ "version": "3.8.7",
+ "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.8.7.tgz",
+ "integrity": "sha512-G4Gr1UsGeEy2qtDTZwL7JFLo2wapUarz7iTMcYcMFdS89AIQuBoyjgXZz0Utv7uHs3xA9LckhVbeBi8lEQrC+w==",
"license": "Apache-2.0",
- "optional": true,
"engines": {
"bare": ">=1.14.0"
}
@@ -4388,25 +5146,28 @@
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
"license": "Apache-2.0",
- "optional": true,
"dependencies": {
"bare-os": "^3.0.1"
}
},
"node_modules/bare-stream": {
- "version": "2.7.0",
- "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
- "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
+ "version": "2.12.0",
+ "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.12.0.tgz",
+ "integrity": "sha512-w28i8lkBgREV3rPXGbgK+BO66q+ZpKqRWrZLiCdmmUlLPrQ45CzkvRhN+7lnv00Gpi2zy5naRxnUFAxCECDm9g==",
"license": "Apache-2.0",
- "optional": true,
"dependencies": {
- "streamx": "^2.21.0"
+ "streamx": "^2.25.0",
+ "teex": "^1.0.1"
},
"peerDependencies": {
+ "bare-abort-controller": "*",
"bare-buffer": "*",
"bare-events": "*"
},
"peerDependenciesMeta": {
+ "bare-abort-controller": {
+ "optional": true
+ },
"bare-buffer": {
"optional": true
},
@@ -4416,11 +5177,10 @@
}
},
"node_modules/bare-url": {
- "version": "2.3.2",
- "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
- "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
+ "version": "2.4.0",
+ "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.4.0.tgz",
+ "integrity": "sha512-NSTU5WN+fy/L0DDenfE8SXQna4voXuW0FHM7wH8i3/q9khUSchfPbPezO4zSFMnDGIf9YE+mt/RWhZgNRKRIXA==",
"license": "Apache-2.0",
- "optional": true,
"dependencies": {
"bare-path": "^3.0.0"
}
@@ -4455,9 +5215,9 @@
}
},
"node_modules/basic-ftp": {
- "version": "5.1.0",
- "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.1.0.tgz",
- "integrity": "sha512-RkaJzeJKDbaDWTIPiJwubyljaEPwpVWkm9Rt5h9Nd6h7tEXTJ3VB4qxdZBioV7JO5yLUaOKwz7vDOzlncUsegw==",
+ "version": "5.3.0",
+ "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.3.0.tgz",
+ "integrity": "sha512-5K9eNNn7ywHPsYnFwjKgYH8Hf8B5emh7JKcPaVjjrMJFQQwGpwowEnZNEtHs7DfR7hCZsmaK3VA4HUK0YarT+w==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
@@ -4487,6 +5247,18 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+ "node_modules/bl": {
+ "version": "4.1.0",
+ "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
+ "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "buffer": "^5.5.0",
+ "inherits": "^2.0.4",
+ "readable-stream": "^3.4.0"
+ }
+ },
"node_modules/body-parser": {
"version": "1.20.1",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz",
@@ -4539,9 +5311,9 @@
"license": "MIT"
},
"node_modules/brace-expansion": {
- "version": "1.1.12",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
- "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+ "version": "1.1.13",
+ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz",
+ "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==",
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0",
@@ -5196,9 +5968,9 @@
}
},
"node_modules/cosmiconfig": {
- "version": "9.0.0",
- "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz",
- "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==",
+ "version": "9.0.1",
+ "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz",
+ "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==",
"license": "MIT",
"dependencies": {
"env-paths": "^2.2.1",
@@ -5221,6 +5993,20 @@
}
}
},
+ "node_modules/cross-spawn": {
+ "version": "7.0.6",
+ "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+ "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
+ "license": "MIT",
+ "dependencies": {
+ "path-key": "^3.1.0",
+ "shebang-command": "^2.0.0",
+ "which": "^2.0.1"
+ },
+ "engines": {
+ "node": ">= 8"
+ }
+ },
"node_modules/cssesc": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz",
@@ -5233,6 +6019,12 @@
"node": ">=4"
}
},
+ "node_modules/cssfilter": {
+ "version": "0.0.10",
+ "resolved": "https://registry.npmjs.org/cssfilter/-/cssfilter-0.0.10.tgz",
+ "integrity": "sha512-FAaLDaplstoRsDR8XGYH51znUN0UY7nMc6Z9/fvE8EXGwvJE9hu7W2vHwx1+bd6gCYnln9nLbzxFTrcO9YQDZw==",
+ "license": "MIT"
+ },
"node_modules/csstype": {
"version": "3.2.3",
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
@@ -5384,6 +6176,16 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+ "node_modules/deep-extend": {
+ "version": "0.6.0",
+ "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
+ "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
+ "license": "MIT",
+ "optional": true,
+ "engines": {
+ "node": ">=4.0.0"
+ }
+ },
"node_modules/defer-to-connect": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz",
@@ -5745,9 +6547,9 @@
}
},
"node_modules/es-abstract": {
- "version": "1.24.1",
- "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.1.tgz",
- "integrity": "sha512-zHXBLhP+QehSSbsS9Pt23Gg964240DPd6QCf8WpkqEXxQ7fhdZzYsocOr5u7apWonsS5EjZDmTF+/slGMyasvw==",
+ "version": "1.24.2",
+ "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.2.tgz",
+ "integrity": "sha512-2FpH9Q5i2RRwyEP1AylXe6nYLR5OhaJTZwmlcP0dL/+JCbgg7yyEo/sEK6HeGZRf3dFpWwThaRHVApXSkW3xeg==",
"license": "MIT",
"dependencies": {
"array-buffer-byte-length": "^1.0.2",
@@ -5897,9 +6699,9 @@
}
},
"node_modules/es-toolkit": {
- "version": "1.44.0",
- "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.44.0.tgz",
- "integrity": "sha512-6penXeZalaV88MM3cGkFZZfOoLGWshWWfdy0tWw/RlVVyhvMaWSBTOvXNeiW3e5FwdS5ePW0LGEu17zT139ktg==",
+ "version": "1.45.1",
+ "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.45.1.tgz",
+ "integrity": "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw==",
"license": "MIT",
"workspaces": [
"docs",
@@ -6145,6 +6947,16 @@
"bare-events": "^2.7.0"
}
},
+ "node_modules/expand-template": {
+ "version": "2.0.3",
+ "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
+ "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
+ "license": "(MIT OR WTFPL)",
+ "optional": true,
+ "engines": {
+ "node": ">=6"
+ }
+ },
"node_modules/express": {
"version": "4.18.2",
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
@@ -6412,6 +7224,9 @@
"cpu": [
"arm"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -6428,6 +7243,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -6444,6 +7262,9 @@
"cpu": [
"s390x"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -6460,6 +7281,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -6476,6 +7300,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "musl"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -6492,6 +7319,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "musl"
+ ],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -6508,6 +7338,9 @@
"cpu": [
"arm"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -6530,6 +7363,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -6552,6 +7388,9 @@
"cpu": [
"s390x"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -6574,6 +7413,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "glibc"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -6596,6 +7438,9 @@
"cpu": [
"arm64"
],
+ "libc": [
+ "musl"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -6618,6 +7463,9 @@
"cpu": [
"x64"
],
+ "libc": [
+ "musl"
+ ],
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -6784,9 +7632,9 @@
"license": "MIT"
},
"node_modules/follow-redirects": {
- "version": "1.15.11",
- "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
- "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
+ "version": "1.16.0",
+ "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.16.0.tgz",
+ "integrity": "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==",
"funding": [
{
"type": "individual",
@@ -6900,6 +7748,13 @@
"js-yaml": "bin/js-yaml.js"
}
},
+ "node_modules/fs-constants": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
+ "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
+ "license": "MIT",
+ "optional": true
+ },
"node_modules/fs-extra": {
"version": "11.2.0",
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz",
@@ -7015,9 +7870,9 @@
}
},
"node_modules/get-east-asian-width": {
- "version": "1.4.0",
- "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz",
- "integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==",
+ "version": "1.5.0",
+ "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.5.0.tgz",
+ "integrity": "sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA==",
"license": "MIT",
"engines": {
"node": ">=18"
@@ -7116,6 +7971,13 @@
"node": ">= 14"
}
},
+ "node_modules/github-from-package": {
+ "version": "0.0.0",
+ "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
+ "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==",
+ "license": "MIT",
+ "optional": true
+ },
"node_modules/glob-parent": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
@@ -7785,6 +8647,13 @@
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
"license": "ISC"
},
+ "node_modules/ini": {
+ "version": "1.3.8",
+ "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
+ "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
+ "license": "ISC",
+ "optional": true
+ },
"node_modules/ink": {
"version": "6.3.0",
"resolved": "https://registry.npmjs.org/ink/-/ink-6.3.0.tgz",
@@ -8537,6 +9406,12 @@
"integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==",
"license": "MIT"
},
+ "node_modules/isexe": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
+ "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
+ "license": "ISC"
+ },
"node_modules/jiti": {
"version": "1.21.7",
"resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz",
@@ -8546,6 +9421,15 @@
"jiti": "bin/jiti.js"
}
},
+ "node_modules/jose": {
+ "version": "6.2.2",
+ "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz",
+ "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==",
+ "license": "MIT",
+ "funding": {
+ "url": "https://github.com/sponsors/panva"
+ }
+ },
"node_modules/js-tokens": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -8610,9 +9494,9 @@
}
},
"node_modules/jsonpath-plus": {
- "version": "10.3.0",
- "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
- "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
+ "version": "10.4.0",
+ "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.4.0.tgz",
+ "integrity": "sha512-T92WWatJXmhBbKsgH/0hl+jxjdXrifi5IKeMY02DWggRxX0UElcbVzPlmgLTbvsPeW1PasQ6xE2Q75stkhGbsA==",
"license": "MIT",
"dependencies": {
"@jsep-plugin/assignment": "^1.3.0",
@@ -8637,9 +9521,9 @@
}
},
"node_modules/katex": {
- "version": "0.16.28",
- "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.28.tgz",
- "integrity": "sha512-YHzO7721WbmAL6Ov1uzN/l5mY5WWWhJBSW+jq4tkfZfsxmo1hu6frS0EOswvjBUnWE6NtjEs48SFn5CQESRLZg==",
+ "version": "0.16.45",
+ "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.45.tgz",
+ "integrity": "sha512-pQpZbdBu7wCTmQUh7ufPmLr0pFoObnGUoL/yhtwJDgmmQpbkg/0HSVti25Fu4rmd1oCR6NGWe9vqTWuWv3GcNA==",
"funding": [
"https://opencollective.com/katex",
"https://github.com/sponsors/katex"
@@ -8652,6 +9536,18 @@
"katex": "cli.js"
}
},
+ "node_modules/keytar": {
+ "version": "7.9.0",
+ "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz",
+ "integrity": "sha512-VPD8mtVtm5JNtA2AErl6Chp06JBfy7diFQ7TQQhdpWOl6MrCRB+eRbvAZUsbGQS9kiMq0coJsy0W0vHpDCkWsQ==",
+ "hasInstallScript": true,
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "node-addon-api": "^4.3.0",
+ "prebuild-install": "^7.0.1"
+ }
+ },
"node_modules/keyv": {
"version": "4.5.4",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz",
@@ -8683,12 +9579,15 @@
}
},
"node_modules/lilconfig": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz",
- "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==",
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
+ "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
"license": "MIT",
"engines": {
- "node": ">=10"
+ "node": ">=14"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/antonk52"
}
},
"node_modules/lines-and-columns": {
@@ -9973,6 +10872,16 @@
"node": "*"
}
},
+ "node_modules/minimist": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
+ "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
+ "license": "MIT",
+ "optional": true,
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/minipass": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz",
@@ -10008,15 +10917,14 @@
}
},
"node_modules/mintlify": {
- "version": "4.2.296",
- "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.296.tgz",
- "integrity": "sha512-OX9eflJeueAnCEZQoQw9Q3YzJXoi5pK8W94DmDxSnsog9mJeVnsTrOXHAnN0Qq1Zxw+hM8ylxzO3qAk7ERxIZA==",
+ "version": "4.2.500",
+ "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.500.tgz",
+ "integrity": "sha512-pVuzf4F+JRmVCuQZLQebIlggCzWQyHsnPiAbuUoJ8aofsKbbs30woRQznoeCmzgmzDxBk25xPay9yy4GRPRlOw==",
"license": "Elastic-2.0",
"dependencies": {
- "@mintlify/cli": "4.0.900"
+ "@mintlify/cli": "4.0.1103"
},
"bin": {
- "mint": "index.js",
"mintlify": "index.js"
},
"engines": {
@@ -10041,6 +10949,13 @@
"node": ">=10"
}
},
+ "node_modules/mkdirp-classic": {
+ "version": "0.5.3",
+ "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
+ "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
+ "license": "MIT",
+ "optional": true
+ },
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -10085,6 +11000,13 @@
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
}
},
+ "node_modules/napi-build-utils": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz",
+ "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==",
+ "license": "MIT",
+ "optional": true
+ },
"node_modules/negotiator": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
@@ -10104,9 +11026,9 @@
}
},
"node_modules/netmask": {
- "version": "2.0.2",
- "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz",
- "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==",
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.1.0.tgz",
+ "integrity": "sha512-z9sZrk6wyf8/NDKKqe+Tyl58XtgkYrV4kgt1O8xrzYvpl1LvPacPo0imMLHfpStk3kgCIq1ksJ2bmJn9hue2lQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4.0"
@@ -10144,6 +11066,26 @@
"url": "https://opencollective.com/unified"
}
},
+ "node_modules/node-abi": {
+ "version": "3.89.0",
+ "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz",
+ "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==",
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "semver": "^7.3.5"
+ },
+ "engines": {
+ "node": ">=10"
+ }
+ },
+ "node_modules/node-addon-api": {
+ "version": "4.3.0",
+ "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz",
+ "integrity": "sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ==",
+ "license": "MIT",
+ "optional": true
+ },
"node_modules/node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
@@ -10164,6 +11106,18 @@
}
}
},
+ "node_modules/non-error": {
+ "version": "0.1.0",
+ "resolved": "https://registry.npmjs.org/non-error/-/non-error-0.1.0.tgz",
+ "integrity": "sha512-TMB1uHiGsHRGv1uYclfhivcnf0/PdFp2pNqRxXjncaAsjYMoisaQJI+SSZCqRq+VliwRTC8tsMQfmrWjDMhkPQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=20"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/normalize-path": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
@@ -10185,6 +11139,15 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+ "node_modules/oauth4webapi": {
+ "version": "3.8.5",
+ "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.5.tgz",
+ "integrity": "sha512-A8jmyUckVhRJj5lspguklcl90Ydqk61H3dcU0oLhH3Yv13KpAliKTt5hknpGGPZSSfOwGyraNEFmofDYH+1kSg==",
+ "license": "MIT",
+ "funding": {
+ "url": "https://github.com/sponsors/panva"
+ }
+ },
"node_modules/object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -10287,13 +11250,13 @@
"license": "MIT"
},
"node_modules/oniguruma-to-es": {
- "version": "4.3.4",
- "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.4.tgz",
- "integrity": "sha512-3VhUGN3w2eYxnTzHn+ikMI+fp/96KoRSVK9/kMTcFqj1NRDh2IhQCKvYxDnWePKRXY/AqH+Fuiyb7VHSzBjHfA==",
+ "version": "4.3.5",
+ "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.5.tgz",
+ "integrity": "sha512-Zjygswjpsewa0NLTsiizVuMQZbp0MDyM6lIt66OxsF21npUDlzpHi1Mgb/qhQdkb+dWFTzJmFbEWdvZgRho8eQ==",
"license": "MIT",
"dependencies": {
"oniguruma-parser": "^0.12.1",
- "regex": "^6.0.1",
+ "regex": "^6.1.0",
"regex-recursion": "^6.0.2"
}
},
@@ -10320,6 +11283,19 @@
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
"license": "MIT"
},
+ "node_modules/openid-client": {
+ "version": "6.8.2",
+ "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.2.tgz",
+ "integrity": "sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==",
+ "license": "MIT",
+ "dependencies": {
+ "jose": "^6.1.3",
+ "oauth4webapi": "^3.8.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/panva"
+ }
+ },
"node_modules/own-keys": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz",
@@ -10525,6 +11501,15 @@
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
}
},
+ "node_modules/path-key": {
+ "version": "3.1.1",
+ "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
+ "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=8"
+ }
+ },
"node_modules/path-parse": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
@@ -10550,9 +11535,9 @@
"license": "ISC"
},
"node_modules/picomatch": {
- "version": "2.3.1",
- "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
- "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
+ "version": "2.3.2",
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"license": "MIT",
"engines": {
"node": ">=8.6"
@@ -10668,9 +11653,9 @@
}
},
"node_modules/postcss-load-config": {
- "version": "4.0.2",
- "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz",
- "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==",
+ "version": "6.0.1",
+ "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz",
+ "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==",
"funding": [
{
"type": "opencollective",
@@ -10683,37 +11668,32 @@
],
"license": "MIT",
"dependencies": {
- "lilconfig": "^3.0.0",
- "yaml": "^2.3.4"
+ "lilconfig": "^3.1.1"
},
"engines": {
- "node": ">= 14"
+ "node": ">= 18"
},
"peerDependencies": {
+ "jiti": ">=1.21.0",
"postcss": ">=8.0.9",
- "ts-node": ">=9.0.0"
+ "tsx": "^4.8.1",
+ "yaml": "^2.4.2"
},
"peerDependenciesMeta": {
+ "jiti": {
+ "optional": true
+ },
"postcss": {
"optional": true
},
- "ts-node": {
+ "tsx": {
+ "optional": true
+ },
+ "yaml": {
"optional": true
}
}
},
- "node_modules/postcss-load-config/node_modules/lilconfig": {
- "version": "3.1.3",
- "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
- "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
- "license": "MIT",
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/antonk52"
- }
- },
"node_modules/postcss-nested": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
@@ -10745,19 +11725,59 @@
"integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==",
"license": "MIT",
"dependencies": {
- "cssesc": "^3.0.0",
- "util-deprecate": "^1.0.2"
+ "cssesc": "^3.0.0",
+ "util-deprecate": "^1.0.2"
+ },
+ "engines": {
+ "node": ">=4"
+ }
+ },
+ "node_modules/postcss-value-parser": {
+ "version": "4.2.0",
+ "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
+ "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
+ "license": "MIT"
+ },
+ "node_modules/posthog-node": {
+ "version": "5.17.2",
+ "resolved": "https://registry.npmjs.org/posthog-node/-/posthog-node-5.17.2.tgz",
+ "integrity": "sha512-lz3YJOr0Nmiz0yHASaINEDHqoV+0bC3eD8aZAG+Ky292dAnVYul+ga/dMX8KCBXg8hHfKdxw0SztYD5j6dgUqQ==",
+ "license": "MIT",
+ "dependencies": {
+ "@posthog/core": "1.7.1"
+ },
+ "engines": {
+ "node": ">=20"
+ }
+ },
+ "node_modules/prebuild-install": {
+ "version": "7.1.3",
+ "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
+ "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==",
+ "deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.",
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "detect-libc": "^2.0.0",
+ "expand-template": "^2.0.3",
+ "github-from-package": "0.0.0",
+ "minimist": "^1.2.3",
+ "mkdirp-classic": "^0.5.3",
+ "napi-build-utils": "^2.0.0",
+ "node-abi": "^3.3.0",
+ "pump": "^3.0.0",
+ "rc": "^1.2.7",
+ "simple-get": "^4.0.0",
+ "tar-fs": "^2.0.0",
+ "tunnel-agent": "^0.6.0"
+ },
+ "bin": {
+ "prebuild-install": "bin.js"
},
"engines": {
- "node": ">=4"
+ "node": ">=10"
}
},
- "node_modules/postcss-value-parser": {
- "version": "4.2.0",
- "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
- "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
- "license": "MIT"
- },
"node_modules/progress": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
@@ -10858,9 +11878,9 @@
}
},
"node_modules/pump": {
- "version": "3.0.3",
- "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz",
- "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==",
+ "version": "3.0.4",
+ "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz",
+ "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==",
"license": "MIT",
"dependencies": {
"end-of-stream": "^1.1.0",
@@ -10986,6 +12006,22 @@
"node": ">=0.10.0"
}
},
+ "node_modules/rc": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
+ "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
+ "license": "(BSD-2-Clause OR MIT OR Apache-2.0)",
+ "optional": true,
+ "dependencies": {
+ "deep-extend": "^0.6.0",
+ "ini": "~1.3.0",
+ "minimist": "^1.2.0",
+ "strip-json-comments": "~2.0.1"
+ },
+ "bin": {
+ "rc": "cli.js"
+ }
+ },
"node_modules/react": {
"version": "19.2.3",
"resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
@@ -10995,26 +12031,6 @@
"node": ">=0.10.0"
}
},
- "node_modules/react-dom": {
- "version": "19.2.3",
- "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz",
- "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==",
- "license": "MIT",
- "peer": true,
- "dependencies": {
- "scheduler": "^0.27.0"
- },
- "peerDependencies": {
- "react": "^19.2.3"
- }
- },
- "node_modules/react-dom/node_modules/scheduler": {
- "version": "0.27.0",
- "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
- "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==",
- "license": "MIT",
- "peer": true
- },
"node_modules/react-reconciler": {
"version": "0.32.0",
"resolved": "https://registry.npmjs.org/react-reconciler/-/react-reconciler-0.32.0.tgz",
@@ -11132,6 +12148,21 @@
"pify": "^2.3.0"
}
},
+ "node_modules/readable-stream": {
+ "version": "3.6.2",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "inherits": "^2.0.3",
+ "string_decoder": "^1.1.1",
+ "util-deprecate": "^1.0.1"
+ },
+ "engines": {
+ "node": ">= 6"
+ }
+ },
"node_modules/readdirp": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
@@ -11436,9 +12467,9 @@
}
},
"node_modules/remark-mdx-remove-esm": {
- "version": "1.2.2",
- "resolved": "https://registry.npmjs.org/remark-mdx-remove-esm/-/remark-mdx-remove-esm-1.2.2.tgz",
- "integrity": "sha512-YSaUwqiuJuD6S9XTAD6zmO4JJJZJgsRAdsl2drZO8/ssAVv0HXAg4vkSgHZAP46ORh8ERPFQrC7JWlbkwBwu1A==",
+ "version": "1.3.1",
+ "resolved": "https://registry.npmjs.org/remark-mdx-remove-esm/-/remark-mdx-remove-esm-1.3.1.tgz",
+ "integrity": "sha512-POa8abdiuicD2e+zQkclxzJa5JEGLtV8XIOFVvisnGuw4l4xd6dfQozedwqR8JTeXQmxLebvYhlbwHoQP9RWkw==",
"license": "MIT",
"dependencies": {
"@types/mdast": "^4.0.4",
@@ -11805,9 +12836,9 @@
"license": "MIT"
},
"node_modules/sax": {
- "version": "1.4.4",
- "resolved": "https://registry.npmjs.org/sax/-/sax-1.4.4.tgz",
- "integrity": "sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==",
+ "version": "1.6.0",
+ "resolved": "https://registry.npmjs.org/sax/-/sax-1.6.0.tgz",
+ "integrity": "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA==",
"license": "BlueOak-1.0.0",
"engines": {
"node": ">=11.0.0"
@@ -11871,15 +12902,31 @@
"license": "MIT"
},
"node_modules/serialize-error": {
- "version": "12.0.0",
- "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-12.0.0.tgz",
- "integrity": "sha512-ZYkZLAvKTKQXWuh5XpBw7CdbSzagarX39WyZ2H07CDLC5/KfsRGlIXV8d4+tfqX1M7916mRqR1QfNHSij+c9Pw==",
+ "version": "13.0.1",
+ "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-13.0.1.tgz",
+ "integrity": "sha512-bBZaRwLH9PN5HbLCjPId4dP5bNGEtumcErgOX952IsvOhVPrm3/AeK1y0UHA/QaPG701eg0yEnOKsCOC6X/kaA==",
"license": "MIT",
"dependencies": {
- "type-fest": "^4.31.0"
+ "non-error": "^0.1.0",
+ "type-fest": "^5.4.1"
},
"engines": {
- "node": ">=18"
+ "node": ">=20"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
+ "node_modules/serialize-error/node_modules/type-fest": {
+ "version": "5.5.0",
+ "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-5.5.0.tgz",
+ "integrity": "sha512-PlBfpQwiUvGViBNX84Yxwjsdhd1TUlXr6zjX7eoirtCPIr08NAmxwa+fcYBTeRQxHo9YC9wwF3m9i700sHma8g==",
+ "license": "(MIT OR CC0-1.0)",
+ "dependencies": {
+ "tagged-tag": "^1.0.0"
+ },
+ "engines": {
+ "node": ">=20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
@@ -12008,9 +13055,9 @@
}
},
"node_modules/sharp/node_modules/semver": {
- "version": "7.7.3",
- "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
- "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==",
+ "version": "7.7.4",
+ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
+ "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
"license": "ISC",
"bin": {
"semver": "bin/semver.js"
@@ -12019,18 +13066,39 @@
"node": ">=10"
}
},
+ "node_modules/shebang-command": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
+ "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
+ "license": "MIT",
+ "dependencies": {
+ "shebang-regex": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=8"
+ }
+ },
+ "node_modules/shebang-regex": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
+ "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=8"
+ }
+ },
"node_modules/shiki": {
- "version": "3.21.0",
- "resolved": "https://registry.npmjs.org/shiki/-/shiki-3.21.0.tgz",
- "integrity": "sha512-N65B/3bqL/TI2crrXr+4UivctrAGEjmsib5rPMMPpFp1xAx/w03v8WZ9RDDFYteXoEgY7qZ4HGgl5KBIu1153w==",
+ "version": "3.23.0",
+ "resolved": "https://registry.npmjs.org/shiki/-/shiki-3.23.0.tgz",
+ "integrity": "sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA==",
"license": "MIT",
"dependencies": {
- "@shikijs/core": "3.21.0",
- "@shikijs/engine-javascript": "3.21.0",
- "@shikijs/engine-oniguruma": "3.21.0",
- "@shikijs/langs": "3.21.0",
- "@shikijs/themes": "3.21.0",
- "@shikijs/types": "3.21.0",
+ "@shikijs/core": "3.23.0",
+ "@shikijs/engine-javascript": "3.23.0",
+ "@shikijs/engine-oniguruma": "3.23.0",
+ "@shikijs/langs": "3.23.0",
+ "@shikijs/themes": "3.23.0",
+ "@shikijs/types": "3.23.0",
"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4"
}
@@ -12119,6 +13187,27 @@
"url": "https://github.com/sponsors/isaacs"
}
},
+ "node_modules/simple-concat": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
+ "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/feross"
+ },
+ {
+ "type": "patreon",
+ "url": "https://www.patreon.com/feross"
+ },
+ {
+ "type": "consulting",
+ "url": "https://feross.org/support"
+ }
+ ],
+ "license": "MIT",
+ "optional": true
+ },
"node_modules/simple-eval": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/simple-eval/-/simple-eval-1.0.1.tgz",
@@ -12131,6 +13220,32 @@
"node": ">=12"
}
},
+ "node_modules/simple-get": {
+ "version": "4.0.1",
+ "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
+ "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/feross"
+ },
+ {
+ "type": "patreon",
+ "url": "https://www.patreon.com/feross"
+ },
+ {
+ "type": "consulting",
+ "url": "https://feross.org/support"
+ }
+ ],
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "decompress-response": "^6.0.0",
+ "once": "^1.3.1",
+ "simple-concat": "^1.0.0"
+ }
+ },
"node_modules/simple-swizzle": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz",
@@ -12222,9 +13337,9 @@
}
},
"node_modules/socket.io-parser": {
- "version": "4.2.5",
- "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.5.tgz",
- "integrity": "sha512-bPMmpy/5WWKHea5Y/jYAP6k74A+hvmRCQaJuJB6I/ML5JZq/KfNieUVo/3Mh7SAqn7TyFdIo6wqYHInG1MU1bQ==",
+ "version": "4.2.6",
+ "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.6.tgz",
+ "integrity": "sha512-asJqbVBDsBCJx0pTqw3WfesSY0iRX+2xzWEWzrpcH7L6fLzrhyF8WPI8UaeM4YCuDfpwA/cgsdugMsmtz8EJeg==",
"license": "MIT",
"dependencies": {
"@socket.io/component-emitter": "~3.1.0",
@@ -12357,9 +13472,9 @@
}
},
"node_modules/streamx": {
- "version": "2.23.0",
- "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
- "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==",
+ "version": "2.25.0",
+ "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.25.0.tgz",
+ "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==",
"license": "MIT",
"dependencies": {
"events-universal": "^1.0.0",
@@ -12367,6 +13482,16 @@
"text-decoder": "^1.1.0"
}
},
+ "node_modules/string_decoder": {
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
+ "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
+ "license": "MIT",
+ "optional": true,
+ "dependencies": {
+ "safe-buffer": "~5.2.0"
+ }
+ },
"node_modules/string-width": {
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
@@ -12455,12 +13580,12 @@
}
},
"node_modules/strip-ansi": {
- "version": "7.1.2",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz",
- "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==",
+ "version": "7.2.0",
+ "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.2.0.tgz",
+ "integrity": "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==",
"license": "MIT",
"dependencies": {
- "ansi-regex": "^6.0.1"
+ "ansi-regex": "^6.2.2"
},
"engines": {
"node": ">=12"
@@ -12469,6 +13594,16 @@
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
}
},
+ "node_modules/strip-json-comments": {
+ "version": "2.0.1",
+ "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
+ "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
+ "license": "MIT",
+ "optional": true,
+ "engines": {
+ "node": ">=0.10.0"
+ }
+ },
"node_modules/style-to-js": {
"version": "1.1.21",
"resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.21.tgz",
@@ -12530,34 +13665,46 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/tagged-tag": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/tagged-tag/-/tagged-tag-1.0.0.tgz",
+ "integrity": "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=20"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/tailwindcss": {
- "version": "3.4.4",
- "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.4.tgz",
- "integrity": "sha512-ZoyXOdJjISB7/BcLTR6SEsLgKtDStYyYZVLsUtWChO4Ps20CBad7lfJKVDiejocV4ME1hLmyY0WJE3hSDcmQ2A==",
+ "version": "3.4.19",
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz",
+ "integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==",
"license": "MIT",
"dependencies": {
"@alloc/quick-lru": "^5.2.0",
"arg": "^5.0.2",
- "chokidar": "^3.5.3",
+ "chokidar": "^3.6.0",
"didyoumean": "^1.2.2",
"dlv": "^1.1.3",
- "fast-glob": "^3.3.0",
+ "fast-glob": "^3.3.2",
"glob-parent": "^6.0.2",
"is-glob": "^4.0.3",
- "jiti": "^1.21.0",
- "lilconfig": "^2.1.0",
- "micromatch": "^4.0.5",
+ "jiti": "^1.21.7",
+ "lilconfig": "^3.1.3",
+ "micromatch": "^4.0.8",
"normalize-path": "^3.0.0",
"object-hash": "^3.0.0",
- "picocolors": "^1.0.0",
- "postcss": "^8.4.23",
+ "picocolors": "^1.1.1",
+ "postcss": "^8.4.47",
"postcss-import": "^15.1.0",
"postcss-js": "^4.0.1",
- "postcss-load-config": "^4.0.1",
- "postcss-nested": "^6.0.1",
- "postcss-selector-parser": "^6.0.11",
- "resolve": "^1.22.2",
- "sucrase": "^3.32.0"
+ "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0",
+ "postcss-nested": "^6.2.0",
+ "postcss-selector-parser": "^6.1.2",
+ "resolve": "^1.22.8",
+ "sucrase": "^3.35.0"
},
"bin": {
"tailwind": "lib/cli.js",
@@ -12567,6 +13714,42 @@
"node": ">=14.0.0"
}
},
+ "node_modules/tailwindcss/node_modules/chokidar": {
+ "version": "3.6.0",
+ "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
+ "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
+ "license": "MIT",
+ "dependencies": {
+ "anymatch": "~3.1.2",
+ "braces": "~3.0.2",
+ "glob-parent": "~5.1.2",
+ "is-binary-path": "~2.1.0",
+ "is-glob": "~4.0.1",
+ "normalize-path": "~3.0.0",
+ "readdirp": "~3.6.0"
+ },
+ "engines": {
+ "node": ">= 8.10.0"
+ },
+ "funding": {
+ "url": "https://paulmillr.com/funding/"
+ },
+ "optionalDependencies": {
+ "fsevents": "~2.3.2"
+ }
+ },
+ "node_modules/tailwindcss/node_modules/chokidar/node_modules/glob-parent": {
+ "version": "5.1.2",
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
+ "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
+ "license": "ISC",
+ "dependencies": {
+ "is-glob": "^4.0.1"
+ },
+ "engines": {
+ "node": ">= 6"
+ }
+ },
"node_modules/tailwindcss/node_modules/glob-parent": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
@@ -12583,7 +13766,7 @@
"version": "6.1.15",
"resolved": "https://registry.npmjs.org/tar/-/tar-6.1.15.tgz",
"integrity": "sha512-/zKt9UyngnxIT/EAGYuxaMYgOIJiP81ab9ZfkILq4oNLPFX50qyYmu7jRj9qeXoxmJHjGlbH0+cm2uy1WCs10A==",
- "deprecated": "Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exhorbitant rates) by contacting i@izs.me",
+ "deprecated": "Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me",
"license": "ISC",
"dependencies": {
"chownr": "^2.0.0",
@@ -12598,34 +13781,55 @@
}
},
"node_modules/tar-fs": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
- "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
+ "version": "2.1.4",
+ "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz",
+ "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==",
"license": "MIT",
+ "optional": true,
"dependencies": {
+ "chownr": "^1.1.1",
+ "mkdirp-classic": "^0.5.2",
"pump": "^3.0.0",
- "tar-stream": "^3.1.5"
- },
- "optionalDependencies": {
- "bare-fs": "^4.0.1",
- "bare-path": "^3.0.0"
+ "tar-stream": "^2.1.4"
}
},
+ "node_modules/tar-fs/node_modules/chownr": {
+ "version": "1.1.4",
+ "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
+ "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==",
+ "license": "ISC",
+ "optional": true
+ },
"node_modules/tar-stream": {
- "version": "3.1.7",
- "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
- "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
+ "version": "2.2.0",
+ "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
+ "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
"license": "MIT",
+ "optional": true,
"dependencies": {
- "b4a": "^1.6.4",
- "fast-fifo": "^1.2.0",
- "streamx": "^2.15.0"
+ "bl": "^4.0.3",
+ "end-of-stream": "^1.4.1",
+ "fs-constants": "^1.0.0",
+ "inherits": "^2.0.3",
+ "readable-stream": "^3.1.1"
+ },
+ "engines": {
+ "node": ">=6"
+ }
+ },
+ "node_modules/teex": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz",
+ "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==",
+ "license": "MIT",
+ "dependencies": {
+ "streamx": "^2.12.5"
}
},
"node_modules/text-decoder": {
- "version": "1.2.3",
- "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
- "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==",
+ "version": "1.2.7",
+ "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz",
+ "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==",
"license": "Apache-2.0",
"dependencies": {
"b4a": "^1.6.4"
@@ -12659,13 +13863,13 @@
"license": "MIT"
},
"node_modules/tinyglobby": {
- "version": "0.2.15",
- "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
- "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
+ "version": "0.2.16",
+ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
+ "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
"license": "MIT",
"dependencies": {
"fdir": "^6.5.0",
- "picomatch": "^4.0.3"
+ "picomatch": "^4.0.4"
},
"engines": {
"node": ">=12.0.0"
@@ -12692,9 +13896,9 @@
}
},
"node_modules/tinyglobby/node_modules/picomatch": {
- "version": "4.0.3",
- "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
- "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
+ "version": "4.0.4",
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
+ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"engines": {
"node": ">=12"
@@ -12778,6 +13982,19 @@
"integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==",
"license": "0BSD"
},
+ "node_modules/tunnel-agent": {
+ "version": "0.6.0",
+ "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
+ "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
+ "license": "Apache-2.0",
+ "optional": true,
+ "dependencies": {
+ "safe-buffer": "^5.0.1"
+ },
+ "engines": {
+ "node": "*"
+ }
+ },
"node_modules/twoslash": {
"version": "0.3.6",
"resolved": "https://registry.npmjs.org/twoslash/-/twoslash-0.3.6.tgz",
@@ -12939,9 +14156,9 @@
}
},
"node_modules/undici-types": {
- "version": "7.16.0",
- "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
- "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
+ "version": "7.18.2",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz",
+ "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
"license": "MIT"
},
"node_modules/unified": {
@@ -13357,6 +14574,21 @@
"webidl-conversions": "^3.0.0"
}
},
+ "node_modules/which": {
+ "version": "2.0.2",
+ "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
+ "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
+ "license": "ISC",
+ "dependencies": {
+ "isexe": "^2.0.0"
+ },
+ "bin": {
+ "node-which": "bin/node-which"
+ },
+ "engines": {
+ "node": ">= 8"
+ }
+ },
"node_modules/which-boxed-primitive": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.1.1.tgz",
@@ -13543,9 +14775,9 @@
"license": "ISC"
},
"node_modules/ws": {
- "version": "8.19.0",
- "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz",
- "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
+ "version": "8.20.0",
+ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
+ "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
@@ -13585,6 +14817,28 @@
"node": ">=4.0"
}
},
+ "node_modules/xss": {
+ "version": "1.0.15",
+ "resolved": "https://registry.npmjs.org/xss/-/xss-1.0.15.tgz",
+ "integrity": "sha512-FVdlVVC67WOIPvfOwhoMETV72f6GbW7aOabBC3WxN/oUdoEMDyLz4OgRv5/gck2ZeNqEQu+Tb0kloovXOfpYVg==",
+ "license": "MIT",
+ "dependencies": {
+ "commander": "^2.20.3",
+ "cssfilter": "0.0.10"
+ },
+ "bin": {
+ "xss": "bin/xss"
+ },
+ "engines": {
+ "node": ">= 0.10.0"
+ }
+ },
+ "node_modules/xss/node_modules/commander": {
+ "version": "2.20.3",
+ "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz",
+ "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==",
+ "license": "MIT"
+ },
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
@@ -13601,9 +14855,9 @@
"license": "ISC"
},
"node_modules/yaml": {
- "version": "2.8.2",
- "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz",
- "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==",
+ "version": "2.8.3",
+ "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz",
+ "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==",
"license": "ISC",
"bin": {
"yaml": "bin.mjs"
@@ -13721,23 +14975,14 @@
"license": "MIT"
},
"node_modules/zod": {
- "version": "3.21.4",
- "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz",
- "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==",
+ "version": "4.3.6",
+ "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
+ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
},
- "node_modules/zod-to-json-schema": {
- "version": "3.20.4",
- "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz",
- "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==",
- "license": "ISC",
- "peerDependencies": {
- "zod": "^3.20.0"
- }
- },
"node_modules/zwitch": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz",
diff --git a/docs/package.json b/docs 2/package.json
similarity index 64%
rename from docs/package.json
rename to docs 2/package.json
index a6c2ef20edd..4413f7c935b 100644
--- a/docs/package.json
+++ b/docs 2/package.json
@@ -1,6 +1,6 @@
{
"dependencies": {
- "mintlify": "^4.2.296",
+ "mintlify": "^4.2.500",
"sharp": "^0.34.4"
}
}
diff --git a/docs/quickstart-tutorial.mdx b/docs 2/quickstart-tutorial.mdx
similarity index 100%
rename from docs/quickstart-tutorial.mdx
rename to docs 2/quickstart-tutorial.mdx
diff --git a/docs/releases.mdx b/docs 2/releases.mdx
similarity index 100%
rename from docs/releases.mdx
rename to docs 2/releases.mdx
diff --git a/docs/rest/agents/create-agent.mdx b/docs 2/rest/agents/create-agent.mdx
similarity index 100%
rename from docs/rest/agents/create-agent.mdx
rename to docs 2/rest/agents/create-agent.mdx
diff --git a/docs/rest/agents/create-chatbot.mdx b/docs 2/rest/agents/create-chatbot.mdx
similarity index 100%
rename from docs/rest/agents/create-chatbot.mdx
rename to docs 2/rest/agents/create-chatbot.mdx
diff --git a/docs/rest/agents/create-skill.mdx b/docs 2/rest/agents/create-skill.mdx
similarity index 100%
rename from docs/rest/agents/create-skill.mdx
rename to docs 2/rest/agents/create-skill.mdx
diff --git a/docs/rest/agents/create.mdx b/docs 2/rest/agents/create.mdx
similarity index 100%
rename from docs/rest/agents/create.mdx
rename to docs 2/rest/agents/create.mdx
diff --git a/docs/rest/agents/delete-agent.mdx b/docs 2/rest/agents/delete-agent.mdx
similarity index 100%
rename from docs/rest/agents/delete-agent.mdx
rename to docs 2/rest/agents/delete-agent.mdx
diff --git a/docs/rest/agents/delete-chatbot.mdx b/docs 2/rest/agents/delete-chatbot.mdx
similarity index 100%
rename from docs/rest/agents/delete-chatbot.mdx
rename to docs 2/rest/agents/delete-chatbot.mdx
diff --git a/docs/rest/agents/delete-skill.mdx b/docs 2/rest/agents/delete-skill.mdx
similarity index 100%
rename from docs/rest/agents/delete-skill.mdx
rename to docs 2/rest/agents/delete-skill.mdx
diff --git a/docs/rest/agents/delete.mdx b/docs 2/rest/agents/delete.mdx
similarity index 100%
rename from docs/rest/agents/delete.mdx
rename to docs 2/rest/agents/delete.mdx
diff --git a/docs/rest/agents/get-agent.mdx b/docs 2/rest/agents/get-agent.mdx
similarity index 100%
rename from docs/rest/agents/get-agent.mdx
rename to docs 2/rest/agents/get-agent.mdx
diff --git a/docs/rest/agents/get-chatbot.mdx b/docs 2/rest/agents/get-chatbot.mdx
similarity index 100%
rename from docs/rest/agents/get-chatbot.mdx
rename to docs 2/rest/agents/get-chatbot.mdx
diff --git a/docs/rest/agents/get-skill.mdx b/docs 2/rest/agents/get-skill.mdx
similarity index 100%
rename from docs/rest/agents/get-skill.mdx
rename to docs 2/rest/agents/get-skill.mdx
diff --git a/docs/rest/agents/get.mdx b/docs 2/rest/agents/get.mdx
similarity index 100%
rename from docs/rest/agents/get.mdx
rename to docs 2/rest/agents/get.mdx
diff --git a/docs/rest/agents/list-agents.mdx b/docs 2/rest/agents/list-agents.mdx
similarity index 100%
rename from docs/rest/agents/list-agents.mdx
rename to docs 2/rest/agents/list-agents.mdx
diff --git a/docs/rest/agents/list-chatbots.mdx b/docs 2/rest/agents/list-chatbots.mdx
similarity index 100%
rename from docs/rest/agents/list-chatbots.mdx
rename to docs 2/rest/agents/list-chatbots.mdx
diff --git a/docs/rest/agents/list-skills.mdx b/docs 2/rest/agents/list-skills.mdx
similarity index 100%
rename from docs/rest/agents/list-skills.mdx
rename to docs 2/rest/agents/list-skills.mdx
diff --git a/docs/rest/agents/list.mdx b/docs 2/rest/agents/list.mdx
similarity index 100%
rename from docs/rest/agents/list.mdx
rename to docs 2/rest/agents/list.mdx
diff --git a/docs/rest/agents/query-agent.mdx b/docs 2/rest/agents/query-agent.mdx
similarity index 100%
rename from docs/rest/agents/query-agent.mdx
rename to docs 2/rest/agents/query-agent.mdx
diff --git a/docs/rest/agents/query.mdx b/docs 2/rest/agents/query.mdx
similarity index 100%
rename from docs/rest/agents/query.mdx
rename to docs 2/rest/agents/query.mdx
diff --git a/docs/rest/agents/update-agent.mdx b/docs 2/rest/agents/update-agent.mdx
similarity index 100%
rename from docs/rest/agents/update-agent.mdx
rename to docs 2/rest/agents/update-agent.mdx
diff --git a/docs/rest/agents/update-chatbot.mdx b/docs 2/rest/agents/update-chatbot.mdx
similarity index 100%
rename from docs/rest/agents/update-chatbot.mdx
rename to docs 2/rest/agents/update-chatbot.mdx
diff --git a/docs/rest/agents/update-skill.mdx b/docs 2/rest/agents/update-skill.mdx
similarity index 100%
rename from docs/rest/agents/update-skill.mdx
rename to docs 2/rest/agents/update-skill.mdx
diff --git a/docs/rest/agents/update.mdx b/docs 2/rest/agents/update.mdx
similarity index 100%
rename from docs/rest/agents/update.mdx
rename to docs 2/rest/agents/update.mdx
diff --git a/docs/rest/authentication.mdx b/docs 2/rest/authentication.mdx
similarity index 100%
rename from docs/rest/authentication.mdx
rename to docs 2/rest/authentication.mdx
diff --git a/docs/rest/databases/create-databases.mdx b/docs 2/rest/databases/create-databases.mdx
similarity index 100%
rename from docs/rest/databases/create-databases.mdx
rename to docs 2/rest/databases/create-databases.mdx
diff --git a/docs/rest/databases/delete-databases.mdx b/docs 2/rest/databases/delete-databases.mdx
similarity index 100%
rename from docs/rest/databases/delete-databases.mdx
rename to docs 2/rest/databases/delete-databases.mdx
diff --git a/docs/rest/databases/list-database.mdx b/docs 2/rest/databases/list-database.mdx
similarity index 100%
rename from docs/rest/databases/list-database.mdx
rename to docs 2/rest/databases/list-database.mdx
diff --git a/docs/rest/databases/list-databases.mdx b/docs 2/rest/databases/list-databases.mdx
similarity index 100%
rename from docs/rest/databases/list-databases.mdx
rename to docs 2/rest/databases/list-databases.mdx
diff --git a/docs/rest/databases/update-databases.mdx b/docs 2/rest/databases/update-databases.mdx
similarity index 100%
rename from docs/rest/databases/update-databases.mdx
rename to docs 2/rest/databases/update-databases.mdx
diff --git a/docs/rest/files/delete.mdx b/docs 2/rest/files/delete.mdx
similarity index 100%
rename from docs/rest/files/delete.mdx
rename to docs 2/rest/files/delete.mdx
diff --git a/docs/rest/files/list.mdx b/docs 2/rest/files/list.mdx
similarity index 100%
rename from docs/rest/files/list.mdx
rename to docs 2/rest/files/list.mdx
diff --git a/docs/rest/files/upload.mdx b/docs 2/rest/files/upload.mdx
similarity index 100%
rename from docs/rest/files/upload.mdx
rename to docs 2/rest/files/upload.mdx
diff --git a/docs/rest/jobs/create.mdx b/docs 2/rest/jobs/create.mdx
similarity index 100%
rename from docs/rest/jobs/create.mdx
rename to docs 2/rest/jobs/create.mdx
diff --git a/docs/rest/jobs/delete.mdx b/docs 2/rest/jobs/delete.mdx
similarity index 100%
rename from docs/rest/jobs/delete.mdx
rename to docs 2/rest/jobs/delete.mdx
diff --git a/docs/rest/jobs/get.mdx b/docs 2/rest/jobs/get.mdx
similarity index 100%
rename from docs/rest/jobs/get.mdx
rename to docs 2/rest/jobs/get.mdx
diff --git a/docs/rest/jobs/list.mdx b/docs 2/rest/jobs/list.mdx
similarity index 100%
rename from docs/rest/jobs/list.mdx
rename to docs 2/rest/jobs/list.mdx
diff --git a/docs/rest/knowledge_bases/alter.mdx b/docs 2/rest/knowledge_bases/alter.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/alter.mdx
rename to docs 2/rest/knowledge_bases/alter.mdx
diff --git a/docs/rest/knowledge_bases/create.mdx b/docs 2/rest/knowledge_bases/create.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/create.mdx
rename to docs 2/rest/knowledge_bases/create.mdx
diff --git a/docs/rest/knowledge_bases/delete.mdx b/docs 2/rest/knowledge_bases/delete.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/delete.mdx
rename to docs 2/rest/knowledge_bases/delete.mdx
diff --git a/docs/rest/knowledge_bases/get.mdx b/docs 2/rest/knowledge_bases/get.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/get.mdx
rename to docs 2/rest/knowledge_bases/get.mdx
diff --git a/docs/rest/knowledge_bases/insert.mdx b/docs 2/rest/knowledge_bases/insert.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/insert.mdx
rename to docs 2/rest/knowledge_bases/insert.mdx
diff --git a/docs/rest/knowledge_bases/list.mdx b/docs 2/rest/knowledge_bases/list.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/list.mdx
rename to docs 2/rest/knowledge_bases/list.mdx
diff --git a/docs/rest/knowledge_bases/query.mdx b/docs 2/rest/knowledge_bases/query.mdx
similarity index 100%
rename from docs/rest/knowledge_bases/query.mdx
rename to docs 2/rest/knowledge_bases/query.mdx
diff --git a/docs/rest/ml_engines/create.mdx b/docs 2/rest/ml_engines/create.mdx
similarity index 100%
rename from docs/rest/ml_engines/create.mdx
rename to docs 2/rest/ml_engines/create.mdx
diff --git a/docs/rest/ml_engines/delete.mdx b/docs 2/rest/ml_engines/delete.mdx
similarity index 100%
rename from docs/rest/ml_engines/delete.mdx
rename to docs 2/rest/ml_engines/delete.mdx
diff --git a/docs/rest/ml_engines/list.mdx b/docs 2/rest/ml_engines/list.mdx
similarity index 100%
rename from docs/rest/ml_engines/list.mdx
rename to docs 2/rest/ml_engines/list.mdx
diff --git a/docs/rest/models/delete-model.mdx b/docs 2/rest/models/delete-model.mdx
similarity index 100%
rename from docs/rest/models/delete-model.mdx
rename to docs 2/rest/models/delete-model.mdx
diff --git a/docs/rest/models/describe-model.mdx b/docs 2/rest/models/describe-model.mdx
similarity index 100%
rename from docs/rest/models/describe-model.mdx
rename to docs 2/rest/models/describe-model.mdx
diff --git a/docs/rest/models/finetune.mdx b/docs 2/rest/models/finetune.mdx
similarity index 100%
rename from docs/rest/models/finetune.mdx
rename to docs 2/rest/models/finetune.mdx
diff --git a/docs/rest/models/list-model.mdx b/docs 2/rest/models/list-model.mdx
similarity index 100%
rename from docs/rest/models/list-model.mdx
rename to docs 2/rest/models/list-model.mdx
diff --git a/docs/rest/models/list-models.mdx b/docs 2/rest/models/list-models.mdx
similarity index 100%
rename from docs/rest/models/list-models.mdx
rename to docs 2/rest/models/list-models.mdx
diff --git a/docs/rest/models/manage-model-versions.mdx b/docs 2/rest/models/manage-model-versions.mdx
similarity index 100%
rename from docs/rest/models/manage-model-versions.mdx
rename to docs 2/rest/models/manage-model-versions.mdx
diff --git a/docs/rest/models/query-model-joined-with-data.mdx b/docs 2/rest/models/query-model-joined-with-data.mdx
similarity index 100%
rename from docs/rest/models/query-model-joined-with-data.mdx
rename to docs 2/rest/models/query-model-joined-with-data.mdx
diff --git a/docs/rest/models/query-model.mdx b/docs 2/rest/models/query-model.mdx
similarity index 100%
rename from docs/rest/models/query-model.mdx
rename to docs 2/rest/models/query-model.mdx
diff --git a/docs/rest/models/retrain.mdx b/docs 2/rest/models/retrain.mdx
similarity index 100%
rename from docs/rest/models/retrain.mdx
rename to docs 2/rest/models/retrain.mdx
diff --git a/docs/rest/models/train-model.mdx b/docs 2/rest/models/train-model.mdx
similarity index 100%
rename from docs/rest/models/train-model.mdx
rename to docs 2/rest/models/train-model.mdx
diff --git a/docs/rest/overview.mdx b/docs 2/rest/overview.mdx
similarity index 100%
rename from docs/rest/overview.mdx
rename to docs 2/rest/overview.mdx
diff --git a/docs/rest/projects/create.mdx b/docs 2/rest/projects/create.mdx
similarity index 100%
rename from docs/rest/projects/create.mdx
rename to docs 2/rest/projects/create.mdx
diff --git a/docs/rest/projects/drop.mdx b/docs 2/rest/projects/drop.mdx
similarity index 100%
rename from docs/rest/projects/drop.mdx
rename to docs 2/rest/projects/drop.mdx
diff --git a/docs/rest/projects/get-project.mdx b/docs 2/rest/projects/get-project.mdx
similarity index 100%
rename from docs/rest/projects/get-project.mdx
rename to docs 2/rest/projects/get-project.mdx
diff --git a/docs/rest/projects/get-projects.mdx b/docs 2/rest/projects/get-projects.mdx
similarity index 100%
rename from docs/rest/projects/get-projects.mdx
rename to docs 2/rest/projects/get-projects.mdx
diff --git a/docs/rest/sql.mdx b/docs 2/rest/sql.mdx
similarity index 51%
rename from docs/rest/sql.mdx
rename to docs 2/rest/sql.mdx
index 0a8d5bd0f5b..4b8604931d0 100644
--- a/docs/rest/sql.mdx
+++ b/docs 2/rest/sql.mdx
@@ -20,6 +20,29 @@ String that contains the SQL query that needs to be executed.
+
+
+Format of the response. Available options:
+- `null` (default) - returns all data in a single JSON response
+- `"sse"` - returns data as Server-Sent Events stream
+- `"jsonlines"` - returns data as JSON Lines stream (one JSON object per line)
+
+Use `"sse"` or `"jsonlines"` for streaming large result sets to avoid loading all data into memory at once.
+
+
+
+
+
+Optional context object, e.g., `{"db": "mindsdb"}` to specify the database.
+
+
+
+
+
+Optional parameters for parameterized queries, e.g., `{"name": "value"}`.
+
+
+
### Response
@@ -55,9 +78,32 @@ curl --request POST \
{
"query": "SELECT * FROM example_db.demo_data.home_rentals LIMIT 10;"
}
+'
+```
+```shell Shell (Streaming with SSE)
+curl --request POST \
+ --url https://cloud.mindsdb.com/api/sql/query \
+ --header 'Content-Type: application/json' \
+ --data '
+{
+ "query": "SELECT * FROM example_db.demo_data.home_rentals;",
+ "response_format": "sse"
+}
+'
```
+```shell Shell (Streaming with JSON Lines)
+curl --request POST \
+ --url https://cloud.mindsdb.com/api/sql/query \
+ --header 'Content-Type: application/json' \
+ --data '
+{
+ "query": "SELECT * FROM example_db.demo_data.home_rentals;",
+ "response_format": "jsonlines"
+}
+'
+```
```python Python
import requests
@@ -70,8 +116,8 @@ resp = requests.post(url, json={'query':
-```json Response
- {
+```json Response (Default)
+{
"column_names": [
"sqft",
"rental_price"
@@ -90,7 +136,22 @@ resp = requests.post(url, json={'query':
]
],
"type": "table"
- }
+}
+```
+
+```text Response (SSE format)
+data: {"type": "table", "column_names": ["sqft", "rental_price"], "context": {"db": "mindsdb"}}
+
+data: [[917, 3901], [194, 2042]]
+
+data: [[543, 1871], [289, 1563]]
+
+```
+
+```text Response (JSON Lines format)
+{"type": "table", "column_names": ["sqft", "rental_price"], "context": {"db": "mindsdb"}}
+[[917, 3901], [194, 2042]]
+[[543, 1871], [289, 1563]]
```
diff --git a/docs/rest/tables/create-table.mdx b/docs 2/rest/tables/create-table.mdx
similarity index 100%
rename from docs/rest/tables/create-table.mdx
rename to docs 2/rest/tables/create-table.mdx
diff --git a/docs/rest/tables/delete-table.mdx b/docs 2/rest/tables/delete-table.mdx
similarity index 100%
rename from docs/rest/tables/delete-table.mdx
rename to docs 2/rest/tables/delete-table.mdx
diff --git a/docs/rest/tables/delete.mdx b/docs 2/rest/tables/delete.mdx
similarity index 100%
rename from docs/rest/tables/delete.mdx
rename to docs 2/rest/tables/delete.mdx
diff --git a/docs/rest/tables/insert.mdx b/docs 2/rest/tables/insert.mdx
similarity index 100%
rename from docs/rest/tables/insert.mdx
rename to docs 2/rest/tables/insert.mdx
diff --git a/docs/rest/tables/list-table.mdx b/docs 2/rest/tables/list-table.mdx
similarity index 100%
rename from docs/rest/tables/list-table.mdx
rename to docs 2/rest/tables/list-table.mdx
diff --git a/docs/rest/tables/list-tables.mdx b/docs 2/rest/tables/list-tables.mdx
similarity index 100%
rename from docs/rest/tables/list-tables.mdx
rename to docs 2/rest/tables/list-tables.mdx
diff --git a/docs/rest/tables/update.mdx b/docs 2/rest/tables/update.mdx
similarity index 100%
rename from docs/rest/tables/update.mdx
rename to docs 2/rest/tables/update.mdx
diff --git a/docs/rest/usage.mdx b/docs 2/rest/usage.mdx
similarity index 100%
rename from docs/rest/usage.mdx
rename to docs 2/rest/usage.mdx
diff --git a/docs/rest/views/create-view.mdx b/docs 2/rest/views/create-view.mdx
similarity index 100%
rename from docs/rest/views/create-view.mdx
rename to docs 2/rest/views/create-view.mdx
diff --git a/docs/rest/views/delete-views.mdx b/docs 2/rest/views/delete-views.mdx
similarity index 100%
rename from docs/rest/views/delete-views.mdx
rename to docs 2/rest/views/delete-views.mdx
diff --git a/docs/rest/views/list-view.mdx b/docs 2/rest/views/list-view.mdx
similarity index 100%
rename from docs/rest/views/list-view.mdx
rename to docs 2/rest/views/list-view.mdx
diff --git a/docs/rest/views/list-views.mdx b/docs 2/rest/views/list-views.mdx
similarity index 100%
rename from docs/rest/views/list-views.mdx
rename to docs 2/rest/views/list-views.mdx
diff --git a/docs/rest/views/update-view.mdx b/docs 2/rest/views/update-view.mdx
similarity index 100%
rename from docs/rest/views/update-view.mdx
rename to docs 2/rest/views/update-view.mdx
diff --git a/docs/sdks/community-sdk.mdx b/docs 2/sdks/community-sdk.mdx
similarity index 100%
rename from docs/sdks/community-sdk.mdx
rename to docs 2/sdks/community-sdk.mdx
diff --git a/docs/sdks/javascript/agents.mdx b/docs 2/sdks/javascript/agents.mdx
similarity index 100%
rename from docs/sdks/javascript/agents.mdx
rename to docs 2/sdks/javascript/agents.mdx
diff --git a/docs/sdks/javascript/batchQuery.mdx b/docs 2/sdks/javascript/batchQuery.mdx
similarity index 100%
rename from docs/sdks/javascript/batchQuery.mdx
rename to docs 2/sdks/javascript/batchQuery.mdx
diff --git a/docs/sdks/javascript/connect.mdx b/docs 2/sdks/javascript/connect.mdx
similarity index 100%
rename from docs/sdks/javascript/connect.mdx
rename to docs 2/sdks/javascript/connect.mdx
diff --git a/docs/sdks/javascript/create_database.mdx b/docs 2/sdks/javascript/create_database.mdx
similarity index 100%
rename from docs/sdks/javascript/create_database.mdx
rename to docs 2/sdks/javascript/create_database.mdx
diff --git a/docs/sdks/javascript/create_job.mdx b/docs 2/sdks/javascript/create_job.mdx
similarity index 100%
rename from docs/sdks/javascript/create_job.mdx
rename to docs 2/sdks/javascript/create_job.mdx
diff --git a/docs/sdks/javascript/create_ml_engine.mdx b/docs 2/sdks/javascript/create_ml_engine.mdx
similarity index 100%
rename from docs/sdks/javascript/create_ml_engine.mdx
rename to docs 2/sdks/javascript/create_ml_engine.mdx
diff --git a/docs/sdks/javascript/create_model.mdx b/docs 2/sdks/javascript/create_model.mdx
similarity index 100%
rename from docs/sdks/javascript/create_model.mdx
rename to docs 2/sdks/javascript/create_model.mdx
diff --git a/docs/sdks/javascript/create_project.mdx b/docs 2/sdks/javascript/create_project.mdx
similarity index 100%
rename from docs/sdks/javascript/create_project.mdx
rename to docs 2/sdks/javascript/create_project.mdx
diff --git a/docs/sdks/javascript/create_table.mdx b/docs 2/sdks/javascript/create_table.mdx
similarity index 100%
rename from docs/sdks/javascript/create_table.mdx
rename to docs 2/sdks/javascript/create_table.mdx
diff --git a/docs/sdks/javascript/create_view.mdx b/docs 2/sdks/javascript/create_view.mdx
similarity index 100%
rename from docs/sdks/javascript/create_view.mdx
rename to docs 2/sdks/javascript/create_view.mdx
diff --git a/docs/sdks/javascript/delete_file.mdx b/docs 2/sdks/javascript/delete_file.mdx
similarity index 100%
rename from docs/sdks/javascript/delete_file.mdx
rename to docs 2/sdks/javascript/delete_file.mdx
diff --git a/docs/sdks/javascript/delete_from.mdx b/docs 2/sdks/javascript/delete_from.mdx
similarity index 100%
rename from docs/sdks/javascript/delete_from.mdx
rename to docs 2/sdks/javascript/delete_from.mdx
diff --git a/docs/sdks/javascript/delete_table.mdx b/docs 2/sdks/javascript/delete_table.mdx
similarity index 100%
rename from docs/sdks/javascript/delete_table.mdx
rename to docs 2/sdks/javascript/delete_table.mdx
diff --git a/docs/sdks/javascript/describe.mdx b/docs 2/sdks/javascript/describe.mdx
similarity index 100%
rename from docs/sdks/javascript/describe.mdx
rename to docs 2/sdks/javascript/describe.mdx
diff --git a/docs/sdks/javascript/drop_database.mdx b/docs 2/sdks/javascript/drop_database.mdx
similarity index 100%
rename from docs/sdks/javascript/drop_database.mdx
rename to docs 2/sdks/javascript/drop_database.mdx
diff --git a/docs/sdks/javascript/drop_job.mdx b/docs 2/sdks/javascript/drop_job.mdx
similarity index 100%
rename from docs/sdks/javascript/drop_job.mdx
rename to docs 2/sdks/javascript/drop_job.mdx
diff --git a/docs/sdks/javascript/drop_ml_engine.mdx b/docs 2/sdks/javascript/drop_ml_engine.mdx
similarity index 100%
rename from docs/sdks/javascript/drop_ml_engine.mdx
rename to docs 2/sdks/javascript/drop_ml_engine.mdx
diff --git a/docs/sdks/javascript/drop_model.mdx b/docs 2/sdks/javascript/drop_model.mdx
similarity index 100%
rename from docs/sdks/javascript/drop_model.mdx
rename to docs 2/sdks/javascript/drop_model.mdx
diff --git a/docs/sdks/javascript/drop_project.mdx b/docs 2/sdks/javascript/drop_project.mdx
similarity index 100%
rename from docs/sdks/javascript/drop_project.mdx
rename to docs 2/sdks/javascript/drop_project.mdx
diff --git a/docs/sdks/javascript/drop_view.mdx b/docs 2/sdks/javascript/drop_view.mdx
similarity index 100%
rename from docs/sdks/javascript/drop_view.mdx
rename to docs 2/sdks/javascript/drop_view.mdx
diff --git a/docs/sdks/javascript/finetune.mdx b/docs 2/sdks/javascript/finetune.mdx
similarity index 100%
rename from docs/sdks/javascript/finetune.mdx
rename to docs 2/sdks/javascript/finetune.mdx
diff --git a/docs/sdks/javascript/get_database.mdx b/docs 2/sdks/javascript/get_database.mdx
similarity index 100%
rename from docs/sdks/javascript/get_database.mdx
rename to docs 2/sdks/javascript/get_database.mdx
diff --git a/docs/sdks/javascript/get_status.mdx b/docs 2/sdks/javascript/get_status.mdx
similarity index 100%
rename from docs/sdks/javascript/get_status.mdx
rename to docs 2/sdks/javascript/get_status.mdx
diff --git a/docs/sdks/javascript/insert_into_table.mdx b/docs 2/sdks/javascript/insert_into_table.mdx
similarity index 100%
rename from docs/sdks/javascript/insert_into_table.mdx
rename to docs 2/sdks/javascript/insert_into_table.mdx
diff --git a/docs/sdks/javascript/installation.mdx b/docs 2/sdks/javascript/installation.mdx
similarity index 100%
rename from docs/sdks/javascript/installation.mdx
rename to docs 2/sdks/javascript/installation.mdx
diff --git a/docs/sdks/javascript/join_on.mdx b/docs 2/sdks/javascript/join_on.mdx
similarity index 100%
rename from docs/sdks/javascript/join_on.mdx
rename to docs 2/sdks/javascript/join_on.mdx
diff --git a/docs/sdks/javascript/list_data_handlers.mdx b/docs 2/sdks/javascript/list_data_handlers.mdx
similarity index 100%
rename from docs/sdks/javascript/list_data_handlers.mdx
rename to docs 2/sdks/javascript/list_data_handlers.mdx
diff --git a/docs/sdks/javascript/list_databases.mdx b/docs 2/sdks/javascript/list_databases.mdx
similarity index 100%
rename from docs/sdks/javascript/list_databases.mdx
rename to docs 2/sdks/javascript/list_databases.mdx
diff --git a/docs/sdks/javascript/list_ml_engines.mdx b/docs 2/sdks/javascript/list_ml_engines.mdx
similarity index 100%
rename from docs/sdks/javascript/list_ml_engines.mdx
rename to docs 2/sdks/javascript/list_ml_engines.mdx
diff --git a/docs/sdks/javascript/list_ml_handlers.mdx b/docs 2/sdks/javascript/list_ml_handlers.mdx
similarity index 100%
rename from docs/sdks/javascript/list_ml_handlers.mdx
rename to docs 2/sdks/javascript/list_ml_handlers.mdx
diff --git a/docs/sdks/javascript/list_models.mdx b/docs 2/sdks/javascript/list_models.mdx
similarity index 100%
rename from docs/sdks/javascript/list_models.mdx
rename to docs 2/sdks/javascript/list_models.mdx
diff --git a/docs/sdks/javascript/list_projects.mdx b/docs 2/sdks/javascript/list_projects.mdx
similarity index 100%
rename from docs/sdks/javascript/list_projects.mdx
rename to docs 2/sdks/javascript/list_projects.mdx
diff --git a/docs/sdks/javascript/list_views.mdx b/docs 2/sdks/javascript/list_views.mdx
similarity index 100%
rename from docs/sdks/javascript/list_views.mdx
rename to docs 2/sdks/javascript/list_views.mdx
diff --git a/docs/sdks/javascript/manage-model-versions.mdx b/docs 2/sdks/javascript/manage-model-versions.mdx
similarity index 100%
rename from docs/sdks/javascript/manage-model-versions.mdx
rename to docs 2/sdks/javascript/manage-model-versions.mdx
diff --git a/docs/sdks/javascript/native_queries.mdx b/docs 2/sdks/javascript/native_queries.mdx
similarity index 100%
rename from docs/sdks/javascript/native_queries.mdx
rename to docs 2/sdks/javascript/native_queries.mdx
diff --git a/docs/sdks/javascript/overview.mdx b/docs 2/sdks/javascript/overview.mdx
similarity index 100%
rename from docs/sdks/javascript/overview.mdx
rename to docs 2/sdks/javascript/overview.mdx
diff --git a/docs/sdks/javascript/query.mdx b/docs 2/sdks/javascript/query.mdx
similarity index 100%
rename from docs/sdks/javascript/query.mdx
rename to docs 2/sdks/javascript/query.mdx
diff --git a/docs/sdks/javascript/query_files.mdx b/docs 2/sdks/javascript/query_files.mdx
similarity index 100%
rename from docs/sdks/javascript/query_files.mdx
rename to docs 2/sdks/javascript/query_files.mdx
diff --git a/docs/sdks/javascript/query_jobs.mdx b/docs 2/sdks/javascript/query_jobs.mdx
similarity index 100%
rename from docs/sdks/javascript/query_jobs.mdx
rename to docs 2/sdks/javascript/query_jobs.mdx
diff --git a/docs/sdks/javascript/query_table.mdx b/docs 2/sdks/javascript/query_table.mdx
similarity index 100%
rename from docs/sdks/javascript/query_table.mdx
rename to docs 2/sdks/javascript/query_table.mdx
diff --git a/docs/sdks/javascript/query_view.mdx b/docs 2/sdks/javascript/query_view.mdx
similarity index 100%
rename from docs/sdks/javascript/query_view.mdx
rename to docs 2/sdks/javascript/query_view.mdx
diff --git a/docs/sdks/javascript/retrain.mdx b/docs 2/sdks/javascript/retrain.mdx
similarity index 100%
rename from docs/sdks/javascript/retrain.mdx
rename to docs 2/sdks/javascript/retrain.mdx
diff --git a/docs/sdks/javascript/update_table.mdx b/docs 2/sdks/javascript/update_table.mdx
similarity index 100%
rename from docs/sdks/javascript/update_table.mdx
rename to docs 2/sdks/javascript/update_table.mdx
diff --git a/docs/sdks/javascript/upload_file.mdx b/docs 2/sdks/javascript/upload_file.mdx
similarity index 100%
rename from docs/sdks/javascript/upload_file.mdx
rename to docs 2/sdks/javascript/upload_file.mdx
diff --git a/docs/sdks/overview.mdx b/docs 2/sdks/overview.mdx
similarity index 100%
rename from docs/sdks/overview.mdx
rename to docs 2/sdks/overview.mdx
diff --git a/docs/sdks/python/agents.mdx b/docs 2/sdks/python/agents.mdx
similarity index 96%
rename from docs/sdks/python/agents.mdx
rename to docs 2/sdks/python/agents.mdx
index f5cf58e11ce..b6c170b90fc 100644
--- a/docs/sdks/python/agents.mdx
+++ b/docs 2/sdks/python/agents.mdx
@@ -307,6 +307,12 @@ This parameter defines the time the agent can take to come back with an answer.
For example, when the `timeout` parameter is set to 10, the agent has 10 seconds to return an answer. If the agent takes longer than 10 seconds, it aborts the process and comes back with an answer indicating its failure to return an answer within the defined time interval.
+### `mode`
+
+This parameter defines the agent's response style, allowing users to partially control the output format. Supported values include `text` and `sql`.
+
+When set, the agent will tailor its responses to match the specified format. Note that the agent may still adapt its output when necessary to ensure clarity or correctness.
+
## Get Agents
You can get an existing agent with the `get()` method.
diff --git a/docs/sdks/python/connect.mdx b/docs 2/sdks/python/connect.mdx
similarity index 100%
rename from docs/sdks/python/connect.mdx
rename to docs 2/sdks/python/connect.mdx
diff --git a/docs/sdks/python/create_database.mdx b/docs 2/sdks/python/create_database.mdx
similarity index 100%
rename from docs/sdks/python/create_database.mdx
rename to docs 2/sdks/python/create_database.mdx
diff --git a/docs/sdks/python/create_job.mdx b/docs 2/sdks/python/create_job.mdx
similarity index 100%
rename from docs/sdks/python/create_job.mdx
rename to docs 2/sdks/python/create_job.mdx
diff --git a/docs/sdks/python/create_ml_engine.mdx b/docs 2/sdks/python/create_ml_engine.mdx
similarity index 100%
rename from docs/sdks/python/create_ml_engine.mdx
rename to docs 2/sdks/python/create_ml_engine.mdx
diff --git a/docs/sdks/python/create_model.mdx b/docs 2/sdks/python/create_model.mdx
similarity index 100%
rename from docs/sdks/python/create_model.mdx
rename to docs 2/sdks/python/create_model.mdx
diff --git a/docs/sdks/python/create_project.mdx b/docs 2/sdks/python/create_project.mdx
similarity index 100%
rename from docs/sdks/python/create_project.mdx
rename to docs 2/sdks/python/create_project.mdx
diff --git a/docs/sdks/python/create_table.mdx b/docs 2/sdks/python/create_table.mdx
similarity index 100%
rename from docs/sdks/python/create_table.mdx
rename to docs 2/sdks/python/create_table.mdx
diff --git a/docs/sdks/python/create_view.mdx b/docs 2/sdks/python/create_view.mdx
similarity index 100%
rename from docs/sdks/python/create_view.mdx
rename to docs 2/sdks/python/create_view.mdx
diff --git a/docs/sdks/python/delete_file.mdx b/docs 2/sdks/python/delete_file.mdx
similarity index 100%
rename from docs/sdks/python/delete_file.mdx
rename to docs 2/sdks/python/delete_file.mdx
diff --git a/docs/sdks/python/delete_from.mdx b/docs 2/sdks/python/delete_from.mdx
similarity index 100%
rename from docs/sdks/python/delete_from.mdx
rename to docs 2/sdks/python/delete_from.mdx
diff --git a/docs/sdks/python/delete_table.mdx b/docs 2/sdks/python/delete_table.mdx
similarity index 100%
rename from docs/sdks/python/delete_table.mdx
rename to docs 2/sdks/python/delete_table.mdx
diff --git a/docs/sdks/python/describe.mdx b/docs 2/sdks/python/describe.mdx
similarity index 100%
rename from docs/sdks/python/describe.mdx
rename to docs 2/sdks/python/describe.mdx
diff --git a/docs/sdks/python/drop_database.mdx b/docs 2/sdks/python/drop_database.mdx
similarity index 100%
rename from docs/sdks/python/drop_database.mdx
rename to docs 2/sdks/python/drop_database.mdx
diff --git a/docs/sdks/python/drop_job.mdx b/docs 2/sdks/python/drop_job.mdx
similarity index 100%
rename from docs/sdks/python/drop_job.mdx
rename to docs 2/sdks/python/drop_job.mdx
diff --git a/docs/sdks/python/drop_ml_engine.mdx b/docs 2/sdks/python/drop_ml_engine.mdx
similarity index 100%
rename from docs/sdks/python/drop_ml_engine.mdx
rename to docs 2/sdks/python/drop_ml_engine.mdx
diff --git a/docs/sdks/python/drop_model.mdx b/docs 2/sdks/python/drop_model.mdx
similarity index 100%
rename from docs/sdks/python/drop_model.mdx
rename to docs 2/sdks/python/drop_model.mdx
diff --git a/docs/sdks/python/drop_project.mdx b/docs 2/sdks/python/drop_project.mdx
similarity index 100%
rename from docs/sdks/python/drop_project.mdx
rename to docs 2/sdks/python/drop_project.mdx
diff --git a/docs/sdks/python/drop_view.mdx b/docs 2/sdks/python/drop_view.mdx
similarity index 100%
rename from docs/sdks/python/drop_view.mdx
rename to docs 2/sdks/python/drop_view.mdx
diff --git a/docs/sdks/python/finetune.mdx b/docs 2/sdks/python/finetune.mdx
similarity index 100%
rename from docs/sdks/python/finetune.mdx
rename to docs 2/sdks/python/finetune.mdx
diff --git a/docs/sdks/python/get-batch-predictions.mdx b/docs 2/sdks/python/get-batch-predictions.mdx
similarity index 100%
rename from docs/sdks/python/get-batch-predictions.mdx
rename to docs 2/sdks/python/get-batch-predictions.mdx
diff --git a/docs/sdks/python/get-single-prediction.mdx b/docs 2/sdks/python/get-single-prediction.mdx
similarity index 100%
rename from docs/sdks/python/get-single-prediction.mdx
rename to docs 2/sdks/python/get-single-prediction.mdx
diff --git a/docs/sdks/python/get_history.mdx b/docs 2/sdks/python/get_history.mdx
similarity index 100%
rename from docs/sdks/python/get_history.mdx
rename to docs 2/sdks/python/get_history.mdx
diff --git a/docs/sdks/python/get_status.mdx b/docs 2/sdks/python/get_status.mdx
similarity index 100%
rename from docs/sdks/python/get_status.mdx
rename to docs 2/sdks/python/get_status.mdx
diff --git a/docs/sdks/python/insert_into_table.mdx b/docs 2/sdks/python/insert_into_table.mdx
similarity index 100%
rename from docs/sdks/python/insert_into_table.mdx
rename to docs 2/sdks/python/insert_into_table.mdx
diff --git a/docs/sdks/python/installation.mdx b/docs 2/sdks/python/installation.mdx
similarity index 100%
rename from docs/sdks/python/installation.mdx
rename to docs 2/sdks/python/installation.mdx
diff --git a/docs/sdks/python/join_on.mdx b/docs 2/sdks/python/join_on.mdx
similarity index 100%
rename from docs/sdks/python/join_on.mdx
rename to docs 2/sdks/python/join_on.mdx
diff --git a/docs/sdks/python/knowledge_bases/create.mdx b/docs 2/sdks/python/knowledge_bases/create.mdx
similarity index 100%
rename from docs/sdks/python/knowledge_bases/create.mdx
rename to docs 2/sdks/python/knowledge_bases/create.mdx
diff --git a/docs/sdks/python/knowledge_bases/insert_data.mdx b/docs 2/sdks/python/knowledge_bases/insert_data.mdx
similarity index 100%
rename from docs/sdks/python/knowledge_bases/insert_data.mdx
rename to docs 2/sdks/python/knowledge_bases/insert_data.mdx
diff --git a/docs/sdks/python/knowledge_bases/overview.mdx b/docs 2/sdks/python/knowledge_bases/overview.mdx
similarity index 100%
rename from docs/sdks/python/knowledge_bases/overview.mdx
rename to docs 2/sdks/python/knowledge_bases/overview.mdx
diff --git a/docs/sdks/python/knowledge_bases/query.mdx b/docs 2/sdks/python/knowledge_bases/query.mdx
similarity index 100%
rename from docs/sdks/python/knowledge_bases/query.mdx
rename to docs 2/sdks/python/knowledge_bases/query.mdx
diff --git a/docs/sdks/python/list_data_handlers.mdx b/docs 2/sdks/python/list_data_handlers.mdx
similarity index 100%
rename from docs/sdks/python/list_data_handlers.mdx
rename to docs 2/sdks/python/list_data_handlers.mdx
diff --git a/docs/sdks/python/list_databases.mdx b/docs 2/sdks/python/list_databases.mdx
similarity index 100%
rename from docs/sdks/python/list_databases.mdx
rename to docs 2/sdks/python/list_databases.mdx
diff --git a/docs/sdks/python/list_jobs.mdx b/docs 2/sdks/python/list_jobs.mdx
similarity index 100%
rename from docs/sdks/python/list_jobs.mdx
rename to docs 2/sdks/python/list_jobs.mdx
diff --git a/docs/sdks/python/list_ml_engines.mdx b/docs 2/sdks/python/list_ml_engines.mdx
similarity index 100%
rename from docs/sdks/python/list_ml_engines.mdx
rename to docs 2/sdks/python/list_ml_engines.mdx
diff --git a/docs/sdks/python/list_ml_handlers.mdx b/docs 2/sdks/python/list_ml_handlers.mdx
similarity index 100%
rename from docs/sdks/python/list_ml_handlers.mdx
rename to docs 2/sdks/python/list_ml_handlers.mdx
diff --git a/docs/sdks/python/list_models.mdx b/docs 2/sdks/python/list_models.mdx
similarity index 100%
rename from docs/sdks/python/list_models.mdx
rename to docs 2/sdks/python/list_models.mdx
diff --git a/docs/sdks/python/list_projects.mdx b/docs 2/sdks/python/list_projects.mdx
similarity index 100%
rename from docs/sdks/python/list_projects.mdx
rename to docs 2/sdks/python/list_projects.mdx
diff --git a/docs/sdks/python/list_views.mdx b/docs 2/sdks/python/list_views.mdx
similarity index 100%
rename from docs/sdks/python/list_views.mdx
rename to docs 2/sdks/python/list_views.mdx
diff --git a/docs/sdks/python/manage-model-versions.mdx b/docs 2/sdks/python/manage-model-versions.mdx
similarity index 100%
rename from docs/sdks/python/manage-model-versions.mdx
rename to docs 2/sdks/python/manage-model-versions.mdx
diff --git a/docs/sdks/python/native_queries.mdx b/docs 2/sdks/python/native_queries.mdx
similarity index 100%
rename from docs/sdks/python/native_queries.mdx
rename to docs 2/sdks/python/native_queries.mdx
diff --git a/docs/sdks/python/overview.mdx b/docs 2/sdks/python/overview.mdx
similarity index 100%
rename from docs/sdks/python/overview.mdx
rename to docs 2/sdks/python/overview.mdx
diff --git a/docs/sdks/python/query_files.mdx b/docs 2/sdks/python/query_files.mdx
similarity index 100%
rename from docs/sdks/python/query_files.mdx
rename to docs 2/sdks/python/query_files.mdx
diff --git a/docs/sdks/python/query_projects.mdx b/docs 2/sdks/python/query_projects.mdx
similarity index 100%
rename from docs/sdks/python/query_projects.mdx
rename to docs 2/sdks/python/query_projects.mdx
diff --git a/docs/sdks/python/query_table.mdx b/docs 2/sdks/python/query_table.mdx
similarity index 100%
rename from docs/sdks/python/query_table.mdx
rename to docs 2/sdks/python/query_table.mdx
diff --git a/docs/sdks/python/query_view.mdx b/docs 2/sdks/python/query_view.mdx
similarity index 100%
rename from docs/sdks/python/query_view.mdx
rename to docs 2/sdks/python/query_view.mdx
diff --git a/docs/sdks/python/refresh_job.mdx b/docs 2/sdks/python/refresh_job.mdx
similarity index 100%
rename from docs/sdks/python/refresh_job.mdx
rename to docs 2/sdks/python/refresh_job.mdx
diff --git a/docs/sdks/python/refresh_model.mdx b/docs 2/sdks/python/refresh_model.mdx
similarity index 100%
rename from docs/sdks/python/refresh_model.mdx
rename to docs 2/sdks/python/refresh_model.mdx
diff --git a/docs/sdks/python/retrain.mdx b/docs 2/sdks/python/retrain.mdx
similarity index 100%
rename from docs/sdks/python/retrain.mdx
rename to docs 2/sdks/python/retrain.mdx
diff --git a/docs/sdks/python/update_table.mdx b/docs 2/sdks/python/update_table.mdx
similarity index 100%
rename from docs/sdks/python/update_table.mdx
rename to docs 2/sdks/python/update_table.mdx
diff --git a/docs/sdks/python/upload_file.mdx b/docs 2/sdks/python/upload_file.mdx
similarity index 100%
rename from docs/sdks/python/upload_file.mdx
rename to docs 2/sdks/python/upload_file.mdx
diff --git a/docs/setup/cloud/aws-marketplace.mdx b/docs 2/setup/cloud/aws-marketplace.mdx
similarity index 100%
rename from docs/setup/cloud/aws-marketplace.mdx
rename to docs 2/setup/cloud/aws-marketplace.mdx
diff --git a/docs/setup/community-deploys-mindsdb.mdx b/docs 2/setup/community-deploys-mindsdb.mdx
similarity index 100%
rename from docs/setup/community-deploys-mindsdb.mdx
rename to docs 2/setup/community-deploys-mindsdb.mdx
diff --git a/docs/setup/custom-config.mdx b/docs 2/setup/custom-config.mdx
similarity index 92%
rename from docs/setup/custom-config.mdx
rename to docs 2/setup/custom-config.mdx
index 89772412af0..0a76a038edc 100644
--- a/docs/setup/custom-config.mdx
+++ b/docs 2/setup/custom-config.mdx
@@ -191,6 +191,38 @@ Connection parameters for the MySQL API include:
+
+
+The `mcp` section configures the [MCP server](/model-context-protocol/usage).
+
+```json
+"api": {
+ "mcp": {
+ "cors": {
+ "enabled": true,
+ "allow_origins": [],
+ "allow_origin_regex": "https?://(localhost|127\\.0\\.0\\.1)(:\\d+)?",
+ "allow_headers": ["*"]
+ },
+ "rate_limit": {
+ "enabled": false,
+ "requests_per_minute": 60
+ },
+ "dns_rebinding_protection": false
+ }
+}
+```
+
+* `cors.enabled`: Enables CORS headers on MCP endpoints. Can also be set via `MINDSDB_MCP_CORS_ENABLED`.
+* `cors.allow_origins`: List of allowed origins. Can also be set via `MINDSDB_MCP_ALLOW_ORIGINS` (comma-separated).
+* `cors.allow_origin_regex`: Regex pattern for allowed origins. Can also be set via `MINDSDB_MCP_ALLOW_ORIGIN_REGEXP`.
+* `cors.allow_headers`: List of allowed request headers. Can also be set via `MINDSDB_MCP_ALLOW_HEADERS` (comma-separated).
+* `rate_limit.enabled`: Enables per-IP rate limiting. Can also be set via `MINDSDB_MCP_RATE_LIMIT_ENABLED`.
+* `rate_limit.requests_per_minute`: Maximum number of requests per minute per IP. Can also be set via `MINDSDB_MCP_RATE_LIMIT_RPM`.
+* `dns_rebinding_protection`: When `true`, the MCP transport validates the `Host` header against a list of known-safe hosts to prevent DNS rebinding attacks. Disabled by default (`false`). Enable it when running MindsDB locally and you want to restrict MCP access to `localhost` only. Can also be set via `MINDSDB_MCP_DNS_REBINDING_PROTECTION`.
+
+
+
#### `cache`
diff --git a/docs/setup/environment-vars.mdx b/docs 2/setup/environment-vars.mdx
similarity index 100%
rename from docs/setup/environment-vars.mdx
rename to docs 2/setup/environment-vars.mdx
diff --git a/docs/setup/mindsdb-apis.mdx b/docs 2/setup/mindsdb-apis.mdx
similarity index 100%
rename from docs/setup/mindsdb-apis.mdx
rename to docs 2/setup/mindsdb-apis.mdx
diff --git a/docs/setup/my_config.json b/docs 2/setup/my_config.json
similarity index 100%
rename from docs/setup/my_config.json
rename to docs 2/setup/my_config.json
diff --git a/docs/setup/open_telemetry.mdx b/docs 2/setup/open_telemetry.mdx
similarity index 100%
rename from docs/setup/open_telemetry.mdx
rename to docs 2/setup/open_telemetry.mdx
diff --git a/docs/setup/self-hosted/docker-desktop.mdx b/docs 2/setup/self-hosted/docker-desktop.mdx
similarity index 100%
rename from docs/setup/self-hosted/docker-desktop.mdx
rename to docs 2/setup/self-hosted/docker-desktop.mdx
diff --git a/docs/setup/self-hosted/docker.mdx b/docs 2/setup/self-hosted/docker.mdx
similarity index 100%
rename from docs/setup/self-hosted/docker.mdx
rename to docs 2/setup/self-hosted/docker.mdx
diff --git a/docs/setup/self-hosted/pip/linux.mdx b/docs 2/setup/self-hosted/pip/linux.mdx
similarity index 100%
rename from docs/setup/self-hosted/pip/linux.mdx
rename to docs 2/setup/self-hosted/pip/linux.mdx
diff --git a/docs/setup/self-hosted/pip/macos.mdx b/docs 2/setup/self-hosted/pip/macos.mdx
similarity index 100%
rename from docs/setup/self-hosted/pip/macos.mdx
rename to docs 2/setup/self-hosted/pip/macos.mdx
diff --git a/docs/setup/self-hosted/pip/source.mdx b/docs 2/setup/self-hosted/pip/source.mdx
similarity index 100%
rename from docs/setup/self-hosted/pip/source.mdx
rename to docs 2/setup/self-hosted/pip/source.mdx
diff --git a/docs/setup/self-hosted/pip/windows.mdx b/docs 2/setup/self-hosted/pip/windows.mdx
similarity index 100%
rename from docs/setup/self-hosted/pip/windows.mdx
rename to docs 2/setup/self-hosted/pip/windows.mdx
diff --git a/docs/setup/system-defaults.mdx b/docs 2/setup/system-defaults.mdx
similarity index 100%
rename from docs/setup/system-defaults.mdx
rename to docs 2/setup/system-defaults.mdx
diff --git a/docs/sitemaps/contribute.mdx b/docs 2/sitemaps/contribute.mdx
similarity index 100%
rename from docs/sitemaps/contribute.mdx
rename to docs 2/sitemaps/contribute.mdx
diff --git a/docs/sitemaps/faq.mdx b/docs 2/sitemaps/faq.mdx
similarity index 100%
rename from docs/sitemaps/faq.mdx
rename to docs 2/sitemaps/faq.mdx
diff --git a/docs/sitemaps/get_started_documentation.mdx b/docs 2/sitemaps/get_started_documentation.mdx
similarity index 100%
rename from docs/sitemaps/get_started_documentation.mdx
rename to docs 2/sitemaps/get_started_documentation.mdx
diff --git a/docs/sitemaps/integrations.mdx b/docs 2/sitemaps/integrations.mdx
similarity index 100%
rename from docs/sitemaps/integrations.mdx
rename to docs 2/sitemaps/integrations.mdx
diff --git a/docs/sitemaps/rest_api.mdx b/docs 2/sitemaps/rest_api.mdx
similarity index 100%
rename from docs/sitemaps/rest_api.mdx
rename to docs 2/sitemaps/rest_api.mdx
diff --git a/docs/sitemaps/sdks.mdx b/docs 2/sitemaps/sdks.mdx
similarity index 100%
rename from docs/sitemaps/sdks.mdx
rename to docs 2/sitemaps/sdks.mdx
diff --git a/docs/sitemaps/sql_api.mdx b/docs 2/sitemaps/sql_api.mdx
similarity index 100%
rename from docs/sitemaps/sql_api.mdx
rename to docs 2/sitemaps/sql_api.mdx
diff --git a/docs/sitemaps/use_cases.mdx b/docs 2/sitemaps/use_cases.mdx
similarity index 97%
rename from docs/sitemaps/use_cases.mdx
rename to docs 2/sitemaps/use_cases.mdx
index c1c0baa3637..771cca4429f 100644
--- a/docs/sitemaps/use_cases.mdx
+++ b/docs 2/sitemaps/use_cases.mdx
@@ -37,7 +37,6 @@ https://docs.mindsdb.com/use-cases/data_enrichment/hugging-face-inference-api-ex
Predictive Analytics:
https://docs.mindsdb.com/use-cases/predictive_analytics/overview
https://docs.mindsdb.com/use-cases/predictive_analytics/house-sales-forecasting
-https://docs.mindsdb.com/use-cases/predictive_analytics/expenditures-statsforecast
https://docs.mindsdb.com/use-cases/predictive_analytics/eeg-forecasting
In-Database Machine Learning:
diff --git a/docs/sql/data-insights.mdx b/docs 2/sql/data-insights.mdx
similarity index 100%
rename from docs/sql/data-insights.mdx
rename to docs 2/sql/data-insights.mdx
diff --git a/docs/sql/feature-eng.mdx b/docs 2/sql/feature-eng.mdx
similarity index 100%
rename from docs/sql/feature-eng.mdx
rename to docs 2/sql/feature-eng.mdx
diff --git a/docs/sql/project.mdx b/docs 2/sql/project.mdx
similarity index 100%
rename from docs/sql/project.mdx
rename to docs 2/sql/project.mdx
diff --git a/docs/sql/table-structure.mdx b/docs 2/sql/table-structure.mdx
similarity index 100%
rename from docs/sql/table-structure.mdx
rename to docs 2/sql/table-structure.mdx
diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/batch.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/batch.png
similarity index 100%
rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/batch.png
rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/batch.png
diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png
similarity index 100%
rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png
rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png
diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/query.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/query.png
similarity index 100%
rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/query.png
rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/query.png
diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png
similarity index 100%
rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png
rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png
diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png
similarity index 100%
rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png
rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png
diff --git a/docs/tutorials/sentiment-analysis-intercom-data-airbyte.mdx b/docs 2/tutorials/sentiment-analysis-intercom-data-airbyte.mdx
similarity index 100%
rename from docs/tutorials/sentiment-analysis-intercom-data-airbyte.mdx
rename to docs 2/tutorials/sentiment-analysis-intercom-data-airbyte.mdx
diff --git a/docs/use-cases/ai-powered_data_retrieval/overview.mdx b/docs 2/use-cases/ai-powered_data_retrieval/overview.mdx
similarity index 100%
rename from docs/use-cases/ai-powered_data_retrieval/overview.mdx
rename to docs 2/use-cases/ai-powered_data_retrieval/overview.mdx
diff --git a/docs/use-cases/ai-powered_data_retrieval/recommenders.mdx b/docs 2/use-cases/ai-powered_data_retrieval/recommenders.mdx
similarity index 100%
rename from docs/use-cases/ai-powered_data_retrieval/recommenders.mdx
rename to docs 2/use-cases/ai-powered_data_retrieval/recommenders.mdx
diff --git a/docs/use-cases/ai_agents/build_ai_agents.mdx b/docs 2/use-cases/ai_agents/build_ai_agents.mdx
similarity index 100%
rename from docs/use-cases/ai_agents/build_ai_agents.mdx
rename to docs 2/use-cases/ai_agents/build_ai_agents.mdx
diff --git a/docs/use-cases/ai_agents/chatbots_agents.mdx b/docs 2/use-cases/ai_agents/chatbots_agents.mdx
similarity index 100%
rename from docs/use-cases/ai_agents/chatbots_agents.mdx
rename to docs 2/use-cases/ai_agents/chatbots_agents.mdx
diff --git a/docs/use-cases/ai_agents/create-chatbot-kb.mdx b/docs 2/use-cases/ai_agents/create-chatbot-kb.mdx
similarity index 100%
rename from docs/use-cases/ai_agents/create-chatbot-kb.mdx
rename to docs 2/use-cases/ai_agents/create-chatbot-kb.mdx
diff --git a/docs/use-cases/ai_agents/create-chatbot.mdx b/docs 2/use-cases/ai_agents/create-chatbot.mdx
similarity index 100%
rename from docs/use-cases/ai_agents/create-chatbot.mdx
rename to docs 2/use-cases/ai_agents/create-chatbot.mdx
diff --git a/docs/use-cases/ai_agents/llm-chatbot-ui.mdx b/docs 2/use-cases/ai_agents/llm-chatbot-ui.mdx
similarity index 100%
rename from docs/use-cases/ai_agents/llm-chatbot-ui.mdx
rename to docs 2/use-cases/ai_agents/llm-chatbot-ui.mdx
diff --git a/docs/use-cases/ai_agents/overview.mdx b/docs 2/use-cases/ai_agents/overview.mdx
similarity index 100%
rename from docs/use-cases/ai_agents/overview.mdx
rename to docs 2/use-cases/ai_agents/overview.mdx
diff --git a/docs/use-cases/ai_workflow_automation/ai_workflow.mdx b/docs 2/use-cases/ai_workflow_automation/ai_workflow.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/ai_workflow.mdx
rename to docs 2/use-cases/ai_workflow_automation/ai_workflow.mdx
diff --git a/docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx b/docs 2/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx
rename to docs 2/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx
diff --git a/docs/use-cases/ai_workflow_automation/overview.mdx b/docs 2/use-cases/ai_workflow_automation/overview.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/overview.mdx
rename to docs 2/use-cases/ai_workflow_automation/overview.mdx
diff --git a/docs/use-cases/ai_workflow_automation/slack-chatbot.mdx b/docs 2/use-cases/ai_workflow_automation/slack-chatbot.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/slack-chatbot.mdx
rename to docs 2/use-cases/ai_workflow_automation/slack-chatbot.mdx
diff --git a/docs/use-cases/ai_workflow_automation/twilio-chatbot.mdx b/docs 2/use-cases/ai_workflow_automation/twilio-chatbot.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/twilio-chatbot.mdx
rename to docs 2/use-cases/ai_workflow_automation/twilio-chatbot.mdx
diff --git a/docs/use-cases/ai_workflow_automation/twitter-chatbot.mdx b/docs 2/use-cases/ai_workflow_automation/twitter-chatbot.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/twitter-chatbot.mdx
rename to docs 2/use-cases/ai_workflow_automation/twitter-chatbot.mdx
diff --git a/docs/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx b/docs 2/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx
similarity index 100%
rename from docs/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx
rename to docs 2/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx
diff --git a/docs/use-cases/automated_finetuning/data.csv b/docs 2/use-cases/automated_finetuning/data.csv
similarity index 100%
rename from docs/use-cases/automated_finetuning/data.csv
rename to docs 2/use-cases/automated_finetuning/data.csv
diff --git a/docs/use-cases/automated_finetuning/openai.mdx b/docs 2/use-cases/automated_finetuning/openai.mdx
similarity index 100%
rename from docs/use-cases/automated_finetuning/openai.mdx
rename to docs 2/use-cases/automated_finetuning/openai.mdx
diff --git a/docs/use-cases/automated_finetuning/overview.mdx b/docs 2/use-cases/automated_finetuning/overview.mdx
similarity index 100%
rename from docs/use-cases/automated_finetuning/overview.mdx
rename to docs 2/use-cases/automated_finetuning/overview.mdx
diff --git a/docs/use-cases/data_enrichment/hugging-face-examples.mdx b/docs 2/use-cases/data_enrichment/hugging-face-examples.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/hugging-face-examples.mdx
rename to docs 2/use-cases/data_enrichment/hugging-face-examples.mdx
diff --git a/docs/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx b/docs 2/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx
rename to docs 2/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx
diff --git a/docs/use-cases/data_enrichment/image-generator.mdx b/docs 2/use-cases/data_enrichment/image-generator.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/image-generator.mdx
rename to docs 2/use-cases/data_enrichment/image-generator.mdx
diff --git a/docs/use-cases/data_enrichment/json-from-text.mdx b/docs 2/use-cases/data_enrichment/json-from-text.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/json-from-text.mdx
rename to docs 2/use-cases/data_enrichment/json-from-text.mdx
diff --git a/docs/use-cases/data_enrichment/overview.mdx b/docs 2/use-cases/data_enrichment/overview.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/overview.mdx
rename to docs 2/use-cases/data_enrichment/overview.mdx
diff --git a/docs/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx b/docs 2/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx
rename to docs 2/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx
diff --git a/docs/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx b/docs 2/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx
rename to docs 2/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx
diff --git a/docs/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx b/docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx
rename to docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx
diff --git a/docs/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx b/docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx
rename to docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx
diff --git a/docs/use-cases/data_enrichment/text-sentiment-hf.mdx b/docs 2/use-cases/data_enrichment/text-sentiment-hf.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/text-sentiment-hf.mdx
rename to docs 2/use-cases/data_enrichment/text-sentiment-hf.mdx
diff --git a/docs/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx b/docs 2/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx
rename to docs 2/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx
diff --git a/docs/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx b/docs 2/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx
similarity index 100%
rename from docs/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx
rename to docs 2/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx
diff --git a/docs/use-cases/in-database_ml/ai-tables.mdx b/docs 2/use-cases/in-database_ml/ai-tables.mdx
similarity index 100%
rename from docs/use-cases/in-database_ml/ai-tables.mdx
rename to docs 2/use-cases/in-database_ml/ai-tables.mdx
diff --git a/docs/use-cases/in-database_ml/byom.mdx b/docs 2/use-cases/in-database_ml/byom.mdx
similarity index 100%
rename from docs/use-cases/in-database_ml/byom.mdx
rename to docs 2/use-cases/in-database_ml/byom.mdx
diff --git a/docs/use-cases/in-database_ml/overview.mdx b/docs 2/use-cases/in-database_ml/overview.mdx
similarity index 100%
rename from docs/use-cases/in-database_ml/overview.mdx
rename to docs 2/use-cases/in-database_ml/overview.mdx
diff --git a/docs/use-cases/mcp_api/cursor.mdx b/docs 2/use-cases/mcp_api/cursor.mdx
similarity index 100%
rename from docs/use-cases/mcp_api/cursor.mdx
rename to docs 2/use-cases/mcp_api/cursor.mdx
diff --git a/docs/use-cases/mcp_api/mindsdb_mcp_server.mdx b/docs 2/use-cases/mcp_api/mindsdb_mcp_server.mdx
similarity index 100%
rename from docs/use-cases/mcp_api/mindsdb_mcp_server.mdx
rename to docs 2/use-cases/mcp_api/mindsdb_mcp_server.mdx
diff --git a/docs/use-cases/mcp_api/overview.mdx b/docs 2/use-cases/mcp_api/overview.mdx
similarity index 100%
rename from docs/use-cases/mcp_api/overview.mdx
rename to docs 2/use-cases/mcp_api/overview.mdx
diff --git a/docs/use-cases/predictive_analytics/overview.mdx b/docs 2/use-cases/predictive_analytics/overview.mdx
similarity index 86%
rename from docs/use-cases/predictive_analytics/overview.mdx
rename to docs 2/use-cases/predictive_analytics/overview.mdx
index 91c0b0c9b43..7e210a07769 100644
--- a/docs/use-cases/predictive_analytics/overview.mdx
+++ b/docs 2/use-cases/predictive_analytics/overview.mdx
@@ -21,7 +21,6 @@ Available tutorials:
-
diff --git a/docs/assets/BearHeroImageMindsDB.jpeg b/docs/assets/BearHeroImageMindsDB.jpeg
deleted file mode 100644
index 0e017c94693..00000000000
Binary files a/docs/assets/BearHeroImageMindsDB.jpeg and /dev/null differ
diff --git a/docs/assets/MindsDBLightwood@3x.png b/docs/assets/MindsDBLightwood@3x.png
deleted file mode 100644
index 74c7d8ca2ec..00000000000
Binary files a/docs/assets/MindsDBLightwood@3x.png and /dev/null differ
diff --git a/docs/assets/TWbot - hero Snoopstein.png b/docs/assets/TWbot - hero Snoopstein.png
deleted file mode 100644
index 476c9101759..00000000000
Binary files a/docs/assets/TWbot - hero Snoopstein.png and /dev/null differ
diff --git a/docs/assets/TWbot-response-image.png b/docs/assets/TWbot-response-image.png
deleted file mode 100644
index 179df1ad4ce..00000000000
Binary files a/docs/assets/TWbot-response-image.png and /dev/null differ
diff --git a/docs/assets/TWbot-response1.png b/docs/assets/TWbot-response1.png
deleted file mode 100644
index 8322450a5c5..00000000000
Binary files a/docs/assets/TWbot-response1.png and /dev/null differ
diff --git a/docs/assets/TWbot-response2.png b/docs/assets/TWbot-response2.png
deleted file mode 100644
index ec2ed3f9d1b..00000000000
Binary files a/docs/assets/TWbot-response2.png and /dev/null differ
diff --git a/docs/assets/TWbot-response3.png b/docs/assets/TWbot-response3.png
deleted file mode 100644
index 9e988bb273b..00000000000
Binary files a/docs/assets/TWbot-response3.png and /dev/null differ
diff --git a/docs/assets/TWbot-response4.png b/docs/assets/TWbot-response4.png
deleted file mode 100644
index 21effa5798a..00000000000
Binary files a/docs/assets/TWbot-response4.png and /dev/null differ
diff --git a/docs/assets/TWbot-response5.png b/docs/assets/TWbot-response5.png
deleted file mode 100644
index 6a868c65550..00000000000
Binary files a/docs/assets/TWbot-response5.png and /dev/null differ
diff --git a/docs/assets/a2a-unavailable.png b/docs/assets/a2a-unavailable.png
deleted file mode 100644
index 20ede064b14..00000000000
Binary files a/docs/assets/a2a-unavailable.png and /dev/null differ
diff --git a/docs/assets/agent_diagram.png b/docs/assets/agent_diagram.png
deleted file mode 100644
index 56e7689bada..00000000000
Binary files a/docs/assets/agent_diagram.png and /dev/null differ
diff --git a/docs/assets/ai_system_deployment.png b/docs/assets/ai_system_deployment.png
deleted file mode 100644
index 720384986c1..00000000000
Binary files a/docs/assets/ai_system_deployment.png and /dev/null differ
diff --git a/docs/assets/ai_workflow_automation.png b/docs/assets/ai_workflow_automation.png
deleted file mode 100644
index 72184ecc384..00000000000
Binary files a/docs/assets/ai_workflow_automation.png and /dev/null differ
diff --git a/docs/assets/cloud-login.png b/docs/assets/cloud-login.png
deleted file mode 100644
index 32e39f0437e..00000000000
Binary files a/docs/assets/cloud-login.png and /dev/null differ
diff --git a/docs/assets/cloud-signup.png b/docs/assets/cloud-signup.png
deleted file mode 100644
index 50673620fa0..00000000000
Binary files a/docs/assets/cloud-signup.png and /dev/null differ
diff --git a/docs/assets/cloud/cloud-signup-filledout.png b/docs/assets/cloud/cloud-signup-filledout.png
deleted file mode 100644
index 91175fb2146..00000000000
Binary files a/docs/assets/cloud/cloud-signup-filledout.png and /dev/null differ
diff --git a/docs/assets/cloud/dedicated_instance_off.png b/docs/assets/cloud/dedicated_instance_off.png
deleted file mode 100644
index 25930e0dedc..00000000000
Binary files a/docs/assets/cloud/dedicated_instance_off.png and /dev/null differ
diff --git a/docs/assets/cloud/dedicated_instance_on.png b/docs/assets/cloud/dedicated_instance_on.png
deleted file mode 100644
index 8c3d7dc6751..00000000000
Binary files a/docs/assets/cloud/dedicated_instance_on.png and /dev/null differ
diff --git a/docs/assets/cloud/email.png b/docs/assets/cloud/email.png
deleted file mode 100644
index 781bb2e7b1b..00000000000
Binary files a/docs/assets/cloud/email.png and /dev/null differ
diff --git a/docs/assets/cloud/gui.png b/docs/assets/cloud/gui.png
deleted file mode 100644
index ba3c270db21..00000000000
Binary files a/docs/assets/cloud/gui.png and /dev/null differ
diff --git a/docs/assets/cloud/import_file.png b/docs/assets/cloud/import_file.png
deleted file mode 100644
index 24f0332896e..00000000000
Binary files a/docs/assets/cloud/import_file.png and /dev/null differ
diff --git a/docs/assets/cloud/import_file_2.png b/docs/assets/cloud/import_file_2.png
deleted file mode 100644
index 2219fb87387..00000000000
Binary files a/docs/assets/cloud/import_file_2.png and /dev/null differ
diff --git a/docs/assets/cloud/login.png b/docs/assets/cloud/login.png
deleted file mode 100644
index bdd8c788657..00000000000
Binary files a/docs/assets/cloud/login.png and /dev/null differ
diff --git a/docs/assets/cloud/plan_table.png b/docs/assets/cloud/plan_table.png
deleted file mode 100644
index a5b2db3206c..00000000000
Binary files a/docs/assets/cloud/plan_table.png and /dev/null differ
diff --git a/docs/assets/connect_compass_cloud.png b/docs/assets/connect_compass_cloud.png
deleted file mode 100644
index 7cd7304ac6f..00000000000
Binary files a/docs/assets/connect_compass_cloud.png and /dev/null differ
diff --git a/docs/assets/connect_compass_srv.png b/docs/assets/connect_compass_srv.png
deleted file mode 100644
index f1107f229b6..00000000000
Binary files a/docs/assets/connect_compass_srv.png and /dev/null differ
diff --git a/docs/assets/connect_compassm.png b/docs/assets/connect_compassm.png
deleted file mode 100644
index cb411e0bdde..00000000000
Binary files a/docs/assets/connect_compassm.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_compass.png b/docs/assets/connect_mongo_compass.png
deleted file mode 100644
index 4871a35bd44..00000000000
Binary files a/docs/assets/connect_mongo_compass.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_compass_1.png b/docs/assets/connect_mongo_compass_1.png
deleted file mode 100644
index 4d0bdfd61d0..00000000000
Binary files a/docs/assets/connect_mongo_compass_1.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_compass_2.png b/docs/assets/connect_mongo_compass_2.png
deleted file mode 100644
index 3168594878f..00000000000
Binary files a/docs/assets/connect_mongo_compass_2.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_compass_3.png b/docs/assets/connect_mongo_compass_3.png
deleted file mode 100644
index c2dbd33da42..00000000000
Binary files a/docs/assets/connect_mongo_compass_3.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_shell.png b/docs/assets/connect_mongo_shell.png
deleted file mode 100644
index 3a75a60332e..00000000000
Binary files a/docs/assets/connect_mongo_shell.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_shell_1.png b/docs/assets/connect_mongo_shell_1.png
deleted file mode 100644
index c954afc41eb..00000000000
Binary files a/docs/assets/connect_mongo_shell_1.png and /dev/null differ
diff --git a/docs/assets/connect_mongo_shell_2.png b/docs/assets/connect_mongo_shell_2.png
deleted file mode 100644
index 76908fa0a80..00000000000
Binary files a/docs/assets/connect_mongo_shell_2.png and /dev/null differ
diff --git a/docs/assets/contribute.png b/docs/assets/contribute.png
deleted file mode 100644
index adbceacb572..00000000000
Binary files a/docs/assets/contribute.png and /dev/null differ
diff --git a/docs/assets/data/mssql-select.gif b/docs/assets/data/mssql-select.gif
deleted file mode 100644
index 759755c4983..00000000000
Binary files a/docs/assets/data/mssql-select.gif and /dev/null differ
diff --git a/docs/assets/databases/mdb-mysql.png b/docs/assets/databases/mdb-mysql.png
deleted file mode 100644
index 640138effcf..00000000000
Binary files a/docs/assets/databases/mdb-mysql.png and /dev/null differ
diff --git a/docs/assets/databases/mdb-postgres.png b/docs/assets/databases/mdb-postgres.png
deleted file mode 100644
index 7bf055cb840..00000000000
Binary files a/docs/assets/databases/mdb-postgres.png and /dev/null differ
diff --git a/docs/assets/databases/mongodb/mongo-mdb-code.png b/docs/assets/databases/mongodb/mongo-mdb-code.png
deleted file mode 100644
index a3ec12afc94..00000000000
Binary files a/docs/assets/databases/mongodb/mongo-mdb-code.png and /dev/null differ
diff --git a/docs/assets/databases/mongodb/mongo-mdb-current.png b/docs/assets/databases/mongodb/mongo-mdb-current.png
deleted file mode 100644
index 6910119bb0b..00000000000
Binary files a/docs/assets/databases/mongodb/mongo-mdb-current.png and /dev/null differ
diff --git a/docs/assets/databases/mongodb/mongo-mdb.png b/docs/assets/databases/mongodb/mongo-mdb.png
deleted file mode 100644
index 4d3f6043800..00000000000
Binary files a/docs/assets/databases/mongodb/mongo-mdb.png and /dev/null differ
diff --git a/docs/assets/dbeaver-check-predictor-status.png b/docs/assets/dbeaver-check-predictor-status.png
deleted file mode 100644
index 6904f13dd14..00000000000
Binary files a/docs/assets/dbeaver-check-predictor-status.png and /dev/null differ
diff --git a/docs/assets/dbeaver-configure-cloud-connection.png b/docs/assets/dbeaver-configure-cloud-connection.png
deleted file mode 100644
index 51ad1f2c8b6..00000000000
Binary files a/docs/assets/dbeaver-configure-cloud-connection.png and /dev/null differ
diff --git a/docs/assets/dbeaver-configure-docker-connection.png b/docs/assets/dbeaver-configure-docker-connection.png
deleted file mode 100644
index 434da45f9d0..00000000000
Binary files a/docs/assets/dbeaver-configure-docker-connection.png and /dev/null differ
diff --git a/docs/assets/dbeaver-create-connection.png b/docs/assets/dbeaver-create-connection.png
deleted file mode 100644
index 6d22deca2c8..00000000000
Binary files a/docs/assets/dbeaver-create-connection.png and /dev/null differ
diff --git a/docs/assets/dbeaver-create-database.png b/docs/assets/dbeaver-create-database.png
deleted file mode 100644
index a742e832bad..00000000000
Binary files a/docs/assets/dbeaver-create-database.png and /dev/null differ
diff --git a/docs/assets/dbeaver-create-predictor-simple.png b/docs/assets/dbeaver-create-predictor-simple.png
deleted file mode 100644
index b17cdad392b..00000000000
Binary files a/docs/assets/dbeaver-create-predictor-simple.png and /dev/null differ
diff --git a/docs/assets/dbeaver-create-script.png b/docs/assets/dbeaver-create-script.png
deleted file mode 100644
index ea4f0447bcf..00000000000
Binary files a/docs/assets/dbeaver-create-script.png and /dev/null differ
diff --git a/docs/assets/dbeaver-empty-script.png b/docs/assets/dbeaver-empty-script.png
deleted file mode 100644
index bb984b71c1c..00000000000
Binary files a/docs/assets/dbeaver-empty-script.png and /dev/null differ
diff --git a/docs/assets/dbeaver-home-rentals-prediction-results.png b/docs/assets/dbeaver-home-rentals-prediction-results.png
deleted file mode 100644
index 5c0032063df..00000000000
Binary files a/docs/assets/dbeaver-home-rentals-prediction-results.png and /dev/null differ
diff --git a/docs/assets/dbeaver-home-rentals-prediction.png b/docs/assets/dbeaver-home-rentals-prediction.png
deleted file mode 100644
index 0f7215e9723..00000000000
Binary files a/docs/assets/dbeaver-home-rentals-prediction.png and /dev/null differ
diff --git a/docs/assets/dbeaver-predict-home-rentals.png b/docs/assets/dbeaver-predict-home-rentals.png
deleted file mode 100644
index d4d35eea7b5..00000000000
Binary files a/docs/assets/dbeaver-predict-home-rentals.png and /dev/null differ
diff --git a/docs/assets/dbeaver-preview-data.png b/docs/assets/dbeaver-preview-data.png
deleted file mode 100644
index 31794e39954..00000000000
Binary files a/docs/assets/dbeaver-preview-data.png and /dev/null differ
diff --git a/docs/assets/getting-started.png b/docs/assets/getting-started.png
deleted file mode 100644
index 168ff02e040..00000000000
Binary files a/docs/assets/getting-started.png and /dev/null differ
diff --git a/docs/assets/icons/Cloud.svg b/docs/assets/icons/Cloud.svg
deleted file mode 100644
index 082345353ab..00000000000
--- a/docs/assets/icons/Cloud.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/icons/Database.svg b/docs/assets/icons/Database.svg
deleted file mode 100644
index 25326bf3c90..00000000000
--- a/docs/assets/icons/Database.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/icons/Explainable.svg b/docs/assets/icons/Explainable.svg
deleted file mode 100644
index 96514672a8f..00000000000
--- a/docs/assets/icons/Explainable.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/icons/GUI.svg b/docs/assets/icons/GUI.svg
deleted file mode 100644
index ea99f55e989..00000000000
--- a/docs/assets/icons/GUI.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/icons/Python.svg b/docs/assets/icons/Python.svg
deleted file mode 100644
index 6ac826a7654..00000000000
--- a/docs/assets/icons/Python.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/icons/Server.svg b/docs/assets/icons/Server.svg
deleted file mode 100644
index 94f40f4adbd..00000000000
--- a/docs/assets/icons/Server.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/icons/sdk.svg b/docs/assets/icons/sdk.svg
deleted file mode 100644
index 25cdebf4f30..00000000000
--- a/docs/assets/icons/sdk.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
\ No newline at end of file
diff --git a/docs/assets/info/query.png b/docs/assets/info/query.png
deleted file mode 100644
index 48301f93d97..00000000000
Binary files a/docs/assets/info/query.png and /dev/null differ
diff --git a/docs/assets/info/select.png b/docs/assets/info/select.png
deleted file mode 100644
index 731cf3e3257..00000000000
Binary files a/docs/assets/info/select.png and /dev/null differ
diff --git a/docs/assets/install-dependencies-gui-x.png b/docs/assets/install-dependencies-gui-x.png
deleted file mode 100644
index a4afc61191c..00000000000
Binary files a/docs/assets/install-dependencies-gui-x.png and /dev/null differ
diff --git a/docs/assets/integration-image.png b/docs/assets/integration-image.png
deleted file mode 100644
index 20a2feb7ff8..00000000000
Binary files a/docs/assets/integration-image.png and /dev/null differ
diff --git a/docs/assets/lightwood.png b/docs/assets/lightwood.png
deleted file mode 100644
index d55f5c7fe5e..00000000000
Binary files a/docs/assets/lightwood.png and /dev/null differ
diff --git a/docs/assets/mdb_image.png b/docs/assets/mdb_image.png
deleted file mode 100644
index 84a7720081f..00000000000
Binary files a/docs/assets/mdb_image.png and /dev/null differ
diff --git a/docs/assets/mdb_logo.png b/docs/assets/mdb_logo.png
deleted file mode 100755
index 7e1ee76aebd..00000000000
Binary files a/docs/assets/mdb_logo.png and /dev/null differ
diff --git a/docs/assets/mdb_logo_name.png b/docs/assets/mdb_logo_name.png
deleted file mode 100755
index 88374501af6..00000000000
Binary files a/docs/assets/mdb_logo_name.png and /dev/null differ
diff --git a/docs/assets/mdb_logo_w.svg b/docs/assets/mdb_logo_w.svg
deleted file mode 100644
index cd35d4eddab..00000000000
--- a/docs/assets/mdb_logo_w.svg
+++ /dev/null
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/docs/assets/mindsdb-local-editor.png b/docs/assets/mindsdb-local-editor.png
deleted file mode 100644
index 0aa8d85e0f5..00000000000
Binary files a/docs/assets/mindsdb-local-editor.png and /dev/null differ
diff --git a/docs/assets/mindsdb_gui_editor/learning_hub.png b/docs/assets/mindsdb_gui_editor/learning_hub.png
deleted file mode 100644
index ecb700afafd..00000000000
Binary files a/docs/assets/mindsdb_gui_editor/learning_hub.png and /dev/null differ
diff --git a/docs/assets/mindsdb_homepage_diagram.png b/docs/assets/mindsdb_homepage_diagram.png
deleted file mode 100644
index c77fc5b623f..00000000000
Binary files a/docs/assets/mindsdb_homepage_diagram.png and /dev/null differ
diff --git a/docs/assets/mindsdb_logo.png b/docs/assets/mindsdb_logo.png
deleted file mode 100644
index 2dc5824d871..00000000000
Binary files a/docs/assets/mindsdb_logo.png and /dev/null differ
diff --git a/docs/assets/predictors/clickhouse-insert.gif b/docs/assets/predictors/clickhouse-insert.gif
deleted file mode 100644
index b1161582f69..00000000000
Binary files a/docs/assets/predictors/clickhouse-insert.gif and /dev/null differ
diff --git a/docs/assets/predictors/clickhouse-query.gif b/docs/assets/predictors/clickhouse-query.gif
deleted file mode 100644
index 76d231b4a2a..00000000000
Binary files a/docs/assets/predictors/clickhouse-query.gif and /dev/null differ
diff --git a/docs/assets/predictors/column-importance.png b/docs/assets/predictors/column-importance.png
deleted file mode 100644
index 565d0fde145..00000000000
Binary files a/docs/assets/predictors/column-importance.png and /dev/null differ
diff --git a/docs/assets/predictors/mariadb-insert.gif b/docs/assets/predictors/mariadb-insert.gif
deleted file mode 100644
index 795d61c6f61..00000000000
Binary files a/docs/assets/predictors/mariadb-insert.gif and /dev/null differ
diff --git a/docs/assets/predictors/mariadb-status.gif b/docs/assets/predictors/mariadb-status.gif
deleted file mode 100644
index 98be86a25c8..00000000000
Binary files a/docs/assets/predictors/mariadb-status.gif and /dev/null differ
diff --git a/docs/assets/predictors/mongo/mongo-insert.gif b/docs/assets/predictors/mongo/mongo-insert.gif
deleted file mode 100644
index 39d36ca5be3..00000000000
Binary files a/docs/assets/predictors/mongo/mongo-insert.gif and /dev/null differ
diff --git a/docs/assets/predictors/mssql-status.gif b/docs/assets/predictors/mssql-status.gif
deleted file mode 100644
index c7e617b7923..00000000000
Binary files a/docs/assets/predictors/mssql-status.gif and /dev/null differ
diff --git a/docs/assets/predictors/mysql-insert.gif b/docs/assets/predictors/mysql-insert.gif
deleted file mode 100644
index 2eeb0bd0f7e..00000000000
Binary files a/docs/assets/predictors/mysql-insert.gif and /dev/null differ
diff --git a/docs/assets/predictors/mysql-query.gif b/docs/assets/predictors/mysql-query.gif
deleted file mode 100644
index 611f2fd76b3..00000000000
Binary files a/docs/assets/predictors/mysql-query.gif and /dev/null differ
diff --git a/docs/assets/predictors/mysql-status.gif b/docs/assets/predictors/mysql-status.gif
deleted file mode 100644
index 96ee51fc6db..00000000000
Binary files a/docs/assets/predictors/mysql-status.gif and /dev/null differ
diff --git a/docs/assets/predictors/postgresql-insert.gif b/docs/assets/predictors/postgresql-insert.gif
deleted file mode 100644
index 08ed88afe93..00000000000
Binary files a/docs/assets/predictors/postgresql-insert.gif and /dev/null differ
diff --git a/docs/assets/predictors/postgresql-status.gif b/docs/assets/predictors/postgresql-status.gif
deleted file mode 100644
index 8fe657ac8f2..00000000000
Binary files a/docs/assets/predictors/postgresql-status.gif and /dev/null differ
diff --git a/docs/assets/predictors/train-advanced.gif b/docs/assets/predictors/train-advanced.gif
deleted file mode 100644
index 2bed2542a40..00000000000
Binary files a/docs/assets/predictors/train-advanced.gif and /dev/null differ
diff --git a/docs/assets/predictors/train-timeseries.gif b/docs/assets/predictors/train-timeseries.gif
deleted file mode 100644
index 891ccf8d944..00000000000
Binary files a/docs/assets/predictors/train-timeseries.gif and /dev/null differ
diff --git a/docs/assets/report-issue.gif b/docs/assets/report-issue.gif
deleted file mode 100644
index f2f770bfa5c..00000000000
Binary files a/docs/assets/report-issue.gif and /dev/null differ
diff --git a/docs/assets/report_issues/1_issue_types.png b/docs/assets/report_issues/1_issue_types.png
deleted file mode 100644
index 35f5b80f559..00000000000
Binary files a/docs/assets/report_issues/1_issue_types.png and /dev/null differ
diff --git a/docs/assets/report_issues/1_reporting_new_issue.png b/docs/assets/report_issues/1_reporting_new_issue.png
deleted file mode 100644
index 331a1ba3148..00000000000
Binary files a/docs/assets/report_issues/1_reporting_new_issue.png and /dev/null differ
diff --git a/docs/assets/report_issues/2_bug_report.png b/docs/assets/report_issues/2_bug_report.png
deleted file mode 100644
index 23b8ad0454c..00000000000
Binary files a/docs/assets/report_issues/2_bug_report.png and /dev/null differ
diff --git a/docs/assets/report_issues/2_bug_report_form_1.png b/docs/assets/report_issues/2_bug_report_form_1.png
deleted file mode 100644
index b5b9a21fa75..00000000000
Binary files a/docs/assets/report_issues/2_bug_report_form_1.png and /dev/null differ
diff --git a/docs/assets/report_issues/2_bug_report_form_2.png b/docs/assets/report_issues/2_bug_report_form_2.png
deleted file mode 100644
index ad9790049b2..00000000000
Binary files a/docs/assets/report_issues/2_bug_report_form_2.png and /dev/null differ
diff --git a/docs/assets/report_issues/2_bug_report_form_3.png b/docs/assets/report_issues/2_bug_report_form_3.png
deleted file mode 100644
index 866dab3ed89..00000000000
Binary files a/docs/assets/report_issues/2_bug_report_form_3.png and /dev/null differ
diff --git a/docs/assets/report_issues/2_bug_report_form_4.png b/docs/assets/report_issues/2_bug_report_form_4.png
deleted file mode 100644
index 090879b2724..00000000000
Binary files a/docs/assets/report_issues/2_bug_report_form_4.png and /dev/null differ
diff --git a/docs/assets/report_issues/2_bug_report_form_5.png b/docs/assets/report_issues/2_bug_report_form_5.png
deleted file mode 100644
index 9522dafaa46..00000000000
Binary files a/docs/assets/report_issues/2_bug_report_form_5.png and /dev/null differ
diff --git a/docs/assets/report_issues/3_feature_request.png b/docs/assets/report_issues/3_feature_request.png
deleted file mode 100644
index a40bf424f17..00000000000
Binary files a/docs/assets/report_issues/3_feature_request.png and /dev/null differ
diff --git a/docs/assets/report_issues/3_feature_request_form_1.png b/docs/assets/report_issues/3_feature_request_form_1.png
deleted file mode 100644
index 72b959d883c..00000000000
Binary files a/docs/assets/report_issues/3_feature_request_form_1.png and /dev/null differ
diff --git a/docs/assets/report_issues/3_feature_request_form_2.png b/docs/assets/report_issues/3_feature_request_form_2.png
deleted file mode 100644
index 7798e3113f7..00000000000
Binary files a/docs/assets/report_issues/3_feature_request_form_2.png and /dev/null differ
diff --git a/docs/assets/report_issues/3_feature_request_form_3.png b/docs/assets/report_issues/3_feature_request_form_3.png
deleted file mode 100644
index 0c328662c15..00000000000
Binary files a/docs/assets/report_issues/3_feature_request_form_3.png and /dev/null differ
diff --git a/docs/assets/report_issues/3_feature_request_form_4.png b/docs/assets/report_issues/3_feature_request_form_4.png
deleted file mode 100644
index dcf29dec23d..00000000000
Binary files a/docs/assets/report_issues/3_feature_request_form_4.png and /dev/null differ
diff --git a/docs/assets/report_issues/4_improve_docs.png b/docs/assets/report_issues/4_improve_docs.png
deleted file mode 100644
index 6d4c6bd2a4b..00000000000
Binary files a/docs/assets/report_issues/4_improve_docs.png and /dev/null differ
diff --git a/docs/assets/report_issues/4_improve_docs_form_1.png b/docs/assets/report_issues/4_improve_docs_form_1.png
deleted file mode 100644
index 34555ee09c4..00000000000
Binary files a/docs/assets/report_issues/4_improve_docs_form_1.png and /dev/null differ
diff --git a/docs/assets/report_issues/4_improve_docs_form_2.png b/docs/assets/report_issues/4_improve_docs_form_2.png
deleted file mode 100644
index 6c74fc78e26..00000000000
Binary files a/docs/assets/report_issues/4_improve_docs_form_2.png and /dev/null differ
diff --git a/docs/assets/report_issues/4_improve_docs_form_3.png b/docs/assets/report_issues/4_improve_docs_form_3.png
deleted file mode 100644
index 3ad7ad6bf45..00000000000
Binary files a/docs/assets/report_issues/4_improve_docs_form_3.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration.png b/docs/assets/report_issues/5_new_integration.png
deleted file mode 100644
index 76d538124b4..00000000000
Binary files a/docs/assets/report_issues/5_new_integration.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration_form_1.png b/docs/assets/report_issues/5_new_integration_form_1.png
deleted file mode 100644
index faff515679c..00000000000
Binary files a/docs/assets/report_issues/5_new_integration_form_1.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration_form_2.png b/docs/assets/report_issues/5_new_integration_form_2.png
deleted file mode 100644
index 10b02645877..00000000000
Binary files a/docs/assets/report_issues/5_new_integration_form_2.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration_form_3.png b/docs/assets/report_issues/5_new_integration_form_3.png
deleted file mode 100644
index 7ef021fb9cb..00000000000
Binary files a/docs/assets/report_issues/5_new_integration_form_3.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration_form_4.png b/docs/assets/report_issues/5_new_integration_form_4.png
deleted file mode 100644
index 64afecdb6db..00000000000
Binary files a/docs/assets/report_issues/5_new_integration_form_4.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration_form_5.png b/docs/assets/report_issues/5_new_integration_form_5.png
deleted file mode 100644
index e278788c71b..00000000000
Binary files a/docs/assets/report_issues/5_new_integration_form_5.png and /dev/null differ
diff --git a/docs/assets/report_issues/5_new_integration_form_6.png b/docs/assets/report_issues/5_new_integration_form_6.png
deleted file mode 100644
index a9bed7296e7..00000000000
Binary files a/docs/assets/report_issues/5_new_integration_form_6.png and /dev/null differ
diff --git a/docs/assets/report_issues/6_security_vulnerability.png b/docs/assets/report_issues/6_security_vulnerability.png
deleted file mode 100644
index 8b31fcba775..00000000000
Binary files a/docs/assets/report_issues/6_security_vulnerability.png and /dev/null differ
diff --git a/docs/assets/report_issues/6_security_vulnerability_form_1.png b/docs/assets/report_issues/6_security_vulnerability_form_1.png
deleted file mode 100644
index ffb45993174..00000000000
Binary files a/docs/assets/report_issues/6_security_vulnerability_form_1.png and /dev/null differ
diff --git a/docs/assets/report_issues/6_security_vulnerability_form_2.png b/docs/assets/report_issues/6_security_vulnerability_form_2.png
deleted file mode 100644
index e8305dd7a72..00000000000
Binary files a/docs/assets/report_issues/6_security_vulnerability_form_2.png and /dev/null differ
diff --git a/docs/assets/report_issues/6_security_vulnerability_form_3.png b/docs/assets/report_issues/6_security_vulnerability_form_3.png
deleted file mode 100644
index 1339a145e54..00000000000
Binary files a/docs/assets/report_issues/6_security_vulnerability_form_3.png and /dev/null differ
diff --git a/docs/assets/report_issues/6_security_vulnerability_form_4.png b/docs/assets/report_issues/6_security_vulnerability_form_4.png
deleted file mode 100644
index 45efe6f28be..00000000000
Binary files a/docs/assets/report_issues/6_security_vulnerability_form_4.png and /dev/null differ
diff --git a/docs/assets/report_issues/6_security_vulnerability_form_5.png b/docs/assets/report_issues/6_security_vulnerability_form_5.png
deleted file mode 100644
index c594a715d9c..00000000000
Binary files a/docs/assets/report_issues/6_security_vulnerability_form_5.png and /dev/null differ
diff --git a/docs/assets/sentiment_analysis_diagram.png b/docs/assets/sentiment_analysis_diagram.png
deleted file mode 100644
index d163a6843da..00000000000
Binary files a/docs/assets/sentiment_analysis_diagram.png and /dev/null differ
diff --git a/docs/assets/sql/add-file-data.png b/docs/assets/sql/add-file-data.png
deleted file mode 100644
index d5ff62b99c1..00000000000
Binary files a/docs/assets/sql/add-file-data.png and /dev/null differ
diff --git a/docs/assets/sql/analytics_shift.png b/docs/assets/sql/analytics_shift.png
deleted file mode 100644
index 8ee0d7526d2..00000000000
Binary files a/docs/assets/sql/analytics_shift.png and /dev/null differ
diff --git a/docs/assets/sql/connectcloud.png b/docs/assets/sql/connectcloud.png
deleted file mode 100644
index da88ef566ad..00000000000
Binary files a/docs/assets/sql/connectcloud.png and /dev/null differ
diff --git a/docs/assets/sql/connectdb.png b/docs/assets/sql/connectdb.png
deleted file mode 100644
index a76dd4cd10a..00000000000
Binary files a/docs/assets/sql/connectdb.png and /dev/null differ
diff --git a/docs/assets/sql/datasource.gif b/docs/assets/sql/datasource.gif
deleted file mode 100644
index fef984cadfa..00000000000
Binary files a/docs/assets/sql/datasource.gif and /dev/null differ
diff --git a/docs/assets/sql/datasource_listing.png b/docs/assets/sql/datasource_listing.png
deleted file mode 100644
index f54dbd61be6..00000000000
Binary files a/docs/assets/sql/datasource_listing.png and /dev/null differ
diff --git a/docs/assets/sql/dbeaver-local.png b/docs/assets/sql/dbeaver-local.png
deleted file mode 100644
index 1200e1586d7..00000000000
Binary files a/docs/assets/sql/dbeaver-local.png and /dev/null differ
diff --git a/docs/assets/sql/dbeaver8.png b/docs/assets/sql/dbeaver8.png
deleted file mode 100644
index ccc23fa0b37..00000000000
Binary files a/docs/assets/sql/dbeaver8.png and /dev/null differ
diff --git a/docs/assets/sql/drop.png b/docs/assets/sql/drop.png
deleted file mode 100644
index 2b9cbd213cb..00000000000
Binary files a/docs/assets/sql/drop.png and /dev/null differ
diff --git a/docs/assets/sql/file.png b/docs/assets/sql/file.png
deleted file mode 100644
index 2cbaa03f960..00000000000
Binary files a/docs/assets/sql/file.png and /dev/null differ
diff --git a/docs/assets/sql/machine_learning_lifecycle.png b/docs/assets/sql/machine_learning_lifecycle.png
deleted file mode 100644
index f52f642decf..00000000000
Binary files a/docs/assets/sql/machine_learning_lifecycle.png and /dev/null differ
diff --git a/docs/assets/sql/mysql-client.gif b/docs/assets/sql/mysql-client.gif
deleted file mode 100644
index 3bbd821221f..00000000000
Binary files a/docs/assets/sql/mysql-client.gif and /dev/null differ
diff --git a/docs/assets/sql/select.png b/docs/assets/sql/select.png
deleted file mode 100644
index e580d64c835..00000000000
Binary files a/docs/assets/sql/select.png and /dev/null differ
diff --git a/docs/assets/sql/select_bulk.png b/docs/assets/sql/select_bulk.png
deleted file mode 100644
index 0ba44b132f3..00000000000
Binary files a/docs/assets/sql/select_bulk.png and /dev/null differ
diff --git a/docs/assets/sql/select_file.png b/docs/assets/sql/select_file.png
deleted file mode 100644
index 306b84b67b2..00000000000
Binary files a/docs/assets/sql/select_file.png and /dev/null differ
diff --git a/docs/assets/sql/select_hr.png b/docs/assets/sql/select_hr.png
deleted file mode 100644
index febe5cf5249..00000000000
Binary files a/docs/assets/sql/select_hr.png and /dev/null differ
diff --git a/docs/assets/sql/select_hra.png b/docs/assets/sql/select_hra.png
deleted file mode 100644
index b3024153e21..00000000000
Binary files a/docs/assets/sql/select_hra.png and /dev/null differ
diff --git a/docs/assets/sql/show.png b/docs/assets/sql/show.png
deleted file mode 100644
index 9e3c9f089c8..00000000000
Binary files a/docs/assets/sql/show.png and /dev/null differ
diff --git a/docs/assets/sql/status.png b/docs/assets/sql/status.png
deleted file mode 100644
index e4a4e846d05..00000000000
Binary files a/docs/assets/sql/status.png and /dev/null differ
diff --git a/docs/assets/sql/test_connection_dbeaver.png b/docs/assets/sql/test_connection_dbeaver.png
deleted file mode 100644
index b53e1d25367..00000000000
Binary files a/docs/assets/sql/test_connection_dbeaver.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/Selection_004.png b/docs/assets/sql/tutorials/Mushrooms/Selection_004.png
deleted file mode 100644
index 60765f6734e..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/Selection_004.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/create.png b/docs/assets/sql/tutorials/Mushrooms/create.png
deleted file mode 100644
index a58fb5170fa..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/create.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/database.png b/docs/assets/sql/tutorials/Mushrooms/database.png
deleted file mode 100644
index da64041148c..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/database.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/dbintegration.png b/docs/assets/sql/tutorials/Mushrooms/dbintegration.png
deleted file mode 100644
index 5011c8b514b..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/dbintegration.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/mushroomsselect.png b/docs/assets/sql/tutorials/Mushrooms/mushroomsselect.png
deleted file mode 100644
index 63462cfd043..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/mushroomsselect.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/prediction.png b/docs/assets/sql/tutorials/Mushrooms/prediction.png
deleted file mode 100644
index 4077748ab11..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/prediction.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/Mushrooms/statuscheck.png b/docs/assets/sql/tutorials/Mushrooms/statuscheck.png
deleted file mode 100644
index a9225ed4139..00000000000
Binary files a/docs/assets/sql/tutorials/Mushrooms/statuscheck.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/connect-database.png b/docs/assets/sql/tutorials/bodyfat/connect-database.png
deleted file mode 100644
index 1f4bc04e0c7..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/connect-database.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/create.png b/docs/assets/sql/tutorials/bodyfat/create.png
deleted file mode 100644
index 1c29ba80331..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/create.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/database-connected.png b/docs/assets/sql/tutorials/bodyfat/database-connected.png
deleted file mode 100644
index 7de3cbfa5f0..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/database-connected.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/join.png b/docs/assets/sql/tutorials/bodyfat/join.png
deleted file mode 100644
index 34e271329ea..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/join.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/prediction.png b/docs/assets/sql/tutorials/bodyfat/prediction.png
deleted file mode 100644
index ccb29486ab5..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/prediction.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/selectdata.png b/docs/assets/sql/tutorials/bodyfat/selectdata.png
deleted file mode 100644
index 2dba31a42f5..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/selectdata.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/status.png b/docs/assets/sql/tutorials/bodyfat/status.png
deleted file mode 100644
index 205bdebb9c0..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/status.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/bodyfat/upload_file.png b/docs/assets/sql/tutorials/bodyfat/upload_file.png
deleted file mode 100644
index 971f921735f..00000000000
Binary files a/docs/assets/sql/tutorials/bodyfat/upload_file.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/connect.gif b/docs/assets/sql/tutorials/connect.gif
deleted file mode 100644
index a9c96e0c84f..00000000000
Binary files a/docs/assets/sql/tutorials/connect.gif and /dev/null differ
diff --git a/docs/assets/sql/tutorials/connect.png b/docs/assets/sql/tutorials/connect.png
deleted file mode 100644
index 3a5f3e60c0b..00000000000
Binary files a/docs/assets/sql/tutorials/connect.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/create_ds.gif b/docs/assets/sql/tutorials/create_ds.gif
deleted file mode 100644
index 6d8e61f9670..00000000000
Binary files a/docs/assets/sql/tutorials/create_ds.gif and /dev/null differ
diff --git a/docs/assets/sql/tutorials/crop-prediction/database-integration-mariadb.png b/docs/assets/sql/tutorials/crop-prediction/database-integration-mariadb.png
deleted file mode 100644
index 82c012e883c..00000000000
Binary files a/docs/assets/sql/tutorials/crop-prediction/database-integration-mariadb.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/customer_churn/create_churn.png b/docs/assets/sql/tutorials/customer_churn/create_churn.png
deleted file mode 100644
index e8e09d8c433..00000000000
Binary files a/docs/assets/sql/tutorials/customer_churn/create_churn.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/customer_churn/customer_churn.png b/docs/assets/sql/tutorials/customer_churn/customer_churn.png
deleted file mode 100644
index b512debb315..00000000000
Binary files a/docs/assets/sql/tutorials/customer_churn/customer_churn.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/customer_churn/customer_churn2.png b/docs/assets/sql/tutorials/customer_churn/customer_churn2.png
deleted file mode 100644
index 5dd32ae5069..00000000000
Binary files a/docs/assets/sql/tutorials/customer_churn/customer_churn2.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/customer_churn/select.png b/docs/assets/sql/tutorials/customer_churn/select.png
deleted file mode 100644
index 0431e61d605..00000000000
Binary files a/docs/assets/sql/tutorials/customer_churn/select.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/customer_churn/upload.png b/docs/assets/sql/tutorials/customer_churn/upload.png
deleted file mode 100644
index 9cd89702011..00000000000
Binary files a/docs/assets/sql/tutorials/customer_churn/upload.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/1prediction.png b/docs/assets/sql/tutorials/heart-disease/1prediction.png
deleted file mode 100644
index 386ee0d5986..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/1prediction.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/2.1prediction.png b/docs/assets/sql/tutorials/heart-disease/2.1prediction.png
deleted file mode 100644
index 694db4a5ca0..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/2.1prediction.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/2ndprediction.png b/docs/assets/sql/tutorials/heart-disease/2ndprediction.png
deleted file mode 100644
index 61eb5cb9652..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/2ndprediction.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/3rdprediction.png b/docs/assets/sql/tutorials/heart-disease/3rdprediction.png
deleted file mode 100644
index 1d6e5e886fa..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/3rdprediction.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/connect_db.png b/docs/assets/sql/tutorials/heart-disease/connect_db.png
deleted file mode 100644
index d2ff26c758b..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/connect_db.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/connect_mysql_client.png b/docs/assets/sql/tutorials/heart-disease/connect_mysql_client.png
deleted file mode 100644
index 045900c4288..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/connect_mysql_client.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/create.png b/docs/assets/sql/tutorials/heart-disease/create.png
deleted file mode 100644
index c2d19f2ce2c..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/create.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/create_predictor.png b/docs/assets/sql/tutorials/heart-disease/create_predictor.png
deleted file mode 100644
index f0a87aac086..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/create_predictor.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/database.png b/docs/assets/sql/tutorials/heart-disease/database.png
deleted file mode 100644
index d607c7a540c..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/database.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/dataselection.png b/docs/assets/sql/tutorials/heart-disease/dataselection.png
deleted file mode 100644
index 400b1d0231a..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/dataselection.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/dbintegration.png b/docs/assets/sql/tutorials/heart-disease/dbintegration.png
deleted file mode 100644
index 5011c8b514b..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/dbintegration.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/join_query.gif b/docs/assets/sql/tutorials/heart-disease/join_query.gif
deleted file mode 100644
index 2b41dc45d7d..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/join_query.gif and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/predictor_status.png b/docs/assets/sql/tutorials/heart-disease/predictor_status.png
deleted file mode 100644
index 7c86f66666a..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/predictor_status.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/select_prediction_query.png b/docs/assets/sql/tutorials/heart-disease/select_prediction_query.png
deleted file mode 100644
index 1ae0639d1e4..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/select_prediction_query.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/selectpredictor.png b/docs/assets/sql/tutorials/heart-disease/selectpredictor.png
deleted file mode 100644
index d28cc8575b4..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/selectpredictor.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/heart-disease/use_mindsdb.png b/docs/assets/sql/tutorials/heart-disease/use_mindsdb.png
deleted file mode 100644
index e5aade0af41..00000000000
Binary files a/docs/assets/sql/tutorials/heart-disease/use_mindsdb.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insert.png b/docs/assets/sql/tutorials/insert.png
deleted file mode 100644
index 8f2fa8df549..00000000000
Binary files a/docs/assets/sql/tutorials/insert.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/add-database-cloud-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/add-database-cloud-mindsdb-sql.png
deleted file mode 100644
index 039e479fd66..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/add-database-cloud-mindsdb-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/connect-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/connect-mindsdb-sql.png
deleted file mode 100644
index 28f4f5936d5..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/connect-mindsdb-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-bitcoin-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-bitcoin-sql.png
deleted file mode 100644
index 1ba4621c05b..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-bitcoin-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-isurance-sql.png
deleted file mode 100644
index dd71194df5e..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-isurance-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-bitcoin-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-bitcoin-sql.png
deleted file mode 100644
index 0688bebeac3..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-bitcoin-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-insurance-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-insurance-sql.png
deleted file mode 100644
index 9937b9e4a82..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-insurance-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-bitcoin-table.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-bitcoin-table.png
deleted file mode 100644
index 1970b54c5a9..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-bitcoin-table.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-databases-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-databases-sql.png
deleted file mode 100644
index 540e119131e..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-databases-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-insurance-table.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-insurance-table.png
deleted file mode 100644
index 1006202b031..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-insurance-table.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-bitcoin-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-bitcoin-sql.png
deleted file mode 100644
index b3302211ebf..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-bitcoin-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-isurance-sql.png
deleted file mode 100644
index e3a4cf0832b..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-isurance-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql-2.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql-2.png
deleted file mode 100644
index 6a695cbef93..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql-2.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql.png
deleted file mode 100644
index 119fa2222ef..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/success-connect-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/success-connect-sql.png
deleted file mode 100644
index 21bcd93c307..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/success-connect-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/add-database-cloud-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost/add-database-cloud-mindsdb-sql.png
deleted file mode 100644
index 039e479fd66..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/add-database-cloud-mindsdb-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/connect-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost/connect-mindsdb-sql.png
deleted file mode 100644
index 28f4f5936d5..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/connect-mindsdb-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/create-prediction-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost/create-prediction-isurance-sql.png
deleted file mode 100644
index dd71194df5e..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/create-prediction-isurance-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/create-predictor-insurance-sql.png b/docs/assets/sql/tutorials/insurance-cost/create-predictor-insurance-sql.png
deleted file mode 100644
index 9937b9e4a82..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/create-predictor-insurance-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/create_db.png b/docs/assets/sql/tutorials/insurance-cost/create_db.png
deleted file mode 100644
index 0397217b658..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/create_db.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/insurance_predictor.png b/docs/assets/sql/tutorials/insurance-cost/insurance_predictor.png
deleted file mode 100644
index f0eccd7686e..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/insurance_predictor.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/prediction_insurance.png b/docs/assets/sql/tutorials/insurance-cost/prediction_insurance.png
deleted file mode 100644
index 484efef6678..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/prediction_insurance.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/select_insurance.png b/docs/assets/sql/tutorials/insurance-cost/select_insurance.png
deleted file mode 100644
index fdf0ee65a9b..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/select_insurance.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/show-databases-sql.png b/docs/assets/sql/tutorials/insurance-cost/show-databases-sql.png
deleted file mode 100644
index 540e119131e..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/show-databases-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/show-insurance-table.png b/docs/assets/sql/tutorials/insurance-cost/show-insurance-table.png
deleted file mode 100644
index 1006202b031..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/show-insurance-table.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/show-predictor-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost/show-predictor-isurance-sql.png
deleted file mode 100644
index e3a4cf0832b..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/show-predictor-isurance-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql-2.png b/docs/assets/sql/tutorials/insurance-cost/show-tables-sql-2.png
deleted file mode 100644
index 6a695cbef93..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql-2.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql.png b/docs/assets/sql/tutorials/insurance-cost/show-tables-sql.png
deleted file mode 100644
index 119fa2222ef..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/insurance-cost/success-connect-sql.png b/docs/assets/sql/tutorials/insurance-cost/success-connect-sql.png
deleted file mode 100644
index 21bcd93c307..00000000000
Binary files a/docs/assets/sql/tutorials/insurance-cost/success-connect-sql.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/process-quality/database-integration.png b/docs/assets/sql/tutorials/process-quality/database-integration.png
deleted file mode 100644
index fe96f4c119e..00000000000
Binary files a/docs/assets/sql/tutorials/process-quality/database-integration.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/process-quality/database.png b/docs/assets/sql/tutorials/process-quality/database.png
deleted file mode 100644
index d607c7a540c..00000000000
Binary files a/docs/assets/sql/tutorials/process-quality/database.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/process-quality/dbintegration.png b/docs/assets/sql/tutorials/process-quality/dbintegration.png
deleted file mode 100644
index 5011c8b514b..00000000000
Binary files a/docs/assets/sql/tutorials/process-quality/dbintegration.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/select.png b/docs/assets/sql/tutorials/select.png
deleted file mode 100644
index 2fa767edccb..00000000000
Binary files a/docs/assets/sql/tutorials/select.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/selecti.png b/docs/assets/sql/tutorials/selecti.png
deleted file mode 100644
index 4cce94e7e26..00000000000
Binary files a/docs/assets/sql/tutorials/selecti.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/1-ML_audience.png b/docs/assets/sql/tutorials/snowflake-superset/1-ML_audience.png
deleted file mode 100644
index a9f65cb53e6..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/1-ML_audience.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png b/docs/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png
deleted file mode 100644
index 8722f1c41cd..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/11-Dbeaver2.png b/docs/assets/sql/tutorials/snowflake-superset/11-Dbeaver2.png
deleted file mode 100644
index 42690cc8298..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/11-Dbeaver2.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png b/docs/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png
deleted file mode 100644
index 71ec0af4a88..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/13-info_schema.png b/docs/assets/sql/tutorials/snowflake-superset/13-info_schema.png
deleted file mode 100644
index 7d0ff3a0b90..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/13-info_schema.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/14-table.png b/docs/assets/sql/tutorials/snowflake-superset/14-table.png
deleted file mode 100644
index 7a15d17fdad..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/14-table.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/15-query.png b/docs/assets/sql/tutorials/snowflake-superset/15-query.png
deleted file mode 100644
index 3dd98bb2c6b..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/15-query.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/16-predictive_query.png b/docs/assets/sql/tutorials/snowflake-superset/16-predictive_query.png
deleted file mode 100644
index 46164d46d65..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/16-predictive_query.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg b/docs/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg
deleted file mode 100644
index c131dce035b..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg b/docs/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg
deleted file mode 100644
index bf4d473c0a6..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg b/docs/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg
deleted file mode 100644
index 2d44bec204c..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/2-ML_workflow.png b/docs/assets/sql/tutorials/snowflake-superset/2-ML_workflow.png
deleted file mode 100644
index 7775334643e..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/2-ML_workflow.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/20-filters1.jpg b/docs/assets/sql/tutorials/snowflake-superset/20-filters1.jpg
deleted file mode 100644
index 44adb88da32..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/20-filters1.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/20-filters2.jpg b/docs/assets/sql/tutorials/snowflake-superset/20-filters2.jpg
deleted file mode 100644
index eebecd869f5..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/20-filters2.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/21-graph.jpg b/docs/assets/sql/tutorials/snowflake-superset/21-graph.jpg
deleted file mode 100644
index 1a41b0e0541..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/21-graph.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/22-graph.jpg b/docs/assets/sql/tutorials/snowflake-superset/22-graph.jpg
deleted file mode 100644
index 57cf5ae4bec..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/22-graph.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/23-graph.jpg b/docs/assets/sql/tutorials/snowflake-superset/23-graph.jpg
deleted file mode 100644
index 7bf3035448c..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/23-graph.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png b/docs/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png
deleted file mode 100644
index ba0bc2c9c51..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income-debt.jpg b/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income-debt.jpg
deleted file mode 100644
index c3a3c3d6001..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income-debt.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income_table.jpg b/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income_table.jpg
deleted file mode 100644
index 71b8e3d8058..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income_table.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/4-AI_Tables-income-debt-query.jpg b/docs/assets/sql/tutorials/snowflake-superset/4-AI_Tables-income-debt-query.jpg
deleted file mode 100644
index 19ce5b15f7d..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/4-AI_Tables-income-debt-query.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/4-debt_vs_income.png b/docs/assets/sql/tutorials/snowflake-superset/4-debt_vs_income.png
deleted file mode 100644
index d1e5c60ab06..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/4-debt_vs_income.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query-table.jpg b/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query-table.jpg
deleted file mode 100644
index 12966b717d6..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query-table.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query.jpg b/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query.jpg
deleted file mode 100644
index 8fed6732107..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null-table.jpg b/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null-table.jpg
deleted file mode 100644
index 25b6415008b..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null-table.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null.jpg b/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null.jpg
deleted file mode 100644
index 2122c847f60..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml-table.jpg b/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml-table.jpg
deleted file mode 100644
index eb4735d750f..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml-table.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml.jpg b/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml.jpg
deleted file mode 100644
index 36c4225f717..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg b/docs/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg
deleted file mode 100644
index 8e437b53e23..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg and /dev/null differ
diff --git a/docs/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png b/docs/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png
deleted file mode 100644
index 45e505128f4..00000000000
Binary files a/docs/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/status.png b/docs/assets/sql/tutorials/status.png
deleted file mode 100644
index a60ad71df42..00000000000
Binary files a/docs/assets/sql/tutorials/status.png and /dev/null differ
diff --git a/docs/assets/sql/tutorials/use.png b/docs/assets/sql/tutorials/use.png
deleted file mode 100644
index 25f47fbe9ec..00000000000
Binary files a/docs/assets/sql/tutorials/use.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-clickhouse/describe-table.png b/docs/assets/tutorials/aitables-clickhouse/describe-table.png
deleted file mode 100644
index c10d33fc8d0..00000000000
Binary files a/docs/assets/tutorials/aitables-clickhouse/describe-table.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-clickhouse/select-info.png b/docs/assets/tutorials/aitables-clickhouse/select-info.png
deleted file mode 100644
index fc9fee0a20d..00000000000
Binary files a/docs/assets/tutorials/aitables-clickhouse/select-info.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-clickhouse/select-info2.png b/docs/assets/tutorials/aitables-clickhouse/select-info2.png
deleted file mode 100644
index 215fce4a09e..00000000000
Binary files a/docs/assets/tutorials/aitables-clickhouse/select-info2.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-clickhouse/show-tables.png b/docs/assets/tutorials/aitables-clickhouse/show-tables.png
deleted file mode 100644
index d5081904f6c..00000000000
Binary files a/docs/assets/tutorials/aitables-clickhouse/show-tables.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/database.png b/docs/assets/tutorials/aitables-mariadb/database.png
deleted file mode 100644
index a53ec9a4667..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/database.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/mdb-maria.png b/docs/assets/tutorials/aitables-mariadb/mdb-maria.png
deleted file mode 100644
index d4293d55ba2..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/mdb-maria.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/mdb-ver.png b/docs/assets/tutorials/aitables-mariadb/mdb-ver.png
deleted file mode 100644
index 0e741b06d95..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/mdb-ver.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/predicted-info.png b/docs/assets/tutorials/aitables-mariadb/predicted-info.png
deleted file mode 100644
index 78c90805f88..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/predicted-info.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/predicted.png b/docs/assets/tutorials/aitables-mariadb/predicted.png
deleted file mode 100644
index e004f66ee76..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/predicted.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/predicted1.png b/docs/assets/tutorials/aitables-mariadb/predicted1.png
deleted file mode 100644
index aa15856adb0..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/predicted1.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/select-data.png b/docs/assets/tutorials/aitables-mariadb/select-data.png
deleted file mode 100644
index edf1f79f0ca..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/select-data.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/training-finish.png b/docs/assets/tutorials/aitables-mariadb/training-finish.png
deleted file mode 100644
index 90650d48663..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/training-finish.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/training-run.png b/docs/assets/tutorials/aitables-mariadb/training-run.png
deleted file mode 100644
index 44c8736e2f2..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/training-run.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mariadb/training.png b/docs/assets/tutorials/aitables-mariadb/training.png
deleted file mode 100644
index 658fa1dff8f..00000000000
Binary files a/docs/assets/tutorials/aitables-mariadb/training.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mssql/AI Tables.jpg b/docs/assets/tutorials/aitables-mssql/AI Tables.jpg
deleted file mode 100644
index 12af031ca62..00000000000
Binary files a/docs/assets/tutorials/aitables-mssql/AI Tables.jpg and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mssql/train-model.png b/docs/assets/tutorials/aitables-mssql/train-model.png
deleted file mode 100644
index c722f0c6ee3..00000000000
Binary files a/docs/assets/tutorials/aitables-mssql/train-model.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mysql/list_tables.png b/docs/assets/tutorials/aitables-mysql/list_tables.png
deleted file mode 100644
index ce044c906b0..00000000000
Binary files a/docs/assets/tutorials/aitables-mysql/list_tables.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mysql/select_status.png b/docs/assets/tutorials/aitables-mysql/select_status.png
deleted file mode 100644
index 4d40ac3fd5a..00000000000
Binary files a/docs/assets/tutorials/aitables-mysql/select_status.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-mysql/select_table.png b/docs/assets/tutorials/aitables-mysql/select_table.png
deleted file mode 100644
index f7e9ed4b014..00000000000
Binary files a/docs/assets/tutorials/aitables-mysql/select_table.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-postgresql/list_schema.png b/docs/assets/tutorials/aitables-postgresql/list_schema.png
deleted file mode 100644
index 3d982cf610a..00000000000
Binary files a/docs/assets/tutorials/aitables-postgresql/list_schema.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-postgresql/mindsdb_started.png b/docs/assets/tutorials/aitables-postgresql/mindsdb_started.png
deleted file mode 100644
index 243c2d6c2bb..00000000000
Binary files a/docs/assets/tutorials/aitables-postgresql/mindsdb_started.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-postgresql/select_model.png b/docs/assets/tutorials/aitables-postgresql/select_model.png
deleted file mode 100644
index 0c30c1a440a..00000000000
Binary files a/docs/assets/tutorials/aitables-postgresql/select_model.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-postgresql/select_status.png b/docs/assets/tutorials/aitables-postgresql/select_status.png
deleted file mode 100644
index 08eaea986a1..00000000000
Binary files a/docs/assets/tutorials/aitables-postgresql/select_status.png and /dev/null differ
diff --git a/docs/assets/tutorials/aitables-postgresql/select_table.png b/docs/assets/tutorials/aitables-postgresql/select_table.png
deleted file mode 100644
index c5b4f6b07eb..00000000000
Binary files a/docs/assets/tutorials/aitables-postgresql/select_table.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/2ndprediction.png b/docs/assets/tutorials/crops/2ndprediction.png
deleted file mode 100644
index 208516c42c8..00000000000
Binary files a/docs/assets/tutorials/crops/2ndprediction.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/createcropspredictor.png b/docs/assets/tutorials/crops/createcropspredictor.png
deleted file mode 100644
index bdaf210462f..00000000000
Binary files a/docs/assets/tutorials/crops/createcropspredictor.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/cropprediction.png b/docs/assets/tutorials/crops/cropprediction.png
deleted file mode 100644
index f70b75753e8..00000000000
Binary files a/docs/assets/tutorials/crops/cropprediction.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/database.png b/docs/assets/tutorials/crops/database.png
deleted file mode 100644
index d607c7a540c..00000000000
Binary files a/docs/assets/tutorials/crops/database.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/select_datasource.png b/docs/assets/tutorials/crops/select_datasource.png
deleted file mode 100644
index ace649d18cf..00000000000
Binary files a/docs/assets/tutorials/crops/select_datasource.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/selectfromfiles.png b/docs/assets/tutorials/crops/selectfromfiles.png
deleted file mode 100644
index f0cb9fa4ebf..00000000000
Binary files a/docs/assets/tutorials/crops/selectfromfiles.png and /dev/null differ
diff --git a/docs/assets/tutorials/crops/statuscheck.png b/docs/assets/tutorials/crops/statuscheck.png
deleted file mode 100644
index 49f8b56a13c..00000000000
Binary files a/docs/assets/tutorials/crops/statuscheck.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/Connecting_database_to_MindsdbCloud.gif b/docs/assets/tutorials/diabetes/Connecting_database_to_MindsdbCloud.gif
deleted file mode 100644
index ec838c4aeb3..00000000000
Binary files a/docs/assets/tutorials/diabetes/Connecting_database_to_MindsdbCloud.gif and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/DBdiabetes.png b/docs/assets/tutorials/diabetes/DBdiabetes.png
deleted file mode 100644
index 6eac862fc64..00000000000
Binary files a/docs/assets/tutorials/diabetes/DBdiabetes.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/connecting_mysql_client.gif b/docs/assets/tutorials/diabetes/connecting_mysql_client.gif
deleted file mode 100644
index 07534bfad86..00000000000
Binary files a/docs/assets/tutorials/diabetes/connecting_mysql_client.gif and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/create_predictor.png b/docs/assets/tutorials/diabetes/create_predictor.png
deleted file mode 100644
index 0f0288411b1..00000000000
Binary files a/docs/assets/tutorials/diabetes/create_predictor.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/database.png b/docs/assets/tutorials/diabetes/database.png
deleted file mode 100644
index d607c7a540c..00000000000
Binary files a/docs/assets/tutorials/diabetes/database.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/pg4admin/diabetes_logo.png b/docs/assets/tutorials/diabetes/pg4admin/diabetes_logo.png
deleted file mode 100644
index 29b54b8f4bd..00000000000
Binary files a/docs/assets/tutorials/diabetes/pg4admin/diabetes_logo.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/pg4admin/images.png b/docs/assets/tutorials/diabetes/pg4admin/images.png
deleted file mode 100644
index 7aa8fa107b6..00000000000
Binary files a/docs/assets/tutorials/diabetes/pg4admin/images.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/pg4admin/index.jpeg b/docs/assets/tutorials/diabetes/pg4admin/index.jpeg
deleted file mode 100644
index b4488fe8a58..00000000000
Binary files a/docs/assets/tutorials/diabetes/pg4admin/index.jpeg and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/prediction.png b/docs/assets/tutorials/diabetes/prediction.png
deleted file mode 100644
index 2f1badedd68..00000000000
Binary files a/docs/assets/tutorials/diabetes/prediction.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/predictor.png b/docs/assets/tutorials/diabetes/predictor.png
deleted file mode 100644
index dc446528989..00000000000
Binary files a/docs/assets/tutorials/diabetes/predictor.png and /dev/null differ
diff --git a/docs/assets/tutorials/diabetes/select_predictor.png b/docs/assets/tutorials/diabetes/select_predictor.png
deleted file mode 100644
index 472452bb893..00000000000
Binary files a/docs/assets/tutorials/diabetes/select_predictor.png and /dev/null differ
diff --git a/docs/assets/tutorials/llamaindex/1.ml_engine.png b/docs/assets/tutorials/llamaindex/1.ml_engine.png
deleted file mode 100644
index 7425e7e13f2..00000000000
Binary files a/docs/assets/tutorials/llamaindex/1.ml_engine.png and /dev/null differ
diff --git a/docs/assets/tutorials/llamaindex/2.create_model.png b/docs/assets/tutorials/llamaindex/2.create_model.png
deleted file mode 100644
index a118a16f6d0..00000000000
Binary files a/docs/assets/tutorials/llamaindex/2.create_model.png and /dev/null differ
diff --git a/docs/assets/tutorials/llamaindex/3.describe.png b/docs/assets/tutorials/llamaindex/3.describe.png
deleted file mode 100644
index b0f2546d04a..00000000000
Binary files a/docs/assets/tutorials/llamaindex/3.describe.png and /dev/null differ
diff --git a/docs/assets/tutorials/llamaindex/4.select_model.png b/docs/assets/tutorials/llamaindex/4.select_model.png
deleted file mode 100644
index b99e27a57b3..00000000000
Binary files a/docs/assets/tutorials/llamaindex/4.select_model.png and /dev/null differ
diff --git a/docs/assets/tutorials/llamaindex/5.batch.png b/docs/assets/tutorials/llamaindex/5.batch.png
deleted file mode 100644
index 6a49c9ccd14..00000000000
Binary files a/docs/assets/tutorials/llamaindex/5.batch.png and /dev/null differ
diff --git a/docs/assets/tutorials/monkeylearn/model3.png b/docs/assets/tutorials/monkeylearn/model3.png
deleted file mode 100644
index 6581255841a..00000000000
Binary files a/docs/assets/tutorials/monkeylearn/model3.png and /dev/null differ
diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-launch-skysql.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-launch-skysql.png
deleted file mode 100644
index c830cf0ac14..00000000000
Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-launch-skysql.png and /dev/null differ
diff --git a/docs/assets/tutorials/zero-shot-classification-postgresql-pg-admin.png b/docs/assets/tutorials/zero-shot-classification-postgresql-pg-admin.png
deleted file mode 100644
index 1f9f4379d53..00000000000
Binary files a/docs/assets/tutorials/zero-shot-classification-postgresql-pg-admin.png and /dev/null differ
diff --git a/docs/assets/tutorials/zero-shot-classification-postgresql.png b/docs/assets/tutorials/zero-shot-classification-postgresql.png
deleted file mode 100644
index 27e1a750d08..00000000000
Binary files a/docs/assets/tutorials/zero-shot-classification-postgresql.png and /dev/null differ
diff --git a/docs/assets/what_is_mindsdb.png b/docs/assets/what_is_mindsdb.png
deleted file mode 100644
index 6b54159e543..00000000000
Binary files a/docs/assets/what_is_mindsdb.png and /dev/null differ
diff --git a/docs/assets/what_is_mindsdb2.png b/docs/assets/what_is_mindsdb2.png
deleted file mode 100644
index 3d1508da6f1..00000000000
Binary files a/docs/assets/what_is_mindsdb2.png and /dev/null differ
diff --git a/docs/dark_mode.html b/docs/dark_mode.html
new file mode 100644
index 00000000000..1b588574922
--- /dev/null
+++ b/docs/dark_mode.html
@@ -0,0 +1,187 @@
+
+
+
+
+ Type & Buttons (dark) β Anton Cowork
+
+
+
+
+
MindsDB lets you build semantic search pipelines entirely in SQL. Connect your data sources, shape them with views and tables, index unstructured content into knowledge bases, and expose everything through a natural-language agent β all without leaving your SQL client.
+
+
+
+
ποΈ Connect Data
+
βββ
+
π§ Index in KB
+
βββ
+
π€ Query via Agent
+
+
+
+
+
+
+ optional
+
π§ Views
+ +
+
π Projects
+
+
+
+
+
+
+
+
+
+
Quickstart
+
Five SQL statements to go from zero to a working semantic search agent:
+
SQL
+
-- 1. Connect your data source
+CREATE DATABASE my_pg WITH ENGINE = 'postgres',
+PARAMETERS = {"host":"localhost","port":5432,"user":"user","password":"pass","database":"mydb"};
+
+-- 2. Create a project to organize work
+CREATE PROJECT search_project;
+
+-- 3. Create a knowledge base (semantic index)
+CREATE KNOWLEDGE_BASE search_project.docs_kb
+USING
+ embedding_model = {"provider":"openai","model_name":"text-embedding-3-large","api_key":"sk-..."},
+ content_columns = ['body'],
+ metadata_columns = ['title','category'],
+ id_column = 'doc_id';
+
+-- 4. Index your data
+INSERT INTO search_project.docs_kb
+ SELECT doc_id, title, category, body FROM my_pg.documents;
+
+-- 5. Build the agent
+CREATE AGENT search_project.my_agent
+USING
+ model = {"provider":"openai","model_name":"gpt-4o","api_key":"sk-..."},
+ data = {"knowledge_bases":["search_project.docs_kb"]},
+ prompt_template = 'docs_kb contains product documentation. Answer user questions using it.';
+
+-- Query it
+SELECT answer FROM search_project.my_agent
+WHERE question = 'How do I reset my password?';
MindsDB exposes a MySQL-compatible wire protocol. Any MySQL client can connect:
+
+
Client
Host
Port
User
Password
+
+
MySQL CLI / DBeaver / TablePlus
127.0.0.1
47335
mindsdb
(empty)
+
MindsDB Editor
http://127.0.0.1:47334
+
SQLAlchemy
mysql+pymysql://mindsdb@127.0.0.1:47335/mindsdb
+
+
+
BASH β MySQL CLI
+
mysql -h 127.0.0.1 --port 47335 -u mindsdb -p
+
PYTHON β SQLAlchemy
+
from sqlalchemy import create_engine
+engine = create_engine("mysql+pymysql://mindsdb@127.0.0.1:47335/mindsdb")
+with engine.connect() as conn:
+ result = conn.execute("SELECT answer FROM my_agent WHERE question = 'hello'")
+
+
+
+
+
+
Databases
+
Databases are connections to external data sources β your Postgres, MySQL, S3, Snowflake, MongoDB, etc. MindsDB never copies your data; it queries it live each time.
-- List all databases and projects
+SHOW DATABASES;
+SHOW FULL DATABASES;
+
+-- Filter to data sources only
+SHOW FULL DATABASES WHERE type = 'data';
+
+-- Via information_schema
+SELECT * FROM information_schema.databases;
+
+-- List tables in a database
+SHOW TABLES FROM datasource_name;
Pass database-native syntax through MindsDB without translation. Useful for database-specific functions, MongoDB-QL, Snowflake SQL extensions, etc.
+
SQL β PostgreSQL native query
+
SELECT * FROM my_postgres (
+ SELECT
+ model,
+ year,
+ price,
+ ROUND(CAST((mpg / 2.3521458) AS numeric), 1) AS kml,
+ COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell
+ FROM demo_data.used_cars
+);
+
+
SQL β MongoDB-QL native query
+
SELECT * FROM my_mongo (
+ db.products.find({"category": "electronics"}).limit(50)
+);
+
+
SQL β create view from native query
+
CREATE VIEW enriched_cars FROM my_postgres (
+ SELECT *, ROUND(CAST((mpg / 2.3521458) AS numeric), 1) AS kml
+ FROM demo_data.used_cars
+);
+
+
+
+
+
+
Projects
+
Projects are namespaces that hold knowledge bases, agents, views, and jobs. They let you organize your AI pipeline by use case (e.g. search_prod, search_staging).
-- List all projects
+SHOW DATABASES WHERE type = 'project';
+SHOW FULL DATABASES WHERE type = 'project';
+
+-- List objects in a project
+SHOW TABLES FROM project_name;
+SHOW VIEWS FROM project_name;
+SHOW KNOWLEDGE_BASES FROM project_name;
+
+
+
+
+
+
Tables & Views
+
Use tables and views to shape and materialize data before indexing it into a knowledge base. Views are saved SELECT statements (virtual); tables are materialized results (physical).
-- Clean and materialize support tickets for KB ingestion
+CREATE OR REPLACE TABLE my_pg.clean_tickets (
+ SELECT
+ id,
+ category,
+ TRIM(LOWER(subject)) AS subject,
+ body,
+ created_at
+ FROM my_pg.raw_support_tickets
+ WHERE body IS NOT NULL
+ AND LENGTH(body) > 20
+);
A view is a saved SELECT that executes on every access. Perfect for data preparation before sending to a knowledge base.
+
+
CREATE VIEW
+
CREATE VIEW [IF NOT EXISTS] [project_name.]view_name AS (
+ SELECT columns
+ FROM integration_name.table_name
+ WHERE ...
+);
+
+
+
SQL β create filtered view for KB ingestion
+
-- View joining tickets with product metadata for richer indexing
+CREATE VIEW search_project.enriched_tickets AS (
+ SELECT
+ t.id,
+ t.body,
+ t.created_at,
+ p.name AS product_name,
+ p.category AS product_category
+ FROM my_pg.support_tickets t
+ JOIN my_pg.products p ON t.product_id = p.id
+ WHERE t.status = 'closed'
+);
MindsDB supports standard SELECT with all standard clauses. Queries that reference one integration are pushed down to that engine. Cross-integration queries are executed in MindsDB's DuckDB-backed engine.
+
+
SQL β basic select
+
SELECT location, MAX(sqft)
+FROM my_pg.home_rentals
+GROUP BY location
+LIMIT 5;
+
+
SQL β subquery on integration data
+
-- Wrap in subquery when integration doesn't support GROUP BY
+SELECT type, MAX(bedrooms), LAST(price)
+FROM my_mongo (
+ db.house_sales.find().limit(300)
+) GROUP BY 1;
+
+
SQL β UNION ALL
+
SELECT id, content, 'tickets' AS source FROM my_pg.support_tickets
+UNION ALL
+SELECT id, body AS content, 'forums' AS source FROM my_pg.forum_posts;
Insert rows into an integration table from a subquery. The destination table must already exist.
+
+
INSERT INTO
+
INSERT INTO integration_name.table_name
+ (SELECT ...);
+
+
+
SQL
+
-- Archive processed tickets into a separate table
+INSERT INTO my_pg.archived_tickets (
+ SELECT * FROM my_pg.support_tickets
+ WHERE resolved_at < '2024-01-01'
+);
-- Simple delete
+DELETE FROM my_pg.table_name
+WHERE column_name = 'value';
+
+-- Delete with subquery
+DELETE FROM my_pg.support_tickets
+WHERE id IN (
+ SELECT id FROM my_pg.resolved_tickets
+ WHERE resolved_at < '2023-01-01'
+);
Standard SQL JOINs work across tables within the same integration or after bridging with a view/subquery. Use JOINs to denormalize and enrich data before indexing.
+
SQL β cross-table join for data prep
+
-- Enrich tickets with user and product info before KB insert
+SELECT
+ t.id,
+ t.body,
+ u.name AS user_name,
+ u.plan AS user_plan,
+ p.name AS product_name,
+ p.category AS product_category
+FROM my_pg.support_tickets t
+JOIN my_pg.users u ON t.user_id = u.id
+JOIN my_pg.products p ON t.product_id = p.id
+WHERE t.body IS NOT NULL;
+
+
SQL β multi-source join via subqueries
+
SELECT pg_data.id, mongo_data.tags
+FROM (SELECT id, title FROM my_pg.articles) AS pg_data
+JOIN (SELECT article_id, tags FROM my_mongo.article_tags) AS mongo_data
+ ON pg_data.id = mongo_data.article_id;
Standard conditional logic in SELECT, WHERE, and other clauses.
+
SQL
+
SELECT
+ id,
+ body,
+ CASE
+ WHEN priority = 1 THEN 'critical'
+ WHEN priority BETWEEN 2 AND 3 THEN 'high'
+ WHEN priority = 4 THEN 'medium'
+ ELSE 'low'
+ END AS priority_label
+FROM my_pg.support_tickets;
Common Table Expressions create named temporary result sets for modular, readable queries.
+
SQL
+
WITH
+-- Step 1: Get recent tickets
+recent AS (
+ SELECT id, user_id, product_id, body
+ FROM my_pg.support_tickets
+ WHERE created_at > '2024-01-01'
+),
+-- Step 2: Join product info
+enriched AS (
+ SELECT
+ r.id,
+ r.body,
+ p.name AS product_name,
+ p.category AS category
+ FROM recent r
+ JOIN my_pg.products p ON r.product_id = p.id
+)
+-- Final: Insert into KB
+INSERT INTO search_project.tickets_kb
+ SELECT * FROM enriched;
+
+
+
+
+
+
Knowledge Bases
+
A knowledge base is the semantic index at the heart of MindsDB's search capabilities. It combines an embedding model, an optional reranking model, and a vector store to enable context-aware retrieval over any data you load into it.
+
+
π§
Knowledge bases match content by meaning, not keywords. "reset credentials" and "forgot password" return the same document even though no words overlap.
+
+
How it works
+
+
1
Create
Register the KB with an embedding model, optional reranking model, storage backend, and column mapping.
+
2
Insert
Feed rows from any table or view. Each row is chunked, embedded, and written to the vector store.
+
3
Query
Use WHERE content = '...' for semantic search, metadata columns for filtering, and relevance to threshold results.
+
4
Connect to Agent
Reference the KB in a CREATE AGENT statement β the agent reasons over it automatically.
Classifies each chunk into 4 relevance levels (0.25 / 0.5 / 0.75 / 1.0). Relevance = weighted sum of class probabilities.
+
binary
Relevant / not relevant. Uses log probability of the positive class.
+
+
+
+
storage
+
The vector database to store embeddings in. Connect it first with CREATE DATABASE.
+
β
Recommended: PGVector β₯ 0.8.0 for best performance and hybrid search support.
+
π‘
MindsDB Docker Desktop Extension includes a built-in PGVector β storage is optional when using it.
+
+
metadata_columns
+
Array of column names used as metadata. Metadata enables fast pre-filtering before or alongside semantic search.
+
β
A column cannot be in both metadata_columns and content_columns.
+
+
content_columns
+
Array of column names whose text gets chunked and embedded. Multiple columns are concatenated. Defaults to a column named content if not specified.
+
+
id_column
+
Column that uniquely identifies each source row. Optional β defaults to the MD5 hash of content columns. Used for upsert logic when re-inserting data.
+
SQL β auto-generate ID when none exists
+
INSERT INTO my_kb (
+ SELECT ROW_NUMBER() OVER (ORDER BY created_at) AS id, *
+ FROM my_pg.raw_documents
+);
Modify an existing KB configuration. The storage backend and embedding model type cannot be changed (would break existing embeddings), but you can rotate API keys, swap reranking models, and update column mappings.
+
+
ALTER KNOWLEDGE_BASE
+
ALTER KNOWLEDGE_BASE kb_name
+USING
+ param_name = value,
+ ...;
-- Add new fields while keeping existing ones filterable
+ALTER KNOWLEDGE_BASE support_kb
+USING
+ metadata_columns = ['product_name', 'priority', 'created_at', 'region'];
+
+
β
Changing metadata_columns doesn't remove old stored metadata. Only the columns listed in the most recent ALTER can be used in WHERE filters going forward.
-- Only search within a specific product and priority
+SELECT id, chunk_content, relevance
+FROM search_project.support_kb
+WHERE content = 'cannot connect'
+ AND product_name = 'DataSync Pro'
+ AND priority <= 2
+ AND relevance >= 0.5;
+
+
SQL β metadata-only filter (no vector search)
+
SELECT *
+FROM search_project.support_kb
+WHERE product_name = 'DataSync Pro'
+ AND created_at BETWEEN '2024-01-01' AND '2024-12-31';
+
+
Supported filtering operators
+
+
Type
Operators
+
+
Semantic (content col)
= 'query', LIKE 'query', NOT LIKE, IN ('q1','q2'), NOT IN, OR, AND (intersection)
+
Metadata
=, !=, <>, >, <, >=, <=, BETWEEN, LIKE, IN, NOT IN, AND, OR, NOT
+
Exclusion
id != x, id NOT IN (SELECT id FROM kb WHERE content = '...')
+
+
+
+
π‘
Default LIMIT is 10. Default relevance threshold is β₯ 0 (no filtering). Specify both to control result count and quality independently.
Hybrid search combines semantic similarity (vector embeddings) with exact keyword matching (BM25 full-text index). Use it when your queries include specific identifiers, acronyms, product codes, or technical terms that embeddings might miss.
+
+
β
Hybrid search requires PGVector as the knowledge base storage backend.
+
+
SQL β enable hybrid search (default alpha)
+
SELECT *
+FROM search_project.support_kb
+WHERE content = 'ACME-213 error'
+ AND hybrid_search = true; -- alpha defaults to 0.5
+
+
SQL β tune the semantic/keyword balance
+
-- hybrid_search_alpha: 0 = pure keyword, 1 = pure semantic
+SELECT *
+FROM search_project.support_kb
+WHERE content = 'ticket ERR-4421'
+ AND hybrid_search_alpha = 0.3; -- lean toward exact keyword match
+
+
SQL β disable reranker for hybrid search
+
-- Uses alpha-weighted average of BM25 + embedding scores instead
+SELECT *
+FROM search_project.support_kb
+WHERE content = 'ERR-4421'
+ AND hybrid_search_alpha = 0.2
+ AND reranking = false;
+
+
How it works
+
When you trigger hybrid search, both paths run in parallel:
+
+
Path
Method
Best for
+
+
Semantic
Embedding vector similarity
Conceptual queries, natural language, paraphrases
+
Keyword
BM25 full-text index
Exact terms, product codes, acronyms, ticket IDs
+
+
+
Results from both paths are merged and reranked (via the KB's reranking model if available, or via alpha-weighted averaging if not).
+
+
β
When to use hybrid search: any time users search for specific identifiers, technical terms, model numbers, or internal terminology alongside natural language queries.
DROP KNOWLEDGE_BASERemoves the KB and all stored embeddings
+
DROP KNOWLEDGE_BASE [IF EXISTS] kb_name;
+DROP KNOWLEDGE_BASE [IF EXISTS] project_name.kb_name;
+
+
β
This permanently removes all embeddings and metadata from the vector store. Cannot be undone.
+
+
+
+
+
+
+
Agents
+
An agent is the conversational interface over your data. It combines an LLM with access to knowledge bases and database tables, enabling natural language queries over structured and unstructured data alike. This is the final piece of the semantic search pipeline.
+
+
How agents work
+
+
1
Input Processing
Builds a real-time data catalog from 5-row samples of each connected object. Extracts the question and structures LLM input.
+
2
Planning
Determines which knowledge bases and tables are relevant. Prepares SQL queries as needed.
+
3
Exploration Loop
Executes queries, collects results, adjusts if needed. Up to 20 queries per request.
+
4
Synthesis
Aggregates results and synthesizes a natural language or structured response.
+
+
+
π‘
Performance tip: keep connected objects to β€ 10. Create views to pre-aggregate and simplify data before connecting to the agent. The clearer your prompt_template, the more accurate the responses.
Query the agent with a natural language question. The agent returns either a free-text answer or structured columns depending on how you write the SELECT.
+
+
SQL β natural language answer
+
SELECT answer
+FROM search_project.support_agent
+WHERE question = 'What are the most common issues with DataSync Pro?';
+
+
SQL β structured output
+
-- Agent formats its response to match the requested columns
+SELECT issue_type, ticket_count, example_ticket_id
+FROM search_project.support_agent
+WHERE question = 'What are the top 5 issue types for DataSync Pro this month?';
+
+
SQL β override params at query time
+
-- Test with a different model without changing the agent definition
+SELECT answer
+FROM search_project.support_agent
+WHERE question = 'Summarize open critical tickets'
+USING
+ model = {
+ "provider": "anthropic",
+ "model_name": "claude-3-5-sonnet-20241022",
+ "api_key": "sk-ant-..."
+ };
-- Drop an agent
+DROP AGENT agent_name;
+DROP AGENT project_name.agent_name;
+
+-- List agents
+SHOW AGENTS;
+SHOW AGENTS WHERE project = 'search_project';
+SHOW AGENTS WHERE name = 'support_agent';
+
+
+
+
+
+
Jobs
+
Jobs schedule any SQL statement (or sequence of statements) to run automatically β once at a future time, or repeatedly on an interval. Use jobs to keep your knowledge bases up to date as new data arrives.
LAST stores the maximum value seen in the previous run. Use it to process only new rows on each execution β turning any data source into a stream.
+
+
SQL β basic LAST usage
+
-- First run: returns nothing (no prior state)
+-- Second run: returns rows inserted since the first run
+SELECT id, body
+FROM my_pg.support_tickets
+WHERE id > LAST;
+
+
SQL β LAST with seed value for first run
+
-- First run uses 1000 as the seed; subsequent runs use LAST
+SELECT id, body
+FROM my_pg.support_tickets
+WHERE id > COALESCE(LAST, 1000);
+
+
π‘
To reset the LAST context in the editor: SET context = 0; or SET context = null;
-- All jobs
+SHOW JOBS;
+
+-- Jobs in a project
+SHOW JOBS WHERE project = 'search_project';
+
+-- Full details
+SELECT * FROM search_project.jobs;
+SELECT * FROM information_schema.jobs;
+
+-- Execution history (includes errors)
+SELECT * FROM log.jobs_history
+WHERE project = 'search_project'
+ORDER BY run_start DESC
+LIMIT 20;
+
+
The jobs table has columns: NAME, PROJECT, RUN_START, RUN_END, NEXT_RUN_AT, SCHEDULE_STR, QUERY. The history table adds ERROR.
-
-## About Qdrant π
-
-A High-performance, massive-scale vector database for the next generation of AI. Also available in the cloud.
-
-## Implementation
-
-The handler uses the [qdrant-client](https://github.com/qdrant/qdrant-client) Python library to establish a connection to a Qdrant instance.
-
-
-## Usage
-To use this handler and get started with Qdrant, the following syntax can be used.
-```sql
-CREATE DATABASE qdrant_test
-WITH ENGINE = "qdrant",
-PARAMETERS = {
- "location": ":memory:",
- "collection_config": {
- "size": 386,
- "distance": "Cosine"
- }
-}
-```
-The available arguments for instantiating Qdrant can be found [here](https://github.com/mindsdb/mindsdb/blob/23a509cb26bacae9cc22475497b8644e3f3e23c3/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py#L408-L468).
-
-## Creating a new table
-
-- Qdrant options for creating a collection can be specified as `collection_config` in the `CREATE DATABASE` parameters.
-- By default, UUIDs are set as collection IDs. You can provide your own IDs under the `id` column.
-```sql
-CREATE TABLE qdrant_test.test_table (
- SELECT embeddings,'{"source": "bbc"}' as metadata FROM mysql_demo_db.test_embeddings
-);
-```
-
-## Querying the database
-
-#### Perform a full retrieval using the following syntax.
-
-```sql
-SELECT * FROM qdrant_test.test_table
-```
-By default, the `LIMIT` is set to 10 and the `OFFSET` is set to 0.
-
-#### Perform a similarity search using your embeddings, like so
-```sql
-SELECT * FROM qdrant_test.test_table
-WHERE search_vector = (select embeddings from mysql_demo_db.test_embeddings limit 1)
-```
-
-#### Perform a search using filters
-```sql
-SELECT * FROM qdrant_test.test_table
-WHERE `metadata.source` = 'bbc';
-```
-
-#### Delete entries using IDs
-```sql
-DELETE FROM qtest.test_table_6
-WHERE id = 2
-```
-
-#### Delete entries using filters
-```sql
-DELETE * FROM qdrant_test.test_table
-WHERE `metadata.source` = 'bbc';
-```
-
-#### Drop a table
-```sql
- DROP TABLE qdrant_test.test_table;
-```
-
-## NOTICE
-Qdrant supports payload indexing that vastly improves retrieval efficiency with filters and is highly recommended. Please note that this feature currently cannot be configured via MindsDB and must be set up separately if needed.
-
-For detailed information on payload indexing, you can refer to the documentation available [here](https://qdrant.tech/documentation/concepts/indexing/#payload-index).
diff --git a/mindsdb/integrations/handlers/qdrant_handler/__about__.py b/mindsdb/integrations/handlers/qdrant_handler/__about__.py
deleted file mode 100644
index 8f6ae16e494..00000000000
--- a/mindsdb/integrations/handlers/qdrant_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Qdrant handler"
-__package_name__ = "mindsdb_qdrant_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Qdrant"
-__author__ = "Qdrant team"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/qdrant_handler/__init__.py b/mindsdb/integrations/handlers/qdrant_handler/__init__.py
deleted file mode 100644
index d46e5ffbf35..00000000000
--- a/mindsdb/integrations/handlers/qdrant_handler/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __description__ as description
-from .__about__ import __version__ as version
-from .connection_args import connection_args, connection_args_example
-try:
- from .qdrant_handler import QdrantHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Qdrant"
-name = "qdrant"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "connection_args",
- "connection_args_example",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/qdrant_handler/connection_args.py b/mindsdb/integrations/handlers/qdrant_handler/connection_args.py
deleted file mode 100644
index 4e58664af86..00000000000
--- a/mindsdb/integrations/handlers/qdrant_handler/connection_args.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- location={
- "type": ARG_TYPE.STR,
- "description": "If `:memory:` - use in-memory Qdrant instance. If a remote URL - connect to a remote Qdrant instance. Example: `http://localhost:6333`",
- "required": False,
- },
- url={
- "type": ARG_TYPE.STR,
- "description": "URL of Qdrant service. Either host or a string of type [scheme]<[port][prefix]. Ex: http://localhost:6333/service/v1",
- },
- host={
- "type": ARG_TYPE.STR,
- "description": "Host name of Qdrant service. The port and host are used to construct the connection URL.",
- "required": False,
- },
- port={
- "type": ARG_TYPE.INT,
- "description": "Port of the REST API interface. Default: 6333",
- "required": False,
- },
- grpc_port={
- "type": ARG_TYPE.INT,
- "description": "Port of the gRPC interface. Default: 6334",
- "required": False,
- },
- prefer_grpc={
- "type": ARG_TYPE.BOOL,
- "description": "If `true` - use gPRC interface whenever possible in custom methods. Default: false",
- "required": False,
- },
- https={
- "type": ARG_TYPE.BOOL,
- "description": "If `true` - use https protocol.",
- "required": False,
- },
- api_key={
- "type": ARG_TYPE.PWD,
- "description": "API key for authentication in Qdrant Cloud.",
- "required": False,
- "secret": True
- },
- prefix={
- "type": ARG_TYPE.STR,
- "description": "If set, the value is added to the REST URL path. Example: `service/v1` will result in `http://localhost:6333/service/v1/{qdrant-endpoint}` for REST API",
- "required": False,
- },
- timeout={
- "type": ARG_TYPE.INT,
- "description": "Timeout for REST and gRPC API requests. Defaults to 5.0 seconds for REST and unlimited for gRPC",
- "required": False,
- },
- path={
- "type": ARG_TYPE.STR,
- "description": "Persistence path for a local Qdrant instance.",
- "required": False,
- },
- collection_config={
- "type": ARG_TYPE.DICT,
- "description": "Collection creation configuration. See https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection",
- "required": True,
- },
-)
-
-connection_args_example = {
- "location": ":memory:",
- "collection_config": {
- "size": 386,
- "distance": "Cosine"
- }
-}
diff --git a/mindsdb/integrations/handlers/qdrant_handler/icon.svg b/mindsdb/integrations/handlers/qdrant_handler/icon.svg
deleted file mode 100644
index 08617d7fa12..00000000000
--- a/mindsdb/integrations/handlers/qdrant_handler/icon.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py b/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py
deleted file mode 100644
index cd28a262a37..00000000000
--- a/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py
+++ /dev/null
@@ -1,402 +0,0 @@
-import ast
-from typing import Any, List, Optional
-from itertools import zip_longest
-
-from qdrant_client import QdrantClient, models
-import pandas as pd
-
-from mindsdb.integrations.libs.response import HandlerResponse
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-from mindsdb.integrations.libs.response import HandlerResponse as Response
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-from mindsdb.integrations.libs.vectordatabase_handler import (
- FilterCondition,
- FilterOperator,
- TableField,
- VectorStoreHandler,
-)
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class QdrantHandler(VectorStoreHandler):
- """Handles connection and execution of the Qdrant statements."""
-
- name = "qdrant"
-
- def __init__(self, name: str, **kwargs):
- super().__init__(name)
- connection_data = kwargs.get("connection_data").copy()
- # Qdrant offers several configuration and optmization options at the time of collection creation
- # Since the create table statement doesn't have a way to pass these options
- # We are requiring the user to pass these options in the connection_data
- # These options are documented here. https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection
- self.collection_config = connection_data.pop("collection_config")
- self.connect(**connection_data)
-
- def connect(self, **kwargs):
- """Connect to a Qdrant instance.
- A Qdrant client can be instantiated with a REST, GRPC interface or in-memory for testing.
- To use the in-memory instance, specify the location argument as ':memory:'."""
- if self.is_connected:
- return self._client
-
- try:
- self._client = QdrantClient(**kwargs)
- self.is_connected = True
- return self._client
- except Exception as e:
- logger.error(f"Error instantiating a Qdrant client: {e}")
- self.is_connected = False
-
- def disconnect(self):
- """Close the database connection."""
- if self.is_connected:
- self._client.close()
- self._client = None
- self.is_connected = False
-
- def check_connection(self) -> StatusResponse:
- """Check the connection to the Qdrant database.
-
- Returns:
- StatusResponse: Indicates if the connection is alive
- """
- need_to_close = not self.is_connected
-
- try:
- # Using a trivial operation to get the connection status
- # As there isn't a universal ping method for the REST, GRPC and in-memory interface
- self._client.get_locks()
- response_code = StatusResponse(True)
- except Exception as e:
- logger.error(f"Error connecting to a Qdrant instance: {e}")
- response_code = StatusResponse(False, error_message=str(e))
- finally:
- if response_code.success and need_to_close:
- self.disconnect()
- if not response_code.success and self.is_connected:
- self.is_connected = False
-
- return response_code
-
- def drop_table(self, table_name: str, if_exists=True):
- """Delete a collection from the Qdrant Instance.
-
- Args:
- table_name (str): The name of the collection to be dropped
- if_exists (bool, optional): Throws an error if this value is set to false and the collection doesn't exist. Defaults to True.
-
- Returns:
- HandlerResponse: _description_
- """
- result = self._client.delete_collection(table_name)
- if not (result or if_exists):
- raise Exception(f"Table {table_name} does not exist!")
-
- def get_tables(self) -> HandlerResponse:
- """Get the list of collections in the Qdrant instance.
-
- Returns:
- HandlerResponse: The common query handler response with a list of table names
- """
- collection_response = self._client.get_collections()
- collections_name = pd.DataFrame(
- columns=["table_name"],
- data=[collection.name for collection in collection_response.collections],
- )
- return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name)
-
- def get_columns(self, table_name: str) -> HandlerResponse:
- try:
- _ = self._client.get_collection(table_name)
- except ValueError:
- return Response(
- resp_type=RESPONSE_TYPE.ERROR,
- error_message=f"Table {table_name} does not exist!",
- )
- return super().get_columns(table_name)
-
- def insert(
- self, table_name: str, data: pd.DataFrame, columns: List[str] = None
- ):
- """Handler for the insert query
-
- Args:
- table_name (str): The name of the table to be inserted into
- data (pd.DataFrame): The data to be inserted
- columns (List[str], optional): Columns to be inserted into. Unused as the values are derived from the "data" argument. Defaults to None.
-
- Returns:
- HandlerResponse: The common query handler response
- """
- assert len(data[TableField.ID.value]) == len(data[TableField.EMBEDDINGS.value]), "Number of ids and embeddings must be equal"
-
- # Qdrant doesn't have a distinction between documents and metadata
- # Any data that is to be stored should be placed in the "payload" field
- data = data.to_dict(orient="list")
- payloads = []
- content_list = data[TableField.CONTENT.value]
- if TableField.METADATA.value in data:
- metadata_list = data[TableField.METADATA.value]
- else:
- metadata_list = [None] * len(data)
- for document, metadata in zip_longest(content_list, metadata_list, fillvalue=None):
- payload = {}
-
- # Insert the document with a "document" key in the payload
- if document is not None:
- payload["document"] = document
-
- # Unpack all the metadata fields into the payload
- if metadata is not None:
- if isinstance(metadata, str):
- metadata = ast.literal_eval(metadata)
- payload = {**payload, **metadata}
-
- if payload:
- payloads.append(payload)
-
- # IDs can be either integers or strings(UUIDs)
- # The following step ensures proper type of numberic values
- ids = [int(id) if str(id).isdigit() else id for id in data[TableField.ID.value]]
- self._client.upsert(table_name, points=models.Batch(
- ids=ids,
- vectors=data[TableField.EMBEDDINGS.value],
- payloads=payloads
- ))
-
- def create_table(self, table_name: str, if_not_exists=True):
- """Create a collection with the given name in the Qdrant database.
-
- Args:
- table_name (str): Name of the table(Collection) to be created
- if_not_exists (bool, optional): Throws an error if this value is set to false and the collection already exists. Defaults to True.
-
- Returns:
- HandlerResponse: The common query handler response
- """
- try:
- # Create a collection with the collection name and collection_config set during __init__
- self._client.create_collection(table_name, self.collection_config)
- except ValueError as e:
- if if_not_exists is False:
- raise e
-
- def _get_qdrant_filter(self, operator: FilterOperator, value: Any) -> dict:
- """ Map the filter operator to the Qdrant filter
- We use a match and not a dict so as to conditionally construct values
- With a dict, all the values the values will be constructed
- Generating models.Range() with a str type value fails
-
- Args:
- operator (FilterOperator): FilterOperator specified in the query. Eg >=, <=, =
- value (Any): Value specified in the query
-
- Raises:
- Exception: If an unsupported operator is specified
-
- Returns:
- dict: A dict of Qdrant filtering clauses
- """
- if operator == FilterOperator.EQUAL:
- return {"match": models.MatchValue(value=value)}
- elif operator == FilterOperator.NOT_EQUAL:
- return {"match": models.MatchExcept(**{"except": [value]})}
- elif operator == FilterOperator.LESS_THAN:
- return {"range": models.Range(lt=value)}
- elif operator == FilterOperator.LESS_THAN_OR_EQUAL:
- return {"range": models.Range(lte=value)}
- elif operator == FilterOperator.GREATER_THAN:
- return {"range": models.Range(gt=value)}
- elif operator == FilterOperator.GREATER_THAN_OR_EQUAL:
- return {"range": models.Range(gte=value)}
- else:
- raise Exception(f"Operator {operator} is not supported by Qdrant!")
-
- def _translate_filter_conditions(
- self, conditions: List[FilterCondition]
- ) -> Optional[dict]:
- """
- Translate a list of FilterCondition objects a dict that can be used by Qdrant.
- Filtering clause docs can be found here: https://qdrant.tech/documentation/concepts/filtering/
- E.g.,
- [
- FilterCondition(
- column="metadata.created_at",
- op=FilterOperator.LESS_THAN,
- value=7132423,
- ),
- FilterCondition(
- column="metadata.created_at",
- op=FilterOperator.GREATER_THAN,
- value=2323432,
- )
- ]
- -->
- models.Filter(
- must=[
- models.FieldCondition(
- key="created_at",
- match=models.Range(lt=7132423),
- ),
- models.FieldCondition(
- key="created_at",
- match=models.Range(gt=2323432),
- ),
- ]
- )
- """
- # We ignore all non-metadata conditions
- if conditions is None:
- return None
- filter_conditions = [
- condition
- for condition in conditions
- if condition.column.startswith(TableField.METADATA.value)
- ]
- if len(filter_conditions) == 0:
- return None
-
- qdrant_filters = []
- for condition in filter_conditions:
- payload_key = condition.column.split(".")[-1]
- qdrant_filters.append(
- models.FieldCondition(key=payload_key, **self._get_qdrant_filter(condition.op, condition.value))
- )
-
- return models.Filter(must=qdrant_filters) if qdrant_filters else None
-
- def update(
- self, table_name: str, data: pd.DataFrame, columns: List[str] = None
- ):
- # insert makes upsert
- return self.insert(table_name, data)
-
- def select(self, table_name: str, columns: Optional[List[str]] = None,
- conditions: Optional[List[FilterCondition]] = None, offset: int = 0, limit: int = 10) -> pd.DataFrame:
- """Select query handler
- Eg: SELECT * FROM qdrant.test_table
-
- Args:
- table_name (str): The name of the table to be queried
- columns (Optional[List[str]], optional): List of column names specified in the query. Defaults to None.
- conditions (Optional[List[FilterCondition]], optional): List of "where" conditionals. Defaults to None.
- offset (int, optional): Offset the results by the provided value. Defaults to 0.
- limit (int, optional): Number of results to return. Defaults to 10.
-
- Returns:
- HandlerResponse: The common query handler response
- """
-
- # Validate and set offset and limit as None is passed if not set in the query
- offset = offset if offset is not None else 0
- limit = limit if limit is not None else 10
-
- # Full scroll if no where conditions are specified
- if not conditions:
- results = self._client.scroll(table_name, limit=limit, offset=offset)
- payload = self._process_select_results(results[0], columns)
- return payload
-
- # Filter conditions
- vector_filter = [condition.value for condition in conditions if condition.column == TableField.SEARCH_VECTOR.value]
- id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value]
- query_filters = self._translate_filter_conditions(conditions)
-
- # Prefer returning results by IDs first
- if id_filters:
-
- if len(id_filters) > 0:
- # is wrapped to a list
- if isinstance(id_filters[0], list):
- id_filters = id_filters[0]
- # convert to int if possible
- id_filters = [int(id) if isinstance(id, str) and id.isdigit() else id for id in id_filters]
-
- results = self._client.retrieve(table_name, ids=id_filters)
- # Followed by the search_vector value
- elif vector_filter:
- # Perform a similarity search with the first vector filter
- results = self._client.search(table_name, query_vector=vector_filter[0], query_filter=query_filters or None, limit=limit, offset=offset)
- elif query_filters:
- results = self._client.scroll(table_name, scroll_filter=query_filters, limit=limit, offset=offset)[0]
-
- # Process results
- payload = self._process_select_results(results, columns)
- return payload
-
- def _process_select_results(self, results=None, columns=None):
- """Private method to process the results of a select query
-
- Args:
- results: A List[Records] or List[ScoredPoint]. Defaults to None
- columns: List of column names specified in the query. Defaults to None
-
- Returns:
- Dataframe: A processed pandas dataframe
- """
- ids, documents, metadata, distances = [], [], [], []
-
- for result in results:
- ids.append(result.id)
- # The documents and metadata are stored as a dict in the payload
- documents.append(result.payload["document"])
- metadata.append({k: v for k, v in result.payload.items() if k != "document"})
-
- # Score is only available for similarity search results
- if "score" in result:
- distances.append(result.score)
-
- payload = {
- TableField.ID.value: ids,
- TableField.CONTENT.value: documents,
- TableField.METADATA.value: metadata,
- }
-
- # Filter result columns
- if columns:
- payload = {
- column: payload[column]
- for column in columns
- if column != TableField.EMBEDDINGS.value and column in payload
- }
-
- # If the distance list is empty, don't add it to the result
- if distances:
- payload[TableField.DISTANCE.value] = distances
-
- return pd.DataFrame(payload)
-
- def delete(
- self, table_name: str, conditions: List[FilterCondition] = None
- ):
- """Delete query handler
-
- Args:
- table_name (str): List of column names specified in the query. Defaults to None.
- conditions (List[FilterCondition], optional): List of "where" conditionals. Defaults to None.
-
- Raises:
- Exception: If no conditions are specified
-
- Returns:
- HandlerResponse: The common query handler response
- """
- filters = self._translate_filter_conditions(conditions)
- # Get id filters
- ids = [
- condition.value
- for condition in conditions
- if condition.column == TableField.ID.value
- ] or None
-
- if filters is None and ids is None:
- raise Exception("Delete query must have at least one condition!")
-
- if ids:
- self._client.delete(table_name, points_selector=models.PointIdsList(points=ids))
-
- if filters:
- self._client.delete(table_name, points_selector=models.FilterSelector(filter=filters))
diff --git a/mindsdb/integrations/handlers/qdrant_handler/requirements.txt b/mindsdb/integrations/handlers/qdrant_handler/requirements.txt
deleted file mode 100644
index 0ba481f5871..00000000000
--- a/mindsdb/integrations/handlers/qdrant_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-qdrant-client
-urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/questdb_handler/__about__.py b/mindsdb/integrations/handlers/questdb_handler/__about__.py
deleted file mode 100644
index fb4c7e4a057..00000000000
--- a/mindsdb/integrations/handlers/questdb_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB QuestDB handler'
-__package_name__ = 'mindsdb_questdb_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for QuestDB"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/questdb_handler/__init__.py b/mindsdb/integrations/handlers/questdb_handler/__init__.py
deleted file mode 100644
index 04ddc2c8807..00000000000
--- a/mindsdb/integrations/handlers/questdb_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .questdb_handler import QuestDBHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-
-title = 'QuestDB'
-name = 'questdb'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title',
- 'description', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/questdb_handler/icon.svg b/mindsdb/integrations/handlers/questdb_handler/icon.svg
deleted file mode 100644
index a504d5e24a7..00000000000
--- a/mindsdb/integrations/handlers/questdb_handler/icon.svg
+++ /dev/null
@@ -1,25 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/questdb_handler/questdb_handler.py b/mindsdb/integrations/handlers/questdb_handler/questdb_handler.py
deleted file mode 100644
index 78b15821eeb..00000000000
--- a/mindsdb/integrations/handlers/questdb_handler/questdb_handler.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pandas as pd
-import numpy as np
-
-from questdb.ingress import Sender
-
-from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler
-from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-from mindsdb.utilities import log
-
-
-logger = log.getLogger(__name__)
-
-
-class QuestDBHandler(PostgresHandler):
- """
- This handler handles connection and execution of the QuestDB statements.
- TODO: check the dialect for questdb
- """
- name = 'questdb'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
-
- def get_tables(self):
- """
- List all tabels in QuestDB
- """
- query = "SHOW TABLES"
- response = super().native_query(query)
- return response
-
- def get_columns(self, table_name):
- """
- List information about the table
- """
- query = f"SELECT * FROM tables() WHERE name='{table_name}';"
- response = super().native_query(query)
- return response
-
- def qdb_connect(self):
- args = self.connection_args
- conf = f"http::addr={args['host']}:9000;username={args['user']};password={args['password']};"
- return Sender.from_conf(conf)
-
- def insert(self, table_name: str, df: pd.DataFrame):
-
- with self.qdb_connect() as sender:
- try:
- # find datetime column
- at_col = None
- for col, dtype in df.dtypes.items():
- if np.issubdtype(dtype, np.datetime64):
- at_col = col
- if at_col is None:
- raise Exception(f'Unable to find datetime column: {df.dtypes}')
-
- sender.dataframe(df, table_name=table_name, at=at_col)
- response = Response(RESPONSE_TYPE.OK)
-
- except Exception as e:
- logger.error(f'Error running insert to {table_name} on {self.database}, {e}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_code=0,
- error_message=str(e)
- )
-
- return response
diff --git a/mindsdb/integrations/handlers/questdb_handler/requirements.txt b/mindsdb/integrations/handlers/questdb_handler/requirements.txt
deleted file mode 100644
index 3c72ed894fc..00000000000
--- a/mindsdb/integrations/handlers/questdb_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-questdb
diff --git a/mindsdb/integrations/handlers/questdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/questdb_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py b/mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py
deleted file mode 100644
index 9b214a31697..00000000000
--- a/mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.questdb_handler.questdb_handler import QuestDBHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class QuestDBHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "127.0.0.1",
- "port": "8812",
- "user": "admin",
- "password": "quest",
- "database": "questdb"
- }
- }
- cls.handler = QuestDBHandler('test_questdb_handler', **cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_describe_table(self):
- described = self.handler.describe_table("house_rentals_data")
- assert described['type'] is not RESPONSE_TYPE.ERROR
-
- def test_2_get_tables(self):
- tables = self.handler.get_tables()
- assert tables['type'] is not RESPONSE_TYPE.ERROR
-
- def test_3_select_query(self):
- query = "SELECT * FROM house_rentals_data WHERE 'id'='1'"
- result = self.handler.query(query)
- assert len(result) > 0
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/README.md b/mindsdb/integrations/handlers/quickbooks_handler/README.md
deleted file mode 100644
index a5afb4230a6..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/README.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# Quickbooks Handler
-
-Quickbooks handler for MindsDB provides interfaces to connect to Quickbooks via APIs and pull data into MindsDB.
-
----
-
-## Table of Contents
-
-- [About Quickbooks](#about-Quickbooks)
- - [Quickbooks Handler Implementation](#Quickbooks-handler-implementation)
- - [Implemented Features](#implemented-features)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
----
-## About Quickbooks
-
-Quickbooks is an accounting software package developed and marketed by Intuit. Quickbooks products are geared mainly toward small and medium-sized businesses and offer on-premises accounting applications as well as cloud-based versions that accept business payments, manage and pay bills, and payroll functions.
-
-## Quickbooks Handler Implementation
-
-This handler was implemented using the official Quickbooks API. It provides a simple and easy-to-use interface to access the Quickbooks API.
-
-
-## Implemented Features
-
-- Fetch the following TABLES
- - vendors
- - employees
- - purchases
- - accounts
- - bills
- - bill_payments
-
-## TODO
-
-- (List any pending features or improvements here)
-
-## Example Usage
-```
-CREATE DATABASE my_qboo
-With
- ENGINE = "quickbooks",
- PARAMETERS = {
- "client_id": "",
- "client_secret": "",
- "realm_id":"",
- "refresh_token":"",
- "environment":''
- };
-
-```
-
-After setting up the Quickbooks Handler, you can use SQL queries to fetch data from Quickbooks:
-
-```sql
-SELECT *
-FROM my_qboo.vendors;
-```
-
-To fetch data from the employees table:
-
-```sql
-SELECT *
-FROM my_qboo.employees;
-```
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/__about__.py b/mindsdb/integrations/handlers/quickbooks_handler/__about__.py
deleted file mode 100644
index 7d7af16128e..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Quickbooks handler'
-__package_name__ = 'mindsdb_quickbooks_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Quickbooks"
-__author__ = 'Maro Akpobi'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/__init__.py b/mindsdb/integrations/handlers/quickbooks_handler/__init__.py
deleted file mode 100644
index 958c44792c4..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .quickbooks_handler import (
- QuickbooksHandler as Handler
- )
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'QuickBooks'
-name = 'quickbooks'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/icon.svg b/mindsdb/integrations/handlers/quickbooks_handler/icon.svg
deleted file mode 100644
index d4b6845dbf4..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/icon.svg
+++ /dev/null
@@ -1,11 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py b/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py
deleted file mode 100644
index f321b0def15..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import os
-from qbosdk import QuickbooksOnlineSDK
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse
-)
-from mindsdb.utilities.config import Config
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-from .quickbooks_table import AccountsTable, PurchasesTable, BillPaymentsTable, VendorsTable, BillsTable, EmployeesTable
-
-logger = log.getLogger(__name__)
-
-
-class QuickbooksHandler(APIHandler):
- """
- A class for handling connections and interactions with Quickbooks API.
- """
-
- def __init__(self, name=None, **kwargs):
- super().__init__(name)
-
- args = kwargs.get('connection_data', {})
-
- self.connection_args = {}
- handler_config = Config().get('quickbooks_handler', {})
- for k in ['client_id', 'client_secret', 'refresh_token', 'realm_id', 'environment']:
- if k in args:
- self.connection_args[k] = args[k]
- elif f'QUICKBOOKS_{k.upper()}' in os.environ:
- self.connection_args[k] = os.environ[f'QUICKBOOKS_{k.upper()}']
- elif k in handler_config:
- self.connection_args[k] = handler_config[k]
-
- self.quickbooks = None
- self.is_connected = False
-
- accountso = AccountsTable(self)
- self._register_table('accountso', accountso)
- purchases = PurchasesTable(self)
- self._register_table('purchases', purchases)
- bills_payments = BillPaymentsTable(self)
- self._register_table('bills_payments', bills_payments)
- vendors = VendorsTable(self)
- self._register_table('vendors', vendors)
- bills = BillsTable(self)
- self._register_table('bills', bills)
- employees = EmployeesTable(self)
- self._register_table('employees', employees)
-
- def connect(self):
- if self.is_connected is True:
- return self.quickbooks
-
- self.quickbooks = QuickbooksOnlineSDK(
- client_id=self.connection_args['client_id'],
- client_secret=self.connection_args['client_secret'],
- realm_id=self.connection_args['realm_id'],
- refresh_token=self.connection_args['refresh_token'],
- environment=self.connection_args['environment']
- )
- self.is_connected = True
- return self.quickbooks
-
- def check_connection(self) -> StatusResponse:
- response = StatusResponse(False)
-
- try:
- quickbooks = self.connect()
- quickbooks.accounts.get()
- logger.info(quickbooks.accounts.get())
- response.success = True
-
- except Exception as e:
- response.error_message = f'Error connecting to Quickbooks API: {e}. '
- logger.error(response.error_message)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py b/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py
deleted file mode 100644
index 02344198094..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py
+++ /dev/null
@@ -1,374 +0,0 @@
-import pandas as pd
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb_sql_parser import ast
-
-
-class AccountsTable(APITable):
-
- def flatten_dict(self, data: dict, prefix: str = ""):
- flat_data = {}
- for key, value in data.items():
- if isinstance(value, dict):
- flattened_sub_dict = self.flatten_dict(value, f"{key}_")
- flat_data.update(flattened_sub_dict)
- else:
- flat_data[f"{prefix}{key}"] = value
- return flat_data
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- qbo = self.handler.connect()
- accounts_data = qbo.accounts.get()
- flattened_accounts_data = [self.flatten_dict(account) for account in accounts_data]
- result = pd.DataFrame(flattened_accounts_data)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- return [
- 'Name',
- 'SubAccount',
- 'FullyQualifiedName',
- 'Active',
- 'Classification',
- 'AccountType',
- 'AccountSubType',
- 'CurrentBalance',
- 'CurrentBalanceWithSubAccounts',
- 'CurrencyRef_value',
- 'CurrencyRef_name',
- 'domain',
- 'sparse',
- 'Id',
- 'SyncToken',
- 'MetaData_CreateTime',
- 'MetaData_LastUpdatedTime',
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
-
-
-class PurchasesTable(APITable):
-
- def flatten_dict(self, data: dict, prefix: str = ""):
- flat_data = {}
- for key, value in data.items():
- if isinstance(value, dict):
- flattened_sub_dict = self.flatten_dict(value, f"{key}_")
- flat_data.update(flattened_sub_dict)
- elif isinstance(value, list):
- for i, item in enumerate(value):
- if isinstance(item, dict):
- flattened_sub_dict = self.flatten_dict(item, f"{key}_{i}_")
- flat_data.update(flattened_sub_dict)
- else:
- flat_data[f"{prefix}{key}_{i}"] = item
- else:
- flat_data[f"{prefix}{key}"] = value
- return flat_data
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- qbo = self.handler.connect()
- purchases_data = qbo.purchases.get()
- flattened_purchases_data = [self.flatten_dict(purchase) for purchase in purchases_data]
- result = pd.DataFrame(flattened_purchases_data)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- return [
- 'AccountRef_value',
- 'AccountRef_name',
- 'PaymentType',
- 'Credit',
- 'TotalAmt',
- 'domain',
- 'sparse',
- 'Id',
- 'SyncToken',
- 'MetaData_CreateTime',
- 'MetaData_LastUpdatedTime',
- 'TxnDate',
- 'CurrencyRef_value',
- 'CurrencyRef_name',
- 'EntityRef_value',
- 'EntityRef_name',
- 'EntityRef_type',
- 'Line_0_Id',
- 'Line_0_Amount',
- 'Line_0_DetailType'
- # Add more columns for additional line items if needed
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
-
-
-class BillPaymentsTable(APITable):
- def flatten_dict(self, data: dict, prefix: str = ""):
- flat_data = {}
- for key, value in data.items():
- if isinstance(value, dict):
- flattened_sub_dict = self.flatten_dict(value, f"{prefix}{key}_")
- flat_data.update(flattened_sub_dict)
- elif isinstance(value, list):
- for i, item in enumerate(value):
- if isinstance(item, dict):
- flattened_sub_dict = self.flatten_dict(item, f"{prefix}{key}_{i}_")
- flat_data.update(flattened_sub_dict)
- else:
- flat_data[f"{prefix}{key}_{i}"] = item
- else:
- flat_data[prefix + key] = value
- return flat_data
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- qbo = self.handler.connect()
- billpayments_data = qbo.bill_payments.get()
- flattened_billpayments_data = [self.flatten_dict(bp) for bp in billpayments_data]
- result = pd.DataFrame(flattened_billpayments_data)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- return [
- 'VendorRef_value',
- 'VendorRef_name',
- 'PayType',
- 'CreditCardPayment_CCAccountRef_value',
- 'CreditCardPayment_CCAccountRef_name',
- 'CheckPayment_BankAccountRef_value',
- 'CheckPayment_BankAccountRef_name',
- 'TotalAmt',
- 'Id',
- 'SyncToken',
- 'MetaData_CreateTime',
- 'MetaData_LastUpdatedTime',
- 'DocNumber',
- 'TxnDate',
- 'CurrencyRef_value',
- 'CurrencyRef_name',
- 'Line_0_Amount',
- 'Line_0_LinkedTxn_0_TxnId',
- 'Line_0_LinkedTxn_0_TxnType',
- 'domain',
- 'sparse',
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
-
-
-class VendorsTable(APITable):
-
- def flatten_dict(self, data: dict, prefix: str = ""):
- flat_data = {}
- for key, value in data.items():
- if isinstance(value, dict):
- flattened_sub_dict = self.flatten_dict(value, f"{key}_")
- flat_data.update(flattened_sub_dict)
- else:
- flat_data[f"{prefix}{key}"] = value
- return flat_data
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- qbo = self.handler.connect()
- vendors_data = qbo.vendors.get()
- flattened_vendors_data = [self.flatten_dict(vendor) for vendor in vendors_data]
- result = pd.DataFrame(flattened_vendors_data)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- return [
- 'Balance',
- 'Vendor1099',
- 'CurrencyRef_value',
- 'CurrencyRef_name',
- 'Id',
- 'SyncToken',
- 'MetaData_CreateTime',
- 'MetaData_LastUpdatedTime',
- 'DisplayName',
- 'PrintOnCheckName',
- 'Active',
- 'domain',
- 'sparse'
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
-
-
-class BillsTable(APITable):
-
- def flatten_dict(self, data: dict, prefix: str = ""):
- flat_data = {}
- for key, value in data.items():
- if isinstance(value, dict):
- flattened_sub_dict = self.flatten_dict(value, f"{prefix}{key}_")
- flat_data.update(flattened_sub_dict)
- elif isinstance(value, list):
- for i, item in enumerate(value):
- if isinstance(item, dict):
- flattened_sub_dict = self.flatten_dict(item, f"{prefix}{key}_{i}_")
- flat_data.update(flattened_sub_dict)
- else:
- flat_data[f"{prefix}{key}_{i}"] = item
- else:
- flat_data[prefix + key] = value
- return flat_data
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- qbo = self.handler.connect()
- bills_data = qbo.bills.get()
- flattened_bills_data = [self.flatten_dict(bill) for bill in bills_data]
- result = pd.DataFrame(flattened_bills_data)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- return [
- 'DueDate',
- 'Balance',
- 'Id',
- 'SyncToken',
- 'MetaData_CreateTime',
- 'MetaData_LastUpdatedTime',
- 'TxnDate',
- 'CurrencyRef_value',
- 'CurrencyRef_name',
- 'VendorRef_value',
- 'VendorRef_name',
- 'APAccountRef_value',
- 'APAccountRef_name',
- 'TotalAmt',
- 'Line_0_Id',
- 'Line_0_LineNum',
- 'Line_0_Description',
- 'Line_0_Amount',
- 'Line_0_DetailType',
- 'Line_0_ItemBasedExpenseLineDetail_BillableStatus',
- 'Line_0_ItemBasedExpenseLineDetail_ItemRef_value',
- 'Line_0_ItemBasedExpenseLineDetail_ItemRef_name',
- 'Line_0_ItemBasedExpenseLineDetail_UnitPrice',
- 'Line_0_ItemBasedExpenseLineDetail_Qty',
- 'Line_1_Id',
- 'Line_1_LineNum',
- 'Line_1_Description',
- 'Line_1_Amount',
- 'Line_1_DetailType',
- 'Line_1_ItemBasedExpenseLineDetail_BillableStatus',
- 'Line_1_ItemBasedExpenseLineDetail_ItemRef_value',
- 'Line_1_ItemBasedExpenseLineDetail_ItemRef_name',
- 'Line_1_ItemBasedExpenseLineDetail_UnitPrice',
- 'Line_1_ItemBasedExpenseLineDetail_Qty',
- 'domain',
- 'sparse'
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
-
-
-class EmployeesTable(APITable):
-
- def flatten_dict(self, data: dict, prefix: str = ""):
- flat_data = {}
- for key, value in data.items():
- if isinstance(value, dict):
- flattened_sub_dict = self.flatten_dict(value, f"{prefix}{key}_")
- flat_data.update(flattened_sub_dict)
- elif isinstance(value, list):
- for i, item in enumerate(value):
- if isinstance(item, dict):
- flattened_sub_dict = self.flatten_dict(item, f"{prefix}{key}_{i}_")
- flat_data.update(flattened_sub_dict)
- else:
- flat_data[f"{prefix}{key}_{i}"] = item
- else:
- flat_data[prefix + key] = value
- return flat_data
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- qbo = self.handler.connect()
- employees_data = qbo.employees.get()
- flattened_employees_data = [self.flatten_dict(employee) for employee in employees_data]
- result = pd.DataFrame(flattened_employees_data)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- return [
- 'BillableTime',
- 'Id',
- 'SyncToken',
- 'MetaData_CreateTime',
- 'MetaData_LastUpdatedTime',
- 'GivenName',
- 'FamilyName',
- 'DisplayName',
- 'PrintOnCheckName',
- 'Active',
- 'PrimaryPhone_FreeFormNumber',
- 'HiredDate',
- 'domain',
- 'sparse'
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
diff --git a/mindsdb/integrations/handlers/quickbooks_handler/requirements.txt b/mindsdb/integrations/handlers/quickbooks_handler/requirements.txt
deleted file mode 100644
index cf4f4b95815..00000000000
--- a/mindsdb/integrations/handlers/quickbooks_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-qbosdk
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/ray_serve_handler/__about__.py b/mindsdb/integrations/handlers/ray_serve_handler/__about__.py
deleted file mode 100644
index 9b4ca6ff8c3..00000000000
--- a/mindsdb/integrations/handlers/ray_serve_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Ray Serve handler'
-__package_name__ = 'mindsdb_ray_serve_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Ray Serve"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023- mindsdb'
diff --git a/mindsdb/integrations/handlers/ray_serve_handler/__init__.py b/mindsdb/integrations/handlers/ray_serve_handler/__init__.py
deleted file mode 100644
index 25daf9b4410..00000000000
--- a/mindsdb/integrations/handlers/ray_serve_handler/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-from mindsdb.integrations.handlers.ray_serve_handler.__about__ import __version__ as version, __description__ as description
-try:
- from mindsdb.integrations.handlers.ray_serve_handler.ray_serve_handler import RayServeHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Ray Serve'
-name = 'ray_serve'
-type = HANDLER_TYPE.ML
-icon_path = 'icon.svg'
-permanent = False
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/ray_serve_handler/example.md b/mindsdb/integrations/handlers/ray_serve_handler/example.md
deleted file mode 100644
index c3ffedccd5b..00000000000
--- a/mindsdb/integrations/handlers/ray_serve_handler/example.md
+++ /dev/null
@@ -1,175 +0,0 @@
-# Preliminaries
-
-To use Ray Serve models through MindsDB, you need to create a file that uses Ray Serve to deploy both training and inference endpoints for MindsDB to hook into.
-
-There are several (slightly different) ways to achieve this, but we recommend the approach used in our [docs page](https://docs.mindsdb.com/custom-model/ray-serve):
-
-```python
-import ray
-from fastapi import Request, FastAPI
-from ray import serve
-
-# [other model-specific imports here]
-
-app = FastAPI()
-ray.init()
-serve.start(detached=True)
-
-
-async def parse_req(request: Request):
- """Parse a json payload from a post request into a dataframe and target column."""""
- data = await request.json()
- target = data.get('target', None)
- di = json.loads(data['df'])
- df = pd.DataFrame(di)
- return df, target
-
-
-@serve.deployment(route_prefix="/my_model")
-@serve.ingress(app)
-class MyModel:
- @app.post("/train")
- async def train(self, request: Request):
- df, target = await parse_req(request)
- # [...model fitting logic here]
- return {'status': 'ok'}
-
- @app.post("/predict")
- async def predict(self, request: Request):
- df, _ = await parse_req(request)
- # [...model inference logic here]
- pred_dict = {'prediction': [x for x in predictions]}
- return pred_dict
-
-
-if __name__ == '__main__':
- MyModel.deploy()
- while True:
- time.sleep(1)
-```
-
-Once your script looks similar to the one above, save it (e.g. as `model.py`) and setup the server by simply running the file:
-
-```bash
-python model.py
-```
-
-# MindsDB commands
-
-To create the engine:
-
-```sql
-CREATE ML_ENGINE rayserve FROM ray_serve;
-```
-
-While the Ray Serve is active, you can use the following command to create the model inside MindsDB by triggering the train endpoint:
-
-```sql
-CREATE MODEL mindsdb.rayserve_model
-FROM integration_name (SELECT * FROM table_name)
-PREDICT target
-USING
-engine='ray_serve',
-train_url='http://ray_serve_url:port/my_model/train',
-predict_url='http://ray_serve_url:port/my_model/predict';
-```
-
-In a local deployment, `ray_serve_url = localhost` and `port = 8000`. The `my_model` part of the URL is the `route_prefix` in the script above.
-
-Once the model is created, you should wait until the training process finishes.
-
-Then, you can query it as any other MindsDB model:
-```sql
-SELECT input_col, target_col
-FROM rayserve_model
-WHERE input_col=some_value; -- could also use a JOIN here, as usual
-
-DESCRIBE rayserve_model;
-```
-
-# End to end example
-
-Here, we take a look at a simple linear regression done on a single feature to predict a numerical target.
-
-`model.py` looks like this:
-
-```python
-import ray
-from fastapi import Request, FastAPI
-from ray import serve
-import time
-import pandas as pd
-import json
-from sklearn.linear_model import LogisticRegression
-
-
-app = FastAPI()
-ray.init()
-serve.start(detached=True)
-
-
-async def parse_req(request: Request):
- data = await request.json()
- target = data.get('target', None)
- di = json.loads(data['df'])
- df = pd.DataFrame(di)
- return df, target
-
-
-@serve.deployment(route_prefix="/my_model")
-@serve.ingress(app)
-class MyModel:
- @app.post("/train")
- async def train(self, request: Request):
- df, target = await parse_req(request)
- feature_cols = list(set(list(df.columns)) - set([target]))
- self.feature_cols = feature_cols
- X = df.loc[:, self.feature_cols]
- Y = list(df[target])
- self.model = LogisticRegression()
- self.model.fit(X, Y)
- return {'status': 'ok'}
-
- @app.post("/predict")
- async def predict(self, request: Request):
- df, _ = await parse_req(request)
- X = df.loc[:, self.feature_cols]
- predictions = self.model.predict(X)
- pred_dict = {'prediction': [float(x) for x in predictions]}
- return pred_dict
-
-
-if __name__ == '__main__':
- MyModel.deploy()
-
- while True:
- time.sleep(1)
-```
-
-And the MindsDB commands to train and query the model are:
-
-```sql
-CREATE ML_ENGINE rayserve FROM ray_serve; -- assumes user hasn't registered the engine prior to this example
-
-SELECT sqft, rental_price FROM example_db.demo_data.home_rentals LIMIT 10; -- toy dataset. we'll use `sqft` as the input feature and `rental_price` as the target
-
-CREATE MODEL mindsdb.rayserve_model
-FROM example_db (SELECT sqft, rental_price FROM demo_data.home_rentals LIMIT 10)
-PREDICT rental_price
-USING
-engine='ray_serve',
-train_url='http://127.0.0.1:8000/my_model/train',
-predict_url='http://127.0.0.1:8000/my_model/predict';
-
-SELECT sqft, rental_price
-FROM rayserve_model
-WHERE sqft=917;
-
-DESCRIBE rayserve_model;
-```
-
-By the end of the entire process, you should get back a response with the predicted `rental_price`:
-
-| sqft | rental_price |
-|------|--------------|
-| 917 | 3901 |
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/ray_serve_handler/icon.svg b/mindsdb/integrations/handlers/ray_serve_handler/icon.svg
deleted file mode 100644
index 2a11028288e..00000000000
--- a/mindsdb/integrations/handlers/ray_serve_handler/icon.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py b/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py
deleted file mode 100644
index 9abca34a885..00000000000
--- a/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import io
-import json
-
-import requests
-from typing import Dict, Optional
-
-import pandas as pd
-import pyarrow.parquet as pq
-
-from mindsdb.integrations.libs.base import BaseMLEngine
-
-
-class RayServeException(Exception):
- pass
-
-
-class RayServeHandler(BaseMLEngine):
- """
- The Ray Serve integration engine needs to have a working connection to Ray Serve. For this:
- - A Ray Serve server should be running
-
- Example:
-
- """ # noqa
- name = 'ray_serve'
-
- @staticmethod
- def create_validation(target, args=None, **kwargs):
- if not args.get('using'):
- raise Exception("Error: This engine requires some parameters via the 'using' clause. Please refer to the documentation of the Ray Serve handler and try again.") # noqa
- if not args['using'].get('train_url'):
- raise Exception("Error: Please provide a URL for the training endpoint.")
- if not args['using'].get('predict_url'):
- raise Exception("Error: Please provide a URL for the prediction endpoint.")
-
- def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
- # TODO: use join_learn_process to notify users when ray has finished the training process
- args = args['using'] # ignore the rest of the problem definition
- args['target'] = target
- self.model_storage.json_set('args', args)
- try:
- if args.get('is_parquet', False):
- buffer = io.BytesIO()
- df.to_parquet(buffer)
- resp = requests.post(args['train_url'],
- files={"df": ("df", buffer.getvalue(), "application/octet-stream")},
- data={"args": json.dumps(args), "target": target},
- )
- else:
- resp = requests.post(args['train_url'],
- json={'df': df.to_json(orient='records'), 'target': target, 'args': args},
- headers={'content-type': 'application/json; format=pandas-records'})
- except requests.exceptions.InvalidSchema:
- raise Exception("Error: The URL provided for the training endpoint is invalid.")
-
- error = None
- try:
- resp = resp.json()
- except json.JSONDecodeError:
- error = resp.text
- else:
- if resp.get('status') != 'ok':
- error = resp['status']
-
- if error:
- raise RayServeException(f"Error: {error}")
-
- def predict(self, df, args=None):
- args = {**(self.model_storage.json_get('args')), **args} # merge incoming args
- pred_args = args.get('predict_params', {})
- args = {**args, **pred_args} # merge pred_args
- if args.get('is_parquet', False):
- buffer = io.BytesIO()
- df.attrs['pred_args'] = pred_args
- df.to_parquet(buffer)
- resp = requests.post(args['predict_url'],
- files={"df": ("df", buffer.getvalue(), "application/octet-stream")},
- data={"pred_args": json.dumps(pred_args)},
- )
- else:
- resp = requests.post(args['predict_url'],
- json={'df': df.to_json(orient='records'), 'pred_args': pred_args},
- headers={'content-type': 'application/json; format=pandas-records'})
- content_type = resp.headers.get("Content-Type", "")
- if "application/octet-stream" in content_type:
- try:
- buffer = io.BytesIO(resp.content)
- table = pq.read_table(buffer)
- response = table.to_pandas()
- except Exception:
- error = 'Could not decode parquet.'
- else:
- try:
- response = resp.json()
- except json.JSONDecodeError:
- error = resp.text
-
- if 'prediction' in response:
- target = args['target']
- if target != 'prediction':
- # rename prediction to target
- response[target] = response.pop('prediction')
- return pd.DataFrame(response)
- else:
- # something wrong
- error = response
-
- raise RayServeException(f"Error: {error}")
-
- def describe(self, key: Optional[str] = None) -> pd.DataFrame:
- args = self.model_storage.json_get('args')
- description = {
- 'TRAIN_URL': [args['train_url']],
- 'PREDICT_URL': [args['predict_url']],
- 'TARGET': [args['target']],
- }
- return pd.DataFrame.from_dict(description)
diff --git a/mindsdb/integrations/handlers/reddit_handler/README.md b/mindsdb/integrations/handlers/reddit_handler/README.md
deleted file mode 100644
index cd774178c89..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/README.md
+++ /dev/null
@@ -1,82 +0,0 @@
-# Reddit Handler
-
-Reddit handler for MindsDB provides interfaces to connect to Reddit via APIs and pull data into MindsDB.
-
----
-
-## Table of Contents
-
-- [Reddit Handler](#reddit-handler)
- - [Table of Contents](#table-of-contents)
- - [About Reddit](#about-reddit)
- - [Reddit Handler Implementation](#reddit-handler-implementation)
- - [Reddit Handler Initialization](#reddit-handler-initialization)
- - [Implemented Features](#implemented-features)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
-
----
-
-## About Reddit
-
-Reddit is a network of communities based on people's interests. It provides a platform for users to submit links, create content, and have discussions about various topics.
-
-## Reddit Handler Implementation
-
-This handler was implemented using the [PRAW (Python Reddit API Wrapper)](https://praw.readthedocs.io/en/latest/) library. PRAW is a Python package that provides a simple and easy-to-use interface to access the Reddit API.
-
-## Reddit Handler Initialization
-
-The Reddit handler is initialized with the following parameters:
-
-- `client_id`: a required Reddit API client ID
-- `client_secret`: a required Reddit API client secret
-- `user_agent`: a required user agent string to identify your application
-
-## How to get your Reddit credentials.
-
-1. Visit Reddit App Preferences (https://www.reddit.com/prefs/apps) or [https://old.reddit.com/prefs/apps/](https://old.reddit.com/prefs/apps/)
-2. Scroll to the bottom and click "create another app..."
-3. Fill out the name, description, and redirect url for your app, then click "create app"
-4. Now you should be able to see the personal use script, secret, and name of your app. Store those as environment variables CLIENT_ID, CLIENT_SECRET, and USER_AGENT respectively.
-
-## Implemented Features
-
-- Fetch submissions from a subreddit based on sorting type and limit.
-- (Add other implemented features here)
-
-## TODO
-
-- (List any pending features or improvements here)
-
-## Example Usage
-```
-CREATE DATABASE my_reddit
-With
- ENGINE = 'reddit',
- PARAMETERS = {
- "client_id":"YOUR_CLIENT_ID",
- "client_secret":"YOUR_CLIENT_SECRET",
- "user_agent":"YOUR_USER_AGENT"
- };
-```
-
-After setting up the Reddit Handler, you can use SQL queries to fetch data from Reddit:
-
-```sql
-SELECT *
-FROM my_reddit.submission
-WHERE subreddit = 'MachineLearning' AND sort_type = 'top' AND items = 5;
-```
-
-`items`: Number of items to fetch from the subreddit.
-
-`sort_type`: Sorting type for the subreddit. Can be one of `hot`, `new`, `top`, `controversial`, `gilded`, `wiki`, `mod`, `rising`.
-
-Each Post in a subreddit has a unique ID. You can use this ID to fetch comments for a particular post.
-
-```
-SELECT *
-FROM my_reddit.comment
-WHERE submission_id = '12gls93'
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/reddit_handler/__about__.py b/mindsdb/integrations/handlers/reddit_handler/__about__.py
deleted file mode 100644
index 64aed327ab8..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Reddit handler'
-__package_name__ = 'mindsdb_reddit_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Reddit"
-__author__ = 'Maro Akpobi'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/reddit_handler/__init__.py b/mindsdb/integrations/handlers/reddit_handler/__init__.py
deleted file mode 100644
index 7314efc20d6..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .reddit_handler import (
- RedditHandler as Handler
- )
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Reddit'
-name = 'reddit'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/reddit_handler/icon.svg b/mindsdb/integrations/handlers/reddit_handler/icon.svg
deleted file mode 100644
index e9f9923a08c..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/icon.svg
+++ /dev/null
@@ -1,11 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/reddit_handler/reddit_handler.py b/mindsdb/integrations/handlers/reddit_handler/reddit_handler.py
deleted file mode 100644
index 4d314f620b1..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/reddit_handler.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import praw
-import os
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-from mindsdb.utilities.config import Config
-from mindsdb.utilities import log
-
-from .reddit_tables import CommentTable, SubmissionTable
-
-logger = log.getLogger(__name__)
-
-
-class RedditHandler(APIHandler):
-
- def __init__(self, name=None, **kwargs):
- super().__init__(name)
-
- args = kwargs.get('connection_data', {})
-
- self.connection_args = {}
- handler_config = Config().get('reddit_handler', {})
- for k in ['client_id', 'client_secret', 'user_agent']:
- if k in args:
- self.connection_args[k] = args[k]
- elif f'REDDIT_{k.upper()}' in os.environ:
- self.connection_args[k] = os.environ[f'REDDIT_{k.upper()}']
- elif k in handler_config:
- self.connection_args[k] = handler_config[k]
-
- self.reddit = None
- self.is_connected = False
-
- comment = CommentTable(self)
- self._register_table('comment', comment)
-
- submission = SubmissionTable(self)
- self._register_table('submission', submission)
-
- def connect(self):
- """Authenticate with the Reddit API using the client ID, client secret
- and user agent provided in the constructor.
- """
- if self.is_connected is True:
- return self.reddit
-
- self.reddit = praw.Reddit(
- client_id=self.connection_args['client_id'],
- client_secret=self.connection_args['client_secret'],
- user_agent=self.connection_args['user_agent'],
- )
-
- self.is_connected = True
- return self.reddit
-
- def check_connection(self) -> StatusResponse:
- '''It evaluates if the connection with Reddit API is alive and healthy.
- Returns:
- HandlerStatusResponse
- '''
-
- response = StatusResponse(False)
-
- try:
- reddit = self.connect()
- reddit.user.me()
- response.success = True
-
- except Exception as e:
- response.error_message = f'Error connecting to Reddit api: {e}. '
- logger.error(response.error_message)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query_string: str = None):
- '''It parses any native statement string and acts upon it (for example, raw syntax commands).
- Args:
- query (Any): query in native format (str for sql databases,
- dict for mongo, api's json etc)
- Returns:
- HandlerResponse
- '''
-
- method_name, params = self.parse_native_query(query_string)
- if method_name == 'get_submission':
- df = self.get_submission(params)
- elif method_name == 'get_subreddit':
- df = self.get_subreddit(params)
- else:
- raise ValueError(f"Method '{method_name}' not supported by RedditHandler")
-
- return Response(
- RESPONSE_TYPE.TABLE,
- data_frame=df
- )
diff --git a/mindsdb/integrations/handlers/reddit_handler/reddit_tables.py b/mindsdb/integrations/handlers/reddit_handler/reddit_tables.py
deleted file mode 100644
index ecbd063b2f2..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/reddit_tables.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import pandas as pd
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-
-
-class CommentTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- '''Select data from the comment table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- '''
-
- reddit = self.handler.connect()
-
- submission_id = None
- conditions = extract_comparison_conditions(query.where)
- for condition in conditions:
- if condition[0] == '=' and condition[1] == 'submission_id':
- submission_id = condition[2]
- break
-
- if submission_id is None:
- raise ValueError('Submission ID is missing in the SQL query')
-
- submission = reddit.submission(id=submission_id)
- submission.comments.replace_more(limit=None)
-
- result = []
- for comment in submission.comments.list():
- data = {
- 'id': comment.id,
- 'body': comment.body,
- 'author': comment.author.name if comment.author else None,
- 'created_utc': comment.created_utc,
- 'score': comment.score,
- 'permalink': comment.permalink,
- 'ups': comment.ups,
- 'downs': comment.downs,
- 'subreddit': comment.subreddit.display_name,
- }
- result.append(data)
-
- result = pd.DataFrame(result)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- '''Get the list of column names for the comment table.
-
- Returns:
- list: A list of column names for the comment table.
- '''
- return [
- 'id',
- 'body',
- 'author',
- 'created_utc',
- 'permalink',
- 'score',
- 'ups',
- 'downs',
- 'subreddit',
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.value)
- if len(columns) > 0:
- result = result[columns]
-
-
-class SubmissionTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- '''Select data from the submission table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- '''
-
- reddit = self.handler.connect()
-
- subreddit_name = None
- sort_type = None
- conditions = extract_comparison_conditions(query.where)
- for condition in conditions:
- if condition[0] == '=' and condition[1] == 'subreddit':
- subreddit_name = condition[2]
- elif condition[0] == '=' and condition[1] == 'sort_type':
- sort_type = condition[2]
- elif condition[0] == '=' and condition[1] == 'items':
- items = int(condition[2])
-
- if not sort_type:
- sort_type = 'hot'
- if not subreddit_name:
- return pd.DataFrame()
-
- if sort_type == 'new':
- submissions = reddit.subreddit(subreddit_name).new(limit=items)
- elif sort_type == 'rising':
- submissions = reddit.subreddit(subreddit_name).rising(limit=items)
- elif sort_type == 'controversial':
- submissions = reddit.subreddit(subreddit_name).controversial(limit=items)
- elif sort_type == 'top':
- submissions = reddit.subreddit(subreddit_name).top(limit=items)
- else:
- submissions = reddit.subreddit(subreddit_name).hot(limit=items)
-
- result = []
- for submission in submissions:
- data = {
- 'id': submission.id,
- 'title': submission.title,
- 'author': submission.author.name if submission.author else None,
- 'created_utc': submission.created_utc,
- 'score': submission.score,
- 'num_comments': submission.num_comments,
- 'permalink': submission.permalink,
- 'url': submission.url,
- 'ups': submission.ups,
- 'downs': submission.downs,
- 'num_crossposts': submission.num_crossposts,
- 'subreddit': submission.subreddit.display_name,
- 'selftext': submission.selftext,
- }
- result.append(data)
-
- result = pd.DataFrame(result)
- self.filter_columns(result, query)
- return result
-
- def get_columns(self):
- '''Get the list of column names for the submission table.
-
- Returns:
- list: A list of column names for the submission table.
- '''
- return [
- 'id',
- 'title',
- 'author',
- 'created_utc',
- 'permalink',
- 'num_comments',
- 'score',
- 'ups',
- 'downs',
- 'num_crossposts',
- 'subreddit',
- 'selftext'
- ]
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
- else:
- columns = self.get_columns()
-
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- result = result[columns]
-
- if query is not None and query.limit is not None:
- return result.head(query.limit.value)
-
- return result
diff --git a/mindsdb/integrations/handlers/reddit_handler/requirements.txt b/mindsdb/integrations/handlers/reddit_handler/requirements.txt
deleted file mode 100644
index 9d9d90a490b..00000000000
--- a/mindsdb/integrations/handlers/reddit_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-praw
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/rest_api_handler/README.md b/mindsdb/integrations/handlers/rest_api_handler/README.md
new file mode 100644
index 00000000000..037d6ff429a
--- /dev/null
+++ b/mindsdb/integrations/handlers/rest_api_handler/README.md
@@ -0,0 +1,168 @@
+---
+title: REST API
+sidebarTitle: REST API
+---
+
+This documentation describes the integration of MindsDB with generic REST APIs using bearer-token authentication.
+The integration allows MindsDB to forward HTTP requests to any REST API using stored credentials via the passthrough endpoint β no SQL table mapping required.
+
+### Prerequisites
+
+Before proceeding, ensure the following prerequisites are met:
+
+1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
+2. Obtain a bearer token (API key, personal access token, etc.) for the target REST API.
+
+## Connection
+
+Establish a connection to a REST API from MindsDB by executing the following SQL command:
+
+```sql
+CREATE DATABASE my_api
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://api.example.com",
+ "bearer_token": "your_token_here"
+};
+```
+
+Required connection parameters include the following:
+
+* `base_url`: The base URL of the REST API (e.g. `https://api.example.com`). All passthrough request paths are appended to this URL.
+* `bearer_token`: The bearer token used for authentication. Injected as `Authorization: Bearer ` on every request.
+
+Optional connection parameters include the following:
+
+* `default_headers`: A JSON object of static headers added to every request (e.g. `{"Accept": "application/json"}`).
+* `allowed_hosts`: A list of allowed hostnames for passthrough requests. Defaults to the hostname of `base_url`. Use `["*"]` to disable host containment.
+* `test_path`: The path used by the `/passthrough/test` endpoint to verify connectivity. Defaults to `/`.
+
+### Examples
+
+Connect to the HubSpot API:
+
+```sql
+CREATE DATABASE my_hubspot
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://api.hubapi.com",
+ "bearer_token": "pat-eu1-..."
+};
+```
+
+Connect to a custom internal API with default headers:
+
+```sql
+CREATE DATABASE my_internal_api
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://internal.example.com/api/v2",
+ "bearer_token": "sk-...",
+ "default_headers": {"Accept": "application/json", "X-Team": "data"},
+ "test_path": "/health"
+};
+```
+
+Connect to an API with multiple allowed hosts:
+
+```sql
+CREATE DATABASE my_multi_region_api
+WITH ENGINE = 'rest_api',
+PARAMETERS = {
+ "base_url": "https://api.example.com",
+ "bearer_token": "your_token",
+ "allowed_hosts": ["api.example.com", "api.eu.example.com"]
+};
+```
+
+## Usage
+
+This handler is **passthrough-only** β it does not expose SQL tables. All interaction is through the REST passthrough endpoint.
+
+### Passthrough Requests
+
+Send HTTP requests to the upstream API through MindsDB:
+
+```
+POST /api/integrations/my_api/passthrough
+```
+
+```json
+{
+ "method": "GET",
+ "path": "/v1/users",
+ "query": {"limit": "10"},
+ "headers": {"Accept": "application/json"}
+}
+```
+
+The response wraps the upstream HTTP response:
+
+```json
+{
+ "status_code": 200,
+ "headers": {"content-type": "application/json"},
+ "body": {"results": [...]},
+ "content_type": "application/json"
+}
+```
+
+Supported HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`.
+
+### Testing the Connection
+
+Verify that the base URL, token, and host allowlist are configured correctly:
+
+```
+POST /api/integrations/my_api/passthrough/test
+```
+
+Returns:
+
+```json
+{"ok": true, "status_code": 200, "host": "api.example.com", "latency_ms": 140}
+```
+
+Or on failure:
+
+```json
+{"ok": false, "error_code": "auth_failed", "message": "upstream rejected credentials; base URL and allowlist look correct"}
+```
+
+## Security
+
+- **Credentials are never exposed.** The bearer token is stored in MindsDB and injected at request time. It is never returned to the caller.
+- **Host containment.** Requests are restricted to hostnames in the allowlist (defaults to the `base_url` host). Private/loopback IP addresses are rejected by default.
+- **Header filtering.** Callers cannot override `Authorization`, `Host`, `Cookie`, or `Proxy-*` headers.
+- **Response scrubbing.** If the upstream API echoes the token in responses, it is replaced with `[REDACTED_API_KEY]` before returning to the caller.
+- **Size limits.** Request bodies are capped at 1 MB, response bodies at 10 MB (configurable via environment variables).
+
+## Troubleshooting
+
+
+`base_url is not configured`
+
+* **Symptoms**: Passthrough requests fail with a configuration error.
+* **Checklist**:
+ 1. Ensure `base_url` is provided in the connection parameters.
+ 2. The URL must include the scheme (`https://`).
+
+
+
+`host 'X' is not in the datasource allowlist`
+
+* **Symptoms**: Passthrough requests to a valid URL are rejected.
+* **Checklist**:
+ 1. The request path may resolve to a different hostname than `base_url`.
+ 2. Add the hostname to `allowed_hosts` in the connection parameters.
+ 3. Use `["*"]` to disable host containment (not recommended for production).
+
+
+
+`upstream rejected credentials (401/403)`
+
+* **Symptoms**: The `/passthrough/test` endpoint returns `error_code: "auth_failed"`.
+* **Checklist**:
+ 1. Verify the bearer token is valid and not expired.
+ 2. Check that the token has the required scopes/permissions for the API endpoints you are calling.
+
diff --git a/mindsdb/integrations/handlers/rest_api_handler/__about__.py b/mindsdb/integrations/handlers/rest_api_handler/__about__.py
new file mode 100644
index 00000000000..b7f131f401c
--- /dev/null
+++ b/mindsdb/integrations/handlers/rest_api_handler/__about__.py
@@ -0,0 +1,9 @@
+__title__ = "MindsDB REST API handler"
+__package_name__ = "mindsdb_rest_api_handler"
+__version__ = "0.0.1"
+__description__ = "MindsDB handler for generic REST APIs with bearer-token passthrough"
+__author__ = "MindsDB Inc"
+__github__ = "https://github.com/mindsdb/mindsdb"
+__pypi__ = "https://pypi.org/project/mindsdb/"
+__license__ = "MIT"
+__copyright__ = "Copyright 2026 - mindsdb"
diff --git a/mindsdb/integrations/handlers/confluence_handler/__init__.py b/mindsdb/integrations/handlers/rest_api_handler/__init__.py
similarity index 79%
rename from mindsdb/integrations/handlers/confluence_handler/__init__.py
rename to mindsdb/integrations/handlers/rest_api_handler/__init__.py
index a87e7512ed4..d9f8fcf24eb 100644
--- a/mindsdb/integrations/handlers/confluence_handler/__init__.py
+++ b/mindsdb/integrations/handlers/rest_api_handler/__init__.py
@@ -4,18 +4,18 @@
from .connection_args import connection_args, connection_args_example
try:
- from .confluence_handler import ConfluenceHandler as Handler
+ from .rest_api_handler import RestApiHandler as Handler
import_error = None
except Exception as e:
Handler = None
import_error = e
-title = "Confluence"
-name = "confluence"
+title = "REST API"
+name = "rest_api"
type = HANDLER_TYPE.DATA
icon_path = "icon.svg"
-support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY
+support_level = HANDLER_SUPPORT_LEVEL.MINDSDB
__all__ = [
"Handler",
@@ -27,6 +27,6 @@
"description",
"import_error",
"icon_path",
- "connection_args_example",
"connection_args",
+ "connection_args_example",
]
diff --git a/mindsdb/integrations/handlers/rest_api_handler/connection_args.py b/mindsdb/integrations/handlers/rest_api_handler/connection_args.py
new file mode 100644
index 00000000000..bba20202ba0
--- /dev/null
+++ b/mindsdb/integrations/handlers/rest_api_handler/connection_args.py
@@ -0,0 +1,44 @@
+from collections import OrderedDict
+
+from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
+
+connection_args = OrderedDict(
+ base_url={
+ "type": ARG_TYPE.STR,
+ "description": "Base URL of the REST API (e.g. https://api.example.com)",
+ "required": True,
+ "label": "Base URL",
+ },
+ bearer_token={
+ "type": ARG_TYPE.PWD,
+ "description": "Bearer token injected as Authorization: Bearer ",
+ "required": True,
+ "label": "Bearer Token",
+ "secret": True,
+ },
+ default_headers={
+ "type": ARG_TYPE.DICT,
+ "description": 'Static headers added to every request (e.g. {"Accept": "application/json"})',
+ "required": False,
+ "label": "Default Headers",
+ },
+ allowed_hosts={
+ "type": ARG_TYPE.LIST,
+ "description": 'Allowed hostnames for passthrough requests. Defaults to the base_url host. Use ["*"] to disable containment.',
+ "required": False,
+ "label": "Allowed Hosts",
+ },
+ test_path={
+ "type": ARG_TYPE.STR,
+ "description": "Path used by the /passthrough/test endpoint. Defaults to /",
+ "required": False,
+ "label": "Test Path",
+ },
+)
+
+connection_args_example = OrderedDict(
+ base_url="https://api.example.com",
+ bearer_token="your_token_here",
+ default_headers={"Accept": "application/json"},
+ allowed_hosts=["api.example.com"],
+)
diff --git a/mindsdb/integrations/handlers/rest_api_handler/icon.svg b/mindsdb/integrations/handlers/rest_api_handler/icon.svg
new file mode 100644
index 00000000000..2346f8d4d3e
--- /dev/null
+++ b/mindsdb/integrations/handlers/rest_api_handler/icon.svg
@@ -0,0 +1,8 @@
+
diff --git a/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py b/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py
new file mode 100644
index 00000000000..a55ad7c1f4d
--- /dev/null
+++ b/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py
@@ -0,0 +1,87 @@
+from typing import Any
+
+from mindsdb.integrations.libs.api_handler import APIHandler
+from mindsdb.integrations.libs.passthrough import PassthroughMixin
+from mindsdb.integrations.libs.passthrough_types import PassthroughRequest
+from mindsdb.integrations.libs.response import (
+ HandlerStatusResponse as StatusResponse,
+ HandlerResponse as Response,
+ RESPONSE_TYPE,
+)
+from mindsdb.utilities import log
+
+logger = log.getLogger(__name__)
+
+
+class RestApiHandler(APIHandler, PassthroughMixin):
+ """Generic REST API handler β passthrough only, no SQL tables.
+
+ This is the "bring your own URL" escape hatch for any bearer-token API
+ that mindsdb doesn't have a named handler for. Users supply a base_url
+ and a bearer_token and get full passthrough access.
+ """
+
+ name = "rest_api"
+
+ def __init__(self, name: str, **kwargs: Any) -> None:
+ super().__init__(name)
+ self.connection_data = kwargs.get("connection_data") or {}
+ self.kwargs = kwargs
+ self.is_connected = False
+
+ # PassthroughMixin reads these instance attributes at runtime.
+ self._bearer_token_arg = "bearer_token"
+ self._base_url_default = None # user must supply base_url
+
+ # Build the test request from connection_data. Default to GET /
+ # unless the user provided a custom test_path.
+ test_path = self.connection_data.get("test_path", "/")
+ if not test_path.startswith("/"):
+ test_path = f"/{test_path}"
+ self._test_request = PassthroughRequest(method="GET", path=test_path)
+
+ def connect(self) -> None:
+ """No persistent connection needed β passthrough is stateless.
+
+ Validation happens in check_connection(), which we
+ call separately during CREATE DATABASE.
+ """
+ self.is_connected = True
+
+ def check_connection(self) -> StatusResponse:
+ """Validate that base_url and bearer_token are present."""
+ response = StatusResponse(False)
+ try:
+ base_url = self._build_base_url()
+ if not base_url:
+ response.error_message = "base_url is required"
+ return response
+ token = self.connection_data.get(self._bearer_token_arg)
+ if not token:
+ response.error_message = "bearer_token is required"
+ return response
+ response.success = True
+ self.is_connected = True
+ except Exception as e:
+ response.error_message = str(e)
+ return response
+
+ def native_query(self, query: str) -> Response:
+ """Not supported β use passthrough instead."""
+ return Response(
+ RESPONSE_TYPE.ERROR,
+ error_message="rest_api handler is passthrough-only. Use the /passthrough endpoint.",
+ )
+
+ def get_tables(self) -> Response:
+ """No SQL tables β passthrough only."""
+ import pandas as pd
+
+ return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())
+
+ def get_columns(self, table_name: str) -> Response:
+ """No SQL tables β passthrough only."""
+ return Response(
+ RESPONSE_TYPE.ERROR,
+ error_message="rest_api handler is passthrough-only. No tables available.",
+ )
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/README.md b/mindsdb/integrations/handlers/rocket_chat_handler/README.md
deleted file mode 100644
index ef0821a9010..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Rocket Chat API Handler
-
-This handler integrates with the [Rocket Chat API](https://developer.rocket.chat/reference/api) to read and write messages.
-
-### Connect to the Rocket Chat API
-We start by creating a database to connect to the Rocket Chat API.
-
-```
-CREATE DATABASE my_rocket_chat
-WITH
- ENGINE = "rocket_chat"
- PARAMETERS = {
- "username": ,
- "password": ,
- "domain":
- };
-```
-
-### Select Data
-To see if the connection was successful, try searching for messages in a channel
-
-```
-SELECT *
-FROM my_rocket_chat.channel_messages WHERE room_id="GENERAL";
-```
-
-Each row should look like this:
-
-| id | room_id | bot_id | text | username | name | sent_at |
-| ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- |
-| PbrLoFpxYk2bbkvyA | GENERAL | [NULL] | Sample message | minds.db | MindsDB | 2023-05-05T16:41:57.998Z |
-
-where:
-* id - ID of the message
-* room_id - ID of the channel/room the message was sent in
-* bot_id - ID of the bot that sent this message if applicable
-* text - Actual message text
-* username - Username for sent message
-* name: Full name for sent message
-* sent_at: When the message was sent in 'YYYY-MM-DDTHH:MM:SS.mmmZ' format
-
-
-## Posting Messages
-
-You can also post messages to a Rocket Chat channel using MindsDB:
-
-```
-INSERT INTO my_rocket_chat.channel_messages (room_id, text) VALUES ("GENERAL", "This is a test message!")
-```
-
-Supported insert columns:
-* room_id (REQUIRED) - ID of room to send message to
-* text (REQUIRED) - Message to send
-* alias - What the message's name will appear as (username will still display).
-* emoji - Sets the avatar on the message to be this emoji (e.g. :smirk:).
-* avatar - Image URL to use for the message avatar.
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/__about__.py b/mindsdb/integrations/handlers/rocket_chat_handler/__about__.py
deleted file mode 100644
index 19877330b71..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Rocket Chat handler'
-__package_name__ = 'mindsdb_rocket_chat_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for the Rocket Chat API"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023 - mindsdb'
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/__init__.py b/mindsdb/integrations/handlers/rocket_chat_handler/__init__.py
deleted file mode 100644
index e4ab474d8b6..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .rocket_chat_handler import RocketChatHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Rocket Chat'
-name = 'rocket_chat'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/icon.svg b/mindsdb/integrations/handlers/rocket_chat_handler/icon.svg
deleted file mode 100644
index 5ae6e6bca20..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/icon.svg
+++ /dev/null
@@ -1,6 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/requirements.txt b/mindsdb/integrations/handlers/rocket_chat_handler/requirements.txt
deleted file mode 100644
index c5ef3d60283..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-rocketchat_API
-urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py b/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py
deleted file mode 100644
index 327f4adf45f..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import pandas as pd
-
-from rocketchat_API.rocketchat import RocketChat
-
-from mindsdb.integrations.handlers.rocket_chat_handler.rocket_chat_tables import (
- ChannelMessagesTable, ChannelsTable, DirectsTable, DirectMessagesTable, UsersTable)
-from mindsdb.integrations.libs.api_handler import APIChatHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
-)
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-logger = log.getLogger(__name__)
-
-
-class RocketChatHandler(APIChatHandler):
- """A class for handling connections and interactions with the Rocket Chat API.
-
- Attributes:
- username (str): Rocket Chat username to use for authentication.
- password (str): Rocket Chat username to use for authentication.
- auth_token (str): Rocket Chat authorization token to use for all API requests.
- auth_user_id (str): Rocket Chat user ID to associate with all API requests
- domain (str): Path to Rocket Chat domain to use (e.g. https://mindsdb.rocket.chat).
- client (RocketChatClient): The `RocketChatClient` object for interacting with the Rocket Chat API.
- is_connected (bool): Whether or not the API client is connected to the Rocket Chat API.
- """
-
- def __init__(self, name: str = None, **kwargs):
- """Registers all API tables and prepares the handler for an API connection.
-
- Args:
- name: (str): The handler name to use
- """
- super().__init__(name)
- self.username = None
- self.password = None
- self.auth_token = None
- self.auth_user_id = None
- self.domain = None
-
- args = kwargs.get('connection_data', {})
- if 'domain' not in args:
- raise ValueError('Must include Rocket Chat "domain" to read and write messages')
- self.domain = args['domain']
-
- if 'token' in args and 'user_id' in args:
- self.auth_token = args['token']
- self.auth_user_id = args['user_id']
- elif 'username' in args and 'password' in args:
- self.username = args['username']
- self.password = args['password']
- else:
- raise ValueError('Need "token" and "user_id", or "username" and "password" to connect to Rocket Chat')
-
- self.client = None
- self.is_connected = False
-
- self._register_table('channels', ChannelsTable(self))
-
- self._register_table('channel_messages', ChannelMessagesTable(self))
-
- self._register_table('directs', DirectsTable(self))
-
- self._register_table('direct_messages', DirectMessagesTable(self))
-
- self._register_table('users', UsersTable(self))
-
- def get_chat_config(self):
- params = {
- 'polling': {
- 'type': 'message_count',
- 'table': 'directs',
- 'chat_id_col': '_id',
- 'count_col': 'msgs'
- },
- 'chat_table': {
- 'name': 'direct_messages',
- 'chat_id_col': 'room_id',
- 'username_col': 'username',
- 'text_col': 'text',
- 'time_col': 'sent_at',
- }
- }
- return params
-
- def get_my_user_name(self):
- info = self.call_api('me')
- return info['username']
-
- def connect(self):
- """Creates a new Rocket Chat API client if needed and sets it as the client to use for requests.
-
- Returns newly created Rocket Chat API client, or current client if already set.
- """
- if self.is_connected and self.client is not None:
- return self.client
-
- self.client = RocketChat(
- user=self.username,
- password=self.password,
- auth_token=self.auth_token,
- user_id=self.auth_user_id,
- server_url=self.domain
- )
-
- self.is_connected = True
- return self.client
-
- def check_connection(self) -> StatusResponse:
- """Checks connection to Rocket Chat API by sending a ping request.
-
- Returns StatusResponse indicating whether or not the handler is connected.
- """
-
- response = StatusResponse(False)
-
- try:
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f'Error connecting to Rocket Chat API: {e}!')
- response.error_message = e
-
- if response.success is False:
- self.is_connected = False
- return response
-
- def native_query(self, query: str = None) -> Response:
- ast = parse_sql(query)
- return self.query(ast)
-
- def call_api(self, method_name: str = None, *args, **kwargs) -> pd.DataFrame:
- """Calls the Rocket Chat API method with the given params.
-
- Returns results as a pandas DataFrame.
-
- Args:
- method_name (str): Method name to call
- params (Dict): Params to pass to the API call
- """
- client = self.connect()
-
- method = getattr(client, method_name)
-
- messages_response = method(*args, **kwargs)
-
- if not messages_response.ok:
- messages_response.raise_for_status()
- return messages_response.json()
diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py b/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py
deleted file mode 100644
index 5cd36f5119e..00000000000
--- a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import pandas as pd
-
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.sql_utils import conditions_to_filter, project_dataframe, sort_dataframe
-from mindsdb_sql_parser import ast
-
-
-def message_to_dataframe_row(message: dict):
- message['id'] = message['_id']
- message['room_id'] = message['rid']
- message['text'] = message['msg']
- message['sent_at'] = message['ts']
-
- if 'u' in message:
- if 'username' in message['u']:
- message['username'] = message['u']['username']
- if 'name' in message['u']:
- message['name'] = message['u']['name']
- if 'bot' in message and 'i' in message['bot']:
- message['bot_id'] = message['bot']['i']
- return message
-
-
-class ChannelsTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- message_data = self.handler.call_api('channels_list')
- df = pd.DataFrame(message_data['channels'])
- df = project_dataframe(df, query.targets, self.get_columns())
-
- return df
-
- def get_columns(self):
- """Gets all columns to be returned in pandas DataFrame responses"""
- return [
- '_id',
- 'name',
- 'usersCount',
- 'msgs',
- ]
-
-
-class ChannelMessagesTable(APITable):
- """Manages SELECT and INSERT operations for Rocket Chat messages."""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Selects message data from the Rocket Chat API and returns it as a pandas DataFrame.
-
- Returns dataframe representing the Rocket Chat API results.
-
- Args:
- query (ast.Select): Given SQL SELECT query
- """
- filters = conditions_to_filter(query.where)
-
- if 'room_id' not in filters:
- raise NotImplementedError()
-
- params = {}
-
- if query.limit:
- params['count'] = query.limit
-
- # See Channel Messages endpoint:
- # https://developer.rocket.chat/reference/api/rest-api/endpoints/core-endpoints/channels-endpoints/messages
- message_data = self.handler.call_api('channels_history', filters['room_id'], **params)
-
- # Only return the columns we need to.
- message_rows = [message_to_dataframe_row(m) for m in message_data['messages']]
- df = pd.DataFrame(message_rows)
-
- df = sort_dataframe(df, query.order_by)
- df = project_dataframe(df, query.targets, self.get_columns())
-
- return df
-
- def insert(self, query: ast.Insert):
- """Posts a message using the Rocket Chat API.
-
- Args:
- query (ast.Insert): Given SQL INSERT query
- """
- # See Post Message endpoint:
- # https://developer.rocket.chat/reference/api/rest-api/endpoints/core-endpoints/chat-endpoints/postmessage
- column_names = [col.name for col in query.columns]
- for insert_row in query.values:
- insert_params = dict(zip(column_names, insert_row))
-
- self.handler.call_api('chat_post_message', **insert_params)
-
- def get_columns(self):
- """Gets all columns to be returned in pandas DataFrame responses"""
- return [
- 'id',
- 'room_id',
- 'bot_id',
- 'text',
- 'username',
- 'name',
- 'sent_at'
- ]
-
-
-class DirectsTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- message_data = self.handler.call_api('im_list')
- df = pd.DataFrame(message_data['ims'])
- df = project_dataframe(df, query.targets, self.get_columns())
-
- return df
-
- def insert(self, query: ast.Insert):
- column_names = [col.name for col in query.columns]
- for insert_row in query.values:
- insert_params = dict(zip(column_names, insert_row))
-
- self.handler.call_api('im_create', **insert_params)
-
- def get_columns(self):
- """Gets all columns to be returned in pandas DataFrame responses"""
- return [
- '_id',
- 'usernames',
- 'usersCount',
- 'msgs',
- ]
-
-
-class DirectMessagesTable(APITable):
-
- def select(self, query: ast.Select) -> pd.DataFrame:
-
- filters = conditions_to_filter(query.where)
-
- if 'room_id' not in filters:
- raise NotImplementedError()
-
- params = {}
-
- if query.limit:
- params['count'] = query.limit
-
- message_data = self.handler.call_api('im_history', filters['room_id'], **params)
-
- message_rows = [message_to_dataframe_row(m) for m in message_data['messages']]
- df = pd.DataFrame(message_rows)
- df = sort_dataframe(df, query.order_by)
- df = project_dataframe(df, query.targets, self.get_columns())
-
- return df
-
- def insert(self, query: ast.Insert):
-
- column_names = [col.name for col in query.columns]
- for insert_row in query.values:
- insert_params = dict(zip(column_names, insert_row))
-
- if 'username' in insert_params:
- # resolve username
- resp = self.handler.call_api('users_info', insert_params['username'])
- if 'user' in resp:
- insert_params['room_id'] = resp['user']['_id']
- del insert_params['username']
- else:
- raise ValueError(f'User not found: {insert_params["username"]}')
-
- self.handler.call_api('chat_post_message', **insert_params)
-
- def get_columns(self):
- """Gets all columns to be returned in pandas DataFrame responses"""
- return [
- 'id',
- 'room_id',
- 'bot_id',
- 'text',
- 'username',
- 'name',
- 'sent_at'
- ]
-
-
-class UsersTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- message_data = self.handler.call_api('users_list')
- df = pd.DataFrame(message_data['users'])
- df = project_dataframe(df, query.targets, self.get_columns())
-
- return df
-
- def get_columns(self):
- """Gets all columns to be returned in pandas DataFrame responses"""
- return [
- '_id',
- 'username',
- 'name',
- 'status',
- 'active',
- 'type',
- ]
diff --git a/mindsdb/integrations/handlers/rockset_handler/__about__.py b/mindsdb/integrations/handlers/rockset_handler/__about__.py
deleted file mode 100644
index 4c23fb194f5..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Rockset handler'
-__package_name__ = 'mindsdb_rockset_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Rockset"
-__author__ = 'Alissa Troiano'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/rockset_handler/__init__.py b/mindsdb/integrations/handlers/rockset_handler/__init__.py
deleted file mode 100644
index 0a005037459..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .rockset_handler import RocksetHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Rockset'
-name = 'rockset'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/rockset_handler/connection_args.py b/mindsdb/integrations/handlers/rockset_handler/connection_args.py
deleted file mode 100644
index 6f306302d8f..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/connection_args.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'Rockset user name'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'Rockset password',
- 'secret': True
- },
- api_key={
- 'type': ARG_TYPE.STR,
- 'description': 'Rockset API key'
- },
- api_server={
- 'type': ARG_TYPE.STR,
- 'description': 'Rockset API server'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'Rockset host'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'Rockset port'
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'Rockset database'
- }
-)
-connection_args_example = OrderedDict(
- user='rockset',
- password='rockset',
- api_key="adkjf234rksjfa23waejf2",
- api_server='api-us-west-2.rockset.io',
- host='localhost',
- port='3306',
- database='test'
-)
diff --git a/mindsdb/integrations/handlers/rockset_handler/icon.svg b/mindsdb/integrations/handlers/rockset_handler/icon.svg
deleted file mode 100644
index 73cfa304953..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/icon.svg
+++ /dev/null
@@ -1,5 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/rockset_handler/requirements.txt b/mindsdb/integrations/handlers/rockset_handler/requirements.txt
deleted file mode 100644
index ee467569031..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--r mindsdb/integrations/handlers/mysql_handler/requirements.txt
diff --git a/mindsdb/integrations/handlers/rockset_handler/rockset_handler.py b/mindsdb/integrations/handlers/rockset_handler/rockset_handler.py
deleted file mode 100644
index aae7d02dcdf..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/rockset_handler.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler
-
-
-class RocksetHandler(MySQLHandler):
- """
- This handler handles connection and execution of the Rockset integration
- """
- name = 'rockset'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/__init__.py b/mindsdb/integrations/handlers/rockset_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/test.png b/mindsdb/integrations/handlers/rockset_handler/tests/test.png
deleted file mode 100644
index 684533fe6e4..00000000000
Binary files a/mindsdb/integrations/handlers/rockset_handler/tests/test.png and /dev/null differ
diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/test2.png b/mindsdb/integrations/handlers/rockset_handler/tests/test2.png
deleted file mode 100644
index e9d54745b7a..00000000000
Binary files a/mindsdb/integrations/handlers/rockset_handler/tests/test2.png and /dev/null differ
diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py b/mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py
deleted file mode 100644
index 6b98fbeeb6a..00000000000
--- a/mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import unittest
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-from mindsdb.integrations.handlers.rockset_handler.rockset_handler import RocksetHandler
-
-
-class RocksetHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_args": {
- "host": '127.0.0.1',
- "port": 3306,
- "user": "rockset",
- "password": "rockset"
- }
- }
- cls.handler = RocksetHandler('test_rockset_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.check_connection()
-
- def test_1_get_tables(self):
- tables = self.handler.get_tables()
- self.assertEqual(tables, [])
-
- def test_2_get_columns(self):
- columns = self.handler.get_columns('test')
- self.assertEqual(columns, [])
-
- def test_3_query(self):
- response = self.handler.query('SELECT 1')
- self.assertEqual(response['type'], RESPONSE_TYPE.QUERY)
- self.assertEqual(response['data'], [[1]])
diff --git a/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py b/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py
index d816e233147..e87b44317e0 100644
--- a/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py
+++ b/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py
@@ -5,6 +5,8 @@
from salesforce_api.exceptions import AuthenticationError, RestRequestCouldNotBeUnderstoodError
from mindsdb.integrations.libs.api_handler import MetaAPIHandler
+from mindsdb.integrations.libs.passthrough import PassthroughMixin
+from mindsdb.integrations.libs.passthrough_types import PassthroughRequest
from mindsdb.integrations.libs.response import (
HandlerResponse as Response,
HandlerStatusResponse as StatusResponse,
@@ -18,13 +20,29 @@
logger = log.getLogger(__name__)
-class SalesforceHandler(MetaAPIHandler):
+class SalesforceHandler(MetaAPIHandler, PassthroughMixin):
"""
This handler handles the connection and execution of SQL statements on Salesforce.
"""
name = "salesforce"
+ # REST passthrough configuration. Salesforce's base URL is per-org
+ # (`instance_url`) and is normally discovered at auth time. v1 requires
+ # the caller to supply both `access_token` and `instance_url` explicitly
+ # in connection_data; dynamic discovery from the username/password flow
+ # is deferred to a future refresh-aware mixin.
+ _bearer_token_arg = "access_token"
+ _base_url_default = None
+ _test_request = PassthroughRequest(method="GET", path="/services/data/v60.0/")
+
+ def _build_base_url(self) -> str | None:
+ data = self._get_connection_data()
+ instance_url = data.get("instance_url")
+ if not instance_url:
+ return None
+ return str(instance_url).rstrip("/")
+
def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None:
"""
Initializes the handler.
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/README.md b/mindsdb/integrations/handlers/sap_erp_handler/README.md
deleted file mode 100644
index 07007313807..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/README.md
+++ /dev/null
@@ -1,82 +0,0 @@
-# SAP ERP Handler
-
-This handler allows you to interact with SAP ERP API available [here](https://api.sap.com/api/API_BUSINESS_PARTNER/overview).
-
-## About SAP ERP
-
-SAP ERP is enterprise resource planning software developed by the company SAP SE.
-
-## Limitations
-
-Currently this handler only supports `SELECT` queries
-
-## SAP ERP Handler Initialization
-
-You can create the database via the following query:
-
-```sql
-CREATE DATABASE sap_datasource
-WITH ENGINE = "sap_erp",
-PARAMETERS = {
- "api_key": "...",
- "base_url": "https://sandbox.api.sap.com/s4hanacloud/sap/opu/odata/sap/API_BUSINESS_PARTNER/"
-};
-```
-
-To select from various tables, you can use `SELECT` statement:
-
-```sql
-SELECT * FROM sap_datasource.address_email_address;
-```
-
-## Available tables
-
-`address_email_address`: email address data linked to all business partner address records in the system
-`address_fax_number`: fax address data linked to all the business partner address records in the system
-`address_home_page`: home page url address records linked to all business partner address records in the system
-`address_phone_number`: all the mobile/telephone address records linked to all the business partner address records in the system
-`bp_addr_depdnt_intl_loc_number`: address dependent data for the business partner address by using the key fields business partner number and address id
-`bp_contact_to_address`: workplace address records linked to all the business partner contact records in the system.
-`bp_contact_to_func_and_dept`: contact person department and function data linked to all business partner contact records in the system
-`bp_credit_worthiness`: contact person department and function data linked to all business partner contact records in the system
-`bp_data_controller`: business partner data controllers of all the available records linked to business partners in the system
-`bp_financial_services_extn`: financial services business partner attributes of all the available records linked to business partners in the system
-`bp_financial_services_reporting`: financial services reporting attributes of all the available records linked to business partners in the system
-`bp_fiscal_year_information`: business partner fiscal year information of all the available records linked to business partners in the system.
-`bp_relationship`: business partner relationship data fields of all the available records in the system
-`bu_pa_address_usage`: all the address usage records linked to all business partner address records in the system
-`bu_pa_identification`: business partner identification data fields of all the records available records in the system
-`bu_pa_industry`: business partner industry data fields of all the available records in the system
-`business_partner`: general data fields of all the business partner records available in the system
-`business_partner_address`: business partner address data fields of all the available records in the system
-`business_partner_contact`: business partner contact data fields of all the available records in the system
-`business_partner_payment_card`: business partner payment card data fields of all the available records in the system
-`business_partner_rating`: business partner ratings of all the available records linked to business partners in the system
-`business_partner_role`: business partner role data fields of all the records available records in the system
-`business_partner_tax_number`: tax number data of all the available records linked to business partners in the system
-`business_partner_address_dependent_tax_number`: address dependent tax number data of all the available records linked to business partners in the system
-`cust_addr_depdnt_ext_identifier`: address dependent external identifiers of all the available records linked to customers in the system
-`customer`: general data of all the customer records available in the system
-`customer_company`: customer company data fields of all the available records in the system linked to customer
-`customer_company_text`: customer company text records attached to customer company in the system
-`customer_dunning`: dunning records attached to customer company in the system
-`customer_sales_area`: customer sales area data fields of all the available records in the system
-`customer_sales_area_tax`: customer sales area tax data fields of all the available records in the system
-`customer_sales_area_text`: customer sales area text fields of all the available records in the system linked to customer sales areas
-`customer_tax_grouping`: customer tax grouping data attached to a customer in the system
-`customer_text`: customer text data attached to a customer in the system
-`customer_unloading_point`: unloading point data attached to a customer in the system
-`customer_withholding_tax`: withholding tax records attached to customer company in the system
-`customer_sales_partner_func`: partner function fields of all the available records in the system linked to customer sales areas
-`customer_sales_area_addr_depdnt_info`: address dependent customer sales area data fields of all the available records in the system
-`customer_sales_area_addr_depdnt_tax_info`: address dependent customer sales area tax data fields of all the available records in the system
-`customer_unloading_point_addr_depdnt_info`: address dependent customer unloading point data fields of all the available records in the system
-`supplier`: general data of all the supplier records available in the system
-`supplier_company`: supplier company data available in the system
-`supplier_company_text`: supplier company text data attached to supplier company in the system
-`supplier_dunning`: dunning records attached to supplier company in the system
-`supplier_partner_func`: partner function fields of all the available records in the system linked to supplier purchasing organization
-`supplier_purchasing_org`: supplier purchasing organization data attached to supplier records in the system
-`supplier_purchasing_org_text`: supplier purchasing organization text data attached to purchasing organization in the system
-`supplier_text`: supplier text data attached to purchasing organization in the system
-`supplier_withholding_tax`: withholding tax records attached to supplier company in the system
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/__about__.py b/mindsdb/integrations/handlers/sap_erp_handler/__about__.py
deleted file mode 100644
index 4fd6177814e..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB SAP ERP handler"
-__package_name__ = "mindsdb_sap_erp_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for SAP ERP"
-__author__ = "Aditya Azad"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/__init__.py b/mindsdb/integrations/handlers/sap_erp_handler/__init__.py
deleted file mode 100644
index 7c515006d0e..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __description__ as description
-from .__about__ import __version__ as version
-
-try:
- from .sap_erp_handler import SAPERPHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "SAP ERP"
-name = "sap_erp"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/api.py b/mindsdb/integrations/handlers/sap_erp_handler/api.py
deleted file mode 100644
index cf812d22462..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/api.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import requests
-from urllib.parse import urljoin
-
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-def move_under(d, key_contents_to_move, key_to_move_under=None):
- """
- Moves the keys in nested dict with key key_contents_to_move under the dict defined by key_to_move_under.
- If no key_to_move_under is provided, the keys are moved under d.
- eg.
- Calling this on the following dict (d) with key_contents_to_move = "a":
- {
- "a": {
- "b": 1,
- "c": 2
- }
- }
- results in:
- {
- "b": 1,
- "c": 2
- }
- """
- if key_contents_to_move not in d:
- return
- for k, v in d[key_contents_to_move].items():
- if key_to_move_under:
- d[key_to_move_under][k] = v
- else:
- d[k] = v
- del d[key_contents_to_move]
-
-
-class SAPERP:
-
- def __init__(self, url: str, api_key: str) -> None:
- self.base_url = url
- self.api_key = api_key
-
- def _request(self, method: str, relative_endpoint: str, data=None):
- kwargs = {
- "method": method,
- "url": urljoin(self.base_url, relative_endpoint),
- "headers": {
- "APIKey": self.api_key,
- "Accept": "application/json",
- "DataServiceVersion": "2.0"
- }
- }
- if data is not None:
- kwargs["data"] = data
- return requests.request(**kwargs)
-
- def is_connected(self) -> bool:
- if self._request("get", "").ok:
- return True
- return False
-
- def get(self, endpoint):
- """ Common method for all get endpoints """
- try:
- resp = self._request("get", endpoint)
- if resp.ok:
- resp = resp.json()["d"]
- if "results" in resp:
- resp = resp["results"]
- else:
- resp = [resp]
- else:
- resp = []
- for r in resp:
- move_under(r, "__metadata")
- return resp
- except Exception as e:
- logger.error(f"Error requesting endpoint {endpoint}: {e}")
- return {}
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/icon.svg b/mindsdb/integrations/handlers/sap_erp_handler/icon.svg
deleted file mode 100644
index 1849de1d44a..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/icon.svg
+++ /dev/null
@@ -1,13 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_handler.py b/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_handler.py
deleted file mode 100644
index b437b0abc5b..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_handler.py
+++ /dev/null
@@ -1,161 +0,0 @@
-from collections import OrderedDict
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-from mindsdb.integrations.handlers.sap_erp_handler.api import SAPERP
-from mindsdb.integrations.handlers.sap_erp_handler.sap_erp_tables import (
- AddressEmailAddressTable,
- AddressFaxNumberTable,
- AddressHomePageURLTable,
- AddressPhoneNumberTable,
- BPAddrDepdntIntlLocNumberTable,
- BPContactToAddressTable,
- BPContactToFuncAndDeptTable,
- BPCreditWorthinessTable,
- BPDataControllerTable,
- BPFinancialServicesExtnTable,
- BPFinancialServicesReportingTable,
- BPFiscalYearInformationTable,
- BPRelationshipTable,
- BuPaAddressUsageTable,
- BuPaIdentificationTable,
- BuPaIndustryTable,
- BusinessPartnerTable,
- BusinessPartnerAddressTable,
- BusinessPartnerContactTable,
- BusinessPartnerPaymentCardTable,
- BusinessPartnerRatingTable,
- BusinessPartnerRoleTable,
- BusinessPartnerTaxNumberTable,
- BusPartAddrDepdntTaxNumberTable,
- CustAddrDepdntExtIdentifierTable,
- CustAddrDepdntInformationTable,
- CustomerCompanyTable,
- CustomerCompanyTextTable,
- CustomerDunningTable,
- CustomerSalesAreaTable,
- CustomerSalesAreaTaxTable,
- CustomerSalesAreaTextTable,
- CustomerTaxGroupingTable,
- CustomerTextTable,
- CustomerUnloadingPointTable,
- CustomerWithHoldingTaxTable,
- CustSalesPartnerFuncTable,
- CustSlsAreaAddrDepdntInfoTable,
- CustSlsAreaAddrDepdntTaxInfoTable,
- CustUnldgPtAddrDepdntInfoTable,
- SupplierTable,
- SupplierCompanyTable,
- SupplierCompanyTextTable,
- SupplierDunningTable,
- SupplierPartnerFuncTable,
- SupplierPurchasingOrgTable,
- SupplierPurchasingOrgTextTable,
- SupplierTextTable,
- SupplierWithHoldingTaxTable,
-)
-
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-
-
-class SAPERPHandler(APIHandler):
-
- def __init__(self, name: str, **kwargs) -> None:
- super().__init__(name)
- self.connection = None
- self.is_connected = False
- self.api_key = kwargs.get("connection_data", {}).get("api_key", "")
- self.base_url = kwargs.get("connection_data", {}).get("base_url", "")
- _tables = [
- AddressEmailAddressTable,
- AddressFaxNumberTable,
- AddressHomePageURLTable,
- AddressPhoneNumberTable,
- BPAddrDepdntIntlLocNumberTable,
- BPContactToAddressTable,
- BPContactToFuncAndDeptTable,
- BPCreditWorthinessTable,
- BPDataControllerTable,
- BPFinancialServicesExtnTable,
- BPFinancialServicesReportingTable,
- BPFiscalYearInformationTable,
- BPRelationshipTable,
- BuPaAddressUsageTable,
- BuPaIdentificationTable,
- BuPaIndustryTable,
- BusinessPartnerTable,
- BusinessPartnerAddressTable,
- BusinessPartnerContactTable,
- BusinessPartnerPaymentCardTable,
- BusinessPartnerRatingTable,
- BusinessPartnerRoleTable,
- BusinessPartnerTaxNumberTable,
- BusPartAddrDepdntTaxNumberTable,
- CustAddrDepdntExtIdentifierTable,
- CustAddrDepdntInformationTable,
- CustomerCompanyTable,
- CustomerCompanyTextTable,
- CustomerDunningTable,
- CustomerSalesAreaTable,
- CustomerSalesAreaTaxTable,
- CustomerSalesAreaTextTable,
- CustomerTaxGroupingTable,
- CustomerTextTable,
- CustomerUnloadingPointTable,
- CustomerWithHoldingTaxTable,
- CustSalesPartnerFuncTable,
- CustSlsAreaAddrDepdntInfoTable,
- CustSlsAreaAddrDepdntTaxInfoTable,
- CustUnldgPtAddrDepdntInfoTable,
- SupplierTable,
- SupplierCompanyTable,
- SupplierCompanyTextTable,
- SupplierDunningTable,
- SupplierPartnerFuncTable,
- SupplierPurchasingOrgTable,
- SupplierPurchasingOrgTextTable,
- SupplierTextTable,
- SupplierWithHoldingTaxTable,
- ]
- for Table in _tables:
- self._register_table(Table.name, Table(self))
- self.connect()
-
- def check_connection(self) -> StatusResponse:
- resp = StatusResponse(False)
- if self.connection and not self.connection.is_connected():
- resp.error = "Client not connected"
- else:
- resp.success = True
- return resp
-
- def connect(self) -> SAPERP:
- self.connection = SAPERP(self.base_url, self.api_key)
- return self.connection
-
- def native_query(self, query: str) -> StatusResponse:
- ast = parse_sql(query)
- return self.query(ast)
-
-
-connection_args = OrderedDict(
- api_key={
- 'type': ARG_TYPE.STR,
- 'description': 'API Token for accessing SAP ERP',
- 'required': True,
- 'label': 'API Key',
- },
- base_url={
- 'type': ARG_TYPE.STR,
- 'description': 'Base URL of the host',
- 'required': True,
- 'label': 'Base URL',
- }
-)
-
-
-connection_args_example = OrderedDict(
- api_key='23d6b9e0c2fab7eba2e8b7e452cead47',
- base_url='https://sandbox.api.sap.com/s4hanacloud/sap/opu/odata/sap/API_BUSINESS_PARTNER/'
-)
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_tables.py b/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_tables.py
deleted file mode 100644
index 6d545393634..00000000000
--- a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_tables.py
+++ /dev/null
@@ -1,1928 +0,0 @@
-from typing import List
-
-import pandas as pd
-from mindsdb_sql_parser import ast
-
-from mindsdb.integrations.utilities.handlers.query_utilities import (
- SELECTQueryExecutor,
- SELECTQueryParser,
-)
-from mindsdb.integrations.libs.api_handler import APIHandler, APITable
-from mindsdb.integrations.utilities.sql_utils import conditions_to_filter
-
-
-class CustomAPITable(APITable):
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.handler.connect()
-
- def get_columns(self, ignore: List[str] = []) -> List[str]:
- return [item for item in self.columns if item not in ignore]
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- raise NotImplementedError()
-
- def parse_select(self, query: ast.Select, table_name: str):
- select_statement_parser = SELECTQueryParser(query, table_name, self.get_columns())
- self.selected_columns, self.where_conditions, self.order_by_conditions, self.result_limit = select_statement_parser.parse_query()
-
- def get_where_param(self, query: ast.Select, param: str):
- params = conditions_to_filter(query.where)
- if param not in params:
- raise Exception(f"WHERE condition does not have '{param}' selector")
- return params[param]
-
- def apply_query_params(self, df, query):
- select_statement_parser = SELECTQueryParser(query, self.name, self.get_columns())
- selected_columns, _, order_by_conditions, result_limit = select_statement_parser.parse_query()
- select_statement_executor = SELECTQueryExecutor(df, selected_columns, [], order_by_conditions, result_limit)
- return select_statement_executor.execute_query()
-
-
-class AddressEmailAddressTable(CustomAPITable):
- """Email address data linked to all business partner address records in the system"""
-
- name: str = "address_email_address"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "AddressID",
- "Person",
- "OrdinalNumber",
- "IsDefaultEmailAddress",
- "EmailAddress",
- "SearchEmailAddress",
- "AddressCommunicationRemarkText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_AddressEmailAddress")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class AddressFaxNumberTable(CustomAPITable):
- """Fax address data linked to all the business partner address records in the system"""
-
- name: str = "address_fax_number"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "AddressID",
- "Person",
- "OrdinalNumber",
- "IsDefaultFaxNumber",
- "FaxCountry",
- "FaxNumber",
- "FaxNumberExtension",
- "InternationalFaxNumber",
- "AddressCommunicationRemarkText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_AddressFaxNumber")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class AddressHomePageURLTable(CustomAPITable):
- """Home page URL address records linked to all business partner address records in the system"""
-
- name: str = "address_home_page"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "AddressID",
- "Person",
- "OrdinalNumber",
- "ValidityStartDate",
- "IsDefaultURLAddress",
- "SearchURLAddress",
- "AddressCommunicationRemarkText",
- "URLFieldLength",
- "WebsiteURL",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_AddressHomePageURL")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class AddressPhoneNumberTable(CustomAPITable):
- """All the mobile/telephone address records linked to all the business partner address records in the system"""
-
- name: str = "address_phone_number"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "AddressID",
- "Person",
- "OrdinalNumber",
- "DestinationLocationCountry",
- "IsDefaultPhoneNumber",
- "PhoneNumber",
- "PhoneNumberExtension",
- "InternationalPhoneNumber",
- "PhoneNumberType",
- "AddressCommunicationRemarkText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_AddressPhoneNumber")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPAddrDepdntIntlLocNumberTable(CustomAPITable):
- """address dependent data for the business partner address by using the key fields business partner number and address ID"""
-
- name: str = "bp_addr_depdnt_intl_loc_number"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "AddressID",
- "InternationalLocationNumber1",
- "InternationalLocationNumber2",
- "InternationalLocationNumber3",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPAddrDepdntIntlLocNumber")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPContactToAddressTable(CustomAPITable):
- """Workplace address records linked to all the business partner contact records in the system."""
-
- name: str = "bp_contact_to_address"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "RelationshipNumber",
- "BusinessPartnerCompany",
- "BusinessPartnerPerson",
- "ValidityEndDate",
- "AddressID",
- "AddressNumber",
- "AdditionalStreetPrefixName",
- "AdditionalStreetSuffixName",
- "AddressTimeZone",
- "CareOfName",
- "CityCode",
- "CityName",
- "CompanyPostalCode",
- "Country",
- "County",
- "DeliveryServiceNumber",
- "DeliveryServiceTypeCode",
- "District",
- "FormOfAddress",
- "FullName",
- "HomeCityName",
- "HouseNumber",
- "HouseNumberSupplementText",
- "Language",
- "POBox",
- "POBoxDeviatingCityName",
- "POBoxDeviatingCountry",
- "POBoxDeviatingRegion",
- "POBoxIsWithoutNumber",
- "POBoxLobbyName",
- "POBoxPostalCode",
- "Person",
- "PostalCode",
- "PrfrdCommMediumType",
- "Region",
- "StreetName",
- "StreetPrefixName",
- "StreetSuffixName",
- "TaxJurisdiction",
- "TransportZone",
- "AddressRepresentationCode",
- "ContactPersonBuilding",
- "ContactPersonPrfrdCommMedium",
- "ContactRelationshipDepartment",
- "ContactRelationshipFunction",
- "CorrespondenceShortName",
- "Floor",
- "InhouseMail",
- "IsDefaultAddress",
- "RoomNumber",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPContactToAddress")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPContactToFuncAndDeptTable(CustomAPITable):
- """Contact person department and function data linked to all business partner contact records in the system"""
-
- name: str = "bp_contact_to_func_and_dept"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "RelationshipNumber",
- "BusinessPartnerCompany",
- "BusinessPartnerPerson",
- "ValidityEndDate",
- "ContactPersonAuthorityType",
- "ContactPersonDepartment",
- "ContactPersonDepartmentName",
- "ContactPersonFunction",
- "ContactPersonFunctionName",
- "ContactPersonRemarkText",
- "ContactPersonVIPType",
- "EmailAddress",
- "FaxNumber",
- "FaxNumberExtension",
- "PhoneNumber",
- "PhoneNumberExtension",
- "RelationshipCategory",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPContactToFuncAndDept")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPCreditWorthinessTable(CustomAPITable):
- """Contact person department and function data linked to all business partner contact records in the system"""
-
- name: str = "bp_credit_worthiness"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BusPartCreditStanding",
- "BPCreditStandingStatus",
- "CreditRatingAgency",
- "BPCreditStandingComment",
- "BPCreditStandingDate",
- "BPCreditStandingRating",
- "BPLegalProceedingStatus",
- "BPLglProceedingInitiationDate",
- "BusinessPartnerIsUnderOath",
- "BusinessPartnerOathDate",
- "BusinessPartnerIsBankrupt",
- "BusinessPartnerBankruptcyDate",
- "BPForeclosureIsInitiated",
- "BPForeclosureDate",
- "BPCrdtWrthnssAccessChkIsActive",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPCreditWorthiness")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPDataControllerTable(CustomAPITable):
- """Business partner data controllers of all the available records linked to business partners in the system"""
-
- name: str = "bp_data_controller"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "DataController",
- "PurposeForPersonalData",
- "DataControlAssignmentStatus",
- "BPDataControllerIsDerived",
- "PurposeDerived",
- "PurposeType",
- "BusinessPurposeFlag",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPDataController")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPFinancialServicesExtnTable(CustomAPITable):
- """Financial services business partner attributes of all the available records linked to business partners in the system"""
-
- name: str = "bp_financial_services_extn"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BusinessPartnerIsVIP",
- "TradingPartner",
- "FactoryCalendar",
- "BusinessPartnerOfficeCountry",
- "BusinessPartnerOfficeRegion",
- "BPRegisteredOfficeName",
- "BPBalanceSheetCurrency",
- "BPLastCptlIncrAmtInBalShtCrcy",
- "BPLastCapitalIncreaseYear",
- "BPBalanceSheetDisplayType",
- "BusinessPartnerCitizenship",
- "BPMaritalPropertyRegime",
- "BusinessPartnerIncomeCurrency",
- "BPNumberOfChildren",
- "BPNumberOfHouseholdMembers",
- "BPAnnualNetIncAmtInIncomeCrcy",
- "BPMonthlyNetIncAmtInIncomeCrcy",
- "BPAnnualNetIncomeYear",
- "BPMonthlyNetIncomeMonth",
- "BPMonthlyNetIncomeYear",
- "BPPlaceOfDeathName",
- "CustomerIsUnwanted",
- "UndesirabilityReason",
- "UndesirabilityComment",
- "LastCustomerContactDate",
- "BPGroupingCharacter",
- "BPLetterSalutation",
- "BusinessPartnerTargetGroup",
- "BusinessPartnerEmployeeGroup",
- "BusinessPartnerIsEmployee",
- "BPTermnBusRelationsBankDate",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPFinancialServicesExtn")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPFinancialServicesReportingTable(CustomAPITable):
- """Financial services reporting attributes of all the available records linked to business partners in the system"""
-
- name: str = "bp_financial_services_reporting"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BPIsNonResident",
- "BPNonResidencyStartDate",
- "BPIsMultimillionLoanRecipient",
- "BPLoanReportingBorrowerNumber",
- "BPLoanRptgBorrowerEntityNumber",
- "BPCreditStandingReview",
- "BPCreditStandingReviewDate",
- "BusinessPartnerLoanToManager",
- "BPCompanyRelationship",
- "BPLoanReportingCreditorNumber",
- "BPOeNBIdentNumber",
- "BPOeNBTargetGroup",
- "BPOeNBIdentNumberAssigned",
- "BPOeNBInstituteNumber",
- "BusinessPartnerIsOeNBInstitute",
- "BusinessPartnerGroup",
- "BPGroupAssignmentCategory",
- "BusinessPartnerGroupName",
- "BusinessPartnerLegalEntity",
- "BPGerAstRglnRestrictedAstQuota",
- "BusinessPartnerDebtorGroup",
- "BusinessPartnerBusinessPurpose",
- "BusinessPartnerRiskGroup",
- "BPRiskGroupingDate",
- "BPHasGroupAffiliation",
- "BPIsMonetaryFinInstitution",
- "BPCrdtStandingReviewIsRequired",
- "BPLoanMonitoringIsRequired",
- "BPHasCreditingRelief",
- "BPInvestInRstrcdAstIsAuthzd",
- "BPCentralBankCountryRegion",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPFinancialServicesReporting")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPFiscalYearInformationTable(CustomAPITable):
- """Business partner fiscal year information of all the available records linked to business partners in the system."""
-
- name: str = "bp_fiscal_year_information"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BusinessPartnerFiscalYear",
- "BPBalanceSheetCurrency",
- "BPAnnualStockholderMeetingDate",
- "BPFiscalYearStartDate",
- "BPFiscalYearEndDate",
- "BPFiscalYearIsClosed",
- "BPFiscalYearClosingDate",
- "BPFsclYrCnsldtdFinStatementDte",
- "BPCapitalStockAmtInBalShtCrcy",
- "BPIssdStockCptlAmtInBalShtCrcy",
- "BPPartcipnCertAmtInBalShtCrcy",
- "BPEquityCapitalAmtInBalShtCrcy",
- "BPGrossPremiumAmtInBalShtCrcy",
- "BPNetPremiumAmtInBalShtCrcy",
- "BPAnnualSalesAmtInBalShtCrcy",
- "BPAnnualNetIncAmtInBalShtCrcy",
- "BPDividendDistrAmtInBalShtCrcy",
- "BPDebtRatioInYears",
- "BPAnnualPnLAmtInBalShtCrcy",
- "BPBalSheetTotalAmtInBalShtCrcy",
- "BPNumberOfEmployees",
- "BPCptlReserveAmtInBalShtCrcy",
- "BPLglRevnRsrvAmtInBalShtCrcy",
- "RevnRsrvOwnStkAmtInBalShtCrcy",
- "BPStatryReserveAmtInBalShtCrcy",
- "BPOthRevnRsrvAmtInBalShtCrcy",
- "BPPnLCarryfwdAmtInBalShtCrcy",
- "BPSuborddLbltyAmtInBalShtCrcy",
- "BPRetOnTotalCptlEmpldInPercent",
- "BPDebtClearancePeriodInYears",
- "BPFinancingCoeffInPercent",
- "BPEquityRatioInPercent",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPFiscalYearInformation")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BPRelationshipTable(CustomAPITable):
- """Business partner relationship data fields of all the available records in the system"""
-
- name: str = "bp_relationship"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "RelationshipNumber",
- "BusinessPartner1",
- "BusinessPartner2",
- "ValidityEndDate",
- "ValidityStartDate",
- "IsStandardRelationship",
- "RelationshipCategory",
- "BPRelationshipType",
- "CreatedByUser",
- "CreationDate",
- "CreationTime",
- "LastChangedByUser",
- "LastChangeDate",
- "LastChangeTime",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BPRelationship")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BuPaAddressUsageTable(CustomAPITable):
- """All the address usage records linked to all business partner address records in the system"""
-
- name: str = "bu_pa_address_usage"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "ValidityEndDate",
- "AddressUsage",
- "AddressID",
- "ValidityStartDate",
- "StandardUsage",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BuPaAddressUsage")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BuPaIdentificationTable(CustomAPITable):
- """Business partner identification data fields of all the records available records in the system"""
-
- name: str = "bu_pa_identification"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BPIdentificationType",
- "BPIdentificationNumber",
- "BPIdnNmbrIssuingInstitute",
- "BPIdentificationEntryDate",
- "Country",
- "Region",
- "ValidityStartDate",
- "ValidityEndDate",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BuPaIdentification")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BuPaIndustryTable(CustomAPITable):
- """Business partner industry data fields of all the available records in the system"""
-
- name: str = "bu_pa_industry"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "IndustrySector",
- "IndustrySystemType",
- "BusinessPartner",
- "IsStandardIndustry",
- "IndustryKeyDescription",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BuPaIndustry")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerTable(CustomAPITable):
- """General data fields of all the business partner records available in the system"""
-
- name: str = "business_partner"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "Customer",
- "Supplier",
- "AcademicTitle",
- "AuthorizationGroup",
- "BusinessPartnerCategory",
- "BusinessPartnerFullName",
- "BusinessPartnerGrouping",
- "BusinessPartnerName",
- "BusinessPartnerUUID",
- "CorrespondenceLanguage",
- "CreatedByUser",
- "CreationDate",
- "CreationTime",
- "FirstName",
- "FormOfAddress",
- "Industry",
- "InternationalLocationNumber1",
- "InternationalLocationNumber2",
- "IsFemale",
- "IsMale",
- "IsNaturalPerson",
- "IsSexUnknown",
- "GenderCodeName",
- "Language",
- "LastChangeDate",
- "LastChangeTime",
- "LastChangedByUser",
- "LastName",
- "LegalForm",
- "OrganizationBPName1",
- "OrganizationBPName2",
- "OrganizationBPName3",
- "OrganizationBPName4",
- "OrganizationFoundationDate",
- "OrganizationLiquidationDate",
- "SearchTerm1",
- "SearchTerm2",
- "AdditionalLastName",
- "BirthDate",
- "BusinessPartnerBirthDateStatus",
- "BusinessPartnerBirthplaceName",
- "BusinessPartnerDeathDate",
- "BusinessPartnerIsBlocked",
- "BusinessPartnerType",
- "ETag",
- "GroupBusinessPartnerName1",
- "GroupBusinessPartnerName2",
- "IndependentAddressID",
- "InternationalLocationNumber3",
- "MiddleName",
- "NameCountry",
- "NameFormat",
- "PersonFullName",
- "PersonNumber",
- "IsMarkedForArchiving",
- "BusinessPartnerIDByExtSystem",
- "BusinessPartnerPrintFormat",
- "BusinessPartnerOccupation",
- "BusPartMaritalStatus",
- "BusPartNationality",
- "BusinessPartnerBirthName",
- "BusinessPartnerSupplementName",
- "NaturalPersonEmployerName",
- "LastNamePrefix",
- "LastNameSecondPrefix",
- "Initials",
- "BPDataControllerIsNotRequired",
- "TradingPartner",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartner")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerAddressTable(CustomAPITable):
- """Business partner address data fields of all the available records in the system"""
-
- name: str = "business_partner_address"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "AddressID",
- "ValidityStartDate",
- "ValidityEndDate",
- "AuthorizationGroup",
- "AddressUUID",
- "AdditionalStreetPrefixName",
- "AdditionalStreetSuffixName",
- "AddressTimeZone",
- "CareOfName",
- "CityCode",
- "CityName",
- "CompanyPostalCode",
- "Country",
- "County",
- "DeliveryServiceNumber",
- "DeliveryServiceTypeCode",
- "District",
- "FormOfAddress",
- "FullName",
- "HomeCityName",
- "HouseNumber",
- "HouseNumberSupplementText",
- "Language",
- "POBox",
- "POBoxDeviatingCityName",
- "POBoxDeviatingCountry",
- "POBoxDeviatingRegion",
- "POBoxIsWithoutNumber",
- "POBoxLobbyName",
- "POBoxPostalCode",
- "Person",
- "PostalCode",
- "PrfrdCommMediumType",
- "Region",
- "StreetName",
- "StreetPrefixName",
- "StreetSuffixName",
- "TaxJurisdiction",
- "TransportZone",
- "AddressIDByExternalSystem",
- "CountyCode",
- "TownshipCode",
- "TownshipName",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartnerAddress")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerContactTable(CustomAPITable):
- """Business partner contact data fields of all the available records in the system"""
-
- name: str = "business_partner_contact"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "RelationshipNumber",
- "BusinessPartnerCompany",
- "BusinessPartnerPerson",
- "ValidityEndDate",
- "ValidityStartDate",
- "IsStandardRelationship",
- "RelationshipCategory",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartnerContact")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerPaymentCardTable(CustomAPITable):
- """Business partner payment card data fields of all the available records in the system"""
-
- name: str = "business_partner_payment_card"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "PaymentCardID",
- "PaymentCardType",
- "CardNumber",
- "IsStandardCard",
- "CardDescription",
- "ValidityDate",
- "ValidityEndDate",
- "CardHolder",
- "CardIssuingBank",
- "CardIssueDate",
- "PaymentCardLock",
- "MaskedCardNumber",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartnerPaymentCard")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerRatingTable(CustomAPITable):
- """Business partner ratings of all the available records linked to business partners in the system"""
-
- name: str = "business_partner_rating"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BusinessPartnerRatingProcedure",
- "BPRatingValidityEndDate",
- "BusinessPartnerRatingGrade",
- "BusinessPartnerRatingTrend",
- "BPRatingValidityStartDate",
- "BPRatingCreationDate",
- "BusinessPartnerRatingComment",
- "BusinessPartnerRatingIsAllowed",
- "BPRatingIsValidOnKeyDate",
- "BusinessPartnerRatingKeyDate",
- "BusinessPartnerRatingIsExpired",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartnerRating")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerRoleTable(CustomAPITable):
- """Business partner role data fields of all the records available records in the system"""
-
- name: str = "business_partner_role"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BusinessPartnerRole",
- "ValidFrom",
- "ValidTo",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartnerRole")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusinessPartnerTaxNumberTable(CustomAPITable):
- """Tax number data of all the available records linked to business partners in the system"""
-
- name: str = "business_partner_tax_number"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "BPTaxType",
- "BPTaxNumber",
- "BPTaxLongNumber",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusinessPartnerTaxNumber")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class BusPartAddrDepdntTaxNumberTable(CustomAPITable):
- """Address dependent tax number data of all the available records linked to business partners in the system"""
-
- name: str = "business_partner_address_dependent_tax_number"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "BusinessPartner",
- "AddressID",
- "BPTaxType",
- "BPTaxNumber",
- "BPTaxLongNumber",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_BusPartAddrDepdntTaxNmbr")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustAddrDepdntExtIdentifierTable(CustomAPITable):
- """Address dependent external identifiers of all the available records linked to customers in the system"""
-
- name: str = "cust_addr_depdnt_ext_identifier"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "AddressID",
- "CustomerExternalRefID",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustAddrDepdntExtIdentifier")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustAddrDepdntInformationTable(CustomAPITable):
- """General data of all the customer records available in the system"""
-
- name: str = "customer"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "AuthorizationGroup",
- "BillingIsBlockedForCustomer",
- "CreatedByUser",
- "CreationDate",
- "CustomerAccountGroup",
- "CustomerClassification",
- "CustomerFullName",
- "BPCustomerFullName",
- "CustomerName",
- "BPCustomerName",
- "DeliveryIsBlocked",
- "FreeDefinedAttribute01",
- "FreeDefinedAttribute02",
- "FreeDefinedAttribute03",
- "FreeDefinedAttribute04",
- "FreeDefinedAttribute05",
- "FreeDefinedAttribute06",
- "FreeDefinedAttribute07",
- "FreeDefinedAttribute08",
- "FreeDefinedAttribute09",
- "FreeDefinedAttribute10",
- "NFPartnerIsNaturalPerson",
- "OrderIsBlockedForCustomer",
- "PostingIsBlocked",
- "Supplier",
- "CustomerCorporateGroup",
- "FiscalAddress",
- "Industry",
- "IndustryCode1",
- "IndustryCode2",
- "IndustryCode3",
- "IndustryCode4",
- "IndustryCode5",
- "InternationalLocationNumber1",
- "InternationalLocationNumber2",
- "InternationalLocationNumber3",
- "NielsenRegion",
- "PaymentReason",
- "ResponsibleType",
- "TaxNumber1",
- "TaxNumber2",
- "TaxNumber3",
- "TaxNumber4",
- "TaxNumber5",
- "TaxNumberType",
- "VATRegistration",
- "DeletionIndicator",
- "ExpressTrainStationName",
- "TrainStationName",
- "CityCode",
- "County",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_Customer")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerCompanyTable(CustomAPITable):
- """Customer company data fields of all the available records in the system linked to customer"""
-
- name: str = "customer_company"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "CompanyCode",
- "APARToleranceGroup",
- "AccountByCustomer",
- "AccountingClerk",
- "AccountingClerkFaxNumber",
- "AccountingClerkInternetAddress",
- "AccountingClerkPhoneNumber",
- "AlternativePayerAccount",
- "AuthorizationGroup",
- "CollectiveInvoiceVariant",
- "CustomerAccountNote",
- "CustomerHeadOffice",
- "CustomerSupplierClearingIsUsed",
- "HouseBank",
- "InterestCalculationCode",
- "InterestCalculationDate",
- "IntrstCalcFrequencyInMonths",
- "IsToBeLocallyProcessed",
- "ItemIsToBePaidSeparately",
- "LayoutSortingRule",
- "PaymentBlockingReason",
- "PaymentMethodsList",
- "PaymentReason",
- "PaymentTerms",
- "PaytAdviceIsSentbyEDI",
- "PhysicalInventoryBlockInd",
- "ReconciliationAccount",
- "RecordPaymentHistoryIndicator",
- "UserAtCustomer",
- "DeletionIndicator",
- "CashPlanningGroup",
- "KnownOrNegotiatedLeave",
- "ValueAdjustmentKey",
- "CustomerAccountGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerCompany")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerCompanyTextTable(CustomAPITable):
- """Customer company text records attached to customer company in the system"""
-
- name: str = "customer_company_text"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "CompanyCode",
- "Language",
- "LongTextID",
- "LongText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerCompanyText")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerDunningTable(CustomAPITable):
- """Dunning records attached to customer company in the system"""
-
- name: str = "customer_dunning"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "CompanyCode",
- "DunningArea",
- "DunningBlock",
- "DunningLevel",
- "DunningProcedure",
- "DunningRecipient",
- "LastDunnedOn",
- "LegDunningProcedureOn",
- "DunningClerk",
- "AuthorizationGroup",
- "CustomerAccountGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerDunning")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerSalesAreaTable(CustomAPITable):
- """Customer sales area data fields of all the available records in the system"""
-
- name: str = "customer_sales_area"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "SalesOrganization",
- "DistributionChannel",
- "Division",
- "AccountByCustomer",
- "AuthorizationGroup",
- "BillingIsBlockedForCustomer",
- "CompleteDeliveryIsDefined",
- "CreditControlArea",
- "Currency",
- "CustIsRlvtForSettlmtMgmt",
- "CustomerABCClassification",
- "CustomerAccountAssignmentGroup",
- "CustomerGroup",
- "CustomerIsRebateRelevant",
- "CustomerPaymentTerms",
- "CustomerPriceGroup",
- "CustomerPricingProcedure",
- "CustProdProposalProcedure",
- "DeliveryIsBlockedForCustomer",
- "DeliveryPriority",
- "IncotermsClassification",
- "IncotermsLocation2",
- "IncotermsVersion",
- "IncotermsLocation1",
- "IncotermsSupChnLoc1AddlUUID",
- "IncotermsSupChnLoc2AddlUUID",
- "IncotermsSupChnDvtgLocAddlUUID",
- "DeletionIndicator",
- "IncotermsTransferLocation",
- "InspSbstHasNoTimeOrQuantity",
- "InvoiceDate",
- "ItemOrderProbabilityInPercent",
- "ManualInvoiceMaintIsRelevant",
- "MaxNmbrOfPartialDelivery",
- "OrderCombinationIsAllowed",
- "OrderIsBlockedForCustomer",
- "OverdelivTolrtdLmtRatioInPct",
- "PartialDeliveryIsAllowed",
- "PriceListType",
- "ProductUnitGroup",
- "ProofOfDeliveryTimeValue",
- "SalesGroup",
- "SalesItemProposal",
- "SalesOffice",
- "ShippingCondition",
- "SlsDocIsRlvtForProofOfDeliv",
- "SlsUnlmtdOvrdelivIsAllwd",
- "SupplyingPlant",
- "SalesDistrict",
- "UnderdelivTolrtdLmtRatioInPct",
- "InvoiceListSchedule",
- "ExchangeRateType",
- "AdditionalCustomerGroup1",
- "AdditionalCustomerGroup2",
- "AdditionalCustomerGroup3",
- "AdditionalCustomerGroup4",
- "AdditionalCustomerGroup5",
- "PaymentGuaranteeProcedure",
- "CustomerAccountGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerSalesArea")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerSalesAreaTaxTable(CustomAPITable):
- """Customer sales area tax data fields of all the available records in the system"""
-
- name: str = "customer_sales_area_tax"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "SalesOrganization",
- "DistributionChannel",
- "Division",
- "DepartureCountry",
- "CustomerTaxCategory",
- "CustomerTaxClassification",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerSalesAreaTax")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerSalesAreaTextTable(CustomAPITable):
- """Customer sales area text fields of all the available records in the system linked to customer sales areas"""
-
- name: str = "customer_sales_area_text"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "SalesOrganization",
- "DistributionChannel",
- "Division",
- "Language",
- "LongTextID",
- "LongText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerSalesAreaText")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerTaxGroupingTable(CustomAPITable):
- """Customer tax grouping data attached to a customer in the system"""
-
- name: str = "customer_tax_grouping"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "CustomerTaxGroupingCode",
- "CustTaxGrpExemptionCertificate",
- "CustTaxGroupExemptionRate",
- "CustTaxGroupExemptionStartDate",
- "CustTaxGroupExemptionEndDate",
- "CustTaxGroupSubjectedStartDate",
- "CustTaxGroupSubjectedEndDate",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerTaxGrouping")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerTextTable(CustomAPITable):
- """Customer text data attached to a customer in the system"""
-
- name: str = "customer_text"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "Language",
- "LongTextID",
- "LongText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerText")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerUnloadingPointTable(CustomAPITable):
- """Unloading point data attached to a customer in the system"""
-
- name: str = "customer_unloading_point"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "UnloadingPointName",
- "CustomerFactoryCalenderCode",
- "BPGoodsReceivingHoursCode",
- "IsDfltBPUnloadingPoint",
- "MondayMorningOpeningTime",
- "MondayMorningClosingTime",
- "MondayAfternoonOpeningTime",
- "MondayAfternoonClosingTime",
- "TuesdayMorningOpeningTime",
- "TuesdayMorningClosingTime",
- "TuesdayAfternoonOpeningTime",
- "TuesdayAfternoonClosingTime",
- "WednesdayMorningOpeningTime",
- "WednesdayMorningClosingTime",
- "WednesdayAfternoonOpeningTime",
- "WednesdayAfternoonClosingTime",
- "ThursdayMorningOpeningTime",
- "ThursdayMorningClosingTime",
- "ThursdayAfternoonOpeningTime",
- "ThursdayAfternoonClosingTime",
- "FridayMorningOpeningTime",
- "FridayMorningClosingTime",
- "FridayAfternoonOpeningTime",
- "FridayAfternoonClosingTime",
- "SaturdayMorningOpeningTime",
- "SaturdayMorningClosingTime",
- "SaturdayAfternoonOpeningTime",
- "SaturdayAfternoonClosingTime",
- "SundayMorningOpeningTime",
- "SundayMorningClosingTime",
- "SundayAfternoonOpeningTime",
- "SundayAfternoonClosingTime",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerUnloadingPoint")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustomerWithHoldingTaxTable(CustomAPITable):
- """Withholding tax records attached to customer company in the system"""
-
- name: str = "customer_withholding_tax"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "CompanyCode",
- "WithholdingTaxType",
- "WithholdingTaxCode",
- "WithholdingTaxAgent",
- "ObligationDateBegin",
- "ObligationDateEnd",
- "WithholdingTaxNumber",
- "WithholdingTaxCertificate",
- "WithholdingTaxExmptPercent",
- "ExemptionDateBegin",
- "ExemptionDateEnd",
- "ExemptionReason",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustomerWithHoldingTax")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustSalesPartnerFuncTable(CustomAPITable):
- """Partner function fields of all the available records in the system linked to customer sales areas"""
-
- name: str = "customer_sales_partner_func"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "SalesOrganization",
- "DistributionChannel",
- "Division",
- "PartnerCounter",
- "PartnerFunction",
- "BPCustomerNumber",
- "CustomerPartnerDescription",
- "DefaultPartner",
- "Supplier",
- "PersonnelNumber",
- "ContactPerson",
- "AddressID",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustSalesPartnerFunc")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustSlsAreaAddrDepdntInfoTable(CustomAPITable):
- """Address dependent customer sales area data fields of all the available records in the system"""
-
- name: str = "customer_sales_area_addr_depdnt_info"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "SalesOrganization",
- "DistributionChannel",
- "Division",
- "AddressID",
- "IncotermsClassification",
- "IncotermsLocation1",
- "IncotermsLocation2",
- "IncotermsSupChnLoc1AddlUUID",
- "IncotermsSupChnLoc2AddlUUID",
- "IncotermsSupChnDvtgLocAddlUUID",
- "DeliveryIsBlocked",
- "SalesOffice",
- "SalesGroup",
- "ShippingCondition",
- "SupplyingPlant",
- "IncotermsVersion",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustSlsAreaAddrDepdntInfo")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustSlsAreaAddrDepdntTaxInfoTable(CustomAPITable):
- """Address dependent customer sales area tax data fields of all the available records in the system"""
-
- name: str = "customer_sales_area_addr_depdnt_tax_info"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "SalesOrganization",
- "DistributionChannel",
- "Division",
- "AddressID",
- "DepartureCountry",
- "CustomerTaxCategory",
- "CustomerTaxClassification",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustSlsAreaAddrDepdntTaxInfo")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class CustUnldgPtAddrDepdntInfoTable(CustomAPITable):
- """Address dependent customer unloading point data fields of all the available records in the system"""
-
- name: str = "customer_unloading_point_addr_depdnt_info"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Customer",
- "AddressID",
- "UnloadingPointName",
- "CustomerFactoryCalenderCode",
- "BPGoodsReceivingHoursCode",
- "IsDfltBPUnloadingPoint",
- "MondayMorningOpeningTime",
- "MondayMorningClosingTime",
- "MondayAfternoonOpeningTime",
- "MondayAfternoonClosingTime",
- "TuesdayMorningOpeningTime",
- "TuesdayMorningClosingTime",
- "TuesdayAfternoonOpeningTime",
- "TuesdayAfternoonClosingTime",
- "WednesdayMorningOpeningTime",
- "WednesdayMorningClosingTime",
- "WednesdayAfternoonOpeningTime",
- "WednesdayAfternoonClosingTime",
- "ThursdayMorningOpeningTime",
- "ThursdayMorningClosingTime",
- "ThursdayAfternoonOpeningTime",
- "ThursdayAfternoonClosingTime",
- "FridayMorningOpeningTime",
- "FridayMorningClosingTime",
- "FridayAfternoonOpeningTime",
- "FridayAfternoonClosingTime",
- "SaturdayMorningOpeningTime",
- "SaturdayMorningClosingTime",
- "SaturdayAfternoonOpeningTime",
- "SaturdayAfternoonClosingTime",
- "SundayMorningOpeningTime",
- "SundayMorningClosingTime",
- "SundayAfternoonOpeningTime",
- "SundayAfternoonClosingTime",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_CustUnldgPtAddrDepdntInfo")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierTable(CustomAPITable):
- """General data of all the supplier records available in the system"""
-
- name: str = "supplier"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "AlternativePayeeAccountNumber",
- "AuthorizationGroup",
- "CreatedByUser",
- "CreationDate",
- "Customer",
- "PaymentIsBlockedForSupplier",
- "PostingIsBlocked",
- "PurchasingIsBlocked",
- "SupplierAccountGroup",
- "SupplierFullName",
- "SupplierName",
- "VATRegistration",
- "BirthDate",
- "ConcatenatedInternationalLocNo",
- "DeletionIndicator",
- "FiscalAddress",
- "Industry",
- "InternationalLocationNumber1",
- "InternationalLocationNumber2",
- "InternationalLocationNumber3",
- "IsNaturalPerson",
- "PaymentReason",
- "ResponsibleType",
- "SuplrQltyInProcmtCertfnValidTo",
- "SuplrQualityManagementSystem",
- "SupplierCorporateGroup",
- "SupplierProcurementBlock",
- "TaxNumber1",
- "TaxNumber2",
- "TaxNumber3",
- "TaxNumber4",
- "TaxNumber5",
- "TaxNumberResponsible",
- "TaxNumberType",
- "SuplrProofOfDelivRlvtCode",
- "BR_TaxIsSplit",
- "DataExchangeInstructionKey",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_Supplier")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierCompanyTable(CustomAPITable):
- """supplier company data available in the system"""
-
- name: str = "supplier_company"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "CompanyCode",
- "AuthorizationGroup",
- "CompanyCodeName",
- "PaymentBlockingReason",
- "SupplierIsBlockedForPosting",
- "AccountingClerk",
- "AccountingClerkFaxNumber",
- "AccountingClerkPhoneNumber",
- "SupplierClerk",
- "SupplierClerkURL",
- "PaymentMethodsList",
- "PaymentReason",
- "PaymentTerms",
- "ClearCustomerSupplier",
- "IsToBeLocallyProcessed",
- "ItemIsToBePaidSeparately",
- "PaymentIsToBeSentByEDI",
- "HouseBank",
- "CheckPaidDurationInDays",
- "Currency",
- "BillOfExchLmtAmtInCoCodeCrcy",
- "SupplierClerkIDBySupplier",
- "ReconciliationAccount",
- "InterestCalculationCode",
- "InterestCalculationDate",
- "IntrstCalcFrequencyInMonths",
- "SupplierHeadOffice",
- "AlternativePayee",
- "LayoutSortingRule",
- "APARToleranceGroup",
- "SupplierCertificationDate",
- "SupplierAccountNote",
- "WithholdingTaxCountry",
- "DeletionIndicator",
- "CashPlanningGroup",
- "IsToBeCheckedForDuplicates",
- "MinorityGroup",
- "SupplierAccountGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierCompany")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierCompanyTextTable(CustomAPITable):
- """Supplier company text data attached to supplier company in the system"""
-
- name: str = "supplier_company_text"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "CompanyCode",
- "Language",
- "LongTextID",
- "LongText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierCompanyText")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierDunningTable(CustomAPITable):
- """Dunning records attached to supplier company in the system"""
-
- name: str = "supplier_dunning"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "CompanyCode",
- "DunningArea",
- "DunningBlock",
- "DunningLevel",
- "DunningProcedure",
- "DunningRecipient",
- "LastDunnedOn",
- "LegDunningProcedureOn",
- "DunningClerk",
- "AuthorizationGroup",
- "SupplierAccountGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierDunning")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierPartnerFuncTable(CustomAPITable):
- """Partner function fields of all the available records in the system linked to supplier purchasing organization"""
-
- name: str = "supplier_partner_func"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "PurchasingOrganization",
- "SupplierSubrange",
- "Plant",
- "PartnerFunction",
- "PartnerCounter",
- "DefaultPartner",
- "CreationDate",
- "CreatedByUser",
- "ReferenceSupplier",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierPartnerFunc")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierPurchasingOrgTable(CustomAPITable):
- """Supplier purchasing organization data attached to supplier records in the system"""
-
- name: str = "supplier_purchasing_org"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "PurchasingOrganization",
- "AutomaticEvaluatedRcptSettlmt",
- "CalculationSchemaGroupCode",
- "DeletionIndicator",
- "EvaldReceiptSettlementIsActive",
- "IncotermsClassification",
- "IncotermsTransferLocation",
- "IncotermsVersion",
- "IncotermsLocation1",
- "IncotermsLocation2",
- "IncotermsSupChnLoc1AddlUUID",
- "IncotermsSupChnLoc2AddlUUID",
- "IncotermsSupChnDvtgLocAddlUUID",
- "IntrastatCrsBorderTrMode",
- "InvoiceIsGoodsReceiptBased",
- "InvoiceIsMMServiceEntryBased",
- "MaterialPlannedDeliveryDurn",
- "MinimumOrderAmount",
- "PaymentTerms",
- "PlanningCycle",
- "PricingDateControl",
- "ProdStockAndSlsDataTransfPrfl",
- "ProductUnitGroup",
- "PurOrdAutoGenerationIsAllowed",
- "PurchaseOrderCurrency",
- "PurchasingGroup",
- "PurchasingIsBlockedForSupplier",
- "RoundingProfile",
- "ShippingCondition",
- "SuplrDiscountInKindIsGranted",
- "SuplrInvcRevalIsAllowed",
- "SuplrIsRlvtForSettlmtMgmt",
- "SuplrPurgOrgIsRlvtForPriceDetn",
- "SupplierABCClassificationCode",
- "SupplierAccountNumber",
- "SupplierIsReturnsSupplier",
- "SupplierPhoneNumber",
- "SupplierRespSalesPersonName",
- "SupplierConfirmationControlKey",
- "IsOrderAcknRqd",
- "AuthorizationGroup",
- "SupplierAccountGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierPurchasingOrg")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierPurchasingOrgTextTable(CustomAPITable):
- """Supplier purchasing organization text data attached to purchasing organization in the system"""
-
- name: str = "supplier_purchasing_org_text"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "PurchasingOrganization",
- "Language",
- "LongTextID",
- "LongText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierPurchasingOrgText")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierTextTable(CustomAPITable):
- """Supplier text data attached to purchasing organization in the system"""
-
- name: str = "supplier_text"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "Language",
- "LongTextID",
- "LongText",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierText")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
-
-
-class SupplierWithHoldingTaxTable(CustomAPITable):
- """Withholding tax records attached to supplier company in the system"""
-
- name: str = "supplier_withholding_tax"
- columns: List[str] = [
- "id",
- "uri",
- "type",
- "Supplier",
- "CompanyCode",
- "WithholdingTaxType",
- "ExemptionDateBegin",
- "ExemptionDateEnd",
- "ExemptionReason",
- "IsWithholdingTaxSubject",
- "RecipientType",
- "WithholdingTaxCertificate",
- "WithholdingTaxCode",
- "WithholdingTaxExmptPercent",
- "WithholdingTaxNumber",
- "AuthorizationGroup",
- ]
-
- def __init__(self, handler: APIHandler):
- super().__init__(handler)
- self.connection = self.handler.connect()
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- data = self.connection.get("A_SupplierWithHoldingTax")
- df = pd.DataFrame.from_records(data)
- return self.apply_query_params(df, query)
diff --git a/mindsdb/integrations/handlers/sap_erp_handler/tests/__init__.py b/mindsdb/integrations/handlers/sap_erp_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/scylla_handler/README.md b/mindsdb/integrations/handlers/scylla_handler/README.md
deleted file mode 100644
index 26ae1998535..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# MindsDB ScyllaDB Handler
-
-This README provides details on the ScyllaDB handler integration for MindsDB.
-
-## Introduction to ScyllaDB
-
-ScyllaDB is an open-source distributed NoSQL wide-column data store. It was purposefully designed to offer compatibility with Apache Cassandra while outperforming it with higher throughputs and reduced latencies. For a comprehensive understanding of ScyllaDB, visit ScyllaDB's official website.
-
-### Integration Implementation
-
-The ScyllaDB handler for MindsDB was developed using the scylla-driver library for Python.
-Connection Parameters:
-
-- host: Host name or IP address of ScyllaDB.
-- port: Connection port
-- user: Authentication username. Optional; required only if authentication is enabled.
-- password: Authentication password. Optional; required only if authentication is enabled.
-- keyspace: The specific keyspace (top-level container for tables) to connect to.
-- protocol_version: Optional. Defaults to 4.
-- secure_connect_bundle: Optional. Needed only for connections to DataStax Astra.
-
-## Usage Guide
-
-To set up a connection between MindsDB and a Scylla server, utilize the following SQL syntax:
-
-```sql
-
-CREATE DATABASE scylladb_datasource
-WITH ENGINE='scylladb',
-PARAMETERS={
- "user":"user@mindsdb.com",
- "password": "pass",
- "host": "127.0.0.1",
- "port": 9042,
- "keyspace": "test_data"
-};
-```
-
-> βΉοΈ Tip: The protocol version is set to 4 by default. Should you wish to modify it, simply include "protocol_version": 5 within the PARAMETERS dictionary in the query above.
-
-## Querying the Keyspace:
-
-With the connection established, you can execute queries on your keyspace as demonstrated below:
-
-```sql
-SELECT * FROM scylladb_datasource.keystore.example_table LIMIT 10;
-```
diff --git a/mindsdb/integrations/handlers/scylla_handler/__about__.py b/mindsdb/integrations/handlers/scylla_handler/__about__.py
deleted file mode 100644
index 60e50993337..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Scylla handler'
-__package_name__ = 'mindsdb_scylla_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Scylla"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/scylla_handler/__init__.py b/mindsdb/integrations/handlers/scylla_handler/__init__.py
deleted file mode 100644
index a809585a2d9..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args
-try:
- from .scylla_handler import ScyllaHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-
-title = 'ScyllaDB'
-name = 'scylladb'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/scylla_handler/connection_args.py b/mindsdb/integrations/handlers/scylla_handler/connection_args.py
deleted file mode 100644
index 12ab6247776..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/connection_args.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'User name',
- 'required': True,
- 'label': 'User'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'Password',
- 'required': True,
- 'label': 'Password',
- 'secret': True
- },
- protocol_version={
- 'type': ARG_TYPE.INT,
- 'description': 'is not required, and default to 4.',
- 'required': False,
- 'label': 'Protocol version'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': ' is the host name or IP address of the ScyllaDB.',
- 'required': True,
- 'label': 'Host'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'Server port',
- 'required': True,
- 'label': 'Port'
- },
- keyspace={
- 'type': ARG_TYPE.STR,
- 'description': 'is the keyspace to connect to. It is a top level container for tables.',
- 'required': True,
- 'label': 'Keyspace'
- },
- secure_connect_bundle={
- 'type': ARG_TYPE.STR,
- 'description': 'Path or URL to the secure connect bundle',
- 'required': True,
- 'label': 'Host'
- }
-)
diff --git a/mindsdb/integrations/handlers/scylla_handler/icon.svg b/mindsdb/integrations/handlers/scylla_handler/icon.svg
deleted file mode 100644
index 3ea6d0e1a63..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/icon.svg
+++ /dev/null
@@ -1,17 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/scylla_handler/requirements.txt b/mindsdb/integrations/handlers/scylla_handler/requirements.txt
deleted file mode 100644
index cfb07fc70e1..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-scylla-driver
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/scylla_handler/scylla_handler.py b/mindsdb/integrations/handlers/scylla_handler/scylla_handler.py
deleted file mode 100644
index 87dd688165a..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/scylla_handler.py
+++ /dev/null
@@ -1,206 +0,0 @@
-import tempfile
-
-import pandas as pd
-import requests
-
-from cassandra.cluster import Cluster
-from cassandra.auth import PlainTextAuthProvider
-from cassandra.util import Date
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb_sql_parser import ast
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class ScyllaHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the Scylla statements.
- """
- name = 'scylla'
-
- def __init__(self, name=None, **kwargs):
- super().__init__(name)
- self.parser = parse_sql
- self.connection_args = kwargs.get('connection_data')
- self.session = None
- self.is_connected = False
-
- def download_secure_bundle(self, url, max_size=10 * 1024 * 1024):
- """
- Downloads the secure bundle from a given URL and stores it in a temporary file.
-
- :param url: URL of the secure bundle to be downloaded.
- :param max_size: Maximum allowable size of the bundle in bytes. Defaults to 10MB.
- :return: Path to the downloaded secure bundle saved as a temporary file.
- :raises ValueError: If the secure bundle size exceeds the allowed `max_size`.
-
- TODO:
- - Find a way to periodically clean up or delete the temporary files
- after they have been used to prevent filling up storage over time.
- """
- response = requests.get(url, stream=True, timeout=10)
- response.raise_for_status()
-
- content_length = int(response.headers.get('content-length', 0))
- if content_length > max_size:
- raise ValueError("Secure bundle is larger than the allowed size!")
-
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
- size_downloaded = 0
- for chunk in response.iter_content(chunk_size=8192):
- size_downloaded += len(chunk)
- if size_downloaded > max_size:
- raise ValueError("Secure bundle is larger than the allowed size!")
- temp_file.write(chunk)
- return temp_file.name
-
- def connect(self):
- """
- Handles the connection to a Scylla keystore.
- """
- if self.is_connected is True:
- return self.session
-
- auth_provider = None
- if any(key in self.connection_args for key in ('user', 'password')):
- if all(key in self.connection_args for key in ('user', 'password')):
- auth_provider = PlainTextAuthProvider(
- username=self.connection_args['user'], password=self.connection_args['password']
- )
- else:
- raise ValueError("If authentication is required, both 'user' and 'password' must be provided!")
-
- connection_props = {
- 'auth_provider': auth_provider
- }
- connection_props['protocol_version'] = self.connection_args.get('protocol_version', 4)
- secure_connect_bundle = self.connection_args.get('secure_connect_bundle')
-
- if secure_connect_bundle:
- # Check if the secure bundle is a URL
- if secure_connect_bundle.startswith(('http://', 'https://')):
- secure_connect_bundle = self.download_secure_bundle(secure_connect_bundle)
- connection_props['cloud'] = {
- 'secure_connect_bundle': secure_connect_bundle
- }
- else:
- connection_props['contact_points'] = [self.connection_args['host']]
- connection_props['port'] = int(self.connection_args['port'])
-
- cluster = Cluster(**connection_props)
- session = cluster.connect(self.connection_args.get('keyspace'))
-
- self.is_connected = True
- self.session = session
- return self.session
-
- def check_connection(self) -> StatusResponse:
- """
- Check the connection of the Scylla database
- :return: success status and error message if error occurs
- """
- response = StatusResponse(False)
-
- try:
- session = self.connect()
- # TODO: change the healthcheck
- session.execute('SELECT release_version FROM system.local').one()
- response.success = True
- except Exception as e:
- logger.error(f'Error connecting to Scylla {self.connection_args["keyspace"]}, {e}!')
- response.error_message = e
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def prepare_response(self, resp):
- # replace cassandra types
- data = []
- for row in resp:
- row2 = {}
- for k, v in row._asdict().items():
- if isinstance(v, Date):
- v = v.date()
- row2[k] = v
- data.append(row2)
- return data
-
- def native_query(self, query: str) -> Response:
- """
- Receive SQL query and runs it
- :param query: The SQL query to run in MySQL
- :return: returns the records from the current recordset
- """
- session = self.connect()
- try:
- resp = session.execute(query).all()
- resp = self.prepare_response(resp)
- if resp:
- response = Response(
- RESPONSE_TYPE.TABLE,
- pd.DataFrame(
- resp
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
- except Exception as e:
- logger.error(f'Error running query: {query} on {self.connection_args["keyspace"]}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Retrieve the data from the SQL statement.
- """
-
- # remove table alias because Cassandra Query Language doesn't support it
- if isinstance(query, ast.Select):
- if isinstance(query.from_table, ast.Identifier) and query.from_table.alias is not None:
- query.from_table.alias = None
-
- # remove table name from fields
- table_name = query.from_table.parts[-1]
-
- for target in query.targets:
- if isinstance(target, ast.Identifier):
- if target.parts[0] == table_name:
- target.parts.pop(0)
-
- renderer = SqlalchemyRender('mysql')
- query_str = renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> Response:
- """
- Get a list with all of the tabels in MySQL
- """
- q = "DESCRIBE TABLES;"
- result = self.native_query(q)
- df = result.data_frame
- result.data_frame = df.rename(columns={df.columns[0]: 'table_name'})
- return result
-
- def get_columns(self, table_name) -> Response:
- """
- Show details about the table
- """
- q = f"DESCRIBE {table_name};"
- result = self.native_query(q)
- return result
diff --git a/mindsdb/integrations/handlers/scylla_handler/tests/__init__.py b/mindsdb/integrations/handlers/scylla_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/scylla_handler/tests/test_scylla_handler.py b/mindsdb/integrations/handlers/scylla_handler/tests/test_scylla_handler.py
deleted file mode 100644
index be3de5cb503..00000000000
--- a/mindsdb/integrations/handlers/scylla_handler/tests/test_scylla_handler.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.scylla_handler.scylla_handler import ScyllaHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class ScyllaHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "",
- "port": "9042",
- "user": "",
- "password": "",
- "keyspace": "test_data"
- }
- }
- cls.handler = ScyllaHandler('test_scylla_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.check_connection()
-
- def test_1_native_query_show_keyspaces(self):
- dbs = self.handler.native_query("DESC KEYSPACES;")
- assert dbs['type'] is not RESPONSE_TYPE.ERROR
-
- def test_2_get_tables(self):
- tbls = self.handler.get_tables()
- assert tbls['type'] is not RESPONSE_TYPE.ERROR
-
- def test_3_describe_table(self):
- described = self.handler.get_columns("home_rentals")
- assert described['type'] is RESPONSE_TYPE.TABLE
-
- def test_4_select_query(self):
- query = "SELECT * FROM home_rentals WHERE 'id'='3712'"
- result = self.handler.query(query)
- assert result['type'] is RESPONSE_TYPE.TABLE
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/README.md b/mindsdb/integrations/handlers/sendinblue_handler/README.md
deleted file mode 100644
index 35c66fc489a..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/README.md
+++ /dev/null
@@ -1,82 +0,0 @@
-# Sendinblue Handler
-
-Sendinblue handler for MindsDB provides interfaces to connect to Sendinblue via APIs and pull repository data into MindsDB.
-
----
-
-## Table of Contents
-
-- [Sendinblue Handler](#github-handler)
- - [Table of Contents](#table-of-contents)
- - [About Sendinblue](#about-githhub)
- - [Sendinblue Handler Implementation](#sendinblue-handler-implementation)
- - [Sendinblue Handler Initialization](#sendinblue-handler-initialization)
- - [Implemented Features](#implemented-features)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
-
----
-
-## About Sendinblue
-
-Sendinblue is the only all-in-one digital marketing platform empowering B2B and B2C businesses, ecommerce sellers and agencies to build customer relationships through end to end digital marketing campaigns, transactional messaging, and marketing automation.
-
-https://www.sendinblue.com/about/
-
-## Sendinblue Handler Implementation
-
-This handler was implemented using [sib-api-v3-sdk](https://github.com/sendinblue/APIv3-python-library), the Python SDK for Sendinblue.
-
-## Sendinblue Handler Initialization
-
-The Sendinblue handler is initialized with the following parameters:
-
-- `api_key`: a required Sendinblue API key to use for authentication
-
-Read about creating a Sendinblue API key [here](https://developers.sendinblue.com/docs).
-
-## Implemented Features
-
-- [x] Sendinblue Email Campaigns Table for a given Repository
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-
-## TODO
-
-- [ ] Support INSERT, UPDATE and DELETE for Email Campaigns table
-- [ ] Sendinblue Contacts table
-- [ ] Sendinblue Companies table
-- [ ] Sendinblue Conversations table
-- [ ] Sendinblue Deals table
-- [ ] Many more
-
-## Example Usage
-
-The first step is to create a database with the new `sendinblue` engine by passing in the required `api_key` parameter:
-
-~~~~sql
-CREATE DATABASE sib_datasource
-WITH ENGINE = 'sendinblue',
-PARAMETERS = {
- "api_key": "xkeysib-..."
-};
-~~~~
-
-Use the established connection to query your database:
-
-~~~~sql
-SELECT * FROM sib_datasource.email_campaigns
-~~~~
-
-Run more advanced queries:
-
-~~~~sql
-SELECT id, name
-FROM sib_datasource.email_campaigns
-WHERE status = 'sent'
-ORDER BY name
-LIMIT 5
-~~~~
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/__about__.py b/mindsdb/integrations/handlers/sendinblue_handler/__about__.py
deleted file mode 100644
index d4f0f95f892..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Sendinblue handler"
-__package_name__ = "mindsdb_sendinblue_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Sendinblue"
-__author__ = "Minura Punchihewa"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/__init__.py b/mindsdb/integrations/handlers/sendinblue_handler/__init__.py
deleted file mode 100644
index 61e0aab853f..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .sendinblue_handler import SendinblueHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Sendinblue"
-name = "sendinblue"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/icon.svg b/mindsdb/integrations/handlers/sendinblue_handler/icon.svg
deleted file mode 100644
index 4de5f88af28..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/icon.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/requirements.txt b/mindsdb/integrations/handlers/sendinblue_handler/requirements.txt
deleted file mode 100644
index 6f0b49768ef..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-sib_api_v3_sdk
-urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py b/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py
deleted file mode 100644
index 81f27a9ba6d..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import sib_api_v3_sdk
-
-from mindsdb.integrations.handlers.sendinblue_handler.sendinblue_tables import EmailCampaignsTable
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-logger = log.getLogger(__name__)
-
-
-class SendinblueHandler(APIHandler):
- """
- The Sendinblue handler implementation.
- """
-
- name = 'sendinblue'
-
- def __init__(self, name: str, **kwargs):
- """
- Initialize the handler.
- Args:
- name (str): name of particular handler instance
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- self.connection = None
- self.is_connected = False
-
- email_campaigns_data = EmailCampaignsTable(self)
- self._register_table("email_campaigns", email_campaigns_data)
-
- def connect(self):
- """
- Set up the connection required by the handler.
- Returns
- -------
- StatusResponse
- connection object
- """
- if self.is_connected is True:
- return self.connection
-
- configuration = sib_api_v3_sdk.Configuration()
- configuration.api_key['api-key'] = self.connection_data['api_key']
-
- self.connection = sib_api_v3_sdk.ApiClient(configuration)
-
- self.is_connected = True
-
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """
- Check connection to the handler.
- Returns:
- HandlerStatusResponse
- """
-
- response = StatusResponse(False)
-
- try:
- connection = self.connect()
- api_instance = sib_api_v3_sdk.AccountApi(connection)
- api_instance.get_account()
- response.success = True
- except Exception as e:
- logger.error('Error connecting to Sendinblue!')
- response.error_message = str(e)
-
- self.is_connected = response.success
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py b/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py
deleted file mode 100644
index 139a4a99865..00000000000
--- a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py
+++ /dev/null
@@ -1,276 +0,0 @@
-import sib_api_v3_sdk
-import pandas as pd
-
-
-from typing import List, Dict, Text, Any
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.api_handler import APITable
-
-from mindsdb_sql_parser import ast
-from sib_api_v3_sdk.rest import ApiException
-
-
-from mindsdb.integrations.utilities.handlers.query_utilities import (
- SELECTQueryParser,
- SELECTQueryExecutor,
- UPDATEQueryExecutor,
- UPDATEQueryParser,
- DELETEQueryParser,
- DELETEQueryExecutor,
- INSERTQueryParser,
-)
-
-logger = log.getLogger(__name__)
-
-
-class EmailCampaignsTable(APITable):
- """The Sendinblue Email Campaigns Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the Sendinblue "GET /emailCampaigns" API endpoint.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Sendinblue Email Campaigns matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query, 'email_campaigns', self.get_columns()
- )
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- email_campaigns_df = pd.json_normalize(
- self.get_email_campaigns(limit=result_limit)
- )
-
- select_statement_executor = SELECTQueryExecutor(
- email_campaigns_df, selected_columns, where_conditions, order_by_conditions
- )
- email_campaigns_df = select_statement_executor.execute_query()
-
- return email_campaigns_df
-
- def get_columns(self) -> List[str]:
- return pd.json_normalize(self.get_email_campaigns(limit=1)).columns.tolist()
-
- def get_email_campaigns(self, **kwargs):
- connection = self.handler.connect()
- email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(connection)
- email_campaigns = email_campaigns_api_instance.get_email_campaigns(**kwargs)
- return [email_campaign for email_campaign in email_campaigns.campaigns]
-
- def delete(self, query: ast.Delete) -> None:
- """
- Deletes an email campaign from Sendinblue.
-
- Parameters
- ----------
- query : ast.Delete
- Given SQL DELETE query
-
- Returns
- -------
- None
-
- Raises
- ------
- RuntimeError
- If an error occurs when calling Sendinblue's API
- """
- # this parses the DELETE statement to extract where conditions
- delete_statement_parser = DELETEQueryParser(query)
- where_conditions = delete_statement_parser.parse_query()
- # this retrieves the current list of email campaigns and normalize the data into a DataFrame
- email_campaigns_df = pd.json_normalize(self.get_email_campaigns())
- # this execute the delete query to filter out the campaigns to be deleted
- delete_query_executor = DELETEQueryExecutor(
- email_campaigns_df, where_conditions
- )
- # this gets the updated DataFrame after executing delete conditions
- email_campaigns_df = delete_query_executor.execute_query()
- campaign_ids = email_campaigns_df['id'].tolist()
- self.delete_email_campaigns(campaign_ids)
-
- def delete_email_campaigns(self, campaign_ids: List[Text]) -> None:
- # this establish a connection to Sendinblue API
- connection = self.handler.connect()
- email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(connection)
- for campaign_id in campaign_ids:
- try:
- email_campaigns_api_instance.delete_email_campaign(campaign_id)
- logger.info(f'Email Campaign {campaign_id} deleted')
- except ApiException as e:
- logger.error(
- f"Exception when calling EmailCampaignsApi->delete_email_campaign: {e}\n"
- )
- raise RuntimeError(
- f"Failed to execute the delete command for Email Campaign {campaign_id}"
- ) from e
-
- def update(self, query: 'ast.Update') -> None:
- """
- Updates data in Sendinblue "PUT /emailCampaigns/{campaignId}" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- RuntimeError
- If an error occurs when calling Sendinblue's API
- """
- # this parse the UPDATE statement to extract the new values and the conditions for the update
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- email_campaigns_df = pd.json_normalize(self.get_email_campaigns())
- update_query_executor = UPDATEQueryExecutor(
- email_campaigns_df, where_conditions
- )
- # this retrieves the current list of email campaigns
- email_campaigns_df = update_query_executor.execute_query()
- # this extracts the IDs of the campaigns that have been updated
- campaign_ids = email_campaigns_df['id'].tolist()
-
- self.update_email_campaigns(campaign_ids, values_to_update)
-
- def update_email_campaigns(
- self, campaign_ids: List[int], values_to_update: Dict
- ) -> None:
- # this establish a connection to Sendinblue API
-
- connection = self.handler.connect()
- email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(connection)
-
- for campaign_id in campaign_ids:
- try:
- email_campaigns_api_instance.update_email_campaign(
- campaign_id, values_to_update
- )
- logger.info(f'Email Campaign {campaign_id} updated')
- except ApiException as e:
- logger.error(
- f"Exception when calling EmailCampaignsApi->update_email_campaign: {e}\n"
- )
- raise RuntimeError(
- f"Failed to execute the update command for Email Campaign {campaign_id}"
- ) from e
-
- def insert(self, query: 'ast.Insert') -> None:
- """
- Inserts new email campaigns into Sendinblue.
-
- Parameters
- ----------
- query : ast.Insert
- The SQL INSERT query to be parsed and executed.
-
- Raises
- ------
- ValueError
- If the necessary sender information is incomplete or incorrectly formatted.
- Exception
- For any unexpected errors during the email campaign creation.
- """
- # this defines columns that are supported and mandatory for an INSERT operation.
- supported_columns = [
- 'name', 'subject', 'sender_name', 'sender_email',
- 'html_content', 'scheduled_at', 'recipients_lists', 'tag'
- ]
- mandatory_columns = ['name', 'subject', 'sender_name', 'sender_email', 'html_content']
-
- # this Parse the INSERT query to extract data.
- insert_statement_parser = INSERTQueryParser(
- query, supported_columns=supported_columns,
- mandatory_columns=mandatory_columns, all_mandatory=True
- )
- email_campaigns_data = insert_statement_parser.parse_query()
-
- # this processes each campaign data extracted from the query.
- for email_campaign_data in email_campaigns_data:
- # this extracts and format sender information.
- sender_info = {}
- if 'sender_name' in email_campaign_data:
- sender_info['name'] = email_campaign_data.pop('sender_name')
- if 'sender_email' in email_campaign_data and email_campaign_data['sender_email'] is not None:
- sender_info['email'] = email_campaign_data.pop('sender_email')
- if 'sender_id' in email_campaign_data and email_campaign_data['sender_id'] is not None:
- sender_info['id'] = email_campaign_data.pop('sender_id')
-
- # this validates sender information.
- if not sender_info.get('name') or (not sender_info.get('email') and not sender_info.get('id')):
- raise ValueError("Sender information is incomplete or incorrectly formatted.")
-
- email_campaign_data['sender'] = sender_info
-
- # this creates each email campaign.
- self.create_email_campaign(email_campaign_data)
-
- def create_email_campaign(self, email_campaign_data: Dict[str, Any]) -> None:
- """
- Creates a new email campaign in Sendinblue.
-
- Parameters
- ----------
- email_campaign_data : Dict[str, Any]
- The data for the email campaign to be created.
-
- Raises
- ------
- Exception
- For any errors during the email campaign creation process.
- """
- # this establish a connection to the Sendinblue API.
- api_session = self.handler.connect()
- email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(api_session)
-
- # this logs the data for the email campaign being created.
- logger.info(f"Email campaign data before creating the object: {email_campaign_data}")
-
- try:
- # this creates the email campaign object and send it to Sendinblue.
- email_campaign = sib_api_v3_sdk.CreateEmailCampaign(**email_campaign_data)
- logger.info(f"Email campaign object after creation: {email_campaign}")
-
- # this executes the API call to create the campaign.
- created_campaign = email_campaigns_api_instance.create_email_campaign(email_campaign)
-
- # this checks and log the response from the API.
- if 'id' not in created_campaign.to_dict():
- logger.error('Email campaign creation failed')
- else:
- logger.info(f'Email Campaign {created_campaign.to_dict()["id"]} created')
- except ApiException as e:
- # this handles API exceptions and log the detailed response.
- logger.error(f"Exception when calling EmailCampaignsApi->create_email_campaign: {e}")
- if hasattr(e, 'body'):
- logger.error(f"Sendinblue API response body: {e.body}")
- raise Exception(f'Failed to create Email Campaign with data: {email_campaign_data}') from e
- except Exception as e:
- # this handles any other unexpected exceptions.
- logger.error(f"Unexpected error occurred: {e}")
- raise Exception(f'Unexpected error during Email Campaign creation: {e}') from e
diff --git a/mindsdb/integrations/handlers/serpstack_handler/README.md b/mindsdb/integrations/handlers/serpstack_handler/README.md
deleted file mode 100644
index 6db2284f49b..00000000000
--- a/mindsdb/integrations/handlers/serpstack_handler/README.md
+++ /dev/null
@@ -1,120 +0,0 @@
-# Serpstack Handler
-
-Serpstack handler for MindsDB connects to the Serpstack API and pulls data into MindsDB.
-
----
-
-## Table of Contents
-
-- [Serpstack Handler](#serpstack-handler)
- - [Table of Contents](#table-of-contents)
- - [About Serpstack](#about-serpstack)
- - [Serpstack Handler Implementation](#serpstack-handler-implementation)
- - [Serpstack Handler Initialisation](#serpstack-handler-initialisation)
- - [How to Get Your Serpstack Access Key](#how-to-get-your-serpstack-access-key)
- - [Implemented Features](#implemented-features)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
-
----
-
-## About Serpstack
-
-Serpstack is a real-time search engine results API that provides accurate and reliable search data. It allows users to perform keyword searches and retrieve various types of search results including organic results, images, videos, news, and shopping results etc.
-
-## Serpstack Handler Implementation
-
-This handler was implemented using the Serpstack API. The API provides a simple and efficient way to access search engine results programmatically.
-
-## Serpstack Handler Initialisation
-
-The Serpstack handler is initialised with the following parameter:
-
-- `access_key`: a required API access key for the Serpstack API
-
-## How to Get Your Serpstack Access Key
-
-1. Sign up for an account on [Serpstack](https://serpstack.com).
-2. Choose the plan that suits your needs.
-3. Once the account is created, an access key will be generated for you. This key is required to authenticate API requests.
-
-## Implemented Features
-
-- Fetch organic search results based on a query.
-- Fetch image search results based on a query.
-- Fetch video search results based on a query.
-- Fetch news search results based on a query.
-- Fetch shopping search results based on a query.
-
-## TODO
-
-- Potentially create more tables for:
- - Knowledge graph
- - Inline tweets
- - Related searches
- - Questions
-
-## Example Usage
-
-### Create the Serpstack Database
-
-```sql
-CREATE DATABASE my_serpstack
-WITH
- ENGINE = 'serpstack',
- PARAMETERS = {
- "access_key": "YOUR_ACCESS_KEY"
- };
-```
-
-After setting up the Serpstack handler, the user can use SQL queries to fetch data from search engines using Serpstack. Note that the `query` parameter is required in order to make searches. If no results are found for a given query, a table filled with 'No results found' will be returned instead.
-
-### Fetch Organic Search Results
-
-```sql
-SELECT *
-FROM my_serpstack.organic_results
-WHERE query = 'KFC';
-```
-
-### Fetch Image Search Results
-
-```sql
-SELECT *
-FROM my_serpstack.image_results
-WHERE query = 'Dinosaurs';
-```
-
-### Fetch Video Search Results
-
-```sql
-SELECT *
-FROM my_serpstack.video_results
-WHERE query = 'NBA Finals';
-```
-
-### Fetch News Search Results
-
-```sql
-SELECT *
-FROM my_serpstack.news_results
-WHERE query = 'Euros 2024';
-```
-
-### Fetch Shopping Search Results
-
-```sql
-SELECT *
-FROM my_serpstack.shopping_results
-WHERE query = 'Nvidia 4090';
-```
-
-### Example with Additional Parameters
-
-You can include additional request parameters in your SQL queries. Refer to the [Serpstack documentation](https://serpstack.com/documentation) under **HTTP GET Request Parameters** for a full list of available parameters.
-
-```sql
-SELECT *
-FROM my_serpstack.organic_results
-WHERE query = 'McDonalds' AND gl = 'no' AND page = '2';
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/serpstack_handler/__about__.py b/mindsdb/integrations/handlers/serpstack_handler/__about__.py
deleted file mode 100644
index 6c77916b05d..00000000000
--- a/mindsdb/integrations/handlers/serpstack_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Serpstack handler'
-__package_name__ = 'mindsdb_serpstack_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Serpstack API"
-__author__ = 'Anders Ooi'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2024 - mindsdb'
diff --git a/mindsdb/integrations/handlers/serpstack_handler/__init__.py b/mindsdb/integrations/handlers/serpstack_handler/__init__.py
deleted file mode 100644
index f06de8ab1d9..00000000000
--- a/mindsdb/integrations/handlers/serpstack_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .serpstack_handler import SerpstackHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Serpstack"
-name = "serpstack"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path"
-]
diff --git a/mindsdb/integrations/handlers/serpstack_handler/icon.svg b/mindsdb/integrations/handlers/serpstack_handler/icon.svg
deleted file mode 100644
index 110bbdeb732..00000000000
--- a/mindsdb/integrations/handlers/serpstack_handler/icon.svg
+++ /dev/null
@@ -1,86 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/serpstack_handler/serpstack_handler.py b/mindsdb/integrations/handlers/serpstack_handler/serpstack_handler.py
deleted file mode 100644
index 48b7199b6df..00000000000
--- a/mindsdb/integrations/handlers/serpstack_handler/serpstack_handler.py
+++ /dev/null
@@ -1,123 +0,0 @@
-import requests
-
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.integrations.libs.api_handler import APIHandler
-
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response
-)
-
-from .serpstack_tables import (
- OrganicResultsTable, ImageResultsTable,
- VideoResultsTable, NewsResultsTable, ShoppingResultsTable
-)
-
-
-logger = log.getLogger(__name__)
-
-
-class SerpstackHandler(APIHandler):
- """A class for handling connections and interactions with the Serpstack API.
- Attributes:
- api_key (str): API access key for the Serpstack API.
- is_connected (bool): Whether or not the API client is connected to Serpstack.
- """
-
- name = 'serpstack'
-
- def __init__(self, name: str = None, **kwargs):
- super().__init__(name)
-
- connection_data = kwargs.get('connection_data', {})
- self.connection_data = connection_data
-
- self.access_key = None
- self.base_url = None
- self.is_connected = False
-
- if 'access_key' in self.connection_data:
- self.access_key = self.connection_data['access_key']
-
- # register tables
- organic_results = OrganicResultsTable(self)
- self._register_table('organic_results', organic_results)
-
- image_results = ImageResultsTable(self)
- self._register_table('image_results', image_results)
-
- video_results = VideoResultsTable(self)
- self._register_table('video_results', video_results)
-
- news_results = NewsResultsTable(self)
- self._register_table('news_results', news_results)
-
- shopping_results = ShoppingResultsTable(self)
- self._register_table('shopping_results', shopping_results)
-
- def connect(self):
- """ Sets up connection and returns the base URL to be used"""
-
- if self.is_connected:
- return self.base_url
-
- if not self.access_key:
- logger.error("No access key provided for Serpstack API")
- return None
-
- try:
- url = f"https://api.serpstack.com/search?access_key={self.access_key}"
- api_request = requests.get(url)
- api_response = api_request.json()
- # error 105 means that user is on a free plan
- if api_response['error']['code'] == 105:
- self.base_url = "http://api.serpstack.com/search"
- # error 310 means that missing search query, which means that user can use https
- elif api_response['error']['code'] == 310:
- self.base_url = "https://api.serpstack.com/search"
- # any other error suggests issues with the account
- else:
- logger.error(f"Failed to connect to Serpstack API: {api_response['error']['info']}")
- return None
-
- self.is_connected = True
- return self.base_url
-
- except Exception as e:
- logger.error(f"Failed to connect to Serpstack API: {str(e)}")
- return None
-
- def check_connection(self) -> StatusResponse:
- """ Checks connection to Serpstack API"""
- response = StatusResponse(False)
-
- try:
- self.connect()
- response.success = True
- response.copy_storage = True
- except Exception as e:
- response.error_message = (
- f"Failed to connect to Serpstack API: {str(e)}"
- )
- logger.error(response.error_message)
- response.success = False
-
- self.is_connected = response.success
-
- return response
-
- def native_query(self, query: str = None) -> Response:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/serpstack_handler/serpstack_tables.py b/mindsdb/integrations/handlers/serpstack_handler/serpstack_tables.py
deleted file mode 100644
index b9e1def3fff..00000000000
--- a/mindsdb/integrations/handlers/serpstack_handler/serpstack_tables.py
+++ /dev/null
@@ -1,260 +0,0 @@
-import pandas as pd
-import requests
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-
-
-class BaseResultsTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Selects data from the results table and returns it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- """
- base_url = self.handler.connect()
-
- params = {'access_key': self.handler.access_key}
- conditions = extract_comparison_conditions(query.where)
- params.update({condition[1]: condition[2] for condition in conditions if condition[0] == '='})
-
- if 'query' not in params:
- raise ValueError('Query is missing in the SQL query')
- if 'type' not in params and hasattr(self, 'default_type'):
- params['type'] = self.default_type
- try:
- api_response = requests.get(base_url, params=params)
- api_response.raise_for_status() # raises HTTPError for bad responses
- api_result = api_response.json()
- except requests.exceptions.HTTPError as e:
- raise SystemError(f"HTTP error occurred: {e.response.status_code} - {e.response.reason}")
- except requests.exceptions.ConnectionError as e:
- raise SystemError(f"Connection error occurred: {str(e)}")
- except requests.exceptions.Timeout as e:
- raise SystemError(f"Request timeout: {str(e)}")
- except requests.exceptions.RequestException as e:
- raise SystemError(f"Request exception occurred: {str(e)}")
- except ValueError as e:
- raise SystemError(f"Failed to parse JSON response: {str(e)}")
-
- results = api_result.get(self.results_key, [])
- processed_results = [self.extract_data(result) for result in results]
-
- if len(processed_results) == 0:
- columns = self.get_columns()
- empty_data = {col: ["No results found"] for col in columns}
- return pd.DataFrame(empty_data, columns=columns)
-
- result_df = pd.DataFrame(processed_results)
- result_df = self.filter_columns(result_df, query)
- return result_df
-
- def extract_data(self, data):
- """
- Extracts the required data from the result.
-
- Args:
- data (dict): The result data.
-
- Returns:
- dict: A dictionary containing the extracted data.
- """
- raise NotImplementedError("Subclasses must implement this method.")
-
- def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
- """
- Filters the columns of the result DataFrame.
-
- Args:
- result (pandas.DataFrame): The result DataFrame.
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the filtered data.
- """
- columns = []
- if query is not None:
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
- else:
- columns = self.get_columns()
-
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- result = result[columns]
-
- if query is not None and query.limit is not None:
- result = result.head(query.limit.value)
-
- return result
-
-
-class OrganicResultsTable(BaseResultsTable):
- results_key = 'organic_results'
-
- def extract_data(self, organic):
- return {
- 'position': organic.get('position'),
- 'title': organic.get('title'),
- 'url': organic.get('url'),
- 'domain': organic.get('domain'),
- 'displayed_url': organic.get('displayed_url'),
- 'snippet': organic.get('snippet'),
- 'cached_page_url': organic.get('cached_page_url'),
- 'related_pages_url': organic.get('related_pages_url'),
- 'prerender': organic.get('prerender'),
- 'sitelinks': self._extract_sitelinks(organic.get('sitelinks')),
- 'rich_snippet': self._extract_rich_snippet(organic.get('rich_snippet'))
- }
-
- def _extract_sitelinks(self, sitelinks):
- if not sitelinks:
- return None
- return {
- 'inline': [{'title': link['title'], 'url': link['url']} for link in sitelinks.get('inline', [])],
- 'expanded': [{'title': link['title'], 'url': link['url']} for link in sitelinks.get('expanded', [])]
- }
-
- def _extract_rich_snippet(self, rich_snippet):
- if not rich_snippet:
- return None
- snippet_type = 'top' if 'top' in rich_snippet else 'bottom'
- return {
- 'detected_extensions': rich_snippet.get(snippet_type, {}).get('detected_extensions', []),
- 'extensions': rich_snippet.get(snippet_type, {}).get('extensions', [])
- }
-
- def get_columns(self):
- return [
- 'position',
- 'title',
- 'url',
- 'domain',
- 'displayed_url',
- 'snippet',
- 'cached_page_url',
- 'related_pages_url',
- 'prerender',
- 'sitelinks',
- 'rich_snippet'
- ]
-
-
-class ImageResultsTable(BaseResultsTable):
- results_key = 'image_results'
- default_type = 'images'
-
- def extract_data(self, image):
- return {
- 'position': image.get('position'),
- 'title': image.get('title'),
- 'width': image.get('width'),
- 'height': image.get('height'),
- 'image_url': image.get('image_url'),
- 'type': image.get('type'),
- 'url': image.get('url'),
- 'source': image.get('source')
- }
-
- def get_columns(self):
- return [
- 'position',
- 'title',
- 'width',
- 'height',
- 'image_url',
- 'type',
- 'url',
- 'source'
- ]
-
-
-class VideoResultsTable(BaseResultsTable):
- results_key = 'video_results'
- default_type = 'videos'
-
- def extract_data(self, video):
- return {
- 'position': video.get('position'),
- 'title': video.get('title'),
- 'url': video.get('url'),
- 'displayed_url': video.get('displayed_url'),
- 'uploaded': video.get('uploaded'),
- 'snippet': video.get('snippet'),
- 'length': video.get('length')
- }
-
- def get_columns(self):
- return [
- 'position',
- 'title',
- 'url',
- 'displayed_url',
- 'uploaded',
- 'snippet',
- 'length'
- ]
-
-
-class NewsResultsTable(BaseResultsTable):
- results_key = 'news_results'
- default_type = 'news'
-
- def extract_data(self, news):
- return {
- 'position': news.get('position'),
- 'title': news.get('title'),
- 'url': news.get('url'),
- 'source_name': news.get('source_name'),
- 'uploaded': news.get('uploaded'),
- 'uploaded_utc': news.get('uploaded_utc'),
- 'snippet': news.get('snippet'),
- 'thumbnail_url': news.get('thumbnail_url')
- }
-
- def get_columns(self):
- return [
- 'position',
- 'title',
- 'url',
- 'source_name',
- 'uploaded',
- 'uploaded_utc',
- 'snippet',
- 'thumbnail_url'
- ]
-
-
-class ShoppingResultsTable(BaseResultsTable):
- results_key = 'shopping_results'
- default_type = 'shopping'
-
- def extract_data(self, shopping):
- return {
- 'position': shopping.get('position'),
- 'title': shopping.get('title'),
- 'url': shopping.get('url')
- }
-
- def get_columns(self):
- return [
- 'position',
- 'title',
- 'url'
- ]
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/README.md b/mindsdb/integrations/handlers/sharepoint_handler/README.md
deleted file mode 100644
index 738cb2e4d89..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/README.md
+++ /dev/null
@@ -1,110 +0,0 @@
-# Sharepoint Handler
-
-Sharepoint handler for MindsDB provides interfaces to connect to Sharepoint via graph APIs and pull data into MindsDB.
-
----
-
-## Table of Contents
-
-- [Sharepoint Handler](#Sharepoint-handler)
- - [Table of Contents](#table-of-contents)
- - [About Sharepoint](#about-sharepoint)
- - [Sharepoint Handler Implementation](#sharepoint-handler-implementation)
- - [Sharepoint Handler Initialization](#sharepoint-handler-initialization)
- - [Implemented Features](#implemented-features)
- - [Some useful definitions](#some-useful-definitions)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
-
----
-
-## About Sharepoint
-
-SharePoint in Microsoft 365 empowers teamwork with dynamic and productive team sites for every project team, department, and division. Share files, data, news, and resources. Customize your site to streamline your teamβs work.
-
-## Sharepoint Handler Implementation
-
-This handler was implemented using the [Microsoft Graph API](https://learn.microsoft.com/en-us/graph/use-the-api) endpoint.
-
-Graph API is a REST API endpoint that provides a simple and easy-to-use interface to access many microsoft tools including Sharepoint.
-
-## Sharepoint Handler Initialization
-
-The Sharepoint handler is initialized with the following parameters:
-
-- `clientId`: (required) Microsoft App client ID
-- `clientSecret`: (required) client secret associated with the App
-- `tenantId`: (required) GUID of the tenant in which the App has been created
-
-## How to get your credentials.
-
-1. Visit Microsoft Entra admin center and register a new App
-2. Go to API permissions and grant all the permissions related to sharepoint sites and resources
-3. Go to Certificates & secrets tab of the app and create a new client secret
-4. Now go to the App overview page where you will find client-ID and tenant-ID of your App.
-
-## Implemented Features
-
-- Fetch sites associated with the account and ability to update the metadata associated with a site (deletion and creation of sites has not been implemented)
-- Fetch lists associated with the account and ability to create more lists, update fields associated with lists and deletion of lists
-- Fetch site columns associated with the account and ability to create more site columns, update fields associated with site columns and deletion of site columns
-- Fetch list items associated with the account and ability to create more list items, update fields associated with list items and deletion of list items
-
-
-## Some useful definitions
-
-### Sites:
-SharePoint-sites are essentially containers for information. The way you store and organize things in SharePoint is by Sites.
-
-### Lists:
-A list is a collection of data that you can share with your team members and people who you've provided access to. You'll find a number of ready-to-use list templates to provide a good starting point for organizing list items.
-
-https://support.microsoft.com/en-us/office/introduction-to-lists-0a1c3ace-def0-44af-b225-cfa8d92c52d7
-
-### Site columns
-A Site Column is a template of a configured column. By creating a Site Column, you can reuse it anywhere else in the site and not have to manually rebuild its configuration at each reuse.
-
-When creating a new column in a list or library, you have a choice to either "Create column" or "Add from existing site columns". Selecting the latter will add a replica of the Site Column to the location you are working.
-
-https://learn.microsoft.com/en-us/microsoft-365/community/what-is-site-column
-
-### List items
-A SharePoint list can be considered as a collection of items. The list items can be a variety of things, such as contacts, calendars, announcements, and issues-tracking.
-
-https://support.microsoft.com/en-us/office/introduction-to-lists-0a1c3ace-def0-44af-b225-cfa8d92c52d7
-
-### Difference between site column and list columns
-The main difference between site column and list columns is the scope of use.
-That is a list column will only be available to that particular list/library, and not outside that boundary.
-If you wish to use that column outside that list/library, you will have to recreate it at the new location.
-
-Site columns on the other hand, are created at the site level, and available to reuse from the site they're created in (as the starting point).
-
-https://learn.microsoft.com/en-us/microsoft-365/community/list-column-or-site-column-which-one-to-choose
-
-## TODO
-
-- Update and delete a site which is functionality that is not yet supported by Graph API
-- Add other tables like list columns and other components that are part of a sharepoint site
-- Replace the REST calls with an SDK. Currently, the Microsoft Graph SDK is under development/preview mode. In the future, we should replace the REST calls with library's methods.
-
-## Example Usage
-```
-CREATE DATABASE sharepoint_test
-With
- ENGINE = 'sharepoint',
- PARAMETERS = {
- "clientId":"YOUR_CLIENT_ID",
- "clientSecret":"YOUR_CLIENT_SECRET",
- "tenantId":"YOUR_TENANT_ID"
- };
-```
-
-After setting up the Sharepoint Handler, you can use SQL queries to fetch data from Sharepoint
-and perform CRUD operations on it:
-
-Example shows how to fetch all the lists associated with the account:
-```sql
-SELECT *
-FROM sharepoint_test.lists
-```
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/__about__.py b/mindsdb/integrations/handlers/sharepoint_handler/__about__.py
deleted file mode 100644
index 49537068f23..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Sharepoint handler"
-__package_name__ = "mindsdb_sharepoint_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Sharepoint"
-__author__ = "Abhijit Pal"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/__init__.py b/mindsdb/integrations/handlers/sharepoint_handler/__init__.py
deleted file mode 100644
index 8854ce77ae7..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .sharepoint_handler import SharepointHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Microsoft Sharepoint"
-name = "sharepoint"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/icon.svg b/mindsdb/integrations/handlers/sharepoint_handler/icon.svg
deleted file mode 100644
index f024ce4a95d..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/icon.svg
+++ /dev/null
@@ -1,23 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_api.py b/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_api.py
deleted file mode 100644
index 4651b4e8db0..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_api.py
+++ /dev/null
@@ -1,579 +0,0 @@
-import ast
-from datetime import datetime, timezone
-from typing import Text, List, Dict, Any
-
-from mindsdb.integrations.handlers.sharepoint_handler.utils import (
- bearer_token_request,
- get_an_entity,
- delete_an_entity,
- update_an_entity,
- create_an_entity,
-)
-
-
-class SharepointAPI:
- def __init__(
- self, client_id: str = None, client_secret: str = None, tenant_id: str = None
- ):
- self.client_id = client_id
- self.client_secret = client_secret
- self.tenant_id = tenant_id
- self.bearer_token = None
- self.is_connected = False
- self.expiration_time = datetime.now(timezone.utc).timestamp()
-
- def get_bearer_token(self) -> None:
- """
- Generates new bearer token for the credentials
-
- Returns
- None
- """
- response = bearer_token_request(
- client_id=self.client_id,
- tenant_id=self.tenant_id,
- client_secret=self.client_secret,
- )
- self.bearer_token = response["access_token"]
- self.expiration_time = int(response["expires_on"])
- self.is_connected = True
-
- def check_bearer_token_validity(self) -> bool:
- """
- Provides information whether a valid bearer token is available or not. Returns true if available
- otherwise false
-
- Returns
- bool
- """
- if (
- self.is_connected
- and datetime.now(timezone.utc).astimezone().timestamp()
- < self.expiration_time
- ):
- return True
- else:
- return False
-
- def disconnect(self) -> None:
- """
- Removes bearer token from the sharepoint API class (makes it null)
-
- Returns
- None
- """
- self.bearer_token = None
- self.is_connected = False
-
- def get_all_sites(self, limit: int = None) -> List[Dict[Text, Any]]:
- """
- Gets all sites associated with the account
-
- limit: limits the number of site information to be returned
-
- Returns
- response: metadata information corresponding to all sites
- """
- url = "https://graph.microsoft.com/v1.0/sites?search=*"
- response = get_an_entity(url=url, bearer_token=self.bearer_token)
- if limit:
- response = response[:limit]
- return response
-
- def update_sites(
- self, site_dict: List[Dict[Text, Text]], values_to_update: Dict[Text, Any]
- ) -> None:
- """
- Updates the given sites (site_dict) with the provided values (values_to_update)
- Calls the function update_a_site for every site
- site_dict: A dictionary containing site ids of the sites which are to be updated
- values_to_update: a dictionary which will be used to update the fields of the sites
-
- Returns
- None
- """
- for site_entry in site_dict:
- self.update_a_site(
- site_id=site_entry["siteId"],
- values_to_update=values_to_update,
- )
-
- def update_a_site(self, site_id: str, values_to_update: Dict[Text, Any]) -> None:
- """
- Updates a site with given values
- site_id: GUID of the site
- values_to_update: a dictionary values which will be used to update the properties of the site
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/"
- update_an_entity(
- url=url, values_to_update=values_to_update, bearer_token=self.bearer_token
- )
-
- def get_lists_by_site(
- self, site_id: str, limit: int = None
- ) -> List[Dict[Text, Any]]:
- """
- Gets lists' information corresponding to a site
-
- site_id: GUID of a site
- limit: limits the number of lists for which information is returned
-
- Returns
- response: metadata information/ fields corresponding to lists of the site
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists"
- response = get_an_entity(url=url, bearer_token=self.bearer_token)
- if limit:
- response = response[:limit]
- return response
-
- def get_all_lists(self, limit: int = None) -> List[Dict[Text, Any]]:
- """
- Gets all the lists' information assocaited with the account
-
- limit: puts a limit to the number of lists returned
-
- Returns
- response: returns metadata information regarding all the lists that have been made using that account
- """
- sites = self.get_all_sites()
- lists = []
- for site in sites:
- for list_dict in self.get_lists_by_site(site_id=site["id"].split(",")[1]):
- list_dict["siteName"] = site["name"]
- list_dict["siteId"] = site["id"].split(",")[1]
- lists.append(list_dict)
- if limit:
- lists = lists[:limit]
- return lists
-
- def delete_lists(self, list_dict: List[Dict[Text, Any]]) -> None:
- """
- Deletes lists for the given site ID and list ID
-
- list_dict: a dictionary values containing the list IDs which are to be deleted and
- their corresponding site IDs
-
- Returns
- None
- """
- for list_entry in list_dict:
- self.delete_a_list(site_id=list_entry["siteId"], list_id=list_entry["id"])
-
- def delete_a_list(self, site_id: str, list_id: str) -> None:
- """
- Deletes a list, given its list ID and its site ID
-
- site_id: GUID of the site in which the list is present
- list_id: GUID of the list which is to be deleted
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}"
- delete_an_entity(url=url, bearer_token=self.bearer_token)
-
- def update_lists(
- self, list_dict: List[Dict[Text, Text]], values_to_update: Dict[Text, Any]
- ) -> None:
- """
- Updates the given lists (list_dict) with the provided values (values_to_update)
- Calls the function update_a_list for every list
- list_dict: A dictionary containing ids of the list which are to be updated and also their site IDs
- values_to_update: a dictionary which will be used to update the fields of the lists
-
- Returns
- None
- """
- for list_entry in list_dict:
- self.update_a_list(
- site_id=list_entry["siteId"],
- list_id=list_entry["id"],
- values_to_update=values_to_update,
- )
-
- def update_a_list(
- self, site_id: str, list_id: str, values_to_update: Dict[Text, Any]
- ) -> None:
- """
- Updates a list with given values
- list_id: GUID of the list
- site_id: GUID of the site in which the list is present
- values_to_update: a dictionary values which will be used to update the properties of the list
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/"
- update_an_entity(
- url=url, bearer_token=self.bearer_token, values_to_update=values_to_update
- )
-
- def create_lists(self, data: List[Dict[Text, Any]]) -> None:
- """
- Creates lists with the information provided in the data parameter
- calls create_a_list for each entry of list metadata dictionary
-
- data: parameter which contains information such as the site IDs where the lists would be created
- and their metadata information which will be used to create them
-
- Returns
- None
- """
- for entry in data:
- self.create_a_list(
- site_id=entry["siteId"],
- column=entry.get("column"),
- display_name=entry["displayName"],
- list_template=entry["list"],
- )
-
- def create_a_list(
- self, site_id: str, list_template: str, display_name: str, column: str = None
- ) -> None:
- """
- Creates a list with metadata information provided in the params
-
- site_id: GUID of the site where the list is to be created
- list_template: a string which contains the list template information (type of list)
- eg.- "{'template': 'documentLibrary'}"
- display_name: the display name of the given list, which will be displayed in the site
- column: specifies the list of columns that should be created for the list
- eg.- "[{'name': 'Author', 'text': { }},{'name': 'PageCount', 'number': { }}]"
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/"
- payload = {}
- if column:
- column = ast.literal_eval(column)
- payload["column"] = column
- payload["displayName"] = display_name
- payload["list"] = ast.literal_eval(list_template)
- create_an_entity(url=url, payload=payload, bearer_token=self.bearer_token)
-
- def get_site_columns_by_site(
- self, site_id: str, limit: int = None
- ) -> List[Dict[Text, Any]]:
- """
- Gets columns' information corresponding to a site
-
- site_id: GUID of a site
- limit: limits the number of columns for which information is returned
-
- Returns
- response: metadata information/ fields corresponding to columns of the site
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/"
- response = get_an_entity(url=url, bearer_token=self.bearer_token)
- if limit:
- response = response[:limit]
- return response
-
- def get_all_site_columns(self, limit: int = None) -> List[Dict[Text, Any]]:
- """
- Gets all the columns' information associated with the account
-
- limit: puts a limit to the number of columns returned
-
- Returns
- response: returns metadata information regarding all the columns that have been made using that account
- """
- sites = self.get_all_sites()
- site_columns = []
- for site in sites:
- for site_column_dict in self.get_site_columns_by_site(
- site_id=site["id"].split(",")[1]
- ):
- site_column_dict["siteName"] = site["name"]
- site_column_dict["siteId"] = site["id"].split(",")[1]
- site_columns.append(site_column_dict)
- if limit:
- site_columns = site_columns[:limit]
- return site_columns
-
- def update_site_columns(
- self,
- site_column_dict: List[Dict[Text, Text]],
- values_to_update: Dict[Text, Any],
- ) -> None:
- """
- Updates the given columns (site_column_dict) with the provided values (values_to_update)
- Calls the function update_a_site_column for every column
-
- site_column_dict: A dictionary containing ids of the column which are to be updated and
- also their site IDs
- values_to_update: a dictionary which will be used to update the fields of the columns
-
- Returns
- None
- """
- for site_column_entry in site_column_dict:
- self.update_a_site_column(
- site_id=site_column_entry["siteId"],
- column_id=site_column_entry["id"],
- values_to_update=values_to_update,
- )
-
- def update_a_site_column(
- self, site_id: str, column_id: str, values_to_update: Dict[Text, Any]
- ):
- """
- Updates a column with given values
-
- column_id: GUID of the column
- site_id: GUID of the site in which the column is present
- values_to_update: a dictionary values which will be used to update the properties of the column
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/{column_id}"
- update_an_entity(
- url=url, values_to_update=values_to_update, bearer_token=self.bearer_token
- )
-
- def delete_site_columns(self, column_dict: List[Dict[Text, Any]]) -> None:
- """
- Deletes columns for the given site ID and column ID
-
- column_dict: a dictionary values containing the column IDs which are to be deleted and
- their corresponding site IDs
-
- Returns
- None
- """
- for column_entry in column_dict:
- self.delete_a_site_columns(
- site_id=column_entry["siteId"], column_id=column_entry["id"]
- )
-
- def delete_a_site_columns(self, site_id: str, column_id: str) -> None:
- """
- Deletes a column, given its column ID and its site ID
-
- site_id: GUID of the site in which the column is present
- column_id: GUID of the column which is to be deleted
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/{column_id}"
- delete_an_entity(url=url, bearer_token=self.bearer_token)
-
- def create_site_columns(self, data: List[Dict[Text, Any]]) -> None:
- """
- Creates columns with the information provided in the data parameter
- calls create_a_site_column for each entry of column metadata dictionary
-
- data: parameter which contains information such as the site IDs where the columns would be created
- and their metadata information which will be used to create them
-
- Returns
- None
- """
- for entry in data:
- self.create_a_site_column(
- site_id=entry["siteId"],
- enforce_unique_values=entry.get("enforceUniqueValues"),
- hidden=entry.get("hidden"),
- indexed=entry.get("indexed"),
- name=entry["name"],
- text=entry.get("text"),
- )
-
- def create_a_site_column(
- self,
- site_id: str,
- enforce_unique_values: bool,
- hidden: bool,
- indexed: bool,
- name: str,
- text: str = None,
- ) -> None:
- """
- Creates a list with metadata information provided in the params
-
- site_id: GUID of the site where the column is to be created
- enforced_unique_values: if true, no two list items may have the same value for this column
- hidden: specifies whether the column is displayed in the user interface
- name: the API-facing name of the column as it appears in the fields on a listItem.
- text: details regarding the text values in the column
-
- Returns
- None
- """
-
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/"
- payload = {}
- if text:
- text = ast.literal_eval(text)
- payload["text"] = text
- payload["name"] = name
- if enforce_unique_values is not None:
- payload["enforceUniqueValues"] = enforce_unique_values
- if hidden is not None:
- payload["hidden"] = hidden
- if indexed is not None:
- payload["indexed"] = indexed
- create_an_entity(url=url, payload=payload, bearer_token=self.bearer_token)
-
- def get_items_by_sites_and_lists(
- self, site_id: str, list_id: str, limit: int = None
- ) -> List[Dict[Text, Any]]:
- """
- Gets items' information corresponding to a site and a list
-
- site_id: GUID of a site
- list_id: GUID of a list
- limit: limits the number of columns for which information is returned
-
- Returns
- response: metadata information/ fields corresponding to list-items of the site
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items?expand=fields&select=*"
- response = get_an_entity(url=url, bearer_token=self.bearer_token)
- if limit:
- response = response[:limit]
- return response
-
- def get_all_items(self, limit: int = None) -> List[Dict[Text, Any]]:
- """
- Gets all the items' information associated with the account
-
- limit: puts a limit to the number of items returned
-
- Returns
- response: returns metadata information regarding all the items that are associated with that account
- """
- sites = self.get_all_sites()
- items = []
- for site in sites:
- site_id = site["id"].split(",")[1]
- for sharepoint_list in self.get_lists_by_site(site_id=site_id):
- for item_dict in self.get_items_by_sites_and_lists(
- site_id=site["id"].split(",")[1], list_id=sharepoint_list["id"]
- ):
- item_dict["siteName"] = site["name"]
- item_dict["siteId"] = site["id"].split(",")[1]
- item_dict["listId"] = sharepoint_list["id"]
- item_dict["list_name"] = sharepoint_list["displayName"]
- items.append(item_dict)
- if limit:
- items = items[:limit]
- return items
-
- def update_items(
- self, item_dict: List[Dict[Text, Text]], values_to_update: Dict[Text, Any]
- ) -> None:
- """
- Updates the given items (item_dict) with the provided values (values_to_update)
- Calls the function update_a_item for every column
-
- item_dict: A dictionary containing ids of the list-items which are to be updated and
- also their site IDs
- values_to_update: a dictionary which will be used to update the fields of the items
-
- Returns
- None
- """
- for item_entry in item_dict:
- self.update_an_item(
- site_id=item_entry["siteId"],
- list_id=item_entry["listId"],
- item_id=item_entry["id"],
- values_to_update=values_to_update,
- )
-
- def update_an_item(
- self,
- site_id: str,
- list_id: str,
- item_id: str,
- values_to_update: Dict[Text, Any],
- ):
- """
- Updates an item with given values
-
- item_id: GUID of the column
- list_id: GUID of the list
- site_id: GUID of the site in which the list is present
- values_to_update: a dictionary values which will be used to update the properties of the list-item
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items/{item_id}"
- update_an_entity(
- url=url, values_to_update=values_to_update, bearer_token=self.bearer_token
- )
-
- def delete_items(self, item_dict: List[Dict[Text, Any]]) -> None:
- """
- Deletes items for the given site ID and list ID
-
- item_dict: a dictionary values containing the item IDs which are to be deleted,
- their corresponding site IDs and their list IDs
-
- Returns
- None
- """
- for item_entry in item_dict:
- self.delete_an_item(
- site_id=item_entry["siteId"],
- list_id=item_entry["listId"],
- item_id=item_entry["id"],
- )
-
- def delete_an_item(self, site_id: str, list_id: str, item_id: str) -> None:
- """
- Deletes an item, given its item ID, its site ID and its list ID
-
- list_id: GUID of the list in which the site is present
- site_id: GUID of the site in which the list is present
- item_id: GUID of the item which is to be deleted
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items/{item_id}"
- delete_an_entity(url=url, bearer_token=self.bearer_token)
-
- def create_items(self, data: List[Dict[Text, Any]]) -> None:
- """
- Creates items with the information provided in the data parameter
- calls create_an_item for each entry of item metadata dictionary
-
- data: parameter which contains information such as the site IDs and list IDs where the items
- would be created and their metadata information which will be used to create them
-
- Returns
- None
- """
- for entry in data:
- self.create_an_item(
- site_id=entry["siteId"],
- list_id=entry["listId"],
- fields=entry.get("fields"),
- )
-
- def create_an_item(self, site_id: str, list_id: str, fields: str) -> None:
- """
- Creates an item with metadata information provided in the params
-
- site_id: GUID of the site where the list id present
- list_id: GUID of the list where the item is to be created
- fields: The values of the columns set on this list item.
-
- Returns
- None
- """
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items/"
- payload = {}
- if fields:
- payload["fields"] = ast.literal_eval(fields)
- create_an_entity(url=url, payload=payload, bearer_token=self.bearer_token)
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_handler.py b/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_handler.py
deleted file mode 100644
index 1672cde8f3d..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_handler.py
+++ /dev/null
@@ -1,147 +0,0 @@
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.integrations.handlers.sharepoint_handler.sharepoint_api import (
- SharepointAPI,
-)
-from mindsdb.integrations.handlers.sharepoint_handler.sharepoint_tables import (
- ListsTable,
- SitesTable,
- ListItemsTable,
- SiteColumnsTable,
-)
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-from mindsdb.utilities import log
-from collections import OrderedDict
-
-logger = log.getLogger(__name__)
-
-
-class SharepointHandler(APIHandler):
- """
- The Sharepoint handler implementation.
- """
-
- name = "sharepoint"
-
- def __init__(self, name: str, **kwargs):
- """
- Initialize the handler.
- Args:
- name (str): name of particular handler instance
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- if not (
- self.connection_data["clientId"]
- and (
- self.connection_data["tenantId"]
- and self.connection_data["clientSecret"]
- )
- ):
- raise Exception(
- "client params and tenant id is required for Sharepoint connection!"
- )
-
- self.connection = None
- self.is_connected = False
- self._client = None
- lists_data = ListsTable(self)
- self._register_table("lists", lists_data)
-
- sites_data = SitesTable(self)
- self._register_table("sites", sites_data)
-
- site_columns_data = SiteColumnsTable(self)
- self._register_table("siteColumns", site_columns_data)
-
- list_items_data = ListItemsTable(self)
- self._register_table("listItems", list_items_data)
-
- def connect(self):
- """
- Set up the context connection required by the handler.
- Returns
- -------
- StatusResponse
- connection object
- """
- if self.is_connected is True:
- return self.connection
- self.connection = SharepointAPI(
- tenant_id=self.connection_data["tenantId"],
- client_id=self.connection_data["clientId"],
- client_secret=self.connection_data["clientSecret"],
- )
- self.connection.get_bearer_token()
- self.is_connected = True
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """
- Check connection to the handler.
- Returns:
- HandlerStatusResponse
- """
-
- response = StatusResponse(False)
-
- try:
- connection = self.connect()
- response.success = connection.check_bearer_token_validity()
- except Exception as e:
- logger.error("Error connecting to Sharepoint! " + str(e))
- response.error_message = str(e)
-
- self.is_connected = response.success
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
-
-
-connection_args = OrderedDict(
- clientId={
- "type": ARG_TYPE.STR,
- "description": "Client Id of the App",
- "required": True,
- "label": "Client ID",
- },
- clientSecret={
- "type": ARG_TYPE.PWD,
- "description": "Client Secret of the App",
- "required": True,
- "label": "Client Secret",
- },
- tenantId={
- "type": ARG_TYPE.STR,
- "description": "Tenant Id of the tenant of the App",
- "required": True,
- "label": "Tenant ID",
- },
-)
-
-connection_args_example = OrderedDict(
- clientId="xxxx-xxxx-xxxx-xxxx",
- clientSecret="",
- tenantId="xxxx-xxxx-xxxx-xxxx",
-)
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_tables.py b/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_tables.py
deleted file mode 100644
index 04e9a0804a5..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_tables.py
+++ /dev/null
@@ -1,581 +0,0 @@
-import pandas as pd
-from typing import Text, List, Dict, Any
-
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.libs.api_handler import APITable
-
-from mindsdb.integrations.utilities.handlers.query_utilities.select_query_utilities import (
- SELECTQueryParser,
- SELECTQueryExecutor,
-)
-from mindsdb.integrations.utilities.handlers.query_utilities.delete_query_utilities import (
- DELETEQueryParser,
- DELETEQueryExecutor,
-)
-
-from mindsdb.integrations.utilities.handlers.query_utilities.update_query_utilities import (
- UPDATEQueryParser,
- UPDATEQueryExecutor,
-)
-
-from mindsdb.integrations.utilities.handlers.query_utilities import INSERTQueryParser
-
-
-class SitesTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Sharepoint Sites data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Sharepoint Sites matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- select_statement_parser = SELECTQueryParser(query, "sites", self.get_columns())
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- sites_df = pd.json_normalize(self.get_sites(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- sites_df, selected_columns, where_conditions, order_by_conditions
- )
- sites_df = select_statement_executor.execute_query()
-
- return sites_df
-
- def update(self, query: ast.Update) -> None:
- """Updates data in the Sharepoint "PUT /lists" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- sites_df = pd.json_normalize(self.get_sites())
-
- update_query_executor = UPDATEQueryExecutor(sites_df, where_conditions)
-
- sites_df = update_query_executor.execute_query()
-
- sites_ids = sites_df[["siteId"]].to_dict(orient="records")
-
- self.update_sites(sites_ids, values_to_update)
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_sites(limit=1)).columns.tolist()
-
- def get_sites(self, **kwargs) -> List[Dict]:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- site_data = client.get_all_sites(**kwargs)
- return site_data
-
- def update_sites(self, site_ids: List[dict], values_to_update: dict) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.update_sites(site_ids, values_to_update)
-
-
-class ListsTable(APITable):
- def insert(self, query: ast.Insert) -> None:
- """Inserts data into the Sharepoint "POST /lists" API endpoint.
-
- Parameters
- ----------
- query : ast.Insert
- Given SQL INSERT query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- insert_statement_parser = INSERTQueryParser(
- query,
- supported_columns=["displayName", "columns", "list", "siteId"],
- mandatory_columns=["displayName", "list", "siteId"],
- all_mandatory=False,
- )
- lists_data = insert_statement_parser.parse_query()
- self.create_lists(lists_data)
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Sharepoint lists data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Sharepoint lists matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- select_statement_parser = SELECTQueryParser(query, "lists", self.get_columns())
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- lists_df = pd.json_normalize(self.get_lists(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- lists_df, selected_columns, where_conditions, order_by_conditions
- )
- lists_df = select_statement_executor.execute_query()
-
- return lists_df
-
- def update(self, query: ast.Update) -> None:
- """Updates data in the Sharepoint "PUT /lists" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- lists_df = pd.json_normalize(self.get_lists())
-
- update_query_executor = UPDATEQueryExecutor(lists_df, where_conditions)
-
- lists_df = update_query_executor.execute_query()
-
- list_ids = lists_df[["id", "siteId"]].to_dict(orient="records")
-
- self.update_lists(list_ids, values_to_update)
-
- def delete(self, query: ast.Delete) -> None:
- """
- Deletes data from the Sharepoint "DELETE /lists" API endpoint.
-
- Parameters
- ----------
- query : ast.Delete
- Given SQL DELETE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- delete_statement_parser = DELETEQueryParser(query)
- where_conditions = delete_statement_parser.parse_query()
-
- lists_df = pd.json_normalize(self.get_lists())
-
- delete_query_executor = DELETEQueryExecutor(lists_df, where_conditions)
-
- lists_df = delete_query_executor.execute_query()
-
- list_ids = lists_df[["id", "siteId"]].to_dict(orient="records")
- self.delete_lists(list_ids)
-
- def get_lists(self, **kwargs) -> List[Dict]:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- lists_data = client.get_all_lists(**kwargs)
- return lists_data
-
- def delete_lists(self, list_ids: List[dict]) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.delete_lists(list_ids)
-
- def update_lists(self, list_ids: List[dict], values_to_update: dict) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.update_lists(list_ids, values_to_update)
-
- def create_lists(self, lists_data: List[Dict[Text, Any]]) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.create_lists(data=lists_data)
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_lists(limit=1)).columns.tolist()
-
-
-class SiteColumnsTable(APITable):
- def insert(self, query: ast.Insert) -> None:
- """Inserts data into the Sharepoint "POST /columns" API endpoint.
-
- Parameters
- ----------
- query : ast.Insert
- Given SQL INSERT query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- insert_statement_parser = INSERTQueryParser(
- query,
- supported_columns=[
- "text",
- "name",
- "indexed",
- "enforceUniqueValues",
- "hidden",
- "siteId",
- ],
- mandatory_columns=["name", "siteId"],
- all_mandatory=False,
- )
- site_columns_data = insert_statement_parser.parse_query()
- self.create_site_columns(site_columns_data)
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Sharepoint columns data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Sharepoint Columns matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- select_statement_parser = SELECTQueryParser(
- query, "siteColumns", self.get_columns()
- )
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- site_columns_df = pd.json_normalize(self.get_site_columns(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- site_columns_df,
- selected_columns,
- where_conditions,
- order_by_conditions,
- )
- site_columns_df = select_statement_executor.execute_query()
-
- return site_columns_df
-
- def update(self, query: ast.Update) -> None:
- """Updates data in the Sharepoint "PUT /columns" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- site_columns_df = pd.json_normalize(self.get_site_columns())
-
- update_query_executor = UPDATEQueryExecutor(site_columns_df, where_conditions)
-
- site_columns_df = update_query_executor.execute_query()
-
- site_columns_ids = site_columns_df[["id", "siteId"]].to_dict(orient="records")
-
- self.update_site_columns(site_columns_ids, values_to_update)
-
- def delete(self, query: ast.Delete) -> None:
- """
- Deletes data from the Sharepoint "DELETE /columns" API endpoint.
-
- Parameters
- ----------
- query : ast.Delete
- Given SQL DELETE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- delete_statement_parser = DELETEQueryParser(query)
- where_conditions = delete_statement_parser.parse_query()
-
- site_columns_df = pd.json_normalize(self.get_site_columns())
-
- delete_query_executor = DELETEQueryExecutor(site_columns_df, where_conditions)
-
- site_columns_df = delete_query_executor.execute_query()
-
- site_columns_ids = site_columns_df[["id", "siteId"]].to_dict(orient="records")
- self.delete_site_columns(site_columns_ids)
-
- def get_site_columns(self, **kwargs) -> List[Dict]:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- site_columns_data = client.get_all_site_columns(**kwargs)
- return site_columns_data
-
- def delete_site_columns(self, sharepoint_column_ids: List[dict]) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.delete_site_columns(sharepoint_column_ids)
-
- def update_site_columns(
- self, sharepoint_column_ids: List[dict], values_to_update: dict
- ) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.update_site_columns(sharepoint_column_ids, values_to_update)
-
- def create_site_columns(
- self, sharepoint_column_data: List[Dict[Text, Any]]
- ) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.create_site_columns(data=sharepoint_column_data)
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_site_columns(limit=1)).columns.tolist()
-
-
-class ListItemsTable(APITable):
- def insert(self, query: ast.Insert) -> None:
- """Inserts data into the Sharepoint "POST /items" API endpoint.
-
- Parameters
- ----------
- query : ast.Insert
- Given SQL INSERT query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- insert_statement_parser = INSERTQueryParser(
- query,
- supported_columns=["fields", "listId", "siteId"],
- mandatory_columns=["listId", "siteId"],
- all_mandatory=False,
- )
- list_items_data = insert_statement_parser.parse_query()
- self.create_list_items(list_items_data)
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Sharepoint items data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Sharepoint items matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- select_statement_parser = SELECTQueryParser(
- query, "listItems", self.get_columns()
- )
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- list_items_df = pd.json_normalize(self.get_list_items(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- list_items_df, selected_columns, where_conditions, order_by_conditions
- )
- list_items_df = select_statement_executor.execute_query()
-
- return list_items_df
-
- def update(self, query: ast.Update) -> None:
- """Updates data in the Sharepoint "PUT /items" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- list_items_df = pd.json_normalize(self.get_list_items())
-
- update_query_executor = UPDATEQueryExecutor(list_items_df, where_conditions)
-
- list_items_df = update_query_executor.execute_query()
-
- list_items_ids = list_items_df[["id", "siteId", "listId"]].to_dict(
- orient="records"
- )
-
- self.update_list_items(list_items_ids, values_to_update)
-
- def delete(self, query: ast.Delete) -> None:
- """
- Deletes data from the Sharepoint "DELETE /items" API endpoint.
-
- Parameters
- ----------
- query : ast.Delete
- Given SQL DELETE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- delete_statement_parser = DELETEQueryParser(query)
- where_conditions = delete_statement_parser.parse_query()
-
- list_items_df = pd.json_normalize(self.get_list_items())
-
- delete_query_executor = DELETEQueryExecutor(list_items_df, where_conditions)
-
- list_items_df = delete_query_executor.execute_query()
-
- list_items_ids = list_items_df[["id", "siteId", "listId"]].to_dict(
- orient="records"
- )
- self.delete_list_items(list_items_ids)
-
- def get_list_items(self, **kwargs) -> List[Dict]:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- list_items_data = client.get_all_items(**kwargs)
- return list_items_data
-
- def delete_list_items(self, list_item_ids: List[dict]) -> None:
- if not self.handler.connnection.check_connection():
- self.handler.connect()
- client = self.handler.connection
- client.delete_items(list_item_ids)
-
- def update_list_items(
- self, list_items_ids: List[dict], values_to_update: dict
- ) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.update_items(item_dict=list_items_ids, values_to_update=values_to_update)
-
- def create_list_items(self, list_items_data: List[Dict[Text, Any]]) -> None:
- if not self.handler.connection.check_bearer_token_validity():
- self.handler.connect()
- client = self.handler.connection
- client.create_items(data=list_items_data)
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_list_items(limit=1)).columns.tolist()
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/tests/__init__.py b/mindsdb/integrations/handlers/sharepoint_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/tests/test_sharepoint_handler.py b/mindsdb/integrations/handlers/sharepoint_handler/tests/test_sharepoint_handler.py
deleted file mode 100644
index 4048e5fa08b..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/tests/test_sharepoint_handler.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-import unittest
-from mindsdb.integrations.handlers.sharepoint_handler import Handler as SharepointHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class SharepointHandlerTest(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "clientId": os.environ.get('CLIENT_ID'),
- "clientSecret": os.environ.get('CLIENT_SECRET'),
- "tenantId": os.environ.get('TENANT_ID'),
- }
- }
- cls.handler = SharepointHandler('test_sharepoint_handler', **cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_2_select_sites_query(self):
- query = "SELECT * FROM test_sharepoint_handler.sites"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_3_select_lists_query(self):
- query = "SELECT * FROM test_sharepoint_handler.lists"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_4_select_siteColumns_query(self):
- query = "SELECT * FROM test_sharepoint_handler.siteColumns"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_5_select_listItems_query(self):
- query = "SELECT * FROM test_sharepoint_handler.listItems"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_6_get_columns(self):
- columns = self.handler.get_columns('test_sharepoint_handler.siteColumns')
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/sharepoint_handler/utils.py b/mindsdb/integrations/handlers/sharepoint_handler/utils.py
deleted file mode 100644
index 9187581bd49..00000000000
--- a/mindsdb/integrations/handlers/sharepoint_handler/utils.py
+++ /dev/null
@@ -1,163 +0,0 @@
-import json
-from typing import Dict, List, Text, Any
-
-import requests
-from requests import Response
-
-
-def create_an_entity(url: str, payload: Dict[Text, Any], bearer_token: str) -> None:
- """
- Makes a POST request to the given url
- Creates an entity for the given url, bearer token and payload
-
- url: URL to which the get request is made
- bearer_token: authentication token for the request
- payload: a dictionary which provides the metadata information regarding the entity being created
-
- Returns
- None
- """
- payload = json.dumps(payload, indent=2)
- headers = {
- "Authorization": f"Bearer {bearer_token}",
- "Content-Type": "application/json",
- }
- getresponse(
- request_type="POST", url=url, headers=headers, payload=payload, files=[]
- )
-
-
-def get_an_entity(url: str, bearer_token: str) -> Any:
- """
- Makes a GET request to the given url
- Gets the entity for the given url and bearer token
-
- url: URL to which the get request is made
- bearer_token: authentication token for the request
-
- Returns
- Dictionary or list of dictionaries containing information/metadata corresponding to the entities
- """
- payload = {}
- headers = {
- "Authorization": f"Bearer {bearer_token}",
- "Content-Type": "application/json",
- }
- response = getresponse(
- request_type="GET", url=url, headers=headers, payload=payload, files=[]
- )
- response = response.json()["value"]
- return response
-
-
-def update_an_entity(
- url: str, values_to_update: Dict[Text, Any], bearer_token: str
-) -> None:
- """
- Makes a PATCH request to given url with the given values_to_update and bearer_token
- updates the entity with the provided values
-
- url: url provided by the user
- values_to_update: values that would be used to update the entity, would be passed in the payload
- Mostly would be a dictionary mapping fields to values
- bearer_token: authentication token passed in the header to make the request
-
- Returns
- None
- """
- payload = values_to_update
- payload = json.dumps(payload, indent=2)
- headers = {
- "Authorization": f"Bearer {bearer_token}",
- "Content-Type": "application/json",
- }
- getresponse(
- request_type="PATCH", url=url, headers=headers, payload=payload, files=[]
- )
-
-
-def delete_an_entity(url: str, bearer_token: str):
- """
- Makes a DELETE request to the given url
-
- url: url string provided to which the request would be made
- bearer_token: authorization token which will be used to execute the request
-
- Returns
- None
- """
- payload = {}
- headers = {
- "Authorization": f"Bearer {bearer_token}",
- "Content-Type": "application/json",
- }
- getresponse(
- request_type="DELETE", url=url, headers=headers, payload=payload, files=[]
- )
-
-
-def bearer_token_request(tenant_id: str, client_id: str, client_secret: str) -> Any:
- """
- Sends a request to login.microsoftonline.com for the given tenant to generate a bearer token
- which is then used in making graph api call
-
- tenant_id: tenant ID is a globally unique identifier (GUID) for your organization
- that is different from your organization or domain name
- client_id: client ID is a globally unique identifier (GUID) for your app registered in Entra
- client_secret: client secret is the password of the service principal or the app.
-
- Returns
- response: Dictionary containing bearer token and other information
- """
- url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-
- payload = {
- "client_id": client_id,
- "client_secret": client_secret,
- "redirect_uri": "http://localhost",
- "grant_type": "client_credentials",
- "resource": "https://graph.microsoft.com",
- }
- files = []
- headers = {}
-
- response = getresponse(
- request_type="POST", url=url, headers=headers, payload=payload, files=files
- )
- response = response.json()
- return response
-
-
-def getresponse(
- url: str,
- payload: Dict[Text, Any],
- files: List[Any],
- headers: Dict[Text, Any],
- request_type: str,
-) -> Response:
- """
- Makes a standard HTTP request based on the params provided and returns the response.
- May raise an error if the response code does not indicate success
-
- url: url string provided to which the request would be made
- payload: the payload which supply additional info regarding the request
- files: the files that are needed to be passed in the request
- headers: additional information regarding the request
- may also indicate the type of content passed in the payload
- request_type: the request performs different actions based on the request type
- DELETE/POST/PATCH/GET
-
- Returns
- response: may return based on the response code
- """
- response = requests.request(
- request_type, url, headers=headers, data=payload, files=files
- )
- status_code = response.status_code
-
- if 400 <= status_code <= 499:
- raise Exception("Client error: " + response.text)
-
- if 500 <= status_code <= 599:
- raise Exception("Server error: " + response.text)
- return response
diff --git a/mindsdb/integrations/handlers/sheets_handler/README.md b/mindsdb/integrations/handlers/sheets_handler/README.md
deleted file mode 100644
index 39ec52944c7..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Google Sheets Handler
-
-This is the implementation of the Google Sheets handler for MindsDB.
-
-## Google Sheets
-Google Sheets is a spreadsheet program included as part of the free, web-based Google Docs Editors suite offered by Google.
-https://en.wikipedia.org/wiki/Google_Sheets
-
-## Implementation
-This handler was implemented using `duckdb`, a library that allows SQL queries to be executed on `pandas` DataFrames.
-
-In essence, when querying a particular sheet, the entire sheet will first be pulled into a `pandas` DataFrame using the Google Visualization API. Once this is done, SQL queries can be run on the DataFrame using `duckdb`.
-
-Note: Since the entire sheet needs to be pulled into memory first (DataFrame), it is recommended to be somewhat careful when querying large datasets so as not to overload your machine.
-
-The documentation for the Google Visualization API is available here,
-
-https://developers.google.com/chart/interactive/docs/reference
-
-The required arguments to establish a connection are,
-* `spreadsheet_id`: the unique ID of the Google Sheet.
-* `sheet_name`: the name of the sheet within the Google Sheet.
-
-## Usage
-In order to make use of this handler and connect to a Google Sheet in MindsDB, the following syntax can be used,
-~~~~sql
-CREATE DATABASE sheets_datasource
-WITH
-engine='sheets',
-parameters={
- "spreadsheet_id": "12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0",
- "sheet_name": "iris"
-};
-~~~~
-
-Now, you can use this established connection to query your table as follows,
-~~~~sql
-SELECT * FROM sheets_datasource.example_tbl
-~~~~
-
-and for columns that contain special characters as follows,
-
-~~~~sql
-SELECT `column name` FROM sheets_datasource.example_tbl
-~~~~
-
-The name of the table will be the name of the relevant sheet, provided as an input to the `sheet_name` parameter.
-
-At the moment, only `SELECT` queries are allowed to be executed through `duckdb`. This, however, has no restriction on running machine learning algorithms against your data in Airtable using `CREATE PREDICTOR` statements.
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sheets_handler/__about__.py b/mindsdb/integrations/handlers/sheets_handler/__about__.py
deleted file mode 100644
index 10d096e69c9..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Google Sheets handler'
-__package_name__ = 'mindsdb_sheets_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Google Sheets"
-__author__ = 'Minura Punchihewa'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/sheets_handler/__init__.py b/mindsdb/integrations/handlers/sheets_handler/__init__.py
deleted file mode 100644
index 4532b9b5afa..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .sheets_handler import SheetsHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Google Sheets'
-name = 'sheets'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/sheets_handler/connection_args.py b/mindsdb/integrations/handlers/sheets_handler/connection_args.py
deleted file mode 100644
index 186c0776f7c..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/connection_args.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- spreadsheet_id={
- 'type': ARG_TYPE.STR,
- 'description': 'The unique ID of the Google Sheet.'
- },
- sheet_name={
- 'type': ARG_TYPE.STR,
- 'description': 'The name of the sheet within the Google Sheet.'
- }
-)
-
-connection_args_example = OrderedDict(
- spreadsheet_id='12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0',
- sheet_name='iris'
-)
diff --git a/mindsdb/integrations/handlers/sheets_handler/icon.svg b/mindsdb/integrations/handlers/sheets_handler/icon.svg
deleted file mode 100644
index 8b4ba3c3c9b..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/icon.svg
+++ /dev/null
@@ -1,55 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sheets_handler/sheets_handler.py b/mindsdb/integrations/handlers/sheets_handler/sheets_handler.py
deleted file mode 100644
index 06494f9a367..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/sheets_handler.py
+++ /dev/null
@@ -1,441 +0,0 @@
-from typing import Optional
-
-import pandas as pd
-from pandas.api import types as pd_types
-import duckdb
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.utilities import log
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE,
-)
-from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
-
-
-logger = log.getLogger(__name__)
-
-
-def _pandas_dtype_to_sql_type(dtype) -> str:
- """Convert pandas dtype to SQL data type string.
-
- Args:
- dtype: pandas dtype object
-
- Returns:
- str: SQL data type string (VARCHAR, INTEGER, DECIMAL, etc.)
- """
- # Handle string dtypes
- if pd_types.is_string_dtype(dtype):
- return "VARCHAR"
-
- # Handle integer dtypes
- if pd_types.is_integer_dtype(dtype):
- return "INTEGER"
-
- # Handle float/numeric dtypes
- if pd_types.is_float_dtype(dtype) or pd_types.is_numeric_dtype(dtype):
- return "DECIMAL"
-
- # Handle boolean dtypes
- if pd_types.is_bool_dtype(dtype):
- return "BOOLEAN"
-
- # Handle datetime dtypes
- if pd_types.is_datetime64_any_dtype(dtype):
- return "DATETIME"
-
- # Handle date dtypes
- if pd_types.is_date_dtype(dtype):
- return "DATE"
-
- # Default to VARCHAR for object and unknown types
- return "VARCHAR"
-
-
-def _infer_data_type(value) -> str:
- """Infer SQL data type from Python value.
-
- Args:
- value: Python value to infer type from
-
- Returns:
- str: SQL data type string (VARCHAR, INTEGER, DECIMAL, etc.)
- """
- if value is None:
- return "VARCHAR"
- elif isinstance(value, bool):
- return "BOOLEAN"
- elif isinstance(value, int):
- return "INTEGER"
- elif isinstance(value, float):
- return "DECIMAL"
- elif isinstance(value, str):
- # Check if it looks like a timestamp
- if "T" in value and ("Z" in value or "+" in value or "-" in value[-6:]):
- return "TIMESTAMP"
- # Check if it looks like a date
- try:
- pd.to_datetime(value)
- if len(value) == 10: # Just date, no time
- return "DATE"
- return "DATETIME"
- except (ValueError, TypeError):
- pass
- return "VARCHAR"
- elif pd_types.is_datetime64_any_dtype(type(value)) or isinstance(value, pd.Timestamp):
- return "DATETIME"
- else:
- return "VARCHAR"
-
-
-def _infer_data_type_from_samples(values) -> str:
- """Infer data type from multiple sample values for better accuracy.
-
- Args:
- values: List of sample values from a column
-
- Returns:
- str: SQL data type string
- """
- non_null_values = [v for v in values if v is not None and pd.notna(v)]
-
- if not non_null_values:
- return "VARCHAR"
-
- # Analyze types across all samples
- type_counts = {}
- for value in non_null_values:
- inferred_type = _infer_data_type(value)
- type_counts[inferred_type] = type_counts.get(inferred_type, 0) + 1
-
- # Return the most common type
- if type_counts:
- return max(type_counts.items(), key=lambda x: x[1])[0]
-
- return "VARCHAR"
-
-
-def _map_type(data_type: str) -> MYSQL_DATA_TYPE:
- """Map SQL data types to MySQL types.
-
- Args:
- data_type (str): The SQL data type name
-
- Returns:
- MYSQL_DATA_TYPE: The corresponding MySQL data type
- """
- if data_type is None:
- return MYSQL_DATA_TYPE.VARCHAR
-
- data_type_upper = data_type.upper()
-
- type_map = {
- "VARCHAR": MYSQL_DATA_TYPE.VARCHAR,
- "TEXT": MYSQL_DATA_TYPE.TEXT,
- "INTEGER": MYSQL_DATA_TYPE.INT,
- "INT": MYSQL_DATA_TYPE.INT,
- "BIGINT": MYSQL_DATA_TYPE.BIGINT,
- "DECIMAL": MYSQL_DATA_TYPE.DECIMAL,
- "FLOAT": MYSQL_DATA_TYPE.FLOAT,
- "DOUBLE": MYSQL_DATA_TYPE.DOUBLE,
- "BOOLEAN": MYSQL_DATA_TYPE.BOOL,
- "BOOL": MYSQL_DATA_TYPE.BOOL,
- "DATE": MYSQL_DATA_TYPE.DATE,
- "DATETIME": MYSQL_DATA_TYPE.DATETIME,
- "TIMESTAMP": MYSQL_DATA_TYPE.DATETIME,
- "TIME": MYSQL_DATA_TYPE.TIME,
- }
-
- return type_map.get(data_type_upper, MYSQL_DATA_TYPE.VARCHAR)
-
-
-def _cast_column_to_type(series: pd.Series, sql_type: str) -> pd.Series:
- """Cast a pandas Series to the appropriate type based on SQL data type.
-
- Args:
- series: pandas Series to cast
- sql_type: SQL data type string (VARCHAR, INTEGER, etc.)
-
- Returns:
- pandas Series with appropriate dtype
- """
- sql_type_upper = sql_type.upper() if sql_type else "VARCHAR"
-
- try:
- if sql_type_upper in ("INTEGER", "INT", "BIGINT"):
- # Try to convert to integer, handling NaN values
- return pd.to_numeric(series, errors="coerce").astype("Int64")
- elif sql_type_upper in ("DECIMAL", "FLOAT", "DOUBLE"):
- # Convert to float
- return pd.to_numeric(series, errors="coerce").astype("float64")
- elif sql_type_upper in ("BOOLEAN", "BOOL"):
- # Convert to boolean
- return series.astype("boolean")
- elif sql_type_upper in ("DATE", "DATETIME", "TIMESTAMP"):
- # Convert to datetime
- return pd.to_datetime(series, errors="coerce")
- else:
- # VARCHAR, TEXT, or unknown - keep as string
- return series.astype("string")
- except Exception as e:
- logger.warning(f"Error casting column to {sql_type}: {e}, keeping original type")
- return series
-
-
-class SheetsHandler(DatabaseHandler):
- """
- This handler handles connection and execution of queries against the Excel Sheet.
- TODO: add authentication for private sheets
- """
-
- name = "sheets"
-
- def __init__(self, name: str, connection_data: Optional[dict], **kwargs):
- """
- Initialize the handler.
- Args:
- name (str): name of particular handler instance
- connection_data (dict): parameters for connecting to the database
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
- self.parser = parse_sql
- self.renderer = SqlalchemyRender("postgresql")
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- self.connection = None
- self.is_connected = False
- self._column_types_cache = None # Cache for column types
-
- def __del__(self):
- if self.is_connected is True:
- self.disconnect()
-
- def connect(self) -> StatusResponse:
- """
- Set up the connection required by the handler.
- Returns:
- HandlerStatusResponse
- """
- url = f"https://docs.google.com/spreadsheets/d/{self.connection_data['spreadsheet_id']}/gviz/tq?tqx=out:csv&sheet={self.connection_data['sheet_name']}"
- try:
- self.sheet = pd.read_csv(url, on_bad_lines="skip")
- except pd.errors.EmptyDataError:
- error_msg = (
- f"Google Sheet '{self.connection_data['sheet_name']}' "
- f"(ID: {self.connection_data['spreadsheet_id']}) is empty or has no columns. "
- f"Please ensure the sheet contains data."
- )
- logger.error(error_msg)
- raise ValueError(error_msg)
- except Exception as e:
- error_msg = (
- f"Error reading Google Sheet '{self.connection_data['sheet_name']}' "
- f"(ID: {self.connection_data['spreadsheet_id']}): {str(e)}"
- )
- logger.error(error_msg)
- raise
-
- self.connection = duckdb.connect()
- self.connection.register(self.connection_data["sheet_name"], self.sheet)
- self.is_connected = True
- # Clear column types cache when reconnecting
- self._column_types_cache = None
-
- return self.connection
-
- def disconnect(self):
- """
- Close any existing connections.
- """
- if self.is_connected is False:
- return
-
- self.connection.close()
- self.is_connected = False
- return self.is_connected
-
- def check_connection(self) -> StatusResponse:
- """
- Check connection to the handler.
- Returns:
- HandlerStatusResponse
- """
- response = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f"Error connecting to the Google Sheet with ID {self.connection_data['spreadsheet_id']}, {e}!")
- response.error_message = str(e)
- finally:
- if response.success is True and need_to_close:
- self.disconnect()
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """
- Receive raw query and act upon it somehow.
- Args:
- query (str): query in native format
- Returns:
- HandlerResponse
- """
-
- need_to_close = self.is_connected is False
- connection = self.connect()
- try:
- result_df = connection.execute(query).fetchdf()
- if not result_df.empty:
- # Get column types and cast result columns accordingly
- column_types = self._get_column_types()
-
- # Cast each column to its inferred type
- for column_name in result_df.columns:
- if column_name in column_types:
- sql_type = column_types[column_name]
- logger.debug(f"Casting column '{column_name}' to type '{sql_type}'")
- result_df[column_name] = _cast_column_to_type(result_df[column_name], sql_type)
-
- response = Response(RESPONSE_TYPE.TABLE, result_df)
- else:
- response = Response(RESPONSE_TYPE.OK)
- connection.commit()
- except Exception as e:
- logger.error(
- f"Error running query: {query} on the Google Sheet with ID {self.connection_data['spreadsheet_id']}!"
- )
- response = Response(RESPONSE_TYPE.ERROR, error_message=str(e))
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> StatusResponse:
- """
- Receive query as AST (abstract syntax tree) and act upon it somehow.
- Args:
- query (ASTNode): sql query represented as AST. May be any kind
- of query: SELECT, INTSERT, DELETE, etc
- Returns:
- HandlerResponse
- """
- query_str = self.renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> StatusResponse:
- """
- Return list of entities that will be accessible as tables.
- Returns:
- HandlerResponse
- """
- response = Response(
- RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([self.connection_data["sheet_name"]], columns=["table_name"])
- )
-
- return response
-
- def _get_column_types(self) -> dict:
- """Get or infer column types for the sheet.
-
- Returns:
- dict: Mapping of column names to SQL data types
- """
- # Return cached types if available
- if self._column_types_cache is not None:
- return self._column_types_cache
-
- # Ensure we're connected and have the sheet data
- if not hasattr(self, "sheet") or self.sheet is None:
- self.connect()
-
- # Sample 3 rows to infer data types from actual values
- sample_size = min(3, len(self.sheet))
- sample_data = self.sheet.head(sample_size) if not self.sheet.empty else pd.DataFrame()
-
- logger.debug(f"Sampling {sample_size} rows for column type inference")
- logger.debug(f"Sampled data:\n{sample_data.to_string()}")
- logger.debug(f"Column names: {list(self.sheet.columns)}")
-
- # Infer data types from sample values for each column
- column_types = {}
- for column_name in self.sheet.columns:
- if not sample_data.empty:
- column_values = sample_data[column_name].tolist()
- logger.debug(f"Column '{column_name}' sample values: {column_values}")
- inferred_type = _infer_data_type_from_samples(column_values)
- logger.debug(f"Column '{column_name}' inferred type: {inferred_type}")
- column_types[column_name] = inferred_type
- else:
- # If no data, fall back to pandas dtype
- dtype = self.sheet[column_name].dtype
- inferred_type = _pandas_dtype_to_sql_type(dtype) if dtype is not None else "VARCHAR"
- logger.debug(
- f"Column '{column_name}' has no sample data, using pandas dtype '{dtype}' -> '{inferred_type}'"
- )
- column_types[column_name] = inferred_type
-
- # Cache the types
- self._column_types_cache = column_types
- return column_types
-
- def get_columns(self, table_name: str) -> StatusResponse:
- """
- Returns a list of entity columns in standard information_schema.columns format.
- Args:
- table_name (str): name of one of tables returned by self.get_tables()
- Returns:
- HandlerResponse
- """
- # Ensure we're connected and have the sheet data
- if not hasattr(self, "sheet") or self.sheet is None:
- self.connect()
-
- # Validate that table_name matches the configured sheet_name
- if table_name != self.connection_data.get("sheet_name"):
- return Response(
- RESPONSE_TYPE.ERROR,
- error_message=f"Table '{table_name}' not found. Available table: {self.connection_data.get('sheet_name')}",
- )
-
- # Get column types
- column_types = self._get_column_types()
- sql_types = [column_types.get(col, "VARCHAR") for col in self.sheet.columns]
-
- # Transform to information_schema.columns format (all required fields)
- columns_data = []
- for ordinal_position, (column_name, sql_type) in enumerate(zip(self.sheet.columns, sql_types), start=1):
- columns_data.append(
- {
- "COLUMN_NAME": column_name,
- "DATA_TYPE": sql_type,
- "ORDINAL_POSITION": ordinal_position,
- "COLUMN_DEFAULT": None,
- "IS_NULLABLE": "YES", # Assume nullable since we can't determine from CSV
- "CHARACTER_MAXIMUM_LENGTH": None,
- "CHARACTER_OCTET_LENGTH": None,
- "NUMERIC_PRECISION": None,
- "NUMERIC_SCALE": None,
- "DATETIME_PRECISION": None,
- "CHARACTER_SET_NAME": None,
- "COLLATION_NAME": None,
- }
- )
-
- df = pd.DataFrame(columns_data)
- result = Response(RESPONSE_TYPE.TABLE, data_frame=df)
- result.to_columns_table_response(map_type_fn=_map_type)
-
- return result
diff --git a/mindsdb/integrations/handlers/sheets_handler/tests/__init__.py b/mindsdb/integrations/handlers/sheets_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/sheets_handler/tests/test_sheets_handler.py b/mindsdb/integrations/handlers/sheets_handler/tests/test_sheets_handler.py
deleted file mode 100644
index cacd2c98b56..00000000000
--- a/mindsdb/integrations/handlers/sheets_handler/tests/test_sheets_handler.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.sheets_handler.sheets_handler import SheetsHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class SheetsHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "spreadsheet_id": "12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0",
- "sheet_name": "iris"
- }
- cls.handler = SheetsHandler('test_sheets_handler', cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_native_query_select(self):
- query = "SELECT * FROM iris"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_2_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_3_get_columns(self):
- columns = self.handler.get_columns()
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/shopify_handler/__init__.py b/mindsdb/integrations/handlers/shopify_handler/__init__.py
index a3057dbc5f6..b0ec9e5f3a0 100644
--- a/mindsdb/integrations/handlers/shopify_handler/__init__.py
+++ b/mindsdb/integrations/handlers/shopify_handler/__init__.py
@@ -15,7 +15,7 @@
name = "shopify"
type = HANDLER_TYPE.DATA
icon_path = "icon.svg"
-support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY
+support_level = HANDLER_SUPPORT_LEVEL.MINDSDB
__all__ = [
"Handler",
diff --git a/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py b/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py
index 5e876751566..055deb1a0f7 100644
--- a/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py
+++ b/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py
@@ -35,6 +35,8 @@
CompanyContactsTable,
)
from mindsdb.integrations.libs.api_handler import MetaAPIHandler
+from mindsdb.integrations.libs.passthrough import PassthroughMixin
+from mindsdb.integrations.libs.passthrough_types import PassthroughRequest
from mindsdb.integrations.libs.response import (
HandlerStatusResponse as StatusResponse,
HandlerResponse as Response,
@@ -51,13 +53,37 @@
logger = log.getLogger(__name__)
-class ShopifyHandler(MetaAPIHandler):
+class ShopifyHandler(MetaAPIHandler, PassthroughMixin):
"""
The Shopify handler implementation.
"""
name = "shopify"
+ # REST passthrough configuration. Shopify sends the Admin API token in
+ # `X-Shopify-Access-Token`, not `Authorization: Bearer`, so we override
+ # the default auth header. v1 requires the caller to pre-supply the
+ # access token in connection_data β the existing client_id/client_secret
+ # OAuth dance runs inside `connect()` and isn't surfaced to the mixin.
+ _bearer_token_arg = "access_token"
+ _auth_header_name = "X-Shopify-Access-Token"
+ _auth_header_format = "{token}"
+ _auth_mode = "custom"
+ _base_url_default = None
+ # Version-less path β Shopify redirects this to the current stable
+ # Admin API version, so the probe survives quarterly API releases.
+ _test_request = PassthroughRequest(method="GET", path="/admin/shop.json")
+
+ def _build_base_url(self) -> str | None:
+ data = self._get_connection_data()
+ shop = data.get("shop_url")
+ if not shop:
+ return None
+ shop = str(shop)
+ if not shop.startswith(("http://", "https://")):
+ shop = f"https://{shop}"
+ return shop.rstrip("/")
+
def __init__(self, name: str, **kwargs):
"""
Initialize the handler.
diff --git a/mindsdb/integrations/handlers/singlestore_handler/README.md b/mindsdb/integrations/handlers/singlestore_handler/README.md
deleted file mode 100644
index 2507b121413..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/README.md
+++ /dev/null
@@ -1,42 +0,0 @@
-## Implementation
-
-This handler is implemented by extending the MySQLHandler.
-
-The required arguments to establish a connection are as follows:
-
-* `user` is the database user.
-* `password` is the database password.
-* `host` is the host name, IP address, or URL.
-* `port` is the port used to make TCP/IP connection.
-* `database` is the database name.
-
-There are several optional arguments that can be used as well.
-
-* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`).
-* `ssl_ca` is the SSL Certificate Authority.
-* `ssl_cert` stores SSL certificates.
-* `ssl_key` stores SSL keys.
-
-## Usage
-
-In order to make use of this handler and connect to the SingleStore database in MindsDB, the following syntax can be used:
-
-```sql
-CREATE DATABASE singlestore_datasource
-WITH
- ENGINE = 'singlestore',
- PARAMETERS = {
- "host": "127.0.0.1",
- "port": 3306,
- "database": "singlestore",
- "user": "root",
- "password": "password"
- };
-```
-
-You can use this established connection to query your table as follows:
-
-```sql
-SELECT *
-FROM singlestore_datasource.example_table;
-```
diff --git a/mindsdb/integrations/handlers/singlestore_handler/__about__.py b/mindsdb/integrations/handlers/singlestore_handler/__about__.py
deleted file mode 100644
index 5331cf6a310..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB SingleStore handler'
-__package_name__ = 'mindsdb_singlestore_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for SingleStore"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/singlestore_handler/__init__.py b/mindsdb/integrations/handlers/singlestore_handler/__init__.py
deleted file mode 100644
index 20eb42c98bb..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-try:
- from .singlestore_handler import SingleStoreHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-from .__about__ import __version__ as version, __description__ as description
-
-
-title = 'SingleStore'
-name = 'singlestore'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/singlestore_handler/icon.svg b/mindsdb/integrations/handlers/singlestore_handler/icon.svg
deleted file mode 100644
index 288499760e8..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/icon.svg
+++ /dev/null
@@ -1,16 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/singlestore_handler/requirements.txt b/mindsdb/integrations/handlers/singlestore_handler/requirements.txt
deleted file mode 100644
index ee467569031..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--r mindsdb/integrations/handlers/mysql_handler/requirements.txt
diff --git a/mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py b/mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py
deleted file mode 100644
index c1516194a48..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler
-from .__about__ import __version__ as version
-
-
-class SingleStoreHandler(MySQLHandler):
- """
- This handler handles connection and execution of the SingleStore statements.
- """
- name = 'singlestore'
-
- def __init__(self, name, **kwargs):
- kwargs['conn_attrs'] = {'mindsdb', 'MindsDB', version}
- super().__init__(name, **kwargs)
diff --git a/mindsdb/integrations/handlers/singlestore_handler/tests/__init__.py b/mindsdb/integrations/handlers/singlestore_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py b/mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py
deleted file mode 100644
index a4ff05afdfd..00000000000
--- a/mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import unittest
-
-from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class MySQLHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "host": "localhost",
- "port": "3306",
- "user": "root",
- "password": "",
- "database": "test",
- "ssl": False
- }
- cls.handler = MySQLHandler('test_singlestore_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.check_connection()
-
- def test_1_native_query_show_dbs(self):
- dbs = self.handler.native_query("SHOW DATABASES;")
- assert dbs['type'] is not RESPONSE_TYPE.ERROR
-
- def test_2_get_tables(self):
- tbls = self.handler.get_tables()
- assert tbls['type'] is not RESPONSE_TYPE.ERROR
-
- def test_5_drop_table(self):
- res = self.handler.native_query("DROP TABLE IF EXISTS test_mdb")
- assert res['type'] is not RESPONSE_TYPE.ERROR
-
- def test_4_create_table(self):
- res = self.handler.native_query("CREATE TABLE IF NOT EXISTS test_mdb (test_col INT)")
- assert res['type'] is not RESPONSE_TYPE.ERROR
-
- def test_7_select_query(self):
- query = "SELECT * FROM test_mdb WHERE 'id'='a'"
- result = self.handler.native_query(query)
- assert result['type'] is RESPONSE_TYPE.TABLE
diff --git a/mindsdb/integrations/handlers/slack_handler/README.md b/mindsdb/integrations/handlers/slack_handler/README.md
deleted file mode 100644
index cd0c2e0bb04..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/README.md
+++ /dev/null
@@ -1,268 +0,0 @@
----
-title: Slack
-sidebarTitle: Slack
----
-
-This documentation describes the integration of MindsDB with [Slack](https://slack.com/), a cloud-based collaboration platform.
-The integration allows MindsDB to access data from Slack and enhance Slack with AI capabilities.
-
-## Prerequisites
-
-Before proceeding, ensure the following prerequisites are met:
-
-1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop).
-2. To connect Slack to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies).
-3. Install or ensure access to Slack.
-
-## Connection
-
-Establish a connection to Slack from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/slack_handler) as an engine.
-
-```sql
-CREATE DATABASE slack_datasource
-WITH ENGINE = 'slack',
-PARAMETERS = {
- "token": "values", -- required parameter
- "app_token": "values" -- optional parameter
-};
-```
-
-The Slack handler is initialized with the following parameters:
-
-* `token` is a Slack bot token to use for authentication.
-* `app_token` is a Slack app token to use for authentication.
-
-
-Please note that `app_token` is an optional parameter. Without providing it, you need to integrate an app into a Slack channel.
-
-
-### Method 1: Chatbot responds in direct messages to a Slack app
-
-One way to connect Slack is to use both bot and app tokens. By following the instructions below, you'll set up the Slack app and be able to message this Slack app directly to chat with the bot.
-
-
-If you want to use Slack in the [`CREATE CHATBOT`](/agents/chatbot) syntax, use this method of connecting Slack to MindsDB.
-
-
-
-Here is how to set up a Slack app and generate both a Slack bot token and a Slack app token:
-
- 1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account.
- 2. Create a new app `From scratch` or select an existing app.
- - Please note that the following instructions support apps created `From scratch`.
- - For apps created `From an app manifest`, please follow the [Slack docs here](https://api.slack.com/reference/manifests).
- 3. Go to *Basic Information* under *Settings*.
- - Under *App-Level Tokens*, click on *Generate Token and Scopes*.
- - Name the token `socket` and add the `connections:write` scope.
- - **Copy and save the `xapp-...` token - you'll need it to publish the chatbot.**
- 4. Go to *Socket Mode* under *Settings* and toggle the button to *Enable Socket Mode*.
- 5. Go to *OAuth & Permissions* under *Features*.
- - Add the following *Bot Token Scopes*:
- - app_mentions:read
- - channels:history
- - channels:read
- - chat:write
- - groups:history
- - groups:read (optional)
- - im:history
- - im:read
- - im:write
- - mpim:read (optional)
- - users.profile:read
- - users:read (optional)
- - In the *OAuth Tokens for Your Workspace* section, click on *Install to Workspace* and then *Allow*.
- - **Copy and save the `xoxb-...` token - you'll need it to publish the chatbot.**
- 6. Go to *App Home* under *Features* and click on the checkbox to *Allow users to send Slash commands and messages from the messages tab*.
- 7. Go to *Event Subscriptions* under *Features*.
- - Toggle the button to *Enable Events*.
- - Under *Subscribe to bot events*, click on *Add Bot User Event* and add `app_mention` and `message.im`.
- - Click on *Save Changes*.
- 8. Now you can use tokens from points 3 and 5 to initialize the Slack handler in MindsDB.
-
-
-
-This connection method enables you to chat directly with an app via Slack.
-
-Alternatively, you can connect an app to the Slack channel:
- - Go to the channel where you want to use the bot.
- - Right-click on the channel and select *View Channel Details*.
- - Select *Integrations*.
- - Click on *Add an App*.
-
-
-Here is how to connect Slack to MindsDB:
-
-```sql
-CREATE DATABASE slack_datasource
-WITH
- ENGINE = 'slack',
- PARAMETERS = {
- "token": "xoxb-...",
- "app_token": "xapp-..."
- };
-```
-
-It comes with the `conversations` and `messages` tables.
-
-### Method 2: Chatbot responds on a defined Slack channel
-
-Another way to connect to Slack is to use the bot token only. By following the instructions below, you'll set up the Slack app and integrate it into one of the channels from which you can directly chat with the bot.
-
-
-Here is how to set up a Slack app and generate a Slack bot token:
-
- 1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account.
- 2. Create a new app `From scratch` or select an existing app.
- - Please note that the following instructions support apps created `From scratch`.
- - For apps created `From an app manifest`, please follow the [Slack docs here](https://api.slack.com/reference/manifests).
- 3. Go to the *OAuth & Permissions* section.
- 4. Under the *Scopes* section, add the *Bot Token Scopes* necessary for your application. You can add more later as well.
- - channels:history
- - channels:read
- - chat:write
- - groups:read
- - im:read
- - mpim:read
- - users:read
- 5. Install the bot in your workspace.
- 6. Under the *OAuth Tokens for Your Workspace* section, copy the the *Bot User OAuth Token* value.
- 7. Open your Slack application and add the App/Bot to one of the channels:
- - Go to the channel where you want to use the bot.
- - Right-click on the channel and select *View Channel Details*.
- - Select *Integrations*.
- - Click on *Add an App*.
- 8. Now you can use the token from step 6 to initialize the Slack handler in MindsDB and use the channel name to query and write messages.
-
-
-Here is how to connect Slack to MindsDB:
-
-```sql
-CREATE DATABASE slack_datasource
-WITH
- ENGINE = 'slack',
- PARAMETERS = {
- "token": "xoxb-..."
- };
-```
-
-## Usage
-
-
-The following usage applies when **Connection Method 2** was used to connect Slack.
-
-See the usage for **Connection Method 1** [via the `CREATE CHATBOT` syntax](/sql/tutorials/create-chatbot).
-
-
-Retrieve data from a specified table by providing the integration and table names:
-
-```sql
-SELECT *
-FROM slack_datasource.table_name
-LIMIT 10;
-```
-
-## Supported Tables
-
-The Slack integration supports the following tables:
-
-### `conversations` Table
-
-The `conversations` virtual table is used to query conversations (channels, DMs, and groups) in the connected Slack workspace.
-
-```sql
--- Retrieve all conversations in the workspace
-SELECT *
-FROM slack_datasource.conversations;
-
--- Retrieve a specific conversation using its ID
-SELECT *
-FROM slack_datasource.conversations
-WHERE id = "";
-
--- Retrieve a specific conversation using its name
-SELECT *
-FROM slack_datasource.conversations
-WHERE name = "";
-```
-
-### `messages` Table
-
-The `messages` virtual table is used to query, post, update, and delete messages in specific conversations within the connected Slack workspace.
-
-```sql
--- Retrieve all messages from a specific conversation
--- channel_id is a required parameter and can be found in the conversations table
-SELECT *
-FROM slack_datasource.messages
-WHERE channel_id = "";
-
--- Post a new message
--- channel_id and text are required parameters
-INSERT INTO slack_datasource.messages (channel_id, text)
-VALUES("", "Hello from SQL!");
-
--- Update a bot-posted message
--- channel_id, ts, and text are required parameters
-UPDATE slack_datasource.messages
-SET text = "Updated message content"
-WHERE channel_id = "" AND ts = "";
-
--- Delete a bot-posted message
--- channel_id and ts are required parameters
-DELETE FROM slack_datasource.messages
-WHERE channel_id = "" AND ts = "";
-```
-
-
-You can also find the channel ID by right-clicking on the conversation in Slack, selecting 'View conversation details' or 'View channel details,' and copying the channel ID from the bottom of the 'About' tab.
-
-
-### `threads` Table
-
-The `threads` virtual table is used to query and post messages in threads within the connected Slack workspace.
-
-```sql
--- Retrieve all messages in a specific thread
--- channel_id and thread_ts are required parameters
--- thread_ts is the timestamp of the parent message and can be found in the messages table
-SELECT *
-FROM slack_datasource.threads
-WHERE channel_id = "" AND thread_ts = "";
-
--- Post a message to a thread
-INSERT INTO slack_datasource.threads (channel_id, thread_ts, text)
-VALUES("", "", "Replying to the thread!");
-```
-
-### `users` Table
-
-The `users` virtual table is used to query user information in the connected Slack workspace.
-
-```sql
--- Retrieve all users in the workspace
-SELECT *
-FROM slack_datasource.users;
-
--- Retrieve a specific user by name
-SELECT *
-FROM slack_datasource.users
-WHERE name = "John Doe";
-```
-
-## Rate Limit Considerations
-
-The Slack API enforces rate limits on data retrieval. Therefore, when querying the above tables, by default, the first 1000 (999 for `messages`) records are returned.
-
-To retrieve more records, use the `LIMIT` clause in your SQL queries. For example:
-
-```sql
-SELECT *
-FROM slack_datasource.conversations
-LIMIT 2000;
-```
-
-When using the LIMIT clause to query additional records, you may encounter Slack API rate limits.
-
-## Next Steps
-
-Follow [this tutorial](use-cases/ai_agents/build_ai_agents) to build an AI agent with MindsDB.
diff --git a/mindsdb/integrations/handlers/slack_handler/__about__.py b/mindsdb/integrations/handlers/slack_handler/__about__.py
deleted file mode 100644
index 2daf56c510f..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Slack Handler'
-__package_name__ = 'mindsdb_slack_handler'
-__version__ = '0.0.2'
-__description__ = 'MindsDB handler for Slack'
-__author__ = 'Tarun Chawla'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023 - mindsdb'
diff --git a/mindsdb/integrations/handlers/slack_handler/__init__.py b/mindsdb/integrations/handlers/slack_handler/__init__.py
deleted file mode 100644
index be1e820efb1..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .slack_handler import SlackHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Slack"
-name = "slack"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "support_level",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/slack_handler/connection_args.py b/mindsdb/integrations/handlers/slack_handler/connection_args.py
deleted file mode 100644
index 6e7f6bad93c..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/connection_args.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- token={
- 'type': ARG_TYPE.STR,
- 'description': 'The bot token for the Slack app.',
- 'secret': True,
- 'required': True,
- 'label': 'Token',
- },
- app_token={
- 'type': ARG_TYPE.PWD,
- 'description': 'The app token for the Slack app.',
- 'secret': True,
- 'required': False,
- 'label': 'App Token'
- }
-)
-
-connection_args_example = OrderedDict(
- token='xapp-A111-222-xyz',
- app_token='xoxb-111-222-xyz'
-)
diff --git a/mindsdb/integrations/handlers/slack_handler/icon.svg b/mindsdb/integrations/handlers/slack_handler/icon.svg
deleted file mode 100644
index 9803b60eef9..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/icon.svg
+++ /dev/null
@@ -1,13 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/slack_handler/requirements.txt b/mindsdb/integrations/handlers/slack_handler/requirements.txt
deleted file mode 100644
index 826b5c2cd2d..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-slack_sdk==3.30.0
diff --git a/mindsdb/integrations/handlers/slack_handler/slack_handler.py b/mindsdb/integrations/handlers/slack_handler/slack_handler.py
deleted file mode 100644
index 10dca335385..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/slack_handler.py
+++ /dev/null
@@ -1,351 +0,0 @@
-from copy import deepcopy
-import datetime as dt
-import os
-import threading
-from typing import Any, Callable, Dict, List, Text
-
-import pandas as pd
-from slack_sdk import WebClient
-from slack_sdk.errors import SlackApiError
-from slack_sdk.socket_mode import SocketModeClient
-from slack_sdk.socket_mode.response import SocketModeResponse
-from slack_sdk.socket_mode.request import SocketModeRequest
-
-from mindsdb.integrations.handlers.slack_handler.slack_tables import (
- SlackConversationsTable,
- SlackMessagesTable,
- SlackThreadsTable,
- SlackUsersTable
-)
-from mindsdb.integrations.libs.api_handler import APIChatHandler, FuncParser
-from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
- HandlerStatusResponse as StatusResponse,
- RESPONSE_TYPE
-)
-from mindsdb.utilities.config import Config
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class SlackHandler(APIChatHandler):
- """
- This handler handles the connection and execution of SQL statements on Slack.
- Additionally, it allows the setup of a real-time connection to the Slack API using the Socket Mode for chat-bots.
- """
-
- def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None:
- """
- Initializes the handler.
-
- Args:
- name (Text): The name of the handler instance.
- connection_data (Dict): The connection data required to connect to the SAP HANA database.
- kwargs(Any): Arbitrary keyword arguments.
- """
- super().__init__(name)
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- # If the parameters are not provided, check the environment variables and the handler configuration.
- handler_config = Config().get('slack_handler', {})
-
- for key in ['token', 'app_token']:
- if key not in self.connection_data:
- if f'SLACK_{key.upper()}' in os.environ:
- self.connection_data[key] = os.environ[f'SLACK_{key.upper()}']
- elif key in handler_config:
- self.connection_data[key] = handler_config[key]
-
- self.web_connection = None
- self._socket_connection = None
- self.is_connected = False
-
- self._register_table('conversations', SlackConversationsTable(self))
- self._register_table('messages', SlackMessagesTable(self))
- self._register_table('threads', SlackThreadsTable(self))
- self._register_table('users', SlackUsersTable(self))
-
- def connect(self) -> WebClient:
- """
- Establishes a connection to the Slack API using the WebClient.
-
- Returns:
- WebClient: The WebClient object to interact with the Slack API.
- """
- if self.is_connected is True:
- return self.web_connection
-
- # Check if the mandatory connection parameter (token) is available.
- if 'token' not in self.connection_data:
- raise ValueError('Required parameter (token) must be provided.')
-
- try:
- self.web_connection = WebClient(token=self.connection_data['token'])
- self.is_connected = True
- return self.web_connection
- except Exception as unknown_error:
- logger.error(f'Unknown error connecting to Slack API: {unknown_error}')
- raise
-
- def check_connection(self) -> StatusResponse:
- """
- Checks the status of the connection to the Slack API, both for the WebClient and the Socket Mode.
-
- Raises:
- SlackApiError: If an error occurs while connecting to the Slack API.
-
- Returns:
- StatusResponse: An object containing the success status and an error message if an error occurs.
- """
- response = StatusResponse(False)
-
- try:
- web_connection = self.connect()
- # Check the status of the web connection.
- web_connection.auth_test()
-
- # Check the status of the socket connection if the app_token is provided.
- if 'app_token' in self.connection_data:
- _socket_connection = SocketModeClient(
- app_token=self.connection_data['app_token'],
- web_client=web_connection
- )
- _socket_connection.connect()
- _socket_connection.disconnect()
-
- response.success = True
- except (SlackApiError, ValueError) as known_error:
- logger.error(f'Connection check to the Slack API failed, {known_error}!')
- response.error_message = str(known_error)
- except Exception as unknown_error:
- logger.error(f'Connection check to the Slack API failed due to an unknown error, {unknown_error}!')
- response.error_message = str(unknown_error)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: Text = None) -> Response:
- """
- Executes native Slack SDK methods as specified in the query string.
-
- Args:
- query: The query string containing the method name and parameters.
-
- Returns:
- Response: A response object containing the result of the query.
- """
- method_name, params = FuncParser().from_string(query)
-
- df = self._call_slack_api(method_name, params)
-
- return Response(
- RESPONSE_TYPE.TABLE,
- data_frame=df
- )
-
- def _call_slack_api(self, method_name: Text = None, params: Dict = None) -> List[Dict]:
- """
- Calls the Slack SDK method with the specified method name and parameters.
-
- Args:
- method_name (Text): The name of the method to call.
- params (Dict): The parameters to pass to the method.
-
- Raises:
- SlackApiError: If an error occurs while calling the Slack SDK method
-
- Returns:
- List[Dict]: The result from running the Slack SDK method.
- """
- web_connection = self.connect()
- method = getattr(web_connection, method_name)
-
- items = []
- try:
- response = method(**params)
- response_data = deepcopy(response.data)
-
- # Get only the data items from the response data.
- items.extend(self._extract_data_from_response(response_data))
-
- # If the response contains a cursor, fetch the next page of results.
- if 'response_metadata' in response and 'next_cursor' in response['response_metadata']:
- while response['response_metadata']['next_cursor']:
- response = method(cursor=response['response_metadata']['next_cursor'], **params)
- response_data = deepcopy(response.data)
-
- # Get only the data items from the response data.
- items.extend(self._extract_data_from_response(response_data))
- except SlackApiError as slack_error:
- error = f"Error calling method '{method_name}' with params '{params}': {slack_error.response['error']}"
- logger.error(error)
- raise
-
- if items:
- df = pd.DataFrame(items)
-
- return df
-
- def _extract_data_from_response(self, response_data: Dict) -> List[Dict]:
- """
- Extracts the data items from the response object.
-
- Args:
- response_data (Dict): The response object containing the data items.
-
- Raises:
- ValueError: If the response data could not be parsed.
-
- Returns:
- List[Dict]: The data items extracted from the response object.
- """
- # Remove the metadata from the response.
- for key in ['ok', 'response_metadata', 'cache_ts', 'latest', 'pin_count', 'has_more']:
- if key in response_data:
- response_data.pop(key)
-
- # If the response contains only one key, return the value of that key as a list.
- if len(response_data) == 1:
- key = list(response_data.keys())[0]
- if isinstance(response_data[key], list):
- return response_data[key]
-
- else:
- return [response_data[key]]
-
- # Otherwise, raise an error.
- raise ValueError('Response data could not be parsed.')
-
- def get_chat_config(self) -> Dict:
- """
- Returns the chat configuration for the Slack handler.
-
- Returns:
- Dict: The chat configuration.
- """
- return {
- 'polling': {
- 'type': 'realtime',
- },
- 'memory': {
- 'type': 'handler',
- },
- 'tables': [
- {
- 'chat_table': {
- 'name': 'messages',
- 'chat_id_col': 'channel_id',
- 'username_col': 'user',
- 'text_col': 'text',
- 'time_col': 'created_at',
- }
- },
- {
- 'chat_table': {
- 'name': 'threads',
- 'chat_id_col': ['channel_id', 'thread_ts'],
- 'username_col': 'user',
- 'text_col': 'text',
- 'time_col': 'thread_ts',
- }
- }
- ]
- }
-
- def get_my_user_name(self) -> Text:
- """
- Gets the name of the bot user.
-
- Returns:
- Text: The name of the bot user.
- """
- web_connection = self.connect()
- user_info = web_connection.auth_test().data
- return user_info['bot_id']
-
- def subscribe(self, stop_event: threading.Event, callback: Callable, table_name: Text = 'messages',
- columns: List = None, **kwargs: Any) -> None:
- """
- Subscribes to the Slack API using the Socket Mode for real-time responses to messages.
-
- Args:
- stop_event (threading.Event): The event to stop the subscription.
- callback (Callable): The callback function to process the messages.
- table_name (Text): The name of the table to subscribe to.
- kwargs: Arbitrary keyword arguments.
- """
- if table_name not in ['messages', 'threads']:
- raise RuntimeError(f'Table {table_name} is not supported for subscription.')
-
- # Raise an error if columns are provided.
- # Since Slack subscriptions depend on events and not changes to the virtual tables, columns are not supported.
- if columns:
- raise RuntimeError('Columns are not supported for Slack subscriptions.')
-
- self._socket_connection = SocketModeClient(
- # This app-level token will be used only for establishing a connection.
- app_token=self.connection_data['app_token'], # xapp-A111-222-xyz
- # The WebClient for performing Web API calls in listeners.
- web_client=WebClient(token=self.connection_data['token']), # xoxb-111-222-xyz
- )
-
- def _process_websocket_message(client: SocketModeClient, request: SocketModeRequest) -> None:
- """
- Pre-processes the incoming WebSocket message from the Slack API and calls the callback function to process the message.
-
- Args:
- client (SocketModeClient): The client object to send the response.
- request (SocketModeRequest): The request object containing the payload.
- """
- # Acknowledge the request.
- response = SocketModeResponse(envelope_id=request.envelope_id)
- client.send_socket_mode_response(response)
-
- # Ignore requests that are not events.
- if request.type != 'events_api':
- return
-
- # Ignore duplicate requests.
- if request.retry_attempt is not None and request.retry_attempt > 0:
- return
-
- payload_event = request.payload['event']
- # Avoid responding to events other than direct messages and app mentions.
- if payload_event['type'] not in ('message', 'app_mention'):
- return
-
- # Avoid responding to unrelated events like message_changed, message_deleted, etc.
- if 'subtype' in payload_event:
- return
-
- # Avoid responding to messages from the bot.
- if 'bot_id' in payload_event:
- return
-
- key = {
- 'channel_id': payload_event['channel'],
- }
-
- row = {
- 'text': payload_event['text'],
- 'user': payload_event['user'],
- 'created_at': dt.datetime.fromtimestamp(float(payload_event['ts'])).strftime('%Y-%m-%d %H:%M:%S')
- }
-
- # Add thread_ts to the key and row if it is a thread message. This is used to identify threads.
- # This message should be handled via the threads table.
- if 'thread_ts' in payload_event:
- key['thread_ts'] = payload_event['thread_ts']
-
- callback(row, key)
-
- self._socket_connection.socket_mode_request_listeners.append(_process_websocket_message)
- self._socket_connection.connect()
-
- stop_event.wait()
-
- self._socket_connection.close()
diff --git a/mindsdb/integrations/handlers/slack_handler/slack_tables.py b/mindsdb/integrations/handlers/slack_handler/slack_tables.py
deleted file mode 100644
index 1a88ab0eda3..00000000000
--- a/mindsdb/integrations/handlers/slack_handler/slack_tables.py
+++ /dev/null
@@ -1,718 +0,0 @@
-import datetime as dt
-from typing import Any, Dict, List, Text
-
-from mindsdb_sql_parser.ast import Delete, Insert, Update
-import pandas as pd
-from slack_sdk.errors import SlackApiError
-
-from mindsdb.integrations.libs.api_handler import APIResource
-from mindsdb.integrations.utilities.sql_utils import (
- extract_comparison_conditions,
- FilterCondition,
- FilterOperator,
- SortColumn,
-)
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class SlackConversationsTable(APIResource):
- """
- This is the table abstraction for interacting with conversations via the Slack API.
- """
-
- def list(self, conditions: List[FilterCondition] = None, limit: int = None, **kwargs: Any) -> pd.DataFrame:
- """
- Retrieves a list of Slack conversations based on the specified conditions.
- If no channel ID(s) are provided, all channels are retrieved as follows:
- - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached.
- - Otherwise, the provided limit or a default limit of 1000 is used when making the API call.
-
- Therefore, if a user is to retrieve more than 1000 channels, the limit should be set to a value greater than 1000.
-
- The above is designed to prevent rate limiting by the Slack API.
-
- Args:
- conditions (List[FilterCondition]): The conditions to filter the conversations.
- limit (int): The limit of the conversations to return.
- kwargs(Any): Arbitrary keyword arguments.
-
- Raises:
- ValueError:
- - If an unsupported operator is used for the column 'id'.
- - If the channel ID(s) provided are not found.
- SlackApiError: If an error occurs when getting the channels from the Slack API.
-
- Returns:
- pd.DataFrame: The list of conversations.
- """
- channels = []
- for condition in conditions:
- value = condition.value
- op = condition.op
-
- # Handle the column 'id'.
- if condition.column == "id":
- if op not in [FilterOperator.EQUAL, FilterOperator.IN]:
- raise ValueError(f"Unsupported operator '{op}' for column 'id'")
-
- if op == FilterOperator.EQUAL:
- try:
- channels = [self.get_channel(value)]
- condition.applied = True
- except ValueError:
- raise
-
- if op == FilterOperator.IN:
- try:
- channels = self._get_channels(value if isinstance(value, list) else [value])
- condition.applied = True
- except ValueError:
- raise
-
- # If no channel ID(s) are provided, get all channels with the specified limit.
- if not channels:
- channels = self._get_all_channels(limit)
-
- for channel in channels:
- channel["created_at"] = dt.datetime.fromtimestamp(channel["created"])
- channel["updated_at"] = dt.datetime.fromtimestamp(channel["updated"] / 1000)
-
- return pd.DataFrame(channels, columns=self.get_columns())
-
- def get_channel(self, channel_id: Text) -> Dict:
- """
- Gets the channel data for the specified channel id.
-
- Args:
- channel_id (Text): The channel id.
-
- Raises:
- ValueError: If the channel ID is not found.
-
- Returns:
- Dict: The channel data.
- """
- client = self.handler.connect()
-
- try:
- response = client.conversations_info(channel=channel_id)
- except SlackApiError as slack_error:
- logger.error(f"Error getting channel '{channel_id}': {slack_error.response['error']}")
- raise ValueError(f"Channel '{channel_id}' not found")
-
- return response["channel"]
-
- def _get_channels(self, channel_ids: List[Text]) -> List[Dict]:
- """
- Gets the channel data for multiple channel ids.
- As it is unlikely that a large number of channels will be provided, the API rate limits are ignored here.
-
- Args:
- channel_ids (List[Text]): The list of channel ids.
-
- Raises:
- ValueError: If a channel ID is not found.
-
- Returns:
- List[Dict]: The list of channel data.
- """
- channels = []
- for channel_id in channel_ids:
- channel = self.get_channel(channel_id)
- channels.append(channel)
-
- return channels
-
- def _get_all_channels(self, limit: int = None) -> List[Dict]:
- """
- Gets the list of channels with a limit.
- If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached.
- Otherwise, the provided limit or a default limit of 1000 is used when making the API call.
-
- Args:
- limit (int): The limit of channels to return.
-
- Raises:
- SlackApiError: If an error occurs when getting the channels from the Slack API.
-
- Returns:
- List[Dict]: The list of channels.
- """
- client = self.handler.connect()
-
- try:
- # If the limit is greater than 1000, paginate the results until the limit is reached.
- if limit and limit > 1000:
- response = client.conversations_list()
- channels = response["channels"]
-
- # Paginate the results until the limit is reached.
- while response["response_metadata"]["next_cursor"]:
- response = client.conversations_list(cursor=response["response_metadata"]["next_cursor"])
- channels.extend(response["channels"])
- if len(channels) >= limit:
- break
-
- channels = channels[:limit]
- # Otherwise, use the provided limit or a default limit of 1000.
- else:
- response = client.conversations_list(limit=limit if limit else 1000)
- channels = response["channels"]
- except SlackApiError as slack_error:
- logger.error(f"Error getting channels: {slack_error.response['error']}")
- raise
-
- return channels
-
- def get_columns(self) -> List[str]:
- """
- Retrieves the attributes (columns) of the Slack conversations.
-
- Returns:
- List[str]: The list of columns.
- """
- return [
- "id",
- "name",
- "is_channel",
- "is_group",
- "is_im",
- "is_mpim",
- "is_private",
- "is_archived",
- "is_general",
- "is_shared",
- "is_ext_shared",
- "is_org_shared",
- "creator",
- "created_at",
- "updated_at",
- ]
-
-
-class SlackMessagesTable(APIResource):
- """
- This is the table abstraction for interacting with messages via the Slack API.
- """
-
- def list(
- self, conditions: List[FilterCondition] = None, limit: int = None, sort: List[SortColumn] = None, **kwargs: Any
- ) -> pd.DataFrame:
- """
- Retrieves a list of messages from Slack conversations based on the specified conditions.
-
- `channel_id` is a required parameter to retrieve messages from a conversation.
-
- Messages are retrieved as follows for a given conversation:
- - If the provided limit is greater than 999, no limit to the API call is provided and the results are paginated until the limit is reached.
- - Otherwise, the provided limit or a default limit of 999 is used when making the API call.
-
- Therefore, if a user is to retrieve more than 999 messages, the limit should be set to a value greater than 999.
- The above is dependent on the other parameters provided in the conditions.
-
- The above is designed to prevent rate limiting by the Slack API.
-
- Args:
- conditions (List[FilterCondition]): The conditions to filter the messages.
- limit (int): The limit of the messages to return.
- sort (List[SortColumn]): The columns to sort the messages by.
- kwargs (Any): Arbitrary keyword arguments.
-
- Raises:
- ValueError:
- - If the 'channel_id' parameter is not provided.
- - If an unsupported operator is used for the column 'channel_id'.
- - If the channel ID provided is not found.
- SlackApiError: If an error occurs when getting the messages from the Slack API.
-
- Returns:
- pd.DataFrame: The list of messages.
- """
- client = self.handler.connect()
-
- # Build the parameters for the call to the Slack API.
- params = {}
- for condition in conditions:
- value = condition.value
- op = condition.op
-
- # Handle the column 'channel_id'.
- if condition.column == "channel_id":
- if op != FilterOperator.EQUAL:
- raise ValueError(f"Unsupported operator '{op}' for column 'channel_id'")
-
- # Check if the provided channel exists.
- try:
- channel = SlackConversationsTable(self.handler).get_channel(value)
- params["channel"] = value
- condition.applied = True
- except SlackApiError:
- raise ValueError(f"Channel '{value}' not found")
-
- # Handle the column 'created_at'.
- elif condition.column == "created_at" and value is not None:
- date = dt.datetime.fromisoformat(value).replace(tzinfo=dt.timezone.utc)
- if op == FilterOperator.GREATER_THAN:
- params["oldest"] = date.timestamp() + 1
- elif op == FilterOperator.GREATER_THAN_OR_EQUAL:
- params["oldest"] = date.timestamp()
- elif op == FilterOperator.LESS_THAN_OR_EQUAL:
- params["latest"] = date.timestamp()
- else:
- continue
- condition.applied = True
-
- if "channel" not in params:
- raise ValueError("To retrieve data from Slack, you need to provide the 'channel_id' parameter.")
-
- # Retrieve the messages from the Slack API.
- try:
- # If the limit is greater than 999, paginate the results until the limit is reached.
- if limit and limit > 999:
- params["limit"] = 999
- response = client.conversations_history(**params)
- messages = response["messages"]
-
- # Paginate the results until the limit is reached. response_metadata may be None.
- while response.get("response_metadata", {}).get("next_cursor"):
- response = client.conversations_history(
- cursor=response["response_metadata"]["next_cursor"], **params
- )
- messages.extend(response["messages"])
- if len(messages) >= limit:
- break
-
- messages = messages[:limit]
- # Otherwise, use the provided limit or a default limit of 999.
- else:
- params["limit"] = limit if limit else 999
- response = client.conversations_history(**params)
- messages = response["messages"]
- except SlackApiError as slack_error:
- logger.error(f"Error getting messages: {slack_error.response['error']}")
- raise
-
- result = pd.DataFrame(messages, columns=self.get_columns())
-
- result = result[result["text"].notnull()]
-
- # Add the channel ID and name to the result.
- result["channel_id"] = params["channel"]
- result["channel_name"] = channel["name"] if "name" in channel else None
-
- # Translate the time stamp into a 'created_at' field.
- result["created_at"] = pd.to_datetime(result["ts"].astype(float), unit="s").dt.strftime("%Y-%m-%d %H:%M:%S")
-
- # Sort the messages by the specified columns.
- if sort:
- result.sort_values(by=[col.column for col in sort], ascending=[col.ascending for col in sort], inplace=True)
-
- return result
-
- def insert(self, query: Insert):
- """
- Executes an INSERT SQL query represented by an ASTNode object and posts a message to a Slack channel.
-
- Args:
- query (Insert): An ASTNode object representing the SQL query to be executed.
-
- Raises:
- ValueError: If the 'channel_id' or 'text' parameters are not provided.
- SlackApiError: If an error occurs when posting the message to the Slack channel.
- """
- client = self.handler.connect()
-
- # Get column names and values from the query.
- columns = [col.name for col in query.columns]
- for row in query.values:
- params = dict(zip(columns, row))
-
- # Check if required parameters are provided.
- if "channel_id" not in params or "text" not in params:
- raise ValueError(
- "To insert data into Slack, you need to provide the 'channel_id' and 'text' parameters."
- )
-
- try:
- client.chat_postMessage(channel=params["channel_id"], text=params["text"])
- except SlackApiError as slack_error:
- logger.error(
- f"Error posting message to Slack channel '{params['channel_id']}': {slack_error.response['error']}"
- )
- raise
-
- def update(self, query: Update):
- """
- Executes an UPDATE SQL query represented by an ASTNode object and updates a message in a Slack channel.
-
- Args:
- query (Update): An ASTNode object representing the SQL query to be executed.
-
- Raises:
- ValueError:
- - If the 'channel_id', 'ts', or 'text' parameters are not provided.
- - If an unsupported operator is used for the columns.
- - If an unsupported column is used.
- - If the channel ID provided is not found.
- SlackApiError: If an error occurs when updating the message in the Slack channel.
- """
- client = self.handler.connect()
-
- conditions = extract_comparison_conditions(query.where)
-
- # Build the parameters for the call to the Slack API.
- params = {}
- # Extract the parameters from the conditions.
- for op, arg1, arg2 in conditions:
- # Handle the column 'channel_id'.
- if arg1 == "channel_id":
- # Check if the provided channel exists.
- try:
- SlackConversationsTable(self.handler).get_channel(arg2)
- params["channel"] = arg2
- except SlackApiError as slack_error:
- logger.error(f"Error getting channel '{arg2}': {slack_error.response['error']}")
- raise ValueError(f"Channel '{arg2}' not found")
-
- # Handle the column'ts'.
- elif arg1 == "ts":
- if op == "=":
- params[arg1] = str(arg2)
- else:
- raise ValueError(f"Unsupported operator '{op}' for column '{arg1}'")
-
- else:
- raise ValueError(f"Unsupported column '{arg1}'")
-
- # Extract the update columns and values.
- for col, val in query.update_columns.items():
- if col == "text":
- params[col] = str(val).strip("'")
- else:
- raise ValueError(f"Unsupported column '{col}'")
-
- # Check if required parameters are provided.
- if "channel" not in params or "ts" not in params or "text" not in params:
- raise ValueError(
- "To update a message in Slack, you need to provide the 'channel', 'ts', and 'text' parameters."
- )
-
- try:
- client.chat_update(channel=params["channel"], ts=params["ts"], text=params["text"].strip())
- except SlackApiError as slack_error:
- logger.error(
- f"Error updating message in Slack channel '{params['channel']}' with timestamp '{params['ts']}': {slack_error.response['error']}"
- )
- raise
-
- def delete(self, query: Delete):
- """
- Executes a DELETE SQL query represented by an ASTNode object and deletes a message from a Slack channel.
-
- Args:
- query (Delete): An ASTNode object representing the SQL query to be executed.
-
- Raises:
- ValueError:
- - If the 'channel_id' or 'ts' parameters are not provided.
- - If an unsupported operator is used for the columns.
- - If an unsupported column is used.
- - If the channel ID provided is not found.
- SlackApiError: If an error occurs when deleting the message from the Slack channel.
- """
- client = self.handler.connect()
-
- conditions = extract_comparison_conditions(query.where)
-
- # Build the parameters for the call to the Slack API.
- params = {}
- for op, arg1, arg2 in conditions:
- # Handle the column 'channel_id'.
- if arg1 == "channel_id":
- # Check if the provided channel exists.
- try:
- SlackConversationsTable(self.handler).get_channel(arg2)
- params["channel"] = arg2
- except SlackApiError as slack_error:
- logger.error(f"Error getting channel '{arg2}': {slack_error.response['error']}")
- raise ValueError(f"Channel '{arg2}' not found")
-
- # Handle the columns 'ts'.
- elif arg1 == "ts":
- if op == "=":
- params["ts"] = float(arg2)
- else:
- raise NotImplementedError(f"Unknown op: {op}")
-
- else:
- raise ValueError(f"Unsupported column '{arg1}'")
-
- # Check if required parameters are provided.
- if "channel" not in params or "ts" not in params:
- raise ValueError("To delete a message from Slack, you need to provide the 'channel' and 'ts' parameters.")
-
- try:
- client.chat_delete(channel=params["channel"], ts=params["ts"])
-
- except SlackApiError as slack_error:
- logger.error(
- f"Error deleting message in Slack channel '{params['channel']}' with timestamp '{params['ts']}': {slack_error.response['error']}"
- )
- raise
-
- def get_columns(self) -> List[Text]:
- """
- Retrieves the attributes (columns) of the Slack messages.
-
- Returns:
- List[str]: The list of columns.
- """
- return [
- "channel_id",
- "channel",
- "client_msg_id",
- "type",
- "subtype",
- "ts",
- "created_at",
- "user",
- "text",
- "attachments",
- "files",
- "reactions",
- "thread_ts",
- "reply_count",
- "reply_users_count",
- "latest_reply",
- "reply_users",
- ]
-
-
-class SlackThreadsTable(APIResource):
- """
- This is the table abstraction for interacting with threads in Slack conversations.
- """
-
- def list(
- self, conditions: List[FilterCondition] = None, limit: int = None, sort: List[SortColumn] = None, **kwargs: Any
- ) -> pd.DataFrame:
- """
- Retrieves a list of messages in a thread based on the specified conditions.
-
- `channel_id` and `thread_ts` are required parameters to retrieve messages from a thread.
-
- Messages are retrieved as follows for a given thread:
- - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached.
- - Otherwise, the provided limit or a default limit of 1000 is used when making the API call.
-
- Therefore, if a user is to retrieve more than 1000 messages, the limit should be set to a value greater than 1000.
-
- The above is designed to prevent rate limiting by the Slack API.
-
- Args:
- conditions (List[FilterCondition]): The conditions to filter the messages.
- limit (int): The limit of the messages to return.
- sort (List[SortColumn]): The columns to sort the messages by.
- kwargs (Any): Arbitrary keyword arguments.
-
- Raises:
- ValueError:
- - If the 'channel_id' or 'thread_ts' parameters are not provided.
- - If an unsupported operator is used for the columns.
- - If an unsupported column is used.
- - If the channel ID provided is not found.
- SlackApiError: If an error occurs when getting the messages from the Slack API.
-
- Returns:
- pd.DataFrame: The messages in the thread.
- """
- client = self.handler.connect()
-
- # Build the parameters for the call to the Slack API.
- params = {}
- for condition in conditions:
- value = condition.value
- op = condition.op
-
- # Handle the column 'channel_id'.
- if condition.column == "channel_id":
- if op != FilterOperator.EQUAL:
- raise ValueError(f"Unsupported operator '{op}' for column 'channel_id'")
-
- # Check if the provided channel exists.
- try:
- channel = SlackConversationsTable(self.handler).get_channel(value)
- params["channel"] = value
- condition.applied = True
- except SlackApiError as slack_error:
- logger.error(f"Error getting channel '{value}': {slack_error.response['error']}")
- raise ValueError(f"Channel '{value}' not found")
-
- # Handle the column 'thread_ts'.
- elif condition.column == "thread_ts":
- if op != FilterOperator.EQUAL:
- raise ValueError(f"Unsupported operator '{op}' for column 'thread_ts'")
-
- params["ts"] = value
-
- if "channel" not in params or "ts" not in params:
- raise ValueError(
- "To retrieve data from Slack, you need to provide the 'channel_id' and 'thread_ts' parameters."
- )
-
- # Retrieve the messages from the Slack API.
- try:
- # If the limit is greater than 1000, paginate the results until the limit is reached.
- if limit and limit > 1000:
- response = client.conversations_replies(**params)
- messages = response["messages"]
-
- # Paginate the results until the limit is reached.
- while response["response_metadata"]["next_cursor"]:
- response = client.conversations_replies(cursor=response["response_metadata"]["next_cursor"])
- messages.extend(response["messages"])
- if len(messages) >= limit:
- break
-
- messages = messages[:limit]
- # Otherwise, use the provided limit or a default limit of 1000.
- else:
- params["limit"] = limit if limit else 1000
- response = client.conversations_replies(**params)
- messages = response["messages"]
- except SlackApiError as slack_error:
- logger.error(f"Error getting messages: {slack_error.response['error']}")
- raise
-
- result = pd.DataFrame(messages, columns=self.get_columns())
-
- result = result[result["text"].notnull()]
-
- # Add the channel ID and name to the result.
- result["channel_id"] = params["channel"]
- result["channel_name"] = channel["name"] if "name" in channel else None
-
- # Sort the messages by the specified columns.
- if sort:
- result.sort_values(by=[col.column for col in sort], ascending=[col.ascending for col in sort], inplace=True)
-
- return result
-
- def insert(self, query: Insert):
- """
- Executes an INSERT SQL query represented by an ASTNode object and posts a message to a Slack thread.
-
- Args:
- query (Insert): An ASTNode object representing the SQL query to be executed.
-
- Raises:
- ValueError: If the 'channel_id', 'text', or 'thread_ts' parameters are not provided.
- """
- client = self.handler.connect()
-
- # Get column names and values from the query.
- columns = [col.name for col in query.columns]
- for row in query.values:
- params = dict(zip(columns, row))
-
- # Check if required parameters are provided.
- if "channel_id" not in params or "text" not in params or "thread_ts" not in params:
- raise ValueError(
- "To insert data into Slack, you need to provide the 'channel_id', 'text', and 'thread_ts' parameters."
- )
-
- try:
- client.chat_postMessage(
- channel=params["channel_id"], text=params["text"], thread_ts=params["thread_ts"]
- )
- except SlackApiError as slack_error:
- logger.error(
- f"Error posting message to Slack channel '{params['channel_id']}': {slack_error.response['error']}"
- )
- raise
-
- def get_columns(self) -> List[Text]:
- """
- Retrieves the attributes (columns) of the Slack threads.
-
- Returns:
- List[Text]: The list of columns.
- """
- return [
- "channel_id",
- "channel_name",
- "type",
- "user",
- "text",
- "ts",
- "client_msg_id",
- "thread_ts",
- "parent_user_id",
- "reply_count",
- "reply_users_count",
- "latest_reply",
- "reply_users",
- ]
-
-
-class SlackUsersTable(APIResource):
- """
- This is the table abstraction for interacting with users in Slack.
- """
-
- def list(self, conditions: List[FilterCondition] = None, limit: int = None, **kwargs: Any) -> pd.DataFrame:
- """
- Retrieves a list of users based on the specified conditions.
- Users are retrieved as follows:
- - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached.
- - Otherwise, the provided limit or a default limit of 1000 is used when making the API call.
-
- Therefore, if a user is to retrieve more than 1000 users, the limit should be set to a value greater than 1000.
-
- The above is designed to prevent rate limiting by the Slack API.
-
- Args:
- conditions (List[FilterCondition]): The conditions to filter the users.
- limit (int): The limit of the users to return.
- kwargs (Any): Arbitrary keyword arguments.
-
- Raises:
- SlackApiError: If an error occurs when getting the users from the Slack API.
- """
- client = self.handler.connect()
-
- # Retrieve the users from the Slack API.
- try:
- # If the limit is greater than 1000, paginate the results until the limit is reached.
- if limit and limit > 1000:
- response = client.users_list()
- users = response["members"]
-
- # Paginate the results until the limit is reached.
- while response["response_metadata"]["next_cursor"]:
- response = client.users_list(cursor=response["response_metadata"]["next_cursor"])
- users.extend(response["members"])
- if len(users) >= limit:
- break
-
- users = users[:limit]
- # Otherwise, use the provided limit or a default limit of 1000.
- else:
- response = client.users_list(limit=limit if limit else 1000)
- users = response["members"]
- except SlackApiError as slack_error:
- logger.error(f"Error getting users: {slack_error.response['error']}")
- raise
-
- return pd.DataFrame(users, columns=self.get_columns())
-
- def get_columns(self) -> List[Text]:
- """
- Retrieves the attributes (columns) of the Slack users.
-
- Returns:
- List[Text]: The list of columns.
- """
- return ["id", "name", "real_name"]
diff --git a/mindsdb/integrations/handlers/snowflake_handler/requirements.txt b/mindsdb/integrations/handlers/snowflake_handler/requirements.txt
index 706f9cd675f..b267c6e302d 100644
--- a/mindsdb/integrations/handlers/snowflake_handler/requirements.txt
+++ b/mindsdb/integrations/handlers/snowflake_handler/requirements.txt
@@ -1,2 +1,2 @@
-snowflake-connector-python[pandas]==3.15.0
-snowflake-sqlalchemy==1.7.0
+snowflake-connector-python[pandas]==4.4.0
+snowflake-sqlalchemy==1.9.0
diff --git a/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py b/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py
index 91e20c74e50..04898c3df63 100644
--- a/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py
+++ b/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py
@@ -1,24 +1,28 @@
-import psutil
+from typing import Any, Optional, List, Generator
+
import pandas
from pandas import DataFrame
from pandas.api import types as pd_types
from snowflake.sqlalchemy import snowdialect
from snowflake import connector
from snowflake.connector.errors import NotSupportedError
-from snowflake.connector.cursor import SnowflakeCursor, ResultMetadata
-from typing import Any, Optional, List
+from snowflake.connector.cursor import ResultMetadata
from mindsdb_sql_parser.ast.base import ASTNode
from mindsdb_sql_parser.ast import Select, Identifier
-from mindsdb.utilities import log
from mindsdb.integrations.libs.base import MetaDatabaseHandler
+from mindsdb.utilities import log
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
+from mindsdb.utilities.types.column import Column
from mindsdb.integrations.libs.response import (
HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE,
+ TableResponse,
+ OkResponse,
+ ErrorResponse,
+ DataHandlerResponse,
)
+
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
from .auth_types import (
@@ -50,9 +54,9 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE:
types_map = {
("NUMBER", "DECIMAL", "DEC", "NUMERIC"): MYSQL_DATA_TYPE.DECIMAL,
("INT , INTEGER , BIGINT , SMALLINT , TINYINT , BYTEINT"): MYSQL_DATA_TYPE.INT,
- ("FLOAT", "FLOAT4", "FLOAT8"): MYSQL_DATA_TYPE.FLOAT,
+ ("FLOAT", "FLOAT4", "FLOAT8", "FIXED"): MYSQL_DATA_TYPE.FLOAT,
("DOUBLE", "DOUBLE PRECISION", "REAL"): MYSQL_DATA_TYPE.DOUBLE,
- ("VARCHAR"): MYSQL_DATA_TYPE.VARCHAR,
+ ("VARCHAR",): MYSQL_DATA_TYPE.VARCHAR,
("CHAR", "CHARACTER", "NCHAR"): MYSQL_DATA_TYPE.CHAR,
("STRING", "TEXT", "NVARCHAR"): MYSQL_DATA_TYPE.TEXT,
("NVARCHAR2", "CHAR VARYING", "NCHAR VARYING"): MYSQL_DATA_TYPE.VARCHAR,
@@ -61,9 +65,11 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE:
("TIMESTAMP_NTZ", "DATETIME"): MYSQL_DATA_TYPE.DATETIME,
("DATE",): MYSQL_DATA_TYPE.DATE,
("TIME",): MYSQL_DATA_TYPE.TIME,
- ("TIMESTAMP_LTZ"): MYSQL_DATA_TYPE.DATETIME,
- ("TIMESTAMP_TZ"): MYSQL_DATA_TYPE.DATETIME,
- ("VARIANT", "OBJECT", "ARRAY", "MAP", "GEOGRAPHY", "GEOMETRY", "VECTOR"): MYSQL_DATA_TYPE.VARCHAR,
+ ("TIMESTAMP_LTZ",): MYSQL_DATA_TYPE.DATETIME,
+ ("TIMESTAMP_TZ",): MYSQL_DATA_TYPE.DATETIME,
+ ("OBJECT", "ARRAY"): MYSQL_DATA_TYPE.JSON,
+ ("VECTOR",): MYSQL_DATA_TYPE.VECTOR,
+ ("VARIANT", "MAP", "GEOGRAPHY", "GEOMETRY", "VECTOR"): MYSQL_DATA_TYPE.VARCHAR,
}
for db_types_list, mysql_data_type in types_map.items():
@@ -74,100 +80,85 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE:
return MYSQL_DATA_TYPE.VARCHAR
-def _make_table_response(result: DataFrame, cursor: SnowflakeCursor) -> Response:
- """Build response from result and cursor.
- NOTE: Snowflake return only 'general' type in description, so look on result's
- DF types and use types from description only if DF type is 'object'
+def _get_columns(description: list[ResultMetadata], sample: pandas.DataFrame = None) -> list[Column]:
+ """Get columns from Snowflake cursor description.
Args:
- result (DataFrame): result of the query.
- cursor (SnowflakeCursor): cursor object.
+ description (list[ResultMetadata]): cursor description metadata.
+ sample (pandas.DataFrame): data sample
Returns:
- Response: response object.
+ list[Column]: list of columns with mapped MySQL types.
"""
- description: list[ResultMetadata] = cursor.description
- mysql_types: list[MYSQL_DATA_TYPE] = []
+ result = []
for column in description:
- column_dtype = result[column.name].dtype
- description_column_type = connector.constants.FIELD_ID_TO_NAME.get(column.type_code)
- if description_column_type in ("OBJECT", "ARRAY"):
- mysql_types.append(MYSQL_DATA_TYPE.JSON)
- continue
- if description_column_type == "VECTOR":
- mysql_types.append(MYSQL_DATA_TYPE.VECTOR)
- continue
- if pd_types.is_integer_dtype(column_dtype):
- column_dtype_name = column_dtype.name
- if column_dtype_name in ("int8", "Int8"):
- mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
- elif column_dtype in ("int16", "Int16"):
- mysql_types.append(MYSQL_DATA_TYPE.SMALLINT)
- elif column_dtype in ("int32", "Int32"):
- mysql_types.append(MYSQL_DATA_TYPE.MEDIUMINT)
- elif column_dtype in ("int64", "Int64"):
- mysql_types.append(MYSQL_DATA_TYPE.BIGINT)
- else:
- mysql_types.append(MYSQL_DATA_TYPE.INT)
- continue
- if pd_types.is_float_dtype(column_dtype):
- column_dtype_name = column_dtype.name
- if column_dtype_name in ("float16", "Float16"): # Float16 does not exists so far
- mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
- elif column_dtype_name in ("float32", "Float32"):
- mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
- elif column_dtype_name in ("float64", "Float64"):
- mysql_types.append(MYSQL_DATA_TYPE.DOUBLE)
- else:
- mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
- continue
- if pd_types.is_bool_dtype(column_dtype):
- mysql_types.append(MYSQL_DATA_TYPE.BOOLEAN)
- continue
- if pd_types.is_datetime64_any_dtype(column_dtype):
- mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
- series = result[column.name]
- # snowflake use pytz.timezone
- if series.dt.tz is not None and getattr(series.dt.tz, "zone", "UTC") != "UTC":
- series = series.dt.tz_convert("UTC")
- result[column.name] = series.dt.tz_localize(None)
- continue
-
- if pd_types.is_object_dtype(column_dtype):
- if description_column_type == "TEXT":
- # we can also check column.internal_size, if == 16777216 then it is TEXT, else VARCHAR(internal_size)
- mysql_types.append(MYSQL_DATA_TYPE.TEXT)
- continue
- elif description_column_type == "BINARY":
- # if column.internal_size == 8388608 then BINARY, else VARBINARY(internal_size)
- mysql_types.append(MYSQL_DATA_TYPE.BINARY)
- continue
- elif description_column_type == "DATE":
- mysql_types.append(MYSQL_DATA_TYPE.DATE)
- continue
- elif description_column_type == "TIME":
- mysql_types.append(MYSQL_DATA_TYPE.TIME)
- continue
-
- if description_column_type == "FIXED":
- if column.scale == 0:
- mysql_types.append(MYSQL_DATA_TYPE.INT)
- else:
- # It is NUMBER, DECIMAL or NUMERIC with scale > 0
- mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
- continue
- elif description_column_type == "REAL":
- mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
- continue
-
- mysql_types.append(MYSQL_DATA_TYPE.TEXT)
-
- df = DataFrame(
- result,
- columns=[column.name for column in description],
- )
-
- return Response(RESPONSE_TYPE.TABLE, data_frame=df, affected_rows=None, mysql_types=mysql_types)
+ mysql_type = None
+ sf_type_name = connector.constants.FIELD_ID_TO_NAME.get(column.type_code)
+ if sf_type_name is None:
+ logger.warning(f"Snowflake handler: unknown type code: {column.type_code}")
+ mysql_type = MYSQL_DATA_TYPE.VARCHAR
+
+ if sample is not None:
+ column_dtype = sample[column.name].dtype
+
+ if pd_types.is_integer_dtype(column_dtype):
+ column_dtype_name = column_dtype.name
+ if column_dtype_name in ("int8", "Int8"):
+ mysql_type = MYSQL_DATA_TYPE.TINYINT
+ elif column_dtype in ("int16", "Int16"):
+ mysql_type = MYSQL_DATA_TYPE.SMALLINT
+ elif column_dtype in ("int32", "Int32"):
+ mysql_type = MYSQL_DATA_TYPE.MEDIUMINT
+ elif column_dtype in ("int64", "Int64"):
+ mysql_type = MYSQL_DATA_TYPE.BIGINT
+ else:
+ mysql_type = MYSQL_DATA_TYPE.INT
+
+ elif pd_types.is_float_dtype(column_dtype):
+ column_dtype_name = column_dtype.name
+ if column_dtype_name in ("float16", "Float16"): # Float16 does not exists so far
+ mysql_type = MYSQL_DATA_TYPE.FLOAT
+ elif column_dtype_name in ("float32", "Float32"):
+ mysql_type = MYSQL_DATA_TYPE.FLOAT
+ elif column_dtype_name in ("float64", "Float64"):
+ mysql_type = MYSQL_DATA_TYPE.DOUBLE
+ else:
+ mysql_type = MYSQL_DATA_TYPE.FLOAT
+
+ elif pd_types.is_bool_dtype(column_dtype):
+ mysql_type = MYSQL_DATA_TYPE.BOOLEAN
+
+ elif pd_types.is_datetime64_any_dtype(column_dtype):
+ mysql_type = MYSQL_DATA_TYPE.DATETIME
+ series = sample[column.name]
+ # snowflake use pytz.timezone
+ if series.dt.tz is not None and getattr(series.dt.tz, "zone", "UTC") != "UTC":
+ series = series.dt.tz_convert("UTC")
+ sample[column.name] = series.dt.tz_localize(None)
+
+ elif pd_types.is_object_dtype(column_dtype):
+ if sf_type_name == "TEXT":
+ # we can also check column.internal_size, if == 16777216 then it is TEXT, else VARCHAR(internal_size)
+ mysql_type = MYSQL_DATA_TYPE.TEXT
+ elif sf_type_name == "BINARY":
+ # if column.internal_size == 8388608 then BINARY, else VARBINARY(internal_size)
+ mysql_type = MYSQL_DATA_TYPE.BINARY
+ elif sf_type_name == "DATE":
+ mysql_type = MYSQL_DATA_TYPE.DATE
+ elif sf_type_name == "TIME":
+ mysql_type = MYSQL_DATA_TYPE.TIME
+ elif sf_type_name == "FIXED":
+ if getattr(column, "scale", None) == 0:
+ mysql_type = MYSQL_DATA_TYPE.INT
+ else:
+ # It is NUMBER, DECIMAL or NUMERIC with scale > 0
+ mysql_type = MYSQL_DATA_TYPE.FLOAT
+
+ if mysql_type is None:
+ mysql_type = _map_type(sf_type_name)
+
+ result.append(Column(name=column.name, type=mysql_type, original_type=sf_type_name))
+ return result
class SnowflakeHandler(MetaDatabaseHandler):
@@ -176,6 +167,7 @@ class SnowflakeHandler(MetaDatabaseHandler):
"""
name = "snowflake"
+ stream_response = True
_auth_types = {
"key_pair": KeyPairAuthType(),
@@ -269,92 +261,84 @@ def check_connection(self) -> StatusResponse:
return response
- def native_query(self, query: str) -> Response:
- """
- Executes a SQL query on the Snowflake account and returns the result.
+ def native_query(self, query: str, stream: bool = True, **kwargs) -> TableResponse | OkResponse | ErrorResponse:
+ """Executes a SQL query on the Snowflake account and returns the result.
Args:
query (str): The SQL query to be executed.
+ stream (bool): If True - return TableResponse with generator inside.
Returns:
- Response: A response object containing the result of the query or an error message.
+ DataHandlerResponse: A response object containing the result of the query or an error message.
"""
+ generator = self._execute_fetch_batches(query)
+ try:
+ response: TableResponse = next(generator)
+ response.data_generator = generator
+ if stream is False:
+ response.fetchall()
+ except StopIteration as e:
+ response = e.value
+ if isinstance(response, DataHandlerResponse) is False:
+ raise
+
+ return response
+
+ def _execute_fetch_batches(
+ self, query: str
+ ) -> Generator[TableResponse | pandas.DataFrame, None, OkResponse | ErrorResponse]:
+ """Execute a SQL query and yield results in batches.
- need_to_close = self.is_connected is False
+ Args:
+ query (str): The SQL query to execute.
+
+ Yields:
+ TableResponse: First yield β response with column metadata and affected row count.
+ pandas.DataFrame: Subsequent yields β batches of query results.
+ Returns:
+ OkResponse: For DML statements (INSERT/DELETE/UPDATE) with affected row count.
+ ErrorResponse: If an exception occurs during query execution.
+ """
connection = self.connect()
- with connection.cursor(connector.DictCursor) as cur:
+ with connection.cursor(connector.DictCursor) as cursor:
try:
- cur.execute(query)
+ cursor.execute(query)
try:
try:
- batches_iter = cur.fetch_pandas_batches()
+ batches_iter = cursor.fetch_pandas_batches()
except ValueError:
# duplicated columns raises ValueError
raise NotSupportedError()
-
- batches = []
- memory_estimation_check_done = False
- batches_rowcount = 0
- total_rowcount = cur.rowcount or 0
+ try:
+ sample_df = next(batches_iter)
+ except StopIteration:
+ sample_df = None
+ columns = _get_columns(cursor.description, sample=sample_df)
+ yield TableResponse(data=sample_df, affected_rows=cursor.rowcount, columns=columns)
for batch_df in batches_iter:
- batches.append(batch_df)
- # region check the size of first batch (if it is big enough) to get an estimate of the full
- # dataset size. If it does not fit in memory - raise an error.
- # NOTE batch size cannot be set on client side. Also, Snowflake will download
- # 'CLIENT_PREFETCH_THREADS' count of chunks in parallel (by default 4), therefore this check
- # can not work in some cases.
- batches_rowcount += len(batch_df)
- if memory_estimation_check_done is False and batches_rowcount > 1000:
- memory_estimation_check_done = True
- available_memory_kb = psutil.virtual_memory().available >> 10
- batches_size_kb = sum(
- [(x.memory_usage(index=True, deep=True).sum() >> 10) for x in batches]
- )
- rest_rowcount = total_rowcount - batches_rowcount
- rest_estimated_size_kb = int((rest_rowcount / batches_rowcount) * batches_size_kb)
- # for pd.concat required at least x2 memory
- max_allowed_memory_kb = available_memory_kb / 2.4
- if max_allowed_memory_kb < rest_estimated_size_kb:
- error_message = (
- "The query result is too large to fit into available memory. "
- f"The dataset contains {total_rowcount} rows with an estimated size "
- f"of {rest_estimated_size_kb} KB, but only {max_allowed_memory_kb:.0f} KB "
- f"of memory is allowed fot the dataset. Please narrow down the query by adding filters "
- f"or a LIMIT clause to reduce the result set size."
- )
- logger.error(error_message)
- raise MemoryError(error_message)
- # endregion
- if len(batches) > 0:
- response = _make_table_response(result=pandas.concat(batches, ignore_index=True), cursor=cur)
- else:
- response = Response(RESPONSE_TYPE.TABLE, DataFrame([], columns=[x[0] for x in cur.description]))
+ yield batch_df
except NotSupportedError:
# Fallback for CREATE/DELETE/UPDATE. These commands returns table with single column,
# but it cannot be retrieved as pandas DataFrame.
- result = cur.fetchall()
+ result = cursor.fetchall()
match result:
case (
[{"number of rows inserted": affected_rows}]
| [{"number of rows deleted": affected_rows}]
| [{"number of rows updated": affected_rows, "number of multi-joined rows updated": _}]
):
- response = Response(RESPONSE_TYPE.OK, affected_rows=affected_rows)
+ response = OkResponse(affected_rows=affected_rows)
case list():
- response = Response(
- RESPONSE_TYPE.TABLE, DataFrame(result, columns=[x[0] for x in cur.description])
- )
+ response = TableResponse(data=DataFrame(result, columns=[x[0] for x in cursor.description]))
case _:
# Looks like SnowFlake always returns something in response, so this is suspicious
logger.warning("Snowflake did not return any data in response.")
- response = Response(RESPONSE_TYPE.OK)
+ response = OkResponse()
+ return response
except Exception as e:
logger.error(f"Error running query: {query} on {self.connection_data.get('database')}, {e}!")
- response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
-
- if need_to_close is True:
- self.disconnect()
+ return ErrorResponse(error_code=0, error_message=str(e))
if memory_pool is not None and memory_pool.backend_name == "jemalloc":
# This reduce memory consumption, but will slow down next query slightly.
@@ -362,9 +346,7 @@ def native_query(self, query: str) -> Response:
# and next query processing time may be even lower.
memory_pool.release_unused()
- return response
-
- def query(self, query: ASTNode) -> Response:
+ def query(self, query: ASTNode) -> DataHandlerResponse:
"""
Executes a SQL query represented by an ASTNode and retrieves the data.
@@ -372,7 +354,7 @@ def query(self, query: ASTNode) -> Response:
query (ASTNode): An ASTNode representing the SQL query to be executed.
Returns:
- Response: The response from the `native_query` method, containing the result of the SQL query execution.
+ DataHandlerResponse: The response from the `native_query` method, containing the result of the SQL query execution.
"""
query_str = self.renderer.get_string(query, with_failback=True)
@@ -381,7 +363,7 @@ def query(self, query: ASTNode) -> Response:
return self.lowercase_columns(result, query)
def lowercase_columns(self, result, query):
- if not isinstance(query, Select) or result.data_frame is None:
+ if not isinstance(query, Select) or not isinstance(result, TableResponse):
return result
quoted_columns = []
@@ -394,20 +376,19 @@ def lowercase_columns(self, result, query):
if column.is_quoted[-1]:
quoted_columns.append(column.parts[-1])
- rename_columns = {}
- for col in result.data_frame.columns:
- if col.isupper() and col not in quoted_columns:
- rename_columns[col] = col.lower()
- if rename_columns:
- result.data_frame = result.data_frame.rename(columns=rename_columns)
+ for col in result.columns:
+ col_name = col.alias or col.name
+ if col_name.isupper() and col_name not in quoted_columns:
+ col.alias = col_name.lower()
+
return result
- def get_tables(self) -> Response:
+ def get_tables(self) -> DataHandlerResponse:
"""
Retrieves a list of all non-system tables and views in the current schema of the Snowflake account.
Returns:
- Response: A response object containing the list of tables and views, formatted as per the `Response` class.
+ DataHandlerResponse: A response object containing the list of tables and views.
"""
query = """
@@ -418,7 +399,7 @@ def get_tables(self) -> Response:
"""
return self.native_query(query)
- def get_columns(self, table_name) -> Response:
+ def get_columns(self, table_name) -> DataHandlerResponse:
"""
Retrieves column details for a specified table in the Snowflake account.
@@ -426,7 +407,7 @@ def get_columns(self, table_name) -> Response:
table_name (str): The name of the table for which to retrieve column information.
Returns:
- Response: A response object containing the column details, formatted as per the `Response` class.
+ DataHandlerResponse: A response object containing the column details.
Raises:
ValueError: If the 'table_name' is not a valid string.
@@ -458,7 +439,7 @@ def get_columns(self, table_name) -> Response:
return result
- def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
+ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse:
"""
Retrieves metadata information about the tables in the Snowflake database to be stored in the data catalog.
@@ -466,7 +447,7 @@ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
table_names (list): A list of table names for which to retrieve metadata information.
Returns:
- Response: A response object containing the metadata information, formatted as per the `Response` class.
+ DataHandlerResponse: A response object containing the metadata information.
"""
query = """
SELECT
@@ -493,7 +474,7 @@ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
result.data_frame["ROW_COUNT"] = result.data_frame["ROW_COUNT"].astype("Int64")
return result
- def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response:
+ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse:
"""
Retrieves column metadata for the specified tables (or all tables if no list is provided).
@@ -501,7 +482,7 @@ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response:
table_names (list): A list of table names for which to retrieve column metadata.
Returns:
- Response: A response object containing the column metadata.
+ DataHandlerResponse: A response object containing the column metadata.
"""
query = """
SELECT
@@ -529,7 +510,7 @@ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response:
result = self.native_query(query)
return result
- def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> Response:
+ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse:
"""
Retrieves basic column statistics: null %, distinct count.
Due to Snowflake limitations, this runs per-table not per-column.
@@ -546,11 +527,11 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) ->
columns_result = self.native_query(columns_query)
if (
- columns_result.type == RESPONSE_TYPE.ERROR
+ isinstance(columns_result, ErrorResponse)
or columns_result.data_frame is None
or columns_result.data_frame.empty
):
- return Response(RESPONSE_TYPE.ERROR, error_message="No columns found.")
+ return ErrorResponse(error_message="No columns found.")
columns_df = columns_result.data_frame
grouped = columns_df.groupby(["TABLE_SCHEMA", "TABLE_NAME"])
@@ -585,9 +566,13 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) ->
"""
try:
stats_res = self.native_query(stats_query)
- if stats_res.type != RESPONSE_TYPE.TABLE or stats_res.data_frame is None or stats_res.data_frame.empty:
+ if (
+ not isinstance(stats_res, TableResponse)
+ or stats_res.data_frame is None
+ or stats_res.data_frame.empty
+ ):
logger.warning(
- f"Could not retrieve stats for table {table_name}. Query returned no data or an error: {stats_res.error_message if stats_res.type == RESPONSE_TYPE.ERROR else 'No data'}"
+ f"Could not retrieve stats for table {table_name}. Query returned no data or an error: {stats_res.error_message if isinstance(stats_res, ErrorResponse) else 'No data'}"
)
# Add placeholder stats if query fails or returns empty
for _, row in group.iterrows():
@@ -646,11 +631,11 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) ->
)
if not all_stats:
- return Response(RESPONSE_TYPE.TABLE, data_frame=pandas.DataFrame())
+ return TableResponse(data=pandas.DataFrame())
- return Response(RESPONSE_TYPE.TABLE, data_frame=pandas.DataFrame(all_stats))
+ return TableResponse(data=pandas.DataFrame(all_stats))
- def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response:
+ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse:
"""
Retrieves primary key information for the specified tables (or all tables if no list is provided).
@@ -658,7 +643,7 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp
table_names (list): A list of table names for which to retrieve primary key information.
Returns:
- Response: A response object containing the primary key information.
+ DataHandlerResponse: A response object containing the primary key information.
"""
try:
query = """
@@ -666,7 +651,7 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp
"""
response = self.native_query(query)
- if response.type == RESPONSE_TYPE.ERROR and response.error_message:
+ if isinstance(response, ErrorResponse):
logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}")
df = response.data_frame
@@ -683,9 +668,9 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp
except Exception as e:
logger.error(f"Exception in meta_get_primary_keys: {e!r}")
- return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}")
+ return ErrorResponse(error_message=f"Exception querying primary keys: {e!r}")
- def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Response:
+ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse:
"""
Retrieves foreign key information for the specified tables (or all tables if no list is provided).
@@ -693,7 +678,7 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp
table_names (list): A list of table names for which to retrieve foreign key information.
Returns:
- Response: A response object containing the foreign key information.
+ DataHandlerResponse: A response object containing the foreign key information.
"""
try:
query = """
@@ -701,7 +686,7 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp
"""
response = self.native_query(query)
- if response.type == RESPONSE_TYPE.ERROR and response.error_message:
+ if isinstance(response, ErrorResponse):
logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}")
df = response.data_frame
@@ -712,10 +697,10 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp
df = df[["pk_table_name", "pk_column_name", "fk_table_name", "fk_column_name"]]
df = df.rename(
columns={
- "pk_table_name": "child_table_name",
- "pk_column_name": "child_column_name",
- "fk_table_name": "parent_table_name",
- "fk_column_name": "parent_column_name",
+ "pk_table_name": "parent_table_name",
+ "pk_column_name": "parent_column_name",
+ "fk_table_name": "child_table_name",
+ "fk_column_name": "child_column_name",
}
)
@@ -725,7 +710,7 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp
except Exception as e:
logger.error(f"Exception in meta_get_primary_keys: {e!r}")
- return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}")
+ return ErrorResponse(error_message=f"Exception querying primary keys: {e!r}")
def meta_get_handler_info(self, **kwargs: Any) -> str:
"""
diff --git a/mindsdb/integrations/handlers/solace_handler/README.md b/mindsdb/integrations/handlers/solace_handler/README.md
deleted file mode 100644
index 568c6fc562b..00000000000
--- a/mindsdb/integrations/handlers/solace_handler/README.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# Solace Handler
-
-## Run solace locally (optional):
-
-Reference: https://solace.com/products/event-broker/software/getting-started/
-
-1. Run solace app in docker
-Here you can choose different user/password (instead of admin)
-```bash
-docker run -d -p 8080:8080 -p 55555:55555 -p 8008:8008 -p 1883:1883 -p 8000:8000 -p 5672:5672 -p 9000:9000 -p 2222:2222 --shm-size=2g --env username_admin_globalaccesslevel=admin --env username_admin_password=admin --name=solace solace/solace-pubsub-standard
-```
-
-2. Solace admin panel will be run on: http://localhost:8080/
-
-3. Test Publisher/Subscriber using JS client in "3. Run Samples" on [get started](https://solace.com/products/event-broker/software/getting-started/) page
-
-
-## Client description
-
-Pip module: https://pypi.org/project/solace-pubsubplus/
-
-Client api reference: https://docs.solace.com/API-Developer-Online-Ref-Documentation/python/source/rst/solace.messaging.html
-
-Client usage samples: https://github.com/SolaceSamples/solace-samples-python/tree/main
-
-
-## Connect to the Mindsdb to Solace
-
-Command parameters:
-```sql
-CREATE DATABASE my_solace
-WITH
- ENGINE = "solace"
- PARAMETERS = {
- "host": ,
- "username": ,
- "password": ,
- "vpn-name": -- optional, default is "default"
- };
-```
-
-Example:
-```sql
-CREATE DATABASE my_solace
-WITH
- ENGINE = "solace"
- PARAMETERS = {
- "host": "localhost:55555",
- "username": "admin",
- "password": "admin"
- };
-```
-
-## Usage
-
-Solace is event broker. You can send event or subscribe on event. But can't read past events
-
-### Publish event:
-
-Table name is topic name for publishing.
-```sql
-INSERT INTO my_solace. (, ...)
-VALUES (, ...)
-```
-
-If topic contents slash then it can be replaced by '.' similar to access sql objects
-For example to send event to topic `solace/test/my` with dict {'id': 1, 'name': 'me'} use
-```sql
-INSERT INTO my_solace.solace.test.my ('id', 'name', 'type')
-VALUES (1, 'me')
-```
-
-
-### Subscribe on event:
-
-It is possible to subscribe to incoming events using triggers.
-In this example events with topic `solace/test` will be inserted to my_pg.log table:
-```sql
-create trigger my_trigger
-on my_solace.solace.test
-(
- insert into my_pg.log
- select * from TABLE_DELTA
-)
-```
diff --git a/mindsdb/integrations/handlers/solace_handler/__about__.py b/mindsdb/integrations/handlers/solace_handler/__about__.py
deleted file mode 100644
index 218984459dd..00000000000
--- a/mindsdb/integrations/handlers/solace_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Solace handler'
-__package_name__ = 'mindsdb_solace_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for the Solace event broker"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023 - mindsdb'
diff --git a/mindsdb/integrations/handlers/solace_handler/__init__.py b/mindsdb/integrations/handlers/solace_handler/__init__.py
deleted file mode 100644
index 697ad937451..00000000000
--- a/mindsdb/integrations/handlers/solace_handler/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .solace_handler import SolaceHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Solace'
-name = 'solace'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/solace_handler/icon.svg b/mindsdb/integrations/handlers/solace_handler/icon.svg
deleted file mode 100644
index f6a3f33ca8f..00000000000
--- a/mindsdb/integrations/handlers/solace_handler/icon.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/solace_handler/requirements.txt b/mindsdb/integrations/handlers/solace_handler/requirements.txt
deleted file mode 100644
index 0868c064846..00000000000
--- a/mindsdb/integrations/handlers/solace_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-solace-pubsubplus
diff --git a/mindsdb/integrations/handlers/solace_handler/solace_handler.py b/mindsdb/integrations/handlers/solace_handler/solace_handler.py
deleted file mode 100644
index c7206fb1f67..00000000000
--- a/mindsdb/integrations/handlers/solace_handler/solace_handler.py
+++ /dev/null
@@ -1,169 +0,0 @@
-import pandas as pd
-
-from solace.messaging.messaging_service import MessagingService, RetryStrategy
-from solace.messaging.resources.topic_subscription import TopicSubscription
-from solace.messaging.receiver.message_receiver import MessageHandler
-from solace.messaging.resources.topic import Topic
-from solace.messaging.receiver.inbound_message import InboundMessage
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser import ast
-
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
-)
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class SolaceHandler(DatabaseHandler):
-
- def __init__(self, name: str = None, **kwargs):
- """Registers all API tables and prepares the handler for an API connection.
-
- Args:
- name: (str): The handler name to use
- """
- super().__init__(name)
- args = kwargs.get('connection_data', {})
-
- if 'host' not in args:
- raise ValueError('Host parameter is required')
-
- self.connection_args = args
- self.messaging_service = None
- self.is_connected = False
-
- def connect(self):
-
- broker_props = {
- "solace.messaging.transport.host": self.connection_args['host'],
- "solace.messaging.service.vpn-name": self.connection_args.get('vpn-name', 'default'),
- "solace.messaging.authentication.scheme.basic.username": self.connection_args.get('username'),
- "solace.messaging.authentication.scheme.basic.password": self.connection_args.get('password')
- }
-
- self.messaging_service = MessagingService\
- .builder()\
- .from_properties(broker_props) \
- .with_reconnection_retry_strategy(RetryStrategy.parametrized_retry(20, 3)) \
- .build()
-
- self.messaging_service.connect()
-
- self.direct_publisher = self.messaging_service\
- .create_direct_message_publisher_builder()\
- .build()
- self.direct_publisher.start()
-
- # TODO support persistent_publisher ?
-
- self.is_connected = True
- return self.messaging_service
-
- def disconnect(self):
-
- if self.is_connected is False:
- return
-
- self.direct_publisher.terminate()
- self.messaging_service.disconnect()
- self.is_connected = False
- return self.is_connected
-
- def check_connection(self) -> StatusResponse:
-
- response = StatusResponse(False)
-
- try:
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f'Error connecting to Solace: {e}!')
- response.error_message = e
-
- if response.success is False:
- self.is_connected = False
- return response
-
- def native_query(self, query: str = None) -> Response:
- ast = parse_sql(query)
- return self.query(ast)
-
- def query(self, query: ast.ASTNode):
- if isinstance(query, ast.Insert):
- result = self.handle_insert(query)
- else:
- raise NotImplementedError
- return result
-
- def get_columns(self, table_name: str) -> Response:
- df = pd.DataFrame([], columns=['Field'])
- df['Type'] = 'str'
-
- return Response(RESPONSE_TYPE.TABLE, df)
-
- def get_tables(self) -> Response:
- df = pd.DataFrame([], columns=['table_name', 'table_type'])
-
- return Response(RESPONSE_TYPE.TABLE, df)
-
- def handle_insert(self, query: ast.Insert):
-
- message_builder = self.messaging_service.message_builder()
-
- topic_name = '/'.join(query.table.parts)
-
- column_names = [col.name for col in query.columns]
- for insert_row in query.values:
- data = dict(zip(column_names, insert_row))
-
- outbound_message = message_builder.build(data)
- self.direct_publisher.publish(destination=Topic.of(topic_name), message=outbound_message)
-
- return Response(RESPONSE_TYPE.OK)
-
- def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs):
-
- class MessageHandlerImpl(MessageHandler):
- def on_message(self, message: 'InboundMessage'):
- # Check if the payload is a dict
- payload = message.get_payload_as_dictionary()
- if payload is not None:
- data = payload
- else:
- # check as string
- payload = message.get_payload_as_string()
- if payload is None:
- payload = message.get_payload_as_bytes()
- if isinstance(payload, bytearray):
- payload = payload.decode()
- data = {'data': payload}
-
- if columns is not None:
- updated_columns = data.keys()
- if not set(columns) & set(updated_columns):
- # skip
- return
-
- callback(data)
-
- table = ast.Identifier(table_name)
- topic_name = '/'.join(table.parts)
-
- topics = [TopicSubscription.of(topic_name)]
- direct_receiver = self.messaging_service\
- .create_direct_message_receiver_builder()\
- .with_subscriptions(topics)\
- .build()
-
- direct_receiver.start()
- direct_receiver.receive_async(MessageHandlerImpl())
-
- stop_event.wait()
-
- direct_receiver.terminate()
diff --git a/mindsdb/integrations/handlers/solr_handler/README.md b/mindsdb/integrations/handlers/solr_handler/README.md
deleted file mode 100644
index 3aca855f674..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Solr Handler
-
-This is the implementation of the Solr handler for MindsDB.
-
-## Solr
-The Apache Solr β’ is a highly reliable, scalable and fault tolerant, providing distributed indexing, replication and load-balanced querying, automated failover and recovery, centralized configuration and more.
-
-## Implementation
-This handler was implemented using the `sqlalchemy-solr` library, which provides a Python / SQLAlchemy interface.
-
-The required arguments to establish a connection are,
-* `username`: the username used to authenticate with the Solr server. This parameter is optional.
-* `password`: the password to authenticate the user with the Solr server. This parameter is optional.
-* `host`: the host name or IP address of the Solr server(.
-* `port`: the port number of the Solr server.
-* `server_path`: Defaults to solr in case not provided.
-* `collection`: Solr Collection name.
-* `use_ssl`: Defaults to false in case not provide. true|false
-Refer [https://pypi.org/project/sqlalchemy-solr/](https://pypi.org/project/sqlalchemy-solr/)
-
-## Usage
-In order to make use of this handler and connect to Hive in MindsDB, the following syntax can be used,
-~~~~sql
-CREATE DATABASE solr_datasource
-WITH
-engine='solr',
-parameters={
- "username": "demo_user",
- "password": "demo_password",
- "host": "127.0.0.1",
- "port": "8981",
- "server_path": "solr",
- "collection": "gettingstarted",
- "use_ssl": "false"
-};
-~~~~
-
-Now, you can use this established connection to query your database as follows,
-~~~~sql
-SELECT * FROM solr_datasource.gettingstarted limit 10000;
-~~~~
-
-## Requirements
-A Solr instance with a Parallel SQL supported up and running.
-
-There are certain limitations that need to be taken into account when issuing queries to Solr.
-Refer [https://solr.apache.org/guide/solr/latest/query-guide/sql-query.html#parallel-sql-queries](https://solr.apache.org/guide/solr/latest/query-guide/sql-query.html#parallel-sql-queries).
-
-Tip: Don't forget to put limit in the end of the SQL statement
diff --git a/mindsdb/integrations/handlers/solr_handler/__about__.py b/mindsdb/integrations/handlers/solr_handler/__about__.py
deleted file mode 100644
index 7588d2cbc0d..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Solr handler'
-__package_name__ = 'mindsdb_solr_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Solr"
-__author__ = 'Biswadip Paul'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/solr_handler/__init__.py b/mindsdb/integrations/handlers/solr_handler/__init__.py
deleted file mode 100644
index c7301f8f9cf..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .solr_handler import SolrHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Apache Solr'
-name = 'solr'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/solr_handler/connection_args.py b/mindsdb/integrations/handlers/solr_handler/connection_args.py
deleted file mode 100644
index 706c3c4844f..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/connection_args.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- username={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the Solr server.'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password to authenticate the user with the Solr server.',
- 'secret': True
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the Solr server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'The TCP/IP port of the Solr server. Must be an integer.'
- },
- server_path={
- 'type': ARG_TYPE.STR,
- 'description': 'The server path connecting with the Solr server. Defaults to solr when not provided.'
- },
- collection={
- 'type': ARG_TYPE.STR,
- 'description': 'The collection name to use for the query in the Solr server.'
- },
- use_ssl={
- 'type': ARG_TYPE.BOOL,
- 'description': 'The flag to set ssl for the query in the Solr server.Defaults to false.'
- }
-)
-
-connection_args_example = OrderedDict(
- username="demo_user",
- password="demo_password",
- host="127.0.0.1",
- port=8981,
- server_path="solr",
- collection="gettingstarted",
- use_ssl=False,
-)
diff --git a/mindsdb/integrations/handlers/solr_handler/icon.svg b/mindsdb/integrations/handlers/solr_handler/icon.svg
deleted file mode 100644
index 2c468142129..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/icon.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/solr_handler/requirements.txt b/mindsdb/integrations/handlers/solr_handler/requirements.txt
deleted file mode 100644
index a5a42f708cd..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-sqlalchemy-solr
-sqlparse>=0.5.4 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/solr_handler/solr_handler.py b/mindsdb/integrations/handlers/solr_handler/solr_handler.py
deleted file mode 100644
index 5071f579a76..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/solr_handler.py
+++ /dev/null
@@ -1,177 +0,0 @@
-from typing import Optional
-import pandas as pd
-
-from sqlalchemy import create_engine
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast.base import ASTNode
-
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-
-logger = log.getLogger(__name__)
-
-
-class SolrHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the Solr SQL statements.
- """
-
- name = 'solr'
-
- def __init__(self, name: str, connection_data: Optional[dict], **kwargs):
- super().__init__(name)
- self.parser = parse_sql
- self.dialect = 'solr'
-
- if ('host' not in connection_data) or ('port' not in connection_data) or ('collection' not in connection_data):
- raise Exception("The host, port and collection parameter should be provided!")
-
- optional_parameters = ['use_ssl', 'username', 'password']
- for parameter in optional_parameters:
- if parameter not in connection_data:
- connection_data[parameter] = None
-
- if connection_data.get('use_ssl', False):
- connection_data['use_ssl'] = True
- else:
- connection_data['use_ssl'] = False
-
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- self.connection = None
- self.is_connected = False
-
- def __del__(self):
- if self.is_connected is True:
- self.disconnect()
-
- def connect(self):
- """
- Set up the connection required by the handler.
- Returns:
- HandlerStatusResponse
- """
- if self.is_connected is True:
- return self.connection
-
- config = {
- 'username': self.connection_data.get('username'),
- 'password': self.connection_data.get('password'),
- 'host': self.connection_data.get('host'),
- 'port': self.connection_data.get('port'),
- 'server_path': self.connection_data.get('server_path', 'solr'),
- 'collection': self.connection_data.get('collection'),
- 'use_ssl': self.connection_data.get('use_ssl')
- }
-
- connection = create_engine("solr://{username}:{password}@{host}:{port}/{server_path}/{collection}/sql?use_ssl={use_ssl}".format(**config))
- self.is_connected = True
- self.connection = connection.connect()
- return self.connection
-
- def disconnect(self):
- """
- Close any existing connections.
- """
- if self.is_connected is False:
- return
- self.connection.close()
- self.is_connected = False
- return
-
- def check_connection(self) -> StatusResponse:
- """
- Check the connection of the Solr database
- Returns:
- HandlerStatusResponse
- """
-
- response = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f'Error connecting to Solr {self.connection_data["host"]}, {e}!')
- response.error_message = str(e)
-
- if response.success is True and need_to_close:
- self.disconnect()
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> Response:
- """
- Receive raw query and act upon it somehow.
- Args:
- query (str): query in native format
- Returns:
- HandlerResponse
- """
-
- need_to_close = self.is_connected is False
-
- connection = self.connect()
-
- try:
- result = connection.execute(query)
- columns = list(result.keys())
- if result:
- response = Response(
- RESPONSE_TYPE.TABLE,
- pd.DataFrame(
- result,
- columns=columns
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
-
- except Exception as e:
- logger.error(f'Error running query: {query} on {self.connection_data["host"]}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Retrieve the data from the SQL statement.
- """
- return self.native_query(query.to_string())
-
- def get_tables(self) -> Response:
- """
- Get a list with all of the tables in Solr
- """
- result = {}
- result['data_frame'] = pd.DataFrame([self.connection_data.get('collection')])
- df = result.data_frame
- result.data_frame = df.rename(columns={df.columns[0]: 'table_name'})
- return result
-
- def get_columns(self, table_name) -> Response:
- """
- Show details about the table
- """
- q = f"select * from {table_name} limit 1"
- result = self.native_query(q)
- df = pd.DataFrame([[col] for col in result.data_frame.columns])
- result.data_frame = df.rename(columns={df.columns[0]: 'column_name'})
- return result
diff --git a/mindsdb/integrations/handlers/solr_handler/tests/__init__.py b/mindsdb/integrations/handlers/solr_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py b/mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py
deleted file mode 100644
index 14abd69b72f..00000000000
--- a/mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import unittest
-
-from mindsdb.integrations.handlers.solr_handler.solr_handler import SolrHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class SolrHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "username": "demo_user",
- "password": "demo_password",
- "host": "172.22.0.4",
- "port": 8983,
- "server_path": "solr",
- "collection": "gettingstarted",
- "use_ssl": False
- }
- cls.handler = SolrHandler('test_solr_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.check_connection()
-
- def test_2_get_tables(self):
- tbls = self.handler.get_tables()
- assert tbls['type'] is not RESPONSE_TYPE.ERROR
-
- def test_6_describe_table(self):
- described = self.handler.get_columns("gettingstarted")
- assert described['type'] is RESPONSE_TYPE.TABLE
-
- def test_7_select_query(self):
- query = "SELECT * FROM gettingstarted WHERE id='apple' limit 1000"
- result = self.handler.query(query)
- assert result['type'] is RESPONSE_TYPE.TABLE
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/sqlany_handler/README.md b/mindsdb/integrations/handlers/sqlany_handler/README.md
deleted file mode 100644
index deed1b39948..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# SAP SQL Anywhere Handler
-
-This is the implementation of the SAP SQL Anywhere handler for MindsDB.
-
-## SAP SQL Anywhere
-
-
-SAP SQL Anywhere Embedded Database for Application Software
-enables secure, reliable data management for servers where no DBA is available and synchronization for tens of thousands of mobile devices, Internet of Things (IoT) systems, and remote environments. [Read more](https://www.sap.com/products/technology-platform/sql-anywhere.html).
-
-## Implementation
-
-This handler was implemented using `sqlanydb` - the Python driver for SAP SQL Anywhere.
-
-The required arguments to establish a connection are,
-
-* `host`: the host name or IP address of the SAP SQL Anywhere instance
-* `port`: the port number of the SAP SQL Anywhere instance
-* `user`: specifies the user name
-* `password`: specifies the password for the user
-* `database`: sets the current database
-* `server`: sets the current server
-
-## Usage
-
-Based on the current connected database we have a table called `TEST` that was created using
-the following SQL statements:
-
-~~~~sql
-CREATE TABLE TEST
-(
- ID INTEGER NOT NULL,
- NAME NVARCHAR(1),
- DESCRIPTION NVARCHAR(1)
-);
-
-CREATE UNIQUE INDEX TEST_ID_INDEX
- ON TEST (ID);
-
-ALTER TABLE TEST
- ADD CONSTRAINT TEST_PK
- PRIMARY KEY (ID);
-
-INSERT INTO TEST
-VALUES (1, 'h', 'w');
-~~~~
-
-In order to make use of this handler and connect to the SAP SQL Anywhere database in MindsDB, the following syntax can be used:
-
-~~~~sql
-CREATE DATABASE sap_sqlany_trial
-WITH ENGINE = 'sqlany',
-PARAMETERS = {
- "user": "DBADMIN",
- "password": "password",
- "host": "localhost",
- "port": "55505",
- "server": "TestMe",
- "database": "MINDSDB"
-};
-~~~~
-
-**Note**: The above example assumes usage of SAP SQL Anywhere Cloud, which requires the `encrypt` parameter to be set to `true` and uses port `443`.
-
-Now, you can use this established connection to query your database as follows:
-
-~~~~sql
-SELECT * FROM sap_sqlany_trial.test
-~~~~
-
-| ID | NAME | DESCRIPTION |
-|----|------|-------------|
-| 1 | h | w |
-
-
diff --git a/mindsdb/integrations/handlers/sqlany_handler/__about__.py b/mindsdb/integrations/handlers/sqlany_handler/__about__.py
deleted file mode 100644
index 9660683e899..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB SAP SQL Anywhere handler'
-__package_name__ = 'mindsdb_sqlany_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for SAP SQL Anywhere"
-__author__ = 'Michael Lantz'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022 - mindsdb'
diff --git a/mindsdb/integrations/handlers/sqlany_handler/__init__.py b/mindsdb/integrations/handlers/sqlany_handler/__init__.py
deleted file mode 100644
index a9eacd081c8..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .sqlany_handler import SQLAnyHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'SAP SQL Anywhere'
-name = 'sqlany'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/sqlany_handler/connection_args.py b/mindsdb/integrations/handlers/sqlany_handler/connection_args.py
deleted file mode 100644
index 919fcd82089..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/connection_args.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-# For complete list of parameters: https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.help.sqlanywhere.12.0.1/dbadmin/da-conparm.html
-connection_args = OrderedDict(
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The IP address/host name of the SAP SQL Anywhere instance host.'
- },
- port={
- 'type': ARG_TYPE.STR,
- 'description': 'The port number of the SAP SQL Anywhere instance.'
- },
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'Specifies the user name.'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'Specifies the password for the user.',
- 'secret': True
- },
- server={
- 'type': ARG_TYPE.STR,
- 'description': 'Specifies the name of the server to connect to.'
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'Specifies the name of the database to connect to.'
- },
- encrypt={
- 'type': ARG_TYPE.BOOL,
- 'description': 'Enables or disables TLS encryption.'
- },
-)
-
-connection_args_example = OrderedDict(
- host='localhost',
- port=55505,
- user='DBADMIN',
- password='password',
- serverName='TestMe',
- database='MINDSDB'
-)
diff --git a/mindsdb/integrations/handlers/sqlany_handler/icon.svg b/mindsdb/integrations/handlers/sqlany_handler/icon.svg
deleted file mode 100644
index e0e489c013b..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/icon.svg
+++ /dev/null
@@ -1,13 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sqlany_handler/requirements.txt b/mindsdb/integrations/handlers/sqlany_handler/requirements.txt
deleted file mode 100644
index f198424b491..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-sqlalchemy-sqlany
-sqlanydb
diff --git a/mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py b/mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py
deleted file mode 100644
index d98e31cfc36..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py
+++ /dev/null
@@ -1,180 +0,0 @@
-from pandas import DataFrame
-
-import sqlanydb
-import sqlalchemy_sqlany.base as sqlany_dialect
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-
-logger = log.getLogger(__name__)
-
-
-class SQLAnyHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the SAP SQL Anywhere statements.
- """
-
- name = 'sqlany'
-
- def __init__(self, name: str, connection_data: dict, **kwargs):
- super().__init__(name)
-
- self.dialect = 'sqlany'
- self.parser = parse_sql
- self.connection_data = connection_data
- self.renderer = SqlalchemyRender(sqlany_dialect.SQLAnyDialect)
- self.host = self.connection_data.get('host')
- self.port = self.connection_data.get('port')
- self.userid = self.connection_data.get('user')
- self.password = self.connection_data.get('password')
- self.server = self.connection_data.get('server')
- self.databaseName = self.connection_data.get('database')
- self.encryption = self.connection_data.get('encrypt', False)
- self.connection = None
- self.is_connected = False
-
- def __del__(self):
- if self.is_connected is True:
- self.disconnect()
-
- def connect(self):
- """
- Handles the connection to a SAP SQL Anywhere database insance.
- """
-
- if self.is_connected is True:
- return self.connection
-
- if self.port.strip().isnumeric():
- self.host += ":" + self.port.strip()
-
- if self.encryption:
- self.encryption = "SIMPLE"
- else:
- self.encryption = "NONE"
-
- connection = sqlanydb.connect(
- host=self.host,
- userid=self.userid,
- password=self.password,
- server=self.server,
- databaseName=self.databaseName,
- encryption=self.encryption
- )
- self.is_connected = True
- self.connection = connection
- return self.connection
-
- def disconnect(self):
- """
- Disconnects from the SAP SQL Anywhere database
- """
-
- if self.is_connected is True:
- self.connection.close()
- self.is_connected = False
-
- def check_connection(self) -> StatusResponse:
- """
- Check the connection of the SAP SQL Anywhere database
- :return: success status and error message if error occurs
- """
-
- response = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- connection = self.connect()
- cur = connection.cursor()
- cur.execute('SELECT 1 FROM SYS.DUMMY;')
- response.success = True
- except sqlanydb.Error as e:
- logger.error(f'Error connecting to SAP SQL Anywhere {self.host}, {e}!')
- response.error_message = e
-
- if response.success is True and need_to_close:
- self.disconnect()
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> Response:
- """
- Receive SQL query and runs it
- :param query: The SQL query to run in SAP SQL Anywhere
- :return: returns the records from the current recordset
- """
-
- need_to_close = self.is_connected is False
-
- connection = self.connect()
- cur = connection.cursor()
- try:
- cur.execute(query)
- if not cur.description:
- response = Response(RESPONSE_TYPE.OK)
- else:
- result = cur.fetchall()
- response = Response(
- RESPONSE_TYPE.TABLE,
- DataFrame(
- result,
- columns=[x[0] for x in cur.description]
- )
- )
- connection.commit()
- except Exception as e:
- logger.error(f'Error running query: {query} on {self.connection}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_code=0,
- error_message=str(e)
- )
- connection.rollback()
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Retrieve the data from the SQL statement with eliminated rows that dont satisfy the WHERE condition
- """
-
- query_str = self.renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> Response:
- """
- List all tables in SAP SQL Anywhere in the current schema
- """
-
- return self.native_query("""
- SELECT USER_NAME(ob.UID) AS SCHEMA_NAME
- , st.TABLE_NAME
- , st.TABLE_TYPE
- FROM SYSOBJECTS ob
- INNER JOIN SYS.SYSTABLE st on ob.ID = st.OBJECT_ID
- WHERE ob.TYPE='U' AND st.TABLE_TYPE <> 'GBL TEMP'
- """)
-
- def get_columns(self, table_name: str) -> Response:
- """
- List all columns in a table in SAP SQL Anywhere in the current schema
- :param table_name: the table name for which to list the columns
- :return: returns the columns in the table
- """
-
- return self.renderer.dialect.get_columns(table_name)
diff --git a/mindsdb/integrations/handlers/sqlany_handler/tests/test_sqlany_handler.py b/mindsdb/integrations/handlers/sqlany_handler/tests/test_sqlany_handler.py
deleted file mode 100644
index 89856bf3150..00000000000
--- a/mindsdb/integrations/handlers/sqlany_handler/tests/test_sqlany_handler.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import os
-import unittest
-
-from mindsdb.integrations.handlers.sqlany_handler.sqlany_handler import SQLAnyHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-"""
-create table TEST
-(
- ID INTEGER not null,
- NAME NVARCHAR(1),
- DESCRIPTION NVARCHAR(1)
-);
-
-create unique index TEST_ID_INDEX
- on TEST (ID);
-
-alter table TEST
- add constraint TEST_PK
- primary key (ID);
-
-insert into TEST
-values (1, 'h', 'w');
-"""
-
-
-class SQLAnyHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "host": os.environ.get('SQLANY_HOST', 'localhost'),
- "port": os.environ.get('SQLANY_PORT', 55505),
- "user": "DBA",
- "password": os.environ.get('SQLANY_PASSWORD', 'password'),
- "server": "TestMe",
- "database": "MINDSDB"
- }
- cls.handler = SQLAnyHandler('test_sqlany_handler', cls.kwargs)
-
- def test_0_connect(self):
- assert self.handler.connect()
-
- def test_1_check_connection(self):
- assert self.handler.check_connection().success is True
-
- def test_2_get_columns(self):
- assert self.handler.get_columns('TEST').resp_type is not RESPONSE_TYPE.ERROR
-
- def test_3_get_tables(self):
- assert self.handler.get_tables().resp_type is not RESPONSE_TYPE.ERROR
-
- def test_4_select_query(self):
- query = 'SELECT * FROM TEST WHERE ID=2'
- assert self.handler.query(query).resp_type is RESPONSE_TYPE.TABLE
-
- def test_5_update_query(self):
- query = 'UPDATE TEST SET NAME=\'s\' WHERE ID=1'
- assert self.handler.query(query).resp_type is RESPONSE_TYPE.OK
-
-
-if __name__ == "__main__":
- unittest.main(failfast=True)
diff --git a/mindsdb/integrations/handlers/sqlite_handler/README.md b/mindsdb/integrations/handlers/sqlite_handler/README.md
deleted file mode 100644
index db5836911a4..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/README.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# SQLite Handler
-
-This is the implementation of the SQLite handler for MindsDB.
-
-## SQLite
-SQLite is an in-process library that implements a self-contained, serverless, zero-configuration, transactional SQL database engine. The code for SQLite is in the public domain and is thus free for use for any purpose, commercial or private. SQLite is the most widely deployed database in the world with more applications than we can count, including several high-profile projects.
-https://www.sqlite.org/about.html
-
-## Implementation
-This handler was implemented using the standard `sqlite3` library that comes with Python.
-
-The only required argument to establish a connection is `db_file`. This points to the database file that the connection is to be made to.
-
-Optionally, this may also be set to `:memory:`, which will create an in-memory database.
-
-## Usage
-If you have local file that need to connect into MindsDB, you have to [deploy MindsDB locally](https://docs.mindsdb.com/setup/self-hosted/pip/source), ways like via Docker or via pip. Then copy the file into the desired folder in source folder. This way MindsDB can successfully access your file.
-
-In order to make use of this handler and connect to a SQLite database in MindsDB, the following syntax can be used,
-~~~~sql
-CREATE DATABASE sqlite_datasource
-WITH
-engine='sqlite',
-parameters={
- "db_file":"example.db"
-};
-~~~~
-
-Now, you can use this established connection to query your database as follows,
-~~~~sql
-SELECT * FROM sqlite_datasource.example_tbl
-~~~~
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sqlite_handler/__about__.py b/mindsdb/integrations/handlers/sqlite_handler/__about__.py
deleted file mode 100644
index 5513897e6f1..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB SQLite handler'
-__package_name__ = 'mindsdb_sqlite_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for SQLite"
-__author__ = 'Minura Punchihewa'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/sqlite_handler/__init__.py b/mindsdb/integrations/handlers/sqlite_handler/__init__.py
deleted file mode 100644
index 9dd2b517ed3..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .sqlite_handler import SQLiteHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'SQLite'
-name = 'sqlite'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/sqlite_handler/connection_args.py b/mindsdb/integrations/handlers/sqlite_handler/connection_args.py
deleted file mode 100644
index 006cd49af9a..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/connection_args.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- db_file={
- 'type': ARG_TYPE.STR,
- 'description': 'The database file where the data will be stored. The special path name :memory: can be provided'
- ' to create a temporary database in RAM.'
- }
-)
-
-connection_args_example = OrderedDict(
- db_file='chinook.db'
-)
diff --git a/mindsdb/integrations/handlers/sqlite_handler/icon.svg b/mindsdb/integrations/handlers/sqlite_handler/icon.svg
deleted file mode 100644
index 9de0e827cf8..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/icon.svg
+++ /dev/null
@@ -1,12 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py b/mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py
deleted file mode 100644
index d72be9edd7f..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import os
-from typing import Optional
-
-import pandas as pd
-import sqlite3
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.integrations.libs.base import DatabaseHandler
-
-from mindsdb_sql_parser.ast.base import ASTNode
-
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE,
-)
-
-
-logger = log.getLogger(__name__)
-
-
-class SQLiteHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the SQLite statements.
- """
-
- name = "sqlite"
-
- def __init__(self, name: str, connection_data: Optional[dict], **kwargs):
- """
- Initialize the handler.
- Args:
- name (str): name of particular handler instance
- connection_data (dict): parameters for connecting to the database
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
- self.parser = parse_sql
- self.dialect = "sqlite"
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- # SQLite objects created in a thread can only be used in that same thread.
- self.thread_safe = False
-
- self.connection = None
- self.is_connected = False
-
- def __del__(self):
- if self.is_connected is True:
- self.disconnect()
-
- def connect(self) -> StatusResponse:
- """
- Set up the connection required by the handler.
- Returns:
- HandlerStatusResponse
- """
-
- if self.is_connected is True:
- return self.connection
-
- self.connection = sqlite3.connect(self.connection_data["db_file"])
- self.is_connected = True
-
- return self.connection
-
- def disconnect(self):
- """
- Close any existing connections.
- """
-
- if self.is_connected is False:
- return
-
- self.connection.close()
- self.is_connected = False
- return self.is_connected
-
- def check_connection(self) -> StatusResponse:
- """
- Check connection to the handler.
- Returns:
- HandlerStatusResponse
- """
-
- response = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- if not os.path.isfile(self.connection_data["db_file"]):
- raise FileNotFoundError(
- f"File '{self.connection_data['db_file']}' not found. Use ':memory:' to create an in-memory database if you don't have a file."
- )
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f"Error connecting to SQLite {self.connection_data['db_file']}, {e}!")
- response.error_message = str(e)
- finally:
- if response.success is True and need_to_close:
- self.disconnect()
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """
- Receive raw query and act upon it somehow.
- Args:
- query (str): query in native format
- Returns:
- HandlerResponse
- """
-
- need_to_close = self.is_connected is False
-
- connection = self.connect()
- cursor = connection.cursor()
-
- try:
- cursor.execute(query)
- result = cursor.fetchall()
- if result:
- response = Response(
- RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description])
- )
- else:
- connection.commit()
- response = Response(RESPONSE_TYPE.OK)
- except Exception as e:
- logger.error(f"Error running query: {query} on {self.connection_data['db_file']}!")
- response = Response(RESPONSE_TYPE.ERROR, error_message=str(e))
-
- cursor.close()
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> StatusResponse:
- """
- Receive query as AST (abstract syntax tree) and act upon it somehow.
- Args:
- query (ASTNode): sql query represented as AST. May be any kind
- of query: SELECT, INTSERT, DELETE, etc
- Returns:
- HandlerResponse
- """
- renderer = SqlalchemyRender("sqlite")
- query_str = renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> StatusResponse:
- """
- Return list of entities that will be accessible as tables.
- Returns:
- HandlerResponse
- """
-
- query = "SELECT name from sqlite_master where type= 'table';"
- result = self.native_query(query)
- df = result.data_frame
- result.data_frame = df.rename(columns={df.columns[0]: "table_name"})
- return result
-
- def get_columns(self, table_name: str) -> StatusResponse:
- """
- Returns a list of entity columns.
- Args:
- table_name (str): name of one of tables returned by self.get_tables()
- Returns:
- HandlerResponse
- """
-
- query = f"PRAGMA table_info([{table_name}]);"
- result = self.native_query(query)
- df = result.data_frame
- result.data_frame = df.rename(columns={"name": "column_name", "type": "data_type"})
- return result
diff --git a/mindsdb/integrations/handlers/sqlite_handler/tests/__init__.py b/mindsdb/integrations/handlers/sqlite_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/sqlite_handler/tests/test_sqlite_handler.py b/mindsdb/integrations/handlers/sqlite_handler/tests/test_sqlite_handler.py
deleted file mode 100644
index df4e2364118..00000000000
--- a/mindsdb/integrations/handlers/sqlite_handler/tests/test_sqlite_handler.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.sqlite_handler.sqlite_handler import SQLiteHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class SQLiteHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "db_file": "chinook.db",
- }
- cls.handler = SQLiteHandler('test_sqlite_handler', cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_native_query_select(self):
- query = "SELECT * FROM customers"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_2_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_4_get_columns(self):
- columns = self.handler.get_columns('customers')
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/README.md b/mindsdb/integrations/handlers/sqreamdb_handler/README.md
deleted file mode 100644
index be28b93fa95..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/README.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# SQreamDB Handler
-
-This is the implementation of the SQreamDB handler for MindsDB.
-
-## SQreamDB
-A SQL database that empowers organizations to perform complex analytics on a petabyte-scale of data and gain time-sensitive business insights faster and cheaper than from any other solution.
-
-## Implementation
-This handler was implemented using the `pysqream`, a Python library that allows you to use Python code to run SQL commands on SQreamDB Database.
-
-The required arguments to establish a connection are,
-* `user`: username asscociated with database
-* `password`: password to authenticate your access
-* `host`: host to server IP Address or hostname
-* `port`: port through which sevice is exposed
-* `database`: Database name to be connected
-* `service`: Optional: service queue (default: "sqream")
-* `use_ssl`: use SSL connection (default: False)
-* `clustered`: Optional: Connect through load balancer, or direct to worker (Default: false - direct to worker)
-
-
-## Usage
-In order to make use of this handler and connect to SQreamDB in MindsDB, the following syntax can be used,
-~~~~sql
-CREATE DATABASE sqreamdb_datasource
-WITH
-engine='sqreamdb',
-parameters={
- "user":"master",
- "password":"sqream",
- "host":"127.0.0.1",
- "port":5000,
- "database":"sqream"
-};
-~~~~
-
-Now, you can use this established connection to query your database as follows,
-~~~~sql
-SELECT * FROM sqreamdb_datasource.sampledb;
-~~~~
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/__about__.py b/mindsdb/integrations/handlers/sqreamdb_handler/__about__.py
deleted file mode 100644
index d9fa3bd21e9..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB SQreamDB handler'
-__package_name__ = 'mindsdb_sqreamdb_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for SQreamDB"
-__author__ = 'Parthiv Makwana'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023- mindsdb'
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/__init__.py b/mindsdb/integrations/handlers/sqreamdb_handler/__init__.py
deleted file mode 100644
index ae4cb4252c5..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .sqreamdb_handler import SQreamDBHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'SQreamDB'
-name = 'sqreamdb'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py b/mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py
deleted file mode 100644
index 9de67479193..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the SQreamDB server/database.'
- },
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the SQreamDB server.'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password to authenticate the user with the SQreamDB server.',
- 'secret': True
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'Specify port to connect SQreamDB server'
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'Specify database name to connect SQreamDB server'
- },
- service={
- 'type': ARG_TYPE.STR,
- 'description': 'Optional: service queue (default: "sqream")'
- },
- use_ssl={
- 'type': ARG_TYPE.BOOL,
- 'description': 'use SSL connection (default: False)'
- },
- clustered={
- 'type': ARG_TYPE.BOOL,
- 'description': 'Optional: Connect through load balancer, or direct to worker (Default: false - direct to worker)'
- },
-)
-
-connection_args_example = OrderedDict(
- host='127.0.0.1',
- port=5000,
- password='sqream',
- user='master',
- database='sqream'
-)
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/icon.svg b/mindsdb/integrations/handlers/sqreamdb_handler/icon.svg
deleted file mode 100644
index cef3ffe5143..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/icon.svg
+++ /dev/null
@@ -1,21 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/requirements.txt b/mindsdb/integrations/handlers/sqreamdb_handler/requirements.txt
deleted file mode 100644
index 3fa27e840dd..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pysqream>=3.2.5
-pysqream_sqlalchemy>=0.8
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py b/mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py
deleted file mode 100644
index 23dbdc4a670..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from typing import Optional
-
-from mindsdb_sql_parser.ast.base import ASTNode
-
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.utilities import log
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-import pandas as pd
-import pysqream as db
-
-from pysqream_sqlalchemy.dialect import SqreamDialect
-
-logger = log.getLogger(__name__)
-
-
-class SQreamDBHandler(DatabaseHandler):
-
- name = 'sqreamdb'
-
- def __init__(self, name: str, connection_data: Optional[dict], **kwargs):
- """ Initialize the handler
- Args:
- name (str): name of particular handler instance
- connection_data (dict): parameters for connecting to the database
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
-
- self.connection_data = connection_data
-
- self.connection = None
- self.is_connected = False
-
- def connect(self):
- """
- Handles the connection to a YugabyteSQL database insance.
- """
- if self.is_connected is True:
- return self.connection
-
- args = {
- "database": self.connection_data.get('database'),
- "host": self.connection_data.get('host'),
- "port": self.connection_data.get('port'),
- "username": self.connection_data.get('user'),
- "password": self.connection_data.get('password'),
- "clustered": self.connection_data.get('clustered', False),
- "use_ssl": self.connection_data.get('use_ssl', False),
- "service": self.connection_data.get('service', 'sqream')
- }
-
- connection = db.connect(**args)
-
- self.is_connected = True
- self.connection = connection
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """
- Check the connection of the SQreamDB database
- :return: success status and error message if error occurs
- """
- response = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- connection = self.connect()
- with connection.cursor() as cur:
- cur.execute('select 1;')
- response.success = True
- except db.Error as e:
- logger.error(f'Error connecting to SQreamDB {self.database}, {e}!')
- response.error_message = e
-
- if response.success is True and need_to_close:
- self.disconnect()
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive raw query and act upon it somehow.
- Args:
- query (Any): query in native format (str for sql databases,
- dict for mongo, etc)
- Returns:
- HandlerResponse
- """
- need_to_close = self.is_connected is False
- conn = self.connect()
- with conn.cursor() as cur:
- try:
- cur.execute(query)
-
- if cur.rowcount > 0 and query.upper().startswith('SELECT'):
- result = cur.fetchall()
- response = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=pd.DataFrame(
- result,
- columns=[x[0] for x in cur.description]
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
- self.connection.commit()
- except Exception as e:
- logger.error(f'Error running query: {query} on {self.database}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
- self.connection.rollback()
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Retrieve the data from the SQL statement
- """
- renderer = SqlalchemyRender(SqreamDialect)
- query_str = renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> Response:
- """
- List all tables in SQreamDB stored in 'sqream_catalog'
- """
-
- query = "SELECT table_name FROM sqream_catalog.tables"
-
- return self.query(query)
-
- def get_columns(self, table_name):
- query = f"""SELECT column_name, type_name
- FROM sqream_catalog.columns
- WHERE table_name = '{table_name}';
- """
- return self.query(query)
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/sqreamdb_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py b/mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py
deleted file mode 100644
index afc5a608028..00000000000
--- a/mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.sqreamdb_handler.sqreamdb_handler import SQreamDBHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class SQreamDBHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "127.0.0.1",
- "port": "5000",
- "user": "sqream",
- "password": "sqream",
- "database": "master"
- }
- }
- cls.handler = SQreamDBHandler('test_sqreamdb_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.connect()
-
- def test_1_drop_table(self):
- res = self.handler.query("DROP TABLE IF EXISTS LOVE")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_2_create_table(self):
- res = self.handler.query("CREATE TABLE IF NOT EXISTS LOVE (LOVER varchar(20))")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_3_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_4_select_query(self):
- query = "SELECT * FROM AUTHORS"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_5_check_connection(self):
- self.handler.check_connection()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/starrocks_handler/README.md b/mindsdb/integrations/handlers/starrocks_handler/README.md
deleted file mode 100644
index aca5a1d7d4a..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/README.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# StarRocks Handler
-
-This is the implementation of the StarRocks Handler for MindsDB.
-
-## StarRocks
-StarRocks is the next-generation data platform designed to make data-intensive real-time analytics fast and easy. It delivers query speeds 5 to 10 times faster than other popular solutions. StarRocks can perform real-time analytics well while updating historical records. It can also enhance real-time analytics with historical data from data lakes easily. With StarRocks, you can get rid of the de-normalized tables and get the best performance and flexibility.
-
-
-## Implementation
-
-This handler was implemented by extending mysql connector.
-
-The required arguments to establish a connection are:
-
-* `host`: the host name of the StarRocks connection
-* `port`: the port to use when connecting
-* `user`: the user to authenticate
-* `password`: the password to authenticate the user
-* `database`: database name
-
-## Usage
-
-In order to make use of this handler and connect to a StarRocks server in MindsDB, the following syntax can be used:
-
-```sql
-CREATE DATABASE starrocks_datasource
-WITH ENGINE = "starrocks",
-PARAMETERS = {
- "user": "root",
- "password": "",
- "host": "localhost",
- "port": 9030,
- "database": "starrocks "
-}
-```
-
-Now, you can use this established connection to query your database as follows:
-
-```sql
-SELECT * FROM starrocks_datasource.loveU LIMIT 10;
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/starrocks_handler/__about__.py b/mindsdb/integrations/handlers/starrocks_handler/__about__.py
deleted file mode 100644
index a59a12a2bd9..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB StarRocks handler'
-__package_name__ = 'mindsdb_starrocks_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for StarRocks"
-__author__ = 'Parthiv Makwana'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/starrocks_handler/__init__.py b/mindsdb/integrations/handlers/starrocks_handler/__init__.py
deleted file mode 100644
index 4efd81b8a0a..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-try:
- from .starrocks_handler import StarRocksHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-from .__about__ import __version__ as version, __description__ as description
-
-
-title = 'StarRocks'
-name = 'starrocks'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/starrocks_handler/icon.svg b/mindsdb/integrations/handlers/starrocks_handler/icon.svg
deleted file mode 100644
index 8d5a6f8d5d5..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/icon.svg
+++ /dev/null
@@ -1,9 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/starrocks_handler/requirements.txt b/mindsdb/integrations/handlers/starrocks_handler/requirements.txt
deleted file mode 100644
index ee467569031..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--r mindsdb/integrations/handlers/mysql_handler/requirements.txt
diff --git a/mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py b/mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py
deleted file mode 100644
index 7c1a3a221c6..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-from collections import OrderedDict
-
-
-from mindsdb.integrations.handlers.mysql_handler import Handler as MysqlHandler
-
-
-class StarRocksHandler(MysqlHandler):
- """
- This handler handles connection and execution of the StarRocks statements.
- """
- name = 'starrocks'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the StarRocks server.'
- },
- password={
- 'type': ARG_TYPE.STR,
- 'description': 'The password to authenticate the user with the StarRocks server.'
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'The database name to use when connecting with the StarRocks server.'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the StarRocks server.'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'The TCP/IP port of the StarRocks server. Must be an integer.'
- }
-)
-
-connection_args_example = OrderedDict(
- host='127.0.0.1',
- port=5432,
- user='root',
- password='??>',
- database='database'
-)
diff --git a/mindsdb/integrations/handlers/starrocks_handler/tests/__init__.py b/mindsdb/integrations/handlers/starrocks_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py b/mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py
deleted file mode 100644
index 7620d137806..00000000000
--- a/mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.starrocks_handler.starrocks_handler import StarRocksHandler
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-
-
-class StarRocksHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "localhost",
- "port": 9030,
- "user": "root",
- "password": "",
- "database": "starrocks",
- }
- }
- cls.handler = StarRocksHandler('test_starrocks_handler', **cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_connect(self):
- assert self.handler.connect()
-
- def test_2_create_table(self):
- query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_3_insert(self):
- query = "INSERT INTO LOVER VALUES('Shiv Shakti');"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_4_native_query_select(self):
- query = "SELECT * FROM LOVER;"
- result = self.handler.query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_5_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is RESPONSE_TYPE.TABLE
-
- def test_6_get_columns(self):
- columns = self.handler.get_columns('LOVER')
-
- query = "DROP Table IF EXISTS Lover;"
- self.handler.query(query)
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/strapi_handler/README.md b/mindsdb/integrations/handlers/strapi_handler/README.md
deleted file mode 100644
index 5595ce71c99..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/README.md
+++ /dev/null
@@ -1,146 +0,0 @@
-# Strapi Handler
-
-Strapi handler is a MindsDB handler for Strapi. It allows you to query Strapi collections using SQL.
-
-## What is Strapi ?
-
-Strapi is the leading open-source Headless CMS. Strapi gives developers the freedom to use their favorite tools and frameworks while allowing editors to easily manage their content and distribute it anywhere.
-
-## Strapi Handler Initialization
-
-The Strapi handler is initialized with the following parameters:
-
-- `host` - the host of the Strapi server
-- `port` - the port of the Strapi server
-- `api_token` - the api token of the Strapi server
-- `plural_api_ids` - the list of plural api ids of the collections
-
-## Implemented Features
-
-- `SELECT` - select data from a collection
-- `WHERE` - filter data from a collection
-- `LIMIT` - limit data from a collection
-- `INSERT` - insert data into a collection
-- `UPDATE` - update data from a collection
-
-Note: We can use collection name as table name in SQL.
-
-## Example Usage
-
-The first step is to create a database with the new `Strapi` engine.
-
-```sql
-CREATE DATABASE myshop --- display name for database.
-WITH ENGINE = 'strapi', --- name of the mindsdb handler
-PARAMETERS = {
- "host" : "", --- host, it can be an ip or an url.
- "port" : "", --- common port is 1337.
- "api_token": "", --- api token of the strapi server.
- "plural_api_ids" : [""] --- plural api ids of the collections.
-};
-```
-
-Example:
-
-```sql
-CREATE DATABASE myshop
-WITH ENGINE = 'strapi',
-PARAMETERS = {
- "host" : "localhost",
- "port" : "1337",
- "api_token": "c56c000d867e95848c",
- "plural_api_ids" : ["products", "sellers"]
-};
-```
-
----
-
-### SELECT
-
-```sql
-SELECT *
-FROM myshop.;
-```
-
-Example:
-
-```sql
-SELECT *
-FROM myshop.products;
-```
-
----
-
-### WHERE
-
-```sql
-SELECT *
-FROM myshop.
-WHERE = ;
-```
-
-Example:
-
-```sql
-SELECT description, price
-FROM myshop.products
-WHERE id = 1;
-```
-
----
-
-### LIMIT
-
-```sql
-SELECT *
-FROM myshop.
-LIMIT ;
-```
-
-Example:
-
-```sql
-SELECT *
-FROM myshop.products
-LIMIT 10;
-```
-
----
-
-### INSERT
-
-```sql
-INSERT INTO myshop. (, , ...)
-VALUES (, , ...);
-```
-
-Example:
-
-```sql
-INSERT INTO myshop.sellers (name, email, sellerid)
-VALUES ('Ram', 'ram@gmail.com', 'ram');
-```
-
-Note: You only able to insert data into the collection which has `create` permission.
-
----
-
-### UPDATE
-
-```sql
-UPDATE myshop.
-SET = , = , ...
-WHERE = ;
-```
-
-Example
-
-```sql
-
-UPDATE myshop.products
-SET price = 299,
-avaiablity = false
-WHERE id = 1;
-```
-
-Note: You only able to update data into the collection which has `update` permission.
diff --git a/mindsdb/integrations/handlers/strapi_handler/__about__.py b/mindsdb/integrations/handlers/strapi_handler/__about__.py
deleted file mode 100644
index 199f17ec162..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Strapi handler"
-__package_name__ = "mindsdb_strapi_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Strapi"
-__author__ = "Ritwick Raj Makhal"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/strapi_handler/__init__.py b/mindsdb/integrations/handlers/strapi_handler/__init__.py
deleted file mode 100644
index 6ce97fc3e59..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __description__ as description
-from .__about__ import __version__ as version
-
-try:
- from .strapi_handler import StrapiHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Strapi"
-name = "strapi"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/strapi_handler/icon.svg b/mindsdb/integrations/handlers/strapi_handler/icon.svg
deleted file mode 100644
index 1b42789ef0b..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/icon.svg
+++ /dev/null
@@ -1,7 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/strapi_handler/strapi_handler.py b/mindsdb/integrations/handlers/strapi_handler/strapi_handler.py
deleted file mode 100644
index 2338edc0f0d..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/strapi_handler.py
+++ /dev/null
@@ -1,153 +0,0 @@
-from mindsdb.integrations.handlers.strapi_handler.strapi_tables import StrapiTable
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-from mindsdb_sql_parser import parse_sql
-from mindsdb.utilities import log
-import requests
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-from collections import OrderedDict
-import pandas as pd
-
-logger = log.getLogger(__name__)
-
-
-class StrapiHandler(APIHandler):
- def __init__(self, name: str, **kwargs) -> None:
- """initializer method
-
- Args:
- name (str): handler name
- """
- super().__init__(name)
-
- self.connection = None
- self.is_connected = False
- args = kwargs.get('connection_data', {})
- if 'host' in args and 'port' in args:
- self._base_url = f"http://{args['host']}:{args['port']}"
- if 'api_token' in args:
- self._api_token = args['api_token']
- if 'plural_api_ids' in args:
- self._plural_api_ids = args['plural_api_ids']
- # Registers tables for each collections in strapi
- for pluralApiId in self._plural_api_ids:
- self._register_table(table_name=pluralApiId, table_class=StrapiTable(handler=self, name=pluralApiId))
-
- def check_connection(self) -> StatusResponse:
- """checking the connection
-
- Returns:
- StatusResponse: whether the connection is still up
- """
- response = StatusResponse(False)
- try:
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f'Error connecting to Strapi API: {e}!')
- response.error_message = e
-
- self.is_connected = response.success
- return response
-
- def connect(self) -> StatusResponse:
- """making the connectino object
- """
- if self.is_connected and self.connection:
- return self.connection
-
- try:
- headers = {"Authorization": f"Bearer {self._api_token}"}
- response = requests.get(f"{self._base_url}", headers=headers)
- if response.status_code == 200:
- self.connection = response
- self.is_connected = True
- return StatusResponse(True)
- else:
- raise Exception(f"Error connecting to Strapi API: {response.status_code} - {response.text}")
- except Exception as e:
- logger.error(f'Error connecting to Strapi API: {e}!')
- return StatusResponse(False, error_message=e)
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
-
- Parameters
- ----------
- query : str
- query in a native format
-
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
-
- def call_strapi_api(self, method: str, endpoint: str, params: dict = {}, json_data: dict = {}) -> pd.DataFrame:
- headers = {"Authorization": f"Bearer {self._api_token}"}
- url = f"{self._base_url}{endpoint}"
-
- if method.upper() in ('GET', 'POST', 'PUT', 'DELETE'):
- headers['Content-Type'] = 'application/json'
-
- if method.upper() in ('POST', 'PUT', 'DELETE'):
- response = requests.request(method, url, headers=headers, params=params, data=json_data)
- else:
- response = requests.get(url, headers=headers, params=params)
-
- if response.status_code == 200:
- data = response.json()
- # Create an empty DataFrame
- df = pd.DataFrame()
- if isinstance(data.get('data', None), list):
- for item in data['data']:
- # Add 'id' and 'attributes' to the DataFrame
- row_data = {'id': item['id'], **item['attributes']}
- df = df._append(row_data, ignore_index=True)
- return df
- elif isinstance(data.get('data', None), dict):
- # Add 'id' and 'attributes' to the DataFrame
- row_data = {'id': data['data']['id'], **data['data']['attributes']}
- df = df._append(row_data, ignore_index=True)
- return df
- else:
- raise Exception(f"Error connecting to Strapi API: {response.status_code} - {response.text}")
-
- return pd.DataFrame()
-
-
-connection_args = OrderedDict(
- api_token={
- "type": ARG_TYPE.PWD,
- "description": "Strapi API key to use for authentication.",
- "required": True,
- "label": "Api token",
- },
- host={
- "type": ARG_TYPE.URL,
- "description": "Strapi API host to connect to.",
- "required": True,
- "label": "Host",
- },
- port={
- "type": ARG_TYPE.INT,
- "description": "Strapi API port to connect to.",
- "required": True,
- "label": "Port",
- },
- plural_api_ids={
- "type": list,
- "description": "Plural API id to use for querying.",
- "required": True,
- "label": "Plural API id",
- },
-)
-
-connection_args_example = OrderedDict(
- host="localhost",
- port=1337,
- api_token="c56c000d867e95848c",
- plural_api_ids=["posts", "portfolios"],
-)
diff --git a/mindsdb/integrations/handlers/strapi_handler/strapi_tables.py b/mindsdb/integrations/handlers/strapi_handler/strapi_tables.py
deleted file mode 100644
index 48b125ad976..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/strapi_tables.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from typing import List
-import pandas as pd
-from mindsdb.integrations.libs.api_handler import APIHandler, APITable
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-from mindsdb_sql_parser.ast.select.constant import Constant
-import json
-
-
-class StrapiTable(APITable):
-
- def __init__(self, handler: APIHandler, name: str):
- super().__init__(handler)
- self.name = name
- # get all the fields of a collection as columns
- self.columns = self.handler.call_strapi_api(method='GET', endpoint=f'/api/{name}').columns
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Triggered at the SELECT query
-
- Args:
- query (ast.Select): User's entered query
-
- Returns:
- pd.DataFrame: The queried information
- """
- # Initialize _id and selected_columns
- _id = None
- selected_columns = []
-
- # Get id from where clause, if available
- conditions = extract_comparison_conditions(query.where)
- for op, arg1, arg2 in conditions:
- if arg1 == 'id' and op == '=':
- _id = arg2
- else:
- raise ValueError("Unsupported condition in WHERE clause")
-
- # Get selected columns from query
- for target in query.targets:
- if isinstance(target, ast.Star):
- selected_columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- selected_columns.append(target.parts[-1])
- else:
- raise ValueError(f"Unknown query target {type(target)}")
-
- # Initialize the result DataFrame
- result_df = None
-
- if _id is not None:
- # Fetch data using the provided endpoint for the specific id
- df = self.handler.call_strapi_api(method='GET', endpoint=f'/api/{self.name}/{_id}')
-
- if len(df) > 0:
- result_df = df[selected_columns]
- else:
- # Fetch data without specifying an id
- page_size = 100 # The page size you want to use for API requests
- limit = query.limit.value if query.limit else None
- result_df = pd.DataFrame(columns=selected_columns)
-
- if limit:
- # Calculate the number of pages required
- page_count = (limit + page_size - 1) // page_size
- else:
- page_count = 1
-
- for page in range(1, page_count + 1):
- if limit:
- # Calculate the page size for this request
- current_page_size = min(page_size, limit)
- else:
- current_page_size = page_size
-
- df = self.handler.call_strapi_api(method='GET', endpoint=f'/api/{self.name}', params={'pagination[page]': page, 'pagination[pageSize]': current_page_size})
-
- if len(df) == 0:
- break
-
- result_df = pd.concat([result_df, df[selected_columns]], ignore_index=True)
-
- if limit:
- limit -= current_page_size
-
- return result_df
-
- def insert(self, query: ast.Insert) -> None:
- """triggered at the INSERT query
- Args:
- query (ast.Insert): user's entered query
- """
- data = {'data': {}}
- for column, value in zip(query.columns, query.values[0]):
- if isinstance(value, Constant):
- data['data'][column.name] = value.value
- else:
- data['data'][column.name] = value
- self.handler.call_strapi_api(method='POST', endpoint=f'/api/{self.name}', json_data=json.dumps(data))
-
- def update(self, query: ast.Update) -> None:
- """triggered at the UPDATE query
-
- Args:
- query (ast.Update): user's entered query
- """
- conditions = extract_comparison_conditions(query.where)
- # Get id from query
- for op, arg1, arg2 in conditions:
- if arg1 == 'id' and op == '=':
- _id = arg2
- else:
- raise NotImplementedError
- data = {'data': {}}
- for key, value in query.update_columns.items():
- if isinstance(value, Constant):
- data['data'][key] = value.value
- self.handler.call_strapi_api(method='PUT', endpoint=f'/api/{self.name}/{_id}', json_data=json.dumps(data))
-
- def get_columns(self, ignore: List[str] = []) -> List[str]:
- """columns
-
- Args:
- ignore (List[str], optional): exclusion items. Defaults to [].
-
- Returns:
- List[str]: available columns with `ignore` items removed from the list.
- """
-
- return [item for item in self.columns if item not in ignore]
diff --git a/mindsdb/integrations/handlers/strapi_handler/tests/__init__.py b/mindsdb/integrations/handlers/strapi_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/strapi_handler/tests/test_strapi_handler.py b/mindsdb/integrations/handlers/strapi_handler/tests/test_strapi_handler.py
deleted file mode 100644
index a7dd95481dc..00000000000
--- a/mindsdb/integrations/handlers/strapi_handler/tests/test_strapi_handler.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.strapi_handler.strapi_handler import StrapiHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class StrapiHandlerTest(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- connection_data = {
- 'host': 'localhost',
- 'port': '1337',
- 'api_token': 'c56c000d867e95848c',
- 'plural_api_ids': ['products', 'sellers']}
- cls.handler = StrapiHandler(name='myshop', connection_data=connection_data)
-
- def test_0_check_connection(self):
- # Ensure the connection is successful
- self.assertTrue(self.handler.check_connection())
-
- def test_1_get_table(self):
- assert self.handler.get_tables() is not RESPONSE_TYPE.ERROR
-
- def test_2_get_columns(self):
- assert self.handler.get_columns('products') is not RESPONSE_TYPE.ERROR
-
- def test_3_get_data(self):
- # Ensure that you can retrieve data from a table
- data = self.handler.native_query('SELECT * FROM products')
- assert data.type is not RESPONSE_TYPE.ERROR
-
- def test_4_get_data_with_condition(self):
- # Ensure that you can retrieve data with a condition
- data = self.handler.native_query('SELECT * FROM products WHERE id = 1')
- assert data.type is not RESPONSE_TYPE.ERROR
-
- def test_5_insert_data(self):
- # Ensure that data insertion is successful
- query = "INSERT INTO myshop.sellers (name, email, sellerid) VALUES ('Ram', 'ram@gmail.com', 'ramu4')"
- result = self.handler.native_query(query)
- self.assertTrue(result)
-
- def test_6_update_data(self):
- # Ensure that data updating is successful
- query = "UPDATE products SET name = 'test2' WHERE id = 1"
- result = self.handler.native_query(query)
- self.assertTrue(result)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/strava_handler/Readme.md b/mindsdb/integrations/handlers/strava_handler/Readme.md
deleted file mode 100644
index e91d23343ed..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/Readme.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Strava Handler
-
-Strava handler for MindsDB provides interfaces to connect with strava via APIs and pull the workout data of your fitness club into MindsDB.
-
-## Strava
-Strava is app used for tracking physical exercise and share the data with your social network
-
-## Strava Handler Initialization
-
-The Strava handler is initialized with the following parameters:
-
-- `strava_api_token`: Strava API key to use for authentication
-
-Please follow [this link](https://developers.strava.com/docs/getting-started/) to generate the token for accessing strava API
-
-## Implemented Features
-
-- [x] Strava all_clubs table
- - [x] Support LIMIT
- - [x] Support ORDER BY
- - [x] Support column selection
-
-- [x] Strava club_activities table
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-
-
-## Example Usage
-
-The first step is to create a database with the new `Strava` engine.
-
-~~~~sql
-CREATE DATABASE mindsdb_strava
-WITH ENGINE = 'strava',
-PARAMETERS = {
- "strava_client_id": "your-strava-client-id",
- "strava_access_token": "your-strava-api-key-token"
-};
-~~~~
-
-Use the established connection to query the Strava all_clubs table
-
-~~~~sql
-SELECT * FROM mindsdb_strava.all_clubs;
-~~~~
-
-Use the established connection to query the Strava club_activities table
-
-~~~~sql
-SELECT * FROM mindsdb_strava.club_activities
-WHERE strava_club_id = 195748;
-~~~~
-
-
-Advanced queries for the strava handler
-
-~~~~sql
-SELECT id,localized_sport_type,country,member_count FROM
-mindsdb_strava.all_clubs
-ORDER by id ASC
-LIMIT 10;
-~~~~~~~
-
-~~~~sql
-SELECT name, distance, sport_type
-FROM
-mindsdb_strava.club_activities
-WHERE strava_club_id = 195748
-ORDER BY distance ASC
-LIMIT 10;
-~~~~
diff --git a/mindsdb/integrations/handlers/strava_handler/__about__.py b/mindsdb/integrations/handlers/strava_handler/__about__.py
deleted file mode 100644
index f1142b5d0ba..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Strava handler"
-__package_name__ = "mindsdb_strava_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Strava"
-__author__ = "Balaji Seetharaman"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/strava_handler/__init__.py b/mindsdb/integrations/handlers/strava_handler/__init__.py
deleted file mode 100644
index 5cb5523853b..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .strava_handler import StravaHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Strava"
-name = "strava"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/strava_handler/icon.svg b/mindsdb/integrations/handlers/strava_handler/icon.svg
deleted file mode 100644
index 8ad79231341..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/icon.svg
+++ /dev/null
@@ -1,12 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/strava_handler/requirements.txt b/mindsdb/integrations/handlers/strava_handler/requirements.txt
deleted file mode 100644
index 006c115a469..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-stravalib
-urllib3>=2.2.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/mindsdb/integrations/handlers/strava_handler/strava_handler.py b/mindsdb/integrations/handlers/strava_handler/strava_handler.py
deleted file mode 100644
index f7cff0f8ff3..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/strava_handler.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from mindsdb.integrations.handlers.strava_handler.strava_tables import StravaAllClubsTable
-from mindsdb.integrations.handlers.strava_handler.strava_tables import StravaClubActivitesTable
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-from collections import OrderedDict
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-from stravalib.client import Client
-
-logger = log.getLogger(__name__)
-
-
-class StravaHandler(APIHandler):
- """Strava handler implementation"""
-
- def __init__(self, name=None, **kwargs):
- """Initialize the Strava handler.
- Parameters
- ----------
- name : str
- name of a handler instance
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
-
- self.parser = parse_sql
- self.connection_data = connection_data
- self.kwargs = kwargs
- self.connection = None
- self.is_connected = False
-
- strava_all_clubs_data = StravaAllClubsTable(self)
- self._register_table("all_clubs", strava_all_clubs_data)
-
- strava_club_activites_data = StravaClubActivitesTable(self)
- self._register_table("club_activities", strava_club_activites_data)
-
- def connect(self) -> StatusResponse:
- """Set up the connection required by the handler.
- Returns
- -------
- StatusResponse
- connection object
- """
- if self.is_connected is True:
- return self.connection
-
- client = Client()
- client.access_token = self.connection_data['strava_access_token']
- self.connection = client
-
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """Check connection to the handler.
- Returns
- -------
- StatusResponse
- Status confirmation
- """
- response = StatusResponse(False)
-
- try:
- self.connect()
- response.success = True
- except Exception as e:
- logger.error(f"Error connecting to Strava API: {e}!")
- response.error_message = e
-
- self.is_connected = response.success
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
-
-
-connection_args = OrderedDict(
- strava_client_id={
- 'type': ARG_TYPE.STR,
- 'description': 'Client id for accessing Strava Application API'
- },
- strava_access_token={
- 'type': ARG_TYPE.STR,
- 'description': 'Access Token for accessing Strava Application API'
- }
-)
-
-connection_args_example = OrderedDict(
- strava_client_id='',
- strava_access_token=''
-)
diff --git a/mindsdb/integrations/handlers/strava_handler/strava_tables.py b/mindsdb/integrations/handlers/strava_handler/strava_tables.py
deleted file mode 100644
index 9784d4ec06d..00000000000
--- a/mindsdb/integrations/handlers/strava_handler/strava_tables.py
+++ /dev/null
@@ -1,239 +0,0 @@
-import pandas as pd
-
-from typing import List
-
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-from mindsdb.utilities import log
-
-from mindsdb_sql_parser import ast
-
-
-logger = log.getLogger(__name__)
-
-
-class StravaAllClubsTable(APITable):
- """Strava List all Clubs Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the Strava "getLoggedInAthleteClubs" API endpoint
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
- Returns
- -------
- pd.DataFrame
- strava "List Athlete Clubs " matching the query
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- order_by_conditions = {}
-
- if query.order_by and len(query.order_by) > 0:
- order_by_conditions["columns"] = []
- order_by_conditions["ascending"] = []
-
- for an_order in query.order_by:
- if an_order.field.parts[0] != "id":
- next
- if an_order.field.parts[1] in self.get_columns():
- order_by_conditions["columns"].append(an_order.field.parts[1])
-
- if an_order.direction == "ASC":
- order_by_conditions["ascending"].append(True)
- else:
- order_by_conditions["ascending"].append(False)
- else:
- raise ValueError(
- f"Order by unknown column {an_order.field.parts[1]}"
- )
- strava_clubs_df = self.call_strava_allclubs_api()
-
- selected_columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- selected_columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- selected_columns.append(target.parts[-1])
- else:
- raise ValueError(f"Unknown query target {type(target)}")
-
- if len(strava_clubs_df) == 0:
- strava_clubs_df = pd.DataFrame([], columns=selected_columns)
- else:
- strava_clubs_df.columns = self.get_columns()
- for col in set(strava_clubs_df.columns).difference(set(selected_columns)):
- strava_clubs_df = strava_clubs_df.drop(col, axis=1)
-
- if len(order_by_conditions.get("columns", [])) > 0:
- strava_clubs_df = strava_clubs_df.sort_values(
- by=order_by_conditions["columns"],
- ascending=order_by_conditions["ascending"],
- )
-
- if query.limit:
- strava_clubs_df = strava_clubs_df.head(query.limit.value)
-
- return strava_clubs_df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
- return [
- 'id',
- 'name',
- 'sport_type',
- 'city',
- 'state',
- 'country',
- 'member_count',
- ]
-
- def call_strava_allclubs_api(self):
- """Pulls all the records from the given and returns it select()
-
- Returns
- -------
- pd.DataFrame of all the records of the "List Athlete Clubs" API end point
- """
-
- clubs = self.handler.connect().get_athlete_clubs()
-
- club_cols = self.get_columns()
- data = []
-
- for club in clubs:
- club_dict = club.dict()
- data.append([club_dict.get(x) for x in club_cols])
-
- all_strava_clubs_df = pd.DataFrame(data, columns=club_cols)
-
- return all_strava_clubs_df
-
-
-class StravaClubActivitesTable(APITable):
- """Strava List Club Activities by id Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the Strava "List Club Activities " API endpoint
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
- Returns
- -------
- pd.DataFrame
- strava "List Club Activities" matching the query
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- conditions = extract_comparison_conditions(query.where)
-
- order_by_conditions = {}
- clubs_kwargs = {}
-
- if query.order_by and len(query.order_by) > 0:
- order_by_conditions["columns"] = []
- order_by_conditions["ascending"] = []
-
- for an_order in query.order_by:
- if an_order.field.parts[0] != "id":
- next
- if an_order.field.parts[1] in self.get_columns():
- order_by_conditions["columns"].append(an_order.field.parts[1])
-
- if an_order.direction == "ASC":
- order_by_conditions["ascending"].append(True)
- else:
- order_by_conditions["ascending"].append(False)
- else:
- raise ValueError(
- f"Order by unknown column {an_order.field.parts[1]}"
- )
-
- for a_where in conditions:
- if a_where[1] == "strava_club_id":
- if a_where[0] != "=":
- raise ValueError("Unsupported where operation for strava_club_id ")
- clubs_kwargs["type"] = a_where[2]
- else:
- raise ValueError(f"Unsupported where argument {a_where[1]}")
-
- strava_club_activities_df = self.call_strava_clubactivities_api(a_where[2])
-
- selected_columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- selected_columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- selected_columns.append(target.parts[-1])
- else:
- raise ValueError(f"Unknown query target {type(target)}")
-
- if len(strava_club_activities_df) == 0:
- strava_club_activities_df = pd.DataFrame([], columns=selected_columns)
- else:
- strava_club_activities_df.columns = self.get_columns()
- for col in set(strava_club_activities_df.columns).difference(set(selected_columns)):
- strava_club_activities_df = strava_club_activities_df.drop(col, axis=1)
-
- if len(order_by_conditions.get("columns", [])) > 0:
- strava_club_activities_df = strava_club_activities_df.sort_values(
- by=order_by_conditions["columns"],
- ascending=order_by_conditions["ascending"],
- )
-
- if query.limit:
- strava_club_activities_df = strava_club_activities_df.head(query.limit.value)
-
- return strava_club_activities_df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
- return [
- 'name',
- 'distance',
- 'moving_time',
- 'elapsed_time',
- 'total_elevation_gain',
- 'sport_type',
- 'athlete.firstname',
- ]
-
- def call_strava_clubactivities_api(self, club_id):
- """Pulls all the records from the given and returns it select()
-
- Returns
- -------
- pd.DataFrame of all the records of the "getClubActivitiesById" API end point
- """
-
- club_activities = self.handler.connect().get_club_activities(club_id)
-
- club_cols = self.get_columns()
- data = []
-
- for club in club_activities:
- club_dict = club.dict()
- data.append([club_dict.get(x) for x in club_cols])
-
- all_strava_club_activities_df = pd.DataFrame(data, columns=club_cols)
-
- return all_strava_club_activities_df
diff --git a/mindsdb/integrations/handlers/stripe_handler/README.md b/mindsdb/integrations/handlers/stripe_handler/README.md
deleted file mode 100644
index a71252d5a39..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/README.md
+++ /dev/null
@@ -1,156 +0,0 @@
-# Stripe Handler
-
-Stripe handler for MindsDB provides interfaces to connect to Stripe via APIs and pull store data into MindsDB.
-
----
-
-## Table of Contents
-
-- [Stripe Handler](#stripe-handler)
- - [Table of Contents](#table-of-contents)
- - [About Stripe](#about-stripe)
- - [Stripe Handler Implementation](#stripe-handler-implementation)
- - [Stripe Handler Initialization](#stripe-handler-initialization)
- - [Implemented Features](#implemented-features)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
-
----
-
-## About Stripe
-
-Stripe is a payment services provider that lets merchants accept credit and debit cards or other payments.
-
-https://www.nerdwallet.com/article/small-business/what-is-stripe
-
-## Stripe Handler Implementation
-
-This handler was implemented using [stripe-python](https://github.com/stripe/stripe-python), the Python library for the Stripe API.
-
-## Stripe Handler Initialization
-
-The Stripe handler is initialized with the following parameters:
-
-- `api_key`: a Stripe API key. You can find your API keys in the Stripe Dashboard. [Read more](https://stripe.com/docs/keys).
-
-## Implemented Features
-
-- [x] Stripe Products Table for a given account
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
- - [x] Support INSERT
- - [x] Support UPDATE
- - [x] Support DELETE
-- [x] Stripe Customers Table for a given account
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-- [x] Stripe Payment Intents Table for a given account
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-- [x] Stripe Refunds Table for a given account
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-- [x] Stripe Payouts Table for a given account
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-
-## TODO
-
-- [ ] Support INSERT, UPDATE and DELETE for Customers and Payment Intents tables
-- [ ] Stripe Charges table
-- [ ] Stripe Balance table
-- [ ] Many more
-
-## Example Usage
-
-The first step is to create a database with the new `stripe` engine by passing in the required `api_key` parameter:
-
-~~~~sql
-CREATE DATABASE stripe_datasource
-WITH ENGINE = 'stripe',
-PARAMETERS = {
- "api_key": "sk_..."
-};
-~~~~
-
-Use the established connection to query your database:
-
-### Querying the Customer Data
-~~~~sql
-SELECT * FROM stripe_datasource.customers
-~~~~
-
-or, for the `payouts` table
-~~~~sql
-SELECT * FROM stripe_datasource.payouts
-~~~~
-
-or, for the `products` table
-~~~~sql
-SELECT * FROM stripe_datasource.products
-~~~~
-
-Run more advanced queries:
-
-~~~~sql
-SELECT name, email
-FROM stripe_datasource.customers
-WHERE currency = 'inr'
-ORDER BY name
-LIMIT 5
-~~~~
-
-### Querying the Refund data
-~~~~sql
-SELECT * FROM stripe_datasource.refunds
-~~~~
-
-Run more advanced queries:
-
-~~~~sql
-SELECT name, email
-FROM stripe_datasource.refunds
-WHERE currency = 'inr'
-ORDER BY name
-LIMIT 5
-~~~~
-
-
-~~~~sql
-SELECT id, name, active
-FROM stripe_datasource.products
-WHERE active = true
-ORDER BY name
-LIMIT 5
-~~~~
-
-~~~~sql
-INSERT INTO stripe_datasource.products(name)
-VALUES('product_name')
-~~~~
-
-~~~~sql
-UPDATE stripe_datasource.products
-SET name = 'product_name_updated'
-WHERE name = 'product_name'
-~~~~
-
-~~~~sql
-DELETE FROM stripe_datasource.products
-WHERE name = 'product_name_updated'
-~~~~
diff --git a/mindsdb/integrations/handlers/stripe_handler/__about__.py b/mindsdb/integrations/handlers/stripe_handler/__about__.py
deleted file mode 100644
index cf073898a61..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Stripe handler"
-__package_name__ = "mindsdb_stripe_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Stripe"
-__author__ = "Minura Punchihewa"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/stripe_handler/__init__.py b/mindsdb/integrations/handlers/stripe_handler/__init__.py
deleted file mode 100644
index 3f39afb0ee8..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .stripe_handler import StripeHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Stripe"
-name = "stripe"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/stripe_handler/icon.svg b/mindsdb/integrations/handlers/stripe_handler/icon.svg
deleted file mode 100644
index 49fcdfe093f..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/stripe_handler/requirements.txt b/mindsdb/integrations/handlers/stripe_handler/requirements.txt
deleted file mode 100644
index 233b8cec1c0..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-stripe
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/stripe_handler/stripe_handler.py b/mindsdb/integrations/handlers/stripe_handler/stripe_handler.py
deleted file mode 100644
index fbbe4b97d89..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/stripe_handler.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import stripe
-from mindsdb.integrations.handlers.stripe_handler.stripe_tables import CustomersTable, ProductsTable, PaymentIntentsTable, RefundsTable, PayoutsTable
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-logger = log.getLogger(__name__)
-
-
-class StripeHandler(APIHandler):
- """
- The Stripe handler implementation.
- """
-
- name = 'stripe'
-
- def __init__(self, name: str, **kwargs):
- """
- Initialize the handler.
- Args:
- name (str): name of particular handler instance
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- self.connection = None
- self.is_connected = False
-
- customers_data = CustomersTable(self)
- self._register_table("customers", customers_data)
-
- products_data = ProductsTable(self)
- self._register_table("products", products_data)
-
- payment_intents_data = PaymentIntentsTable(self)
- self._register_table("payment_intents", payment_intents_data)
-
- payouts_data = PayoutsTable(self)
- self._register_table("payouts", payouts_data)
-
- refunds_data = RefundsTable(self)
- self._register_table("refunds", refunds_data)
-
- def connect(self):
- """
- Set up the connection required by the handler.
- Returns
- -------
- StatusResponse
- connection object
- """
- if self.is_connected is True:
- return self.connection
-
- stripe.api_key = self.connection_data['api_key']
-
- self.connection = stripe
- self.is_connected = True
-
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """
- Check connection to the handler.
- Returns:
- HandlerStatusResponse
- """
-
- response = StatusResponse(False)
-
- try:
- stripe = self.connect()
- stripe.Account.retrieve()
- response.success = True
- except Exception as e:
- logger.error('Error connecting to Stripe!')
- response.error_message = str(e)
-
- self.is_connected = response.success
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/stripe_handler/stripe_tables.py b/mindsdb/integrations/handlers/stripe_handler/stripe_tables.py
deleted file mode 100644
index 094de9f5dbd..00000000000
--- a/mindsdb/integrations/handlers/stripe_handler/stripe_tables.py
+++ /dev/null
@@ -1,492 +0,0 @@
-import pandas as pd
-import stripe
-from typing import Text, List, Dict, Any
-
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.handlers.query_utilities import INSERTQueryParser, DELETEQueryParser, UPDATEQueryParser, DELETEQueryExecutor, UPDATEQueryExecutor
-from mindsdb.integrations.utilities.handlers.query_utilities.select_query_utilities import SELECTQueryParser, SELECTQueryExecutor
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class CustomersTable(APITable):
- """The Stripe Customers Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Stripe Customer data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Stripe Customers matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'customers',
- self.get_columns()
- )
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- customers_df = pd.json_normalize(self.get_customers(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- customers_df,
- selected_columns,
- where_conditions,
- order_by_conditions
- )
- customers_df = select_statement_executor.execute_query()
-
- return customers_df
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_customers(limit=1)).columns.tolist()
-
- def get_customers(self, **kwargs) -> List[Dict]:
- stripe = self.handler.connect()
- customers = stripe.Customer.list(**kwargs)
- return [customer.to_dict() for customer in customers]
-
-
-class ProductsTable(APITable):
- """The Stripe Products Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Stripe Product data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Stripe Products matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'products',
- self.get_columns()
- )
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- products_df = pd.json_normalize(self.get_products(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- products_df,
- selected_columns,
- where_conditions,
- order_by_conditions
- )
- products_df = select_statement_executor.execute_query()
-
- return products_df
-
- def insert(self, query: ast.Insert) -> None:
- """
- Inserts data into Stripe "POST v1/products" API endpoint.
-
- Parameters
- ----------
- query : ast.Insert
- Given SQL INSERT query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- insert_statement_parser = INSERTQueryParser(
- query,
- supported_columns=['id', 'name', 'active', 'description', 'metadata'],
- mandatory_columns=['name'],
- all_mandatory=False,
- )
- product_data = insert_statement_parser.parse_query()
- self.create_products(product_data)
-
- def update(self, query: ast.Update) -> None:
- """
- Updates data from Stripe "POST v1/products/:id" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- products_df = pd.json_normalize(self.get_products())
- update_query_executor = UPDATEQueryExecutor(
- products_df,
- where_conditions
- )
-
- products_df = update_query_executor.execute_query()
- product_ids = products_df['id'].tolist()
- self.update_products(product_ids, values_to_update)
-
- def delete(self, query: ast.Delete) -> None:
- """
- Deletes data from Stripe "DELETE v1/products/:id" API endpoint.
-
- Parameters
- ----------
- query : ast.Delete
- Given SQL DELETE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- delete_statement_parser = DELETEQueryParser(query)
- where_conditions = delete_statement_parser.parse_query()
-
- products_df = pd.json_normalize(self.get_products())
- delete_query_executor = DELETEQueryExecutor(
- products_df,
- where_conditions
- )
-
- products_df = delete_query_executor.execute_query()
- product_ids = products_df['id'].tolist()
- self.delete_products(product_ids)
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_products(limit=1)).columns.tolist()
-
- def get_products(self, **kwargs) -> List[Dict]:
- stripe = self.handler.connect()
- products = stripe.Product.list(**kwargs)
- return [product.to_dict() for product in products]
-
- def create_products(self, product_data: List[Dict[Text, Any]]) -> None:
- stripe = self.handler.connect()
- for product in product_data:
- created_product = stripe.Product.create(**product)
- if 'id' not in created_product.to_dict():
- raise Exception('Product creation failed')
- else:
- logger.info(f'Product {created_product.to_dict()["id"]} created')
-
- def update_products(self, product_ids: List[Text], values_to_update: Dict[Text, Any]) -> None:
- stripe = self.handler.connect()
- for product_id in product_ids:
- updated_product = stripe.Product.modify(product_id, **values_to_update)
- if 'id' not in updated_product.to_dict():
- raise Exception('Product update failed')
- else:
- logger.info(f'Product {updated_product.to_dict()["id"]} updated')
-
- def delete_products(self, product_ids: List[Text]) -> None:
- stripe = self.handler.connect()
- for product_id in product_ids:
- deleted_product = stripe.Product.delete(product_id)
- if 'id' not in deleted_product.to_dict():
- raise Exception('Product deletion failed')
- else:
- logger.info(f'Product {deleted_product.to_dict()["id"]} deleted')
-
-
-class PaymentIntentsTable(APITable):
- """The Stripe Payment Intents Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Stripe Payment Intents data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Stripe Payment Intents matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'payment_intents',
- self.get_columns()
- )
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- payment_intents_df = pd.json_normalize(self.get_payment_intents(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- payment_intents_df,
- selected_columns,
- where_conditions,
- order_by_conditions
- )
- payment_intents_df = select_statement_executor.execute_query()
-
- return payment_intents_df
-
- def delete(self, query: ast.Delete) -> None:
- """
- Cancels Stripe Payment Intents and updates the local data.
-
- Parameters
- ----------
- query : ast.Delete
- Given SQL DELETE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- delete_statement_parser = DELETEQueryParser(query)
- where_conditions = delete_statement_parser.parse_query()
-
- if 'payment_intents_df' not in self.__dict__:
- self.payment_intents_df = pd.json_normalize(self.get_payment_intents())
-
- delete_query_executor = DELETEQueryExecutor(
- self.payment_intents_df,
- where_conditions
- )
-
- canceled_payment_intents_df = delete_query_executor.execute_query()
-
- payment_intent_ids = canceled_payment_intents_df['id'].tolist()
- self.cancel_payment_intents(payment_intent_ids)
-
- self.payment_intents_df = self.payment_intents_df[~self.payment_intents_df['id'].isin(payment_intent_ids)]
-
- def cancel_payment_intents(self, payment_intent_ids: List[str]) -> None:
- stripe = self.handler.connect()
- for payment_intent_id in payment_intent_ids:
- try:
-
- payment_intent = stripe.PaymentIntent.retrieve(payment_intent_id)
- if payment_intent.status in ['requires_payment_method', 'requires_capture', 'requires_confirmation', 'requires_action', 'processing']:
- stripe.PaymentIntent.cancel(payment_intent_id)
- else:
- logger.warning(f"Payment intent {payment_intent_id} is in status {payment_intent.status} and cannot be canceled.")
- except stripe.error.StripeError as e:
- logger.error(f"Error cancelling payment intent {payment_intent_id}: {str(e)}")
-
- def update(self, query: 'ast.Update') -> None:
- """
- Updates data in Stripe "POST /v1/payment_intents/:id" API endpoint.
-
- Parameters
- ----------
- query : ast.Update
- Given SQL UPDATE query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- update_statement_parser = UPDATEQueryParser(query)
- values_to_update, where_conditions = update_statement_parser.parse_query()
-
- payment_intents_df = pd.json_normalize(self.get_payment_intents())
- update_query_executor = UPDATEQueryExecutor(
- payment_intents_df,
- where_conditions
- )
-
- payment_intents_df = update_query_executor.execute_query()
- payment_intent_ids = payment_intents_df['id'].tolist()
- self.update_payment_intents(payment_intent_ids, values_to_update)
-
- def update_payment_intents(self, payment_intent_ids: list, values_to_update: dict) -> None:
- for payment_intent_id in payment_intent_ids:
- stripe.PaymentIntent.modify(payment_intent_id, **values_to_update)
-
- def insert(self, query: 'ast.Insert') -> None:
- """
- Inserts data into Stripe "POST /v1/payment_intents" API endpoint.
-
- Parameters
- ----------
- query : ast.Insert
- Given SQL INSERT query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- insert_statement_parser = INSERTQueryParser(
- query,
- supported_columns=['amount', 'currency', 'description', 'payment_method_types'],
- mandatory_columns=['amount', 'currency'],
- all_mandatory=True
- )
- payment_intent_data = insert_statement_parser.parse_query()
- self.create_payment_intent(payment_intent_data)
-
- def create_payment_intent(self, payment_intent_data: list) -> None:
- for data in payment_intent_data:
- stripe.PaymentIntent.create(**data)
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_payment_intents(limit=1)).columns.tolist()
-
- def get_payment_intents(self, **kwargs) -> List[Dict]:
- stripe = self.handler.connect()
- payment_intents = stripe.PaymentIntent.list(**kwargs)
- return [payment_intent.to_dict() for payment_intent in payment_intents]
-
-
-class RefundsTable(APITable):
- """The Stripe Refund Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Stripe Refund data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Stripe Refunds matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'refunds',
- self.get_columns()
- )
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- refunds_df = pd.json_normalize(self.get_refunds(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- refunds_df,
- selected_columns,
- where_conditions,
- order_by_conditions
- )
- refunds_df = select_statement_executor.execute_query()
-
- return refunds_df
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_refunds(limit=1)).columns.tolist()
-
- def get_refunds(self, **kwargs) -> List[Dict]:
- stripe = self.handler.connect()
- refunds = stripe.Refund.list(**kwargs)
- return [refund.to_dict() for refund in refunds]
-
-
-class PayoutsTable(APITable):
- """The Stripe Payouts Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """
- Pulls Stripe Payout data.
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Stripe Payouts matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'payouts',
- self.get_columns()
- )
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- payouts_df = pd.json_normalize(self.get_payouts(limit=result_limit))
- select_statement_executor = SELECTQueryExecutor(
- payouts_df,
- selected_columns,
- where_conditions,
- order_by_conditions
- )
- payouts_df = select_statement_executor.execute_query()
-
- return payouts_df
-
- def get_columns(self) -> List[Text]:
- return pd.json_normalize(self.get_payouts(limit=1)).columns.tolist()
-
- def get_payouts(self, **kwargs) -> List[Dict]:
- stripe = self.handler.connect()
- payouts = stripe.Payout.list(**kwargs)
- return [payout.to_dict() for payout in payouts]
diff --git a/mindsdb/integrations/handlers/supabase_handler/README.md b/mindsdb/integrations/handlers/supabase_handler/README.md
deleted file mode 100644
index 23cac452dd0..00000000000
--- a/mindsdb/integrations/handlers/supabase_handler/README.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Supabase Handler
-
-This is the implementation of the Supabase handler for MindsDB.
-
-## Supabase
-
-Supabase is an open source Firebase alternative. Start your project with a Postgres Database, Authentication, instant APIs, Realtime subscriptions and Storage.
-
-## Implementation
-
-This handler was implemented by extending postres connector.
-
-The required arguments to establish a connection are:
-
-* `host`: the host name of the Supabase connection
-* `port`: the port to use when connecting
-* `user`: the user to authenticate
-* `password`: the password to authenticate the user
-* `database`: database name
-
-## Usage
-
-In order to make use of this handler and connect to a Supabase server in MindsDB, the following syntax can be used:
-
-```sql
-CREATE DATABASE example_supabase_data
-WITH ENGINE = "supabase",
-PARAMETERS = {
- "user": "root",
- "password": "root",
- "host": "hostname",
- "port": "5432",
- "database": "postgres"
-}
-```
-
-Now, you can use this established connection to query your database as follows:
-
-```sql
-SELECT * FROM example_supabase_data.public.rentals LIMIT 10;
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/supabase_handler/__about__.py b/mindsdb/integrations/handlers/supabase_handler/__about__.py
deleted file mode 100644
index 3c7e24e244e..00000000000
--- a/mindsdb/integrations/handlers/supabase_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Supabase handler'
-__package_name__ = 'mindsdb_supabase_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Supabase"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/supabase_handler/__init__.py b/mindsdb/integrations/handlers/supabase_handler/__init__.py
deleted file mode 100644
index 13a43ce1105..00000000000
--- a/mindsdb/integrations/handlers/supabase_handler/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL
-
-try:
- from .supabase_handler import SupabaseHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-from .__about__ import __version__ as version, __description__ as description
-
-
-title = "Supabase"
-name = "supabase"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
- "support_level",
-]
diff --git a/mindsdb/integrations/handlers/supabase_handler/icon.svg b/mindsdb/integrations/handlers/supabase_handler/icon.svg
deleted file mode 100644
index 245ae5106fc..00000000000
--- a/mindsdb/integrations/handlers/supabase_handler/icon.svg
+++ /dev/null
@@ -1,15 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/supabase_handler/supabase_handler.py b/mindsdb/integrations/handlers/supabase_handler/supabase_handler.py
deleted file mode 100644
index 8236acade97..00000000000
--- a/mindsdb/integrations/handlers/supabase_handler/supabase_handler.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler
-
-
-class SupabaseHandler(PostgresHandler):
- """
- This handler handles connection and execution of the Supabase statements.
- """
- name = 'supabase'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
diff --git a/mindsdb/integrations/handlers/supabase_handler/tests/__init__.py b/mindsdb/integrations/handlers/supabase_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py b/mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py
deleted file mode 100644
index 311bf5190d7..00000000000
--- a/mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.supabase_handler.supabase_handler import SupabaseHandler
-
-
-class SupabaseHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "localhost",
- "port": "3306",
- "user": "root",
- "password": "root",
- "database": "test",
- "ssl": False
- }
- }
- cls.handler = SupabaseHandler('test_supabase_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.connect()
-
- def test_1_check_connection(self):
- assert self.handler.check_connection()
-
- def test_2_native_query_show_dbs(self):
- dbs = self.handler.native_query("SHOW DATABASES;")
- assert isinstance(dbs, list)
-
- def test_3_get_tables(self):
- tbls = self.handler.get_tables()
- assert isinstance(tbls, list)
-
- def test_5_create_table(self):
- try:
- self.handler.native_query("CREATE TABLE test_mdb (test_col INT)")
- except Exception:
- pass
-
- def test_6_describe_table(self):
- described = self.handler.get_columns("dt_test")
- assert isinstance(described, list)
-
- def test_7_select_query(self):
- query = "SELECT * FROM dt_test WHERE 'id'='a'"
- self.handler.native_query(query)
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/README.md b/mindsdb/integrations/handlers/surrealdb_handler/README.md
deleted file mode 100644
index 8423cf04e96..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/README.md
+++ /dev/null
@@ -1,108 +0,0 @@
-# Surrealdb Handler
-
-This is the implementation of the Surrealdb handler for MindsDB.
-
-## Surrealdb
-
-SurrealDB is an innovative NewSQL cloud database, suitable for serverless applications,
-jamstack applications, single-page applications, and traditional applications.
-It is unmatched in its versatility and financial value, with the ability for deployment on cloud,
-on-premise, embedded, and edge computing environments.
-
-## Implementation
-
-This handler was implemented by using the python library `pysurrealdb`.
-
-The required arguments to establish a connection are:
-
-* `host`: the host name of the Surrealdb connection
-* `port`: the port to use when connecting
-* `user`: the user to authenticate
-* `password`: the password to authenticate the user
-* `database`: database name to be connected
-* `namespace`: namespace name to be connected
-
-## Usage
-
-In order to make use of this handler and connect to a SurrealDB server. First you need to have [SurrealDB](https://surrealdb.com/install) installed and once you have it installed.
-
-To use SurrealDB we have to start the SurrealDB server in our local environment. For that you need to give following command in the terminal:
-```
-surreal start --user root --pass root
-```
-
-This will start the server and start accepting requests from port `8000`. Now, in another terminal session, give the following command:
-```
-surreal sql --conn http://localhost:8000 \
---user root --pass root --ns testns --db testdb
-```
-
-This will create a namespace `testns` for your project and a database `testdb` in order to proceed further.
-
-Here, let's create a table in our newly created database with the following:
-```
-CREATE dev SET name='again', status='founder';
-```
-
-This will create a table named `dev` with column `name` and `status`.
-
-(If you want to use SurrealDB in public cloud editor, feel free to skip the following steps.)
-
-## Testing SurrealDB in the local environment
-
-Use the following query to create a SurrealDB database in the MindsDB environment.
-
-```sql
-CREATE DATABASE exampledb
-WITH ENGINE = 'surrealdb',
-PARAMETERS = {
- "host": "localhost",
- "port": "8000",
- "user": "root",
- "password": "root",
- "database": "testdb",
- "namespace": "testns"
-};
-```
-
-Now, you can use this established connection to query your database tables as follows:
-
-```sql
-SELECT * FROM exampledb.dev;
-```
-
-## Testing SurrealDB in the public cloud environment
-
-To establish a connection with our SurrealDB server which is running locally to the public cloud instance is not that simple. We are going to use `ngrok tunneling` to connect cloud instance to the local SurrealDB server. You can follow this [guide](https://docs.mindsdb.com/sql/create/database#making-your-local-database-available-to-mindsdb) for that.
-
-In our case with `ngrok` we will use:
-```
-ngrok tcp 8000
-```
-
-From there, it generated a forwarding dns for me which is:
-```
-tcp://6.tcp.ngrok.io:17141 -> localhost:8000
-```
-
-It will be different in your case. With this let's connect to the public cloud using
-
-```sql
-CREATE DATABASE exampledb
-WITH ENGINE = 'surrealdb',
-PARAMETERS = {
- "host": "6.tcp.ngrok.io",
- "port": "17141",
- "user": "root",
- "password": "root",
- "database": "testdb",
- "namespace": "testns"
-};
-```
-
-Please change the `host` and `port` properties in the `PARAMETERS` clause based on the values which you got.
-
-We can also query the `dev` table which we created with
-```sql
-SELECT * FROM exampledb.dev;
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/__about__.py b/mindsdb/integrations/handlers/surrealdb_handler/__about__.py
deleted file mode 100644
index 782f87d5a99..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB SurrealDB handler'
-__package_name__ = 'mindsdb_surrealdb_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for SurrealDB"
-__author__ = 'Emmanouil Dellatolas & Georgios Artopoulos'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023- mindsdb'
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/__init__.py b/mindsdb/integrations/handlers/surrealdb_handler/__init__.py
deleted file mode 100644
index 05b05c8d02e..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .surrealdb_handler import SurrealDBHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'SurrealDB'
-name = 'surrealdb'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/connection_args.py b/mindsdb/integrations/handlers/surrealdb_handler/connection_args.py
deleted file mode 100644
index 5901f103833..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/connection_args.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the SurrealDB server.',
- 'required': True,
- 'label': 'User'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password to authenticate the user with the SurrealDB server.',
- 'required': True,
- 'label': 'Password',
- 'secret': True
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'The database name to use when connecting with the SurrealDB server.',
- 'required': True,
- 'label': 'Database name'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the SurrealDB server. ',
- 'required': True,
- 'label': 'Host'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'The TCP/IP port of the SurrealDB server. Must be an integer.',
- 'required': True,
- 'label': 'Port'
- },
- namespace={
- 'type': ARG_TYPE.STR,
- 'description': 'The namespace name to be connected',
- 'required': True,
- 'label': 'Namespace'
- }
-)
-connection_args_example = OrderedDict(
- host='localhost',
- port=17141,
- user='admin',
- password='password',
- database='test',
- namespace='test'
-)
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/icon.svg b/mindsdb/integrations/handlers/surrealdb_handler/icon.svg
deleted file mode 100644
index 026ef43b8af..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/icon.svg
+++ /dev/null
@@ -1,9 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/requirements.txt b/mindsdb/integrations/handlers/surrealdb_handler/requirements.txt
deleted file mode 100644
index ac8efd5a7cb..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pysurrealdb
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py b/mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py
deleted file mode 100644
index 2b8e3b3ee89..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from typing import Optional
-import pysurrealdb as surreal
-import pandas as pd
-
-from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-from .utils.surreal_get_info import table_names, column_info
-
-
-logger = log.getLogger(__name__)
-
-
-class SurrealDBHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the SurrealDB statements.
- """
- name = 'surrealdb'
-
- def __init__(self, name: str, connection_data: Optional[dict], **kwargs):
- """ Initialize the handler
- Args:
- name (str): name of particular handler instance
- connection_data (dict): parameters for connecting to the database
- **kwargs: arbitrary keyword arguments.
- """
- super().__init__(name)
- self.database = connection_data['database']
- self.parser = parse_sql
- self.dialect = "surrealdb"
- self.kwargs = kwargs
- self.namespace = connection_data['namespace']
- self.user = connection_data['user']
- self.password = connection_data['password']
- self.host = connection_data['host']
- self.port = connection_data['port']
-
- self.connection = None
- self.is_connected = False
-
- def connect(self):
- """
- Establishes a connection to the MindsDB database.
- Returns:
- HandlerStatusResponse
- """
- if self.is_connected is True:
- return self.connection
- try:
- self.connection = surreal.connect(
- database=self.database,
- host=self.host,
- port=self.port,
- user=self.user,
- password=self.password,
- namespace=self.namespace,
- )
- self.is_connected = True
- except Exception as e:
- logger.error(f"Error while connecting to SurrealDB, {e}")
-
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """
- Check connection to the handler.
- Returns:
- HandlerStatusResponse
- """
- response_code = StatusResponse(False)
- need_to_close = self.is_connected is False
- try:
- self.connect()
- response_code.success = True
- except Exception as e:
- logger.error(f'Error connecting to SurrealDB, {e}!')
- response_code.error_message = str(e)
- finally:
- if response_code.success is True and need_to_close:
- self.disconnect()
- if response_code.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response_code
-
- def disconnect(self):
- """
- Close the existing connection to the SurrealDB database
- """
- if self.is_connected is False:
- return
- try:
- self.connection.close()
- self.is_connected = False
- except Exception as e:
- logger.error(f"Error while disconnecting to SurrealDB, {e}")
-
- return
-
- def native_query(self, query: str) -> Response:
- """
- Receive raw query and act upon it somehow.
- Args:
- query (Any): query in SurrealQL to execute
- Returns:
- HandlerResponse
- """
- need_to_close = self.is_connected is False
- conn = self.connect()
- cur = conn.cursor()
- try:
- cur.execute(query)
- result = cur.fetchall()
- if result:
- response = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=pd.DataFrame(
- result,
- columns=[x[0] for x in cur.description],
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
- except Exception as e:
- logger.error(f'Error running query: {query} on SurrealDB!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
-
- cur.close()
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Receive query as AST (abstract syntax tree) and act upon it somehow.
- Args:
- query (ASTNode): sql query represented as AST. It may be any kind
- of query: SELECT, INSERT, DELETE, etc
- Returns:
- HandlerResponse
- """
- query_string = query.to_string()
-
- # ensure the correct query is passed
- last_word = query_string.split()[-1]
- query_string = query_string.replace(last_word + '.', "")
- return self.native_query(query_string)
-
- def get_tables(self) -> Response:
- """
- Get list of tables from the database that will be accessible.
- Returns:
- HandlerResponse
- """
- conn = self.connect()
- # get table names
- tables = table_names(conn)
-
- # construct pandas dataframe
- df = pd.DataFrame(tables, columns=['table_name'])
-
- response = Response(
- RESPONSE_TYPE.TABLE, df
- )
- return response
-
- def get_columns(self, table: str) -> Response:
- """ Return list of columns in table
- Args:
- table (str): name of the table to get column names and types from.
- Returns:
- HandlerResponse
- """
- conn = self.connect()
- # get name and type of each column in the table
- columns, types = column_info(conn, table)
-
- # construct pandas dataframe
- df = pd.DataFrame(columns, columns=['table_name'])
- df['data_type'] = types
-
- response = Response(
- RESPONSE_TYPE.TABLE, df
- )
- return response
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/surrealdb_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/tests/test_surrealdb_handler.py b/mindsdb/integrations/handlers/surrealdb_handler/tests/test_surrealdb_handler.py
deleted file mode 100644
index 9f7bf72585e..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/tests/test_surrealdb_handler.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import unittest
-
-from mindsdb.integrations.handlers.surrealdb_handler.surrealdb_handler import SurrealDBHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class SurrealdbHandlerTest(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "localhost",
- "port": "8000",
- "user": "admin",
- "password": "password",
- "namespace": "test",
- "database": "test"
- }
- }
- cls.handler = SurrealDBHandler('test_surrealdb_handler', **cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_create_table(self):
- res = self.handler.native_query("CREATE person SET name = 'Tobie', company = 'SurrealDB', "
- "skills = ['Rust', 'Go', 'JavaScript'];")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_2_insert(self):
- res = self.handler.native_query("UPDATE person SET name = 'Jamie'")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_3_select_query(self):
- query = "SELECT * FROM person"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_4_get_columns(self):
- columns = self.handler.get_columns('person')
- assert columns.type is not RESPONSE_TYPE.ERROR
-
- def test_5_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_6_drop_table(self):
- res = self.handler.native_query("REMOVE table person")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_7_disconnect(self):
- assert self.handler.disconnect() is None
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/utils/__init__.py b/mindsdb/integrations/handlers/surrealdb_handler/utils/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/surrealdb_handler/utils/surreal_get_info.py b/mindsdb/integrations/handlers/surrealdb_handler/utils/surreal_get_info.py
deleted file mode 100644
index 8dd98f22ff0..00000000000
--- a/mindsdb/integrations/handlers/surrealdb_handler/utils/surreal_get_info.py
+++ /dev/null
@@ -1,20 +0,0 @@
-def table_names(connection):
- query = "INFO for DB"
- dict_1 = connection.query(query)
- dict_2 = dict_1['tb']
- tables = list(dict_2.keys())
- return tables
-
-
-def column_info(connection, table):
- query = "INFO FOR TABLE " + table
- dict_1 = connection.query(query)
- dict_2 = dict_1['fd']
- columns = list(dict_2.keys())
- types = []
-
- for value in dict_2.values():
- a = value.split('TYPE ', 1)[1]
- type = a.split()[0]
- types.append(type)
- return columns, types
diff --git a/mindsdb/integrations/handlers/symbl_handler/README.md b/mindsdb/integrations/handlers/symbl_handler/README.md
deleted file mode 100644
index 9a9633156df..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/README.md
+++ /dev/null
@@ -1,84 +0,0 @@
----
-title: Symbl
-sidebarTitle: Symbl
----
-
-This documentation describes the integration of MindsDB with [Symbl](https://symbl.ai/), a platform with state-of-the-art and task-specific LLMs that enables businesses to analyze multi-party conversations at scale.
-This integration allows MindsDB to process conversation data and extract insights from it.
-
-## Prerequisites
-
-Before proceeding, ensure the following prerequisites are met:
-
-1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
-2. To connect Symbl to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies).
-
-
-Please note that in order to successfully install the dependencies for Symbl, it is necessary to install `portaudio` and few other Linux packages in the Docker container first. To do this, run the following commands:
-
-1. Start an interactive shell in the container:
-```bash
-docker exec -it mindsdb_container sh
-```
-If you haven't specified a name when spinning up the MindsDB container with `docker run`, you can find it by running `docker ps`.
-
-
-If you are using Docker Desktop, you can navigate to 'Containers', locate the multi-container application running the extension, click on the `mindsdb_service` container and then click on the 'Exec' tab to start an interactive shell.
-
-
-2. Install the required packages:
-```bash
-apt-get update && apt-get install -y \
- libportaudio2 libportaudiocpp0 portaudio19-dev \
- python3-dev \
- build-essential \
- && rm -rf /var/lib/apt/lists/*
-```
-
-
-## Connection
-
-Establish a connection to your Symbl from MindsDB by executing the following SQL command:
-
-```sql
-CREATE DATABASE mindsdb_symbl
-WITH ENGINE = 'symbl',
-PARAMETERS = {
- "app_id": "app_id",
- "app_secret":"app_secret"
-};
-```
-
-Required connection parameters include the following:
-
-* `app_id`: The Symbl app identifier.
-* `app_secret`: The Symbl app secret.
-
-## Usage
-
-First, process the conversation data and get the conversation ID via the `get_conversation_id` table:
-
-```sql
-SELECT *
-FROM mindsdb_symbl.get_conversation_id
-WHERE audio_url="https://symbltestdata.s3.us-east-2.amazonaws.com/newPhonecall.mp3";
-```
-
-Next, use the conversation ID to get the results of the above from the other supported tables:
-
-```sql
-SELECT *
-FROM mindsdb_symbl.get_messages
-WHERE conversation_id="5682305049034752";
-```
-
-Other supported tables include:
-
-* `get_topics`
-* `get_questions`
-* `get_analytics`
-* `get_action_items`
-
-
-The above examples utilize `mindsdb_symbl` as the datasource name, which is defined in the `CREATE DATABASE` command.
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/symbl_handler/__about__.py b/mindsdb/integrations/handlers/symbl_handler/__about__.py
deleted file mode 100644
index d0a7a091e2b..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Symbl handler"
-__package_name__ = "mindsdb_symbl_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Symbl"
-__author__ = "Abhilash K R"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/symbl_handler/__init__.py b/mindsdb/integrations/handlers/symbl_handler/__init__.py
deleted file mode 100644
index e383d9d668f..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .symbl_handler import SymblHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Symbl"
-name = "symbl"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
- "connection_args_example",
- "connection_args",
-]
diff --git a/mindsdb/integrations/handlers/symbl_handler/connection_args.py b/mindsdb/integrations/handlers/symbl_handler/connection_args.py
deleted file mode 100644
index fd12e54fe9c..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/connection_args.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- app_id={
- "type": ARG_TYPE.PWD,
- "description": "App ID of symbl",
- "required": True,
- "label": "app_id",
- },
- app_secret={
- "type": ARG_TYPE.PWD,
- "description": "App Secret of symbl",
- "required": True,
- "label": "app_secret",
- "secret": True
- }
-)
-
-connection_args_example = OrderedDict(
- app_id="app_id",
- app_secret="app_secret"
-)
diff --git a/mindsdb/integrations/handlers/symbl_handler/icon.svg b/mindsdb/integrations/handlers/symbl_handler/icon.svg
deleted file mode 100644
index 48b5eaf4500..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/symbl_handler/requirements.txt b/mindsdb/integrations/handlers/symbl_handler/requirements.txt
deleted file mode 100644
index 143677d423b..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-symbl
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/symbl_handler/symbl_handler.py b/mindsdb/integrations/handlers/symbl_handler/symbl_handler.py
deleted file mode 100644
index 3f45f83b12f..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/symbl_handler.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import symbl
-from mindsdb.integrations.handlers.symbl_handler.symbl_tables import (
- GetConversationTable,
- GetMessagesTable,
- GetTopicsTable,
- GetQuestionsTable,
- GetAnalyticsTable,
- GetActionItemsTable,
- GetFollowUpsTable
-)
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-
-logger = log.getLogger(__name__)
-
-
-class SymblHandler(APIHandler):
- """The Symbl handler implementation"""
-
- def __init__(self, name: str, **kwargs):
- """Initialize the Symbl handler.
-
- Parameters
- ----------
- name : str
- name of a handler instance
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
- self.connection_data = connection_data
- self.credentials = {"app_id": self.connection_data.get("app_id"), "app_secret": self.connection_data.get("app_secret")}
- self.kwargs = kwargs
- self.is_connected = False
-
- conversation_id_data = GetConversationTable(self)
- self._register_table("get_conversation_id", conversation_id_data)
-
- messages_data = GetMessagesTable(self)
- self._register_table("get_messages", messages_data)
-
- topics_data = GetTopicsTable(self)
- self._register_table("get_topics", topics_data)
-
- question_data = GetQuestionsTable(self)
- self._register_table("get_questions", question_data)
-
- analytics_data = GetAnalyticsTable(self)
- self._register_table("get_analytics", analytics_data)
-
- ai_data = GetActionItemsTable(self)
- self._register_table("get_action_items", ai_data)
-
- follow_up_data = GetFollowUpsTable(self)
- self._register_table("get_follow_ups", follow_up_data)
-
- def connect(self) -> StatusResponse:
- """Set up the connection required by the handler.
-
- Returns
- -------
- StatusResponse
- connection object
- """
- resp = StatusResponse(False)
- try:
- symbl.AuthenticationToken.get_access_token(self.credentials)
- resp.success = True
- self.is_connected = True
- except Exception as ex:
- resp.success = False
- resp.error_message = ex
- self.is_connected = False
- return resp
-
- def check_connection(self) -> StatusResponse:
- """Check connection to the handler.
-
- Returns
- -------
- StatusResponse
- Status confirmation
- """
- return self.connect()
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
-
- Parameters
- ----------
- query : str
- query in a native format
-
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/symbl_handler/symbl_tables.py b/mindsdb/integrations/handlers/symbl_handler/symbl_tables.py
deleted file mode 100644
index d2a3ff601eb..00000000000
--- a/mindsdb/integrations/handlers/symbl_handler/symbl_tables.py
+++ /dev/null
@@ -1,648 +0,0 @@
-import pandas as pd
-import json
-import symbl
-from typing import List
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor
-from mindsdb.utilities import log
-from mindsdb_sql_parser import ast
-
-logger = log.getLogger(__name__)
-
-
-class GetConversationTable(APITable):
- """The Get Conversation Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the conversation Id for the given audio file" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- conversation id
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_conversation_id',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'audio_url':
- if op == '=':
- search_params["audio_url"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for audio_url column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("audio_url" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("audio_url column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- payload = {"url": search_params.get("audio_url")}
- conversation_object = symbl.Audio.process_url(payload=payload, credentials=self.handler.credentials)
-
- df = pd.json_normalize({"conversation_id": conversation_object.get_conversation_id()})
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "conversation_id"
- ]
-
-
-class GetMessagesTable(APITable):
- """The Get Messages Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the messages for the given conversation id" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Messages
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_messages',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'conversation_id':
- if op == '=':
- search_params["conversation_id"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for conversation_id column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("conversation_id" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("conversation_id column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- resp = symbl.Conversations.get_messages(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials)
-
- resp = self.parse_response(resp)
-
- df = pd.json_normalize(resp["messages"])
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def parse_response(self, res):
- return json.loads(json.dumps(res.to_dict(), default=str))
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "conversation_id",
- "end_time",
- "id",
- "phrases",
- "sentiment",
- "start_time",
- "text",
- "words",
- "_from.email",
- "_from.id",
- "_from.name"
- ]
-
-
-class GetTopicsTable(APITable):
- """The Get Topics Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the topics for the given conversation id" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Topics
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_topics',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'conversation_id':
- if op == '=':
- search_params["conversation_id"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for conversation_id column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("conversation_id" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("conversation_id column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- resp = symbl.Conversations.get_topics(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials)
-
- resp = self.parse_response(resp)
-
- df = pd.json_normalize(resp["topics"])
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def parse_response(self, res):
- return json.loads(json.dumps(res.to_dict(), default=str))
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "id",
- "text",
- "type",
- "score",
- "message_ids",
- "entities",
- "sentiment",
- "parent_refs"
- ]
-
-
-class GetQuestionsTable(APITable):
- """The Get Questions Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the questions for the given conversation id" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Questions
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_questions',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'conversation_id':
- if op == '=':
- search_params["conversation_id"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for conversation_id column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("conversation_id" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("conversation_id column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- resp = symbl.Conversations.get_questions(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials)
-
- resp = self.parse_response(resp)
-
- df = pd.json_normalize(resp["questions"])
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def parse_response(self, res):
- return json.loads(json.dumps(res.to_dict(), default=str))
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "id",
- "text",
- "type",
- "score",
- "message_ids",
- "_from.id",
- "_from.name",
- "_from.user_id"
- ]
-
-
-class GetFollowUpsTable(APITable):
- """The Get FollowUps Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the follow ups for the given conversation id" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- follow up Questions
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_follow_ups',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'conversation_id':
- if op == '=':
- search_params["conversation_id"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for conversation_id column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("conversation_id" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("conversation_id column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- resp = symbl.Conversations.get_follow_ups(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials)
-
- resp = self.parse_response(resp)
-
- df = pd.json_normalize(resp["follow_ups"])
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def parse_response(self, res):
- return json.loads(json.dumps(res.to_dict(), default=str))
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "id",
- "text",
- "type",
- "score",
- "message_ids",
- "entities",
- "phrases",
- "definitive",
- "due_by",
- "_from.id",
- "_from.name",
- "_from.user_id",
- "assignee.id",
- "assignee.name",
- "assignee.email"
- ]
-
-
-class GetActionItemsTable(APITable):
- """The Get Action items Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the action items for the given conversation id" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- action items
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_action_items',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'conversation_id':
- if op == '=':
- search_params["conversation_id"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for conversation_id column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("conversation_id" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("conversation_id column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- resp = symbl.Conversations.get_action_items(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials)
-
- resp = self.parse_response(resp)
-
- df = pd.json_normalize(resp["action_items"])
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def parse_response(self, res):
- return json.loads(json.dumps(res.to_dict(), default=str))
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "id",
- "text",
- "type",
- "score",
- "message_ids",
- "entities",
- "phrases",
- "definitive",
- "due_by",
- "_from.id",
- "_from.name",
- "_from.user_id",
- "assignee.id",
- "assignee.name",
- "assignee.email"
- ]
-
-
-class GetAnalyticsTable(APITable):
- """The Get Analytics Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Get the analytics for the given conversation id" API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- metrics
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'get_analytics',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
- for op, arg1, arg2 in where_conditions:
- if arg1 == 'conversation_id':
- if op == '=':
- search_params["conversation_id"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for conversation_id column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("conversation_id" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("conversation_id column has to be present in where clause.")
-
- df = pd.DataFrame(columns=self.get_columns())
-
- resp = symbl.Conversations.get_analytics(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials)
-
- resp = self.parse_response(resp)
-
- df = pd.json_normalize(resp["metrics"])
-
- select_statement_executor = SELECTQueryExecutor(
- df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def parse_response(self, res):
- return json.loads(json.dumps(res.to_dict(), default=str))
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "type",
- "percent",
- "seconds"
- ]
diff --git a/mindsdb/integrations/handlers/tdengine_handler/README.md b/mindsdb/integrations/handlers/tdengine_handler/README.md
deleted file mode 100644
index 5c88cc770d5..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# TDEngine Handler
-
-This is the implementation of the TDEngine handler for MindsDB.
-
-## TDEngine
-TDengine is an open source, high-performance, cloud native time-series database optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. TDengine differentiates itself from other time-series databases with the following advantages:
-
-* High Performance
-* Simplified Solution
-* Cloud Native
-* Ease of Use
-* Easy Data Analytics
-* Open Source
-
-
-
-## Implementation
-This handler was implemented using the `taos/taosrest`, a Python library that allows you to use Python code to run SQL commands on TDEngine Server.
-
-The required arguments to establish a connection are,
-* `user`: username asscociated with server
-* `password`: password to authenticate your access
-* `url`: Url to TDEngine server. For local server url is localhost:6041 (Default)
-* `token`: Unique token provide while using TDEngine Cloud.
-* `database`: Database name to be connected
-
-
-## Usage
-In order to make use of this handler and connect to TDEngine in MindsDB, the following syntax can be used,
-
-~~~~sql
-CREATE DATABASE TDEngine_datasource
-WITH
-engine='tdengine',
-parameters={
- "user":"root",
- "password":"taosdata",
- "url":"127.0.0.1:6041",
- "database":"test"
-};
-~~~~
-
-
-**Note :-> You can sepecify token instead of user and password while using TDEngine.**
-
-
-Now, you can use this established connection to query your database as follows,
-~~~~sql
-SELECT * FROM TDEngine_datasource.d0;
-~~~~
diff --git a/mindsdb/integrations/handlers/tdengine_handler/__about__.py b/mindsdb/integrations/handlers/tdengine_handler/__about__.py
deleted file mode 100644
index 04d11dbd1c6..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB TDEngine handler'
-__package_name__ = 'mindsdb_tdengine_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for TDEngine"
-__author__ = 'Parthiv Makwana'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/tdengine_handler/__init__.py b/mindsdb/integrations/handlers/tdengine_handler/__init__.py
deleted file mode 100644
index 48ad9239d64..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .tdengine_handler import TDEngineHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'TDEngine'
-name = 'tdengine'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/tdengine_handler/connection_args.py b/mindsdb/integrations/handlers/tdengine_handler/connection_args.py
deleted file mode 100644
index 6c7cc1b4c00..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/connection_args.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the TDEngine server.'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password to authenticate the user with the TDEngine server.',
- 'secret': True
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'The database name to use when connecting with the TDEngine server.'
- },
- url={
- 'type': ARG_TYPE.STR,
- 'description': 'The url of the TDEngine server Instance. '
- },
- token={
- 'type': ARG_TYPE.INT,
- 'description': 'Unique Token to COnnect TDEngine'
- },
-)
-
-connection_args_example = OrderedDict(
- url='127.0.0.1:6041',
- token='',
- user='root',
- password='taosdata',
- database='test'
-)
diff --git a/mindsdb/integrations/handlers/tdengine_handler/icon.svg b/mindsdb/integrations/handlers/tdengine_handler/icon.svg
deleted file mode 100644
index c1ddccb67d1..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/icon.svg
+++ /dev/null
@@ -1,59 +0,0 @@
-
diff --git a/mindsdb/integrations/handlers/tdengine_handler/requirements.txt b/mindsdb/integrations/handlers/tdengine_handler/requirements.txt
deleted file mode 100644
index 7c95bc73ea5..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-taospy
-requests>=2.32.4 # not directly required, pinned by Snyk to avoid a vulnerability
-urllib3>=2.5.0 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py b/mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py
deleted file mode 100644
index 99477597493..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from typing import Optional
-import pandas as pd
-import taosrest as td
-from taosrest import sqlalchemy as SA
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb_sql_parser.ast.base import ASTNode
-
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-
-logger = log.getLogger(__name__)
-
-
-class TDEngineHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the TDEngine statements.
- """
-
- name = 'tdengine'
-
- def __init__(self, name, connection_data: Optional[dict], **kwargs):
- super().__init__(name)
-
- self.parser = parse_sql
- self.dialect = 'tdengine'
- self.kwargs = kwargs
- self.connection_data = connection_data
-
- self.connection = None
- self.is_connected = False
-
- def connect(self):
- if self.is_connected is True:
- return self.connection
-
- config = {
- 'url': self.connection_data.get('url', "http://localhost:6041"),
- 'token': self.connection_data.get('token'),
- 'user': self.connection_data.get('user', 'root'),
- 'password': self.connection_data.get('password', 'taosdata'),
- 'database': self.connection_data.get('database')
- }
-
- connection = td.connect(**config)
- self.is_connected = True
- self.connection = connection
- return self.connection
-
- def disconnect(self):
- if self.is_connected is False:
- return
- self.connection.close()
- self.is_connected = False
- return
-
- def check_connection(self) -> StatusResponse:
-
- result = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- connection = self.connect()
- result.success = connection is not None
- except Exception as e:
- logger.error(f'Error connecting to TDEngine {self.connection_data["database"]}, {e}!')
- result.error_message = str(e)
-
- if result.success is True and need_to_close:
- self.disconnect()
- if result.success is False and self.is_connected is True:
- self.is_connected = False
-
- return result
-
- def native_query(self, query: str) -> Response:
- """
- Receive SQL query and runs it
- :param query: The SQL query to run in TDEngine
- :return: returns the records from the current recordset
- """
-
- need_to_close = self.is_connected is False
-
- connection = self.connect()
- cur = connection.cursor()
- try:
- cur.execute(query)
-
- if cur.rowcount != 0:
- result = cur.fetchall()
- response = Response(
- RESPONSE_TYPE.TABLE,
- pd.DataFrame(
- result,
- columns=[x[0] for x in cur.description]
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
- connection.commit()
- except Exception as e:
- logger.error(f'Error running query: {query} on {self.connection_data["database"]}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
- # connection.rollback()
- cur.close()
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Retrieve the data from the SQL statement.
- """
- renderer = SqlalchemyRender(SA.TaosRestDialect)
- query_str = renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> Response:
- """
- Get a list with all of the tabels in TDEngine
- """
- q = 'SHOW TABLES;'
-
- return self.native_query(q)
-
- def get_columns(self, table_name) -> Response:
- """
- Show details about the table
- """
- q = f'DESCRIBE {table_name};'
-
- return self.native_query(q)
diff --git a/mindsdb/integrations/handlers/tdengine_handler/tests/__init__.py b/mindsdb/integrations/handlers/tdengine_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py b/mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py
deleted file mode 100644
index abdfbc01b90..00000000000
--- a/mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.tdengine_handler.tdengine_handler import TDEngineHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class TDEngineHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "token": '',
- "url": "********.cloud.tdengine.com",
- "database": "temp"
- }
- cls.handler = TDEngineHandler('test_tdengine_handler', connection_data=cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_connect(self):
- assert self.handler.connect()
-
- def test_2_create_table(self):
- query = "CREATE Table `hari` USING `temp` (`id`) TAGS (0);"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
- pass
-
- def test_3_insert(self):
- query = "INSERT INTO hari VALUES (NOW, 12);"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_4_native_query_select(self):
- query = "SELECT * FROM hari;"
- result = self.handler.query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_5_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is RESPONSE_TYPE.TABLE
-
- def test_6_get_columns(self):
- columns = self.handler.get_columns('hari')
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/teradata_handler/README.md b/mindsdb/integrations/handlers/teradata_handler/README.md
deleted file mode 100644
index aaa707b8ffb..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/README.md
+++ /dev/null
@@ -1,101 +0,0 @@
----
-title: Teradata
-sidebarTitle: Teradata
----
-
-This documentation describes the integration of MindsDB with [Teradata](https://www.teradata.com/why-teradata), the complete cloud analytics and data platform for Trusted AI.
-The integration allows MindsDB to access data from Teradata and enhance Teradata with AI capabilities.
-
-## Prerequisites
-
-Before proceeding, ensure the following prerequisites are met:
-
-1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
-2. To connect Teradata to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies).
-
-## Connection
-
-Establish a connection to Teradata from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/teradata_handler) as an engine.
-
-```sql
-CREATE DATABASE teradata_datasource
-WITH
- ENGINE = 'teradata',
- PARAMETERS = {
- "host": "192.168.0.41",
- "user": "demo_user",
- "password": "demo_password",
- "database": "example_db"
- };
-```
-
-Required connection parameters include the following:
-
-* `host`: The hostname, IP address, or URL of the Teradata server.
-* `user`: The username for the Teradata database.
-* `password`: The password for the Teradata database.
-
-Optional connection parameters include the following:
-
-* `database`: The name of the Teradata database to connect to. Defaults is the user's default database.
-
-## Usage
-
-Retrieve data from a specified table by providing the integration, database and table names:
-
-```sql
-SELECT *
-FROM teradata_datasource.database_name.table_name
-LIMIT 10;
-```
-
-Run Teradata SQL queries directly on the connected Teradata database:
-
-```sql
-SELECT * FROM teradata_datasource (
-
- --Native Query Goes Here
- SELECT emp_id, emp_name, job_duration AS tsp
- FROM employee
- EXPAND ON job_duration AS tsp BY INTERVAL '1' YEAR
- FOR PERIOD(DATE '2006-01-01', DATE '2008-01-01');
-
-);
-```
-
-
-The above examples utilize `teradata_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command.
-
-
-## Troubleshooting
-
-
-`Database Connection Error`
-
-* **Symptoms**: Failure to connect MindsDB with the Teradata database.
-* **Checklist**:
- 1. Make sure the Teradata database is active.
- 2. Confirm that host, user and password are correct. Try a direct connection using a client like DBeaver.
- 3. Ensure a stable network between MindsDB and Teradata.
-
-
-
-`SQL statement cannot be parsed by mindsdb_sql`
-
-* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters.
-* **Checklist**:
- 1. Ensure table names with spaces or special characters are enclosed in backticks.
- 2. Examples:
- * Incorrect: SELECT * FROM integration.travel-data
- * Incorrect: SELECT * FROM integration.'travel-data'
- * Correct: SELECT * FROM integration.\`travel-data\`
-
-
-
-`Connection Timeout Error`
-
-* **Symptoms**: Connection to the Teradata database times out or queries take too long to execute.
-* **Checklist**:
- 1. Ensure the Teradata server is running and accessible (if the server has been idle for a long time, it may have shut down automatically).
-
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/teradata_handler/__about__.py b/mindsdb/integrations/handlers/teradata_handler/__about__.py
deleted file mode 100644
index 01ddccf8f86..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Teradata handler'
-__package_name__ = 'mindsdb_teradata_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Teradata"
-__author__ = 'Sudipto Ghosh'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022 - mindsdb'
diff --git a/mindsdb/integrations/handlers/teradata_handler/__init__.py b/mindsdb/integrations/handlers/teradata_handler/__init__.py
deleted file mode 100644
index 25110d36467..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .teradata_handler import TeradataHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Teradata'
-name = 'teradata'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/teradata_handler/connection_args.py b/mindsdb/integrations/handlers/teradata_handler/connection_args.py
deleted file mode 100644
index 5555fcd9ff7..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/connection_args.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The hostname, IP address, or URL of the Teradata server.',
- 'required': True,
- 'label': 'Host'
- },
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The username for the Teradata database.',
- 'required': True,
- 'label': 'User'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password for the Teradata database.',
- 'secret': True,
- 'required': True,
- 'label': 'Password'
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': "The name of the Teradata database to connect to. Defaults is the user's default database.",
- 'required': False,
- 'label': 'Database'
- }
-)
-
-connection_args_example = OrderedDict(
- host='192.168.0.41',
- user='dbc',
- password='dbc',
- database='HR'
-)
diff --git a/mindsdb/integrations/handlers/teradata_handler/icon.svg b/mindsdb/integrations/handlers/teradata_handler/icon.svg
deleted file mode 100644
index 1f0a9890769..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/teradata_handler/requirements.txt b/mindsdb/integrations/handlers/teradata_handler/requirements.txt
deleted file mode 100644
index 4fbb8efdbbf..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-teradatasql
-teradatasqlalchemy
diff --git a/mindsdb/integrations/handlers/teradata_handler/teradata_handler.py b/mindsdb/integrations/handlers/teradata_handler/teradata_handler.py
deleted file mode 100644
index 1233dba64d6..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/teradata_handler.py
+++ /dev/null
@@ -1,245 +0,0 @@
-from typing import Any, Dict, Text
-
-from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from pandas import DataFrame
-import teradatasql
-from teradatasql import OperationalError
-import teradatasqlalchemy.dialect as teradata_dialect
-
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-from mindsdb.utilities import log
-
-
-logger = log.getLogger(__name__)
-
-
-class TeradataHandler(DatabaseHandler):
- """
- This handler handles the connection and execution of SQL statements on Teradata.
- """
-
- name = 'teradata'
-
- def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None:
- """
- Initializes the handler.
-
- Args:
- name (Text): The name of the handler instance.
- connection_data (Dict): The connection data required to connect to the Teradata database.
- kwargs: Arbitrary keyword arguments.
- """
- super().__init__(name)
- self.connection_data = connection_data
- self.kwargs = kwargs
-
- self.connection = None
- self.is_connected = False
-
- def __del__(self) -> None:
- """
- Closes the connection when the handler instance is deleted.
- """
- if self.is_connected is True:
- self.disconnect()
-
- def connect(self) -> teradatasql.TeradataConnection:
- """
- Establishes a connection to the Teradata database.
-
- Raises:
- ValueError: If the expected connection parameters are not provided.
- teradatasql.OperationalError: If an error occurs while connecting to the Teradata database.
-
- Returns:
- teradatasql.TeradataConnection: A connection object to the Teradata database.
- """
- if self.is_connected is True:
- return self.connection
-
- # Mandatory connection parameters.
- if not all(key in self.connection_data for key in ['host', 'user', 'password']):
- raise ValueError('Required parameters (host, user, password) must be provided.')
-
- config = {
- 'host': self.connection_data.get('host'),
- 'user': self.connection_data.get('user'),
- 'password': self.connection_data.get('password')
- }
-
- # Optional connection parameters.
- if 'database' in self.connection_data:
- config['database'] = self.connection_data.get('database')
-
- try:
- self.connection = teradatasql.connect(
- **config,
- )
- self.is_connected = True
- return self.connection
- except OperationalError as operational_error:
- logger.error(f'Error connecting to Teradata, {operational_error}!')
- raise
- except Exception as unknown_error:
- logger.error(f'Unknown error connecting to Teradata, {unknown_error}!')
- raise
-
- def disconnect(self) -> None:
- """
- Closes the connection to the Teradata database if it's currently open.
- """
- if self.is_connected is False:
- return
-
- self.connection.close()
- self.is_connected = False
- return
-
- def check_connection(self) -> StatusResponse:
- """
- Checks the status of the connection to the Teradata database.
-
- Returns:
- StatusResponse: An object containing the success status and an error message if an error occurs.
- """
- response = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- connection = self.connect()
- with connection.cursor() as cur:
- cur.execute('SELECT 1 FROM (SELECT 1 AS "dual") AS "dual"')
- response.success = True
- except (OperationalError, ValueError) as known_error:
- logger.error(f'Connection check to Teradata failed, {known_error}!')
- response.error_message = str(known_error)
- except Exception as unknown_error:
- logger.error(f'Connection check to Teradata failed due to an unknown error, {unknown_error}!')
- response.error_message = str(unknown_error)
-
- if response.success is True and need_to_close:
- self.disconnect()
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: Text) -> Response:
- """
- Executes a native SQL query on the Teradata database and returns the result.
-
- Args:
- query (Text): The SQL query to be executed.
-
- Returns:
- Response: A response object containing the result of the query or an error message.
- """
- need_to_close = self.is_connected is False
-
- connection = self.connect()
- with connection.cursor() as cur:
- try:
- cur.execute(query)
- if not cur.description:
- response = Response(RESPONSE_TYPE.OK)
- else:
- result = cur.fetchall()
- response = Response(
- RESPONSE_TYPE.TABLE,
- DataFrame(
- result,
- columns=[x[0] for x in cur.description]
- )
- )
- connection.commit()
- except OperationalError as operational_error:
- logger.error(f'Error running query: {query} on {self.connection_data["database"]}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(operational_error)
- )
- connection.rollback()
- except Exception as unknown_error:
- logger.error(f'Unknown error running query: {query} on {self.connection_data["database"]}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(unknown_error)
- )
- connection.rollback()
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Executes a SQL query represented by an ASTNode on the Teradata database and retrieves the data (if any).
-
- Args:
- query (ASTNode): An ASTNode representing the SQL query to be executed.
-
- Returns:
- Response: The response from the `native_query` method, containing the result of the SQL query execution.
- """
- renderer = SqlalchemyRender(teradata_dialect.TeradataDialect)
- query_str = renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> Response:
- """
- Retrieves a list of all non-system tables in the Teradata database.
-
- Returns:
- Response: A response object containing a list of tables in the Teradata database.
- """
- query = f"""
- SELECT
- TableName AS table_name,
- TableKind AS table_type
- FROM DBC.TablesV
- WHERE DatabaseName = '{self.connection_data.get('database') if self.connection_data.get('database') else self.connection_data.get('user')}'
- AND (TableKind = 'T'
- OR TableKind = 'O'
- OR TableKind = 'Q'
- OR TableKind = 'V')
- """
- result = self.native_query(query)
-
- df = result.data_frame
- df['table_type'] = df['table_type'].apply(lambda x: 'VIEW' if x == 'V' else 'BASE TABLE')
-
- result.data_frame = df
- return result
-
- def get_columns(self, table_name: Text) -> Response:
- """
- Retrieves column details for a specified table in the Teradata database.
-
- Args:
- table_name (Text): The name of the table for which to retrieve column information.
-
- Raises:
- ValueError: If the 'table_name' is not a valid string.
-
- Returns:
- Response: A response object containing the column details.
- """
- if not table_name or not isinstance(table_name, str):
- raise ValueError("Invalid table name provided.")
-
- query = f"""
- SELECT ColumnName AS "Field",
- ColumnType AS "Type"
- FROM DBC.ColumnsV
- WHERE DatabaseName = '{self.connection_data.get('database') if self.connection_data.get('database') else self.connection_data.get('user')}'
- AND TableName = '{table_name}'
- """
-
- return self.native_query(query)
diff --git a/mindsdb/integrations/handlers/teradata_handler/tests/test_teradata_handler.py b/mindsdb/integrations/handlers/teradata_handler/tests/test_teradata_handler.py
deleted file mode 100644
index a1e7a085032..00000000000
--- a/mindsdb/integrations/handlers/teradata_handler/tests/test_teradata_handler.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import os
-import unittest
-
-from mindsdb.integrations.handlers.teradata_handler.teradata_handler import TeradataHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-"""
-CREATE
-DATABASE HR
-AS PERMANENT = 60e6, -- 60MB
- SPOOL = 120e6; -- 120MB
-
-CREATE
-SET TABLE HR.Employees (
- GlobalID INTEGER,
- FirstName VARCHAR(30),
- LastName VARCHAR(30),
- DateOfBirth DATE FORMAT 'YYYY-MM-DD',
- JoinedDate DATE FORMAT 'YYYY-MM-DD',
- DepartmentCode BYTEINT
-)
-UNIQUE PRIMARY INDEX ( GlobalID );
-
-INSERT INTO HR.Employees (GlobalID,
- FirstName,
- LastName,
- DateOfBirth,
- JoinedDate,
- DepartmentCode)
-VALUES (101,
- 'Adam',
- 'Tworkowski',
- '1980-01-05',
- '2004-08-01',
- 01);
-"""
-
-
-class TeradataHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "host": os.environ.get('TERADATA_HOST', 'localhost'),
- "user": "dbc",
- "password": "dbc",
- "database": "HR"
- }
- cls.handler = TeradataHandler('test_teradata_handler', cls.kwargs)
-
- def test_0_connect(self):
- assert self.handler.connect()
-
- def test_1_check_connection(self):
- assert self.handler.check_connection().success is True
-
- def test_2_get_columns(self):
- assert self.handler.get_columns('Employees').resp_type is not RESPONSE_TYPE.ERROR
-
- def test_3_get_tables(self):
- assert self.handler.get_tables().resp_type is not RESPONSE_TYPE.ERROR
-
- def test_4_select_query(self):
- query = 'SELECT * FROM HR.Employees WHERE GlobalID=101'
- assert self.handler.query(query).resp_type is RESPONSE_TYPE.TABLE
-
-
-if __name__ == "__main__":
- unittest.main(failfast=True)
diff --git a/mindsdb/integrations/handlers/tidb_handler/__about__.py b/mindsdb/integrations/handlers/tidb_handler/__about__.py
deleted file mode 100644
index 1c45aea2316..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB TiDB handler'
-__package_name__ = 'mindsdb_tidb_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for TiDB"
-__author__ = 'Ryan Leung'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/tidb_handler/__init__.py b/mindsdb/integrations/handlers/tidb_handler/__init__.py
deleted file mode 100644
index 33613098b13..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .tidb_handler import TiDBHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-
-title = 'TiDB'
-name = 'tidb'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/tidb_handler/connection_args.py b/mindsdb/integrations/handlers/tidb_handler/connection_args.py
deleted file mode 100644
index 9f3137cf664..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/connection_args.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the TiDB server.'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password to authenticate the user with the TiDB server.',
- 'secret': True
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'The database name to use when connecting with the TiDB server.'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the TiDB server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'The TCP/IP port of the TiDB server. Must be an integer.'
- }
-)
-
-connection_args_example = OrderedDict(
- host='127.0.0.1',
- port=4000,
- user='root',
- password='password',
- database='database'
-)
diff --git a/mindsdb/integrations/handlers/tidb_handler/icon.svg b/mindsdb/integrations/handlers/tidb_handler/icon.svg
deleted file mode 100644
index 8d8b4c2f087..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/icon.svg
+++ /dev/null
@@ -1,5 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/tidb_handler/requirements.txt b/mindsdb/integrations/handlers/tidb_handler/requirements.txt
deleted file mode 100644
index ee467569031..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--r mindsdb/integrations/handlers/mysql_handler/requirements.txt
diff --git a/mindsdb/integrations/handlers/tidb_handler/tests/__init__.py b/mindsdb/integrations/handlers/tidb_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py b/mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py
deleted file mode 100644
index 056d9368c21..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.tidb_handler.tidb_handler import TiDBHandler
-
-
-class TiDBHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "host": "localhost",
- "port": "4000",
- "user": "root",
- "password": "root",
- "database": "test",
- "ssl": False
- }
- cls.handler = TiDBHandler('test_tidb_handler', **cls.kwargs)
-
- def test_0_connect(self):
- self.handler.connect()
-
- def test_1_check_connection(self):
- assert self.handler.check_connection()
-
- def test_2_native_query_show_dbs(self):
- dbs = self.handler.native_query("SHOW DATABASES;")
- assert isinstance(dbs, list)
-
- def test_3_get_tables(self):
- tbls = self.handler.get_tables()
- assert isinstance(tbls, list)
-
- def test_5_create_table(self):
- try:
- self.handler.native_query("CREATE TABLE test_tidb (test_col INT)")
- except Exception:
- pass
-
- def test_6_describe_table(self):
- described = self.handler.get_columns("dt_test")
- assert isinstance(described, list)
-
- def test_7_select_query(self):
- query = "SELECT * FROM dt_test WHERE 'id'='a'"
- self.handler.native_query(query)
diff --git a/mindsdb/integrations/handlers/tidb_handler/tidb_handler.py b/mindsdb/integrations/handlers/tidb_handler/tidb_handler.py
deleted file mode 100644
index 0fe85491b47..00000000000
--- a/mindsdb/integrations/handlers/tidb_handler/tidb_handler.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler
-
-
-class TiDBHandler(MySQLHandler):
- """
- This handler handles connection and execution of the TiDB statements.
- """
- name = 'tidb'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
diff --git a/mindsdb/integrations/handlers/timescaledb_handler/__init__.py b/mindsdb/integrations/handlers/timescaledb_handler/__init__.py
index 1d89157bd60..189cb89b856 100644
--- a/mindsdb/integrations/handlers/timescaledb_handler/__init__.py
+++ b/mindsdb/integrations/handlers/timescaledb_handler/__init__.py
@@ -14,7 +14,7 @@
name = "timescaledb"
type = HANDLER_TYPE.DATA
icon_path = "icon.svg"
-support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY
+support_level = HANDLER_SUPPORT_LEVEL.MINDSDB
__all__ = [
"Handler",
diff --git a/mindsdb/integrations/handlers/trino_handler/__about__.py b/mindsdb/integrations/handlers/trino_handler/__about__.py
deleted file mode 100644
index 005535dc9dc..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Trino handler'
-__package_name__ = 'mindsdb_trino_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Trino"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/trino_handler/__init__.py b/mindsdb/integrations/handlers/trino_handler/__init__.py
deleted file mode 100644
index e8fb9797ded..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-try:
- from .trino_handler import TrinoHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-from .__about__ import __version__ as version, __description__ as description
-
-
-title = 'Trino'
-name = 'trino'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/trino_handler/icon.svg b/mindsdb/integrations/handlers/trino_handler/icon.svg
deleted file mode 100644
index 9a8e83369bc..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/icon.svg
+++ /dev/null
@@ -1,23 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/trino_handler/requirements.txt b/mindsdb/integrations/handlers/trino_handler/requirements.txt
deleted file mode 100644
index 08448033d7d..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-trino~=0.313.0
-pyhive
diff --git a/mindsdb/integrations/handlers/trino_handler/tests/__init__.py b/mindsdb/integrations/handlers/trino_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_config.ini b/mindsdb/integrations/handlers/trino_handler/tests/test_trino_config.ini
deleted file mode 100644
index 04cd5653138..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_config.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[KERBEROS_CONFIG]
-http_scheme = https
-dialect = trino
-ca_bundle = /etc/ssl/certs/ca-bundle.crt
-hostname_override = GS.COM
-port = 8080
-config = /etc/krb5.conf
diff --git a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_handler.py b/mindsdb/integrations/handlers/trino_handler/tests/test_trino_handler.py
deleted file mode 100644
index 8d06bb1658e..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_handler.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import unittest
-
-from mindsdb.api.mysql.mysql_proxy.mysql_proxy import RESPONSE_TYPE
-from mindsdb.integrations.handlers.trino_handler.trino_handler import TrinoHandler
-
-
-class TrinoHandlerTest(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "qa.analytics.quantum.site.gs.com",
- "port": "8090",
- "user": "dqsvcuat",
- "password": "",
- "catalog": "gsam_dev2imddata_elastic",
- "schema": "default",
- "service_name": "HTTP/qa.analytics.quantum.site.gs.com",
- "config_file_name": "test_trino_config.ini"
- }
- }
- cls.handler = TrinoHandler('test_trino_handler', **cls.kwargs)
-
- def test_0_canary(self):
- print('Running canary test')
- assert True
- print('Canary test ran successfully')
-
- def test_1_check_connection(self):
- conn_status = self.handler.check_connection()
- print('Trino connection status: ', conn_status)
- assert conn_status.get('success')
-
- def test_2_get_tables(self):
- tables = self.handler.get_tables()
- assert tables
-
- def test_3_describe_table(self):
- described = self.handler.get_columns("axioma_att_2021-12")
- assert described['type'] is not RESPONSE_TYPE.ERROR
-
- # TODO: complete tests implementation
- # def test_4_select_query(self):
- # query = "SELECT * FROM data.test_mdb WHERE 'id'='1'"
- # result = self.handler.query(query)
- # assert result['type'] is RESPONSE_TYPE.TABLE
- #
diff --git a/mindsdb/integrations/handlers/trino_handler/trino_config.ini b/mindsdb/integrations/handlers/trino_handler/trino_config.ini
deleted file mode 100644
index 7f16108bf45..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/trino_config.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[KERBEROS_CONFIG]
-http_scheme = https
-dialect = trino
-ca_bundle = /etc/ssl/certs/ca-bundle.crt
-hostname_override = GS.COM
-port = 8080
-config = /etc/krb5.conf
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/trino_handler/trino_config_provider.py b/mindsdb/integrations/handlers/trino_handler/trino_config_provider.py
deleted file mode 100644
index 4231a4755fe..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/trino_config_provider.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from configparser import ConfigParser
-
-
-class TrinoConfigProvider:
-
- def __init__(self, **kwargs):
- self.config_file_name = kwargs.get('config_file_name')
- self.config_parser = ConfigParser()
- self.config_parser.read(self.config_file_name)
-
- def get_trino_kerberos_config(self):
- return self.config_parser['KERBEROS_CONFIG']
diff --git a/mindsdb/integrations/handlers/trino_handler/trino_handler.py b/mindsdb/integrations/handlers/trino_handler/trino_handler.py
deleted file mode 100644
index 8c5d88c90b2..00000000000
--- a/mindsdb/integrations/handlers/trino_handler/trino_handler.py
+++ /dev/null
@@ -1,182 +0,0 @@
-import re
-from typing import Dict
-import pandas as pd
-from pyhive import sqlalchemy_trino
-from mindsdb_sql_parser import parse_sql, ASTNode
-from trino.auth import BasicAuthentication
-from trino.dbapi import connect
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-logger = log.getLogger(__name__)
-
-
-class TrinoHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the Trino statements
-
- kerberos is not implemented yet
- """
-
- name = 'trino'
-
- def __init__(self, name, connection_data, **kwargs):
- super().__init__(name)
- self.parser = parse_sql
- self.connection_data = connection_data
- '''
- service_name = kwargs.get('service_name')
- self.config_file_name = kwargs.get('config_file_name')
- self.trino_config_provider = TrinoConfigProvider(config_file_name=self.config_file_name)
- self.kerberos_config = self.trino_config_provider.get_trino_kerberos_config()
- self.http_scheme = self.kerberos_config['http_scheme']
- self.dialect = self.kerberos_config['dialect']
- config = self.kerberos_config['config']
- hostname_override = self.kerberos_config['hostname_override']
- principal = f"{kwargs.get('user')}@{hostname_override}"
- ca_bundle = self.kerberos_config['ca_bundle']
- self.auth_config = KerberosAuthentication(config=config,
- service_name=service_name,
- principal=principal,
- ca_bundle=ca_bundle,
- hostname_override=hostname_override)
- '''
- self.connection = None
- self.is_connected = False
- self.with_clause = ""
-
- def connect(self):
- """"
- Handles the connection to a Trino instance.
- """
- if self.is_connected is True:
- return self.connection
-
- # option configuration
- http_scheme = 'http'
- auth = None
- auth_config = None
- password = None
-
- if 'auth' in self.connection_data:
- auth = self.connection_data['auth']
- if 'password' in self.connection_data:
- password = self.connection_data['password']
- if 'http_scheme' in self.connection_data:
- http_scheme = self.connection_data['http_scheme']
- if 'with' in self.connection_data:
- self.with_clause = self.connection_data['with']
-
- if password and auth == 'kerberos':
- raise Exception("Kerberos authorization doesn't support password.")
- elif password:
- auth_config = BasicAuthentication(self.connection_data['user'], password)
-
- if auth:
- conn = connect(
- host=self.connection_data['host'],
- port=self.connection_data['port'],
- user=self.connection_data['user'],
- catalog=self.connection_data['catalog'],
- schema=self.connection_data['schema'],
- http_scheme=http_scheme,
- auth=auth_config)
- else:
- conn = connect(
- host=self.connection_data['host'],
- port=self.connection_data['port'],
- user=self.connection_data['user'],
- catalog=self.connection_data['catalog'],
- schema=self.connection_data['schema'])
-
- self.is_connected = True
- self.connection = conn
- return conn
-
- def check_connection(self) -> StatusResponse:
- """
- Check the connection of the Trino instance
- :return: success status and error message if error occurs
- """
- response = StatusResponse(False)
-
- try:
- connection = self.connect()
- cur = connection.cursor()
- cur.execute("SELECT 1")
- response.success = True
- except Exception as e:
- logger.error(f'Error connecting to Trino {self.connection_data["schema"]}, {e}!')
- response.error_message = str(e)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str) -> Response:
- """
- Receive SQL query and runs it
- :param query: The SQL query to run in Trino
- :return: returns the records from the current recordset
- """
- try:
- connection = self.connect()
- cur = connection.cursor()
- result = cur.execute(query)
- if result and cur.description:
- response = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=pd.DataFrame(
- result,
- columns=[x[0] for x in cur.description]
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
- connection.commit()
- except Exception as e:
- logger.error(f'Error connecting to Trino {self.connection_data["schema"]}, {e}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
- return response
-
- def query(self, query: ASTNode) -> Response:
- # Utilize trino dialect from sqlalchemy
- # implement WITH clause as default for all table
- # in future, this behavior should be changed to support more detail
- # level
- # also, for simple the current implement is using rendered query string
- # another method that directly manipulate ASTNOde is prefered
- renderer = SqlalchemyRender(sqlalchemy_trino.TrinoDialect)
- query_str = renderer.get_string(query, with_failback=True)
- modified_query_str = re.sub(
- r"(?is)(CREATE.+TABLE.+\(.*\))",
- f"\\1 {self.with_clause}",
- query_str
- )
- return self.native_query(modified_query_str)
-
- def get_tables(self) -> Response:
- """
- List all tables in Trino
- :return: list of all tables
- """
- query = "SHOW TABLES"
- response = self.native_query(query)
- df = response.data_frame
- response.data_frame = df.rename(columns={df.columns[0]: 'table_name'})
- return response
-
- def get_columns(self, table_name: str) -> Dict:
- query = f'DESCRIBE "{table_name}"'
- response = self.native_query(query)
- return response
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/README.md b/mindsdb/integrations/handlers/tripadvisor_handler/README.md
deleted file mode 100644
index fc95f8114f8..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/README.md
+++ /dev/null
@@ -1,153 +0,0 @@
-# TripAdvisor handler #5369
-
-Custom Python Wrapper: tripadvisor_api.py
-
-This handler integrates with the [TripAdvisor API](https://tripadvisor-content-api.readme.io/reference/overview). This integration will enable TripAdvisor users to use ML to research travel destinations and more. The custom Python wrapper had to be made which can be found in the **tripadvisor_api.py**.
-
-Approved users of the Tripadvisor Content API can access the following business details for accommodations, restaurants, and attractions:
-
-- Location ID, name, address, latitude & longitude
-- Read reviews link, write-a-review link
-- Overall rating, ranking, subratings, awards, the number of reviews the rating is based on, rating bubbles image
-- Price level symbol, accommodation category/subcategory, attraction type, restaurant cuisine(s)
-
-# Connecting to TripAdvisor and Calling APIs
-
-CREATE DATABASE my_tripadvisor
-WITH
-ENGINE = 'tripadvisor'
-PARAMETERS = {'api_key': 'INSERT YOUR API KEY'};
-
-This is how you start the connection to TripAdvisor API via MindsDB.
-
-## Find Search API Reference
-
-The Location Search request returns up to 10 locations found by the given search query.
-You can use category ("hotels", "attractions", "restaurants", "geos"), phone number, address, and latitude/longtitude to search with more accuracy.
-
-```
-SELECT *
-FROM my_tripadvisor.searchLocationTable
-WHERE searchQuery = 'New York';
-```
-
-The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/searchforlocations
-
-## Location Details API reference
-
-A Location Details request returns comprehensive information about a location (hotel, restaurant, or an attraction) such as name, address, rating, and URLs for the listing on Tripadvisor.
-
-```
-SELECT *
-FROM my_tripadvisor.locationDetailsTable
-WHERE locationId = '23322232';
-```
-
-The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/getlocationdetails
-
-## Location Reviews API reference
-
-The Location Reviews request returns up to 5 of the most recent reviews for a specific location.
-
-```
-SELECT *
-FROM my_tripadvisor.reviewsTable
-WHERE locationId = '99288';
-```
-
-The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/getlocationreviews
-
-## Location Photos API reference
-
-The Location Reviews request returns up to 5 of the most recent reviews for a specific location.
-
-```
-SELECT *
-FROM my_tripadvisor.photosTable
-WHERE location_id = '99288';
-```
-
-The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/getlocationphotos
-
-## Location Nearby API reference
-
-The Location Reviews request returns up to 5 of the most recent reviews for a specific location.
-
-```
-SELECT *
-FROM my_tripadvisor.nearbyLocationTable
-WHERE latLong = '40.780825, -73.972781';
-```
-
-The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/searchfornearbylocations
-
-# Use Case: Sentiment Analysis of Reviews from TripAdvisor with OpenAI
-
-## Search Hotels in New York and Choose one
-
-We will search for the hotels in New York in the area around the given longitude and latitude. If we execute the
-SQL script then we will get the following result which is displayed below the SQL script. We will choose
-"Arthouse Hotel New York City" to see the reviews.
-
-```
-SELECT *
-FROM my_tripadvisor.searchLocationTable
-WHERE searchQuery='New York' and latLong='40.780825, -73.972781' and category='hotels';
-```
-
-### Result:
-
-| location_id | name | distance | rating | bearing | street1 | street2 | city | state | country | postalcode | address_string | phone | latitude | longitude |
-| ----------- | ---------------------------- | ------------------ | ------ | ------- | --------------------------------- | ------- | ------------- | -------- | ------------- | ---------- | --------------------------------------------------------------- | ------ | -------- | --------- |
-| **99288** | Arthouse Hotel New York City | 0.4528974982904253 | [NULL] | west | 2178 Broadway at West 77th Street | [NULL] | New York City | New York | United States | 10024-6647 | 2178 Broadway at West 77th Street, New York City, NY 10024-6647 | [NULL] | [NULL] | [NULL] |
-| 112064 | Warwick New York | 1.300511583690564 | [NULL] | south | 65 W 54th St | | New York City | New York | United States | 10019 | 65 W 54th St, New York City, NY 10019 | [NULL] | [NULL] | [NULL] |
-| 611947 | New York Hilton Midtown | 1.3284012949631072 | [NULL] | south | 1335 Avenue Of The Americas | [NULL] | New York City | New York | United States | 10019-6078 | 1335 Avenue Of The Americas, New York City, NY 10019-6078 | [NULL] | [NULL] | [NULL] |
-
-.
-.
-.
-
-## Creating the OpenAI model
-
-Creating the model and the prompt.
-
-```
-CREATE MODEL sentiment_classifier_gpt_tripadvisor
-PREDICT sentiment
-USING
-engine = 'openai',
-prompt_template = 'describe the sentiment of the reviews
-strictly as "positive", "neutral", or "negative".
-"I love the product":positive
-"It is a scam":negative
-"{{review}}.":',
-api_key='Your API KEY';
-```
-
-## Perform sentiment analysis
-
-```
-SELECT input.text_review, output.sentiment
-FROM my_tripadvisor.reviewsTable AS input
-JOIN sentiment_classifier_gpt_tripadvisor AS output
-WHERE input.locationId = '99288';
-```
-
-### Result:
-
-| text_review | sentiment |
-| ----------- | --------- |
-
-| Stayed here numerous times over the years and loved the hotel. This visit was disappointing from start to finish. No hand soap in room and told to go to CVS to purchase some. Rude staff at check-in. Don't know what happened to the once best hotel on the Upper West! | negative |
-
-| Physically the room was appalling. The undersized bathroom you could not actually close the door without squeezing in between the metal (?) vanity and toilet. No chair or table nor room to do any exercising or stretching. You will need to stretch to be able to close the bath door and open the shower door. Most hotels will have auto close room doors balance so they auto lock. Not the Arthouse... here you better make sure you hear the click on the lock which takes considerable force. The poor balancing will leave your room wide open. The staff is courteous but wonder about their veracity as they told us our rooms were "upgraded" to a king room. If this was an upgrade ....sheesh hate to have seen the original room. No mention was made of the "urban fee" which provided dicey wifi with a deceptive title. The SeraFina restaurant was disappointing ans it appears wo be more rundown than the hotel with cracked vinyl benches for seating with average Italian food at best. NYC has many fine boutique Hotels..... this isn't one of them. | negative |
-
-| Hotel is just ok at best and is incredibly strict about cancellation policies. I would avoid booking here. It is not nice enough to warrant choosing this over another hotel in the area with better customer service. | negative |
-
-| Although we have stayed here before, this was a horrible experience! I booked the Broadway Deluxe King Room in the picture which looks very comfortable but they gave us a room 1/3 smaller and claimed it was the "same category" as the one I booked. We've stayed in the room before and it was quite a good size, had an extra chair, good sized closet and bathroom. This room was tiny, had no extra chair and the bathroom was so small one could barely turn around in it. When I complained to the manager he gave me a credit of the "resort fee" and said the exact line of rooms in the picture wasn't available. They shouldn't show a picture of a room on the website that is substantially different than the one a guest stays in.
-
-In addition, the shade in the room was broken, it took 3 requests over 2 days to get a few extra towels and there wasn't hot (only lukewarm) water for the first two days of our stay despite multiple complaints.
-
-I would avoid this hotel in the future. | negative |
-
-| We were in the neighborhood and dropped into Arthouse's lobby bar with live music. We ended up staying for a few hours and had an absolutely lovely time. Shout out to our server Elviz for the quick service and to the piano player Greg for a memorable night! The best music we've heard in a long time. Lovely hotel and would highly recommend! We will be back | positive |
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/__about__.py b/mindsdb/integrations/handlers/tripadvisor_handler/__about__.py
deleted file mode 100644
index 1f17bf29a54..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB TripAdvisor handler"
-__package_name__ = "mindsdb_tripadvisor_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Tripadvisor"
-__author__ = "Ton Hoang Nguyen (Bill)"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2022- mindsdb"
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/__init__.py b/mindsdb/integrations/handlers/tripadvisor_handler/__init__.py
deleted file mode 100644
index 1a5bd7d000e..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .tripadvisor_handler import TripAdvisorHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "TripAdvisor"
-name = "tripadvisor"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/icon.svg b/mindsdb/integrations/handlers/tripadvisor_handler/icon.svg
deleted file mode 100644
index f5107c072f8..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_api.py b/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_api.py
deleted file mode 100644
index c2a47ecb461..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_api.py
+++ /dev/null
@@ -1,215 +0,0 @@
-import requests
-from requests import Response
-from enum import Enum
-
-
-class TripAdvisorAPICall(Enum):
- """TripAdvisor API references"""
-
- SEARCH_LOCATION = 1
- LOCATION_DETAILS = 2
- PHOTOS = 3
- REVIEWS = 4
- NEARBY_SEARCH = 5
-
-
-class TripAdvisorAPI:
- """A class for checking the connection to the TripAdvisor Content API and making requests.
-
- Attributes:
- api_key (str): The unique API key to access Tripadvisor content.
- """
-
- def __init__(self, api_key):
- self.api_key = api_key
-
- def checkTripAdvisorConnection(self):
- """
- Check the connection with TripAdvisor
- """
- url = "https://api.content.tripadvisor.com/api/v1/location/search?language=en&key={api_key}&searchQuery={searchQuery}".format(
- api_key=self.api_key, searchQuery="London"
- )
-
- headers = {"accept": "application/json"}
- response = requests.get(url, headers=headers)
- status_code = response.status_code
-
- if status_code >= 400 and status_code <= 499:
- raise Exception("Client error: " + response.text)
-
- if status_code >= 500 and status_code <= 599:
- raise Exception("Server error: " + response.text)
-
- def getResponse(self, url: str) -> Response:
- """
- Getting a response from the API call
- """
- headers = {"accept": "application/json"}
- response = requests.get(url, headers=headers)
- return response
-
- def getURLQuery(self, url: str, params_dict: dict) -> str:
- """
- Processing the query and adding parameters to the URL
- """
- for idx, (queryParam, value) in enumerate(params_dict.items()):
- if value is not None or value != "":
- if value != "" and any(
- next_value != "" or value is not None
- for next_value in list(params_dict.values())[idx + 1:]
- ):
- url += "{queryParam}={value}&".format(
- queryParam=queryParam, value=value
- )
- else:
- url += "{queryParam}={value}".format(
- queryParam=queryParam, value=value
- )
- return url
-
- def location_search(
- self,
- url: str,
- params_dict: dict,
- language: str = "en",
- ) -> Response:
- """
- The Location Search request returns up to 10 locations found by the given search query. You can use category ("hotels", "attractions", "restaurants", "geos"),
- phone number, address, and latitude/longtitude to search with more accuracy.
-
- Args:
- searchQuery (str): Text to use for searching based on the name of the location.
- category (str): Filters result set based on property type. Valid options are "hotels", "attractions", "restaurants", and "geos".
- phone (str): Phone number to filter the search results by (this can be in any format with spaces and dashes but without the "+" sign at the beginning).
- address (str): Address to filter the search results by.
- latLong (str): Latitude/Longitude pair to scope down the search around a specifc point - eg. "42.3455,-71.10767".
- radius (int): Length of the radius from the provided latitude/longitude pair to filter results.
- radiusUnit (str): Unit for length of the radius. Valid options are "km", "mi", "m" (km=kilometers, mi=miles, m=meters.
- language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages.
-
- Returns:
- response: Response object with response data as application/json
- """
-
- url = url + "search?language={language}&key={api_key}&".format(
- api_key=self.api_key, language=language
- )
-
- url = self.getURLQuery(url, params_dict)
- response = self.getResponse(url)
-
- return response
-
- def location_details(
- self, url: str, params_dict: dict, locationId: str, language: str = "en"
- ) -> Response:
- """
- A Location Details request returns comprehensive information about a location (hotel, restaurant, or an attraction) such as name, address, rating, and URLs for the listing
- on Tripadvisor.
-
- Args:
- locationId (str): A unique identifier for a location on Tripadvisor. The location ID can be obtained using the Location Search.
- language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages.
- currency (str): The currency code to use for request and response (should follow ISO 4217).
-
- Returns:
- response (Response): Response object with response data as application/json
- """
- url = url + "{locationId}/details?language={language}&key={api_key}&".format(
- locationId=locationId, api_key=self.api_key, language=language
- )
- url = self.getURLQuery(url, params_dict)
- response = self.getResponse(url)
- return response
-
- def location_reviews(
- self, url: str, locationId: str, language: str = "en"
- ) -> Response:
- """
- The Location Reviews request returns up to 5 of the most recent reviews for a specific location. Please note that the limits are different for the beta subscribers.
-
- Args:
- locationId (str): A unique identifier for a location on Tripadvisor. The location ID can be obtained using the Location Search.
- language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages.
-
- Returns:
- response: Response object with response data as application/json
- """
- url = url + "{locationId}/reviews?language={language}&key={api_key}&".format(
- locationId=locationId, api_key=self.api_key, language=language
- )
-
- response = self.getResponse(url)
- return response
-
- def location_photos(self, url: str, locationId: str, language: str = "en") -> Response:
- """
- The Location Photos request returns up to 5 high-quality photos for a specific location. Please note that the limits are different for the beta subscribers.
- You need to upgrade to get the higher limits mentioned here. The photos are ordered by recency.
-
- Args:
- locationId (str): A unique identifier for a location on Tripadvisor. The location ID can be obtained using the Location Search.
- language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages.
-
- Returns:
- response: Response object with response data as application/json
- """
- url = url + "{locationId}/photos?language={language}&key={api_key}".format(locationId=locationId, language=language, api_key=self.api_key)
- response = self.getResponse(url)
- return response
-
- def location_nearby_search(self, url: str, params_dict: dict, language: str = "en") -> Response:
- """
- The Nearby Location Search request returns up to 10 locations found near the given latitude/longtitude.
- You can use category ("hotels", "attractions", "restaurants", "geos"), phone number, address to search with more accuracy.
-
- Args:
- latLong (str): Latitude/Longitude pair to scope down the search around a specifc point - eg. "42.3455,-71.10767".
- category (str): Filters result set based on property type. Valid options are "hotels", "attractions", "restaurants", and "geos".
- phone (str): Phone number to filter the search results by (this can be in any format with spaces and dashes but without the "+" sign at the beginning).
- address (str): Address to filter the search results by.
- radius (int): Length of the radius from the provided latitude/longitude pair to filter results.
- radiusUnit (str): Unit for length of the radius. Valid options are "km", "mi", "m" (km=kilometers, mi=miles, m=meters.
- language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages.
-
- Returns:
- response: Response object with response data as application/json
- """
-
- url = url + "nearby_search?language={language}&key={api_key}&".format(
- api_key=self.api_key, language=language
- )
-
- url = self.getURLQuery(url, params_dict)
- response = self.getResponse(url)
- return response
-
- def getTripAdvisorData(self, apiCall, **params):
- """
- Making a request based on the query and receive data from TripAdvisor.
- """
- url = "https://api.content.tripadvisor.com/api/v1/location/"
- params_dict = params
-
- if apiCall == TripAdvisorAPICall.SEARCH_LOCATION:
- response = self.location_search(url, params_dict)
- return response.json()["data"]
-
- elif apiCall == TripAdvisorAPICall.LOCATION_DETAILS:
- response = self.location_details(
- url, params_dict, params_dict["locationId"]
- )
- return response.json()
-
- elif apiCall == TripAdvisorAPICall.REVIEWS:
- response = self.location_reviews(url, params_dict["locationId"])
- return response.json()["data"]
-
- elif apiCall == TripAdvisorAPICall.PHOTOS:
- response = self.location_photos(url, params_dict["locationId"])
- return response.json()["data"]
-
- elif apiCall == TripAdvisorAPICall.NEARBY_SEARCH:
- response = self.location_nearby_search(url, params_dict)
- return response.json()["data"]
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_handler.py b/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_handler.py
deleted file mode 100644
index 28980d7acff..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_handler.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-
-import pandas as pd
-
-from mindsdb.utilities import log
-from mindsdb.utilities.config import Config
-
-from mindsdb.integrations.libs.api_handler import APIHandler
-
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-
-from .tripadvisor_table import SearchLocationTable
-from .tripadvisor_table import LocationDetailsTable
-from .tripadvisor_table import ReviewsTable
-from .tripadvisor_table import PhotosTable
-from .tripadvisor_table import NearbyLocationTable
-from .tripadvisor_api import TripAdvisorAPI
-from .tripadvisor_api import TripAdvisorAPICall
-
-logger = log.getLogger(__name__)
-
-
-class TripAdvisorHandler(APIHandler):
- """A class for handling connections and interactions with the TripAdvisor Content API.
-
- Attributes:
- api_key (str): The unique API key to access Tripadvisor content.
- api (TripAdvisorAPI): The `TripAdvisorAPI` object for checking the connection to the TripAdvisor API.
- """
-
- def __init__(self, name=None, **kwargs):
- super().__init__(name)
-
- args = kwargs.get("connection_data", {})
- self._tables = {}
-
- self.connection_args = {}
- handler_config = Config().get("tripadvisor_handler", {})
- for k in ["api_key"]:
- if k in args:
- self.connection_args[k] = args[k]
- elif f"TRIPADVISOR_{k.upper()}" in os.environ:
- self.connection_args[k] = os.environ[f"TRIPADVISOR_{k.upper()}"]
- elif k in handler_config:
- self.connection_args[k] = handler_config[k]
-
- self.api = None
- self.is_connected = False
-
- tripAdvisor = SearchLocationTable(self)
- self._register_table("searchLocationTable", tripAdvisor)
-
- tripAdvisorLocationDetails = LocationDetailsTable(self)
- self._register_table("locationDetailsTable", tripAdvisorLocationDetails)
-
- tripAdvisorReviews = ReviewsTable(self)
- self._register_table("reviewsTable", tripAdvisorReviews)
-
- tripAdvisorPhotos = PhotosTable(self)
- self._register_table("photosTable", tripAdvisorPhotos)
-
- tripAdvisorNearbyLocation = NearbyLocationTable(self)
- self._register_table("nearbyLocationTable", tripAdvisorNearbyLocation)
-
- def connect(self, api_version=2):
- """Check the connection with TripAdvisor API"""
-
- if self.is_connected is True:
- return self.api
-
- self.api = TripAdvisorAPI(api_key=self.connection_args["api_key"])
-
- self.is_connected = True
- return self.api
-
- def check_connection(self) -> StatusResponse:
- """This function evaluates if the connection is alive and healthy"""
- response = StatusResponse(False)
-
- try:
- api = self.connect()
-
- # make a random http call with searching a location.
- # it raises an error in case if auth is not success and returns not-found otherwise
- api.connectTripAdvisor()
- response.success = True
-
- except Exception as e:
- response.error_message = f"Error connecting to TripAdvisor api: {e}"
- logger.error(response.error_message)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def call_tripadvisor_searchlocation_api(
- self, method_name: str = None, params: dict = None
- ) -> pd.DataFrame:
- """It processes the JSON data from the call and transforms it into pandas.Dataframe"""
- if self.is_connected is False:
- self.connect()
-
- locations = self.api.getTripAdvisorData(
- TripAdvisorAPICall.SEARCH_LOCATION, **params
- )
- result = []
-
- for loc in locations:
- data = {
- "location_id": loc.get("location_id"),
- "name": loc.get("name"),
- "distance": loc.get("distance"),
- "rating": loc.get("rating"),
- "bearing": loc.get("bearing"),
- "street1": loc.get("address_obj").get("street1"),
- "street2": loc.get("address_obj").get("street2"),
- "city": loc.get("address_obj").get("city"),
- "state": loc.get("address_obj").get("state"),
- "country": loc.get("address_obj").get("country"),
- "postalcode": loc.get("address_obj").get("postalcode"),
- "address_string": loc.get("address_obj").get("address_string"),
- "phone": loc.get("address_obj").get("phone"),
- "latitude": loc.get("address_obj").get("latitude"),
- "longitude": loc.get("address_obj").get("longitude"),
- }
- result.append(data)
- result = pd.DataFrame(result)
- return result
-
- def call_tripadvisor_location_details_api(
- self, method_name: str = None, params: dict = None
- ) -> pd.DataFrame:
- """It processes the JSON data from the call and transforms it into pandas.Dataframe"""
- if self.is_connected is False:
- self.connect()
-
- loc = self.api.getTripAdvisorData(TripAdvisorAPICall.LOCATION_DETAILS, **params)
- result = []
-
- data = {
- "location_id": loc.get("location_id"),
- "name": loc.get("name"),
- "distance": loc.get("distance"),
- "rating": loc.get("rating"),
- "bearing": loc.get("bearing"),
- "street1": loc.get("address_obj").get("street1"),
- "street2": loc.get("address_obj").get("street2"),
- "city": loc.get("address_obj").get("city"),
- "state": loc.get("address_obj").get("state"),
- "country": loc.get("address_obj").get("country"),
- "postalcode": loc.get("address_obj").get("postalcode"),
- "address_string": loc.get("address_obj").get("address_string"),
- "phone": loc.get("address_obj").get("phone"),
- "latitude": loc.get("address_obj").get("latitude"),
- "longitude": loc.get("address_obj").get("longitude"),
- "web_url": loc.get("web_url"),
- "timezone": loc.get("timezone"),
- "email": loc.get("email"),
- "website": loc.get("website"),
- "write_review": loc.get("write_review"),
- "ranking_data": str(loc.get("ranking_data")),
- "rating_image_url": loc.get("rating_image_url"),
- "num_reviews": loc.get("num_reviews"),
- "review_rating_count": loc.get("review_rating_count"),
- "subratings": loc.get("subratings"),
- "photo_count": loc.get("photo_count"),
- "see_all_photos": loc.get("see_all_photos"),
- "price_level": loc.get("price_level"),
- "parent_brand": loc.get("parent_brand"),
- "brand": loc.get("brand"),
- "ancestors": str(loc.get("ancestors")),
- "periods": str(loc.get("hours").get("periods"))
- if loc.get("hours") is not None
- else None,
- "weekday": str(loc.get("hours").get("weekday_text"))
- if loc.get("weekday") is not None
- else None,
- "amenities": str(loc.get("amenities")),
- "features": str(loc.get("features")),
- "cuisines": str(loc.get("cuisine")),
- "styles": str(loc.get("styles")),
- "neighborhood_info": str(loc.get("neighborhood_info")),
- "awards": str(loc.get("awards")),
- "trip_types": str(loc.get("trip_types")),
- "groups": str(loc.get("groups")),
- }
-
- result.append(data)
-
- result = pd.DataFrame(result)
- return result
-
- def call_tripadvisor_reviews_api(
- self, method_name: str = None, params: dict = None
- ) -> pd.DataFrame:
- """It processes the JSON data from the call and transforms it into pandas.Dataframe"""
- if self.is_connected is False:
- self.connect()
-
- locations = self.api.getTripAdvisorData(TripAdvisorAPICall.REVIEWS, **params)
- result = []
-
- for loc in locations:
- data = {
- "id": loc.get("id"),
- "lang": loc.get("lang"),
- "location_id": loc.get("location_id"),
- "published_date": loc.get("published_date"),
- "rating": loc.get("rating"),
- "helpful_votes": loc.get("helpful_votes"),
- "rating_image_url": loc.get("rating_image_url"),
- "url": loc.get("url"),
- "trip_type": loc.get("trip_type"),
- "travel_date": loc.get("travel_date"),
- "text_review": loc.get("text"),
- "title": loc.get("title"),
- "owner_response": loc.get("owner_response"),
- "is_machine_translated": loc.get("is_machine_translated"),
- "user": str(loc.get("user")),
- "subratings": str(loc.get("subratings")),
- }
- result.append(data)
- result = pd.DataFrame(result)
- return result
-
- def call_tripadvisor_photos_api(
- self, method_name: str = None, params: dict = None
- ) -> pd.DataFrame:
- """It processes the JSON data from the call and transforms it into pandas.Dataframe"""
- if self.is_connected is False:
- self.connect()
-
- locations = self.api.getTripAdvisorData(TripAdvisorAPICall.PHOTOS, **params)
- result = []
-
- for loc in locations:
- data = {
- "id": loc.get("id"),
- "is_blessed": loc.get("is_blessed"),
- "album": loc.get("album"),
- "caption": loc.get("caption"),
- "published_date": loc.get("published_date"),
- "images": str(loc.get("images")),
- "source": str(loc.get("source")),
- "user": str(loc.get("user")),
- }
- result.append(data)
- result = pd.DataFrame(result)
- return result
-
- def call_tripadvisor_nearby_location_api(
- self, method_name: str = None, params: dict = None
- ) -> pd.DataFrame:
- """It processes the JSON data from the call and transforms it into pandas.Dataframe"""
- if self.is_connected is False:
- self.connect()
-
- locations = self.api.getTripAdvisorData(TripAdvisorAPICall.NEARBY_SEARCH, **params)
- result = []
-
- for loc in locations:
- data = {
- "location_id": loc.get("location_id"),
- "name": loc.get("name"),
- "distance": loc.get("distance"),
- "rating": loc.get("rating"),
- "bearing": loc.get("bearing"),
- "address_obj": str(loc.get("address_obj")),
- }
- result.append(data)
- result = pd.DataFrame(result)
- return result
diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_table.py b/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_table.py
deleted file mode 100644
index 4462d287a0a..00000000000
--- a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_table.py
+++ /dev/null
@@ -1,477 +0,0 @@
-import pandas as pd
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-
-
-class SearchLocationTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Select data from the search_location table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- """
-
- self.handler.connect()
-
- conditions = extract_comparison_conditions(query.where)
-
- allowed_keys = set(
- [
- "searchQuery",
- "category",
- "phone",
- "address",
- "latLong",
- "radius",
- "radiusUnit",
- "language",
- ]
- )
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
- if op == "or":
- raise NotImplementedError("OR is not supported")
- elif op == "=" and arg1 in allowed_keys:
- params[arg1] = arg2
- elif op != "=":
- raise NotImplementedError(f"Unknown op: {op}")
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params["max_results"] = query.limit.value
-
- if "searchQuery" not in params and "latLong" not in params:
- # search not works without searchQuery, use 'London'
- params["searchQuery"] = "London"
-
- result = self.handler.call_tripadvisor_searchlocation_api(params=params)
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = []
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
-
- if len(columns) == 0:
- columns = self.get_columns()
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
- return result
-
- def get_columns(self):
- """Get the list of column names for the search_location table.
-
- Returns:
- list: A list of column names for the search_location table.
- """
- return [
- "location_id",
- "name",
- "distance",
- "rating",
- "bearing",
- "street1",
- "street2",
- "city",
- "state",
- "country",
- "postalcode",
- "address_string",
- "phone",
- "latitude",
- "longitude",
- ]
-
-
-class LocationDetailsTable(APITable):
- result_json = []
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Select data from the location_details table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- """
-
- self.handler.connect()
-
- conditions = extract_comparison_conditions(query.where)
-
- allowed_keys = set(["locationId", "currency", "language"])
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
- if op == "or":
- raise NotImplementedError("OR is not supported")
- elif op == "=" and arg1 in allowed_keys:
- params[arg1] = arg2
- elif op != "=":
- raise NotImplementedError(f"Unknown op: {op}")
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params["max_results"] = query.limit.value
-
- if "locationId" not in params:
- # search not works without searchQuery, use 'London'
- params["locationId"] = "23322232"
-
- result = self.handler.call_tripadvisor_location_details_api(params=params)
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = []
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
-
- if len(columns) == 0:
- columns = self.get_columns()
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
- return result
-
- def get_columns(self):
- """Get the list of column names for the location_details table.
-
- Returns:
- list: A list of column names for the location_details table.
- """
- return [
- "location_id",
- "distance",
- "name",
- "description",
- "web_url",
- "street1",
- "street2",
- "city",
- "state",
- "country",
- "postalcode",
- "address_string",
- "latitude",
- "longitude",
- "timezone",
- "email",
- "phone",
- "website",
- "write_review",
- "ranking_data",
- "rating",
- "rating_image_url",
- "num_reviews",
- "photo_count",
- "see_all_photos",
- "price_level",
- "brand",
- "parent_brand",
- "ancestors",
- "periods",
- "weekday",
- "features",
- "cuisines",
- "amenities",
- "trip_types",
- "styles",
- "awards",
- "neighborhood_info",
- "parent_brand",
- "brand",
- "groups",
- ]
-
-
-class ReviewsTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Select data from the reviews table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- """
-
- self.handler.connect()
-
- conditions = extract_comparison_conditions(query.where)
-
- allowed_keys = set(["locationId", "language"])
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
- if op == "or":
- raise NotImplementedError("OR is not supported")
- elif op == "=" and arg1 in allowed_keys:
- params[arg1] = arg2
- elif op != "=":
- raise NotImplementedError(f"Unknown op: {op}")
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params["max_results"] = query.limit.value
-
- if "locationId" not in params:
- # search not works without searchQuery, use 'London'
- params["locationId"] = "23322232"
-
- result = self.handler.call_tripadvisor_reviews_api(params=params)
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = []
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
-
- if len(columns) == 0:
- columns = self.get_columns()
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
- return result
-
- def get_columns(self):
- """Get the list of column names for the reviews table.
-
- Returns:
- list: A list of column names for the reviews table.
- """
- return [
- "id",
- "lang",
- "location_id",
- "published_date",
- "rating",
- "helpful_votes",
- "rating_image_url",
- "url",
- "trip_type",
- "travel_date",
- "text_review",
- "title",
- "owner_response",
- "is_machine_translated",
- "user",
- "subratings",
- ]
-
-
-class PhotosTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Select data from the photos table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- """
-
- conditions = extract_comparison_conditions(query.where)
-
- allowed_keys = set(["locationId", "language"])
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
- if op == "or":
- raise NotImplementedError("OR is not supported")
- elif op == "=" and arg1 in allowed_keys:
- params[arg1] = arg2
- elif op != "=":
- raise NotImplementedError(f"Unknown op: {op}")
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params["max_results"] = query.limit.value
-
- if "locationId" not in params:
- params["locationId"] = "23322232"
-
- result = self.handler.call_tripadvisor_photos_api(params=params)
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = []
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
-
- if len(columns) == 0:
- columns = self.get_columns()
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
- return result
-
- def get_columns(self):
- """Get the list of column names for the photos table.
-
- Returns:
- list: A list of column names for the photos table.
- """
- return [
- "id",
- "is_blessed",
- "album",
- "caption",
- "published_date",
- "images",
- "source",
- "user",
- ]
-
-
-class NearbyLocationTable(APITable):
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Select data from the nearby_location table and return it as a pandas DataFrame.
-
- Args:
- query (ast.Select): The SQL query to be executed.
-
- Returns:
- pandas.DataFrame: A pandas DataFrame containing the selected data.
- """
-
- conditions = extract_comparison_conditions(query.where)
-
- allowed_keys = set(["latLong", "language", "category", "phone", "address", "radius", "radiusUnit"])
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
- if op == "or":
- raise NotImplementedError("OR is not supported")
- elif op == "=" and arg1 in allowed_keys:
- params[arg1] = arg2
- elif op != "=":
- raise NotImplementedError(f"Unknown op: {op}")
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params["max_results"] = query.limit.value
-
- if "latLong" not in params:
- params["latLong"] = "40.780825, -73.972781"
-
- result = self.handler.call_tripadvisor_nearby_location_api(params=params)
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = []
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
-
- if len(columns) == 0:
- columns = self.get_columns()
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
- return result
-
- def get_columns(self):
- """Get the list of column names for the nearby_location table.
-
- Returns:
- list: A list of column names for the nearby_location table.
- """
- return [
- "location_id",
- "name",
- "distance",
- "rating",
- "bearing",
- "address_obj",
- ]
diff --git a/mindsdb/integrations/handlers/twilio_handler/README.md b/mindsdb/integrations/handlers/twilio_handler/README.md
deleted file mode 100644
index 70671efd31a..00000000000
--- a/mindsdb/integrations/handlers/twilio_handler/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-
-
-# Twilio Handler
-
-Twilio handler for MindsDB provides interfaces to connect to Twilio via APIs and send or retrieve SMS data into MindsDB.
-
----
-
-## Table of Contents
-
-- [Twilio Handler](#twilio-handler)
- - [Table of Contents](#table-of-contents)
- - [About Twilio](#about-twilio)
- - [Twilio Handler Implementation](#twilio-handler-implementation)
- - [Twilio Handler Initialization](#twilio-handler-initialization)
- - [How to get your Twilio credentials](#how-to-get-your-twilio-credentials)
- - [Implemented Features](#implemented-features)
- - [TODO](#todo)
- - [Example Usage](#example-usage)
-
----
-
-## About Twilio
-
-Twilio provides a cloud communication platform which allows software developers to programmatically make and receive phone calls, send and receive text messages, and perform other communication functions using its web service APIs.
-
-## Twilio Handler Implementation
-
-This handler was implemented using the [Twilio Python SDK](https://www.twilio.com/docs/libraries/python). The SDK provides a simple and efficient way to interact with the Twilio API.
-
-## Twilio Handler Initialization
-
-The Twilio handler is initialized with the following parameters:
-
-- `account_sid`: a required Twilio Account SID
-- `auth_token`: a required Twilio Authentication Token
-- `phone_number`: a required Twilio phone number
-
-## How to get your Twilio credentials
-
-1. Sign up for a Twilio account or log into your existing account.
-2. Navigate to the [Twilio Console Dashboard](https://www.twilio.com/console).
-3. Here you will find your `ACCOUNT SID` and `AUTH TOKEN`.
-4. To get a Twilio phone number, navigate to the "Phone Numbers" section and either use an existing number or buy a new one.
-5. Store these as environment variables: `TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN`, and `TWILIO_PHONE_NUMBER` respectively.
-
-## Implemented Features
-
-- Send an SMS to a given number with a specified body.
-- Fetch the last `n` messages sent or received by the Twilio phone number.
-
-## TODO
-
-- Implement support for making and receiving calls.
-- Add more detailed logging and error handling.
-
-## Example Usage
-
-```sql
--- To send an SMS
-CREATE DATABASE my_twilio
-With
- ENGINE = 'twilio',
- PARAMETERS = {
- "account_sid":"YOUR_ACCOUNT_SID",
- "auth_token":"YOUR_AUTH_TOKEN"
- };
-```
-
-You can now run queries as follows:
-
-```sql
--- Get all messages
-SELECT * FROM my_twilio.messages LIMIT 100;
-
--- get all messages sent for all numbers in this account
-SELECT * FROM
- my_twilio.phone_numbers
- LEFT JOIN my_twilio.messages
- ON my_twilio.phone_numbers.phone_number = my_twilio.messages.from_number;
-
--- Get message with sid
-SELECT sid, to_number, from_number FROM my_twilio.messages where sid="SMbefbd64e3caa7d4c147a0aab82d47";
-
--- filter to and from
-SELECT * FROM my_twilio.messages where from_number="+15129222338";
-SELECT * FROM my_twilio.messages where to_number="+15129222338";
-
-
--- send messages:
-INSERT INTO my_twilio.messages (to_number, from_number, body)
-values("+15129222338", "+16122530327", "wow! testing this");
-
-select * from my_twilio(
-fetch_messages(date_sent_after='2022-10-29 09:46:29.000000')
-```
diff --git a/mindsdb/integrations/handlers/twilio_handler/__about__.py b/mindsdb/integrations/handlers/twilio_handler/__about__.py
deleted file mode 100644
index 0b7b8c6b3bf..00000000000
--- a/mindsdb/integrations/handlers/twilio_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Twilio handler'
-__package_name__ = 'mindsdb_twilio_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Twilio"
-__author__ = 'Lizzie Siegle'
-__github__ = 'https://github.com/elizabethsiegle'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023- mindsdb'
diff --git a/mindsdb/integrations/handlers/twilio_handler/__init__.py b/mindsdb/integrations/handlers/twilio_handler/__init__.py
deleted file mode 100644
index ac08eb0ff34..00000000000
--- a/mindsdb/integrations/handlers/twilio_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .twilio_handler import (
- TwilioHandler as Handler
- )
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Twilio'
-name = 'twilio'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/twilio_handler/icon.svg b/mindsdb/integrations/handlers/twilio_handler/icon.svg
deleted file mode 100644
index 99adef76aef..00000000000
--- a/mindsdb/integrations/handlers/twilio_handler/icon.svg
+++ /dev/null
@@ -1,11 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/twilio_handler/requirements.txt b/mindsdb/integrations/handlers/twilio_handler/requirements.txt
deleted file mode 100644
index 1e2071a390f..00000000000
--- a/mindsdb/integrations/handlers/twilio_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-twilio
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/twilio_handler/twilio_handler.py b/mindsdb/integrations/handlers/twilio_handler/twilio_handler.py
deleted file mode 100644
index 0a6708d5034..00000000000
--- a/mindsdb/integrations/handlers/twilio_handler/twilio_handler.py
+++ /dev/null
@@ -1,385 +0,0 @@
-import os
-
-import re
-from twilio.rest import Client
-import pandas as pd
-from mindsdb.integrations.libs.api_handler import APIHandler, APITable
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-from mindsdb.utilities.config import Config
-from mindsdb.utilities import log
-from mindsdb.integrations.utilities.date_utils import parse_local_date
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, project_dataframe, filter_dataframe
-
-from mindsdb_sql_parser import ast
-
-logger = log.getLogger(__name__)
-
-
-class PhoneNumbersTable(APITable):
-
- def select(self, query: ast.Select) -> Response:
-
- conditions = extract_comparison_conditions(query.where)
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
-
- if op == 'or':
- raise NotImplementedError('OR is not supported')
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params['limit'] = query.limit.value
-
- result = self.handler.list_phone_numbers(params, df=True)
-
- # filter targets
- result = filter_dataframe(result, filters)
-
- # project targets
- result = project_dataframe(result, query.targets, self.get_columns())
-
- return result
-
- def get_columns(self):
- return [
- 'sid',
- 'date_created',
- 'date_updated',
- 'phone_number',
- 'friendly_name',
- 'account_sid',
- 'capabilities',
- 'number_status',
- 'api_version',
- 'voice_url',
- 'sms_url',
- 'uri'
- ]
-
-
-class MessagesTable(APITable):
-
- def select(self, query: ast.Select) -> Response:
-
- conditions = extract_comparison_conditions(query.where)
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
-
- if op == 'or':
- raise NotImplementedError('OR is not supported')
- if arg1 == 'sent_at' and arg2 is not None:
-
- date = parse_local_date(arg2)
-
- if op == '>':
- params['date_sent_after'] = date
- elif op == '<':
- params['date_sent_before'] = date
- else:
- raise NotImplementedError
-
- # also add to post query filter because date_sent_after=date1 will include date1
- filters.append([op, arg1, arg2])
-
- elif arg1 == 'sid':
- if op == '=':
- params['sid'] = arg2
- # TODO: implement IN
- else:
- NotImplementedError('Only "from_number=" is implemented')
- elif arg1 == 'from_number':
- if op == '=':
- params['from_number'] = arg2
- # TODO: implement IN
- else:
- NotImplementedError('Only "from_number=" is implemented')
-
- elif arg1 == 'to_number':
- if op == '=':
- params['to_number'] = arg2
- # TODO: implement IN
- else:
- NotImplementedError('Only "to_number=" is implemented')
-
- else:
- filters.append([op, arg1, arg2])
-
- result = self.handler.fetch_messages(params, df=True)
-
- # filter targets
- result = filter_dataframe(result, filters)
-
- if query.limit is not None:
- result = result[:int(query.limit.value)]
-
- # project targets
- result = project_dataframe(result, query.targets, self.get_columns())
-
- return result
-
- def get_columns(self):
- return [
- 'sid',
- 'from_number',
- 'to_number',
- 'body',
- 'direction',
- 'msg_status',
- 'sent_at', # datetime.strptime(str(msg.date_sent), '%Y-%m-%d %H:%M:%S%z'),
- 'account_sid',
- 'price',
- 'price_unit',
- 'api_version',
- 'uri'
- ]
-
- def insert(self, query: ast.Insert):
- # https://docs.tweepy.org/en/stable/client.html#tweepy.Client.create_tweet
- columns = [col.name for col in query.columns]
-
- ret = []
-
- insert_params = ["to_number", "from_number", "body", 'media_url']
- for row in query.values:
- params = dict(zip(columns, row))
-
- # split long text over 1500 symbols
- max_text_len = 1500
- text = params['body']
- words = re.split('( )', text)
- messages = []
-
- text2 = ''
- pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
- for word in words:
- # replace the links in word to string with the length as twitter short url (23)
- word2 = re.sub(pattern, '-' * 23, word)
- if len(text2) + len(word2) > max_text_len - 3 - 7: # 3 is for ..., 7 is for (10/11)
- messages.append(text2.strip())
-
- text2 = ''
- text2 += word
-
- # the last message
- if text2.strip() != '':
- messages.append(text2.strip())
-
- len_messages = len(messages)
-
- for i, text in enumerate(messages):
- if i < len_messages - 1:
- text += '...'
- else:
- text += ' '
-
- if i >= 1:
- text += f'({i + 1}/{len_messages})'
- # only send image on first url
- if 'media_url' in params:
- del params['media_url']
-
- params['body'] = text
- params_to_send = {key: params[key] for key in insert_params if (key in params)}
- ret_row = self.handler.send_sms(params_to_send, ret_as_dict=True)
- ret_row['body'] = text
- ret.append(ret_row)
-
- return pd.DataFrame(ret)
-
-
-class TwilioHandler(APIHandler):
-
- def __init__(self, name=None, **kwargs):
- super().__init__(name)
-
- args = kwargs.get('connection_data', {})
-
- self.connection_args = {}
- handler_config = Config().get('twilio_handler', {})
- for k in ['account_sid', 'auth_token']:
- if k in args:
- self.connection_args[k] = args[k]
- elif f'TWILIO_{k.upper()}' in os.environ:
- self.connection_args[k] = os.environ[f'TWILIO_{k.upper()}']
- elif k in handler_config:
- self.connection_args[k] = handler_config[k]
-
- self.client = None
- self.is_connected = False
-
- messages = MessagesTable(self)
- phone_numbers = PhoneNumbersTable(self)
- self._register_table('messages', messages)
- self._register_table('phone_numbers', phone_numbers)
-
- def connect(self):
- """Authenticate with the Twilio API using the account_sid and auth_token provided in the constructor."""
- if self.is_connected is True:
- return self.client
-
- self.client = Client(
- self.connection_args['account_sid'],
- self.connection_args['auth_token']
- )
-
- self.is_connected = True
- return self.client
-
- def check_connection(self) -> StatusResponse:
- '''It evaluates if the connection with Twilio API is alive and healthy.'''
- response = StatusResponse(False)
-
- try:
- self.connect()
- # Maybe make a harmless API request to verify connection, but be mindful of rate limits and costs
- response.success = True
-
- except Exception as e:
- response.error_message = f'Error connecting to Twilio api: {e}. '
- logger.error(response.error_message)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def parse_native_query(self, query_string: str):
- """Parses the native query string of format method(arg1=val1, arg2=val2, ...) and returns the method name and arguments."""
-
- # Adjust regex to account for the possibility of no arguments inside the parenthesis
- match = re.match(r'(\w+)\(([^)]*)\)', query_string)
- if not match:
- raise ValueError(f"Invalid query format: {query_string}")
-
- method_name = match.group(1)
- arg_string = match.group(2)
-
- # Extract individual arguments
- args = {}
- if arg_string: # Check if there are any arguments
- for arg in arg_string.split(','):
- arg = arg.strip()
- key, value = arg.split('=')
- args[key.strip()] = value.strip()
-
- return method_name, args
-
- def native_query(self, query_string: str = None):
- '''It parses any native statement string and acts upon it (for example, raw syntax commands).'''
-
- method_name, params = self.parse_native_query(query_string)
- if method_name == 'send_sms':
- response = self.send_sms(params)
- elif method_name == 'fetch_messages':
- response = self.fetch_messages(params)
- elif method_name == 'list_phone_numbers':
- response = self.list_phone_numbers(params)
- else:
- raise ValueError(f"Method '{method_name}' not supported by TwilioHandler")
-
- return response
-
- def send_sms(self, params, ret_as_dict=False):
- message = self.client.messages.create(
- to=params.get("to_number"),
- from_=params.get('from_number'),
- body=params.get("body"),
- media_url=params.get("media_url")
- )
-
- if ret_as_dict is True:
- return {'sid': message.sid, 'status': message.status}
- return Response(
- RESPONSE_TYPE.MESSAGE,
- sid=message.sid,
- status=message.status
- )
-
- def fetch_messages(self, params, df=False):
- limit = int(params.get('limit', 1000))
- sid = params.get('sid', None)
- # Convert date strings to datetime objects if provided
- date_sent_after = params.get('date_sent_after', None)
- date_sent_before = params.get('date_sent_before', None)
- # Extract 'from_' and 'body' search criteria from params
- from_number = params.get('from_number', None)
- to_number = params.get('to_number', None)
- args = {
- 'limit': limit,
- 'date_sent_after': date_sent_after,
- 'date_sent_before': date_sent_before,
- 'from_': from_number,
- 'to': to_number
- }
-
- args = {arg: val for arg, val in args.items() if val is not None}
- if sid:
- messages = [self.client.messages(sid).fetch()]
- else:
- messages = self.client.messages.list(**args)
-
- # Extract all possible properties for each message
- data = []
- for msg in messages:
- msg_data = {
- 'sid': msg.sid,
- 'to_number': msg.to,
- 'from_number': msg.from_,
- 'body': msg.body,
- 'direction': msg.direction,
- 'msg_status': msg.status,
- 'sent_at': msg.date_created.replace(tzinfo=None),
- 'account_sid': msg.account_sid,
- 'price': msg.price,
- 'price_unit': msg.price_unit,
- 'api_version': msg.api_version,
- 'uri': msg.uri,
- # 'media_url': [media.uri for media in msg.media.list()]
- # ... Add other properties as needed
- }
- data.append(msg_data)
-
- if df is True:
- return pd.DataFrame(data)
- return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(data))
-
- def list_phone_numbers(self, params, df=False):
- limit = int(params.get('limit', 100))
- args = {
- 'limit': limit
- }
- args = {arg: val for arg, val in args.items() if val is not None}
- phone_numbers = self.client.incoming_phone_numbers.list(**args)
-
- # Extract properties for each phone number
- data = []
- for number in phone_numbers:
- num_data = {
- 'sid': number.sid,
- 'date_created': number.date_created,
- 'date_updated': number.date_updated,
- 'phone_number': number.phone_number,
- 'friendly_name': number.friendly_name,
- 'account_sid': number.account_sid,
- 'capabilities': number.capabilities,
- 'number_status': number.status,
- 'api_version': number.api_version,
- 'voice_url': number.voice_url,
- 'sms_url': number.sms_url,
- 'uri': number.uri,
- # ... Add other properties as needed
- }
- data.append(num_data)
-
- if df is True:
- return pd.DataFrame(data)
- return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(data))
diff --git a/mindsdb/integrations/handlers/twitter_handler/README.md b/mindsdb/integrations/handlers/twitter_handler/README.md
deleted file mode 100644
index 9e3b41c8301..00000000000
--- a/mindsdb/integrations/handlers/twitter_handler/README.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Build your own Twitter AI agent
-
-Imagine you want to engage with your followers on Twitter and answer all their questions promptly.
-Thanks to MindsDB, you can train an AI system that helps you manage automation on Twitter responses.
-
-
-For this particular example, we would like to automatically respond to the people that tweet about us as follows:
-- If the message is positive, write a short thank you note.
-- If the message is negative or the message has a question, invite them to our slack channel.
-
-
-We will build our Twitter AI tool in a few SQL commands in MindsDB.
-
-
-Let's start by connecting our Twitter account. To do that, you can follow these [steps](https://developer.twitter.com/en/docs/authentication/oauth-2-0/bearer-tokens) to obtain a BEARER TOKEN from twitter.
-
-
-```
-# Should be able to create a twitter database
-CREATE DATABASE my_twitter
-With
- ENGINE = 'twitter',
- PARAMETERS = {
- "bearer_token": "twitter bearer TOKEN"
- };
-```
-
-This creates a database called my_twitter. This database ships with a table called tweets that we can use to search for tweets as well as to write tweets.
-
-
-## Searching for tweets in SQL
-
-Let's get a list of tweets that contain or hashtag the word mindsdb
-
-```
-SELECT
- id, created_at, author_username, text
-FROM my_twitter.tweets
-WHERE
- query = '(mindsdb OR #mindsdb) -is:retweet -is:reply'
- AND created_at > '2023-02-16'
-LIMIT 20;
-```
-
-MindsDB Twitter integration also supports native queries, which in this case, will be calling any function in [tweepy]
-(https://docs.tweepy.org/en/stable/client.html)
-```
-# this should handle authentication and pagination for us
-SELECT * FROM my_twitter (
- search_recent_tweets(
- query = '(mindsdb OR #mindsdb) -is:retweet -is:reply'',
- start_time = '2023-02-16T00:00:00.000Z',
- max_results = 2
- )
-);
-```
-
-## Writing tweets using SQL
-
-Let's test by tweeting a few things.
-
-```
-INSERT INTO my_twitter.tweets (reply_to_tweet_id, text)
-VALUES
- (1626198053446369280, 'MindsDB is great! now its super simple to build ML powered apps'),
- (1626198053446369280, 'Holy!! MindsDB is the best thing they have invented for developers doing ML'),
-```
-
-Those tweets should be live now on Twitter, like magic, right?
-
-## Let's use AI to write responses for us
-
-To do this, we would like to create a machine-learning model that can write responses.
-We will be using OpenAI GPT-3 for this. The way it works is that we will create a model that can take a prompt and give a message based on that prompt.
-The query looks like:
-
-```
-CREATE MODEL mindsdb.twitter_response_model
-PREDICT response
-USING
- engine = 'openai',
- max_tokens = 200,
- prompt_template = 'from tweet "{{text}}" by "{{author_username}}", if their comment is a question, invite them to join the MindsDB slack using this link http://bitly.com/abc. Otherwise, simply write a thank you message';
-```
-
-This created a virtual AI table called twitter_response_model. We can query this model as if it was a table, but it will generate responses for us, as follows:
-
-```
-SELECT response FROM mindsdb.twitter_response_model
-WHERE author_username = '@pedro' and text = 'I love this, can I learn more?';
-```
-
-Now, let's test it with some of the tweets on Twitter. So, we are going to join that model with the query that we had worked on before that gets positive and neutral comments:
-
-```
-SELECT t.id, t.author_username, t.text, r.response
-FROM my_twitter.tweets t
-JOIN mindsdb.twitter_response_model r
-WHERE
- t.query = '(mindsdb OR #mindsdb) -is:retweet -is:reply''
-LIMIT 2
-```
-# Schedule a job
-
-Finally, we can now automate the responses by writing a job that:
-- Checks for new tweets
-- Generates a response using the OpenAI model
-- Tweets the responses back
-
-All this in one SQL command:
-
-```
-CREATE JOB auto_respond AS (
-
- INSERT INTO my_twitter.tweets (in_reply_to_tweet_id, text)
- SELECT
- t.id AS in_reply_to_tweet_id,
- r.response AS text
- FROM my_twitter.tweets t
- JOIN mindsdb.twitter_response_model r
- WHERE
- t.query = '(mindsdb OR #mindsdb) -is:retweet -is:reply''
- AND t.created_at > "{{PREVIOUS_START_DATETIME}}"
- limit 2
-)
-EVERY HOUR
-```
-
-And there it is every hour, we will be checking for the new tweets created_at = {{PREVIOUS_START_DATETIME}}, and insert into tweets, the responses generated by OpenAI GPT-3.
-
diff --git a/mindsdb/integrations/handlers/twitter_handler/__about__.py b/mindsdb/integrations/handlers/twitter_handler/__about__.py
deleted file mode 100644
index 4a9f0c28453..00000000000
--- a/mindsdb/integrations/handlers/twitter_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Twitter handler'
-__package_name__ = 'mindsdb_twitter_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Twitter"
-__author__ = 'MindsDB Inc'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/twitter_handler/__init__.py b/mindsdb/integrations/handlers/twitter_handler/__init__.py
deleted file mode 100644
index f7225d1cbc3..00000000000
--- a/mindsdb/integrations/handlers/twitter_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .twitter_handler import (
- TwitterHandler as Handler
- )
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Twitter'
-name = 'twitter'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/twitter_handler/icon.svg b/mindsdb/integrations/handlers/twitter_handler/icon.svg
deleted file mode 100644
index 4797d6b315d..00000000000
--- a/mindsdb/integrations/handlers/twitter_handler/icon.svg
+++ /dev/null
@@ -1,10 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/twitter_handler/requirements.txt b/mindsdb/integrations/handlers/twitter_handler/requirements.txt
deleted file mode 100644
index 6a62bcc576e..00000000000
--- a/mindsdb/integrations/handlers/twitter_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-tweepy
diff --git a/mindsdb/integrations/handlers/twitter_handler/twitter_handler.py b/mindsdb/integrations/handlers/twitter_handler/twitter_handler.py
deleted file mode 100644
index da83eaa7dc2..00000000000
--- a/mindsdb/integrations/handlers/twitter_handler/twitter_handler.py
+++ /dev/null
@@ -1,469 +0,0 @@
-import re
-import os
-import datetime as dt
-import time
-from collections import defaultdict
-import io
-import requests
-
-import pandas as pd
-import tweepy
-
-from mindsdb.utilities import log
-from mindsdb.utilities.config import Config
-
-from mindsdb_sql_parser import ast
-
-from mindsdb.integrations.libs.api_handler import APIHandler, APITable, FuncParser
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-from mindsdb.integrations.utilities.date_utils import parse_utc_date
-
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-logger = log.getLogger(__name__)
-
-
-class TweetsTable(APITable):
-
- def select(self, query: ast.Select) -> Response:
-
- conditions = extract_comparison_conditions(query.where)
-
- params = {}
- filters = []
- for op, arg1, arg2 in conditions:
-
- if op == 'or':
- raise NotImplementedError('OR is not supported')
- if arg1 == 'created_at':
- date = parse_utc_date(arg2)
- if op == '>':
- # "tweets/search/recent" doesn't accept dates earlier than 7 days
- if (dt.datetime.now(dt.timezone.utc) - date).days > 7:
- # skip this condition
- continue
- params['start_time'] = date
- elif op == '<':
- params['end_time'] = date
- else:
- raise NotImplementedError
-
- elif arg1 == 'query':
- if op == '=':
- params[arg1] = arg2
- else:
- NotImplementedError(f'Unknown op: {op}')
-
- elif arg1 == 'id':
- if op == '>':
- params['since_id'] = arg2
- elif op == '>=':
- raise NotImplementedError("Please use 'id > value'")
- elif op == '<':
- params['until_id'] = arg2
- elif op == '<=':
- raise NotImplementedError("Please use 'id < value'")
- else:
- NotImplementedError('Search with "id=" is not implemented')
-
- else:
- filters.append([op, arg1, arg2])
-
- if query.limit is not None:
- params['max_results'] = query.limit.value
-
- params['expansions'] = ['author_id', 'in_reply_to_user_id']
- params['tweet_fields'] = ['created_at', 'conversation_id', 'referenced_tweets']
- params['user_fields'] = ['name', 'username']
-
- if 'query' not in params:
- # search not works without query, use 'mindsdb'
- params['query'] = 'mindsdb'
-
- result = self.handler.call_twitter_api(
- method_name='search_recent_tweets',
- params=params,
- filters=filters
- )
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = []
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError
-
- if len(columns) == 0:
- columns = self.get_columns()
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- result = pd.DataFrame([], columns=columns)
- else:
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
- return result
-
- def get_columns(self):
- return [
- 'id',
- 'created_at',
- 'text',
- 'edit_history_tweet_ids',
- 'author_id',
- 'author_name',
- 'author_username',
- 'conversation_id',
- 'in_reply_to_tweet_id',
- 'in_retweeted_to_tweet_id',
- 'in_quote_to_tweet_id',
- 'in_reply_to_user_id',
- 'in_reply_to_user_name',
- 'in_reply_to_user_username',
- ]
-
- def insert(self, query: ast.Insert):
- # https://docs.tweepy.org/en/stable/client.html#tweepy.Client.create_tweet
- columns = [col.name for col in query.columns]
-
- insert_params = ('consumer_key', 'consumer_secret', 'access_token', 'access_token_secret')
- for p in insert_params:
- if p not in self.handler.connection_args:
- raise Exception(f'To insert data into Twitter, you need to provide the following parameters when connecting it to MindsDB: {insert_params}') # noqa
-
- for row in query.values:
- params = dict(zip(columns, row))
-
- # split long text over 280 symbols
- max_text_len = 280
- text = params['text']
-
- # Post image if column media_url is provided, only do this on last tweet
- media_ids = None
- if 'media_url' in params:
- media_url = params.pop('media_url')
-
- # create an in memory file
- resp = requests.get(media_url)
- img = io.BytesIO(resp.content)
-
- # upload media to twitter
- api_v1 = self.handler.create_connection(api_version=1)
- content_type = resp.headers['Content-Type']
- file_type = content_type.split('/')[-1]
- media = api_v1.media_upload(filename="img.{file_type}".format(file_type=file_type), file=img)
-
- media_ids = [media.media_id]
-
- words = re.split('( )', text)
-
- messages = []
-
- text2 = ''
- pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
- for word in words:
- # replace the links in word to string with the length as twitter short url (23)
- word2 = re.sub(pattern, '-' * 23, word)
- if len(text2) + len(word2) > max_text_len - 3 - 7: # 3 is for ..., 7 is for (10/11)
- messages.append(text2.strip())
-
- text2 = ''
- text2 += word
-
- # the last message
- if text2.strip() != '':
- messages.append(text2.strip())
-
- len_messages = len(messages)
- for i, text in enumerate(messages):
- if i < len_messages - 1:
- text += '...'
- else:
- text += ' '
- # publish media with the last message
- if media_ids is not None:
- params['media_ids'] = media_ids
-
- text += f'({i + 1}/{len_messages})'
-
- params['text'] = text
- ret = self.handler.call_twitter_api('create_tweet', params)
- inserted_id = ret.id[0]
- params['in_reply_to_tweet_id'] = inserted_id
-
-
-class TwitterHandler(APIHandler):
- """A class for handling connections and interactions with the Twitter API.
-
- Attributes:
- bearer_token (str): The consumer key for the Twitter app.
- api (tweepy.API): The `tweepy.API` object for interacting with the Twitter API.
-
- """
-
- def __init__(self, name=None, **kwargs):
- super().__init__(name)
-
- args = kwargs.get('connection_data', {})
-
- self.connection_args = {}
- handler_config = Config().get('twitter_handler', {})
- for k in ['bearer_token', 'consumer_key', 'consumer_secret',
- 'access_token', 'access_token_secret', 'wait_on_rate_limit']:
- if k in args:
- self.connection_args[k] = args[k]
- elif f'TWITTER_{k.upper()}' in os.environ:
- self.connection_args[k] = os.environ[f'TWITTER_{k.upper()}']
- elif k in handler_config:
- self.connection_args[k] = handler_config[k]
-
- self.api = None
- self.is_connected = False
-
- tweets = TweetsTable(self)
- self._register_table('tweets', tweets)
-
- def create_connection(self, api_version=2):
- if api_version == 1:
- auth = tweepy.OAuthHandler(
- self.connection_args['consumer_key'],
- self.connection_args['consumer_secret']
- )
- auth.set_access_token(
- self.connection_args['access_token'],
- self.connection_args['access_token_secret']
- )
- return tweepy.API(auth)
-
- return tweepy.Client(**self.connection_args)
-
- def connect(self, api_version=2):
- """Authenticate with the Twitter API using the API keys and secrets stored in the `consumer_key`, `consumer_secret`, `access_token`, and `access_token_secret` attributes.""" # noqa
-
- if self.is_connected is True:
- return self.api
-
- self.api = self.create_connection()
-
- self.is_connected = True
- return self.api
-
- def check_connection(self) -> StatusResponse:
-
- response = StatusResponse(False)
-
- try:
- api = self.connect()
-
- # call get_user with unknown id.
- # it raises an error in case if auth is not success and returns not-found otherwise
- # api.get_me() is not exposed for OAuth 2.0 App-only authorisation
- api.get_user(id=1)
- response.success = True
-
- except tweepy.Unauthorized as e:
- response.error_message = f'Error connecting to Twitter api: {e}. Check bearer_token'
- logger.error(response.error_message)
-
- if response.success is True and len(self.connection_args) > 1:
- # not only bearer_token, check read-write mode (OAuth 2.0 Authorization Code with PKCE)
- try:
- api = self.connect()
-
- api.get_me()
-
- except tweepy.Unauthorized as e:
- keys = 'consumer_key', 'consumer_secret', 'access_token', 'access_token_secret'
- response.error_message = f'Error connecting to Twitter api: {e}. Check' + ', '.join(keys)
- logger.error(response.error_message)
-
- response.success = False
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query_string: str = None):
- method_name, params = FuncParser().from_string(query_string)
-
- df = self.call_twitter_api(method_name, params)
-
- return Response(
- RESPONSE_TYPE.TABLE,
- data_frame=df
- )
-
- def _apply_filters(self, data, filters):
- if not filters:
- return data
-
- data2 = []
- for row in data:
- add = False
- for op, key, value in filters:
- value2 = row.get(key)
- if isinstance(value, int):
- # twitter returns ids as string
- value = str(value)
-
- if op in ('!=', '<>'):
- if value == value2:
- break
- elif op in ('==', '='):
- if value != value2:
- break
- elif op == 'in':
- if not isinstance(value, list):
- value = [value]
- if value2 not in value:
- break
- elif op == 'not in':
- if not isinstance(value, list):
- value = [value]
- if value2 in value:
- break
- else:
- raise NotImplementedError(f'Unknown filter: {op}')
- # only if there wasn't breaks
- add = True
- if add:
- data2.append(row)
- return data2
-
- def call_twitter_api(self, method_name: str = None, params: dict = None, filters: list = None):
-
- # method > table > columns
- expansions_map = {
- 'search_recent_tweets': {
- 'users': ['author_id', 'in_reply_to_user_id'],
- },
- 'search_all_tweets': {
- 'users': ['author_id'],
- },
- }
-
- api = self.connect()
- method = getattr(api, method_name)
-
- # pagination handle
-
- count_results = None
- if 'max_results' in params:
- count_results = params['max_results']
-
- data = []
- includes = defaultdict(list)
-
- max_page_size = 100
- min_page_size = 10
- left = None
-
- limit_exec_time = time.time() + 60
-
- if filters:
- # if we have filters: do big page requests
- params['max_results'] = max_page_size
-
- while True:
- if time.time() > limit_exec_time:
- raise RuntimeError('Handler request timeout error')
-
- if count_results is not None:
- left = count_results - len(data)
- if left == 0:
- break
- elif left < 0:
- # got more results that we need
- data = data[:left]
- break
-
- if left > max_page_size:
- params['max_results'] = max_page_size
- elif left < min_page_size:
- params['max_results'] = min_page_size
- else:
- params['max_results'] = left
-
- logger.debug(f'>>>twitter in: {method_name}({params})')
- resp = method(**params)
-
- if hasattr(resp, 'includes'):
- for table, records in resp.includes.items():
- includes[table].extend([r.data for r in records])
-
- if isinstance(resp.data, list):
- chunk = [r.data for r in resp.data]
- else:
- if isinstance(resp.data, dict):
- data.append(resp.data)
- if hasattr(resp.data, 'data') and isinstance(resp.data.data, dict):
- data.append(resp.data.data)
- break
-
- # unwind columns
- for row in chunk:
- if 'referenced_tweets' in row:
- refs = row['referenced_tweets']
- if isinstance(refs, list) and len(refs) > 0:
- if refs[0]['type'] == 'replied_to':
- row['in_reply_to_tweet_id'] = refs[0]['id']
- if refs[0]['type'] == 'retweeted':
- row['in_retweeted_to_tweet_id'] = refs[0]['id']
- if refs[0]['type'] == 'quoted':
- row['in_quote_to_tweet_id'] = refs[0]['id']
-
- if filters:
- chunk = self._apply_filters(chunk, filters)
-
- # limit output
- if left is not None:
- chunk = chunk[:left]
-
- data.extend(chunk)
- # next page ?
- if count_results is not None and hasattr(resp, 'meta') and 'next_token' in resp.meta:
- params['next_token'] = resp.meta['next_token']
- else:
- break
-
- df = pd.DataFrame(data)
-
- # enrich
- expansions = expansions_map.get(method_name)
- if expansions is not None:
- for table, records in includes.items():
- df_ref = pd.DataFrame(records)
-
- if table not in expansions:
- continue
-
- for col_id in expansions[table]:
- col = col_id[:-3] # cut _id
- if col_id not in df.columns:
- continue
-
- col_map = {
- col_ref: f'{col}_{col_ref}'
- for col_ref in df_ref.columns
- }
- df_ref2 = df_ref.rename(columns=col_map)
- df_ref2 = df_ref2.drop_duplicates(col_id)
-
- df = df.merge(df_ref2, on=col_id, how='left')
-
- return df
diff --git a/mindsdb/integrations/handlers/vertex_handler/README.md b/mindsdb/integrations/handlers/vertex_handler/README.md
deleted file mode 100644
index c20ce65b813..00000000000
--- a/mindsdb/integrations/handlers/vertex_handler/README.md
+++ /dev/null
@@ -1,95 +0,0 @@
-[Vertex AI](https://cloud.google.com/vertex-ai) offers everything you need to build and use generative AIβfrom AI solutions, to Search and Conversation, to 130+ foundation models, to a unified AI platform.
-
-## Setup
-
-MindsDB provides the Vertex handler that enables you to connect Vertex AI models within MindsDB.
-
-
-### AI Engine
-
-Before creating a model, it is required to create an AI engine based on the provided handler.
-
-> If you installed MindsDB locally, make sure to install all Vertex dependencies by running `pip install mindsdb[vertex]` or `pip install .[vertex]`.
-
-You can create an Vertex engine using this command:
-
-```sql
-CREATE ML_ENGINE vertex FROM vertex
-USING
- project_id="vertex-1111",
- location="us-central1",
- staging_bucket="gs://my_staging_bucket",
- experiment="my-experiment",
- experiment_description="my experiment description",
- service_account_key_json = {
- "type": "service_account",
- "project_id": "vertex-1111",
- "private_key_id": "aaaaaaaaaa",
- "private_key": "---------BIG STRING WITH KEY-------\n",
- "client_email": "testvertexvaitest-11111.iam.gserviceaccount.com",
- "client_id": "1111111111111",
- "auth_uri": "https://accounts.google.com/o/oauth2/auth",
- "token_uri": "https://oauth2.googleapis.com/token",
- "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
- "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/testbigquery%40bgtest-11111.iam.gserviceaccount.com",
- "universe_domain": "googleapis.com"
- };
-```
-
-> Please note that you need to provide your service_account key here. It is also possible to pass your service account key as either a file path or a URL using the 'service_account_key_file` and `service_account_key_url` parameters respectively.
-
-```sql
-CREATE ML_ENGINE vertex FROM vertex
-USING
- project_id="vertex-1111",
- location="us-central1",
- staging_bucket="gs://my_staging_bucket",
- experiment="my-experiment",
- experiment_description="my experiment description",
- service_account_key_file="/home/user/MyProjects/vertex-1111.json";
-```
-
-```sql
-CREATE ML_ENGINE vertex FROM vertex
-USING
- project_id="vertex-1111",
- location="us-central1",
- staging_bucket="gs://my_staging_bucket",
- experiment="my-experiment",
- experiment_description="my experiment description",
- service_account_key_url="https://storage.googleapis.com/vertex-1111.json?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=example%40example.iam.gserviceaccount.com%2F20220215%2Fus-central1%2Fstorage%2Fgoog4_request&X-Goog-Date=20220215T000000Z&X-Goog-Expires=3600&X-Goog-SignedHeaders=host&X-Goog-Signature=abcd1234";
-```
-
-
-The name of the engine (here, `vertex`) should be used as a value for the `engine` parameter in the `USING` clause of the `CREATE MODEL` statement.
-
-### AI Model
-
-The [`CREATE MODEL`](/sql/create/model) statement is used to create, train, and deploy models within MindsDB.
-
-```sql
-CREATE MODEL mindsdb.vertex_anomaly_detection_model
-PREDICT cut
-USING
- engine = 'vertex',
- model_name = 'diamonds_anomaly_detection',
- custom_model = True;
-```
-
-Where:
-
-| Name | Description |
-|-------------------|---------------------------------------------------------------------------|
-| `engine` | It defines the Vertex engine. |
-| `model_name` | It is used to provide the name of the model. |
-| `custom_model` | Is it custom model or not |
-
-## Usage
-
-Once you have created an Vertex model, you can use it to make predictions.
-
-```sql
-SELECT t.cut, m.cut as anomaly
-FROM files.vertex_anomaly_detection as t
-JOIN mindsdb.vertex_anomaly_detection_model as m;
-```
diff --git a/mindsdb/integrations/handlers/vertex_handler/__about__.py b/mindsdb/integrations/handlers/vertex_handler/__about__.py
deleted file mode 100644
index 16ce5f91660..00000000000
--- a/mindsdb/integrations/handlers/vertex_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Vertex handler"
-__package_name__ = "mindsdb_vertex_handler"
-__version__ = "0.0.0"
-__description__ = "MindsDB handler for Google Vertex AI API"
-__author__ = "MindsDB Inc"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2022- mindsdb"
diff --git a/mindsdb/integrations/handlers/vertex_handler/__init__.py b/mindsdb/integrations/handlers/vertex_handler/__init__.py
deleted file mode 100644
index 0ff792afcf3..00000000000
--- a/mindsdb/integrations/handlers/vertex_handler/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .vertex_handler import VertexHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Google Vertex AI"
-name = "vertex"
-type = HANDLER_TYPE.ML
-icon_path = "icon.png"
-permanent = False
-
-__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"]
diff --git a/mindsdb/integrations/handlers/vertex_handler/icon.png b/mindsdb/integrations/handlers/vertex_handler/icon.png
deleted file mode 100644
index f20026f37b3..00000000000
Binary files a/mindsdb/integrations/handlers/vertex_handler/icon.png and /dev/null differ
diff --git a/mindsdb/integrations/handlers/vertex_handler/requirements.txt b/mindsdb/integrations/handlers/vertex_handler/requirements.txt
deleted file mode 100644
index afe78fb59a1..00000000000
--- a/mindsdb/integrations/handlers/vertex_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-google-cloud-aiplatform>=1.35.0
--r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/vertex_handler/vertex_client.py b/mindsdb/integrations/handlers/vertex_handler/vertex_client.py
deleted file mode 100755
index 9f5d98529c5..00000000000
--- a/mindsdb/integrations/handlers/vertex_handler/vertex_client.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from mindsdb.utilities import log
-from google.cloud.aiplatform import init, TabularDataset, Model, Endpoint
-import pandas as pd
-
-from mindsdb.integrations.utilities.handlers.auth_utilities.google import GoogleServiceAccountOAuth2Manager
-
-logger = log.getLogger(__name__)
-
-
-class VertexClient:
- """A class to interact with Vertex AI"""
-
- def __init__(self, args_json, credentials_url=None, credentials_file=None, credentials_json=None):
- google_sa_oauth2_manager = GoogleServiceAccountOAuth2Manager(
- credentials_url=credentials_url,
- credentials_file=credentials_file,
- credentials_json=credentials_json,
- )
- credentials = google_sa_oauth2_manager.get_oauth2_credentials()
-
- init(
- credentials=credentials,
- project=args_json["project_id"],
- location=args_json["location"],
- staging_bucket=args_json["staging_bucket"],
- # the name of the experiment to use to track
- # logged metrics and parameters
- experiment=args_json["experiment"],
- # description of the experiment above
- experiment_description=args_json["experiment_description"],
- )
-
- def print_datasets(self):
- """Print all datasets and dataset ids in the project"""
- for dataset in TabularDataset.list():
- logger.info(f"Dataset display name: {dataset.display_name}, ID: {dataset.name}")
-
- def print_models(self):
- """Print all model names and model ids in the project"""
- for model in Model.list():
- logger.info(f"Model display name: {model.display_name}, ID: {model.name}")
-
- def print_endpoints(self):
- """Print all endpoints and endpoint ids in the project"""
- for endpoint in Endpoint.list():
- logger.info(f"Endpoint display name: {endpoint.display_name}, ID: {endpoint.name}")
-
- def get_model_by_display_name(self, display_name):
- """Get a model by its display name"""
- try:
- return Model.list(filter=f"display_name={display_name}")[0]
- except IndexError:
- logger.info(f"Model with display name {display_name} not found")
-
- def get_endpoint_by_display_name(self, display_name):
- """Get an endpoint by its display name"""
- try:
- return Endpoint.list(filter=f"display_name={display_name}")[0]
- except IndexError:
- logger.info(f"Endpoint with display name {display_name} not found")
-
- def get_model_by_id(self, model_id):
- """Get a model by its ID"""
- try:
- return Model(model_name=model_id)
- except IndexError:
- logger.info(f"Model with ID {model_id} not found")
-
- def deploy_model(self, model):
- """Deploy a model to an endpoint - long runtime"""
- endpoint = model.deploy()
- return endpoint
-
- def predict_from_df(self, endpoint_display_name, df, custom_model=False):
- """Make a prediction from a Pandas dataframe"""
- endpoint = self.get_endpoint_by_display_name(endpoint_display_name)
- if custom_model:
- records = df.values.tolist()
- else:
- records = df.astype(str).to_dict(orient="records") # list of dictionaries
- prediction = endpoint.predict(instances=records)
- return prediction
-
- def predict_from_csv(self, endpoint_display_name, csv_to_predict):
- """Make a prediction from a CSV file"""
- df = pd.read_csv(csv_to_predict)
- return self.predict_from_df(endpoint_display_name, df)
-
- def predict_from_dict(self, endpoint_display_name, data):
-
- # convert to list of dictionaries
- instances = [dict(zip(data.keys(), values)) for values in zip(*data.values())]
- endpoint = self.get_endpoint_by_display_name(endpoint_display_name)
- prediction = endpoint.predict(instances=instances)
- return prediction
diff --git a/mindsdb/integrations/handlers/vertex_handler/vertex_handler.py b/mindsdb/integrations/handlers/vertex_handler/vertex_handler.py
deleted file mode 100644
index d2bef2a2be9..00000000000
--- a/mindsdb/integrations/handlers/vertex_handler/vertex_handler.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import pandas as pd
-from mindsdb.integrations.libs.base import BaseMLEngine
-from mindsdb.integrations.handlers.vertex_handler.vertex_client import VertexClient
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class VertexHandler(BaseMLEngine):
- """Handler for the Vertex Google AI cloud API"""
-
- name = "Vertex"
-
- def create(self, target, args=None, **kwargs):
- """Logs in to Vertex and deploy a pre-trained model to an endpoint.
-
- If the endpoint already exists for the model, we do nothing.
-
- If the endpoint does not exist, we create it and deploy the model to it.
- The runtime for this is long, it took 15 minutes for a small model.
- """
- assert "using" in args, "Must provide USING arguments for this handler"
- args = args["using"]
-
- model_name = args.pop("model_name")
- custom_model = args.pop("custom_model", False)
-
- # get credentials from engine
- credentials_url, credentials_file, credentials_json = self._get_credentials_from_engine()
-
- # get vertex args from handler then update args from model
- vertex_args = self.engine_storage.json_get('args')
- vertex_args.update(args)
-
- vertex = VertexClient(vertex_args, credentials_url, credentials_file, credentials_json)
-
- model = vertex.get_model_by_display_name(model_name)
- if not model:
- raise Exception(f"Vertex model {model_name} not found")
- endpoint_name = model_name + "_endpoint"
- if vertex.get_endpoint_by_display_name(endpoint_name):
- logger.info(f"Endpoint {endpoint_name} already exists, skipping deployment")
- else:
- logger.info(f"Starting deployment at {endpoint_name}")
- endpoint = vertex.deploy_model(model)
- endpoint.display_name = endpoint_name
- endpoint.update()
- logger.info(f"Endpoint {endpoint_name} deployed")
-
- predict_args = {}
- predict_args["target"] = target
- predict_args["endpoint_name"] = endpoint_name
- predict_args["custom_model"] = custom_model
- self.model_storage.json_set("predict_args", predict_args)
- self.model_storage.json_set("vertex_args", vertex_args)
-
- def predict(self, df, args=None):
- """Predict using the deployed model by calling the endpoint."""
-
- if "__mindsdb_row_id" in df.columns:
- df.drop("__mindsdb_row_id", axis=1, inplace=True) # TODO is this required?
-
- predict_args = self.model_storage.json_get("predict_args")
- vertex_args = self.model_storage.json_get("vertex_args")
-
- # get credentials from engine
- credentials_url, credentials_file, credentials_json = self._get_credentials_from_engine()
-
- vertex = VertexClient(vertex_args, credentials_url, credentials_file, credentials_json)
- results = vertex.predict_from_df(predict_args["endpoint_name"], df, custom_model=predict_args["custom_model"])
-
- if predict_args["custom_model"]:
- return pd.DataFrame(results.predictions, columns=[predict_args["target"]])
- else:
- return pd.DataFrame(results.predictions)
-
- def create_engine(self, connection_args):
- # check if one of credentials_url, credentials_file, or credentials_json is provided
- if 'service_account_key_url' not in connection_args and 'service_account_key_file' not in connection_args and 'service_account_key_json' not in connection_args:
- raise KeyError('Either service_account_key_url, service_account_key_file, or service_account_key_json must be provided')
-
- self.engine_storage.json_set('args', connection_args)
-
- def _get_credentials_from_engine(self):
- engine_args = self.engine_storage.json_get('args')
-
- return engine_args.get('service_account_key_url'), engine_args.get('service_account_key_file'), engine_args.get('service_account_key_json')
diff --git a/mindsdb/integrations/handlers/vertica_handler/README.md b/mindsdb/integrations/handlers/vertica_handler/README.md
deleted file mode 100644
index c488b3b628e..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Vertica Handler
-
-This is the implementation of the Vertica handler for MindsDB.
-
-## Vertica
-The column-oriented Vertica Analytics Platform was designed to manage large, fast-growing volumes of data and with fast query performance for data warehouses and other query-intensive applications. The product claims to greatly improve query performance over traditional relational database systems, and to provide high availability and exabyte scalability on commodity enterprise servers. Vertica runs on multiple cloud computing systems as well as on Hadoop nodes. Vertica's Eon Mode separates compute from storage, using S3 object storage and dynamic allocation of compute notes
-
-## Implementation
-This handler was implemented using the `vertica-python`, a Python library that allows you to use Python code to run SQL commands on Vertica Database.
-
-The required arguments to establish a connection are,
-* `user`: username associated with database
-* `password`: password to authenticate your access
-* `host`: host to server IP Address or hostname
-* `port`: port through which TCPIP connection is to be made
-* `database`: Database name to be connected
-* `schema`: schema name to get tables
-
-## Usage
-In order to make use of this handler and connect to Vertica in MindsDB, the following syntax can be used,
-~~~~sql
-CREATE DATABASE vertica_datasource
-WITH
-engine='vertica',
-parameters={
- "user":"dbadmin",
- "password":"password",
- "host":"127.0.0.1",
- "port":5433,
- "schema_name":"public",
- "database":"VMart"
-};
-~~~~
-
-Now, you can use this established connection to query your database as follows,
-~~~~sql
-SELECT * FROM vertica_datasource.TEST;
-~~~~
diff --git a/mindsdb/integrations/handlers/vertica_handler/__about__.py b/mindsdb/integrations/handlers/vertica_handler/__about__.py
deleted file mode 100644
index 471521dca24..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Vertica handler'
-__package_name__ = 'mindsdb_vertica_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Vertica"
-__author__ = 'Parthiv Makwana'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/vertica_handler/__init__.py b/mindsdb/integrations/handlers/vertica_handler/__init__.py
deleted file mode 100644
index 3eb42e18d09..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .vertica_handler import VerticaHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Vertica'
-name = 'vertica'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'connection_args', 'connection_args_example', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/vertica_handler/connection_args.py b/mindsdb/integrations/handlers/vertica_handler/connection_args.py
deleted file mode 100644
index 31c319fe63a..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/connection_args.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the Vertica server.'
- },
- password={
- 'type': ARG_TYPE.PWD,
- 'description': 'The password to authenticate the user with the VERTICA server.',
- 'secret': True
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'The database name to use when connecting with the VERTICA server.'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the VERTICA server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'The TCP/IP port of the VERTICA server. Must be an integer.'
- },
- schema_name={
- 'type': ARG_TYPE.STR,
- 'description': 'Table are listed according to schema name (it is optional). Note: Default value is "public"'
- }
-)
-
-connection_args_example = OrderedDict(
- host='127.0.0.1',
- port=5433,
- user='root',
- password='password',
- database='database',
- schema_name='xyz'
-)
diff --git a/mindsdb/integrations/handlers/vertica_handler/icon.svg b/mindsdb/integrations/handlers/vertica_handler/icon.svg
deleted file mode 100644
index 8238e34339f..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/icon.svg
+++ /dev/null
@@ -1,8 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/vertica_handler/requirements.txt b/mindsdb/integrations/handlers/vertica_handler/requirements.txt
deleted file mode 100644
index 175898794d5..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-vertica-python
-sqlalchemy-vertica-python
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/vertica_handler/tests/__init__.py b/mindsdb/integrations/handlers/vertica_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py b/mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py
deleted file mode 100644
index d7d09122a22..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.vertica_handler.vertica_handler import VerticaHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class VerticaHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": '127.0.0.1',
- "port": 5433,
- "user": 'dbadmin',
- "password": '',
- "database": 'VMart',
- "schema_name": 'public'
- }
-
- }
- cls.handler = VerticaHandler('test_vertica_handler', cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_connect(self):
- assert self.handler.connect()
-
- def test_2_create_table(self):
- query = "CREATE Table TEST(id Number(1),Name Varchar(33))"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_3_insert(self):
- query = "INSERT INTO TEST (1,'lOVe yOU)"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_4_native_query_select(self):
- query = "SELECT * FROM TEST;"
- result = self.handler.query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_5_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.TABLE
-
- def test_6_get_columns(self):
- columns = self.handler.get_columns('TEMP')
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/vertica_handler/vertica_handler.py b/mindsdb/integrations/handlers/vertica_handler/vertica_handler.py
deleted file mode 100644
index 2df20f6ed98..00000000000
--- a/mindsdb/integrations/handlers/vertica_handler/vertica_handler.py
+++ /dev/null
@@ -1,157 +0,0 @@
-from typing import Optional
-
-import pandas as pd
-import vertica_python as vp
-
-from mindsdb_sql_parser import parse_sql
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb_sql_parser.ast.base import ASTNode
-
-from mindsdb.utilities import log
-from mindsdb.integrations.libs.base import DatabaseHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-# from sqlalchemy_vertica.dialect_pyodbc import VerticaDialect
-from sqla_vertica_python.vertica_python import VerticaDialect
-
-logger = log.getLogger(__name__)
-
-
-class VerticaHandler(DatabaseHandler):
- """
- This handler handles connection and execution of the Vertica statements.
- """
-
- name = 'vertica'
-
- def __init__(self, name, connection_data: Optional[dict], **kwargs):
- super().__init__(name)
-
- self.parser = parse_sql
- self.dialect = 'vertica'
- self.kwargs = kwargs
- self.connection_data = connection_data
- self.schema_name = connection_data['schema_name'] if 'schema_name' in connection_data else "public"
-
- self.connection = None
- self.is_connected = False
-
- def connect(self):
- if self.is_connected is True:
- return self.connection
-
- config = {
- 'host': self.connection_data['host'],
- 'port': self.connection_data['port'],
- 'user': self.connection_data['user'],
- 'password': self.connection_data['password'],
- 'database': self.connection_data['database']
- }
-
- connection = vp.connect(**config)
- self.is_connected = True
- self.connection = connection
- return self.connection
-
- def disconnect(self):
- if self.is_connected is False:
- return
- self.connection.close()
- self.is_connected = False
- return
-
- def check_connection(self) -> StatusResponse:
-
- result = StatusResponse(False)
- need_to_close = self.is_connected is False
-
- try:
- connection = self.connect()
- result.success = connection.opened()
- except Exception as e:
- logger.error(f'Error connecting to Vertica {self.connection_data["database"]}, {e}!')
- result.error_message = str(e)
-
- if result.success is True and need_to_close:
- self.disconnect()
- if result.success is False and self.is_connected is True:
- self.is_connected = False
-
- return result
-
- def native_query(self, query: str) -> Response:
- """
- Receive SQL query and runs it
- :param query: The SQL query to run in VERTICA
- :return: returns the records from the current recordset
- """
-
- need_to_close = self.is_connected is False
-
- connection = self.connect()
- with connection.cursor() as cur:
- try:
- e = cur.execute(query)
- result = e.fetchall()
- if e.rowcount != -1:
-
- response = Response(
- RESPONSE_TYPE.TABLE,
- pd.DataFrame(
- result,
- columns=[x.name for x in cur.description]
- )
- )
- else:
- response = Response(RESPONSE_TYPE.OK)
- connection.commit()
- except Exception as e:
- logger.error(f'Error running query: {query} on {self.connection_data["database"]}!')
- response = Response(
- RESPONSE_TYPE.ERROR,
- error_message=str(e)
- )
- connection.rollback()
-
- if need_to_close is True:
- self.disconnect()
-
- return response
-
- def query(self, query: ASTNode) -> Response:
- """
- Retrieve the data from the SQL statement.
- """
- renderer = SqlalchemyRender(VerticaDialect)
- query_str = renderer.get_string(query, with_failback=True)
- return self.native_query(query_str)
-
- def get_tables(self) -> Response:
- """
- Get a list with all of the tabels in VERTICA
- """
- q = f'''SELECT
- TABLE_NAME,
- TABLE_SCHEMA
- from v_catalog.tables
- WHERE table_schema='{self.schema_name}'
- order by
- table_name;'''
-
- return self.native_query(q)
-
- def get_columns(self, table_name) -> Response:
- """
- Show details about the table
- """
- q = f'''SELECT
- column_name ,
- data_type
- FROM v_catalog.columns
- WHERE table_name='{table_name}';'''
-
- return self.native_query(q)
diff --git a/mindsdb/integrations/handlers/vitess_handler/README.md b/mindsdb/integrations/handlers/vitess_handler/README.md
deleted file mode 100644
index 722096ceab5..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# Vitess Handler
-
-This is the implementation of the Vitess Handler for MindsDB.
-
-## Vitess
-Vitess is a database solution for deploying, scaling and managing large clusters of open-source database instances. It currently supports MySQL and Percona Server for MySQL. It's architected to run as effectively in a public or private cloud architecture as it does on dedicated hardware. It combines and extends many important SQL features with the scalability of a NoSQL database. Vitess can help you with the following problems:
-
- * Scaling a SQL database by allowing you to shard it, while keeping application changes to a minimum.
- * Migrating from baremetal to a private or public cloud.
- * Deploying and managing a large number of SQL database instances.
-
-## Implementation
-
-This handler was implemented by extending mysql connector.
-
-The required arguments to establish a connection are:
-
-* `host`: the host name of the Vitess connection
-* `port`: the port to use when connecting
-* `user`: the user to authenticate
-* `password`: the password to authenticate the user
-* `database`: database name
-
-## Usage
-
-In order to make use of this handler and connect to a Vitess server in MindsDB, the following syntax can be used:
-
-```sql
-CREATE DATABASE vitess_datasource
-WITH ENGINE = "vitess",
-PARAMETERS = {
- "user": "root",
- "password": "",
- "host": "localhost",
- "port": 33577,
- "database": "commerce"
-}
-```
-
-Now, you can use this established connection to query your database as follows:
-
-```sql
-SELECT * FROM vitess_datasource.product LIMIT 10;
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/vitess_handler/__about__.py b/mindsdb/integrations/handlers/vitess_handler/__about__.py
deleted file mode 100644
index 44a40e45250..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Vitess handler'
-__package_name__ = 'mindsdb_vitess_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Vitess"
-__author__ = 'Parthiv Makwana'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/vitess_handler/__init__.py b/mindsdb/integrations/handlers/vitess_handler/__init__.py
deleted file mode 100644
index eef59b931a9..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-try:
- from .vitess_handler import VitessHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-from .__about__ import __version__ as version, __description__ as description
-
-
-title = 'Vitess'
-name = 'vitess'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/vitess_handler/icon.svg b/mindsdb/integrations/handlers/vitess_handler/icon.svg
deleted file mode 100644
index b1b42d53774..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/icon.svg
+++ /dev/null
@@ -1,23 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/vitess_handler/requirements.txt b/mindsdb/integrations/handlers/vitess_handler/requirements.txt
deleted file mode 100644
index ee467569031..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--r mindsdb/integrations/handlers/mysql_handler/requirements.txt
diff --git a/mindsdb/integrations/handlers/vitess_handler/tests/__init__.py b/mindsdb/integrations/handlers/vitess_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py b/mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py
deleted file mode 100644
index 2caaeb88fcf..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.vitess_handler.vitess_handler import VitessHandler
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-
-
-class VitessHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "localhost",
- "port": 33577,
- "user": "root",
- "password": "",
- "database": "vitess",
- }
- }
- cls.handler = VitessHandler('test_vitess_handler', **cls.kwargs)
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_connect(self):
- assert self.handler.connect()
-
- def test_2_create_table(self):
- query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_3_insert(self):
- query = "INSERT INTO LOVER VALUES('Shiv Shakti');"
- result = self.handler.query(query)
- assert result.type is not RESPONSE_TYPE.ERROR
-
- def test_4_native_query_select(self):
- query = "SELECT * FROM LOVER;"
- result = self.handler.query(query)
- assert result.type is RESPONSE_TYPE.TABLE
-
- def test_5_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is RESPONSE_TYPE.TABLE
-
- def test_6_get_columns(self):
- columns = self.handler.get_columns('LOVER')
-
- query = "DROP Table IF EXISTS Lover;"
- self.handler.query(query)
- assert columns.type is not RESPONSE_TYPE.ERROR
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/vitess_handler/vitess_handler.py b/mindsdb/integrations/handlers/vitess_handler/vitess_handler.py
deleted file mode 100644
index b541648970d..00000000000
--- a/mindsdb/integrations/handlers/vitess_handler/vitess_handler.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-from collections import OrderedDict
-
-
-from mindsdb.integrations.handlers.mysql_handler import Handler as MysqlHandler
-
-
-class VitessHandler(MysqlHandler):
- """
- This handler handles connection and execution of the Vitess statements.
- """
- name = 'vitess'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
-
-
-connection_args = OrderedDict(
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the Vitess server.'
- },
- password={
- 'type': ARG_TYPE.STR,
- 'description': 'The password to authenticate the user with the Vitess server.'
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'The database name to use when connecting with the Vitess server.'
- },
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the Vitess server.'
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'The TCP/IP port of the Vitess server. Must be an integer.'
- }
-)
-
-connection_args_example = OrderedDict(
- host='127.0.0.1',
- port=5432,
- user='root',
- password='??>',
- database='database'
-)
diff --git a/mindsdb/integrations/handlers/weaviate_handler/README.md b/mindsdb/integrations/handlers/weaviate_handler/README.md
deleted file mode 100644
index 05c095fde10..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/README.md
+++ /dev/null
@@ -1,115 +0,0 @@
-# Weaviate Handler
-
-This is the implementation of the Weaviate for MindsDB.
-
-## Weaviate
-
-Weaviate is an open-source vector database. It allows you to store data objects and vector embeddings from your favorite ML-models, and scale seamlessly into billions of data objects.
-_
-## Implementation
-
-This handler uses `weaviate-client` python library connect to a weaviate instance.
-
-The required arguments to establish a connection are:
-
-* `weaviate_url`: url of the weaviate database
-* `weaviate_api_key`: API key to authenticate with weaviate (in case of cloud instance).
-* `persistence_directory`: directory to be used in case of local storage
-
-
-### Creating connection
-
-In order to make use of this handler and connect to a Weaviate server in MindsDB, the following syntax can be used:
-
-```sql
-CREATE DATABASE weaviate_datasource
- WITH ENGINE = "weaviate",
- PARAMETERS = {
- "weaviate_url" : "https://sample.weaviate.network",
- "weaviate_api_key": "api-key"
-};
-```
-
-```sql
-CREATE DATABASE weaviate_datasource
- WITH ENGINE = "weaviate",
- PARAMETERS = {
- "weaviate_url" : "https://localhost:8080",
-};
-```
-
-```sql
-CREATE DATABASE weaviate_datasource
- WITH ENGINE = "weaviate",
- PARAMETERS = {
- "persistence_directory" : "db_path",
-};
-```
-
-### Dropping connection
-
-To drop the connection, use this command
-
-```sql
-DROP DATABASE weaviate_datasource;
-```
-
-### Creating tables
-
-To insert data from a pre-existing table, use `CREATE`
-
-```sql
-CREATE TABLE weaviate_datascource.test
-(SELECT * FROM sqlitedb.test);
-```
-As weaviate currently doesn't support json field.
-So, this creates another table for the "metadata" field and a reference is created in the original table which points to
-its metadata entry.
-
-Weaviate follows GraphQL conventions where classes (which are table schemas) start with a capital letter and
-properties start with a lowercase letter.
-
-So whenever we create a table, the table's name gets capitalized.
-
-### Dropping collections
-
-To drop a Weaviate table use this command
-
-```sql
-DROP TABLE weaviate_datasource.tablename;
-```
-
-### Querying and selecting
-
-To query database using a search vector, you can use `search_vector` or `embeddings` in `WHERE` clause
-
-```sql
-SELECT * from weaviate_datasource.test
-WHERE search_vector = '[3.0, 1.0, 2.0, 4.5]'
-LIMIT 10;
-```
-
-Basic query
-
-```sql
-SELECT * from weaviate_datasource.test
-```
-
-You can use `WHERE` clause on dynamic fields like normal SQL
-
-```sql
-SELECT * FROM weaviate_datasource.createtest
-WHERE category = "science";
-```
-
-### Deleting records
-
-You can delete entries using `DELETE` just like in SQL.
-
-
-```sql
-DELETE FROM weaviate_datasource.test
-WHERE id IN (1, 2, 3);
-```
-
-Update is not supported by mindsdb vector database
diff --git a/mindsdb/integrations/handlers/weaviate_handler/__about__.py b/mindsdb/integrations/handlers/weaviate_handler/__about__.py
deleted file mode 100644
index 300463b217d..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Weaviate handler"
-__package_name__ = "mindsdb_weaviate_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for weaviate"
-__author__ = "Abhijit Pal"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/weaviate_handler/__init__.py b/mindsdb/integrations/handlers/weaviate_handler/__init__.py
deleted file mode 100644
index 9d0c4e9cd66..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __description__ as description
-from .__about__ import __version__ as version
-from .connection_args import connection_args, connection_args_example
-try:
- from .weaviate_handler import WeaviateDBHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Weaviate"
-name = "weaviate"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "connection_args",
- "connection_args_example",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/weaviate_handler/connection_args.py b/mindsdb/integrations/handlers/weaviate_handler/connection_args.py
deleted file mode 100644
index 0ea92e43e8c..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/connection_args.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- weaviate_url={
- "type": ARG_TYPE.STR,
- "description": "weaviate url/ local endpoint",
- "required": False,
- },
- weaviate_api_key={
- "type": ARG_TYPE.STR,
- "description": "weaviate API KEY",
- "required": False,
- "secret": True
- },
- persistence_directory={
- "type": ARG_TYPE.STR,
- "description": "persistence directory for weaviate",
- "required": False,
- },
-)
-
-connection_args_example = OrderedDict(
- weaviate_url="http://localhost:8080",
- weaviate_api_key="",
- persistence_directory="db_path",
-)
diff --git a/mindsdb/integrations/handlers/weaviate_handler/icon.svg b/mindsdb/integrations/handlers/weaviate_handler/icon.svg
deleted file mode 100644
index b860497982f..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/icon.svg
+++ /dev/null
@@ -1,197 +0,0 @@
-
diff --git a/mindsdb/integrations/handlers/weaviate_handler/requirements.txt b/mindsdb/integrations/handlers/weaviate_handler/requirements.txt
deleted file mode 100644
index 86458abd2e3..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-weaviate-client~=3.24.2
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/weaviate_handler/weaviate_handler.py b/mindsdb/integrations/handlers/weaviate_handler/weaviate_handler.py
deleted file mode 100644
index 4afa749e0ba..00000000000
--- a/mindsdb/integrations/handlers/weaviate_handler/weaviate_handler.py
+++ /dev/null
@@ -1,652 +0,0 @@
-import ast
-from datetime import datetime
-from typing import List, Optional
-
-import weaviate
-from weaviate.embedded import EmbeddedOptions
-import pandas as pd
-
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-from mindsdb.integrations.libs.response import HandlerResponse
-from mindsdb.integrations.libs.response import HandlerResponse as Response
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-from mindsdb.integrations.libs.vectordatabase_handler import (
- FilterCondition,
- FilterOperator,
- TableField,
- VectorStoreHandler,
-)
-from mindsdb.utilities import log
-from weaviate.util import generate_uuid5
-
-logger = log.getLogger(__name__)
-
-
-class WeaviateDBHandler(VectorStoreHandler):
- """This handler handles connection and execution of the Weaviate statements."""
-
- name = "weaviate"
-
- def __init__(self, name: str, **kwargs):
- super().__init__(name)
-
- self._connection_data = kwargs.get("connection_data")
-
- self._client_config = {
- "weaviate_url": self._connection_data.get("weaviate_url"),
- "weaviate_api_key": self._connection_data.get("weaviate_api_key"),
- "persistence_directory": self._connection_data.get("persistence_directory"),
- }
-
- if not (
- self._client_config.get("weaviate_url")
- or self._client_config.get("persistence_directory")
- ):
- raise Exception(
- "Either url or persist_directory is required for weaviate connection!"
- )
-
- self._client = None
- self._embedded_options = None
- self.is_connected = False
- self.connect()
-
- def _get_client(self) -> weaviate.Client:
- if not (
- self._client_config
- and (
- self._client_config.get("weaviate_url")
- or self._client_config.get("persistence_directory")
- )
- ):
- raise Exception("Client config is not set! or missing parameters")
-
- # decide the client type to be used, either persistent or httpclient
- if self._client_config.get("persistence_directory"):
- self._embedded_options = EmbeddedOptions(
- persistence_data_path=self._client_config.get("persistence_directory")
- )
- return weaviate.Client(embedded_options=self._embedded_options)
- if self._client_config.get("weaviate_api_key"):
- return weaviate.Client(
- url=self._client_config["weaviate_url"],
- auth_client_secret=weaviate.AuthApiKey(
- api_key=self._client_config["weaviate_api_key"]
- ),
- )
- return weaviate.Client(url=self._client_config["weaviate_url"])
-
- def __del__(self):
- self.is_connected = False
- if self._embedded_options:
- self._client._connection.embedded_db.stop()
- del self._embedded_options
- self._embedded_options = None
- self._client._connection.close()
- if self._client:
- del self._client
-
- def connect(self):
- """Connect to a weaviate database."""
- if self.is_connected:
- return self._client
-
- try:
- self._client = self._get_client()
- self.is_connected = True
- return self._client
- except Exception as e:
- logger.error(f"Error connecting to weaviate client, {e}!")
- self.is_connected = False
-
- def disconnect(self):
- """Close the database connection."""
-
- if not self.is_connected:
- return
- if self._embedded_options:
- self._client._connection.embedded_db.stop()
- del self._embedded_options
- del self._client
- self._embedded_options = None
- self._client = None
- self.is_connected = False
-
- def check_connection(self):
- """Check the connection to the Weaviate database."""
- response_code = StatusResponse(False)
-
- try:
- if self._client.is_live():
- response_code.success = True
- except Exception as e:
- logger.error(f"Error connecting to weaviate , {e}!")
- response_code.error_message = str(e)
- finally:
- if response_code.success and not self.is_connected:
- self.disconnect()
- if not response_code.success and self.is_connected:
- self.is_connected = False
-
- return response_code
-
- @staticmethod
- def _get_weaviate_operator(operator: FilterOperator) -> str:
- mapping = {
- FilterOperator.EQUAL: "Equal",
- FilterOperator.NOT_EQUAL: "NotEqual",
- FilterOperator.LESS_THAN: "LessThan",
- FilterOperator.LESS_THAN_OR_EQUAL: "LessThanEqual",
- FilterOperator.GREATER_THAN: "GreaterThan",
- FilterOperator.GREATER_THAN_OR_EQUAL: "GreaterThanEqual",
- FilterOperator.IS_NULL: "IsNull",
- FilterOperator.LIKE: "Like",
- }
-
- if operator not in mapping:
- raise Exception(f"Operator {operator} is not supported by weaviate!")
-
- return mapping[operator]
-
- @staticmethod
- def _get_weaviate_value_type(value) -> str:
- # https://github.com/weaviate/weaviate-python-client/blob/c760b1d59b2a222e770d53cc257b1bf993a0a592/weaviate/gql/filter.py#L18
- if isinstance(value, list):
- value_list_types = {
- str: "valueTextList",
- int: "valueIntList",
- float: "valueIntList",
- bool: "valueBooleanList",
- }
- if not value:
- raise Exception("Empty list is not supported")
- value_type = value_list_types.get(type(value[0]))
-
- else:
- value_primitive_types = {
- str: "valueText",
- int: "valueInt",
- float: "valueInt",
- datetime: "valueDate",
- bool: "valueBoolean",
- }
- value_type = value_primitive_types.get(type(value))
-
- if not value_type:
- raise Exception(f"Value type {type(value)} is not supported by weaviate!")
-
- return value_type
-
- def _translate_condition(
- self,
- table_name: str,
- conditions: List[FilterCondition] = None,
- meta_conditions: List[FilterCondition] = None,
- ) -> Optional[dict]:
- """
- Translate a list of FilterCondition objects a dict that can be used by Weaviate.
- E.g.,
- [
- FilterCondition(
- column="metadata.created_at",
- op=FilterOperator.LESS_THAN,
- value="2020-01-01",
- ),
- FilterCondition(
- column="metadata.created_at",
- op=FilterOperator.GREATER_THAN,
- value="2019-01-01",
- )
- ]
- -->
- {"operator": "And",
- "operands": [
- {
- "path": ["created_at"],
- "operator": "LessThan",
- "valueText": "2020-01-01",
- },
- {
- "path": ["created_at"],
- "operator": "GreaterThan",
- "valueInt": "2019-01-01",
- },
- ]}
- """
- table_name = table_name.capitalize()
- metadata_table_name = table_name.capitalize() + "_metadata"
- #
- if not (conditions or meta_conditions):
- return None
-
- # we translate each condition into a single dict
- # conditions on columns
- weaviate_conditions = []
- if conditions:
- for condition in conditions:
- column_key = condition.column
- value_type = self._get_weaviate_value_type(condition.value)
- weaviate_conditions.append(
- {
- "path": [column_key],
- "operator": self._get_weaviate_operator(condition.op),
- value_type: condition.value,
- }
- )
- # condition on metadata columns
- if meta_conditions:
- for condition in meta_conditions:
- meta_key = condition.column.split(".")[-1]
- value_type = self._get_weaviate_value_type(condition.value)
- weaviate_conditions.append(
- {
- "path": [
- "associatedMetadata",
- metadata_table_name,
- meta_key,
- ],
- "operator": self._get_weaviate_operator(condition.op),
- value_type: condition.value,
- }
- )
-
- # we combine all conditions into a single dict
- all_conditions = (
- {"operator": "And", "operands": weaviate_conditions}
- # combining all conditions if there are more than one conditions
- if len(weaviate_conditions) > 1
- # only a single condition
- else weaviate_conditions[0]
- )
- return all_conditions
-
- def select(
- self,
- table_name: str,
- columns: List[str] = None,
- conditions: List[FilterCondition] = None,
- offset: int = None,
- limit: int = None,
- ):
- table_name = table_name.capitalize()
- # columns which we will always provide in the result
- filters = None
- if conditions:
- non_metadata_conditions = [
- condition
- for condition in conditions
- if not condition.column.startswith(TableField.METADATA.value)
- and condition.column != TableField.SEARCH_VECTOR.value
- and condition.column != TableField.EMBEDDINGS.value
- ]
- metadata_conditions = [
- condition
- for condition in conditions
- if condition.column.startswith(TableField.METADATA.value)
- ]
- filters = self._translate_condition(
- table_name,
- non_metadata_conditions if non_metadata_conditions else None,
- metadata_conditions if metadata_conditions else None,
- )
-
- # check if embedding vector filter is present
- vector_filter = (
- None
- if not conditions
- else [
- condition
- for condition in conditions
- if condition.column == TableField.SEARCH_VECTOR.value
- or condition.column == TableField.EMBEDDINGS.value
- ]
- )
-
- for col in ["id", "embeddings", "distance", "metadata"]:
- if col in columns:
- columns.remove(col)
-
- metadata_table = table_name.capitalize() + "_metadata"
-
- metadata_fields = " ".join(
- [
- prop["name"]
- for prop in self._client.schema.get(metadata_table)["properties"]
- ]
- )
-
- # query to get all metadata fields
- metadata_query = (
- f"associatedMetadata {{ ... on {metadata_table} {{ {metadata_fields} }} }}"
- )
-
- if columns:
- query = self._client.query.get(
- table_name,
- columns + [metadata_query],
- ).with_additional(["id vector distance"])
- else:
- query = self._client.query.get(
- table_name,
- [metadata_query],
- ).with_additional(["id vector distance"])
- if vector_filter:
- # similarity search
- # assuming the similarity search is on content
- # assuming there would be only one vector based search per query
- vector_filter = vector_filter[0]
- near_vector = {
- "vector": ast.literal_eval(vector_filter.value)
- if isinstance(vector_filter.value, str)
- else vector_filter.value
- }
- query = query.with_near_vector(near_vector)
- if filters:
- query = query.with_where(filters)
- if limit:
- query = query.with_limit(limit)
- result = query.do()
- result = result["data"]["Get"][table_name.capitalize()]
- ids = [query_obj["_additional"]["id"] for query_obj in result]
- contents = [query_obj.get("content") for query_obj in result]
- distances = [
- query_obj.get("_additional").get("distance") for query_obj in result
- ]
- # distances will be null for non vector/embedding query
- vectors = [query_obj.get("_additional").get("vector") for query_obj in result]
- metadatas = [query_obj.get("associatedMetadata")[0] for query_obj in result]
-
- payload = {
- TableField.ID.value: ids,
- TableField.CONTENT.value: contents,
- TableField.METADATA.value: metadatas,
- TableField.EMBEDDINGS.value: vectors,
- TableField.DISTANCE.value: distances,
- }
-
- if columns:
- payload = {
- column: payload[column]
- for column in columns + ["id", "embeddings", "distance", "metadata"]
- if column != TableField.EMBEDDINGS.value
- }
-
- # always include distance
- if distances:
- payload[TableField.DISTANCE.value] = distances
- result_df = pd.DataFrame(payload)
- return result_df
-
- def insert(
- self, table_name: str, data: pd.DataFrame, columns: List[str] = None
- ):
- """
- Insert data into the Weaviate database.
- """
-
- table_name = table_name.capitalize()
-
- # drop columns with all None values
-
- data.dropna(axis=1, inplace=True)
-
- data = data.to_dict(orient="records")
- # parsing the records one by one as we need to update metadata (which has variable columns)
- for record in data:
- metadata_data = record.get(TableField.METADATA.value)
- data_object = {"content": record.get(TableField.CONTENT.value)}
- data_obj_id = (
- record[TableField.ID.value]
- if TableField.ID.value in record.keys()
- else generate_uuid5(data_object)
- )
- obj_id = self._client.data_object.create(
- data_object=data_object,
- class_name=table_name,
- vector=record[TableField.EMBEDDINGS.value],
- uuid=data_obj_id,
- )
- if metadata_data:
- meta_id = self.add_metadata(metadata_data, table_name)
- self._client.data_object.reference.add(
- from_uuid=obj_id,
- from_property_name="associatedMetadata",
- to_uuid=meta_id,
- )
-
- def update(
- self, table_name: str, data: pd.DataFrame, columns: List[str] = None
- ):
- """
- Update data in the weaviate database.
- """
- table_name = table_name.capitalize()
- metadata_table_name = table_name.capitalize() + "_metadata"
- data_list = data.to_dict("records")
- for row in data_list:
- non_metadata_keys = [
- key
- for key in row.keys()
- if key and not key.startswith(TableField.METADATA.value)
- ]
- metadata_keys = [
- key.split(".")[1]
- for key in row.keys()
- if key and key.startswith(TableField.METADATA.value)
- ]
-
- id_filter = {"path": ["id"], "operator": "Equal", "valueText": row["id"]}
- metadata_id_query = f"associatedMetadata {{ ... on {metadata_table_name} {{ _additional {{ id }} }} }}"
- result = (
- self._client.query.get(table_name, metadata_id_query)
- .with_additional(["id"])
- .with_where(id_filter)
- .do()
- )
-
- metadata_id = result["data"]["Get"][table_name][0]["associatedMetadata"][0][
- "_additional"
- ]["id"][0]
- # updating table
- self._client.data_object.update(
- uuid=row["id"],
- class_name=table_name,
- data_object={key: row[key] for key in non_metadata_keys},
- )
- # updating metadata
- self._client.data_object.update(
- uuid=metadata_id,
- class_name=metadata_table_name,
- data_object={key: row[key] for key in metadata_keys},
- )
-
- def delete(
- self, table_name: str, conditions: List[FilterCondition] = None
- ):
- table_name = table_name.capitalize()
- non_metadata_conditions = [
- condition
- for condition in conditions
- if not condition.column.startswith(TableField.METADATA.value)
- and condition.column != TableField.SEARCH_VECTOR.value
- and condition.column != TableField.EMBEDDINGS.value
- ]
- metadata_conditions = [
- condition
- for condition in conditions
- if condition.column.startswith(TableField.METADATA.value)
- ]
- filters = self._translate_condition(
- table_name,
- non_metadata_conditions if non_metadata_conditions else None,
- metadata_conditions if metadata_conditions else None,
- )
- if not filters:
- raise Exception("Delete query must have at least one condition!")
- metadata_table_name = table_name.capitalize() + "_metadata"
- # query to get metadata ids
- metadata_query = f"associatedMetadata {{ ... on {metadata_table_name} {{ _additional {{ id }} }} }}"
- result = (
- self._client.query.get(table_name, metadata_query)
- .with_additional(["id"])
- .with_where(filters)
- .do()
- )
- result = result["data"]["Get"][table_name]
- metadata_table_name = table_name.capitalize() + "_metadata"
- table_ids = []
- metadata_ids = []
- for i in result:
- table_ids.append(i["_additional"]["id"])
- metadata_ids.append(i["associatedMetadata"][0]["_additional"]["id"])
- self._client.batch.delete_objects(
- class_name=table_name,
- where={
- "path": ["id"],
- "operator": "ContainsAny",
- "valueTextArray": table_ids,
- },
- )
- self._client.batch.delete_objects(
- class_name=metadata_table_name,
- where={
- "path": ["id"],
- "operator": "ContainsAny",
- "valueTextArray": metadata_ids,
- },
- )
-
- def create_table(self, table_name: str, if_not_exists=True):
- """
- Create a class with the given name in the weaviate database.
- """
- # separate metadata table for each table (as different tables will have different metadata columns)
- # this reduces the query time using metadata but increases the insertion time
- metadata_table_name = table_name + "_metadata"
- if not self._client.schema.exists(metadata_table_name):
- self._client.schema.create_class({"class": metadata_table_name})
- if not self._client.schema.exists(table_name):
- self._client.schema.create_class(
- {
- "class": table_name,
- "properties": [
- {"dataType": ["text"], "name": prop["name"]}
- for prop in self.SCHEMA
- if prop["name"] != "id"
- and prop["name"] != "embeddings"
- and prop["name"] != "metadata"
- ],
- "vectorIndexType": "hnsw",
- }
- )
- add_prop = {
- "name": "associatedMetadata",
- "dataType": [metadata_table_name.capitalize()],
- }
- self._client.schema.property.create(table_name.capitalize(), add_prop)
-
- def drop_table(self, table_name: str, if_exists=True):
- """
- Delete a class from the weaviate database.
- """
- table_name = table_name.capitalize()
- metadata_table_name = table_name.capitalize() + "_metadata"
- table_id_query = self._client.query.get(table_name).with_additional(["id"]).do()
- table_ids = [
- i["_additional"]["id"] for i in table_id_query["data"]["Get"][table_name]
- ]
- metadata_table_id_query = (
- self._client.query.get(metadata_table_name).with_additional(["id"]).do()
- )
- metadata_ids = [
- i["_additional"]["id"]
- for i in metadata_table_id_query["data"]["Get"][metadata_table_name]
- ]
- self._client.batch.delete_objects(
- class_name=table_name,
- where={
- "path": ["id"],
- "operator": "ContainsAny",
- "valueTextArray": table_ids,
- },
- )
- self._client.batch.delete_objects(
- class_name=metadata_table_name,
- where={
- "path": ["id"],
- "operator": "ContainsAny",
- "valueTextArray": metadata_ids,
- },
- )
- try:
- self._client.schema.delete_class(table_name)
- self._client.schema.delete_class(metadata_table_name)
- except ValueError:
- if not if_exists:
- raise Exception(f"Table {table_name} does not exist!")
-
- def get_tables(self) -> HandlerResponse:
- """
- Get the list of tables in the Weaviate database.
- """
- query_tables = self._client.schema.get()
- tables = []
- if query_tables:
- tables = [table["class"] for table in query_tables["classes"]]
- table_name = pd.DataFrame(
- columns=["table_name"],
- data=tables,
- )
- return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=table_name)
-
- def get_columns(self, table_name: str) -> HandlerResponse:
- table_name = table_name.capitalize()
- # check if table exists
- try:
- table = self._client.schema.get(table_name)
- except ValueError:
- return Response(
- resp_type=RESPONSE_TYPE.ERROR,
- error_message=f"Table {table_name} does not exist!",
- )
- data = pd.DataFrame(
- data=[
- {"COLUMN_NAME": column["name"], "DATA_TYPE": column["dataType"][0]}
- for column in table["properties"]
- ]
- )
- return Response(data_frame=data, resp_type=RESPONSE_TYPE.OK)
-
- def add_metadata(self, data: dict, table_name: str):
- table_name = table_name.capitalize()
- metadata_table_name = table_name.capitalize() + "_metadata"
- self._client.schema.get(metadata_table_name)
- # getting existing metadata fields
- added_prop_list = [
- prop["name"]
- for prop in self._client.schema.get(metadata_table_name)["properties"]
- ]
- # as metadata columns are not fixed, at every entry, a check takes place for the columns
- for prop in data.keys():
- if prop not in added_prop_list:
- if isinstance(data[prop], int):
- add_prop = {
- "name": prop,
- "dataType": ["int"],
- }
- elif isinstance(data[prop][0], datetime):
- add_prop = {
- "name": prop,
- "dataType": ["date"],
- }
- else:
- add_prop = {
- "name": prop,
- "dataType": ["string"],
- }
- # when a new column is identified, it is added to the metadata table
- self._client.schema.property.create(metadata_table_name, add_prop)
- metadata_id = self._client.data_object.create(
- data_object=data, class_name=table_name.capitalize() + "_metadata"
- )
- return metadata_id
diff --git a/mindsdb/integrations/handlers/webz_handler/README.md b/mindsdb/integrations/handlers/webz_handler/README.md
deleted file mode 100644
index e38a8a19fca..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Webz Handler
-
-This handler integrates with the [Webz API](https://docs.webz.io/reference#1) to make
-webz data available to use for model training, predictions and automations.
-
-
-
-## Connect to the Webz API
-The first step is to create a database with the new `webz` engine
-by passing in the required `token` parameter:
-
-```
-CREATE DATABASE webz_datasource
-WITH
- ENGINE = 'webz',
- PARAMETERS = {
- "token": ""
-};
-```
-
-## Querying news articles, blogs entries or open discussions
-
-With the previous established connection, you can for instance,
-query the 5 most relevant news articles, in english that contain
-the text AI in the title
-
-```
-SELECT *
-FROM webz_datasource.posts
-WHERE query="language:english title:AI site_type:news"
-ORDER BY posts.relevancy DESC
-LIMIT 5;
-```
-
-The returned results should have rows like this:
-
-| thread__uuid | thread__url | thread__site_full | thread__site | thread__site_section | thread__section_title | thread__title | thread__title_full | thread__published | thread__replies_count | thread__participants_count | thread__site_type | thread__main_image | thread__country | thread__site_categories | thread__social__facebook__likes | thread__social__facebook__shares | thread__social__facebook__comments | thread__social__gplus__shares | thread__social__pinterest__shares | thread__social__linkedin__shares | thread__social__stumbledupon__shares | thread__social__vk__shares | thread__performance_score | thread__domain_rank | thread__domain_rank_updated | thread__reach__per_million | thread__reach__page_views | thread__reach__updated | uuid | url | ord_in_thread | parent_url | author | published | title | text | language | external_links | external_images | rating | entities__persons | entities__organizations | entities__locations | crawled |
-| ------------ | ----------- | ----------------- | ------------ | -------------------- | --------------------- | ------------- | ------------------ | ----------------- | --------------------- | -------------------------- | ----------------- | ------------------ | --------------- | ----------------------- | ------------------------------- | -------------------------------- | ---------------------------------- | ----------------------------- | --------------------------------- | -------------------------------- | ------------------------------------ | -------------------------- | ------------------------- | ------------------- | --------------------------- | -------------------------- | ------------------------- | ---------------------- | ---- | --- | ------------- | ---------- | ------ | --------- | ----- | ---- | -------- | -------------- | --------------- | ------ | ----------------- | ----------------------- | ------------------- | ------- |
-| e893796adad8a85e6ab5202ac34b5791c8fbb017 | https://www.economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law | www.economist.com | economist.com | http://feeds.feedburner.com/twitter.com/indiefulrok | BizToc | Generative AI could radically alter the practice of law | Generative AI could radically alter the practice of law | 2023-07-15T09:01:00.000+03:00 | 0 | 0 | news | https://c.biztoc.com/p/f96527e070f97968/s.webp | US | ["media","law_government_and_politics","politics"] | 2169 | 501 | 843 | 0 | 2 | 0 | 0 | 1 | 5 | 253 | 2023-07-11T13:16:20.000+03:00 | [NULL] | [NULL] | [NULL] | e893796adad8a85e6ab5202ac34b5791c8fbb017 | https://www.economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law | 0 | [NULL] | [NULL] | 2023-07-15T09:01:00.000+03:00 | Generative AI could radically alter the practice of law | Generative AI could radically alter the practice of law economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law L a conservative bunch, befitting a profession that rewards preparedness, sagacity and respect for precedent. No doubt many enjoyed a chuckle at the tale of Steven Schwartz, a personal-injury lawyer at the New York firm Levidow, Levidow & Oberman, who last month used Chat to help him prepare a court filing. He relied a bit too heavily on the artificial-intelligence ( )β¦ This story appeared on | english | [] | [] | [NULL] | [{"name":"steven schwartz","sentiment":"none"}] | [{"name":"levidow, levidow & oberman","sentiment":"none"}] | [{"name":"new york","sentiment":"none"}] | 2023-07-15T09:52:32.226+03:00 |
-
-## Queries reviews
-
-You can also query the last 10 reviews crawled, in English,
-with rating equal or higher than 4
-
-```
-SELECT *
-FROM webz_datasource.reviews
-WHERE query="language:english rating:>=4"
-ORDER BY reviews.crawled ASC
-LIMIT 10;
-```
-
-The returned results should have rows like this:
-
-| item__uuid | item__url | item__site_full | item__site | item__site_section | item__section_title | item__title | item__title_full | item__published | item__reviews_count | item__reviewers_count | item__main_image | item__country | item__site_categories | item__domain_rank | item__domain_rank_updated | uuid | url | ord_in_thread | author | published | title | text | language | external_links | rating | crawled |
-| ---------- | --------- | --------------- | ---------- | ------------------ | ------------------- | ----------- | ---------------- | --------------- | ------------------- | --------------------- | ---------------- | ------------- | --------------------- | ----------------- | ------------------------- | ---- | --- | ------------- | ------ | --------- | ----- | ---- | -------- | -------------- | ------ | ------- |
diff --git a/mindsdb/integrations/handlers/webz_handler/__about__.py b/mindsdb/integrations/handlers/webz_handler/__about__.py
deleted file mode 100644
index 306b450f271..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Webz handler"
-__package_name__ = "mindsdb_webz_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for the "
-__author__ = "MindsDB Inc"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/webz_handler/__init__.py b/mindsdb/integrations/handlers/webz_handler/__init__.py
deleted file mode 100644
index 18696d0424e..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __description__ as description
-from .__about__ import __version__ as version
-
-try:
- from .webz_handler import WebzHandler as Handler
-
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Webz"
-name = "webz"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/webz_handler/icon.svg b/mindsdb/integrations/handlers/webz_handler/icon.svg
deleted file mode 100644
index b133fdfeff9..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/webz_handler/requirements.txt b/mindsdb/integrations/handlers/webz_handler/requirements.txt
deleted file mode 100644
index 3e2d5fc0709..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-# include python libraries required by this handler
-webzio==1.0.2
-dotty-dict==1.3.1
diff --git a/mindsdb/integrations/handlers/webz_handler/webz_handler.py b/mindsdb/integrations/handlers/webz_handler/webz_handler.py
deleted file mode 100644
index d6a1db80d69..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/webz_handler.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import os
-import time
-from typing import Any, Dict
-
-import pandas as pd
-import webzio
-from dotty_dict import dotty
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.integrations.handlers.webz_handler.webz_tables import (
- WebzPostsTable,
- WebzReviewsTable,
-)
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import HandlerResponse as Response
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-from mindsdb.utilities import log
-from mindsdb.utilities.config import Config
-
-logger = log.getLogger(__name__)
-
-
-class WebzHandler(APIHandler):
- """A class for handling connections and interactions with the Webz API."""
-
- API_CALL_EXEC_LIMIT_SECONDS = 60
- AVAILABLE_CONNECTION_ARGUMENTS = ["token"]
-
- def __init__(self, name: str = None, **kwargs):
- """Registers all tables and prepares the handler for an API connection.
-
- Args:
- name: (str): The handler name to use
- """
- super().__init__(name)
-
- args = kwargs.get("connection_data", {})
- self.connection_args = self._read_connection_args(name, **args)
-
- self.client = None
- self.is_connected = False
- self.max_page_size = 100
-
- self._register_table(WebzPostsTable.TABLE_NAME, WebzPostsTable(self))
- self._register_table(WebzReviewsTable.TABLE_NAME, WebzReviewsTable(self))
-
- def _read_connection_args(self, name: str = None, **kwargs) -> Dict[str, Any]:
- """Read the connection arguments by following the order of precedence below:
-
- 1. PARAMETERS object
- 2. Environment Variables
- 3. MindsDB Config File
-
- """
- filtered_args = {}
- handler_config = Config().get(f"{name.lower()}_handler", {})
- for k in type(self).AVAILABLE_CONNECTION_ARGUMENTS:
- if k in kwargs:
- filtered_args[k] = kwargs[k]
- elif f"{name.upper()}_{k.upper()}" in os.environ:
- filtered_args[k] = os.environ[f"{name.upper()}_{k.upper()}"]
- elif k in handler_config:
- filtered_args[k] = handler_config[k]
- return filtered_args
-
- def connect(self) -> object:
- """Set up any connections required by the handler
- Should return output of check_connection() method after attempting
- connection. Should switch self.is_connected.
- Returns:
- HandlerStatusResponse
- """
- if self.is_connected and self.client is not None:
- return self.client
-
- webzio.config(token=self.connection_args["token"])
- self.client = webzio
- self.is_connected = True
- return self.client
-
- def check_connection(self) -> StatusResponse:
- """Check connection to the handler
- Returns:
- HandlerStatusResponse
- """
- response = StatusResponse(False)
- try:
- webzio_client = self.connect()
- webzio_client.query("filterWebContent", {"q": "AI", "size": 1})
- response.success = True
- except Exception as e:
- response.error_message = f"Error connecting to Webz api: {e}."
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def native_query(self, query: str = None) -> Response:
- """Receive raw query and act upon it somehow.
- Args:
- query (Any): query in native format (str for sql databases,
- dict for mongo, api's json etc)
- Returns:
- HandlerResponse
- """
- ast = parse_sql(query)
- return self.query(ast)
-
- def _parse_item(self, item, output_colums):
- dotted_item = dotty(item)
- return {field.replace(".", "__"): dotted_item[field] for field in output_colums}
-
- def call_webz_api(
- self, method_name: str = None, params: Dict = None
- ) -> pd.DataFrame:
- """Calls the API method with the given params.
-
- Returns results as a pandas DataFrame.
-
- Args:
- method_name (str): Method name to call
- params (Dict): Params to pass to the API call
- """
- table_name = method_name
- table = self._tables[table_name]
-
- client = self.connect()
-
- left = None
- count_results = None
-
- data = []
- limit_exec_time = time.time() + type(self).API_CALL_EXEC_LIMIT_SECONDS
-
- if "size" in params:
- count_results = params["size"]
-
- # GET param q is mandatory, so in order to collect all data,
- # it's needed to use as a query an asterisk (*)
- if "q" not in params:
- params["q"] = "*"
-
- while True:
- if time.time() > limit_exec_time:
- raise RuntimeError("Handler request timeout error")
-
- if count_results is not None:
- left = count_results - len(data)
- if left == 0:
- break
- elif left < 0:
- # got more results that we need
- data = data[:left]
- break
-
- if left > self.max_page_size:
- params["size"] = self.max_page_size
- else:
- params["size"] = left
-
- logger.debug(
- f"Calling Webz API: {table.ENDPOINT} with params ({params})"
- )
-
- output = (
- client.query(table.ENDPOINT, params)
- if len(data) == 0
- else client.get_next()
- )
- for item in output.get(table_name, []):
- data.append(self._parse_item(item, table.OUTPUT_COLUMNS))
-
- df = pd.DataFrame(data)
- return df
diff --git a/mindsdb/integrations/handlers/webz_handler/webz_tables.py b/mindsdb/integrations/handlers/webz_handler/webz_tables.py
deleted file mode 100644
index 94cf76bbe1b..00000000000
--- a/mindsdb/integrations/handlers/webz_handler/webz_tables.py
+++ /dev/null
@@ -1,218 +0,0 @@
-from typing import List
-
-import pandas as pd
-from mindsdb_sql_parser import ast
-
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-
-
-class WebzBaseAPITable(APITable):
-
- ENDPOINT = None
- OUTPUT_COLUMNS = []
- SORTABLE_COLUMNS = []
- TABLE_NAME = None
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Selects data from the API and returns it as a pandas DataFrame
-
- Returns dataframe representing the API results.
-
- Args:
- query (ast.Select): SQL SELECT query
-
- """
- conditions = extract_comparison_conditions(query.where)
- params = {}
-
- for op, arg1, arg2 in conditions:
- if op != "=":
- raise NotImplementedError(f"Unsupported Operator: {op}")
- elif arg1 == "query":
- params["q"] = arg2
- else:
- raise NotImplementedError(f"Unknown clause: {arg1}")
-
- if query.order_by:
- if len(query.order_by) > 1:
- raise ValueError("Unsupported to order by multiple fields")
- order_item = query.order_by[0]
- sort_column = ".".join(order_item.field.parts[1:])
- # make sure that column is sortable
- if sort_column not in type(self).SORTABLE_COLUMNS:
- raise ValueError(f"Order by unknown column {sort_column}")
- params.update({"sort": sort_column, "order": order_item.direction.lower()})
-
- if query.limit is not None:
- params["size"] = query.limit.value
- result = self.handler.call_webz_api(
- method_name=type(self).TABLE_NAME, params=params
- )
-
- # filter targets
- columns = []
- for target in query.targets:
- if isinstance(target, ast.Star):
- columns = self.get_columns()
- break
- elif isinstance(target, ast.Identifier):
- columns.append(target.parts[-1])
- else:
- raise NotImplementedError(f"Unknown query target {type(target)}")
-
- # columns to lower case
- columns = [name.lower() for name in columns]
-
- if len(result) == 0:
- return pd.DataFrame([], columns=columns)
-
- # add absent columns
- for col in set(columns) & set(result.columns) ^ set(columns):
- result[col] = None
-
- # filter by columns
- result = result[columns]
-
- # Rename columns
- for target in query.targets:
- if target.alias:
- result.rename(
- columns={target.parts[-1]: str(target.alias)}, inplace=True
- )
- return result
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- List of columns
-
- """
- return [column.replace(".", "__") for column in type(self).OUTPUT_COLUMNS]
-
-
-class WebzPostsTable(WebzBaseAPITable):
- """To interact with structured posts data from news articles, blog posts and online discussions
- provided through the Webz.IO API.
-
- """
-
- ENDPOINT = "filterWebContent"
- OUTPUT_COLUMNS = [
- "thread.uuid",
- "thread.url",
- "thread.site_full",
- "thread.site",
- "thread.site_section",
- "thread.section_title",
- "thread.title",
- "thread.title_full",
- "thread.published",
- "thread.replies_count",
- "thread.participants_count",
- "thread.site_type",
- "thread.main_image",
- "thread.country",
- "thread.site_categories",
- "thread.social.facebook.likes",
- "thread.social.facebook.shares",
- "thread.social.facebook.comments",
- "thread.social.gplus.shares",
- "thread.social.pinterest.shares",
- "thread.social.linkedin.shares",
- "thread.social.stumbledupon.shares",
- "thread.social.vk.shares",
- "thread.performance_score",
- "thread.domain_rank",
- "thread.domain_rank_updated",
- "thread.reach.per_million",
- "thread.reach.page_views",
- "thread.reach.updated",
- "uuid",
- "url",
- "ord_in_thread",
- "parent_url",
- "author",
- "published",
- "title",
- "text",
- "language",
- "external_links",
- "external_images",
- "rating",
- "entities.persons",
- "entities.organizations",
- "entities.locations",
- "crawled",
- ]
- SORTABLE_COLUMNS = [
- "crawled",
- "relevancy",
- "social.facebook.likes",
- "social.facebook.shares",
- "social.facebook.comments",
- "social.gplus.shares",
- "social.pinterest.shares",
- "social.linkedin.shares",
- "social.stumbledupon.shares",
- "social.vk.shares",
- "replies_count",
- "participants_count",
- "performance_score",
- "published",
- "thread.published",
- "domain_rank",
- "ord_in_thread",
- "rating",
- ]
- TABLE_NAME = "posts"
-
-
-class WebzReviewsTable(WebzBaseAPITable):
- """To interact with structured reviews data from hundreds of review sites,
- provided through the Webz.IO API.
-
- """
-
- ENDPOINT = "reviewFilter"
- OUTPUT_COLUMNS = [
- "item.uuid",
- "item.url",
- "item.site_full",
- "item.site",
- "item.site_section",
- "item.section_title",
- "item.title",
- "item.title_full",
- "item.published",
- "item.reviews_count",
- "item.reviewers_count",
- "item.main_image",
- "item.country",
- "item.site_categories",
- "item.domain_rank",
- "item.domain_rank_updated",
- "uuid",
- "url",
- "ord_in_thread",
- "author",
- "published",
- "title",
- "text",
- "language",
- "external_links",
- "rating",
- "crawled",
- ]
- SORTABLE_COLUMNS = [
- "crawled",
- "relevancy",
- "reviews_count",
- "reviewers_count",
- "spam_score",
- "domain_rank",
- "ord_in_thread",
- "rating",
- ]
- TABLE_NAME = "reviews"
diff --git a/mindsdb/integrations/handlers/whatsapp_handler/README.md b/mindsdb/integrations/handlers/whatsapp_handler/README.md
deleted file mode 100644
index c493e0187d9..00000000000
--- a/mindsdb/integrations/handlers/whatsapp_handler/README.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# WhatsApp Handler
-
-Whatsapp handler for MindsDB facilitates the ability to send messages to your Whatsapp using Twilio Whastapp APIs and retrieve the conversation history.
-
----
-
-## About Whatsapp
-
-WhatsApp is a popular messaging application that allows users to send text messages, voice messages, make voice and video calls, share media files, and more.
-
-## Whatsapp Handler Implementation
-
-This handler was implemented using [Twilio Python SDK](https://www.twilio.com/docs/libraries/python). This SDK provides a very effective way to integrate with the Twilio Whatapp API.
-
-## Whatsapp Handler Initialization
-
-The Whatsapp handler is initialized with the following parameters:
-
-- `account_sid`: Twilio Account SID
-- `auth_token`: Twilio Authentication Token
-- `to_number`: Required a phone number to send text messages to
-- `from_number`: Required a phone number to send text messages from
-
-## How to get your Twilio credentials
-
-1. Sign up for a Twilio account or log into your existing account.
-2. Navigate to the [Twilio Console Dashboard](https://www.twilio.com/console).
-3. Here you will find your `ACCOUNT SID` and `AUTH TOKEN`.
-4. To get a Twilio phone number, navigate to the "Phone Numbers" section and either use an existing number or buy a new one.
-5. Store these as environment variables: `TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN`, and `TWILIO_PHONE_NUMBER` respectively.
-
-## Trying out Whatsapp Conversations with Twilio
-
-1. Navigate to this [guide](https://www.twilio.com/docs/conversations/use-twilio-sandbox-for-whatsapp) for complete documentation or follow the following steps.
-2. Go to the [Twilio Console Dashboard](https://console.twilio.com/us1/develop/conversations/tryout/whatsapp)
-3. Select "User-Initiated Conversation"
-4. Either open whatsapp and send `join ` or `Scan the QR on the mobile phone`.
-
-All things are in place now, in order to use the Whatsapp Using MindsDB.
-
-## Implemented Features
-
-- Send a Whatsapp Message to a given number with a specified body.
-- Fetch the last `n` messages sent or received by the whatsapp number.
-
-## Example Usage
-
-```sql
--- Creating a Database
-CREATE DATABASE whatsapp_test
-WITH ENGINE = "whatsapp",
-PARAMETERS = {
- "account_sid": "YOUR_ACCOUNT_SID",
- "auth_token": "YOUR_AUTH_TOKEN"
- };
-```
-
-You can now run queries as follows:
-
-```sql
--- Get all messages
-SELECT * FROM whatsapp_test.messages LIMIT 100;
-```
-
-```sql
--- Get message with sid
-SELECT * FROM whatsapp_test.messages where sid="SM375f075778f91b56634ce5d92db249cd";
-```
-
-```sql
--- filter to and from
-SELECT * FROM whatsapp_test.messages where from_number="whatsapp:+14155238886";
-SELECT * FROM whatsapp_test.messages where to_number="whatsapp:+14155238886";
-```
-
-```sql
--- Send a whatsapp message
-INSERT INTO whatsapp_test.messages (body, from_number, to_number)
-VALUES('woww, such a cool integration', 'whatsapp:+14155238886', 'whatsapp:+14155238886');
-```
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/whatsapp_handler/__about__.py b/mindsdb/integrations/handlers/whatsapp_handler/__about__.py
deleted file mode 100644
index 833253c4b94..00000000000
--- a/mindsdb/integrations/handlers/whatsapp_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB WhatsApp Handler'
-__package_name__ = 'mindsdb_whatsapp_handler'
-__version__ = '0.0.1'
-__description__ = 'MindsDB handler for WhatsApp'
-__author__ = 'Tarun Chawla'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2023 - mindsdb'
diff --git a/mindsdb/integrations/handlers/whatsapp_handler/__init__.py b/mindsdb/integrations/handlers/whatsapp_handler/__init__.py
deleted file mode 100644
index 0c9851b4fa5..00000000000
--- a/mindsdb/integrations/handlers/whatsapp_handler/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .whatsapp_handler import (
- WhatsAppHandler as Handler
- )
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'WhatsApp'
-name = 'whatsapp'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler',
- 'version',
- 'name',
- 'type',
- 'title',
- 'description',
- 'import_error',
- 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/whatsapp_handler/icon.svg b/mindsdb/integrations/handlers/whatsapp_handler/icon.svg
deleted file mode 100644
index 4d4987772c8..00000000000
--- a/mindsdb/integrations/handlers/whatsapp_handler/icon.svg
+++ /dev/null
@@ -1,19 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/whatsapp_handler/requirements.txt b/mindsdb/integrations/handlers/whatsapp_handler/requirements.txt
deleted file mode 100644
index 1e2071a390f..00000000000
--- a/mindsdb/integrations/handlers/whatsapp_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-twilio
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/whatsapp_handler/whatsapp_handler.py b/mindsdb/integrations/handlers/whatsapp_handler/whatsapp_handler.py
deleted file mode 100644
index c6853de0cd6..00000000000
--- a/mindsdb/integrations/handlers/whatsapp_handler/whatsapp_handler.py
+++ /dev/null
@@ -1,420 +0,0 @@
-import os
-from twilio.rest import Client
-import re
-from datetime import datetime as datetime
-from typing import List
-import pandas as pd
-
-from mindsdb.utilities import log
-from mindsdb.utilities.config import Config
-
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.utilities.date_utils import parse_local_date
-
-from mindsdb.integrations.libs.api_handler import APIHandler, APITable
-
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, project_dataframe, filter_dataframe
-
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-logger = log.getLogger(__name__)
-
-
-class WhatsAppMessagesTable(APITable):
- def select(self, query: ast.Select) -> Response:
- """
- Retrieves messages sent/received from the database using Twilio Whatsapp API
- Returns
- Response: conversation_history
- """
-
- # Extract comparison conditions from the query
- conditions = extract_comparison_conditions(query.where)
- params = {}
- filters = []
-
- # Build the filters and parameters for the query
- for op, arg1, arg2 in conditions:
- if op == 'or':
- raise NotImplementedError('OR is not supported')
-
- if arg1 == 'sent_at' and arg2 is not None:
- date = parse_local_date(arg2)
- if op == '>':
- params['date_sent_after'] = date
- elif op == '<':
- params['date_sent_before'] = date
- else:
- raise NotImplementedError
-
- # also add to post query filter because date_sent_after=date1 will include date1
- filters.append([op, arg1, arg2])
-
- elif arg1 == 'sid':
- if op == '=':
- params['sid'] = arg2
- else:
- NotImplementedError('Only "from_number=" is implemented')
-
- elif arg1 == 'from_number':
- if op == '=':
- params['from_number'] = arg2
- else:
- NotImplementedError('Only "from_number=" is implemented')
-
- elif arg1 == 'to_number':
- if op == '=':
- params['to_number'] = arg2
- else:
- NotImplementedError('Only "to_number=" is implemented')
-
- else:
- filters.append([op, arg1, arg2])
-
- # Fetch messages based on the filters
- result = self.handler.fetch_messages(params, df=True)
-
- # filter targets
- result = filter_dataframe(result, filters)
-
- # If limit is specified
- if query.limit is not None:
- result = result[:int(query.limit.value)]
-
- # project targets
- result = project_dataframe(result, query.targets, self.get_columns())
-
- return result
-
- def get_columns(self):
- return [
- 'sid',
- 'from_number',
- 'to_number',
- 'body',
- 'direction',
- 'msg_status',
- 'sent_at', # datetime.strptime(str(msg.date_sent), '%Y-%m-%d %H:%M:%S%z'),
- 'account_sid',
- 'price',
- 'price_unit',
- 'api_version',
- 'uri'
- ]
-
- def insert(self, query: ast.Insert):
- """
- Sends a whatsapp message
-
- Args:
- body: message body
- from_number: number from which to send the message
- to_number: number to which message will be sent
- """
-
- # get column names and values from the query
- columns = [col.name for col in query.columns]
-
- ret = []
-
- insert_params = ["body", "from_number", "to_number"]
- for row in query.values:
- params = dict(zip(columns, row))
-
- # Check text length
- max_text_len = 1500
- text = params["body"]
- words = re.split('( )', text)
- messages = []
-
- """
- Regex for matching if any URls are present, if yes then replace with string of hyphens(-)
-
- Example:
- words = ['Check', ' ', 'out', ' ', 'this', ' ', 'cool', ' ', 'website:', ' ', 'https://example.com.', "It's", ' ', 'awesome!']
-
- After parsing through regex ('https://example.com') URL is matched
-
- Final output:
- messages = ['Check - out - this - cool - website: ----------------------- "It\'s - awesome!']
- """
-
- text2 = ''
- pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
- for word in words:
- # replace the links in word to string with the length as twitter short url (23)
- word2 = re.sub(pattern, '-' * 23, word)
- if len(text2) + len(word2) > max_text_len - 3 - 7: # 3 is for ..., 7 is for (10/11)
- messages.append(text2.strip())
-
- text2 = ''
- text2 += word
-
- # Parse last message
- if text2.strip() != '':
- messages.append(text2.strip())
-
- len_messages = len(messages)
-
- # Modify message based on the length
- for i, text in enumerate(messages):
- if i < len_messages - 1:
- text += '...'
- else:
- text += ' '
-
- if i >= 1:
- text += f'({i + 1}/{len_messages})'
-
- # Pass parameters and call 'send_message'
- params['body'] = text
- params_to_send = {key: params[key] for key in insert_params if (key in params)}
- ret_row = self.handler.send_message(params_to_send, ret_as_dict=True)
-
- # Save the results
- ret_row['body'] = text
- ret.append(ret_row)
-
- return pd.DataFrame(ret)
-
-
-class WhatsAppHandler(APIHandler):
- """
- A class for handling connections and interactions with Twilio WhatsApp API.
- Args:
- account_sid(str): Accound ID of the twilio account.
- auth_token(str): Authentication Token obtained from the twilio account.
- """
-
- def __init__(self, name=None, **kwargs):
- """
- Initializes the connection by checking all the params are provided by the user.
- """
- super().__init__(name)
-
- args = kwargs.get('connection_data', {})
- self.connection_args = {}
- handler_config = Config().get('whatsapp_handler', {})
- for k in ['account_sid', 'auth_token']:
- if k in args:
- self.connection_args[k] = args[k]
- elif f'TWILIO_{k.upper()}' in os.environ:
- self.connection_args[k] = os.environ[f'TWILIO_{k.upper()}']
- elif k in handler_config:
- self.connection_args[k] = handler_config[k]
- self.client = None
- self.is_connected = False
-
- messages = WhatsAppMessagesTable(self)
- self._register_table('messages', messages)
-
- def connect(self):
- """
- Authenticate with the Twilio API using the provided `account_SID` and `auth_token`.
- """
- if self.is_connected is True:
- return self.client
-
- self.client = Client(
- self.connection_args['account_sid'],
- self.connection_args['auth_token']
- )
-
- self.is_connected = True
- return self.client
-
- def check_connection(self) -> StatusResponse:
- """
- Checks the connection by performing a basic operation with the Twilio API.
- """
- response = StatusResponse(False)
-
- try:
- self.connect()
- response.success = True
-
- except Exception as e:
- response.error_message = f'Error connecting to Twilio API: {str(e)}. Check credentials.'
- logger.error(response.error_message)
-
- if response.success is False and self.is_connected is True:
- self.is_connected = False
-
- return response
-
- def parse_native_query(self, query_string: str):
- """Parses the native query string of format method(arg1=val1, arg2=val2, ...) and returns the method name and arguments."""
-
- # Adjust regex to account for the possibility of no arguments inside the parenthesis
- match = re.match(r'(\w+)\(([^)]*)\)', query_string)
- if not match:
- raise ValueError(f"Invalid query format: {query_string}")
-
- method_name = match.group(1)
- arg_string = match.group(2)
-
- # Extract individual arguments
- args = {}
- if arg_string: # Check if there are any arguments
- for arg in arg_string.split(','):
- arg = arg.strip()
- key, value = arg.split('=')
- args[key.strip()] = value.strip()
-
- return method_name, args
-
- def native_query(self, query_string: str = None):
- """
- Retreievs the native query from the `parse_native_query` and calls appropriate function and returns the result of the query as a Response object.
- """
- method_name, params = self.parse_native_query(query_string)
- if method_name == 'send_message':
- response = self.send_message(params)
- else:
- raise ValueError(f"Method '{method_name}' not supported by TwilioHandler")
-
- return response
-
- def fetch_messages(self, params, df=False):
- """
- Gets conversation history
-
- Returns:
- Response: conversation history
- """
- limit = int(params.get('limit', 1000))
- sid = params.get('sid', None)
- # Convert date strings to datetime objects if provided
- date_sent_after = params.get('date_sent_after', None)
- date_sent_before = params.get('date_sent_before', None)
- # Extract 'from_' and 'body' search criteria from params
- from_number = params.get('from_number', None)
- to_number = params.get('to_number', None)
- args = {
- 'limit': limit,
- 'date_sent_after': date_sent_after,
- 'date_sent_before': date_sent_before,
- 'from_': from_number,
- 'to': to_number
- }
-
- args = {arg: val for arg, val in args.items() if val is not None}
- if sid:
- messages = [self.client.messages(sid).fetch()]
- else:
- messages = self.client.messages.list(**args)
-
- # Extract all possible properties for each message
- data = []
- for msg in messages:
- msg_data = {
- 'sid': msg.sid,
- 'to_number': msg.to,
- 'from_number': msg.from_,
- 'body': msg.body,
- 'direction': msg.direction,
- 'msg_status': msg.status,
- 'sent_at': msg.date_created.replace(tzinfo=None),
- 'account_sid': msg.account_sid,
- 'price': msg.price,
- 'price_unit': msg.price_unit,
- 'api_version': msg.api_version,
- 'uri': msg.uri,
- # 'media_url': [media.uri for media in msg.media.list()]
- # ... Add other properties as needed
- }
- data.append(msg_data)
-
- # Create a DataFrame
- result_df = pd.DataFrame(data)
-
- # Filter rows where 'from_number' or 'to_number' begins with 'whatsapp:'
- result_df = result_df[result_df['from_number'].str.startswith('whatsapp:') | result_df['to_number'].str.startswith('whatsapp:')]
-
- if df is True:
- return pd.DataFrame(result_df)
- return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result_df))
-
- def send_message(self, params, ret_as_dict=False) -> Response:
- """
- Sends a message to the given Whatsapp number.
-
- Args:
- body: message body
- from_number: number from which to send the message
- to_number: number to which message will be sent
- """
- try:
- message = self.client.messages.create(
- body=params.get('body'),
- to=params.get('to_number'),
- from_=params.get('from_number')
- )
-
- if ret_as_dict is True:
- return {"sid": message.sid, "from": message.from_, "to": message.to, "message": message.body, "status": message.status}
-
- return Response(
- RESPONSE_TYPE.MESSAGE,
- sid=message.sid,
- from_=message.from_,
- to=message.to,
- body=message.body,
- status=message.status
- )
-
- except Exception as e:
- # Log the exception for debugging purposes
- logger.error(f"Error sending message: {str(e)}")
- logger.exception(f"Error sending message: {str(e)}")
- raise Exception("Error sending message")
-
- def call_whatsapp_api(self, method_name: str = None, params: dict = None):
- """
- Calls specific method specified.
-
- Args:
- method_name: to call specific method
- params: parameters to call the method
-
- Returns:
- List of dictionaries as a result of the method call
- """
- api = self.connect()
- method = getattr(api, method_name)
-
- try:
- result = method(**params)
- except Exception as e:
- error = f"Error calling method '{method_name}' with params '{params}': {e}"
- logger.error(error)
- raise e
-
- if 'messages' in result:
- result['messages'] = self.convert_channel_data(result['messages'])
-
- return [result]
-
- def convert_channel_data(self, messages: List[dict]):
- """
- Convert the list of channel dictionaries to a format that can be easily used in the data pipeline.
-
- Args:
- channels: A list of channel dictionaries.
-
- Returns:
- A list of channel dictionaries with modified keys and values.
- """
- new_messages = []
- for message in messages:
- new_message = {
- 'id': message['id'],
- 'name': message['name'],
- 'created': datetime.fromtimestamp(float(message['created']))
- }
- new_messages.append(new_message)
- return new_messages
diff --git a/mindsdb/integrations/handlers/xata_handler/README.md b/mindsdb/integrations/handlers/xata_handler/README.md
deleted file mode 100644
index a592fdc892b..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/README.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Xata Handler
-
-This is the implementation of the Xata for MindsDB.
-
-## Xata
-
-Xata is a serverless database platform powered by PostgreSQL. It aims to make the data part easy with the functionality your application needs to evolve and scale.
-
-## Implementation
-
-This handler uses `xata` python library connect to a xata instance
-
-The required arguments to establish a connection are:
-
-* `db_url`: Xata database url with region, database and, optionally the branch information
-* `api_key`: personal Xata API key
-
-Optional arguments to create a table are:
-
-* `dimension`: default dimension of embeddings vector used to create table when using create (default=8)
-
-Optional arguments for vector similarity searches are:
-
-* `similarity_function`: similarity function to use for vector searches (default=cosineSimilarity)
-
-## Limitations
-
-- Performing queries on columns other than vector database specified columns is not supported
- - You can use metadata column for general query filtering
-- Metadata filtering does not work on vector similarity search queries
-
-## Usage
-
-### Create Database
-
-In order to make use of this handler and connect to a hosted Xata instance in MindsDB, the following syntax can be used:
-
-```sql
-CREATE DATABASE xata_test
-WITH
- ENGINE = 'xata',
- PARAMETERS = {{
- "api_key": "...",
- "db_url": "..."
-};
-
-```
-
-### Create Table
-
-You can insert data into a new table like so:
-
-```sql
-CREATE TABLE xata_test.testingtable (SELECT * FROM pg.df)
-```
-
-The table will have default parameters as specified in `CREATE DATABASE` command
-
-### Select
-
-You can query a collection within your Xata as follows:
-
-```sql
-SELECT * FROM xata_test.testingtable
-```
-
-```sql
-SELECT * FROM xata_test.testingtable
-WHERE testingtable.metadata.price > 10 AND testingtable.metadata.price <= 100
-```
-
-```sql
-SELECT * FROM xata_test.testingtable
-WHERE content LIKE 'test%'
-```
-
-### Similarity search
-
-Search for similar embeddings by specifying search vector. Note that you cannot use metadata column with search vector.
-
-```sql
-SELECT * FROM xata_test.testingtable
-WHERE search_vector = '[1.0, 2.0, 3.0]'
-```
-
-```sql
-SELECT * FROM xata_test.testingtable
-WHERE search_vector = '[1.0, 2.0, 3.0]'
-AND content LIKE 'test%'
-```
-
-### Insert
-
-You can insert into table in various ways:
-
-```sql
-INSERT INTO xata_test.testingtable (content,metadata,embeddings)
-VALUES ('this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]')
-```
-
-```sql
-INSERT INTO xata_test.testingtable (content,metadata,embeddings)
-SELECT content,metadata,embeddings FROM pg.df2
-```
-
-## Delete
-
-You can delete only using ID and = operator. Deleting non existing records does not have any effect.
-
-```sql
-DELETE FROM xata_test.testingtable
-WHERE id = 'id2'
-```
diff --git a/mindsdb/integrations/handlers/xata_handler/__about__.py b/mindsdb/integrations/handlers/xata_handler/__about__.py
deleted file mode 100644
index 431c35101d4..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Xata handler"
-__package_name__ = "mindsdb_xata_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Xata"
-__author__ = "Aditya Azad"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/xata_handler/__init__.py b/mindsdb/integrations/handlers/xata_handler/__init__.py
deleted file mode 100644
index 0a7b2758254..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __description__ as description
-from .__about__ import __version__ as version
-from .connection_args import connection_args, connection_args_example
-try:
- from .xata_handler import XataHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Xata"
-name = "xata"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "connection_args",
- "connection_args_example",
- "import_error",
- "icon_path",
-]
diff --git a/mindsdb/integrations/handlers/xata_handler/connection_args.py b/mindsdb/integrations/handlers/xata_handler/connection_args.py
deleted file mode 100644
index a096ad4e805..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/connection_args.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- db_url={
- "type": ARG_TYPE.STR,
- "description": "Xata database url with region, database and, optionally the branch information",
- "required": True,
- },
- api_key={
- "type": ARG_TYPE.STR,
- "description": "personal Xata API key",
- "required": True,
- "secret": True
- },
- dimension={
- "type": ARG_TYPE.INT,
- "description": "default dimension of embeddings vector used to create table when using create (default=8)",
- "required": False,
- },
- similarity_function={
- "type": ARG_TYPE.STR,
- "description": "similarity function to use for vector searches (default=cosineSimilarity)",
- "required": False,
- }
-)
-
-connection_args_example = OrderedDict(
- db_url="https://...",
- api_key="abc_def...",
- dimension=8,
- similarity_function="l1"
-)
diff --git a/mindsdb/integrations/handlers/xata_handler/icon.svg b/mindsdb/integrations/handlers/xata_handler/icon.svg
deleted file mode 100644
index cd0d82f60fe..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/icon.svg
+++ /dev/null
@@ -1,29 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/xata_handler/requirements.txt b/mindsdb/integrations/handlers/xata_handler/requirements.txt
deleted file mode 100644
index 061364e420b..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-xata
diff --git a/mindsdb/integrations/handlers/xata_handler/tests/__init__.py b/mindsdb/integrations/handlers/xata_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/xata_handler/xata_handler.py b/mindsdb/integrations/handlers/xata_handler/xata_handler.py
deleted file mode 100644
index 5e5c9e82a0d..00000000000
--- a/mindsdb/integrations/handlers/xata_handler/xata_handler.py
+++ /dev/null
@@ -1,347 +0,0 @@
-from typing import List, Optional
-
-import pandas as pd
-import json
-import xata
-from xata.helpers import BulkProcessor
-
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-from mindsdb.integrations.libs.response import HandlerResponse
-from mindsdb.integrations.libs.response import HandlerResponse as Response
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
-from mindsdb.integrations.libs.vectordatabase_handler import (
- FilterCondition,
- FilterOperator,
- TableField,
- VectorStoreHandler,
-)
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class XataHandler(VectorStoreHandler):
- """This handler handles connection and execution of the Xata statements."""
-
- name = "xata"
-
- def __init__(self, name: str, **kwargs):
- super().__init__(name)
- self._connection_data = kwargs.get("connection_data")
- self._client_config = {
- "db_url": self._connection_data.get("db_url"),
- "api_key": self._connection_data.get("api_key"),
- }
- self._create_table_params = {
- "dimension": self._connection_data.get("dimension", 8),
- }
- self._select_params = {
- "similarity_function": self._connection_data.get("similarity_function", "cosineSimilarity"),
- }
- self._client = None
- self.is_connected = False
- self.connect()
-
- def __del__(self):
- if self.is_connected is True:
- self.disconnect()
-
- def connect(self):
- """Connect to a Xata database."""
- if self.is_connected is True:
- return self._client
- try:
- self._client = xata.XataClient(**self._client_config)
- self.is_connected = True
- return self._client
- except Exception as e:
- logger.error(f"Error connecting to Xata client: {e}!")
- self.is_connected = False
-
- def disconnect(self):
- """Close the database connection."""
- if self.is_connected is False:
- return
- self._client = None
- self.is_connected = False
-
- def check_connection(self):
- """Check the connection to the Xata database."""
- response_code = StatusResponse(False)
- need_to_close = self.is_connected is False
- # NOTE: no direct way to test this
- # try getting the user, if it fails, it means that we are not connected
- try:
- resp = self._client.users().get()
- if not resp.is_success():
- raise Exception(resp["message"])
- response_code.success = True
- except Exception as e:
- logger.error(f"Error connecting to Xata: {e}!")
- response_code.error_message = str(e)
- finally:
- if response_code.success is True and need_to_close:
- self.disconnect()
- if response_code.success is False and self.is_connected is True:
- self.is_connected = False
- return response_code
-
- def create_table(self, table_name: str, if_not_exists=True) -> HandlerResponse:
- """Create a table with the given name in the Xata database."""
-
- resp = self._client.table().create(table_name)
- if not resp.is_success():
- raise Exception(f"Unable to create table {table_name}: {resp['message']}")
- resp = self._client.table().set_schema(
- table_name=table_name,
- payload={
- "columns": [
- {
- "name": "embeddings",
- "type": "vector",
- "vector": {"dimension": self._create_table_params["dimension"]}
- },
- {"name": "content", "type": "text"},
- {"name": "metadata", "type": "json"},
- ]
- }
- )
- if not resp.is_success():
- raise Exception(f"Unable to change schema of table {table_name}: {resp['message']}")
-
- def drop_table(self, table_name: str, if_exists=True) -> HandlerResponse:
- """Delete a table from the Xata database."""
-
- resp = self._client.table().delete(table_name)
- if not resp.is_success():
- raise Exception(f"Unable to delete table: {resp['message']}")
-
- def get_columns(self, table_name: str) -> HandlerResponse:
- """Get columns of the given table"""
- # Vector stores have predefined columns
- try:
- # But at least try to see if the table is valid
- resp = self._client.table().get_columns(table_name)
- if not resp.is_success():
- raise Exception(f"Error getting columns: {resp['message']}")
- except Exception as e:
- return Response(
- resp_type=RESPONSE_TYPE.ERROR,
- error_message=f"{e}",
- )
- return super().get_columns(table_name)
-
- def get_tables(self) -> HandlerResponse:
- """Get the list of tables in the Xata database."""
- try:
- table_names = pd.DataFrame(
- columns=["TABLE_NAME"],
- data=[table_data["name"] for table_data in self._client.branch().get_details()["schema"]["tables"]],
- )
- return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=table_names)
- except Exception as e:
- return Response(
- resp_type=RESPONSE_TYPE.ERROR,
- error_message=f"Error getting list of tables: {e}",
- )
-
- def insert(self, table_name: str, data: pd.DataFrame, columns: List[str] = None):
- """ Insert data into the Xata database. """
- if columns:
- data = data[columns]
- # Convert to records
- data = data.to_dict("records")
- # Convert metadata to json
- for row in data:
- if "metadata" in row:
- row["metadata"] = json.dumps(row["metadata"])
- if len(data) > 1:
- # Bulk processing
- bp = BulkProcessor(self._client, throw_exception=True)
- bp.put_records(table_name, data)
- bp.flush_queue()
-
- elif len(data) == 0:
- # Skip
- return Response(resp_type=RESPONSE_TYPE.OK)
- elif "id" in data[0] and TableField.ID.value in columns:
- # If id present
- id = data[0]["id"]
- rest_of_data = data[0].copy()
- del rest_of_data["id"]
-
- resp = self._client.records().insert_with_id(
- table_name=table_name,
- record_id=id,
- payload=rest_of_data,
- create_only=True,
- columns=columns
- )
- if not resp.is_success():
- raise Exception(resp["message"])
-
- else:
- # If id not present
- resp = self._client.records().insert(
- table_name=table_name,
- payload=data[0],
- columns=columns
- )
- if not resp.is_success():
- raise Exception(resp["message"])
-
- def update(self, table_name: str, data: pd.DataFrame, columns: List[str] = None) -> HandlerResponse:
- """Update data in the Xata database."""
- # Not supported
- return super().update(table_name, data, columns)
-
- def _get_xata_operator(self, operator: FilterOperator) -> str:
- """Translate SQL operator to oprator understood by Xata filter language."""
- mapping = {
- FilterOperator.EQUAL: "$is",
- FilterOperator.NOT_EQUAL: "$isNot",
- FilterOperator.LESS_THAN: "$lt",
- FilterOperator.LESS_THAN_OR_EQUAL: "$le",
- FilterOperator.GREATER_THAN: "$gt",
- FilterOperator.GREATER_THAN_OR_EQUAL: "$gte",
- FilterOperator.LIKE: "$pattern",
- }
- if operator not in mapping:
- raise Exception(f"Operator '{operator}' is not supported!")
- return mapping[operator]
-
- def _translate_non_vector_conditions(self, conditions: List[FilterCondition]) -> Optional[dict]:
- """
- Translate a list of FilterCondition objects a dict that can be used by Xata for filtering.
- E.g.,
- [
- FilterCondition(
- column="metadata.price",
- op=FilterOperator.LESS_THAN,
- value=100,
- ),
- FilterCondition(
- column="metadata.price",
- op=FilterOperator.GREATER_THAN,
- value=10,
- )
- ]
- -->
- {
- "metadata->price" {
- "$gt": 10,
- "$lt": 100
- },
- }
- """
- if not conditions:
- return None
- # Translate metadata columns
- for condition in conditions:
- if condition.column.startswith(TableField.METADATA.value):
- condition.column = condition.column.replace(".", "->")
- # Generate filters
- filters = {}
- for condition in conditions:
- # Skip search vector condition
- if condition.column == TableField.SEARCH_VECTOR.value:
- continue
- current_filter = original_filter = {}
- # Special case LIKE: needs pattern translation
- if condition.op == FilterOperator.LIKE:
- condition.value = condition.value.replace("%", "*").replace("_", "?")
- # Generate substatment
- current_filter[condition.column] = {self._get_xata_operator(condition.op): condition.value}
- # Check for conflicting and insert
- for key in original_filter:
- if key in filters:
- filters[key] = {**filters[key], **original_filter[key]}
- else:
- filters = {**filters, **original_filter}
- return filters if filters else None
-
- def select(self, table_name: str, columns: List[str] = None, conditions: List[FilterCondition] = None,
- offset: int = None, limit: int = None) -> pd.DataFrame:
- """Run general query or a vector similarity search and return results."""
- if not columns:
- columns = [col["name"] for col in self.SCHEMA]
- # Generate filter conditions
- filters = self._translate_non_vector_conditions(conditions)
- # Check for search vector
- search_vector = (
- []
- if conditions is None
- else [
- condition.value
- for condition in conditions
- if condition.column == TableField.SEARCH_VECTOR.value
- ]
- )
- if len(search_vector) > 0:
- search_vector = search_vector[0]
- else:
- search_vector = None
- # Search
- results_df = pd.DataFrame(columns)
- if search_vector is not None:
- # Similarity
-
- params = {
- "queryVector": search_vector,
- "column": TableField.EMBEDDINGS.value,
- "similarityFunction": self._select_params["similarity_function"]
- }
- if filters:
- params["filter"] = filters
- if limit:
- params["size"] = limit
- results = self._client.data().vector_search(table_name, params)
- # Check for errors
- if not results.is_success():
- raise Exception(results["message"])
- # Convert result
- results_df = pd.DataFrame.from_records(results["records"])
- if "xata" in results_df.columns:
- results_df["xata"] = results_df["xata"].apply(lambda x: x["score"])
- results_df.rename({"xata": TableField.DISTANCE.value}, axis=1, inplace=True)
-
- else:
- # General get query
-
- params = {
- "columns": columns if columns else [],
- }
- if filters:
- params["filter"] = filters
- if limit or offset:
- params["page"] = {}
- if limit:
- params["page"]["size"] = limit
- if offset:
- params["page"]["offset"] = offset
- results = self._client.data().query(table_name, params)
- # Check for errors
- if not results.is_success():
- raise Exception(results["message"])
- # Convert result
- results_df = pd.DataFrame.from_records(results["records"])
- if "xata" in results_df.columns:
- results_df.drop(["xata"], axis=1, inplace=True)
-
- return results_df
-
- def delete(self, table_name: str, conditions: List[FilterCondition] = None):
- ids = []
- for condition in conditions:
- if condition.op == FilterOperator.EQUAL:
- ids.append(condition.value)
- else:
- return Response(
- resp_type=RESPONSE_TYPE.ERROR,
- error_message="You can only delete using '=' operator ID one at a time!",
- )
-
- for id in ids:
- resp = self._client.records().delete(table_name, id)
- if not resp.is_success():
- raise Exception(resp["message"])
diff --git a/mindsdb/integrations/handlers/youtube_handler/README.md b/mindsdb/integrations/handlers/youtube_handler/README.md
deleted file mode 100644
index 7122b3efab7..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/README.md
+++ /dev/null
@@ -1,74 +0,0 @@
-# Youtube Handler
-
-Youtube handler for MindsDB provides interfaces to connect with Youtube via APIs and pull the video comments of the particular video.
-
-## Youtube
-Youtube is a social video sharing platform businesses and creators. MindsDB users can deploy the youtube integration to perform NLP on youtube comments.
-
-## Youtube Handler Initialization
-
-The Youtube handler is initialized with the following parameters:
-
-- `youtube_api_token`: Youtube API key to use for authentication
-
-Please follow this (link)[https://blog.hubspot.com/website/how-to-get-youtube-api-key] to generate the token for
-accessing youtube API
-
-## Implemented Features
-
-- [x] Youtube comments table
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-
-- [x] Youtube channels table
-
-- [x] Youtube videos table
-
-
-## Example Usage
-
-The first step is to create a database with the new `Youtube` engine.
-
-~~~~sql
-CREATE DATABASE mindsdb_youtube
-WITH ENGINE = 'youtube',
-PARAMETERS = {
- "youtube_api_token": ""
-};
-~~~~
-
-Use the established connection to query the comments table
-
-~~~~sql
-SELECT * FROM mindsdb_youtube.comments
-WHERE video_id = "raWFGQ20OfA";
-~~~~
-
-Advanced queries for the youtube handler
-
-~~~~sql
-SELECT * FROM mindsdb_youtube.comments
-WHERE video_id = "raWFGQ20OfA"
-ORDER BY display_name ASC
-LIMIT 5;
-~~~~
-
-Given a channel_id, get information about the channel
-
-~~~~sql
-SELECT * FROM mindsdb_youtube.channels
-WHERE channel_id="UC-...";
-~~~~
-
-Here, `channel_id` column is mandatory in the where clause.
-
-Get information about any youtube video using video_id:
-
-~~~~sql
-SELECT * FROM mindsdb_youtube.videos
-WHERE video_id="id";
-~~~~
-
-`video_id` is a mandatory column in the where clause.
diff --git a/mindsdb/integrations/handlers/youtube_handler/__about__.py b/mindsdb/integrations/handlers/youtube_handler/__about__.py
deleted file mode 100644
index ddc8121a803..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Youtube handler"
-__package_name__ = "mindsdb_youtube_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Youtube"
-__author__ = "Balaji Seetharaman"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/youtube_handler/__init__.py b/mindsdb/integrations/handlers/youtube_handler/__init__.py
deleted file mode 100644
index 97371a99174..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-
-from .__about__ import __version__ as version, __description__ as description
-
-try:
- from .youtube_handler import YoutubeHandler as Handler
- from .connection_args import connection_args
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "YouTube"
-name = "youtube"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
- "connection_args",
-]
diff --git a/mindsdb/integrations/handlers/youtube_handler/connection_args.py b/mindsdb/integrations/handlers/youtube_handler/connection_args.py
deleted file mode 100644
index 926acb383be..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/connection_args.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- youtube_api_token={
- 'type': ARG_TYPE.STR,
- 'description': 'Youtube API Token',
- 'label': 'Youtube API Token',
- },
- credentials_url={
- 'type': ARG_TYPE.STR,
- 'description': 'URL to Service Account Keys',
- 'label': 'URL to Service Account Keys',
- },
- credentials_file={
- 'type': ARG_TYPE.STR,
- 'description': 'Location of Service Account Keys',
- 'label': 'Path to Service Account Keys',
- },
- credentials={
- 'type': ARG_TYPE.PATH,
- 'description': 'Service Account Keys',
- 'label': 'Upload Service Account Keys',
- },
- code={
- 'type': ARG_TYPE.STR,
- 'description': 'Code After Authorisation',
- 'label': 'Code After Authorisation',
- },
-)
diff --git a/mindsdb/integrations/handlers/youtube_handler/icon.svg b/mindsdb/integrations/handlers/youtube_handler/icon.svg
deleted file mode 100644
index c9244384ad9..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/icon.svg
+++ /dev/null
@@ -1,11 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/youtube_handler/requirements.txt b/mindsdb/integrations/handlers/youtube_handler/requirements.txt
deleted file mode 100644
index 7e0220f8fd0..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-google-api-python-client
-youtube-transcript-api
--r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/youtube_handler/youtube_handler.py b/mindsdb/integrations/handlers/youtube_handler/youtube_handler.py
deleted file mode 100644
index d31f666a71b..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/youtube_handler.py
+++ /dev/null
@@ -1,128 +0,0 @@
-from mindsdb.integrations.handlers.youtube_handler.youtube_tables import (
- YoutubeCommentsTable,
- YoutubeChannelsTable,
- YoutubeVideosTable,
-)
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.utilities.config import Config
-
-from googleapiclient.discovery import build
-
-from mindsdb.integrations.utilities.handlers.auth_utilities.google import GoogleUserOAuth2Manager
-
-DEFAULT_SCOPES = [
- 'https://www.googleapis.com/auth/youtube',
- 'https://www.googleapis.com/auth/youtube.force-ssl',
- 'https://www.googleapis.com/auth/youtubepartner'
-]
-
-logger = log.getLogger(__name__)
-
-
-class YoutubeHandler(APIHandler):
- """Youtube handler implementation"""
-
- def __init__(self, name=None, **kwargs):
- """Initialize the Youtube handler.
- Parameters
- ----------
- name : str
- name of a handler instance
- """
- super().__init__(name)
- self.connection_data = kwargs.get("connection_data", {})
- self.kwargs = kwargs
-
- self.parser = parse_sql
- self.connection = None
- self.is_connected = False
-
- self.handler_storage = kwargs['handler_storage']
-
- self.credentials_url = self.connection_data.get('credentials_url', None)
- self.credentials_file = self.connection_data.get('credentials_file', None)
- if self.connection_data.get('credentials'):
- self.credentials_file = self.connection_data.pop('credentials')
- if not self.credentials_file and not self.credentials_url:
- # try to get from config
- yt_config = Config().get('handlers', {}).get('youtube', {})
- secret_file = yt_config.get('credentials_file')
- secret_url = yt_config.get('credentials_url')
- if secret_file:
- self.credentials_file = secret_file
- elif secret_url:
- self.credentials_url = secret_url
-
- self.youtube_api_token = self.connection_data.get('youtube_api_token', None)
-
- self.scopes = self.connection_data.get('scopes', DEFAULT_SCOPES)
-
- youtube_video_comments_data = YoutubeCommentsTable(self)
- self._register_table("comments", youtube_video_comments_data)
-
- youtube_channel_data = YoutubeChannelsTable(self)
- self._register_table("channels", youtube_channel_data)
-
- youtube_video_data = YoutubeVideosTable(self)
- self._register_table("videos", youtube_video_data)
-
- def connect(self) -> StatusResponse:
- """Set up the connection required by the handler.
- Returns
- -------
- StatusResponse
- connection object
- """
- if self.is_connected is True:
- return self.connection
-
- google_oauth2_manager = GoogleUserOAuth2Manager(self.handler_storage, self.scopes, self.credentials_file, self.credentials_url, self.connection_data.get('code'))
- creds = google_oauth2_manager.get_oauth2_credentials()
-
- youtube = build(
- "youtube", "v3", developerKey=self.youtube_api_token, credentials=creds
- )
- self.connection = youtube
-
- return self.connection
-
- def check_connection(self) -> StatusResponse:
- """Check connection to the handler.
- Returns
- -------
- StatusResponse
- Status confirmation
- """
- response = StatusResponse(False)
-
- try:
- self.connect()
- response.success = True
- response.copy_storage = True
- except Exception as e:
- logger.error(f"Error connecting to Youtube API: {e}!")
- response.error_message = e
-
- self.is_connected = response.success
-
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/youtube_handler/youtube_tables.py b/mindsdb/integrations/handlers/youtube_handler/youtube_tables.py
deleted file mode 100644
index 7c9b607142c..00000000000
--- a/mindsdb/integrations/handlers/youtube_handler/youtube_tables.py
+++ /dev/null
@@ -1,575 +0,0 @@
-from typing import List
-
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.utilities import log
-
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.utilities.handlers.query_utilities import (
- SELECTQueryParser,
- SELECTQueryExecutor,
- INSERTQueryParser,
-)
-
-import pandas as pd
-import re
-from youtube_transcript_api import YouTubeTranscriptApi
-from youtube_transcript_api.formatters import JSONFormatter
-
-logger = log.getLogger(__name__)
-
-
-class YoutubeCommentsTable(APITable):
- """Youtube List Comments by video id Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the youtube "commentThreads()" API endpoint
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
- Returns
- -------
- pd.DataFrame
- youtube "commentThreads()" matching the query
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
- select_statement_parser = SELECTQueryParser(query, "comments", self.get_columns())
-
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- channel_id, video_id = None, None
- for a_where in where_conditions:
- if a_where[1] == "video_id":
- if a_where[0] != "=":
- raise NotImplementedError("Only '=' operator is supported for video_id column.")
- else:
- video_id = a_where[2]
- elif a_where[1] == "channel_id":
- if a_where[0] != "=":
- raise NotImplementedError("Only '=' operator is supported for channel_id column.")
- else:
- channel_id = a_where[2]
-
- if not video_id and not channel_id:
- raise ValueError("Either video_id or channel_id has to be present in where clause.")
-
- comments_df = self.get_comments(video_id=video_id, channel_id=channel_id)
-
- select_statement_executor = SELECTQueryExecutor(
- comments_df,
- selected_columns,
- [
- where_condition
- for where_condition in where_conditions
- if where_condition[1] not in ["video_id", "channel_id"]
- ],
- order_by_conditions,
- result_limit if query.limit else None,
- )
-
- comments_df = select_statement_executor.execute_query()
-
- return comments_df
-
- def insert(self, query: ast.Insert) -> None:
- """Inserts data into the YouTube POST /commentThreads API endpoint.
-
- Parameters
- ----------
- query : ast.Insert
- Given SQL INSERT query
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- insert_query_parser = INSERTQueryParser(query, self.get_columns())
-
- values_to_insert = insert_query_parser.parse_query()
-
- for value in values_to_insert:
- if not value.get("comment_id"):
- if not value.get("comment"):
- raise ValueError("comment is mandatory for inserting a top-level comment.")
- else:
- self.insert_comment(video_id=value["video_id"], text=value["comment"])
-
- else:
- if not value.get("reply"):
- raise ValueError("reply is mandatory for inserting a reply.")
- else:
- self.insert_comment(comment_id=value["comment_id"], text=value["reply"])
-
- def insert_comment(self, text, video_id: str = None, comment_id: str = None):
- # if comment_id is provided, define the request body for a reply and insert it
- if comment_id:
- request_body = {"snippet": {"parentId": comment_id, "textOriginal": text}}
-
- self.handler.connect().comments().insert(part="snippet", body=request_body).execute()
-
- # else if video_id is provided, define the request body for a top-level comment and insert it
- elif video_id:
- request_body = {"snippet": {"topLevelComment": {"snippet": {"videoId": video_id, "textOriginal": text}}}}
-
- self.handler.connect().commentThreads().insert(part="snippet", body=request_body).execute()
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
- return [
- "comment_id",
- "channel_id",
- "video_id",
- "user_id",
- "display_name",
- "comment",
- "published_at",
- "updated_at",
- "reply_user_id",
- "reply_author",
- "reply",
- ]
-
- def get_comments(self, video_id: str, channel_id: str):
- """Pulls all the records from the given youtube api end point and returns it select()
-
- Returns
- -------
- pd.DataFrame of all the records of the "commentThreads()" API end point
- """
-
- if video_id and channel_id:
- channel_id = None
-
- resource = (
- self.handler.connect()
- .commentThreads()
- .list(
- part="snippet, replies",
- videoId=video_id,
- allThreadsRelatedToChannelId=channel_id,
- textFormat="plainText",
- )
- )
-
- data = []
- while resource:
- comments = resource.execute()
-
- for comment in comments["items"]:
- replies = []
- if "replies" in comment:
- for reply in comment["replies"]["comments"]:
- replies.append(
- {
- "reply_author": reply["snippet"]["authorDisplayName"],
- "user_id": reply["snippet"]["authorChannelId"]["value"],
- "reply": reply["snippet"]["textOriginal"],
- }
- )
- else:
- replies.append(
- {
- "reply_author": None,
- "user_id": None,
- "reply": None,
- }
- )
-
- data.append(
- {
- "channel_id": comment["snippet"]["channelId"],
- "video_id": comment["snippet"]["videoId"],
- "user_id": comment["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"],
- "comment_id": comment["snippet"]["topLevelComment"]["id"],
- "display_name": comment["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"],
- "comment": comment["snippet"]["topLevelComment"]["snippet"]["textDisplay"],
- "published_at": comment["snippet"]["topLevelComment"]["snippet"]["publishedAt"],
- "updated_at": comment["snippet"]["topLevelComment"]["snippet"]["updatedAt"],
- "replies": replies,
- }
- )
-
- if "nextPageToken" in comments:
- resource = (
- self.handler.connect()
- .commentThreads()
- .list(
- part="snippet, replies",
- videoId=video_id,
- allThreadsRelatedToChannelId=channel_id,
- textFormat="plainText",
- pageToken=comments["nextPageToken"],
- )
- )
- else:
- break
-
- youtube_comments_df = pd.json_normalize(
- data,
- "replies",
- [
- "comment_id",
- "channel_id",
- "video_id",
- "user_id",
- "display_name",
- "comment",
- "published_at",
- "updated_at",
- ],
- record_prefix="replies.",
- )
- youtube_comments_df = youtube_comments_df.rename(
- columns={
- "replies.user_id": "reply_user_id",
- "replies.reply_author": "reply_author",
- "replies.reply": "reply",
- }
- )
-
- # check if DataFrame is empty
- if youtube_comments_df.empty:
- return youtube_comments_df
- else:
- return youtube_comments_df[
- [
- "comment_id",
- "channel_id",
- "video_id",
- "user_id",
- "display_name",
- "comment",
- "published_at",
- "updated_at",
- "reply_user_id",
- "reply_author",
- "reply",
- ]
- ]
-
-
-class YoutubeChannelsTable(APITable):
- """Youtube Channel Info by channel id Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- select_statement_parser = SELECTQueryParser(query, "channel", self.get_columns())
-
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- channel_id = None
- for op, arg1, arg2 in where_conditions:
- if arg1 == "channel_id":
- if op == "=":
- channel_id = arg2
- break
- else:
- raise NotImplementedError("Only '=' operator is supported for channel_id column.")
-
- if not channel_id:
- raise NotImplementedError("channel_id has to be present in where clause.")
-
- channel_df = self.get_channel_details(channel_id)
-
- select_statement_executor = SELECTQueryExecutor(
- channel_df,
- selected_columns,
- [where_condition for where_condition in where_conditions if where_condition[1] == "channel_id"],
- order_by_conditions,
- result_limit if query.limit else None,
- )
-
- channel_df = select_statement_executor.execute_query()
-
- return channel_df
-
- def get_channel_details(self, channel_id):
- details = (
- self.handler.connect().channels().list(part="statistics,snippet,contentDetails", id=channel_id).execute()
- )
- snippet = details["items"][0]["snippet"]
- statistics = details["items"][0]["statistics"]
- data = {
- "country": snippet["country"],
- "description": snippet["description"],
- "creation_date": snippet["publishedAt"],
- "title": snippet["title"],
- "subscriber_count": statistics["subscriberCount"],
- "video_count": statistics["videoCount"],
- "view_count": statistics["viewCount"],
- "channel_id": channel_id,
- }
- return pd.json_normalize(data)
-
- def get_columns(self) -> List[str]:
- return [
- "country",
- "description",
- "creation_date",
- "title",
- "subscriber_count",
- "video_count",
- "view_count",
- "channel_id",
- ]
-
-
-class YoutubeVideosTable(APITable):
- """Youtube Video info by video id Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- select_statement_parser = SELECTQueryParser(query, "video", self.get_columns())
-
- (
- selected_columns,
- where_conditions,
- order_by_conditions,
- result_limit,
- ) = select_statement_parser.parse_query()
-
- video_id, channel_id, search_query = None, None, None
- for op, arg1, arg2 in where_conditions:
- if arg1 == "video_id":
- if op == "=":
- video_id = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for video_id column.")
-
- elif arg1 == "channel_id":
- if op == "=":
- channel_id = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for channel_id column.")
-
- elif arg1 == "query":
- if op == "=":
- search_query = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for query column.")
-
- if not video_id and not channel_id and not search_query:
- raise ValueError("At least one of video_id, channel_id, or query must be present in the WHERE clause.")
-
- if video_id:
- video_df = self.get_videos_by_video_ids([video_id])
- elif channel_id and search_query:
- video_df = self.get_videos_by_search_query_in_channel(search_query, channel_id, result_limit)
- elif channel_id:
- video_df = self.get_videos_by_channel_id(channel_id, result_limit)
- else:
- video_df = self.get_videos_by_search_query(search_query, result_limit)
-
- select_statement_executor = SELECTQueryExecutor(
- video_df,
- selected_columns,
- [
- where_condition
- for where_condition in where_conditions
- if where_condition[1] not in ["video_id", "channel_id", "query"]
- ],
- order_by_conditions,
- result_limit if query.limit else None,
- )
-
- video_df = select_statement_executor.execute_query()
-
- return video_df
-
- def get_videos_by_search_query(self, search_query, limit=10):
- video_ids = []
- resource = (
- self.handler.connect()
- .search()
- .list(part="snippet", q=search_query, type="video", maxResults=min(50, limit))
- )
- total_fetched = 0
-
- while resource and total_fetched < limit:
- response = resource.execute()
- for item in response["items"]:
- video_ids.append(item["id"]["videoId"])
- total_fetched += 1
- if total_fetched >= limit:
- break
-
- if "nextPageToken" in response and total_fetched < limit:
- resource = (
- self.handler.connect()
- .search()
- .list(
- part="snippet",
- q=search_query,
- type="video",
- maxResults=min(50, limit - total_fetched),
- pageToken=response["nextPageToken"],
- )
- )
- else:
- break
-
- return self.get_videos_by_video_ids(video_ids)
-
- def get_videos_by_search_query_in_channel(self, search_query, channel_id, limit=10):
- """Search for videos within a specific channel"""
- video_ids = []
- resource = (
- self.handler.connect()
- .search()
- .list(part="snippet", q=search_query, channelId=channel_id, type="video", maxResults=min(50, limit))
- )
- total_fetched = 0
-
- while resource and total_fetched < limit:
- response = resource.execute()
- for item in response["items"]:
- video_ids.append(item["id"]["videoId"])
- total_fetched += 1
- if total_fetched >= limit:
- break
-
- if "nextPageToken" in response and total_fetched < limit:
- resource = (
- self.handler.connect()
- .search()
- .list(
- part="snippet",
- q=search_query,
- channelId=channel_id,
- type="video",
- maxResults=min(50, limit - total_fetched),
- pageToken=response["nextPageToken"],
- )
- )
- else:
- break
-
- return self.get_videos_by_video_ids(video_ids)
-
- def get_videos_by_channel_id(self, channel_id, limit=10):
- video_ids = []
- resource = (
- self.handler.connect()
- .search()
- .list(part="snippet", channelId=channel_id, type="video", maxResults=min(50, limit))
- )
- total_fetched = 0
- while resource and total_fetched < limit:
- response = resource.execute()
- for item in response["items"]:
- video_ids.append(item["id"]["videoId"])
- total_fetched += 1
- if total_fetched >= limit:
- break
- if "nextPageToken" in response and total_fetched < limit:
- resource = (
- self.handler.connect()
- .search()
- .list(
- part="snippet",
- channelId=channel_id,
- type="video",
- maxResults=min(50, limit - total_fetched),
- pageToken=response["nextPageToken"],
- )
- )
- else:
- break
-
- return self.get_videos_by_video_ids(video_ids)
-
- def get_videos_by_video_ids(self, video_ids):
- data = []
-
- if not isinstance(video_ids, list):
- logger.error(f"video_ids must be a list. Received {type(video_ids)} instead.")
- return pd.DataFrame()
-
- # loop over 50 video ids at a time
- # an invalid request error is caused otherwise
- for i in range(0, len(video_ids), 50):
- resource = (
- self.handler.connect()
- .videos()
- .list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i : i + 50]))
- .execute()
- )
-
- for item in resource["items"]:
- data.append(
- {
- "channel_id": item["snippet"]["channelId"],
- "channel_title": item["snippet"]["channelTitle"],
- "comment_count": item["statistics"]["commentCount"],
- "description": item["snippet"]["description"],
- "like_count": item["statistics"]["likeCount"],
- "publish_time": item["snippet"]["publishedAt"],
- "title": item["snippet"]["title"],
- "transcript": self.get_captions_by_video_id(item["id"]),
- "video_id": item["id"],
- "view_count": item["statistics"]["viewCount"],
- "duration_str": self.parse_duration(item["id"], item["contentDetails"]["duration"]),
- }
- )
-
- return pd.json_normalize(data)
-
- def get_captions_by_video_id(self, video_id):
- try:
- transcript_response = YouTubeTranscriptApi.get_transcript(video_id, preserve_formatting=True)
- json_formatted_transcript = JSONFormatter().format_transcript(transcript_response, indent=2)
- return json_formatted_transcript
-
- except Exception as e:
- (logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),)
- return "Transcript not available for this video"
-
- def parse_duration(self, video_id, duration):
- try:
- parsed_duration = re.search(r"PT(\d+H)?(\d+M)?(\d+S)", duration).groups()
- duration_str = ""
- for d in parsed_duration:
- if d:
- duration_str += f"{d[:-1]}:"
-
- return duration_str.strip(":")
- except Exception as e:
- (logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),)
- return "Duration not available for this video"
-
- def get_columns(self) -> List[str]:
- return [
- "channel_id",
- "channel_title",
- "title",
- "description",
- "publish_time",
- "comment_count",
- "like_count",
- "view_count",
- "video_id",
- "duration_str",
- "transcript",
- ]
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/README.md b/mindsdb/integrations/handlers/yugabyte_handler/README.md
deleted file mode 100644
index d4609edb2a2..00000000000
--- a/mindsdb/integrations/handlers/yugabyte_handler/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# YugabyteDB Handler
-
-This is the implementation of the YugabyteDB handler for MindsDB.
-
-## YugabyteDB
-
-YugabyteDB is a high-performance, cloud-native distributed SQL database that aims to support all PostgreSQL features. It is best to fit for cloud-native OLTP (i.e. real-time, business-critical) applications that need absolute data correctness and require at least one of the following: scalability, high tolerance to failures, or globally-distributed deployments.
-
-## Implementation
-This handler was implemented using the `psycopg`, a Python library that allows you to use Python code to run SQL commands on YugabyteDB.
-
-The required arguments to establish a connection are,
-* `user`: username asscociated with database
-* `password`: password to authenticate your access
-* `host`: host to server IP Address or hostname
-* `port`: port through which TCP/IP connection is to be made
-* `database`: Database name to be connected
-* `schema`(OPTIONAL): comma seperated schemas to be considered for querying (e.g., "**class,company**")
-* `sslmode`(OPTIONAL): Specifies the SSL mode for the connection, determining whether to use SSL encryption and the level of verification required (e.g., "**disable**", "**allow**", "**prefer**", "**require**", "**verify-ca**", "**verify-full**").
-
-
-## Usage
-
-In order to make use of this handler and connect to yugabyte in MindsDB, the following syntax can be used,
-
-```sql
-CREATE DATABASE yugabyte_datasource
-WITH
-engine='yugabyte',
-parameters={
- "user":"admin",
- "password":"1234",
- "host":"127.0.0.1",
- "port":5433,
- "database":"yugabyte",
- "schema":"your_schema_name"
-};
-```
-
-Now, you can use this established connection to query your database as follows,
-
-```sql
-SELECT * FROM yugabyte_datasource.demo;
-```
-
-NOTE : If you are using YugabyteDB Cloud with MindsDB Cloud website you need to add below 3 static IPs of MindsDB Cloud to `allow IP list` for accessing it publicly.
-```
-18.220.205.95
-3.19.152.46
-52.14.91.162
-```
-
-
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/__about__.py b/mindsdb/integrations/handlers/yugabyte_handler/__about__.py
deleted file mode 100644
index 50e7bad5a6e..00000000000
--- a/mindsdb/integrations/handlers/yugabyte_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB YugabyteDB handler'
-__package_name__ = 'mindsdb_yugabyte_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for YugabyteDB"
-__author__ = 'Parthiv Makwana'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/__init__.py b/mindsdb/integrations/handlers/yugabyte_handler/__init__.py
deleted file mode 100644
index e8c9d0d9f78..00000000000
--- a/mindsdb/integrations/handlers/yugabyte_handler/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .yugabyte_handler import YugabyteHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'YugabyteDB'
-name = 'yugabyte'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title',
- 'description', 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/icon.svg b/mindsdb/integrations/handlers/yugabyte_handler/icon.svg
deleted file mode 100644
index c0445a44037..00000000000
--- a/mindsdb/integrations/handlers/yugabyte_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/tests/__init__.py b/mindsdb/integrations/handlers/yugabyte_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py b/mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py
deleted file mode 100644
index 278c041761a..00000000000
--- a/mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.yugabyte_handler.yugabyte_handler import YugabyteHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-class YugabyteHandlerTest(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- cls.kwargs = {
- "connection_data": {
- "host": "localhost",
- "port": 5433,
- "user": "admin",
- "password": "",
- "database": "yugabyte"
- }
- }
- cls.handler = YugabyteHandler('test_yugabyte_handler', cls.kwargs)
-
- def test_0_connect(self):
- self.handler.connect()
-
- def test_1_drop_table(self):
- res = self.handler.query("DROP TABLE IF EXISTS PREM;")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_2_create_table(self):
- res = self.handler.query("CREATE TABLE IF NOT EXISTS PREM (Premi varchar(50));")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_3_insert_table(self):
- res = self.handler.query("INSERT INTO PREM VALUES('Radha <3 Krishna');")
- assert res.type is not RESPONSE_TYPE.ERROR
-
- def test_4_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_5_select_query(self):
- query = "SELECT * FROM PREM;"
- result = self.handler.native_query(query)
- assert result.type is RESPONSE_TYPE.TABLE or RESPONSE_TYPE.OK
-
- def test_6_check_connection(self):
- self.handler.check_connection()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py b/mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py
deleted file mode 100644
index e9e8dbc4c3c..00000000000
--- a/mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
- PostgresHandler,
-)
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-class YugabyteHandler(PostgresHandler):
- """
- This handler handles connection and execution of the YugabyteSQL statements.
- """
-
- name = 'yugabyte'
-
- def __init__(self, name, **kwargs):
- super().__init__(name, **kwargs)
-
-
-connection_args = OrderedDict(
- host={
- 'type': ARG_TYPE.STR,
- 'description': 'The host name or IP address of the YugabyteDB server/database.',
- },
- user={
- 'type': ARG_TYPE.STR,
- 'description': 'The user name used to authenticate with the YugabyteDB server.',
- },
- password={
- 'type': ARG_TYPE.STR,
- 'description': 'The password to authenticate the user with the YugabyteDB server.',
- },
- port={
- 'type': ARG_TYPE.INT,
- 'description': 'Specify port to connect YugabyteDB server',
- },
- database={
- 'type': ARG_TYPE.STR,
- 'description': 'Specify database name to connect YugabyteDB server',
- },
- schema={
- 'type': ARG_TYPE.STR,
- 'description': '(OPTIONAL) comma seperated value of schema to be considered while querying',
- },
- sslmode={
- 'type': ARG_TYPE.STR,
- 'description': ''' (OPTIONAL) Specifies the SSL mode for the connection, determining whether to use SSL encryption and the level of verification required
- (e.g., "**disable**", "**allow**", "**prefer**", "**require**", "**verify-ca**", "**verify-full**")''',
- },
-)
-
-connection_args_example = OrderedDict(
- host='127.0.0.1', port=5433, password='', user='admin', database='yugabyte'
-)
diff --git a/mindsdb/integrations/handlers/zendesk_handler/README.md b/mindsdb/integrations/handlers/zendesk_handler/README.md
deleted file mode 100644
index e4d2e619746..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/README.md
+++ /dev/null
@@ -1,72 +0,0 @@
----
-title: Zendesk
-sidebarTitle: Zendesk
----
-
-This documentation describes the integration of MindsDB with [Zendesk](https://www.zendesk.com/), which provides software-as-a-service products related to customer support, sales, and other customer communications.
-
-The integration allows MindsDB to access data from Zendesk and enhance it with AI capabilities.
-
-## Prerequisites
-
-Before proceeding, ensure the following prerequisites are met:
-
-1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
-2. To connect Zendesk to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies).
-
-## Connection
-
-Establish a connection to Zendesk from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/zendesk_handler) as an engine.
-
-```sql
-CREATE DATABASE zendesk_datasource
-WITH
- ENGINE = 'zendesk',
- PARAMETERS = {
- "api_key":"api_key",
- "sub_domain": "sub_domain",
- "email":"email"
- };
-```
-
-Required connection parameters include the following:
-
-* `api_key`: The api key for the Zendesk account.
-* `sub_domain`: The sub domain for the Zendesk account.
-* `email`: The email ID of the account.
-
-
-For enabling, generating and deleting API access, refer [Managing access to the Zendesk API](https://support.zendesk.com/hc/en-us/articles/4408889192858-Managing-access-to-the-Zendesk-API)
-
-
-## Usage
-
-Retrieve data from a specified table by providing the integration and table names:
-
-```sql
-SELECT *
-FROM zendesk_datasource.table_name
-LIMIT 10;
-```
-
-Retrieve data for a specific ticket by providing the id:
-
-```sql
-SELECT *
-FROM zendesk_datasource.tickets
-where id="";
-```
-
-
-
-The above examples utilize `zendesk_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command.
-
-
-## Supported Tables
-
-The Zendesk integration supports the following tables:
-
-* `users` : The table lists all the users.
-* `tickets` : The table lists all the tickets.
-* `triggers` : The table lists all the triggers.
-* `activities` : The table lists all the activities.
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/zendesk_handler/__about__.py b/mindsdb/integrations/handlers/zendesk_handler/__about__.py
deleted file mode 100644
index f190718159d..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB Zendesk handler"
-__package_name__ = "mindsdb_zendesk_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for Zendesk"
-__author__ = "Abhilash K R"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/zendesk_handler/__init__.py b/mindsdb/integrations/handlers/zendesk_handler/__init__.py
deleted file mode 100644
index e056be25fce..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .zendesk_handler import ZendeskHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "Zendesk"
-name = "zendesk"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
- "connection_args_example",
- "connection_args",
-]
diff --git a/mindsdb/integrations/handlers/zendesk_handler/connection_args.py b/mindsdb/integrations/handlers/zendesk_handler/connection_args.py
deleted file mode 100644
index cbc791dfdb5..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/connection_args.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- api_key={
- "type": ARG_TYPE.STR,
- "description": "API key",
- "required": True,
- "label": "api_key",
- "secret": True
- },
- sub_domain={
- "type": ARG_TYPE.STR,
- "description": "Sub-domain",
- "required": True,
- "label": "sub_domain",
- "secret": True
- },
- email={
- "type": ARG_TYPE.STR,
- "description": "Email ID",
- "required": True,
- "label": "email"
- }
-)
-
-connection_args_example = OrderedDict(
- api_key="api_key",
- sub_domain="sub_domain",
- email="email"
-)
diff --git a/mindsdb/integrations/handlers/zendesk_handler/icon.svg b/mindsdb/integrations/handlers/zendesk_handler/icon.svg
deleted file mode 100644
index 38967391a24..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/icon.svg
+++ /dev/null
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/zendesk_handler/requirements.txt b/mindsdb/integrations/handlers/zendesk_handler/requirements.txt
deleted file mode 100644
index ad3501b591d..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-zenpy
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/zendesk_handler/zendesk_handler.py b/mindsdb/integrations/handlers/zendesk_handler/zendesk_handler.py
deleted file mode 100644
index b820f948ac8..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/zendesk_handler.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.integrations.handlers.zendesk_handler.zendesk_tables import (
- ZendeskUsersTable,
- ZendeskTicketsTable,
- ZendeskTriggersTable,
- ZendeskActivitiesTable
-)
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-from mindsdb.utilities import log
-import zenpy
-
-logger = log.getLogger(__name__)
-
-
-class ZendeskHandler(APIHandler):
- """The Zendesk handler implementation"""
-
- def __init__(self, name: str, **kwargs):
- """Initialize the zendesk handler.
-
- Parameters
- ----------
- name : str
- name of a handler instance
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
- self.connection_data = connection_data
- self.kwargs = kwargs
- self.zen_client = None
- self.is_connected = False
-
- self._register_table("users", ZendeskUsersTable(self))
- self._register_table("tickets", ZendeskTicketsTable(self))
- self._register_table("triggers", ZendeskTriggersTable(self))
- self._register_table("activities", ZendeskActivitiesTable(self))
-
- def connect(self) -> StatusResponse:
- """Set up the connection required by the handler.
-
- Returns
- -------
- StatusResponse
- connection object
- """
- resp = StatusResponse(False)
- self.zen_client = zenpy.Zenpy(subdomain=self.connection_data["sub_domain"], email=self.connection_data["email"], token=self.connection_data["api_key"])
- try:
- self.zen_client.users()
- self.is_connected = True
- resp.success = True
- except Exception as ex:
- resp.success = False
- resp.error_message = str(ex)
- self.is_connected = False
- return resp
-
- def check_connection(self) -> StatusResponse:
- """Check connection to the handler.
-
- Returns
- -------
- StatusResponse
- Status confirmation
- """
- response = self.connect()
- self.is_connected = response.success
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
-
- Parameters
- ----------
- query : str
- query in a native format
-
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py b/mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py
deleted file mode 100644
index 8487b9a940c..00000000000
--- a/mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py
+++ /dev/null
@@ -1,496 +0,0 @@
-import pandas as pd
-from typing import List
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.handlers.query_utilities import (
- SELECTQueryParser,
- SELECTQueryExecutor,
-)
-from mindsdb.utilities import log
-from mindsdb_sql_parser import ast
-import zenpy
-
-logger = log.getLogger(__name__)
-
-
-class ZendeskUsersTable(APITable):
- """Zendesk Users Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the zendesk list users API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Zendesk users
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(query, "users", self.get_columns())
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- subset_where_conditions = []
- api_filters = {}
- for op, arg1, arg2 in where_conditions:
- if arg1 in self.get_columns():
- if op != "=":
- raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.")
- api_filters[arg1] = arg2
- subset_where_conditions.append([op, arg1, arg2])
-
- result = self.handler.zen_client.users(**api_filters)
- response = []
- if isinstance(result, zenpy.lib.generator.BaseResultGenerator):
- for user in result:
- response.append(user.to_dict())
- else:
- response.append(result.to_dict())
-
- df = pd.DataFrame(response, columns=self.get_columns())
-
- select_statement_executor = SELECTQueryExecutor(
- df, selected_columns, subset_where_conditions, order_by_conditions, result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "active",
- "alias",
- "chat_only",
- "created_at",
- "custom_role_id",
- "details",
- "email",
- "external_id",
- "id",
- "last_login_at",
- "locale",
- "locale_id",
- "moderator",
- "name",
- "notes",
- "only_private_comments",
- "organization_id",
- "phone",
- "photo",
- "restricted_agent",
- "role",
- "shared",
- "shared_agent",
- "signature",
- "suspended",
- "tags",
- "ticket_restriction",
- "time_zone",
- "two_factor_auth_enabled",
- "updated_at",
- "url",
- "verified",
- "iana_time_zone",
- "shared_phone_number",
- "role_type",
- "default_group_id",
- "report_csv",
- ]
-
-
-class ZendeskTicketsTable(APITable):
- """Zendesk tickets Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the zendesk tickets API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Ticket ID Data
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(query, "tickets", self.get_columns())
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- subset_where_conditions = []
- api_filters = {}
- for op, arg1, arg2 in where_conditions:
- if arg1 in self.get_columns():
- if op != "=":
- raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.")
- api_filters[arg1] = arg2
- subset_where_conditions.append([op, arg1, arg2])
-
- result = self.handler.zen_client.tickets(**api_filters)
- response = []
- if isinstance(result, zenpy.lib.generator.BaseResultGenerator):
- for ticket in result:
- response.append(ticket.to_dict())
- else:
- response.append(result.to_dict())
-
- df = pd.DataFrame(response, columns=self.get_columns())
-
- select_statement_executor = SELECTQueryExecutor(
- df, selected_columns, subset_where_conditions, order_by_conditions, result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "assignee_id",
- "brand_id",
- "collaborator_ids",
- "created_at",
- "custom_fields",
- "description",
- "due_at",
- "external_id",
- "fields",
- "forum_topic_id",
- "group_id",
- "has_incidents",
- "id",
- "organization_id",
- "priority",
- "problem_id",
- "raw_subject",
- "recipient",
- "requester_id",
- "sharing_agreement_ids",
- "status",
- "subject",
- "submitter_id",
- "tags",
- "type",
- "updated_at",
- "url",
- "generated_timestamp",
- "follower_ids",
- "email_cc_ids",
- "is_public",
- "custom_status_id",
- "followup_ids",
- "ticket_form_id",
- "allow_channelback",
- "allow_attachments",
- "from_messaging_channel",
- "satisfaction_rating.assignee_id",
- "satisfaction_rating.created_at",
- "satisfaction_rating.group_id",
- "satisfaction_rating.id",
- "satisfaction_rating.requester_id",
- "satisfaction_rating.score",
- "satisfaction_rating.ticket_id",
- "satisfaction_rating.updated_at",
- "satisfaction_rating.url",
- "via.channel",
- "via.source.rel",
- ]
-
-
-class ZendeskTriggersTable(APITable):
- """Zendesk Triggers Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the zendesk triggers API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Trigger Data
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(query, "triggers", self.get_columns())
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- subset_where_conditions = []
- api_filters = {}
- for op, arg1, arg2 in where_conditions:
- if arg1 in self.get_columns():
- if op != "=":
- raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.")
- api_filters[arg1] = arg2
- subset_where_conditions.append([op, arg1, arg2])
-
- result = self.handler.zen_client.triggers(**api_filters)
- response = []
- if isinstance(result, zenpy.lib.generator.BaseResultGenerator):
- for trigger in result:
- response.append(trigger.to_dict())
- else:
- response.append(result.to_dict())
-
- df = pd.DataFrame(response, columns=self.get_columns())
-
- select_statement_executor = SELECTQueryExecutor(
- df, selected_columns, subset_where_conditions, order_by_conditions, result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "actions",
- "active",
- "description",
- "id",
- "position",
- "title",
- "url",
- "updated_at",
- "created_at",
- "default",
- "raw_title",
- "category_id",
- "conditions.all",
- "conditions.any",
- ]
-
-
-class ZendeskActivitiesTable(APITable):
- """Zendesk Activities Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the zendesk activities API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Activity list Data
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(query, "activities", self.get_columns())
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- subset_where_conditions = []
- api_filters = {}
- for op, arg1, arg2 in where_conditions:
- if arg1 in self.get_columns():
- if op != "=":
- raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.")
- api_filters[arg1] = arg2
- subset_where_conditions.append([op, arg1, arg2])
-
- result = self.handler.zen_client.activities(**api_filters)
- response = []
- if isinstance(result, zenpy.lib.generator.BaseResultGenerator):
- for activity in result:
- response.append(activity.to_dict())
- else:
- response.append(result.to_dict())
-
- df = pd.DataFrame(response, columns=self.get_columns())
-
- select_statement_executor = SELECTQueryExecutor(
- df, selected_columns, subset_where_conditions, order_by_conditions, result_limit
- )
-
- df = select_statement_executor.execute_query()
-
- return df
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
-
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "created_at",
- "id",
- "title",
- "updated_at",
- "url",
- "verb",
- "user_id",
- "actor_id",
- "actor.id",
- "actor.url",
- "actor.name",
- "actor.email",
- "actor.created_at",
- "actor.updated_at",
- "actor.time_zone",
- "actor.iana_time_zone",
- "actor.phone",
- "actor.shared_phone_number",
- "actor.photo",
- "actor.locale_id",
- "actor.locale",
- "actor.organization_id",
- "actor.role",
- "actor.verified",
- "actor.external_id",
- "actor.tags",
- "actor.alias",
- "actor.active",
- "actor.shared",
- "actor.shared_agent",
- "actor.last_login_at",
- "actor.two_factor_auth_enabled",
- "actor.signature",
- "actor.details",
- "actor.notes",
- "actor.role_type",
- "actor.custom_role_id",
- "actor.moderator",
- "actor.ticket_restriction",
- "actor.only_private_comments",
- "actor.restricted_agent",
- "actor.suspended",
- "actor.default_group_id",
- "actor.report_csv",
- "user.active",
- "user.alias",
- "user.chat_only",
- "user.created_at",
- "user.custom_role_id",
- "user.details",
- "user.email",
- "user.external_id",
- "user.id",
- "user.last_login_at",
- "user.locale",
- "user.locale_id",
- "user.moderator",
- "user.name",
- "user.notes",
- "user.only_private_comments",
- "user.organization_id",
- "user.phone",
- "user.photo",
- "user.restricted_agent",
- "user.role",
- "user.shared",
- "user.shared_agent",
- "user.signature",
- "user.suspended",
- "user.tags",
- "user.ticket_restriction",
- "user.time_zone",
- "user.two_factor_auth_enabled",
- "user.updated_at",
- "user.url",
- "user.verified",
- "user.iana_time_zone",
- "user.shared_phone_number",
- "user.role_type",
- "user.default_group_id",
- "user.report_csv",
- "target.active",
- "target.content_type",
- "target.created_at",
- "target.id",
- "target.method",
- "target.password",
- "target.target_url",
- "target.title",
- "target.type",
- "target.url",
- "target.username",
- "target.ticket.assignee_id",
- "target.ticket.brand_id",
- "target.ticket.collaborator_ids",
- "target.ticket.created_at",
- "target.ticket.custom_fields",
- "target.ticket.description",
- "target.ticket.due_at",
- "target.ticket.external_id",
- "target.ticket.fields",
- "target.ticket.forum_topic_id",
- "target.ticket.group_id",
- "target.ticket.has_incidents",
- "target.ticket.id",
- "target.ticket.organization_id",
- "target.ticket.priority",
- "target.ticket.problem_id",
- "target.ticket.raw_subject",
- "target.ticket.recipient",
- "target.ticket.requester_id",
- "target.ticket.satisfaction_rating",
- "target.ticket.sharing_agreement_ids",
- "target.ticket.status",
- "target.ticket.subject",
- "target.ticket.submitter_id",
- "target.ticket.tags",
- "target.ticket.type",
- "target.ticket.updated_at",
- "target.ticket.url",
- "target.ticket.via",
- ]
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/README.md b/mindsdb/integrations/handlers/zipcodebase_handler/README.md
deleted file mode 100644
index e86858edc30..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# ZipCodeBase Handler
-
-ZipCodeBase handler for MindsDB provides interfaces to connect to ZipCodeBase via APIs and import zipcode data into MindsDB.
-
----
-
-## Table of Contents
-
-- [ZipCodeBase Handler](#zipcodebase-handler)
- - [Table of Contents](#table-of-contents)
- - [About ZipCodeBase](#about-zipcodebase)
- - [ZipCodeBase Handler Implementation](#zipcodebase-handler-implementation)
- - [ZipCodeBase Handler Initialization](#zipcodebase-handler-initialization)
- - [Implemented Features](#implemented-features)
- - [Example Usage](#example-usage)
-
----
-
-## About ZipCodeBase
-
-[Zipcodebase.com](https://zipcodebase.com/) is the perfect tool to perform postal code validation, lookups and other calculative tasks, such as postal code distance calculations. Zipcodebase offers a wide range of endpoints that give you access to any type of data you might need.
-
-## ZipCodeBase Handler Implementation
-
-This handler was implemented using the `requests` library that makes http calls to https://app.zipcodebase.com/documentation.
-
-## ZipCodeBase Handler Initialization
-
-The ZipCodeBase handler is initialized with the following parameters:
-
-- `api_key`: API Key used to authenticate with ZipCodeBase
-
-Read about creating an API Key [here](https://zipcodebase.com/).
-
-## Implemented Features
-
-- [x] ZipCodeBase
- - [x] Support SELECT
- - [x] Support LIMIT
- - [x] Support WHERE
- - [x] Support ORDER BY
- - [x] Support column selection
-
-## Example Usage
-
-The first step is to create a database with the new `zipcodebase` engine.
-
-~~~~sql
-CREATE DATABASE mindsdb_zipcodebase
-WITH ENGINE = 'zipcodebase',
-PARAMETERS = {
- "api_key": ""
-};
-~~~~
-
-Use the established connection to query your database:
-
-~~~~sql
-SELECT * FROM mindsdb_zipcodebase.code_to_location where codes="10005";
-~~~~
-
-~~~~sql
-SELECT * FROM mindsdb_zipcodebase.codes_within_radius WHERE code="10005" AND radius="100" AND country="us";
-~~~~
-
-~~~~sql
-SELECT * FROM mindsdb_zipcodebase.codes_by_city WHERE city="Amsterdam" AND country="nl";
-~~~~
-
-~~~~sql
-SELECT * FROM mindsdb_zipcodebase.codes_by_state WHERE state="Noord-Holland" AND country="nl";
-~~~~
-
-~~~~sql
-SELECT * FROM mindsdb_zipcodebase.states_by_country WHERE country="de";
-~~~~
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/__about__.py b/mindsdb/integrations/handlers/zipcodebase_handler/__about__.py
deleted file mode 100644
index ad8a4bf1127..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = "MindsDB ZipCodeBase handler"
-__package_name__ = "mindsdb_zipcodebase_handler"
-__version__ = "0.0.1"
-__description__ = "MindsDB handler for ZipCodeBase"
-__author__ = "Abhilash"
-__github__ = "https://github.com/mindsdb/mindsdb"
-__pypi__ = "https://pypi.org/project/mindsdb/"
-__license__ = "MIT"
-__copyright__ = "Copyright 2023 - mindsdb"
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/__init__.py b/mindsdb/integrations/handlers/zipcodebase_handler/__init__.py
deleted file mode 100644
index 4508e5ce038..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-from .connection_args import connection_args, connection_args_example
-try:
- from .zipcodebase_handler import ZipCodeBaseHandler as Handler
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = "ZipCodeBase"
-name = "zipcodebase"
-type = HANDLER_TYPE.DATA
-icon_path = "icon.svg"
-
-__all__ = [
- "Handler",
- "version",
- "name",
- "type",
- "title",
- "description",
- "import_error",
- "icon_path",
- "connection_args_example",
- "connection_args",
-]
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/connection_args.py b/mindsdb/integrations/handlers/zipcodebase_handler/connection_args.py
deleted file mode 100644
index 672b4654b5b..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/connection_args.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-connection_args = OrderedDict(
- api_key={
- "type": ARG_TYPE.PWD,
- "description": "ZipCodeBase api key to use for authentication.",
- "required": True,
- "label": "Api key",
- "secret": True
- }
-)
-
-connection_args_example = OrderedDict(
- api_key=""
-)
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/icon.svg b/mindsdb/integrations/handlers/zipcodebase_handler/icon.svg
deleted file mode 100644
index 45197f7418e..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/icon.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/tests/__init__.py b/mindsdb/integrations/handlers/zipcodebase_handler/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase.py b/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase.py
deleted file mode 100644
index b1298e526e4..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import requests
-
-
-class ZipCodeBaseClient:
-
- def __init__(self, api_key):
- self.api_key = api_key
- self.base_endpoint = "https://app.zipcodebase.com/api/v1"
-
- def make_request(self, url, params=None):
- headers = {'Content-type': 'application/json'}
- if self.api_key:
- headers['apikey'] = self.api_key
- resp = requests.get(url, headers=headers, params=params)
- content = {}
- if resp.status_code == 200:
- content = {'content': resp.json(), 'code': 200}
- else:
- content = {'content': {}, 'code': resp.status_code, 'error': resp.text}
- return content
-
- def code_to_location(self, codes):
- url = f'{self.base_endpoint}/search'
- params = (
- ("codes", codes),
- )
- return self.make_request(url, params)
-
- def codes_within_radius(self, code, radius, country, unit):
- url = f'{self.base_endpoint}/radius'
- params = (
- ("code", code),
- ("radius", radius),
- ("country", country),
- ("unit", unit),
- )
- return self.make_request(url, params)
-
- def codes_by_city(self, city, country, limit):
- url = f'{self.base_endpoint}/code/city'
- params = (
- ("city", city),
- ("country", country),
- ("limit", limit),
- )
- return self.make_request(url, params)
-
- def codes_by_state(self, state, country, limit):
- url = f'{self.base_endpoint}/code/state'
- params = (
- ("state_name", state),
- ("country", country),
- ("limit", limit),
- )
- return self.make_request(url, params)
-
- def states_by_country(self, country):
- url = f'{self.base_endpoint}/country/province'
- params = (
- ("country", country),
- )
- return self.make_request(url, params)
-
- def remaining_requests(self):
- url = f'{self.base_endpoint}/status'
- params = ()
- return self.make_request(url, params)
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_handler.py b/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_handler.py
deleted file mode 100644
index 1684ebca743..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_handler.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from mindsdb.integrations.handlers.zipcodebase_handler.zipcodebase_tables import (
- ZipCodeBaseCodeLocationTable,
- ZipCodeBaseCodeInRadiusTable,
- ZipCodeBaseCodeByCityTable,
- ZipCodeBaseCodeByStateTable,
- ZipCodeBaseStatesByCountryTable
-)
-from mindsdb.integrations.handlers.zipcodebase_handler.zipcodebase import ZipCodeBaseClient
-from mindsdb.integrations.libs.api_handler import APIHandler
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
-)
-
-from mindsdb.utilities import log
-from mindsdb_sql_parser import parse_sql
-
-
-logger = log.getLogger(__name__)
-
-
-class ZipCodeBaseHandler(APIHandler):
- """The ZipCodeBase handler implementation"""
-
- def __init__(self, name: str, **kwargs):
- """Initialize the ZipCodeBase handler.
- Parameters
- ----------
- name : str
- name of a handler instance
- """
- super().__init__(name)
-
- connection_data = kwargs.get("connection_data", {})
- self.connection_data = connection_data
- self.kwargs = kwargs
- self.client = ZipCodeBaseClient(self.connection_data["api_key"])
- self.is_connected = False
-
- code_to_location_data = ZipCodeBaseCodeLocationTable(self)
- self._register_table("code_to_location", code_to_location_data)
-
- codes_within_radius_data = ZipCodeBaseCodeInRadiusTable(self)
- self._register_table("codes_within_radius", codes_within_radius_data)
-
- codes_by_city_data = ZipCodeBaseCodeByCityTable(self)
- self._register_table("codes_by_city", codes_by_city_data)
-
- codes_by_state_data = ZipCodeBaseCodeByStateTable(self)
- self._register_table("codes_by_state", codes_by_state_data)
-
- states_by_country_data = ZipCodeBaseStatesByCountryTable(self)
- self._register_table("states_by_country", states_by_country_data)
-
- def connect(self) -> StatusResponse:
- """Set up the connection required by the handler.
- Returns
- -------
- StatusResponse
- connection object
- """
- resp = StatusResponse(False)
- status = self.client.remaining_requests()
- if status["code"] != 200:
- resp.success = False
- resp.error_message = status["error"]
- return resp
- self.is_connected = True
- return resp
-
- def check_connection(self) -> StatusResponse:
- """Check connection to the handler.
- Returns
- -------
- StatusResponse
- Status confirmation
- """
- response = StatusResponse(False)
-
- try:
- status = self.client.remaining_requests()
- if status["code"] == 200:
- logger.info("Authentication successful")
- response.success = True
- else:
- response.success = False
- logger.info("Error connecting to ZipCodeBase. " + status["error"])
- response.error_message = status["error"]
- except Exception as e:
- logger.error(f"Error connecting to ZipCodeBase: {e}!")
- response.error_message = e
-
- self.is_connected = response.success
- return response
-
- def native_query(self, query: str) -> StatusResponse:
- """Receive and process a raw query.
- Parameters
- ----------
- query : str
- query in a native format
- Returns
- -------
- StatusResponse
- Request status
- """
- ast = parse_sql(query)
- return self.query(ast)
diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_tables.py b/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_tables.py
deleted file mode 100644
index 6e9edc8ee89..00000000000
--- a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_tables.py
+++ /dev/null
@@ -1,499 +0,0 @@
-import pandas as pd
-from typing import List
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor
-from mindsdb.utilities import log
-from mindsdb_sql_parser import ast
-
-logger = log.getLogger(__name__)
-
-
-class ZipCodeBaseCodeLocationTable(APITable):
- """The ZipCodeBase Location Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the https://app.zipcodebase.com/documentation#search API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- Location of the codes matching the query
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'code_to_location',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
-
- for op, arg1, arg2 in where_conditions:
- if arg1 == "codes":
- if op == '=':
- search_params["codes"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for codes column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = "codes" in search_params
-
- if not filter_flag:
- raise NotImplementedError("`codes` column has to be present in where clause.")
-
- code_to_location_df = pd.DataFrame(columns=self.get_columns())
-
- response = self.handler.client.code_to_location(search_params.get("codes"))
-
- self.check_res(res=response)
-
- content = response["content"]
-
- code_to_location_df = pd.json_normalize(self.clean_resp(content["results"]))
-
- select_statement_executor = SELECTQueryExecutor(
- code_to_location_df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- code_to_location_df = select_statement_executor.execute_query()
-
- return code_to_location_df
-
- def clean_resp(self, data):
- clean_data = []
- for k, v in data.items():
- clean_data.extend(v)
- return clean_data
-
- def check_res(self, res):
- if res["code"] != 200:
- raise Exception("Error fetching results - " + res["error"])
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "postal_code",
- "country_code",
- "latitude",
- "longitude",
- "city",
- "state",
- "city_en",
- "state_en",
- "state_code",
- "province",
- "province_code"
- ]
-
-
-class ZipCodeBaseCodeInRadiusTable(APITable):
- """The ZipCodeBase Codes within Radius Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the https://app.zipcodebase.com/documentation#radius API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- codes within the radius
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'codes_within_radius',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
-
- for op, arg1, arg2 in where_conditions:
- if arg1 == "code":
- if op == '=':
- search_params["code"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for code column.")
-
- if arg1 == "radius":
- if op == '=':
- search_params["radius"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for radius column.")
-
- if arg1 == "country":
- if op == '=':
- search_params["country"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for country column.")
-
- if arg1 == "unit":
- if op == '=':
- search_params["unit"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for unit column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("code" in search_params) and ("radius" in search_params) and ("country" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("`codes`, `radius` and `country` columns have to be present in where clause.")
-
- code_to_location_df = pd.DataFrame(columns=self.get_columns())
-
- response = self.handler.client.codes_within_radius(search_params.get("code"), search_params.get("radius"), search_params.get("country"), search_params.get("unit", "km"))
-
- self.check_res(res=response)
-
- content = response["content"]
-
- logger.info(f"response size - {len(content['results'])}")
- code_to_location_df = pd.json_normalize(content["results"])
-
- select_statement_executor = SELECTQueryExecutor(
- code_to_location_df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- code_to_location_df = select_statement_executor.execute_query()
-
- return code_to_location_df
-
- def check_res(self, res):
- if res["code"] != 200:
- raise Exception("Error fetching results - " + res["error"])
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "code",
- "city",
- "state",
- "city_en",
- "state_en",
- "distance"
- ]
-
-
-class ZipCodeBaseCodeByCityTable(APITable):
- """The ZipCodeBase Codes within a City Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the https://app.zipcodebase.com/documentation#city API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- codes within the city
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'codes_by_city',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
-
- for op, arg1, arg2 in where_conditions:
- if arg1 == "city":
- if op == '=':
- search_params["city"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for city column.")
-
- if arg1 == "country":
- if op == '=':
- search_params["country"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for country column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("city" in search_params) and ("country" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("`city` and `country` columns have to be present in where clause.")
-
- codes_by_city_df = pd.DataFrame(columns=self.get_columns())
-
- response = self.handler.client.codes_by_city(search_params.get("city"), search_params.get("country"))
-
- self.check_res(res=response)
-
- content = response["content"]
-
- logger.info(f"response size - {len(content['results'])}")
- codes_by_city_df = pd.json_normalize({"codes": content["results"]})
-
- select_statement_executor = SELECTQueryExecutor(
- codes_by_city_df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- codes_by_city_df = select_statement_executor.execute_query()
-
- return codes_by_city_df
-
- def check_res(self, res):
- if res["code"] != 200:
- raise Exception("Error fetching results - " + res["error"])
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "codes"
- ]
-
-
-class ZipCodeBaseCodeByStateTable(APITable):
- """The ZipCodeBase Codes within a State Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the https://app.zipcodebase.com/documentation#state API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- codes within the state
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'codes_by_state',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
-
- for op, arg1, arg2 in where_conditions:
- if arg1 == "state":
- if op == '=':
- search_params["state"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for state column.")
-
- if arg1 == "country":
- if op == '=':
- search_params["country"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for country column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("state" in search_params) and ("country" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("`state` and `country` columns have to be present in where clause.")
-
- codes_by_state_df = pd.DataFrame(columns=self.get_columns())
-
- response = self.handler.client.codes_by_state(search_params.get("state"), search_params.get("country"))
-
- self.check_res(res=response)
-
- content = response["content"]
-
- logger.info(f"response size - {len(content['results'])}")
- codes_by_state_df = pd.json_normalize({"codes": content["results"]})
-
- select_statement_executor = SELECTQueryExecutor(
- codes_by_state_df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- codes_by_state_df = select_statement_executor.execute_query()
-
- return codes_by_state_df
-
- def check_res(self, res):
- if res["code"] != 200:
- raise Exception("Error fetching results - " + res["error"])
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "codes"
- ]
-
-
-class ZipCodeBaseStatesByCountryTable(APITable):
- """The ZipCodeBase Provinces/states within a country Table implementation"""
-
- def select(self, query: ast.Select) -> pd.DataFrame:
- """Pulls data from the https://app.zipcodebase.com/documentation#provinces API
-
- Parameters
- ----------
- query : ast.Select
- Given SQL SELECT query
-
- Returns
- -------
- pd.DataFrame
- states within a country
-
- Raises
- ------
- ValueError
- If the query contains an unsupported condition
- """
-
- select_statement_parser = SELECTQueryParser(
- query,
- 'states_by_country',
- self.get_columns()
- )
-
- selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()
-
- search_params = {}
- subset_where_conditions = []
-
- for op, arg1, arg2 in where_conditions:
-
- if arg1 == "country":
- if op == '=':
- search_params["country"] = arg2
- else:
- raise NotImplementedError("Only '=' operator is supported for country column.")
-
- elif arg1 in self.get_columns():
- subset_where_conditions.append([op, arg1, arg2])
-
- filter_flag = ("country" in search_params)
-
- if not filter_flag:
- raise NotImplementedError("`country` column has to be present in where clause.")
-
- states_by_country_df = pd.DataFrame(columns=self.get_columns())
-
- response = self.handler.client.states_by_country(search_params.get("country"))
-
- self.check_res(res=response)
-
- content = response["content"]
-
- logger.info(f"response size - {len(content['results'])}")
- states_by_country_df = pd.json_normalize({"states": content["results"]})
-
- select_statement_executor = SELECTQueryExecutor(
- states_by_country_df,
- selected_columns,
- subset_where_conditions,
- order_by_conditions,
- result_limit
- )
-
- states_by_country_df = select_statement_executor.execute_query()
-
- return states_by_country_df
-
- def check_res(self, res):
- if res["code"] != 200:
- raise Exception("Error fetching results - " + res["error"])
-
- def get_columns(self) -> List[str]:
- """Gets all columns to be returned in pandas DataFrame responses
- Returns
- -------
- List[str]
- List of columns
- """
-
- return [
- "states"
- ]
diff --git a/mindsdb/integrations/handlers/zotero_handler/README.md b/mindsdb/integrations/handlers/zotero_handler/README.md
deleted file mode 100644
index c49534976f1..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Build your own Zotero AI agent
-
-This is the implementation of the Zotero handler for MindsDB.
-
-## Zotero
-[Zotero](https://www.zotero.org/) is a free tool for organizing and managing research materials. It lets you store articles, books, and other resources in one place, annotate them, take notes, and create collections for organization. You can also share your collections with others.
-
-## Implementation
-This handler uses [pyzotero](https://pyzotero.readthedocs.io/en/latest/) , an API wrapper for Zotero as a simple library to facilitate the integration.
-
-The required arguments to establish a connection are,
-- `library_id` :
-To find your library ID, as noted on [pyzotero](https://pyzotero.readthedocs.io/en/latest/) :
- - Your personal library ID is available [here](https://www.zotero.org/settings/keys), in the section "Your userID for use in API calls" - you must be logged in for the link to work.
- - For group libraries, the ID can be found by opening the groupβs page: https://www.zotero.org/groups/groupname, and hovering over the group settings link. The ID is the integer after /groups/
-- `library_type` : "user" for personal account or "group" for a group library
-- `api_key` :
-To find your api key, as noted on [pyzotero](https://pyzotero.readthedocs.io/en/latest/) : follow [this link](https://www.zotero.org/settings/keys/new)
-
-## Usage
-In order to make use of this handler and connect to Zotero in MindsDB, the following syntax can be used,
-
-```sql
-CREATE DATABASE mindsdb_zotero
-WITH ENGINE = 'zotero',
-PARAMETERS = {
- "library_id": "",
- "library_type": "user",
- "api_key": "" }
-```
-
-## Implemented Features
-
-Now, you can use this established connection to query your table as follows:
-
-Important note: Most things (articles, books, annotations, notes etc.) are refered to as **items** in the pyzotero API. So each item has an item_type (such as "annotation") and an item_id. Some items are children to other items (for example an annotation can be a child-item on the article where it was made on which is the parent-item)
-
-### Annotations
-Annotations are the highlighted text of research materials. So, the most important information is in that annotated text content. To get that content and metadata about the annotations we can use the following queries.
-
-To select all annotations of your library:
-
-```sql
-SELECT * FROM mindsdb_zotero.annotations
-```
-
-To select all data for one annotation by its id:
-```sql
-SELECT * FROM mindsdb_zotero.annotations WHERE item_id = ""
-```
-
-To select all annotations of a parent item (like all annotations on a book):
-```sql
-SELECT * FROM mindsdb_zotero.annotations WHERE parent_item_id = ""
-```
-
-
diff --git a/mindsdb/integrations/handlers/zotero_handler/__about__.py b/mindsdb/integrations/handlers/zotero_handler/__about__.py
deleted file mode 100644
index 3f9f15648e9..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/__about__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-__title__ = 'MindsDB Zotero handler'
-__package_name__ = 'mindsdb_zotero_handler'
-__version__ = '0.0.1'
-__description__ = "MindsDB handler for Zotero"
-__author__ = 'Elina Kapetanaki'
-__github__ = 'https://github.com/mindsdb/mindsdb'
-__pypi__ = 'https://pypi.org/project/mindsdb/'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2024 - mindsdb'
diff --git a/mindsdb/integrations/handlers/zotero_handler/__init__.py b/mindsdb/integrations/handlers/zotero_handler/__init__.py
deleted file mode 100644
index ace936d806f..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mindsdb.integrations.libs.const import HANDLER_TYPE
-
-from .__about__ import __version__ as version, __description__ as description
-try:
- from .zotero_handler import (
- ZoteroHandler as Handler
- )
- import_error = None
-except Exception as e:
- Handler = None
- import_error = e
-
-title = 'Zotero'
-name = 'zotero'
-type = HANDLER_TYPE.DATA
-icon_path = 'icon.svg'
-
-__all__ = [
- 'Handler', 'version', 'name', 'type', 'title', 'description',
- 'import_error', 'icon_path'
-]
diff --git a/mindsdb/integrations/handlers/zotero_handler/icon.svg b/mindsdb/integrations/handlers/zotero_handler/icon.svg
deleted file mode 100644
index 8a11ff428fa..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/icon.svg
+++ /dev/null
@@ -1,102 +0,0 @@
-
-
-
diff --git a/mindsdb/integrations/handlers/zotero_handler/requirements.txt b/mindsdb/integrations/handlers/zotero_handler/requirements.txt
deleted file mode 100644
index 3f3fa7b9ec9..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyzotero
diff --git a/mindsdb/integrations/handlers/zotero_handler/zotero_handler.py b/mindsdb/integrations/handlers/zotero_handler/zotero_handler.py
deleted file mode 100644
index c85f3df2dd9..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/zotero_handler.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-from pyzotero import zotero
-from mindsdb.utilities import log
-from mindsdb.utilities.config import Config
-from mindsdb.integrations.libs.api_handler import APIHandler, FuncParser
-
-from mindsdb.integrations.handlers.zotero_handler.zotero_tables import AnnotationsTable
-
-from mindsdb.integrations.libs.response import (
- HandlerStatusResponse as StatusResponse,
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
-
-logger = log.getLogger(__name__)
-
-
-class ZoteroHandler(APIHandler):
- """Handles communication with the Zotero API."""
-
- def __init__(self, name=None, **kwargs):
- """Initialize the Zotero handler.
-
- Parameters
- ----------
- name : str
- Name of the handler instance.
-
- Other Parameters
- ----------------
- connection_data : dict
- Dictionary containing connection data such as 'library_id', 'library_type', and 'api_key'.
- If not provided, will attempt to fetch from environment variables or configuration file.
- """
- super().__init__(name)
- self.connection_args = self._get_connection_args(kwargs.get('connection_data', {}))
- self.is_connected = False
- self.api = None
- self._register_table('annotations', AnnotationsTable(self))
-
- def _get_connection_args(self, args):
- """Fetch connection arguments from parameters, environment variables, or configuration.
-
- Parameters
- ----------
- args
- Dictionary containing connection data.
-
- Returns
- -------
- connection_args
- Connection data list
- """
- handler_config = Config().get('zotero_handler', {})
- connection_args = {}
- for k in ['library_id', 'library_type', 'api_key']:
- connection_args[k] = args.get(k) or os.getenv(f'ZOTERO_{k.upper()}') or handler_config.get(k)
- return connection_args
-
- def connect(self) -> StatusResponse:
- """Connect to the Zotero API.
-
- Returns
- -------
- StatusResponse
- Status of the connection attempt.
- """
- if not self.is_connected:
- self.api = zotero.Zotero(
- self.connection_args['library_id'],
- self.connection_args['library_type'],
- self.connection_args['api_key']
- )
- self.is_connected = True
- return StatusResponse(True)
-
- def check_connection(self) -> StatusResponse:
- """Check the connection status to the Zotero API.
-
- Returns
- -------
- StatusResponse
- Status of the connection.
- """
- try:
- self.connect()
- return StatusResponse(True)
- except Exception as e:
- error_message = f'Error connecting to Zotero API: {str(e)}. Check credentials.'
- logger.error(error_message)
- self.is_connected = False
- return StatusResponse(False, error_message=error_message)
-
- def native_query(self, query_string: str = None):
- """Execute a native query against the Zotero API.
-
- Parameters
- ----------
- query_string : str
- The query string to execute, formatted as required by the Zotero API.
-
- Returns
- -------
- Response
- Response object containing the result of the query.
- """
- method_name, params = FuncParser().from_string(query_string)
- df = self._call_find_annotations_zotero_api(method_name, params)
- return Response(RESPONSE_TYPE.TABLE, data_frame=df)
diff --git a/mindsdb/integrations/handlers/zotero_handler/zotero_tables.py b/mindsdb/integrations/handlers/zotero_handler/zotero_tables.py
deleted file mode 100644
index 3c6b5b5f6a0..00000000000
--- a/mindsdb/integrations/handlers/zotero_handler/zotero_tables.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import pandas as pd
-from mindsdb.utilities import log
-from mindsdb_sql_parser import ast
-from mindsdb.integrations.libs.api_handler import APITable
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-
-logger = log.getLogger(__name__)
-
-
-class AnnotationsTable(APITable):
- """Represents a table of annotations in Zotero."""
-
- def select(self, query: ast.Select):
- """Select annotations based on the provided query.
-
- Parameters
- ----------
- query : ast.Select
- AST (Abstract Syntax Tree) representation of the SQL query.
-
- Returns
- -------
- Response
- Response object containing the selected annotations as a DataFrame.
- """
- if query.where is None: # Handle case for SELECT * FROM annotations
- df = self._get_items()
- return df[self.get_columns()]
-
- conditions = extract_comparison_conditions(query.where)
- supported = False # Flag to check if the query is supported
-
- for op, arg1, arg2 in conditions:
- if op in {'or', 'and'}:
- raise NotImplementedError('OR and AND are not supported')
- if arg1 == 'item_id' and op == '=':
- df = self._get_item(arg2)
- supported = True
- elif arg1 == 'parent_item_id' and op == '=':
- df = self._get_item_children(arg2)
- supported = True
-
- if not supported:
- raise NotImplementedError('Only "item_id=" and "parent_item_id=" conditions are implemented')
-
- return df[self.get_columns()]
-
- def get_columns(self):
- """Get the columns of the annotations table.
-
- Returns
- -------
- list
- List of column names.
- """
- return [
- 'annotationColor',
- 'annotationComment',
- 'annotationPageLabel',
- 'annotationText',
- 'annotationType',
- 'dateAdded',
- 'dateModified',
- 'key',
- 'parentItem',
- 'relations',
- 'tags',
- 'version'
- ]
-
- def _get_items(self) -> pd.DataFrame:
- """Get all annotations from the Zotero API.
-
- Returns
- -------
- pd.DataFrame
- DataFrame containing all annotations.
- """
- if not self.handler.is_connected:
- self.handler.connect()
-
- try:
- method = getattr(self.handler.api, 'items')
- result = method(itemType='annotation')
-
- if isinstance(result, dict):
- return pd.DataFrame([result.get('data', {})])
- if isinstance(result, list) and all(isinstance(item, dict) for item in result):
- data_list = [item.get('data', {}) for item in result]
- return pd.DataFrame(data_list)
-
- except Exception as e:
- logger.error(f"Error fetching items: {e}")
- raise e
-
- return pd.DataFrame()
-
- def _get_item(self, item_id: str) -> pd.DataFrame:
- """Get a single annotation by item ID.
-
- Parameters
- ----------
- item_id : str
- The ID of the item to fetch.
-
- Returns
- -------
- pd.DataFrame
- DataFrame containing the annotation.
- """
- if not self.handler.is_connected:
- self.handler.connect()
-
- try:
- method = getattr(self.handler.api, 'item')
- result = method(item_id, itemType='annotation')
-
- if isinstance(result, dict):
- return pd.DataFrame([result.get('data', {})])
-
- except Exception as e:
- logger.error(f"Error fetching item with ID {item_id}: {e}")
- raise e
-
- return pd.DataFrame()
-
- def _get_item_children(self, parent_item_id: str) -> pd.DataFrame:
- """Get annotations for a specific parent item ID.
-
- Parameters
- ----------
- parent_item_id : str
- The parent item ID to fetch annotations for.
-
- Returns
- -------
- pd.DataFrame
- DataFrame containing the annotations.
- """
- if not self.handler.is_connected:
- self.handler.connect()
-
- try:
- method = getattr(self.handler.api, 'children')
- result = method(parent_item_id, itemType='annotation')
-
- if isinstance(result, dict):
- return pd.DataFrame([result.get('data', {})])
- if isinstance(result, list) and all(isinstance(item, dict) for item in result):
- data_list = [item.get('data', {}) for item in result]
- return pd.DataFrame(data_list)
-
- except Exception as e:
- logger.error(f"Error fetching children for parent item ID {parent_item_id}: {e}")
- raise e
-
- return pd.DataFrame()
diff --git a/mindsdb/integrations/libs/base.py b/mindsdb/integrations/libs/base.py
index 9f7dbe618ff..2757b7ba594 100644
--- a/mindsdb/integrations/libs/base.py
+++ b/mindsdb/integrations/libs/base.py
@@ -1,15 +1,23 @@
import ast
import concurrent.futures
+import functools
import inspect
import textwrap
from _ast import AnnAssign, AugAssign
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, get_type_hints, get_args, Union, get_origin
import pandas as pd
from mindsdb_sql_parser.ast.base import ASTNode
from mindsdb.utilities import log
-from mindsdb.integrations.libs.response import HandlerResponse, HandlerStatusResponse, RESPONSE_TYPE
+from mindsdb.integrations.libs.response import (
+ HandlerStatusResponse,
+ RESPONSE_TYPE,
+ DataHandlerResponse,
+ normalize_response,
+ ErrorResponse,
+ TableResponse,
+)
logger = log.getLogger(__name__)
@@ -21,6 +29,59 @@ class BaseHandler:
broader MindsDB ecosystem via SQL commands.
"""
+ stream_response = False
+
+ def __init_subclass__(cls, **kwargs):
+ """Automatically wrap handler methods to normalize their responses.
+
+ When a subclass is defined, this method checks if any of the methods
+ in _methods_to_normalize are overridden and wraps them to convert
+ legacy HandlerResponse to new response types (TableResponse, OkResponse,
+ ErrorResponse).
+ """
+ super().__init_subclass__(**kwargs)
+
+ # Methods whose return values should be normalized to new response types
+ _methods_to_normalize = (
+ "native_query",
+ "query",
+ "insert",
+ "get_tables",
+ "get_columns",
+ "meta_get_tables",
+ "meta_get_columns",
+ "meta_get_column_statistics",
+ "meta_get_column_statistics_for_table",
+ "meta_get_primary_keys",
+ "meta_get_foreign_keys",
+ )
+ for method_name in _methods_to_normalize:
+ # Only wrap if method is defined directly in this class (not inherited)
+ if method_name not in cls.__dict__:
+ continue
+
+ original_method = cls.__dict__[method_name]
+
+ return_type = get_type_hints(original_method).get("return")
+ if return_type is DataHandlerResponse or (
+ get_origin(return_type) is Union and issubclass(get_args(return_type)[0], DataHandlerResponse)
+ ):
+ # this is already new style response
+ continue
+
+ # Skip if already wrapped
+ if getattr(original_method, "_response_normalized", False):
+ continue
+
+ # Create wrapper that normalizes response
+ @functools.wraps(original_method)
+ def wrapper(self, *args, _orig=original_method, **kwargs):
+ result = _orig(self, *args, **kwargs)
+ return normalize_response(result)
+
+ wrapper._response_normalized = True
+ setattr(cls, method_name, wrapper)
+
def __init__(self, name: str):
"""constructor
Args:
@@ -53,19 +114,19 @@ def check_connection(self) -> HandlerStatusResponse:
"""
raise NotImplementedError()
- def native_query(self, query: Any) -> HandlerResponse:
+ def native_query(self, query: Any, stream: bool = False, **kwargs) -> DataHandlerResponse:
"""Receive raw query and act upon it somehow.
Args:
- query (Any): query in native format (str for sql databases,
- etc)
-
+ query (Any): query in native format (str for sql databases, etc)
+ stream (bool): Whether to stream the results of the query
+ **kwargs: Additional keyword arguments.
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
- def query(self, query: ASTNode) -> HandlerResponse:
+ def query(self, query: ASTNode) -> DataHandlerResponse:
"""Receive query as AST (abstract syntax tree) and act upon it somehow.
Args:
@@ -73,30 +134,30 @@ def query(self, query: ASTNode) -> HandlerResponse:
of query: SELECT, INSERT, DELETE, etc
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
- def get_tables(self) -> HandlerResponse:
+ def get_tables(self) -> DataHandlerResponse:
"""Return list of entities
Return list of entities that will be accesible as tables.
Returns:
- HandlerResponse: shoud have same columns as information_schema.tables
+ DataHandlerResponse: shoud have same columns as information_schema.tables
(https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html)
Column 'TABLE_NAME' is mandatory, other is optional.
"""
raise NotImplementedError()
- def get_columns(self, table_name: str) -> HandlerResponse:
+ def get_columns(self, table_name: str) -> DataHandlerResponse:
"""Returns a list of entity columns
Args:
table_name (str): name of one of tables returned by self.get_tables()
Returns:
- HandlerResponse: shoud have same columns as information_schema.columns
+ DataHandlerResponse: shoud have same columns as information_schema.columns
(https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html)
Column 'COLUMN_NAME' is mandatory, other is optional. Hightly
recomended to define also 'DATA_TYPE': it should be one of
@@ -125,12 +186,12 @@ class MetaDatabaseHandler(DatabaseHandler):
def __init__(self, name: str):
super().__init__(name)
- def meta_get_tables(self, table_names: Optional[List[str]]) -> HandlerResponse:
+ def meta_get_tables(self, table_names: Optional[List[str]]) -> DataHandlerResponse:
"""
Returns metadata information about the tables to be stored in the data catalog.
Returns:
- HandlerResponse: The response should consist of the following columns:
+ DataHandlerResponse: The response should consist of the following columns:
- TABLE_NAME (str): Name of the table.
- TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional).
- TABLE_SCHEMA (str): Schema of the table (optional).
@@ -139,12 +200,12 @@ def meta_get_tables(self, table_names: Optional[List[str]]) -> HandlerResponse:
"""
raise NotImplementedError()
- def meta_get_columns(self, table_names: Optional[List[str]]) -> HandlerResponse:
+ def meta_get_columns(self, table_names: Optional[List[str]]) -> DataHandlerResponse:
"""
Returns metadata information about the columns in the tables to be stored in the data catalog.
Returns:
- HandlerResponse: The response should consist of the following columns:
+ DataHandlerResponse: The response should consist of the following columns:
- TABLE_NAME (str): Name of the table.
- COLUMN_NAME (str): Name of the column.
- DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc.
@@ -154,13 +215,13 @@ def meta_get_columns(self, table_names: Optional[List[str]]) -> HandlerResponse:
"""
raise NotImplementedError()
- def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> HandlerResponse:
+ def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> DataHandlerResponse:
"""
Returns metadata statisical information about the columns in the tables to be stored in the data catalog.
Either this method should be overridden in the handler or `meta_get_column_statistics_for_table` should be implemented.
Returns:
- HandlerResponse: The response should consist of the following columns:
+ DataHandlerResponse: The response should consist of the following columns:
- TABLE_NAME (str): Name of the table.
- COLUMN_NAME (str): Name of the column.
- MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
@@ -207,17 +268,14 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> Handle
if not results:
logger.warning("No column statistics could be retrieved for the specified tables.")
- return HandlerResponse(RESPONSE_TYPE.ERROR, error_message="No column statistics could be retrieved.")
- return HandlerResponse(
- RESPONSE_TYPE.TABLE, pd.concat(results, ignore_index=True) if results else pd.DataFrame()
- )
-
+ return ErrorResponse(error_message="No column statistics could be retrieved.")
+ return TableResponse(data=pd.concat(results, ignore_index=True) if results else pd.DataFrame())
else:
raise NotImplementedError()
def meta_get_column_statistics_for_table(
self, table_name: str, column_names: Optional[List[str]] = None
- ) -> HandlerResponse:
+ ) -> DataHandlerResponse:
"""
Returns metadata statistical information about the columns in a specific table to be stored in the data catalog.
Either this method should be implemented in the handler or `meta_get_column_statistics` should be overridden.
@@ -227,7 +285,7 @@ def meta_get_column_statistics_for_table(
column_names (Optional[List[str]]): List of column names to retrieve statistics for. If None, statistics for all columns will be returned.
Returns:
- HandlerResponse: The response should consist of the following columns:
+ DataHandlerResponse: The response should consist of the following columns:
- TABLE_NAME (str): Name of the table.
- COLUMN_NAME (str): Name of the column.
- MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
@@ -239,12 +297,12 @@ def meta_get_column_statistics_for_table(
"""
pass
- def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
+ def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> DataHandlerResponse:
"""
Returns metadata information about the primary keys in the tables to be stored in the data catalog.
Returns:
- HandlerResponse: The response should consist of the following columns:
+ DataHandlerResponse: The response should consist of the following columns:
- TABLE_NAME (str): Name of the table.
- COLUMN_NAME (str): Name of the column that is part of the primary key.
- ORDINAL_POSITION (int): Position of the column in the primary key (optional).
@@ -252,12 +310,12 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResp
"""
raise NotImplementedError()
- def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
+ def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> DataHandlerResponse:
"""
Returns metadata information about the foreign keys in the tables to be stored in the data catalog.
Returns:
- HandlerResponse: The response should consist of the following columns:
+ DataHandlerResponse: The response should consist of the following columns:
- PARENT_TABLE_NAME (str): Name of the parent table.
- PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key.
- CHILD_TABLE_NAME (str): Name of the child table.
diff --git a/mindsdb/integrations/libs/const.py b/mindsdb/integrations/libs/const.py
index 0e5ccc23c32..01749c4ce0a 100644
--- a/mindsdb/integrations/libs/const.py
+++ b/mindsdb/integrations/libs/const.py
@@ -16,6 +16,7 @@ class HANDLER_CONNECTION_ARG_TYPE:
PATH = "path"
DICT = "dict"
PWD = "pwd"
+ LIST = "list"
HANDLER_CONNECTION_ARG_TYPE = HANDLER_CONNECTION_ARG_TYPE()
diff --git a/mindsdb/integrations/libs/keyword_search_base.py b/mindsdb/integrations/libs/keyword_search_base.py
index 6a1cfdd9b80..d515764ba2a 100644
--- a/mindsdb/integrations/libs/keyword_search_base.py
+++ b/mindsdb/integrations/libs/keyword_search_base.py
@@ -36,6 +36,6 @@ def keyword_select(
conditions (List[FilterCondition]): conditions to select
Returns:
- HandlerResponse
+ pd.DataFrame
"""
raise NotImplementedError()
diff --git a/mindsdb/integrations/libs/llm/utils.py b/mindsdb/integrations/libs/llm/utils.py
index dcf80dd425a..da01454142e 100644
--- a/mindsdb/integrations/libs/llm/utils.py
+++ b/mindsdb/integrations/libs/llm/utils.py
@@ -1,8 +1,5 @@
import re
-import json
-import itertools
-from enum import Enum
-from typing import Optional, Dict, List, Tuple
+from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
@@ -20,23 +17,6 @@
BedrockConfig,
)
from mindsdb.utilities.config import config
-from mindsdb.integrations.utilities.rag.splitters.custom_splitters import RecursiveCharacterTextSplitter
-
-
-class Language(Enum):
- PYTHON = "python"
- JAVASCRIPT = "javascript"
- TYPESCRIPT = "typescript"
- JAVA = "java"
- CPP = "cpp"
- C = "c"
- GO = "go"
- RUST = "rust"
- RUBY = "ruby"
- PHP = "php"
- SWIFT = "swift"
- KOTLIN = "kotlin"
- SCALA = "scala"
# Default to latest GPT-4 model (https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo)
@@ -256,357 +236,3 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
)
raise ValueError(f"Provider {provider} is not supported.")
-
-
-def ft_jsonl_validation(
- items: list, # read from a JSONL file
- messages_col: str = "messages",
- # valid keys for each chat message
- role_key: str = "role",
- content_key: str = "content",
- name_key: str = "name",
- # valid roles for each chat message
- system_key: str = "system",
- user_key: str = "user",
- assistant_key: str = "assistant",
-):
- """
- This helper checks a list of dictionaries for compliance with the format usually expected by LLM providers
- (such as OpenAI or AnyscaleEndpoints) for fine-tuning LLMs that generate chat completions.
-
- Defaults are set according to the expected format, but these can be changed if needed by any given provider.
-
- :param items: list of JSON lines, each dictionary containing a chat sequence. Should be read from a JSONL file.
- :param messages_col: key in each dictionary to access a sequence of chat messages
-
-
- For chat-level checks, this method defers to `ft_chat_format_validation()` below. Relevant parameters for it are:
-
- For each chat:
- :param role_key: key that defines the role of each message (e.g. system, user, or LLM)
- :param content_key: key that defines the content of each message
- :param name_key: key that defines the name of each message
-
- For each message:
- :param system_key: valid role for each chat message
- :param user_key: valid role for each chat message
- :param assistant_key: valid role for each chat message
-
- :return: None, raises an Exception if validation fails.
- """ # noqa
- try:
- if not all([isinstance(m, dict) for m in items]):
- raise Exception("Each line in the provided data should be a dictionary")
-
- for line_num, batch in enumerate(items):
- prefix = f"error in chat #{line_num + 1}, "
-
- if not isinstance(batch[messages_col], list):
- raise Exception(
- f"{prefix}Each line in the provided data should have a '{messages_col}' key with a list of messages"
- ) # noqa
-
- if messages_col not in batch:
- raise Exception(f"{prefix}Each line in the provided data should have a '{messages_col}' key")
-
- messages = batch[messages_col]
- try:
- ft_chat_format_validation(
- messages,
- role_key=role_key,
- content_key=content_key,
- name_key=name_key,
- system_key=system_key,
- user_key=user_key,
- assistant_key=assistant_key,
- )
- except Exception as e:
- raise Exception(f"{prefix}{e}") from e
-
- except Exception as e:
- raise Exception(f"Fine-tuning data format is not valid. Got {e}") from e
-
-
-def ft_chat_format_validation(
- chat: list,
- transitions: Optional[Dict] = None,
- system_key: str = "system",
- user_key: str = "user",
- assistant_key: str = "assistant",
- role_key: str = "role",
- content_key: str = "content",
- name_key: str = "name",
-):
- """
- Finite state machine to check a chat has valid format to finetune an LLM with it.
- Follows OpenAI ChatCompletion format (also used by other providers such as AnyscaleEndpoints).
- Reference: https://cookbook.openai.com/examples/chat_finetuning_data_prep
-
- The unit test in `test_llm_utils.py` for examples of valid and invalid chats.
-
- :param chat: list of dictionaries, each containing a chat message
- :param transitions: optional dictionary defining valid transitions between chat messages (e.g. from system to user to assistant)
-
- For each chat:
- :param role_key: key that defines the role of each message (e.g. system, user, or LLM)
- :param content_key: key that defines the content of each message
- :param name_key: key that defines the name of each message
-
- For each message:
- :param system_key: valid role for each chat message
- :param user_key: valid role for each chat message
- :param assistant_key: valid role for each chat message
-
- :return: None if chat is valid, otherwise raise an informative Exception.
- """ # noqa
-
- valid_keys = (role_key, content_key, name_key)
- valid_roles = (system_key, user_key, assistant_key)
-
- for c in chat:
- if any(k not in valid_keys for k in c.keys()):
- raise Exception(f"Each message should only have these keys: `{valid_keys}`. Found: `{c.keys()}`")
-
- roles = [m[role_key] for m in chat]
- contents = [m[content_key] for m in chat]
-
- if len(roles) != len(contents):
- raise Exception(f"Each message should contain both `{role_key}` and `{content_key}` fields")
-
- if len(roles) == 0:
- raise Exception("Chat should have at least one message")
-
- if assistant_key not in roles:
- raise Exception("Chat should have at least one assistant message") # otherwise it is useless for FT
-
- if user_key not in roles:
- raise Exception("Chat should have at least one user message") # perhaps remove in the future
-
- # set default transitions for finite state machine if undefined
- if transitions is None:
- transitions = {
- None: [system_key, user_key],
- system_key: [user_key],
- user_key: [assistant_key],
- assistant_key: [user_key],
- }
-
- # check order is valid via finite state machine
- state = None
- for i, (role, content) in enumerate(zip(roles, contents)):
- prefix = f"message #{i + 1}: "
-
- # check invalid roles
- if role not in valid_roles:
- raise Exception(f"{prefix}Invalid role (found `{role}`, expected one of `{valid_roles}`)")
-
- # check content
- if not isinstance(content, str):
- raise Exception(f"{prefix}Content should be a string, got type `{type(content)}`")
-
- # check transition
- if role not in transitions[state]:
- raise Exception(f"{prefix}Invalid transition from `{state}` to `{role}`")
- else:
- state = role
-
-
-def ft_formatter(df: pd.DataFrame) -> List[Dict]:
- """
- Data preparation entry point for chat LLM finetuning. This method will dispatch to the appropriate formatters.
-
- Supported formats:
- - code: long tabular format with a `code` column
- - chat: long tabular format with `role` and `content` columns, or a JSON format with a `chat_json` column.
- """
- if "code" in df.columns:
- df = ft_code_formatter(df)
-
- elif {"question", "context", "answer"}.issubset(set(df.columns)):
- # TODO: handler user-specified names for these columns
- df = ft_cqa_formatter(df)
-
- return ft_chat_formatter(df)
-
-
-def ft_chat_formatter(df: pd.DataFrame) -> List[Dict]:
- """
- For more details, check `FineTuning -> Data Format` in the Anyscale API reference, or the OpenAI equivalent.
- Additionally, the unit test in `test_llm_utils.py` provides example usage.
-
- :param df: input dataframe has chats in one of the following formats:
- 1) long tabular: at least two columns, `role` and `content`. Rows contain >= 1 chats in long (stacked) format.
-
- 2) JSON: at least one column, `chat_json`. Each row contains exactly 1 chat in JSON format.
- Example for `chat_json` content:
- > `{"messages": [{"role": "user", "content": "Hello!"}, {"role": "assistant", "content": "Hi!"}]}`
-
- Optional df columns are:
- - chat_id: unique identifier for each chat
- - message_id: unique identifier for each message within each chat
-
- Data will be sorted by both if they are provided.
-
- If only `chat_id` is provided, data will be sorted by it with a stable sort, so messages for each chat
- will be in the same order as in the original data.
-
- If only `message_id` is provided, it must not contain duplicate IDs. Entire dataset will be treated
- as a single chat. Otherwise an exception will be raised.
-
- :return: list of chats. Each chat is a dictionary with a top level key 'messages' containing a list of messages
- that comply with the OpenAI's ChatEndpoint expected format (i.e., each is a dictionary with a `role` and
- `content` key.
-
- """ # noqa
- # 1. pre-sort df on optional columns
- if "chat_id" in df.columns:
- if "message_id" in df.columns:
- df = df.sort_values(["chat_id", "message_id"])
- else:
- df = df.sort_values(["chat_id"], kind="stable")
- elif "message_id" in df.columns:
- if df["message_id"].duplicated().any():
- raise Exception("If `message_id` is provided, it must not contain duplicate IDs.")
- df = df.sort_values(["message_id"])
-
- # 2. build chats
- chats = []
-
- # 2a. chats are in JSON format
- if "chat_json" in df.columns:
- for _, row in df.iterrows():
- try:
- chat = json.loads(row["chat_json"])
- assert list(chat.keys()) == ["messages"], "Each chat should have a 'messages' key, and nothing else."
- ft_chat_format_validation(chat["messages"]) # will raise Exception if chat is invalid
- chats.append(chat)
- except json.JSONDecodeError:
- pass # TODO: add logger info here, prompt user to clean dataset carefully
-
- # 2b. chats are in tabular format - aggregate each chat sequence into one row
- else:
- chat = []
- for i, row in df.iterrows():
- if row["role"] == "system" and len(chat) > 0:
- ft_chat_format_validation(chat) # will raise Exception if chat is invalid
- chats.append({"messages": chat})
- chat = []
- event = {"role": row["role"], "content": row["content"]}
- chat.append(event)
-
- ft_chat_format_validation(chat) # will raise Exception if chat is invalid
- chats.append({"messages": chat})
-
- return chats
-
-
-def ft_code_formatter(
- df: pd.DataFrame,
- format="chat",
- language="python",
- chunk_size=100,
- chunk_overlap=0,
- chat_sections=("Code prefix", "Code suffix", "Completion"),
- fim_tokens=("
", "", ""),
-) -> pd.DataFrame:
- """
- This utility processes a raw codebase stored as a dataframe with a `code` column, where
- every row may be an entire file or some portion of it.
- It chunks code into triples made of a prefix, middle, and suffix.
-
- Depending on the target LLM, these triples are then formatted into a chat-like prompt, or a
- fill-in-the-middle (FIM) prompt. The latter is used for fine-tuning models like codellama,
- while the former is more generic and should work with any LLM that supports the ChatCompletion
- format, as the rest of our tools do.
- """
-
- # input and setup validation
- assert len(df) > 0, "Input dataframe should not be empty"
- assert "code" in df.columns, "Input dataframe should have a 'code' column"
- assert chunk_size > 0 and isinstance(chunk_size, int), "`chunk_size` should be a positive integer"
-
- supported_formats = ["chat", "fim"]
- supported_langs = [e.value for e in Language]
- assert language.lower() in supported_langs, f"Invalid language. Valid choices are: {supported_langs}"
-
- # ensure correct encoding
- df["code"] = df["code"].map(lambda x: x.encode("utf8").decode("unicode_escape"))
-
- # set prompt templates
- system_prompt = "You are a powerful text to code model. Your job is to provide great code completions. As context, you are given code that is found immediately before and after the code you must generate.\n\nYou must output the code that should go in between the prefix and suffix.\n\n"
- if format == "chat":
- templates = [f"### {c}:" for c in chat_sections]
- elif format == "fim":
- templates = fim_tokens
- else:
- raise Exception(f"Invalid format. Please choose one of {supported_formats}")
-
- # split code into chunks
- # Get language enum value (handle both string and enum)
- lang_enum = getattr(Language, language.upper(), language)
- code_splitter = RecursiveCharacterTextSplitter.from_language(
- language=lang_enum,
- chunk_size=3 * chunk_size, # each triplet element has `chunk_size`
- chunk_overlap=chunk_overlap, # some overlap here is fine
- )
- chunk_docs = code_splitter.create_documents(list(df["code"]))
- chunks = [c.page_content for c in chunk_docs]
-
- # split each chunk into a triplet, with no overlap
- triplet_splitter = RecursiveCharacterTextSplitter.from_language(
- language=lang_enum,
- chunk_size=chunk_size,
- chunk_overlap=0, # no overlap admitted, otherwise context may leak into answer
- )
- triplet_chunk_docs = triplet_splitter.create_documents(chunks)
- chunks = [c.page_content for c in triplet_chunk_docs]
- chunks = chunks[: len(chunks) - len(chunks) % 3] # should be a multiple of 3
-
- # format chunks into prompts
- roles = []
- contents = []
- for idx in range(0, len(chunks), 3):
- pre, mid, suf = chunks[idx : idx + 3]
- interleaved = list(itertools.chain(*zip(templates, (pre, suf, mid))))
- user = "\n".join(interleaved[:-1])
- assistant = "\n".join(interleaved[-1:])
- roles.extend(["system", "user", "assistant"])
- contents.extend([system_prompt, user, assistant])
-
- # return formatted prompts in a dataframe to be processed by `ft_chat_formatter()`
- df = pd.DataFrame({"role": roles, "content": contents})
- return df
-
-
-def ft_cqa_formatter(
- df: pd.DataFrame,
- question_col="question",
- answer_col="answer",
- instruction_col="instruction",
- context_col="context",
- default_instruction="You are a helpful assistant.",
- default_context="",
-) -> pd.DataFrame:
- # input and setup validation
- assert len(df) > 0, "Input dataframe should not be empty"
- assert {question_col, answer_col}.issubset(set(df.columns)), (
- f"Input dataframe must have columns `{question_col}`, and `{answer_col}`"
- ) # noqa
-
- if instruction_col not in df.columns:
- df[instruction_col] = default_instruction
-
- if context_col not in df.columns:
- df[context_col] = default_context
-
- # format data into chat-like prompts
- roles = []
- contents = []
- for i, row in df.iterrows():
- system = "\n".join([row[instruction_col], row[context_col]])
- user = row[question_col]
- assistant = row[answer_col]
- roles.extend(["system", "user", "assistant"])
- contents.extend([system, user, assistant])
-
- return pd.DataFrame({"role": roles, "content": contents})
diff --git a/mindsdb/integrations/libs/ml_exec_base.py b/mindsdb/integrations/libs/ml_exec_base.py
index 96eca4a033a..abac27d75de 100644
--- a/mindsdb/integrations/libs/ml_exec_base.py
+++ b/mindsdb/integrations/libs/ml_exec_base.py
@@ -7,7 +7,7 @@
normally associated with a DB handler (e.g. `native_query`, `get_tables`), as well as other ML-specific behaviors,
like `learn()` or `predict()`. Note that while these still have to be implemented at the engine level, the burden
on that class is lesser given that it only needs to return a pandas DataFrame. It's this class that will take said
- output and format it into the HandlerResponse instance that MindsDB core expects.
+ output and format it into the DataHandlerResponse instance that MindsDB core expects.
- `learn_process` method: handles async dispatch of the `learn` method in an engine, as well as registering all
models inside of the internal MindsDB registry.
diff --git a/mindsdb/integrations/libs/passthrough.py b/mindsdb/integrations/libs/passthrough.py
new file mode 100644
index 00000000000..f535d3dfeb8
--- /dev/null
+++ b/mindsdb/integrations/libs/passthrough.py
@@ -0,0 +1,477 @@
+"""
+PassthroughMixin β generic HTTP passthrough for authenticated REST APIs.
+
+A handler opts in by declaring three class attributes:
+
+ class MyHandler(APIHandler, PassthroughMixin):
+ _bearer_token_arg = "api_key" # key in connection_data
+ _base_url_default = "https://api.example.com" # fallback if user omits
+ _test_request = PassthroughRequest("GET", "/me")
+
+The mixin defaults to ``Authorization: Bearer ``. Handlers using a
+different auth scheme (e.g. Shopify's ``X-Shopify-Access-Token``) override
+``_auth_header_name`` and ``_auth_header_format`` β see CHANGE 3.
+
+The mixin reads ``self.connection_data`` (a dict populated from
+integration setup) to pull the token, resolve the base URL, and enforce
+the host allowlist. Handlers that need custom URL composition (e.g.
+``http://{host}:{port}``) override ``_build_base_url``.
+
+``PassthroughProtocol`` is a structural type describing the two public
+methods (``api_passthrough`` and ``test_passthrough``). The HTTP layer
+checks against the protocol rather than the mixin class, so a handler
+can satisfy the contract without inheriting the default implementation.
+"""
+
+import ipaddress
+import os
+import time
+from typing import Any, Protocol, runtime_checkable
+from urllib.parse import urlparse
+
+import requests
+
+from mindsdb.integrations.libs.passthrough_types import (
+ ALLOWED_METHODS,
+ FORBIDDEN_REQUEST_HEADERS,
+ HOP_BY_HOP_RESPONSE_HEADERS,
+ HostNotAllowedError,
+ PassthroughConfigError,
+ PassthroughRequest,
+ PassthroughResponse,
+ PassthroughValidationError,
+)
+from mindsdb.utilities import log
+
+logger = log.getLogger(__name__)
+
+
+PASSTHROUGH_TIMEOUT_SECONDS = int(os.getenv("MINDSDB_PASSTHROUGH_TIMEOUT_SECONDS", "30"))
+PASSTHROUGH_MAX_REQUEST_BYTES = int(os.getenv("MINDSDB_PASSTHROUGH_MAX_REQUEST_BYTES", str(1 * 1024 * 1024)))
+PASSTHROUGH_MAX_RESPONSE_BYTES = int(os.getenv("MINDSDB_PASSTHROUGH_MAX_RESPONSE_BYTES", str(10 * 1024 * 1024)))
+
+REDACTED_SENTINEL = "[REDACTED_API_KEY]"
+
+
+def _is_private_host(hostname: str) -> bool:
+ """Return True if `hostname` resolves to a private/loopback/link-local IP literal.
+
+ Only IP literals are checked; DNS resolution is intentionally not performed
+ (handlers may legitimately point at an internal DNS name the operator has
+ approved via `allowed_hosts`). The IP-literal check prevents a caller from
+ smuggling `http://127.0.0.1/` or `http://10.0.0.1/` through a typo'd base_url.
+ """
+ try:
+ ip = ipaddress.ip_address(hostname)
+ except ValueError:
+ return False
+ return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_reserved
+
+
+def _host_matches(host: str, allowlist: list[str]) -> bool:
+ if not host:
+ return False
+ host = host.lower()
+ return any(host == entry.lower() for entry in allowlist)
+
+
+@runtime_checkable
+class PassthroughProtocol(Protocol):
+ """Structural contract for handlers that expose HTTP passthrough.
+
+ The HTTP namespace checks against this Protocol rather than the
+ `PassthroughMixin` class, which lets future handlers satisfy the
+ contract without inheriting the default implementation.
+ """
+
+ def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse: ...
+
+ def test_passthrough(self) -> dict[str, Any]: ...
+
+
+class PassthroughMixin:
+ # Required overrides
+ _bearer_token_arg: str = ""
+
+ # Optional overrides
+ _base_url_arg: str = "base_url"
+ _base_url_default: str | None = None
+ _allowed_hosts_arg: str = "allowed_hosts"
+ _default_headers_arg: str = "default_headers"
+
+ # Auth header. Defaults to bearer-compatible; handlers using a custom
+ # scheme (e.g. Shopify's `X-Shopify-Access-Token: `) override
+ # both attrs. The value from `_get_bearer_token()` is formatted into
+ # `{token}` β the method name is retained for backwards compat but
+ # now represents "the auth secret" regardless of scheme.
+ _auth_header_name: str = "Authorization"
+ _auth_header_format: str = "Bearer {token}"
+
+ # Declarative auth mode surfaced to /capabilities. One handler instance
+ # has exactly one auth mode, so this is a single string; the API
+ # response still wraps it in a list because a future contract may
+ # surface handlers supporting multiple configurations. Known values:
+ # "bearer", "custom", "oauth_refresh". Handlers that use a non-bearer
+ # header scheme or a refresh-aware mixin should set this explicitly β
+ # don't infer it from _auth_header_format, since OAuth-refresh also
+ # uses "Bearer {token}" but is a distinct mode.
+ _auth_mode: str = "bearer"
+
+ # Canonical sanity-check request for `test_passthrough()`. Handlers MUST
+ # set this if they want the /passthrough/test endpoint to do anything
+ # useful. `None` means "test endpoint returns 'not implemented'".
+ _test_request: PassthroughRequest | None = None
+
+ # Stamped on every upstream request so the upstream can identify our
+ # traffic for support/debugging. See design Β§13 (q3).
+ _upstream_marker_header: str = "X-Minds-Passthrough"
+
+ # Hook: override when URL composition is more than "take a string"
+ # (e.g. strapi composes from host+port).
+ def _build_base_url(self) -> str | None:
+ data = self._get_connection_data()
+ value = data.get(self._base_url_arg) if self._base_url_arg else None
+ if value:
+ return str(value).rstrip("/")
+ if self._base_url_default is not None:
+ return self._base_url_default.rstrip("/")
+ return None
+
+ def _get_connection_data(self) -> dict[str, Any]:
+ """Return the handler's stored connection_data dict.
+
+ Handlers store this differently; we check the common attribute names
+ so most handlers don't need to override.
+ """
+ for attr in ("connection_data", "_connection_data"):
+ value = getattr(self, attr, None)
+ if isinstance(value, dict):
+ return value
+ return {}
+
+ def _get_bearer_token(self) -> str:
+ if not self._bearer_token_arg:
+ raise PassthroughConfigError("handler did not declare _bearer_token_arg")
+ token = self._get_connection_data().get(self._bearer_token_arg)
+ if not token:
+ raise PassthroughConfigError(f"bearer token ('{self._bearer_token_arg}') is missing from connection_data")
+ return str(token)
+
+ def _resolve_url(self, path: str) -> tuple[str, str]:
+ """Return ``(url, hostname)`` for the outgoing request.
+
+ `path` is appended to the base URL verbatim. After joining we parse
+ the result and compare the hostname against the allowlist β path
+ injection tricks like ``@evil.com`` or ``//evil.com`` are rejected
+ at the hostname-comparison step, not by string matching.
+ """
+ if not path.startswith("/"):
+ raise PassthroughValidationError("path must start with '/'")
+ base = self._build_base_url()
+ if not base:
+ raise PassthroughConfigError("base_url is not configured for this datasource")
+
+ url = f"{base}{path}"
+ parsed = urlparse(url)
+ if parsed.scheme not in ("http", "https") or not parsed.hostname:
+ raise PassthroughValidationError(f"resolved URL is not valid: {url}")
+ return url, parsed.hostname
+
+ def _allowed_hosts(self, default_host: str) -> list[str]:
+ data = self._get_connection_data()
+ allowed = data.get(self._allowed_hosts_arg)
+ if isinstance(allowed, list) and allowed:
+ return [str(h) for h in allowed]
+ return [default_host]
+
+ def _check_host_allowed(self, hostname: str) -> None:
+ allowlist = self._allowed_hosts(hostname)
+ if allowlist == ["*"]:
+ return
+ if not _host_matches(hostname, allowlist):
+ raise HostNotAllowedError(f"host '{hostname}' is not in the datasource allowlist")
+ if _is_private_host(hostname):
+ raise HostNotAllowedError(
+ f"host '{hostname}' resolves to a private/loopback address; "
+ "set allowed_hosts=['*'] to bypass this check (explicit "
+ "listing is ignored for private IPs)"
+ )
+
+ def _build_outgoing_headers(self, caller_headers: dict[str, str], bearer: str) -> dict[str, str]:
+ """Merge caller headers (filtered) + default_headers + Authorization."""
+ out: dict[str, str] = {}
+ data = self._get_connection_data()
+ defaults = data.get(self._default_headers_arg) or {}
+ if isinstance(defaults, dict):
+ out.update({str(k): str(v) for k, v in defaults.items()})
+ for name, value in (caller_headers or {}).items():
+ if name.lower() in FORBIDDEN_REQUEST_HEADERS:
+ continue
+ if name.lower().startswith("proxy-"):
+ continue
+ out[name] = value
+ out[self._auth_header_name] = self._auth_header_format.format(token=bearer)
+ out[self._upstream_marker_header] = "1"
+ return out
+
+ def _secrets_for_scrub(self) -> list[str]:
+ """Values that must not appear in the response returned to the caller."""
+ secrets: list[str] = []
+ try:
+ secrets.append(self._get_bearer_token())
+ except PassthroughConfigError:
+ pass
+ data = self._get_connection_data()
+ defaults = data.get(self._default_headers_arg) or {}
+ if isinstance(defaults, dict):
+ for value in defaults.values():
+ s = str(value)
+ if len(s) >= 16:
+ secrets.append(s)
+ return secrets
+
+ def _scrub(self, text: str, secrets: list[str]) -> str:
+ for s in secrets:
+ if s:
+ text = text.replace(s, REDACTED_SENTINEL)
+ return text
+
+ def _scrub_bytes(self, data: bytes, secrets: list[str]) -> bytes:
+ """Byte-level secret scrub (spec Β§7.6).
+
+ Replacing on raw bytes before decoding prevents U+FFFD substitutions
+ from `errors="replace"` from fragmenting a secret and letting part of
+ it survive the scrub.
+ """
+ sentinel = REDACTED_SENTINEL.encode("utf-8")
+ for s in secrets:
+ if s:
+ data = data.replace(s.encode("utf-8"), sentinel)
+ return data
+
+ def _filter_response_headers(self, headers: dict[str, str], secrets: list[str]) -> dict[str, str]:
+ filtered: dict[str, str] = {}
+ for name, value in headers.items():
+ if name.lower() in HOP_BY_HOP_RESPONSE_HEADERS:
+ continue
+ filtered[name] = self._scrub(str(value), secrets)
+ return filtered
+
+ def _read_capped_body(self, response: requests.Response) -> bytes:
+ """Read the response body in chunks, abort if it exceeds the cap."""
+ chunks: list[bytes] = []
+ total = 0
+ try:
+ for chunk in response.iter_content(chunk_size=64 * 1024):
+ if not chunk:
+ continue
+ total += len(chunk)
+ if total > PASSTHROUGH_MAX_RESPONSE_BYTES:
+ raise PassthroughValidationError(f"response body exceeded {PASSTHROUGH_MAX_RESPONSE_BYTES} bytes")
+ chunks.append(chunk)
+ finally:
+ response.close()
+ return b"".join(chunks)
+
+ # ------------------------------------------------------------------
+ # Public API
+ # ------------------------------------------------------------------
+
+ def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse:
+ method = (req.method or "").upper()
+ if method not in ALLOWED_METHODS:
+ raise PassthroughValidationError(f"method '{req.method}' is not allowed")
+
+ connection_data = self._get_connection_data()
+ allowed_methods_cfg = connection_data.get("allowed_methods")
+ if allowed_methods_cfg is not None:
+ if not isinstance(allowed_methods_cfg, list):
+ raise PassthroughConfigError("'allowed_methods' must be a list of HTTP method strings")
+ if not all(isinstance(m, str) for m in allowed_methods_cfg):
+ raise PassthroughConfigError("'allowed_methods' must be a list of HTTP method strings")
+ allowed_upper = {m.upper() for m in allowed_methods_cfg}
+ unknown = sorted(allowed_upper - ALLOWED_METHODS)
+ if unknown:
+ raise PassthroughConfigError(
+ f"'allowed_methods' contains unsupported verbs: {unknown}. Allowed: {sorted(ALLOWED_METHODS)}"
+ )
+ if method not in allowed_upper:
+ raise PassthroughValidationError(
+ f"method '{method}' is not permitted by this datasource",
+ error_code="method_not_allowed",
+ http_status=405,
+ )
+
+ request_bytes = 0
+ if req.body is not None:
+ # requests will serialize dict bodies to JSON; we cap on the
+ # serialized length. For raw strings / bytes we cap directly.
+ import json as _json
+
+ if isinstance(req.body, (dict, list)):
+ body_bytes_for_size = _json.dumps(req.body).encode("utf-8")
+ elif isinstance(req.body, (bytes, bytearray)):
+ body_bytes_for_size = bytes(req.body)
+ else:
+ body_bytes_for_size = str(req.body).encode("utf-8")
+ if len(body_bytes_for_size) > PASSTHROUGH_MAX_REQUEST_BYTES:
+ raise PassthroughValidationError(f"request body exceeded {PASSTHROUGH_MAX_REQUEST_BYTES} bytes")
+ request_bytes = len(body_bytes_for_size)
+
+ url, hostname = self._resolve_url(req.path)
+ self._check_host_allowed(hostname)
+ bearer = self._get_bearer_token()
+ headers = self._build_outgoing_headers(req.headers or {}, bearer)
+
+ request_kwargs: dict[str, Any] = {
+ "headers": headers,
+ "params": req.query or None,
+ "timeout": PASSTHROUGH_TIMEOUT_SECONDS,
+ "stream": True,
+ }
+ if req.body is not None:
+ if isinstance(req.body, (dict, list)):
+ request_kwargs["json"] = req.body
+ else:
+ request_kwargs["data"] = req.body
+
+ datasource_name = getattr(self, "name", None) or "?"
+ start = time.monotonic()
+ response = requests.request(method, url, **request_kwargs)
+ body_bytes = self._read_capped_body(response)
+ duration_ms = int((time.monotonic() - start) * 1000)
+
+ secrets = self._secrets_for_scrub()
+ body_bytes = self._scrub_bytes(body_bytes, secrets)
+ content_type = response.headers.get("Content-Type", "") or ""
+ out_headers = self._filter_response_headers(dict(response.headers), secrets)
+
+ body: Any
+ if "application/json" in content_type.lower():
+ try:
+ text = body_bytes.decode("utf-8", errors="replace")
+ import json as _json
+
+ body = _json.loads(text) if text else None
+ except ValueError:
+ body = body_bytes.decode("utf-8", errors="replace")
+ else:
+ body = body_bytes.decode("utf-8", errors="replace")
+
+ self._log_passthrough_call(
+ method=method,
+ path=req.path,
+ datasource_name=datasource_name,
+ upstream_status_code=response.status_code,
+ request_bytes=request_bytes,
+ response_bytes=len(body_bytes),
+ duration_ms=duration_ms,
+ )
+
+ return PassthroughResponse(
+ status_code=response.status_code,
+ headers=out_headers,
+ body=body,
+ content_type=content_type.split(";", 1)[0].strip() or None,
+ )
+
+ def _log_passthrough_call(
+ self,
+ *,
+ method: str,
+ path: str,
+ datasource_name: str,
+ upstream_status_code: int,
+ request_bytes: int,
+ response_bytes: int,
+ duration_ms: int,
+ ) -> None:
+ """Emit one audit line per passthrough call (spec Β§7.8).
+
+ Never logs headers or bodies. user_id / org_id are pulled from the
+ MindsDB request context when available; in test/dev invocations
+ where the context is not populated, they are omitted.
+ """
+ fields: dict[str, Any] = {
+ "method": method,
+ "path": path,
+ "datasource_name": datasource_name,
+ "upstream_status_code": upstream_status_code,
+ "request_bytes": request_bytes,
+ "response_bytes": response_bytes,
+ "duration_ms": duration_ms,
+ }
+ # TODO: org_id lives in Minds; when the passthrough is called via the
+ # Minds gateway the org scope should be propagated and logged here.
+ try:
+ from mindsdb.utilities.context import context as _ctx
+
+ user_id = getattr(_ctx, "user_id", None)
+ company_id = getattr(_ctx, "company_id", None)
+ if user_id is not None:
+ fields["user_id"] = user_id
+ if company_id is not None:
+ fields["company_id"] = company_id
+ except Exception:
+ pass
+ # DEBUG level per team decision: per-request audit logging at
+ # info level happens in Minds at the HTTP layer. This log is
+ # intended for mindsdb-side troubleshooting only.
+ logger.debug("passthrough %s", fields)
+
+ def test_passthrough(self) -> dict[str, Any]:
+ """Run the handler's canonical sanity-check call (see Β§6.1a).
+
+ Returns a structured dict the HTTP layer forwards to the caller:
+ { "ok": bool, "status_code": int?, "host": str?, "latency_ms": int?,
+ "error_code": str?, "message": str? }
+ """
+ if self._test_request is None:
+ return {
+ "ok": False,
+ "error_code": "not_implemented",
+ "message": "this handler does not define a passthrough test request",
+ }
+
+ start = time.monotonic()
+ try:
+ resp = self.api_passthrough(self._test_request)
+ except HostNotAllowedError as e:
+ return {"ok": False, "error_code": e.error_code, "message": str(e)}
+ except PassthroughConfigError as e:
+ return {"ok": False, "error_code": e.error_code, "message": str(e)}
+ except PassthroughValidationError as e:
+ return {"ok": False, "error_code": e.error_code, "message": str(e)}
+ except requests.exceptions.Timeout as e:
+ return {"ok": False, "error_code": "timeout", "message": str(e)}
+ except requests.exceptions.ConnectionError as e:
+ return {"ok": False, "error_code": "network", "message": str(e)}
+ except Exception as e: # noqa: BLE001
+ logger.exception("passthrough test failed unexpectedly")
+ return {"ok": False, "error_code": "unknown", "message": str(e)}
+
+ latency_ms = int((time.monotonic() - start) * 1000)
+ try:
+ _, host = self._resolve_url(self._test_request.path)
+ except Exception:
+ host = None
+
+ if 200 <= resp.status_code < 300:
+ return {"ok": True, "status_code": resp.status_code, "host": host, "latency_ms": latency_ms}
+ if resp.status_code in (401, 403):
+ return {
+ "ok": False,
+ "status_code": resp.status_code,
+ "host": host,
+ "latency_ms": latency_ms,
+ "error_code": "auth_failed",
+ "message": "upstream rejected credentials; base URL and allowlist look correct",
+ }
+ return {
+ "ok": False,
+ "status_code": resp.status_code,
+ "host": host,
+ "latency_ms": latency_ms,
+ "error_code": "upstream_error",
+ "message": f"upstream returned {resp.status_code}",
+ }
diff --git a/mindsdb/integrations/libs/passthrough_types.py b/mindsdb/integrations/libs/passthrough_types.py
new file mode 100644
index 00000000000..63d1cb14524
--- /dev/null
+++ b/mindsdb/integrations/libs/passthrough_types.py
@@ -0,0 +1,94 @@
+"""
+Request/response dataclasses and error types for the REST passthrough path.
+
+These are the payloads exchanged between the HTTP layer and
+`PassthroughMixin`. They are intentionally framework-agnostic so the
+mixin can be unit-tested without Flask.
+"""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+ALLOWED_METHODS = frozenset({"GET", "POST", "PUT", "PATCH", "DELETE"})
+
+# Hop-by-hop and auth-related headers that must never come from the caller.
+FORBIDDEN_REQUEST_HEADERS = frozenset(
+ h.lower()
+ for h in (
+ "authorization",
+ "host",
+ "cookie",
+ "content-length",
+ "connection",
+ )
+)
+
+# Hop-by-hop response headers stripped before returning to the caller.
+HOP_BY_HOP_RESPONSE_HEADERS = frozenset(
+ h.lower()
+ for h in (
+ "connection",
+ "keep-alive",
+ "proxy-authenticate",
+ "proxy-authorization",
+ "te",
+ "trailers",
+ "transfer-encoding",
+ "upgrade",
+ "content-length",
+ )
+)
+
+
+@dataclass
+class PassthroughRequest:
+ method: str
+ path: str
+ query: dict[str, Any] = field(default_factory=dict)
+ headers: dict[str, str] = field(default_factory=dict)
+ body: Any = None
+
+
+@dataclass
+class PassthroughResponse:
+ status_code: int
+ headers: dict[str, str]
+ body: Any
+ content_type: str | None = None
+
+
+class PassthroughError(Exception):
+ """Base class for passthrough failures that should not be leaked as 500s."""
+
+ error_code: str = "passthrough_error"
+ http_status: int = 400
+
+ def __init__(self, message: str, *, error_code: str | None = None, http_status: int | None = None):
+ super().__init__(message)
+ if error_code is not None:
+ self.error_code = error_code
+ if http_status is not None:
+ self.http_status = http_status
+
+
+class PassthroughConfigError(PassthroughError):
+ error_code = "config_error"
+ http_status = 500
+
+
+class HostNotAllowedError(PassthroughError):
+ error_code = "host_not_allowed"
+ http_status = 400
+
+
+class PassthroughValidationError(PassthroughError):
+ error_code = "invalid_request"
+ http_status = 400
+
+
+class PassthroughNotSupportedError(PassthroughError):
+ """Raised when a handler does not implement the mixin."""
+
+ error_code = "passthrough_not_supported"
+ http_status = 501
diff --git a/mindsdb/integrations/libs/response.py b/mindsdb/integrations/libs/response.py
index aa39ce4c2c6..3af33b444fa 100644
--- a/mindsdb/integrations/libs/response.py
+++ b/mindsdb/integrations/libs/response.py
@@ -1,14 +1,17 @@
import sys
-from typing import Callable
+from abc import ABC
+from typing import Callable, Generator, ClassVar
from dataclasses import dataclass, fields
import numpy
import pandas
+import psutil
from mindsdb.utilities import log
from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
from mindsdb_sql_parser.ast import ASTNode
+from mindsdb.utilities.types.column import Column
logger = log.getLogger(__name__)
@@ -40,7 +43,464 @@ class _INFORMATION_SCHEMA_COLUMNS_NAMES:
INF_SCHEMA_COLUMNS_NAMES_SET = set(f.name for f in fields(INF_SCHEMA_COLUMNS_NAMES))
+class HandlerStatusResponse:
+ def __init__(
+ self,
+ success: bool = True,
+ error_message: str = None,
+ redirect_url: str = None,
+ copy_storage: str = None,
+ ) -> None:
+ self.success = success
+ self.error_message = error_message
+ self.redirect_url = redirect_url
+ self.copy_storage = copy_storage
+
+ def to_json(self):
+ data = {"success": self.success, "error": self.error_message}
+ if self.redirect_url is not None:
+ data["redirect_url"] = self.redirect_url
+ if self.copy_storage is not None:
+ data["copy_storage"] = self.copy_storage
+ return data
+
+ def __repr__(self):
+ return (
+ f"{self.__class__.__name__}("
+ f"success={self.success}, "
+ f"error={self.error_message}, "
+ f"redirect_url={self.redirect_url}, "
+ f"copy_storage={self.copy_storage})"
+ )
+
+
+class DataHandlerResponse(ABC):
+ """Base class for all data handler responses."""
+
+ type: ClassVar[str]
+
+ @property
+ def resp_type(self):
+ # For back compatibility with old code, use the type attribute instead of resp_type
+ return self.type
+
+
+class ErrorResponse(DataHandlerResponse):
+ """Response for error cases.
+
+ Attributes:
+ type: RESPONSE_TYPE.ERROR
+ error_code: int
+ error_message: str | None
+ is_expected_error: bool
+ exception: Exception | None
+ """
+
+ type: ClassVar[str] = RESPONSE_TYPE.ERROR
+ error_code: int
+ error_message: str | None
+ is_expected_error: bool
+ exception: Exception | None
+
+ def __init__(self, error_code: int = 0, error_message: str | None = None, is_expected_error: bool = False):
+ self.error_code = error_code
+ self.error_message = error_message
+ self.is_expected_error = is_expected_error
+ self.exception = None
+ current_exception = sys.exc_info()
+ if current_exception[0] is not None:
+ self.exception = current_exception[1]
+
+ def to_columns_table_response(self, map_type_fn: Callable) -> None:
+ raise ValueError(
+ f"Cannot convert {self.type} to {RESPONSE_TYPE.COLUMNS_TABLE}, the error is: {self.error_message}"
+ )
+
+
+class OkResponse(DataHandlerResponse):
+ """Response for successful cases without data (e.g. CREATE TABLE, DROP TABLE, etc.).
+
+ Attributes:
+ type: RESPONSE_TYPE.OK
+ affected_rows: int - how many rows were affected by the query
+ """
+
+ type: ClassVar[str] = RESPONSE_TYPE.OK
+ affected_rows: int
+
+ def __init__(self, affected_rows: int = None):
+ self.affected_rows = affected_rows
+
+
+def _safe_pandas_concat(pieces: list[pandas.DataFrame]) -> pandas.DataFrame:
+ """Safely concatenates multiple pandas DataFrames while checking available memory.
+ If the estimated memory required for concatenation (with a safety multiplier of 2.5x)
+ exceeds the available memory, it raises a MemoryError.
+
+ Args:
+ pieces (list[pandas.DataFrame]): A list of pandas DataFrames to concatenate.
+
+ Returns:
+ pandas.DataFrame: The concatenated DataFrame.
+
+ Raises:
+ MemoryError: If there is insufficient memory to perform the concatenation safely.
+ """
+ if len(pieces) == 1:
+ return pieces[0]
+ available_memory_kb = psutil.virtual_memory().available >> 10
+ pieces_size_kb = sum([(x.memory_usage(index=True, deep=True).sum() >> 10) for x in pieces])
+ if (pieces_size_kb * 2.5) > available_memory_kb:
+ raise MemoryError()
+ return pandas.concat(pieces)
+
+
+class TableResponse(DataHandlerResponse):
+ """Response for successful cases with data (e.g. SELECT, SHOW, etc.).
+
+ Attributes:
+ type: RESPONSE_TYPE.TABLE | RESPONSE_TYPE.COLUMNS_TABLE - type of data in the response
+ affected_rows: int | None - how many rows were affected by the query
+ data_generator: Generator[pandas.DataFrame, None, None] | None - generator of data for lazy loading
+ _columns: list[Column] | None - list of columns
+ _data: pandas.DataFrame | None - loaded data
+ _fetched: bool - if data was already fetched (data_generator is consumed)
+ _invalid: bool - if data has already been fetched and cannot be iterated over
+ _last_data_piece: pandas.DataFrame | None - last data piece fetched
+ rows_fetched: int - how many rows were fetched
+ """
+
+ type: str
+ affected_rows: int | None
+ _data_generator: Generator[pandas.DataFrame, None, None] | None
+ _columns: list[Column] | None
+ _data: pandas.DataFrame | None
+ _fetched: bool
+ _invalid: bool
+ _last_data_piece: pandas.DataFrame | None
+ rows_fetched: int
+
+ def __init__(
+ self,
+ data: pandas.DataFrame | None = None,
+ data_generator: Generator[pandas.DataFrame, None, None] | None = None,
+ affected_rows: int | None = None,
+ columns: list[Column] = None,
+ ):
+ """
+ Either data and/or data_generator must be provided.
+ Args:
+ data (pandas.DataFrame): initial data
+ data_generator (Generator[pandas.DataFrame, None, None]): generator of data
+ affected_rows (int): total data rowcount - can be None depending on the handler
+ NOTE: name affected_rows for compatibility with OKResponse
+ columns (list[Column]): list of columns
+ """
+ self.type = RESPONSE_TYPE.TABLE
+ self._data_generator = data_generator
+ self._columns = columns
+ self.affected_rows = affected_rows
+ self._data = data
+ self._fetched = False if data_generator else True
+ self._invalid = False
+ self._last_data_piece = None
+ self.rows_fetched = len(data) if data is not None else 0
+
+ @property
+ def data_generator(self) -> Generator[pandas.DataFrame, None, None]:
+ return self._data_generator
+
+ @data_generator.setter
+ def data_generator(self, value):
+ self._fetched = False if value else True
+ self._data_generator = value
+
+ def fetchall(self) -> pandas.DataFrame:
+ """Fetch all data and store it in the _data attribute.
+
+ Returns:
+ pandas.DataFrame: Data frame.
+ """
+ self._raise_if_invalid()
+ if self._data_generator is None or self._fetched:
+ return self._data
+
+ pieces = list(self._iterate_with_memory_check())
+ if self._data is None:
+ if len(pieces) == 1:
+ self._data = pieces[0]
+ elif len(pieces) == 0:
+ self._data = pandas.DataFrame([], columns=[column.name for column in self._columns])
+ else:
+ self._data = _safe_pandas_concat(pieces)
+ elif len(pieces) > 0:
+ self._data = _safe_pandas_concat([self._data, *pieces])
+
+ self._fetched = True
+ self._data_generator = None
+
+ return self._data
+
+ def _raise_if_low_memory(self) -> None:
+ """Check if there is enough available memory to load the next data chunk.
+
+ Estimates the memory required for the next chunk based on the size of the last
+ fetched chunk. If `affected_rows` (fetched rows) is known, the estimate is capped at the
+ number of remaining rows (but no more than one chunk). Otherwise, assumes the next chunk will
+ be the same size as the previous one.
+
+ Does nothing when no data has been fetched yet.
+
+ Raises:
+ MemoryError: If estimated memory for the next chunk exceeds available memory.
+ """
+ if self._last_data_piece is None or len(self._last_data_piece) == 0:
+ return
+
+ data_piece_size_kb = self._last_data_piece.memory_usage(index=True, deep=True).sum() >> 10
+ if isinstance(self.affected_rows, int) and self.affected_rows > 0:
+ row_size_kb = data_piece_size_kb / len(self._last_data_piece)
+ rows_expected = min(self.affected_rows - self.rows_fetched, len(self._last_data_piece))
+ if rows_expected > 0:
+ available_memory_kb = psutil.virtual_memory().available >> 10
+ if available_memory_kb < (row_size_kb * rows_expected * 1.1):
+ raise MemoryError(
+ f"Not enough memory to load remaining data. "
+ f"Available: {available_memory_kb}KB, estimated need: {int(row_size_kb * rows_expected * 1.1)}KB"
+ )
+ else:
+ # assume that next piece is the same size
+ available_memory_kb = psutil.virtual_memory().available >> 10
+ if available_memory_kb < (data_piece_size_kb * 1.1):
+ raise MemoryError(
+ f"Not enough memory to load remaining data. "
+ f"Available: {available_memory_kb}KB, estimated need: {int(data_piece_size_kb * 1.1)}KB"
+ )
+
+ def _iterate_with_memory_check(self) -> Generator[pandas.DataFrame, None, None]:
+ """Iterate over `_data_generator` with memory safety checks.
+
+ Yields:
+ pandas.DataFrame: The next chunk from the underlying data generator.
+
+ Raises:
+ MemoryError: Propagated from `_raise_if_low_memory` if available
+ memory is insufficient for the next chunk.
+ """
+ if self._data_generator is None:
+ return
+
+ self._raise_if_low_memory()
+
+ for piece in self._data_generator:
+ self._last_data_piece = piece
+ self.rows_fetched += len(piece)
+ yield piece
+ self._raise_if_low_memory()
+
+ def fetchmany(self) -> pandas.DataFrame | None:
+ """Fetch one piece of data and store it in the _data attribute.
+
+ Returns:
+ pandas.DataFrame: Data frame, piece of data.
+ """
+ self._raise_if_invalid()
+ try:
+ piece = next(self._iterate_with_memory_check())
+ if self._data is None:
+ self._data = piece
+ else:
+ self._data = _safe_pandas_concat([self._data, piece])
+ except StopIteration:
+ self._fetched = True
+ self._data_generator = None
+ return None
+ return piece
+
+ def iterate_no_save(self) -> Generator[pandas.DataFrame, None, None]:
+ """Iterate over the data and yield each piece of data. Do not save the data to the _data attribute.
+ NOTE: do it only once, before return result to the user
+
+ Returns:
+ Generator[pandas.DataFrame, None, None]: Generator of data frames.
+ """
+ self._raise_if_invalid()
+ if self._data is not None:
+ yield self._data
+ if self._data_generator:
+ self._invalid = True
+ for piece in self._iterate_with_memory_check():
+ yield piece
+
+ def _raise_if_invalid(self):
+ if self._invalid:
+ raise ValueError("Data has already been fetched and cannot be iterated over.")
+
+ @property
+ def data_frame(self) -> pandas.DataFrame:
+ """Get the data frame. Represents the entire dataset.
+
+ Returns:
+ pandas.DataFrame: Data frame.
+ """
+ self.fetchall()
+ return self._data
+
+ @data_frame.setter
+ def data_frame(self, value):
+ """for back compatibility"""
+ self._data = value
+
+ @property
+ def columns(self) -> list[Column]:
+ """Get the columns.
+
+ Returns:
+ list[Column]: List of columns.
+ """
+ self._resolve_columns()
+ return self._columns
+
+ def _resolve_columns(self):
+ if self._columns is not None:
+ return
+ self.fetchall()
+ self._columns = [Column(name=c) for c in self._data.columns]
+
+ def set_columns_attrs(self, table_name: str | None, table_alias: str | None, database: str | None):
+ """Set the attributes of the columns.
+
+ Args:
+ table_name (str | None): Table name.
+ table_alias (str | None): Table alias.
+ database (str | None): Database name.
+ """
+ self._resolve_columns()
+ for column in self._columns:
+ if table_name:
+ column.table_name = table_name
+ if table_alias:
+ column.table_alias = table_alias
+ if database:
+ column.database = database
+
+ def to_columns_table_response(self, map_type_fn: Callable) -> None:
+ """Transform the response to a `columns table` response.
+ NOTE: original dataframe will be mutated
+
+ Args:
+ map_type_fn (Callable): Function to map the data type to the MySQL data type.
+ """
+ if self.type == RESPONSE_TYPE.COLUMNS_TABLE:
+ return
+ if self.type != RESPONSE_TYPE.TABLE:
+ raise ValueError(
+ f"Cannot convert handler response with type '{self.type}' to '{RESPONSE_TYPE.COLUMNS_TABLE}'"
+ )
+
+ self.fetchall()
+ self._resolve_columns()
+ self.type = RESPONSE_TYPE.COLUMNS_TABLE
+
+ if self._data is None:
+ return
+ self._data.columns = [name.upper() for name in self._data.columns]
+
+ for required_column in (INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME, INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE):
+ if required_column not in self._data.columns:
+ raise ValueError(
+ f"Missed required for INFORMATION_SCHEMA.COLUMNS column {required_column}. "
+ f"Columns set: {self._data.columns}"
+ )
+ for column_name in INF_SCHEMA_COLUMNS_NAMES_SET:
+ if column_name not in self._data.columns:
+ self._data[column_name] = None
+
+ self._data[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self._data[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE].apply(
+ map_type_fn
+ )
+
+ self._data = self._data.astype(
+ {
+ INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: "string",
+ INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE: "string",
+ INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION: "Int32",
+ INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT: "string",
+ INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE: "string",
+ INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH: "Int32",
+ INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH: "Int32",
+ INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION: "Int32",
+ INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE: "Int32",
+ INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION: "Int32",
+ INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME: "string",
+ INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME: "string",
+ }
+ )
+ self._data.replace([numpy.nan, pandas.NA], None, inplace=True)
+
+
+def normalize_response(response) -> TableResponse | OkResponse | ErrorResponse:
+ """Convert legacy HandlerResponse to new response types.
+
+ If response is already a new type (TableResponse, OkResponse, ErrorResponse),
+ return it as-is. If response is a legacy HandlerResponse, convert it based
+ on its resp_type.
+
+ Args:
+ response: Either a new response type or legacy HandlerResponse
+
+ Returns:
+ TableResponse | OkResponse | ErrorResponse: Normalized response
+ """
+ # Already new format - return as-is
+ if isinstance(response, (TableResponse, OkResponse, ErrorResponse)):
+ return response
+
+ # Legacy HandlerResponse - convert based on type
+ if isinstance(response, HandlerResponse):
+ if response.resp_type == RESPONSE_TYPE.ERROR:
+ err = ErrorResponse(
+ error_code=response.error_code,
+ error_message=response.error_message,
+ is_expected_error=response.is_expected_error,
+ )
+ err.exception = response.exception
+ return err
+
+ if response.resp_type == RESPONSE_TYPE.OK:
+ return OkResponse(affected_rows=response.affected_rows)
+
+ # TABLE or COLUMNS_TABLE
+ if response.data_frame is not None:
+ columns = list(response.data_frame.columns)
+ else:
+ columns = []
+
+ mysql_types = response.mysql_types
+ if mysql_types is None:
+ mysql_types = [None] * len(columns)
+
+ table_response = TableResponse(
+ data=response.data_frame,
+ columns=[
+ Column(name=column_name, type=mysql_type) for column_name, mysql_type in zip(columns, mysql_types)
+ ],
+ data_generator=iter([]), # empty generator for legacy responses
+ )
+ if response.resp_type == RESPONSE_TYPE.COLUMNS_TABLE:
+ table_response.type = RESPONSE_TYPE.COLUMNS_TABLE
+ return table_response
+
+ # Unknown type - return as-is (shouldn't happen normally)
+ return response
+
+
+# ! deprecated
class HandlerResponse:
+ """Legacy response class for compatibility with old code.
+ NOTE: do not use this class directly, use DataHandlerResponse instead
+ """
+
def __init__(
self,
resp_type: RESPONSE_TYPE,
@@ -86,16 +546,21 @@ def to_columns_table_response(self, map_type_fn: Callable) -> None:
raise ValueError(f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}")
self.data_frame.columns = [name.upper() for name in self.data_frame.columns]
+
+ for required_column in (INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME, INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE):
+ if required_column not in self.data_frame.columns:
+ raise ValueError(
+ f"Missed required for INFORMATION_SCHEMA.COLUMNS column {required_column}. "
+ f"Columns set: {self.data_frame.columns}"
+ )
+ for column_name in INF_SCHEMA_COLUMNS_NAMES_SET:
+ if column_name not in self.data_frame.columns:
+ self.data_frame[column_name] = None
+
self.data_frame[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self.data_frame[
INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE
].apply(map_type_fn)
- # region validate df
- current_columns_set = set(self.data_frame.columns)
- if INF_SCHEMA_COLUMNS_NAMES_SET != current_columns_set:
- raise ValueError(f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}")
- # endregion
-
self.data_frame = self.data_frame.astype(
{
INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: "string",
@@ -142,28 +607,3 @@ def __repr__(self):
self.error_message,
self.affected_rows,
)
-
-
-class HandlerStatusResponse:
- def __init__(
- self,
- success: bool = True,
- error_message: str = None,
- redirect_url: str = None,
- copy_storage: str = None,
- ) -> None:
- self.success = success
- self.error_message = error_message
- self.redirect_url = redirect_url
- self.copy_storage = copy_storage
-
- def to_json(self):
- data = {"success": self.success, "error": self.error_message}
- if self.redirect_url is not None:
- data["redirect_url"] = self.redirect_url
- return data
-
- def __repr__(self):
- return f"{self.__class__.__name__}: success={self.success},\
- error={self.error_message},\
- redirect_url={self.redirect_url}"
diff --git a/mindsdb/integrations/libs/vectordatabase_handler.py b/mindsdb/integrations/libs/vectordatabase_handler.py
index 876f2d898d6..0bd11c7834f 100644
--- a/mindsdb/integrations/libs/vectordatabase_handler.py
+++ b/mindsdb/integrations/libs/vectordatabase_handler.py
@@ -19,11 +19,10 @@
)
from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse
-from mindsdb.utilities import log
+from mindsdb.integrations.libs.response import DataHandlerResponse, OkResponse, TableResponse
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
-
from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb.utilities import log
from .base import BaseHandler
LOG = log.getLogger(__name__)
@@ -463,7 +462,7 @@ def dispatch_select(
handler_engine = self.__class__.name
raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
- def _dispatch(self, query: ASTNode) -> HandlerResponse:
+ def _dispatch(self, query: ASTNode) -> DataHandlerResponse:
"""
Parse and Dispatch query to the appropriate method.
"""
@@ -478,14 +477,14 @@ def _dispatch(self, query: ASTNode) -> HandlerResponse:
if type(query) in dispatch_router:
resp = dispatch_router[type(query)](query)
if resp is not None:
- return HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=resp)
+ return TableResponse(data=resp)
else:
- return HandlerResponse(resp_type=RESPONSE_TYPE.OK)
+ return OkResponse()
else:
raise NotImplementedError(f"Query type {type(query)} not implemented.")
- def query(self, query: ASTNode) -> HandlerResponse:
+ def query(self, query: ASTNode) -> DataHandlerResponse:
"""
Receive query as AST (abstract syntax tree) and act upon it somehow.
@@ -494,11 +493,11 @@ def query(self, query: ASTNode) -> HandlerResponse:
of query: SELECT, INSERT, DELETE, etc
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
return self._dispatch(query)
- def create_table(self, table_name: str, if_not_exists=True) -> HandlerResponse:
+ def create_table(self, table_name: str, if_not_exists=True) -> DataHandlerResponse:
"""Create table
Args:
@@ -506,11 +505,11 @@ def create_table(self, table_name: str, if_not_exists=True) -> HandlerResponse:
if_not_exists (bool): if True, do nothing if table exists
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
- def drop_table(self, table_name: str, if_exists=True) -> HandlerResponse:
+ def drop_table(self, table_name: str, if_exists=True) -> DataHandlerResponse:
"""Drop table
Args:
@@ -518,11 +517,11 @@ def drop_table(self, table_name: str, if_exists=True) -> HandlerResponse:
if_exists (bool): if True, do nothing if table does not exist
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
- def insert(self, table_name: str, data: pd.DataFrame) -> HandlerResponse:
+ def insert(self, table_name: str, data: pd.DataFrame) -> DataHandlerResponse:
"""Insert data into table
Args:
@@ -531,7 +530,7 @@ def insert(self, table_name: str, data: pd.DataFrame) -> HandlerResponse:
columns (List[str]): columns to insert
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
@@ -544,11 +543,11 @@ def update(self, table_name: str, data: pd.DataFrame, key_columns: List[str] = N
key_columns (List[str]): key to to update
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
- def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> HandlerResponse:
+ def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> DataHandlerResponse:
"""Delete data from table
Args:
@@ -556,7 +555,7 @@ def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> H
conditions (List[FilterCondition]): conditions to delete
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
@@ -567,7 +566,7 @@ def select(
conditions: List[FilterCondition] = None,
offset: int = None,
limit: int = None,
- ) -> pd.DataFrame:
+ ) -> DataHandlerResponse:
"""Select data from table
Args:
@@ -576,18 +575,15 @@ def select(
conditions (List[FilterCondition]): conditions to select
Returns:
- HandlerResponse
+ DataHandlerResponse
"""
raise NotImplementedError()
- def get_columns(self, table_name: str) -> HandlerResponse:
+ def get_columns(self, table_name: str) -> TableResponse:
# return a fixed set of columns
data = pd.DataFrame(self.SCHEMA)
data.columns = ["COLUMN_NAME", "DATA_TYPE"]
- return HandlerResponse(
- resp_type=RESPONSE_TYPE.DATA,
- data_frame=data,
- )
+ return TableResponse(data=data)
def hybrid_search(
self,
diff --git a/mindsdb/integrations/utilities/community_handler_fetcher.py b/mindsdb/integrations/utilities/community_handler_fetcher.py
new file mode 100644
index 00000000000..2f66640a035
--- /dev/null
+++ b/mindsdb/integrations/utilities/community_handler_fetcher.py
@@ -0,0 +1,265 @@
+import base64
+import json
+import os
+import shutil
+import threading
+from pathlib import Path
+from typing import Optional
+
+import requests
+
+from mindsdb.utilities import log
+
+logger = log.getLogger(__name__)
+
+# GitHub API configuration
+# It can be replaced later with making the repo public.
+GITHUB_API_BASE = "https://api.github.com"
+DEFAULT_REPO = "mindsdb/mindsdb-community-handlers"
+DEFAULT_BRANCH = "main"
+DEFAULT_PATH_PREFIX = "community_handlers"
+_fetch_locks: dict = {}
+_fetch_locks_lock = threading.Lock()
+
+
+def _get_fetch_lock(handler_dir_name: str) -> threading.Lock:
+ """
+ Get and create if needed a threading.
+ Lock for the given handler directory.
+ This ensures that concurrent fetches for the same handler_dir_name are
+ serializedlly, preventing race conditions on disk.
+ """
+ with _fetch_locks_lock:
+ if handler_dir_name not in _fetch_locks:
+ _fetch_locks[handler_dir_name] = threading.Lock()
+ return _fetch_locks[handler_dir_name]
+
+
+def _github_headers() -> dict:
+ """
+ Return headers for GitHub API requests, including optional auth if GITHUB_TOKEN is set in the environment.
+ TODO: Remove this after repository is set to public.
+ """
+ headers = {"Accept": "application/vnd.github.v3+json"}
+ token = os.environ.get("GITHUB_TOKEN")
+ if token:
+ headers["Authorization"] = f"token {token}"
+ return headers
+
+
+# It can be removed later with making the repo public. TBD
+def _get_repo_config() -> tuple:
+ """Returns (repo, branch, path_prefix)."""
+ repo = os.environ.get("COMMUNITY_HANDLERS_REPO", DEFAULT_REPO)
+ branch = os.environ.get("COMMUNITY_HANDLERS_BRANCH", DEFAULT_BRANCH)
+ path_prefix = os.environ.get("COMMUNITY_HANDLERS_PATH", DEFAULT_PATH_PREFIX)
+ return repo, branch, path_prefix
+
+
+def _resolve_tree_sha(repo: str, branch: str, dir_path: str, headers: dict) -> Optional[str]:
+ """Return the Git tree SHA for dir_path by inspecting its parent directory listing.
+
+ Calls the Contents API on the parent of dir_path, then finds the matching
+ directory entry and returns its SHA. Returns None if the path does not exist
+ (404) or if the directory name is not found in the parent listing.
+
+ Raises:
+ RuntimeError: On network errors or unexpected GitHub API responses.
+ """
+ parent_path, _, dir_name = dir_path.rstrip("/").rpartition("/")
+ api_url = f"{GITHUB_API_BASE}/repos/{repo}/contents/{parent_path}"
+ params = {"ref": branch}
+ try:
+ resp = requests.get(api_url, params=params, headers=headers, timeout=30)
+ except requests.RequestException as e:
+ raise RuntimeError(f"Network error resolving tree SHA for '{dir_path}': {e}") from e
+ if resp.status_code == 404:
+ return None
+ if resp.status_code != 200:
+ raise RuntimeError(
+ f"GitHub API error resolving tree SHA for '{dir_path}': HTTP {resp.status_code} β {resp.text[:300]}"
+ )
+ try:
+ entries = resp.json()
+ except json.JSONDecodeError as e:
+ raise RuntimeError(f"Invalid JSON resolving tree SHA for '{dir_path}': {e}") from e
+ for entry in entries:
+ if entry.get("name") == dir_name and entry.get("type") == "dir":
+ return entry.get("sha")
+ return None
+
+
+def _fetch_tree_recursive(
+ repo: str,
+ branch: str,
+ tree_sha: str,
+ remote_prefix: str,
+ dest_dir: Path,
+ headers: dict,
+ max_depth: int = 4,
+) -> int:
+ """Fetch all files in a Git tree recursively, preserving directory structure.
+
+ Uses the Git Trees API with ?recursive=1 to obtain the full file listing in
+ a single API call, then downloads each blob from raw.githubusercontent.com.
+
+ Args:
+ repo: GitHub repository in "owner/repo" format.
+ branch: Branch or ref name used to build raw download URLs.
+ tree_sha: SHA of the Git tree to fetch.
+ remote_prefix: Path within the repo to the handler directory
+ Used to construct raw download URLs.
+ dest_dir: Local directory where files will be written.
+ headers: HTTP headers (auth, Accept) for GitHub API requests.
+ max_depth: Maximum allowed directory nesting depth. Entries whose
+ relative path contains >= max_depth slashes are skipped.
+
+ Returns:
+ Number of files downloaded.
+
+ Raises:
+ RuntimeError: On network errors or unexpected API responses.
+ """
+ api_url = f"{GITHUB_API_BASE}/repos/{repo}/git/trees/{tree_sha}"
+ params = {"recursive": "1"}
+ try:
+ resp = requests.get(api_url, params=params, headers=headers, timeout=30)
+ resp.raise_for_status()
+ except requests.RequestException as e:
+ raise RuntimeError(f"Network error fetching tree '{tree_sha}': {e}") from e
+ try:
+ tree_data = resp.json()
+ except json.JSONDecodeError as e:
+ raise RuntimeError(f"Invalid JSON from Git Trees API for tree '{tree_sha}': {e}") from e
+
+ if tree_data.get("truncated"):
+ logger.warning("Tree for handler '%s' was truncated; some files may be missing", remote_prefix)
+
+ file_count = 0
+ for entry in tree_data.get("tree", []):
+ if entry.get("type") != "blob":
+ continue
+ path = entry["path"]
+ if path.count("/") >= max_depth:
+ logger.debug("Skipping deeply nested path '%s' (max_depth=%d)", path, max_depth)
+ continue
+ local_path = dest_dir / path
+ local_path.parent.mkdir(parents=True, exist_ok=True)
+ raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{remote_prefix}/{path}"
+ try:
+ file_resp = requests.get(raw_url, headers=headers, timeout=30)
+ file_resp.raise_for_status()
+ except requests.RequestException as e:
+ raise RuntimeError(f"Failed to download '{path}' for handler '{remote_prefix}': {e}") from e
+ local_path.write_bytes(file_resp.content)
+ logger.debug("Downloaded %s (%d bytes)", path, entry.get("size", 0))
+ file_count += 1
+ return file_count
+
+
+def fetch_handler(handler_dir_name: str, storage_dir: Path) -> Optional[Path]:
+ """
+ Fetch a single community handler directory from GitHub into storage_dir.
+
+ Downloads the full directory tree for the requested handler using the
+ GitHub Git Trees API, preserving subdirectory structure.
+
+ Args:
+ handler_dir_name: The directory name of the handler (e.g. "github_handler")
+ storage_dir: Root directory where community handlers are stored
+
+ Returns:
+ Path to the fetched handler directory, or None if the handler does not
+ exist in the remote repository.
+
+ Raises:
+ RuntimeError: On network errors or unexpected GitHub API responses.
+ """
+ lock = _get_fetch_lock(handler_dir_name)
+ with lock:
+ dest_dir = storage_dir / handler_dir_name
+
+ if dest_dir.is_dir() and (dest_dir / "__init__.py").exists():
+ logger.debug("Community handler '%s' already on disk at %s", handler_dir_name, dest_dir)
+ return dest_dir
+
+ repo, branch, path_prefix = _get_repo_config()
+ headers = _github_headers()
+ remote_prefix = f"{path_prefix}/{handler_dir_name}"
+
+ logger.debug("Fetching community handler '%s' from %s@%s", handler_dir_name, repo, branch)
+
+ tree_sha = _resolve_tree_sha(repo, branch, remote_prefix, headers)
+ if tree_sha is None:
+ logger.error("Community handler '%s' not found in repo '%s'", handler_dir_name, repo)
+ return None
+
+ # Use a temporary directory for downloading files before moving to the final location.
+ # This prevents leaving a partially downloaded handler on disk if something goes wrong.
+ # As a fail-safe measure, we remove any existing temp directory before starting, and ensure cleanup on exceptions.
+ tmp_dir = storage_dir / f".tmp_{handler_dir_name}"
+ if tmp_dir.exists():
+ shutil.rmtree(tmp_dir)
+ tmp_dir.mkdir(parents=True, exist_ok=True)
+
+ try:
+ file_count = _fetch_tree_recursive(repo, branch, tree_sha, remote_prefix, tmp_dir, headers)
+ logger.debug("Fetched %d files for handler '%s'", file_count, handler_dir_name)
+
+ # Atomic rename.
+ # If dest_dir already exists, remove it first.
+ # This ensures that we don't end up with a mix of old and new files if the handler is updated.
+ if dest_dir.exists():
+ shutil.rmtree(dest_dir)
+ tmp_dir.rename(dest_dir)
+
+ except Exception:
+ if tmp_dir.exists():
+ shutil.rmtree(tmp_dir)
+ raise
+
+ logger.debug("Community handler '%s' fetched successfully to %s", handler_dir_name, dest_dir)
+ return dest_dir
+
+
+def community_handlers_enabled() -> bool:
+ """Returns True if community handlers are enabled via env var.
+
+ Set MINDSDB_COMMUNITY_HANDLERS=true to opt in.
+ Community handlers are disabled by default.
+ """
+ val = os.environ.get("MINDSDB_COMMUNITY_HANDLERS", "false").lower()
+ return val in ("1", "true", "yes", "enabled")
+
+
+def get_community_handlers_storage_dir(storage_root: Path) -> Path:
+ """Returns (and creates if needed) the community handlers storage directory."""
+ community_dir = storage_root / "community_handlers"
+ # Creating the directory, maybe can be done on init?
+ community_dir.mkdir(parents=True, exist_ok=True)
+ return community_dir
+
+
+def list_available_handlers() -> list:
+ """
+ Return handler metadata from the community index.json.
+
+ Each dict has keys: name, title, folder, type, support_level,
+ icon_path, description.
+ """
+ repo, branch, _ = _get_repo_config()
+ api_url = f"{GITHUB_API_BASE}/repos/{repo}/contents/index.json"
+ params = {"ref": branch}
+
+ try:
+ logger.debug("Fetching community handlers index from GitHub: %s", api_url)
+ resp = requests.get(api_url, params=params, headers=_github_headers(), timeout=30)
+ if resp.status_code == 200:
+ entry = resp.json()
+ raw = base64.b64decode(entry["content"]).decode("utf-8")
+ data = json.loads(raw)
+ return data.get("handlers", [])
+ logger.warning("Could not fetch community index: HTTP %s", resp.status_code)
+ except Exception as e:
+ logger.warning("Could not fetch community handlers index: %s", e)
+ return []
diff --git a/mindsdb/integrations/utilities/files/file_reader.py b/mindsdb/integrations/utilities/files/file_reader.py
index 4397569482c..7bebb3cbd8c 100644
--- a/mindsdb/integrations/utilities/files/file_reader.py
+++ b/mindsdb/integrations/utilities/files/file_reader.py
@@ -42,6 +42,7 @@ class _SINGLE_PAGE_FORMAT:
@dataclass(frozen=True, slots=True)
class _MULTI_PAGE_FORMAT:
XLSX: str = "xlsx"
+ XLS: str = "xls"
MULTI_PAGE_FORMAT = _MULTI_PAGE_FORMAT()
@@ -125,6 +126,10 @@ def __init__(
self.parameters = {}
+ def close(self):
+ if self.file_obj is not None:
+ self.file_obj.close()
+
def get_format(self) -> str:
if self.format is not None:
return self.format
@@ -162,9 +167,10 @@ def get_format_by_content(self):
if file_type is not None:
if file_type.mime in {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- "application/vnd.ms-excel",
}:
return MULTI_PAGE_FORMAT.XLSX
+ if file_type.mime == "application/vnd.ms-excel":
+ return MULTI_PAGE_FORMAT.XLS
if file_type.mime == "application/pdf":
return SINGLE_PAGE_FORMAT.PDF
@@ -624,3 +630,12 @@ def read_xlsx(
else:
df = pd.read_excel(xls, sheet_name=page_name)
yield page_name, df
+
+ @staticmethod
+ def read_xls(
+ file_obj: BytesIO,
+ page_name: str | None = None,
+ only_names: bool = False,
+ **kwargs,
+ ):
+ return FileReader.read_xlsx(file_obj, page_name=page_name, only_names=only_names, **kwargs)
diff --git a/mindsdb/integrations/utilities/install.py b/mindsdb/integrations/utilities/install.py
index 388edc9703d..9a56b2e4ae4 100644
--- a/mindsdb/integrations/utilities/install.py
+++ b/mindsdb/integrations/utilities/install.py
@@ -1,66 +1,77 @@
import os
import sys
import subprocess
+from enum import Enum
from typing import Text, List
-def install_dependencies(dependencies: List[Text]) -> dict:
+class InstallTool(Enum):
+ pip = (sys.executable, "-m", "pip")
+ uv = ("uv", "pip")
+
+
+def install_dependencies(dependencies: List[Text], tool: InstallTool = InstallTool.pip) -> dict:
"""
Installs the dependencies for a handler by calling the `pip install` command via subprocess.
Args:
dependencies (List[Text]): List of dependencies for the handler.
+ tool (InstallTool): tool the tool that will be used to install dependencies
Returns:
dict: A dictionary containing the success status and an error message if an error occurs.
"""
- outs = b''
- errs = b''
- result = {
- 'success': False,
- 'error_message': None
- }
+ outs = b""
+ errs = b""
+ result = {"success": False, "error_message": None}
code = None
try:
# Split the dependencies by parsing the contents of the requirements.txt file.
split_dependencies = parse_dependencies(dependencies)
except FileNotFoundError as file_not_found_error:
- result['error_message'] = f"Error parsing dependencies, file not found: {str(file_not_found_error)}"
+ result["error_message"] = f"Error parsing dependencies, file not found: {str(file_not_found_error)}"
return result
except Exception as unknown_error:
- result['error_message'] = f"Unknown error parsing dependencies: {str(unknown_error)}"
+ result["error_message"] = f"Unknown error parsing dependencies: {str(unknown_error)}"
return result
try:
- # Install the dependencies using the `pip install` command.
+ # Install the dependencies using the selected tool.
sp = subprocess.Popen(
- [sys.executable, '-m', 'pip', 'install', *split_dependencies],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE
+ [*tool.value, "install", *split_dependencies], stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
code = sp.wait()
outs, errs = sp.communicate(timeout=1)
except subprocess.TimeoutExpired as timeout_error:
sp.kill()
- result['error_message'] = f"Timeout error while installing dependencies: {str(timeout_error)}"
+ result["error_message"] = f"Timeout error while installing dependencies: {str(timeout_error)}"
+ return result
+ except FileNotFoundError as e:
+ if e.filename == "uv":
+ result["error_message"] = "The 'pip' and 'uv' tools are not found. Please install them."
+ else:
+ result["error_message"] = f"FileNotFoundError error while installing dependencies: {str(e)}"
return result
except Exception as unknown_error:
- result['error_message'] = f"Unknown error while installing dependencies: {str(unknown_error)}"
+ result["error_message"] = f"Unknown error while installing dependencies: {str(unknown_error)}"
return result
# Return the result of the installation if successful, otherwise return an error message.
if code != 0:
- output = ''
+ output = ""
if isinstance(outs, bytes) and len(outs) > 0:
- output = output + 'Output: ' + outs.decode()
+ output = output + "Output: " + outs.decode()
if isinstance(errs, bytes) and len(errs) > 0:
if len(output) > 0:
- output = output + '\n'
- output = output + 'Errors: ' + errs.decode()
- result['error_message'] = output
+ output = output + "\n"
+ output = output + "Errors: " + errs.decode()
+ if "no module named pip" in output.lower() and tool is InstallTool.pip:
+ # try with uv
+ return install_dependencies(dependencies, InstallTool.uv)
+ result["error_message"] = output
else:
- result['success'] = True
+ result["success"] = True
return result
@@ -85,19 +96,19 @@ def parse_dependencies(dependencies: List[Text]) -> List[Text]:
split_dependencies = []
for dependency in dependencies:
# ignore standalone comments
- if dependency.startswith('#'):
+ if dependency.startswith("#"):
continue
# remove inline comments
- if '#' in dependency:
- dependency = dependency.split('#')[0].strip()
+ if "#" in dependency:
+ dependency = dependency.split("#")[0].strip()
# check if the dependency is a path to a requirements file
- if dependency.startswith('-r'):
+ if dependency.startswith("-r"):
# get the path to the requirements file
- req_path = dependency.split(' ')[1]
+ req_path = dependency.split(" ")[1]
# create the absolute path to the requirements file
- abs_req_path = os.path.abspath(os.path.join(script_path, req_path.replace('mindsdb/integrations', '..')))
+ abs_req_path = os.path.abspath(os.path.join(script_path, req_path.replace("mindsdb/integrations", "..")))
# check if the file exists
if os.path.exists(abs_req_path):
inner_dependencies, inner_split_dependencies = [], []
@@ -128,7 +139,7 @@ def read_dependencies(path: Text) -> List[Text]:
"""
dependencies = []
# read the dependencies from the file
- with open(str(path), 'rt') as f:
- dependencies = [x.strip(' \t\n') for x in f.readlines()]
+ with open(str(path), "rt") as f:
+ dependencies = [x.strip(" \t\n") for x in f.readlines()]
dependencies = [x for x in dependencies if len(x) > 0]
return dependencies
diff --git a/mindsdb/integrations/utilities/rag/config_loader.py b/mindsdb/integrations/utilities/rag/config_loader.py
deleted file mode 100644
index 51732358ca6..00000000000
--- a/mindsdb/integrations/utilities/rag/config_loader.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""Utility functions for RAG pipeline configuration"""
-
-from typing import Dict, Any, Optional
-
-from mindsdb.utilities.log import getLogger
-from mindsdb.integrations.utilities.rag.settings import (
- RetrieverType,
- MultiVectorRetrieverMode,
- SearchType,
- SearchKwargs,
- VectorStoreConfig,
- RerankerConfig,
- RAGPipelineModel,
- DEFAULT_COLLECTION_NAME,
-)
-
-logger = getLogger(__name__)
-
-
-def load_rag_config(
- base_config: Dict[str, Any], kb_params: Optional[Dict[str, Any]] = None, embedding_model: Any = None
-) -> RAGPipelineModel:
- """
- Load and validate RAG configuration parameters. This function handles the conversion of configuration
- parameters into their appropriate types and ensures all required settings are properly configured.
-
- Args:
- base_config: Base configuration dictionary containing RAG pipeline settings
- kb_params: Optional knowledge base parameters to merge with base config
- embedding_model: Optional embedding model instance to use in the RAG pipeline
-
- Returns:
- RAGPipelineModel: Validated RAG configuration model ready for pipeline creation
-
- Raises:
- ValueError: If configuration validation fails or required parameters are missing
- """
- # Create a shallow copy of the base config to avoid modifying the original
- # We avoid deepcopy because some objects (like embedding_model) may contain unpickleable objects
- rag_params = base_config.copy()
-
- # Merge with knowledge base params if provided
- if kb_params:
- rag_params.update(kb_params)
-
- # Set embedding model if provided
- if embedding_model is not None:
- rag_params["embedding_model"] = embedding_model
-
- # Handle enums and type conversions
- if "retriever_type" in rag_params:
- rag_params["retriever_type"] = RetrieverType(rag_params["retriever_type"])
- if "multi_retriever_mode" in rag_params:
- rag_params["multi_retriever_mode"] = MultiVectorRetrieverMode(rag_params["multi_retriever_mode"])
- if "search_type" in rag_params:
- rag_params["search_type"] = SearchType(rag_params["search_type"])
-
- # Handle search kwargs if present
- if "search_kwargs" in rag_params and isinstance(rag_params["search_kwargs"], dict):
- rag_params["search_kwargs"] = SearchKwargs(**rag_params["search_kwargs"])
-
- # Summarization config removed - no longer supported
-
- # Handle vector store config
- if "vector_store_config" in rag_params:
- if isinstance(rag_params["vector_store_config"], dict):
- rag_params["vector_store_config"] = VectorStoreConfig(**rag_params["vector_store_config"])
- else:
- rag_params["vector_store_config"] = {}
- logger.warning(
- f"No collection_name specified for the retrieval tool, "
- f"using default collection_name: '{DEFAULT_COLLECTION_NAME}'"
- f"\nWarning: If this collection does not exist, no data will be retrieved"
- )
-
- if "reranker_config" in rag_params:
- rag_params["reranker_config"] = RerankerConfig(**rag_params["reranker_config"])
-
- # Convert to RAGPipelineModel with validation
- try:
- return RAGPipelineModel(**rag_params)
- except Exception as e:
- logger.exception("Invalid RAG configuration:")
- raise ValueError(f"Configuration validation failed: {str(e)}") from e
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/MDBVectorStore.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/MDBVectorStore.py
deleted file mode 100644
index 8e5575af0bd..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/MDBVectorStore.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant, Star
-from mindsdb.integrations.libs.vectordatabase_handler import TableField
-
-from typing import Any, List, Optional
-
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-
-
-class MDBVectorStore(VectorStore):
- def __init__(self, kb_table) -> None:
- self.kb_table = kb_table
-
- @property
- def embeddings(self) -> Optional[Any]:
- return None
-
- def similarity_search(
- self,
- query: str,
- k: int = 4,
- **kwargs: Any,
- ) -> List[SimpleDocument]:
- query = Select(
- targets=[Star()],
- where=BinaryOperation(op="=", args=[Identifier(TableField.CONTENT.value), Constant(query)]),
- limit=Constant(k),
- )
-
- df = self.kb_table.select_query(query)
-
- docs = []
- for _, row in df.iterrows():
- metadata = row[TableField.METADATA.value]
- if metadata is None:
- metadata = {}
- docs.append(SimpleDocument(page_content=row[TableField.CONTENT.value], metadata=metadata))
-
- return docs
-
- def add_texts(self, *args, **kwargs) -> List[str]:
- raise NotImplementedError
-
- @classmethod
- def from_texts(self, *args, **kwargs):
- raise NotImplementedError
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/__init__.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/base_vector_store.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/base_vector_store.py
deleted file mode 100644
index f8e12a70e8a..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/base_vector_store.py
+++ /dev/null
@@ -1,111 +0,0 @@
-"""Base VectorStore interface to replace langchain VectorStore"""
-
-from typing import Any, List, Optional, Tuple
-from abc import ABC, abstractmethod
-
-
-class VectorStore(ABC):
- """Base class for vector stores to replace langchain VectorStore"""
-
- @property
- @abstractmethod
- def embeddings(self) -> Optional[Any]:
- """Return embeddings model if available"""
- pass
-
- @abstractmethod
- def similarity_search(
- self,
- query: str,
- k: int = 4,
- **kwargs: Any,
- ) -> List[Any]:
- """Return most similar documents to query"""
- pass
-
- def similarity_search_with_score(
- self,
- query: str,
- k: int = 4,
- **kwargs: Any,
- ) -> List[Tuple[Any, float]]:
- """Return most similar documents with scores"""
- # Default implementation using similarity_search
- docs = self.similarity_search(query, k=k, **kwargs)
- # Return with dummy scores if not overridden
- return [(doc, 0.0) for doc in docs]
-
- def as_retriever(self, **kwargs: Any) -> Any:
- """Return a retriever interface"""
-
- # Create a simple retriever wrapper
- class SimpleRetriever:
- def __init__(self, vector_store):
- self.vector_store = vector_store
-
- def get_relevant_documents(self, query: str) -> List[Any]:
- return self.vector_store.similarity_search(query, **kwargs)
-
- def invoke(self, query: str) -> List[Any]:
- return self.get_relevant_documents(query)
-
- return SimpleRetriever(self)
-
- def add_texts(self, *args: Any, **kwargs: Any) -> List[str]:
- """Add texts to the vector store"""
- raise NotImplementedError("add_texts not implemented")
-
- def add_documents(self, documents: List[Any], **kwargs: Any) -> List[str]:
- """
- Add documents to the vector store.
- Extracts page_content and metadata from documents and calls add_texts.
-
- Args:
- documents: List of document-like objects with page_content and metadata attributes
- **kwargs: Additional arguments to pass to add_texts
-
- Returns:
- List of document IDs (if supported by implementation)
- """
- texts = []
- metadatas = []
- for doc in documents:
- # Use duck typing to access page_content and metadata
- page_content = getattr(doc, "page_content", str(doc))
- metadata = getattr(doc, "metadata", {})
- texts.append(page_content)
- metadatas.append(metadata)
-
- # Call add_texts with texts and metadatas
- return self.add_texts(texts, metadatas=metadatas, **kwargs)
-
- @classmethod
- def from_texts(cls, *args: Any, **kwargs: Any):
- """Create vector store from texts"""
- raise NotImplementedError("from_texts not implemented")
-
- @classmethod
- def from_documents(cls, documents: List[Any], embedding: Any, **kwargs: Any):
- """
- Create vector store from documents.
- Extracts texts and metadata from documents and calls from_texts.
-
- Args:
- documents: List of document-like objects with page_content and metadata attributes
- embedding: Embedding model/function
- **kwargs: Additional arguments to pass to from_texts
-
- Returns:
- VectorStore instance
- """
- texts = []
- metadatas = []
- for doc in documents:
- # Use duck typing to access page_content and metadata
- page_content = getattr(doc, "page_content", str(doc))
- metadata = getattr(doc, "metadata", {})
- texts.append(page_content)
- metadatas.append(metadata)
-
- # Call from_texts with texts, metadatas, and embedding
- return cls.from_texts(texts, embedding=embedding, metadatas=metadatas, **kwargs)
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py
deleted file mode 100644
index bba23a53c23..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py
+++ /dev/null
@@ -1,226 +0,0 @@
-from typing import Any, List, Union, Optional, Dict, Tuple
-
-from pgvector.sqlalchemy import SPARSEVEC, Vector
-import sqlalchemy as sa
-from sqlalchemy.dialects.postgresql import JSON
-from sqlalchemy.orm import Session
-from sqlalchemy import create_engine
-from sqlalchemy.ext.declarative import declarative_base
-
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-# SQLAlchemy declarative base
-Base = declarative_base()
-
-_generated_sa_tables = {}
-
-
-class PGVectorMDB(VectorStore):
- """
- Custom PGVector implementation for mindsdb vector store table structure
- Replaces langchain_community.vectorstores.PGVector
- """
-
- def __init__(
- self,
- connection_string: str,
- collection_name: str,
- embedding_function: Any = None,
- is_sparse: bool = False,
- vector_size: Optional[int] = None,
- **kwargs,
- ):
- """
- Initialize PGVectorMDB
-
- Args:
- connection_string: PostgreSQL connection string
- collection_name: Name of the table/collection
- embedding_function: Embedding function/model
- is_sparse: Whether to use sparse vectors
- vector_size: Size of sparse vectors (required if is_sparse=True)
- """
- self.is_sparse = is_sparse
- if is_sparse and vector_size is None:
- raise ValueError("vector_size is required when is_sparse=True")
- self.vector_size = vector_size
- self.collection_name = collection_name
- self.embedding_function = embedding_function
-
- # Create SQLAlchemy engine
- self._bind = create_engine(connection_string, pool_pre_ping=True)
-
- # Initialize table structure
- self.__post_init__()
-
- def __post_init__(
- self,
- ) -> None:
- """Initialize SQLAlchemy table structure"""
- collection_name = self.collection_name
-
- if collection_name not in _generated_sa_tables:
-
- class EmbeddingStore(Base):
- """Embedding store."""
-
- __tablename__ = collection_name
-
- id = sa.Column(sa.Integer, primary_key=True)
- embedding = sa.Column(
- "embeddings",
- SPARSEVEC()
- if self.is_sparse
- else Vector()
- if self.vector_size is None
- else SPARSEVEC(self.vector_size)
- if self.is_sparse
- else Vector(self.vector_size),
- )
- document = sa.Column("content", sa.String, nullable=True)
- cmetadata = sa.Column("metadata", JSON, nullable=True)
-
- _generated_sa_tables[collection_name] = EmbeddingStore
-
- self.EmbeddingStore = _generated_sa_tables[collection_name]
-
- @property
- def embeddings(self) -> Optional[Any]:
- """Return embedding function if available"""
- return self.embedding_function
-
- def similarity_search(
- self,
- query: str,
- k: int = 4,
- **kwargs: Any,
- ) -> List[SimpleDocument]:
- """Return most similar documents to query"""
- # Get embedding for query
- if self.embedding_function is None:
- raise ValueError("embedding_function is required for similarity_search")
-
- # Embed the query
- query_embedding = self.embedding_function.embed_query(query)
-
- # Query collection
- results = self.__query_collection(query_embedding, k=k, filter=kwargs.get("filter"))
-
- # Convert to SimpleDocument objects
- docs = []
- for result in results:
- embedding_store = result.EmbeddingStore
- page_content = embedding_store.document or ""
- metadata = embedding_store.cmetadata or {}
- docs.append(SimpleDocument(page_content=page_content, metadata=metadata))
-
- return docs
-
- def similarity_search_with_score(
- self,
- query: str,
- k: int = 4,
- **kwargs: Any,
- ) -> List[Tuple[SimpleDocument, float]]:
- """Return most similar documents with scores"""
- # Get embedding for query
- if self.embedding_function is None:
- raise ValueError("embedding_function is required for similarity_search_with_score")
-
- # Embed the query
- query_embedding = self.embedding_function.embed_query(query)
-
- # Query collection
- results = self.__query_collection(query_embedding, k=k, filter=kwargs.get("filter"))
-
- # Convert to SimpleDocument objects with scores
- docs_with_scores = []
- for result in results:
- embedding_store = result.EmbeddingStore
- page_content = embedding_store.document or ""
- metadata = embedding_store.cmetadata or {}
- doc = SimpleDocument(page_content=page_content, metadata=metadata)
- # Distance is already calculated in __query_collection
- score = float(result.distance) if hasattr(result, "distance") else 0.0
- docs_with_scores.append((doc, score))
-
- return docs_with_scores
-
- def __query_collection(
- self,
- embedding: Union[List[float], Dict[int, float], str],
- k: int = 4,
- filter: Optional[Dict[str, str]] = None,
- ) -> List[Any]:
- """Query the collection."""
- with Session(self._bind) as session:
- if self.is_sparse:
- # Sparse vectors: expect string in format "{key:value,...}/size" or dictionary
- if isinstance(embedding, dict):
- from pgvector.utils import SparseVector
-
- embedding = SparseVector(embedding, self.vector_size)
- embedding_str = embedding.to_text()
- elif isinstance(embedding, str):
- # Use string as is - it should already be in the correct format
- embedding_str = embedding
- # Use inner product for sparse vectors
- distance_op = "<#>"
- # For inner product, larger values are better matches
- order_direction = "ASC"
- else:
- # Dense vectors: expect string in JSON array format or list of floats
- if isinstance(embedding, list):
- embedding_str = f"[{','.join(str(x) for x in embedding)}]"
- elif isinstance(embedding, str):
- embedding_str = embedding
- # Use cosine similarity for dense vectors
- distance_op = "<=>"
- # For cosine similarity, smaller values are better matches
- order_direction = "ASC"
-
- # Use SQL directly for vector comparison
- query = sa.text(
- f"""
- SELECT t.*, t.embeddings {distance_op} '{embedding_str}' as distance
- FROM {self.collection_name} t
- ORDER BY distance {order_direction}
- LIMIT {k}
- """
- )
- results = session.execute(query).all()
-
- # Convert results to the expected format
- formatted_results = []
- for rec in results:
- metadata = rec.metadata if bool(rec.metadata) else {0: 0}
- embedding_store = self.EmbeddingStore()
- embedding_store.document = rec.content
- embedding_store.cmetadata = metadata
- result = type("Result", (), {"EmbeddingStore": embedding_store, "distance": rec.distance})
- formatted_results.append(result)
-
- return formatted_results
-
- # Aliases for compatibility
- def _PGVector__query_collection(self, *args, **kwargs):
- return self.__query_collection(*args, **kwargs)
-
- def _query_collection(self, *args, **kwargs):
- return self.__query_collection(*args, **kwargs)
-
- def create_collection(self):
- raise RuntimeError("Forbidden")
-
- def delete_collection(self):
- raise RuntimeError("Forbidden")
-
- def delete(self, *args, **kwargs):
- raise RuntimeError("Forbidden")
-
- def add_embeddings(self, *args, **kwargs):
- raise RuntimeError("Forbidden")
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py
deleted file mode 100644
index a094f5e830e..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from typing import Any
-
-from pydantic import BaseModel
-
-from mindsdb.integrations.utilities.rag.settings import VectorStoreType, VectorStoreConfig
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.MDBVectorStore import MDBVectorStore
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB
-from mindsdb.utilities import log
-
-
-logger = log.getLogger(__name__)
-
-
-class VectorStoreLoader(BaseModel):
- embedding_model: Any # Embedding model interface
- vector_store: VectorStore = None
- config: VectorStoreConfig = None
-
- class Config:
- arbitrary_types_allowed = True
- extra = "forbid"
- validate_assignment = True
-
- def load(self) -> VectorStore:
- """
- Loads the vector store based on the provided config and embeddings model
- :return:
- """
- if (
- self.config.is_sparse is not None
- and self.config.vector_size is not None
- and self.config.kb_table is not None
- ):
- # Only use PGVector store for sparse vectors.
- db_handler = self.config.kb_table.get_vector_db()
- db_args = db_handler.connection_args
- # Assume we are always using PGVector & psycopg2.
- connection_str = f"postgresql+psycopg2://{db_args.get('user')}:{db_args.get('password')}@{db_args.get('host')}:{db_args.get('port')}/{db_args.get('dbname', db_args.get('database'))}"
-
- return PGVectorMDB(
- connection_string=connection_str,
- collection_name=self.config.kb_table._kb.vector_database_table,
- embedding_function=self.embedding_model,
- is_sparse=self.config.is_sparse,
- vector_size=self.config.vector_size,
- )
- return MDBVectorStore(kb_table=self.config.kb_table)
-
-
-class VectorStoreFactory:
- @staticmethod
- def create(embedding_model: Any, config: VectorStoreConfig) -> VectorStore:
- if config.vector_store_type == VectorStoreType.CHROMA:
- return VectorStoreFactory._load_chromadb_store(embedding_model, config)
- elif config.vector_store_type == VectorStoreType.PGVECTOR:
- return VectorStoreFactory._load_pgvector_store(embedding_model, config)
- else:
- raise ValueError(f"Invalid vector store type, must be one either {VectorStoreType.__members__.keys()}")
-
- @staticmethod
- def _load_chromadb_store(embedding_model: Any, settings) -> VectorStore:
- # Chroma still uses langchain, import only when needed
- from langchain_community.vectorstores import Chroma
-
- return Chroma(
- persist_directory=settings.persist_directory,
- collection_name=settings.collection_name,
- embedding_function=embedding_model,
- )
-
- @staticmethod
- def _load_pgvector_store(embedding_model: Any, settings) -> VectorStore:
- from .pgvector import PGVectorMDB
-
- return PGVectorMDB(
- connection_string=settings.connection_string,
- collection_name=settings.collection_name,
- embedding_function=embedding_model,
- is_sparse=settings.is_sparse,
- vector_size=settings.vector_size,
- )
diff --git a/mindsdb/integrations/utilities/rag/pipelines/__init__.py b/mindsdb/integrations/utilities/rag/pipelines/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/utilities/rag/pipelines/rag.py b/mindsdb/integrations/utilities/rag/pipelines/rag.py
deleted file mode 100644
index baf8ef8e117..00000000000
--- a/mindsdb/integrations/utilities/rag/pipelines/rag.py
+++ /dev/null
@@ -1,404 +0,0 @@
-from typing import Optional, Any, List, Union
-import asyncio
-
-from mindsdb.interfaces.knowledge_base.embedding_model_utils import construct_embedding_model_from_args
-from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction
-from mindsdb.integrations.utilities.rag.retrievers.auto_retriever import AutoRetriever
-from mindsdb.integrations.utilities.rag.retrievers.multi_vector_retriever import MultiVectorRetriever
-from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever
-from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker
-from mindsdb.integrations.utilities.rag.settings import (
- RAGPipelineModel,
- DEFAULT_AUTO_META_PROMPT_TEMPLATE,
- SearchKwargs,
- SearchType,
- RerankerConfig,
- VectorStoreConfig,
-)
-from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKER_FLAG
-
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.interfaces.knowledge_base.llm_wrapper import create_chat_model
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class SimpleRAGPipeline:
- """
- Custom RAG pipeline implementation to replace LangChain LCEL components
- """
-
- def __init__(
- self,
- retriever_runnable: Any,
- prompt_template: str,
- llm: Any,
- reranker: Optional[Any] = None,
- ):
- """
- Initialize SimpleRAGPipeline
-
- Args:
- retriever_runnable: Retriever that can be invoked with question
- prompt_template: Prompt template string with {question} and {context} placeholders
- llm: Language model with invoke/ainvoke methods
- reranker: Optional reranker for document reranking
- """
- self.retriever_runnable = retriever_runnable
- self.prompt_template = prompt_template
- self.llm = llm
- self.reranker = reranker
-
- def _format_docs(self, docs: Union[List[Any], str]) -> str:
- """Format documents into context string"""
- if isinstance(docs, str):
- # Handle case where retriever returns a string (e.g., SQLRetriever)
- return docs
- if not docs:
- return ""
-
- # Sort by original document so we can group source summaries together
- docs.sort(key=lambda d: d.metadata.get("original_row_id") if hasattr(d, "metadata") and d.metadata else 0)
- original_document_id = None
- summary_prepended_text = "Summary of the original document that the below context was taken from:\n"
- document_content = ""
-
- for d in docs:
- metadata = d.metadata if hasattr(d, "metadata") else {}
- if metadata.get("original_row_id") != original_document_id and metadata.get("summary"):
- # We have a summary of a new document to prepend
- original_document_id = metadata.get("original_row_id")
- summary = f"{summary_prepended_text}{metadata.get('summary')}\n"
- document_content += summary
-
- page_content = d.page_content if hasattr(d, "page_content") else str(d)
- document_content += f"{page_content}\n\n"
-
- return document_content
-
- def _format_prompt(self, question: str, context: str) -> str:
- """Format prompt template with question and context"""
- return self.prompt_template.format(question=question, context=context)
-
- def _extract_llm_response(self, response: Any) -> str:
- """Extract text content from LLM response"""
- # Handle different response types
- if isinstance(response, str):
- return response
- if hasattr(response, "content"):
- return response.content
- if hasattr(response, "text"):
- return response.text
- # Try to get from message if it's a message object
- if hasattr(response, "message") and hasattr(response.message, "content"):
- return response.message.content
- # Fallback to string conversion
- return str(response)
-
- async def _retrieve_documents(self, question: str) -> List[Any]:
- """Retrieve documents using retriever"""
- # Try async first
- if hasattr(self.retriever_runnable, "ainvoke"):
- return await self.retriever_runnable.ainvoke(question)
- elif hasattr(self.retriever_runnable, "invoke"):
- return self.retriever_runnable.invoke(question)
- elif hasattr(self.retriever_runnable, "get_relevant_documents"):
- # Sync method, run in executor for async compatibility
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(None, self.retriever_runnable.get_relevant_documents, question)
- else:
- raise ValueError("Retriever must have ainvoke, invoke, or get_relevant_documents method")
-
- async def ainvoke(self, question: Union[str, dict]) -> dict:
- """Async invocation of the RAG pipeline"""
- # Handle both string and dict input (for compatibility)
- if isinstance(question, dict):
- question = question.get("question", question.get("input", ""))
-
- # 1. Retrieve documents
- docs = await self._retrieve_documents(question)
-
- # 2. Apply reranker if enabled
- if self.reranker and docs:
- try:
- # Reranker should work with SimpleDocument via duck typing (page_content, metadata attributes)
- docs = await self.reranker.acompress_documents(docs, question)
- # Ensure all docs are SimpleDocument instances
- simple_docs = []
- for doc in docs:
- if isinstance(doc, SimpleDocument):
- simple_docs.append(doc)
- else:
- simple_docs.append(
- SimpleDocument(
- page_content=doc.page_content if hasattr(doc, "page_content") else str(doc),
- metadata=doc.metadata if hasattr(doc, "metadata") else {},
- )
- )
- docs = simple_docs
- except Exception as e:
- logger.warning(f"Error during reranking, continuing without reranking: {e}")
-
- # 3. Format documents into context
- context = self._format_docs(docs)
-
- # 4. Format prompt
- formatted_prompt = self._format_prompt(question, context)
-
- # 5. Generate answer using LLM
- # Use dict format for messages instead of HumanMessage
- messages = [{"role": "user", "content": formatted_prompt}]
-
- # Try different LLM interfaces
- if hasattr(self.llm, "abatch"):
- # CustomLLMWrapper interface
- responses = await self.llm.abatch([formatted_prompt])
- llm_response = responses[0] if responses else None
- elif hasattr(self.llm, "ainvoke"):
- llm_response = await self.llm.ainvoke(messages)
- elif hasattr(self.llm, "batch"):
- # CustomLLMWrapper sync interface
- responses = self.llm.batch([formatted_prompt])
- llm_response = responses[0] if responses else None
- elif hasattr(self.llm, "invoke"):
- loop = asyncio.get_event_loop()
- llm_response = await loop.run_in_executor(None, self.llm.invoke, messages)
- else:
- raise ValueError("LLM must have ainvoke, invoke, abatch, or batch method")
-
- # 6. Extract text from LLM response
- answer = self._extract_llm_response(llm_response)
-
- # 7. Return dict with context, question, answer
- return {"context": docs, "question": question, "answer": answer}
-
- def invoke(self, question: Union[str, dict]) -> dict:
- """Sync invocation of the RAG pipeline"""
- return asyncio.run(self.ainvoke(question))
-
-
-class LangChainRAGPipeline:
- """
- Builds a RAG pipeline using langchain LCEL components
-
- Args:
- retriever_runnable: Base retriever component
- prompt_template: Template for generating responses
- llm: Language model for generating responses
- reranker (bool): Whether to use reranking (default: False)
- reranker_config (RerankerConfig): Configuration for the reranker, including:
- - model: Model to use for reranking
- - filtering_threshold: Minimum score to keep a document
- - num_docs_to_keep: Maximum number of documents to keep
- - max_concurrent_requests: Maximum concurrent API requests
- - max_retries: Number of retry attempts for failed requests
- - retry_delay: Delay between retries
- - early_stop (bool): Whether to enable early stopping
- - early_stop_threshold: Confidence threshold for early stopping
- vector_store_config (VectorStoreConfig): Vector store configuration
- """
-
- def __init__(
- self,
- retriever_runnable,
- prompt_template,
- llm,
- reranker: bool = DEFAULT_RERANKER_FLAG,
- reranker_config: Optional[RerankerConfig] = None,
- vector_store_config: Optional[VectorStoreConfig] = None,
- ):
- self.retriever_runnable = retriever_runnable
- self.prompt_template = prompt_template
- self.llm = llm
- if reranker:
- if reranker_config is None:
- reranker_config = RerankerConfig()
- # Convert config to dict and initialize reranker
- reranker_kwargs = reranker_config.model_dump(exclude_none=True)
- self.reranker = LLMReranker(**reranker_kwargs)
- else:
- self.reranker = None
- self.vector_store_config = vector_store_config
-
- def with_returned_sources(self) -> SimpleRAGPipeline:
- """
- Builds a RAG pipeline with returned sources
- :return: SimpleRAGPipeline instance
- """
- # Ensure all the required components are not None
- if self.prompt_template is None:
- raise ValueError("One of the required components (prompt_template) is None")
- if self.llm is None:
- raise ValueError("One of the required components (llm) is None")
-
- # Return SimpleRAGPipeline instance that handles all the pipeline logic
- return SimpleRAGPipeline(
- retriever_runnable=self.retriever_runnable,
- prompt_template=self.prompt_template,
- llm=self.llm,
- reranker=self.reranker,
- )
-
- async def ainvoke(self, input_dict: dict) -> dict:
- """Async invocation of the RAG pipeline."""
- chain = self.with_returned_sources()
- return await chain.ainvoke(input_dict)
-
- def invoke(self, input_dict: dict) -> dict:
- """Sync invocation of the RAG pipeline."""
- import asyncio
-
- return asyncio.run(self.ainvoke(input_dict))
-
- @classmethod
- def _apply_search_kwargs(
- cls, retriever: Any, search_kwargs: Optional[SearchKwargs] = None, search_type: Optional[SearchType] = None
- ) -> Any:
- """Apply search kwargs and search type to the retriever if they exist"""
- if hasattr(retriever, "search_kwargs") and search_kwargs:
- # Convert search kwargs to dict, excluding None values
- kwargs_dict = search_kwargs.model_dump(exclude_none=True)
-
- # Only include relevant parameters based on search type
- if search_type == SearchType.SIMILARITY:
- # Remove MMR and similarity threshold specific params
- kwargs_dict.pop("fetch_k", None)
- kwargs_dict.pop("lambda_mult", None)
- kwargs_dict.pop("score_threshold", None)
- elif search_type == SearchType.MMR:
- # Remove similarity threshold specific params
- kwargs_dict.pop("score_threshold", None)
- elif search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
- # Remove MMR specific params
- kwargs_dict.pop("fetch_k", None)
- kwargs_dict.pop("lambda_mult", None)
-
- retriever.search_kwargs.update(kwargs_dict)
-
- # Set search type if supported by the retriever
- if hasattr(retriever, "search_type") and search_type:
- retriever.search_type = search_type.value
-
- return retriever
-
- @classmethod
- def from_retriever(cls, config: RAGPipelineModel):
- """
- Builds a RAG pipeline with returned sources using a simple vector store retriever
- :param config: RAGPipelineModel
- :return:
- """
- vector_store_operator = VectorStoreOperator(
- vector_store=config.vector_store,
- documents=config.documents,
- embedding_model=config.embedding_model,
- vector_store_config=config.vector_store_config,
- )
- retriever = vector_store_operator.vector_store.as_retriever()
- retriever = cls._apply_search_kwargs(retriever, config.search_kwargs, config.search_type)
-
- return cls(
- retriever,
- config.rag_prompt_template,
- config.llm,
- vector_store_config=config.vector_store_config,
- reranker=config.reranker,
- reranker_config=config.reranker_config,
- )
-
- @classmethod
- def from_auto_retriever(cls, config: RAGPipelineModel):
- if not config.retriever_prompt_template:
- config.retriever_prompt_template = DEFAULT_AUTO_META_PROMPT_TEMPLATE
-
- retriever = AutoRetriever(config=config).as_runnable()
- retriever = cls._apply_search_kwargs(retriever, config.search_kwargs, config.search_type)
- return cls(
- retriever,
- config.rag_prompt_template,
- config.llm,
- reranker_config=config.reranker_config,
- reranker=config.reranker,
- vector_store_config=config.vector_store_config,
- summarization_config=config.summarization_config,
- )
-
- @classmethod
- def from_multi_vector_retriever(cls, config: RAGPipelineModel):
- retriever = MultiVectorRetriever(config=config).as_runnable()
- retriever = cls._apply_search_kwargs(retriever, config.search_kwargs, config.search_type)
- return cls(
- retriever,
- config.rag_prompt_template,
- config.llm,
- reranker_config=config.reranker_config,
- reranker=config.reranker,
- vector_store_config=config.vector_store_config,
- summarization_config=config.summarization_config,
- )
-
- @classmethod
- def from_sql_retriever(cls, config: RAGPipelineModel):
- retriever_config = config.sql_retriever_config
- if retriever_config is None:
- raise ValueError('Must provide "sql_retriever_config" for RAG pipeline config')
- vector_store_config = config.vector_store_config
- knowledge_base_table = vector_store_config.kb_table if vector_store_config is not None else None
- if knowledge_base_table is None:
- raise ValueError('Must provide valid "vector_store_config" for RAG pipeline config')
- embedding_args = knowledge_base_table._kb.embedding_model.learn_args.get("using", {})
- embeddings = construct_embedding_model_from_args(embedding_args)
- sql_llm = create_chat_model(
- {
- "model_name": retriever_config.llm_config.model_name,
- "provider": retriever_config.llm_config.provider,
- **retriever_config.llm_config.params,
- }
- )
- vector_store_operator = VectorStoreOperator(
- vector_store=config.vector_store,
- documents=config.documents,
- embedding_model=config.embedding_model,
- vector_store_config=config.vector_store_config,
- )
- vector_store_retriever = vector_store_operator.vector_store.as_retriever()
- vector_store_retriever = cls._apply_search_kwargs(
- vector_store_retriever, config.search_kwargs, config.search_type
- )
- distance_function = DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE
- if config.vector_store_config.is_sparse and config.vector_store_config.vector_size is not None:
- # Use negative dot product for sparse retrieval.
- distance_function = DistanceFunction.NEGATIVE_DOT_PRODUCT
- retriever = SQLRetriever(
- fallback_retriever=vector_store_retriever,
- vector_store_handler=knowledge_base_table.get_vector_db(),
- min_k=retriever_config.min_k,
- max_filters=retriever_config.max_filters,
- filter_threshold=retriever_config.filter_threshold,
- database_schema=retriever_config.database_schema,
- embeddings_model=embeddings,
- search_kwargs=config.search_kwargs,
- rewrite_prompt_template=retriever_config.rewrite_prompt_template,
- table_prompt_template=retriever_config.table_prompt_template,
- column_prompt_template=retriever_config.column_prompt_template,
- value_prompt_template=retriever_config.value_prompt_template,
- boolean_system_prompt=retriever_config.boolean_system_prompt,
- generative_system_prompt=retriever_config.generative_system_prompt,
- num_retries=retriever_config.num_retries,
- embeddings_table=knowledge_base_table._kb.vector_database_table,
- source_table=retriever_config.source_table,
- source_id_column=retriever_config.source_id_column,
- distance_function=distance_function,
- llm=sql_llm,
- )
- return cls(
- retriever,
- config.rag_prompt_template,
- config.llm,
- reranker_config=config.reranker_config,
- reranker=config.reranker,
- vector_store_config=config.vector_store_config,
- summarization_config=config.summarization_config,
- )
diff --git a/mindsdb/integrations/utilities/rag/rag_pipeline_builder.py b/mindsdb/integrations/utilities/rag/rag_pipeline_builder.py
deleted file mode 100644
index f9709989069..00000000000
--- a/mindsdb/integrations/utilities/rag/rag_pipeline_builder.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import pandas as pd
-from typing import Any
-from mindsdb.integrations.utilities.rag.storage.in_memory_byte_store import InMemoryByteStore
-from mindsdb.integrations.utilities.rag.pipelines.rag import LangChainRAGPipeline
-from mindsdb.integrations.utilities.rag.settings import RetrieverType, RAGPipelineModel
-from mindsdb.integrations.utilities.rag.utils import documents_to_df
-from mindsdb.integrations.utilities.rag.retrievers.multi_hop_retriever import MultiHopRetriever
-from mindsdb.integrations.utilities.rag.splitters.custom_splitters import RecursiveCharacterTextSplitter
-from mindsdb.utilities.log import getLogger
-
-logger = getLogger(__name__)
-
-_retriever_strategies = {
- RetrieverType.VECTOR_STORE: lambda config: _create_pipeline_from_vector_store(config),
- RetrieverType.AUTO: lambda config: _create_pipeline_from_auto_retriever(config),
- RetrieverType.MULTI: lambda config: _create_pipeline_from_multi_retriever(config),
- RetrieverType.SQL: lambda config: _create_pipeline_from_sql_retriever(config),
- RetrieverType.MULTI_HOP: lambda config: _create_pipeline_from_multi_hop_retriever(config),
-}
-
-
-def _create_pipeline_from_vector_store(config: RAGPipelineModel) -> LangChainRAGPipeline:
- return LangChainRAGPipeline.from_retriever(config=config)
-
-
-def _create_pipeline_from_auto_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
- return LangChainRAGPipeline.from_auto_retriever(config=config)
-
-
-def _create_pipeline_from_multi_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
- if config.text_splitter is None:
- config.text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=config.chunk_size, chunk_overlap=config.chunk_overlap
- )
- if config.parent_store is None:
- config.parent_store = InMemoryByteStore()
-
- return LangChainRAGPipeline.from_multi_vector_retriever(config=config)
-
-
-def _create_pipeline_from_sql_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
- return LangChainRAGPipeline.from_sql_retriever(config=config)
-
-
-def _create_pipeline_from_multi_hop_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
- retriever = MultiHopRetriever.from_config(config)
- return LangChainRAGPipeline(
- retriever_runnable=retriever,
- prompt_template=config.rag_prompt_template,
- llm=config.llm,
- reranker_config=config.reranker_config,
- reranker=config.reranker,
- vector_store_config=config.vector_store_config,
- )
-
-
-def _process_documents_to_df(config: RAGPipelineModel) -> pd.DataFrame:
- return documents_to_df(
- config.content_column_name, config.documents, embedding_model=config.embedding_model, with_embeddings=True
- )
-
-
-def get_pipeline_from_retriever(config: RAGPipelineModel) -> Any:
- retriever_strategy = _retriever_strategies.get(config.retriever_type)
- if retriever_strategy:
- return retriever_strategy(config).with_returned_sources()
- else:
- raise ValueError(
- f"Invalid retriever type, must be one of: {list(_retriever_strategies.keys())}. Got {config.retriever_type}"
- )
-
-
-class RAG:
- def __init__(self, config: RAGPipelineModel):
- self.pipeline = get_pipeline_from_retriever(config)
-
- def __call__(self, question: str) -> dict:
- logger.info(f"Processing question using rag pipeline: {question}")
- result = self.pipeline.invoke(question)
-
- returned_sources = [docs.page_content for docs in result["context"]]
- logger.info(f"retrieved context used to answer question: {returned_sources}")
-
- return result
diff --git a/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py b/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
index 2542967672f..3fc7951530d 100644
--- a/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
+++ b/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
@@ -1,13 +1,12 @@
from __future__ import annotations
import re
+import os
import json
+import math
import asyncio
import logging
-import math
-import os
import random
-from abc import ABC
from typing import Any, List, Optional, Tuple
from openai import AsyncOpenAI, AsyncAzureOpenAI
@@ -35,7 +34,11 @@
DEFAULT_VALID_CLASS_TOKENS,
RerankerMode,
)
-from mindsdb.integrations.libs.base import BaseMLEngine
+
+from mindsdb.interfaces.knowledge_base.providers.bedrock import AsyncBedrockClient
+from mindsdb.interfaces.knowledge_base.providers.gemini import GeminiClient
+from mindsdb.interfaces.knowledge_base.providers.snowflake import SnowflakeClient
+
log = logging.getLogger(__name__)
@@ -50,7 +53,7 @@ def get_event_loop():
return loop
-class BaseLLMReranker(BaseModel, ABC):
+class BaseLLMReranker(BaseModel):
filtering_threshold: float = 0.0 # Default threshold for filtering
provider: str = "openai"
model: str = DEFAULT_RERANKING_MODEL # Model to use for reranking
@@ -59,10 +62,10 @@ class BaseLLMReranker(BaseModel, ABC):
base_url: Optional[str] = None
api_version: Optional[str] = None
num_docs_to_keep: Optional[int] = None # How many of the top documents to keep after reranking & compressing.
- method: str = "multi-class" # Scoring method: 'multi-class' or 'binary'
+ method: str = "no-logprobs" # Scoring method: 'multi-class' or 'no-logprobs'
mode: RerankerMode = RerankerMode.POINTWISE
_api_key_var: str = "OPENAI_API_KEY"
- client: Optional[AsyncOpenAI | BaseMLEngine] = None
+ client: Optional[AsyncOpenAI | AsyncBedrockClient | GeminiClient | SnowflakeClient] = None
_semaphore: Optional[asyncio.Semaphore] = None
max_concurrent_requests: int = 20
max_retries: int = 4
@@ -102,6 +105,9 @@ def _get_semaphore(self):
def _init_client(self):
if self.client is None:
+ if self.provider == "google":
+ self.provider = "gemini"
+
if self.provider == "azure_openai":
azure_api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
azure_api_endpoint = self.base_url or os.environ.get("AZURE_OPENAI_ENDPOINT")
@@ -113,11 +119,21 @@ def _init_client(self):
timeout=self.request_timeout,
max_retries=2,
)
+ self.method = "multi-class"
+ elif self.provider == "bedrock":
+ kwargs = self.model_extra.copy()
+ self.client = AsyncBedrockClient(**kwargs)
+ elif self.provider == "gemini":
+ self.client = GeminiClient(api_key=self.api_key)
+ elif self.provider == "snowflake":
+ kwargs = self.model_extra.copy()
+ self.client = SnowflakeClient(api_key=self.api_key, **kwargs)
elif self.provider in ("openai", "ollama"):
if self.provider == "ollama":
- self.method = "no-logprobs"
if self.api_key is None:
self.api_key = "n/a"
+ else:
+ self.method = "multi-class"
api_key_var: str = "OPENAI_API_KEY"
openai_api_key = self.api_key or os.getenv(api_key_var)
@@ -157,24 +173,15 @@ def _init_client(self):
self.method = "no-logprobs"
else:
- # try to use litellm
- from mindsdb.api.executor.controllers.session_controller import SessionController
-
- session = SessionController()
- module = session.integration_controller.get_handler_module("litellm")
+ raise NotImplementedError(f'Provider "{self.provider}" is not supported')
- if module is None or module.Handler is None:
- raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
-
- self.client = module.Handler
- self.method = "no-logprobs"
-
- async def _call_llm(self, messages):
+ async def _call_llm(self, messages) -> str:
if self.provider in ("azure_openai", "openai", "ollama"):
- return await self.client.chat.completions.create(
+ response = await self.client.chat.completions.create(
model=self.model,
messages=messages,
)
+ return response.choices[0].message.content
elif self.provider in ("google", "google_genai"):
# Convert OpenAI message format to Google Gen AI prompt format
prompt_parts = []
@@ -228,12 +235,7 @@ def __init__(self, text):
return CompletionResponse(response.text)
else:
- kwargs = self.model_extra.copy()
-
- if self.api_key is not None:
- kwargs["api_key"] = self.api_key
-
- return await self.client.acompletion(self.provider, model=self.model, messages=messages, args=kwargs)
+ return await self.client.acompletion(model_name=self.model, messages=messages)
async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]:
ranked_results = []
@@ -314,7 +316,7 @@ async def search_relevancy(self, query: str, document: str) -> Any:
temperature=self.temperature,
n=1,
logprobs=True,
- max_tokens=1,
+ max_completion_tokens=1,
)
# Extract response and logprobs
@@ -344,12 +346,10 @@ async def search_relevancy_no_logprob(self, query: str, document: str) -> Any:
f"Search query: {query}"
)
- response = await self._call_llm(
+ answer = await self._call_llm(
messages=[{"role": "system", "content": prompt}, {"role": "user", "content": document}],
)
- answer = response.choices[0].message.content
-
try:
value = re.findall(r"[\d]+", answer)[0]
score = float(value) / 100
@@ -462,7 +462,7 @@ async def search_relevancy_score(self, query: str, document: str) -> Any:
n=self.n,
logprobs=self.logprobs,
top_logprobs=self.top_logprobs,
- max_tokens=self.max_tokens,
+ max_completion_tokens=self.max_tokens,
)
# Extract response and logprobs
@@ -610,8 +610,8 @@ async def _rank_single_batch(
for attempt in range(self.max_retries):
try:
- response = await self._call_llm(messages)
- content = response.choices[0].message.content
+ content = await self._call_llm(messages)
+
scores = self._extract_scores(content, len(documents))
return list(zip(documents, scores))
except Exception as exc:
diff --git a/mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py b/mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py
deleted file mode 100644
index 9fdc083ccbf..00000000000
--- a/mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py
+++ /dev/null
@@ -1,148 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import logging
-from typing import Any, Dict, Optional, Sequence
-
-from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMReranker
-
-log = logging.getLogger(__name__)
-
-
-def _dispatch_custom_event(event_name: str, data: dict):
- """Simple event dispatcher replacement for langchain's dispatch_custom_event.
-
- This is a no-op implementation. If custom event handling is needed,
- it can be extended to dispatch events to registered handlers.
- """
- # No-op for now - can be extended if needed
- pass
-
-
-class LLMReranker(BaseLLMReranker):
- remove_irrelevant: bool = True # New flag to control removal of irrelevant documents
-
- def _dispatch_rerank_event(self, data):
- """Dispatch rerank event using custom event dispatcher"""
- _dispatch_custom_event("rerank", data)
-
- async def acompress_documents(
- self,
- documents: Sequence[Any],
- query: str,
- callbacks: Optional[Any] = None,
- ) -> Sequence[Any]:
- """
- Async compress documents using reranking with proper error handling.
-
- Args:
- documents: Sequence of document objects with page_content and metadata attributes
- query: Query string for reranking
- callbacks: Optional callbacks object with on_retriever_start, on_retriever_end,
- on_text, and on_retriever_error methods
-
- Returns:
- Sequence of filtered and reranked documents
- """
- if callbacks and hasattr(callbacks, "on_retriever_start"):
- try:
- await callbacks.on_retriever_start({"query": query}, "Reranking documents")
- except Exception as e:
- log.warning(f"Error in callback on_retriever_start: {e}")
-
- log.info(f"Async compressing documents. Initial count: {len(documents)}")
- if not documents:
- if callbacks and hasattr(callbacks, "on_retriever_end"):
- try:
- await callbacks.on_retriever_end({"documents": []})
- except Exception as e:
- log.warning(f"Error in callback on_retriever_end: {e}")
- return []
-
- # Stream reranking update.
- _dispatch_custom_event("rerank_begin", {"num_documents": len(documents)})
-
- try:
- # Prepare query-document pairs
- # Use duck typing to access page_content attribute
- query_document_pairs = [(query, doc.page_content) for doc in documents]
-
- if callbacks and hasattr(callbacks, "on_text"):
- try:
- await callbacks.on_text("Starting document reranking...")
- except Exception as e:
- log.warning(f"Error in callback on_text: {e}")
-
- # Get ranked results
- ranked_results = await self._rank(query_document_pairs, rerank_callback=self._dispatch_rerank_event)
-
- # Sort by score in descending order
- ranked_results.sort(key=lambda x: x[1], reverse=True)
-
- # Filter based on threshold and num_docs_to_keep
- filtered_docs = []
- for doc, score in ranked_results:
- if score >= self.filtering_threshold:
- matching_doc = next(d for d in documents if d.page_content == doc)
- # Use duck typing to access and update metadata
- metadata = getattr(matching_doc, "metadata", None) or {}
- matching_doc.metadata = {**metadata, "relevance_score": score}
- filtered_docs.append(matching_doc)
-
- if callbacks and hasattr(callbacks, "on_text"):
- try:
- await callbacks.on_text(f"Document scored {score:.2f}")
- except Exception as e:
- log.warning(f"Error in callback on_text: {e}")
-
- if self.num_docs_to_keep and len(filtered_docs) >= self.num_docs_to_keep:
- break
-
- log.info(f"Async compression complete. Final count: {len(filtered_docs)}")
-
- if callbacks and hasattr(callbacks, "on_retriever_end"):
- try:
- await callbacks.on_retriever_end({"documents": filtered_docs})
- except Exception as e:
- log.warning(f"Error in callback on_retriever_end: {e}")
-
- return filtered_docs
-
- except Exception as e:
- error_msg = "Error during async document compression:"
- log.exception(error_msg)
- if callbacks and hasattr(callbacks, "on_retriever_error"):
- try:
- await callbacks.on_retriever_error(f"{error_msg} {e}")
- except Exception as callback_error:
- log.warning(f"Error in callback on_retriever_error: {callback_error}")
- return documents # Return original documents on error
-
- def compress_documents(
- self,
- documents: Sequence[Any],
- query: str,
- callbacks: Optional[Any] = None,
- ) -> Sequence[Any]:
- """
- Sync wrapper for async compression.
-
- Args:
- documents: Sequence of document objects with page_content and metadata attributes
- query: Query string for reranking
- callbacks: Optional callbacks object
-
- Returns:
- Sequence of filtered and reranked documents
- """
- return asyncio.run(self.acompress_documents(documents, query, callbacks))
-
- @property
- def _identifying_params(self) -> Dict[str, Any]:
- """Get the identifying parameters."""
- return {
- "model": self.model,
- "temperature": self.temperature,
- "remove_irrelevant": self.remove_irrelevant,
- "method": self.method,
- }
diff --git a/mindsdb/integrations/utilities/rag/retrievers/__init__.py b/mindsdb/integrations/utilities/rag/retrievers/__init__.py
deleted file mode 100644
index 94e359da03a..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from mindsdb.integrations.utilities.rag.retrievers.multi_hop_retriever import MultiHopRetriever
-
-__all__ = ['MultiHopRetriever']
\ No newline at end of file
diff --git a/mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py
deleted file mode 100644
index ba260693c92..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py
+++ /dev/null
@@ -1,228 +0,0 @@
-from typing import List, Any
-import json
-import asyncio
-
-import pandas as pd
-
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.integrations.utilities.rag.utils import documents_to_df
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class AutoRetriever(BaseRetriever):
- """
- AutoRetrieval is a class that uses LLM to extract metadata from documents and query vectorstore using self-query retrievers.
- """
-
- def __init__(self, config: RAGPipelineModel):
- """
-
- :param config: RAGPipelineModel
-
-
- """
-
- self.documents = config.documents
- self.content_column_name = config.content_column_name
- self.vectorstore = config.vector_store
- self.filter_columns = config.auto_retriever_filter_columns
- self.document_description = config.dataset_description
- self.llm = config.llm
- self.embedding_model = config.embedding_model
- self.prompt_template = config.retriever_prompt_template
- self.cardinality_threshold = config.cardinality_threshold
-
- def _get_low_cardinality_columns(self, data: pd.DataFrame):
- """
- Given a dataframe, return a list of columns with low cardinality if datatype is not bool.
- :return:
- """
- low_cardinality_columns = []
- columns = data.columns if self.filter_columns is None else self.filter_columns
- for column in columns:
- if data[column].dtype != "bool":
- if data[column].nunique() < self.cardinality_threshold:
- low_cardinality_columns.append(column)
- return low_cardinality_columns
-
- def get_metadata_field_info(self):
- """
- Given a list of Document, use llm to extract metadata from it.
- :return:
- """
-
- def _alter_description(data: pd.DataFrame, low_cardinality_columns: list, result: List[dict]):
- """
- For low cardinality columns, alter the description to include the sorted valid values.
- :param data: pd.DataFrame
- :param low_cardinality_columns: list
- :param result: List[dict]
- """
- for column_name in low_cardinality_columns:
- valid_values = sorted(data[column_name].unique())
- for entry in result:
- if entry["name"] == column_name:
- entry["description"] += f". Valid values: {valid_values}"
-
- data = documents_to_df(self.content_column_name, self.documents)
-
- prompt = self.prompt_template.format(dataframe=data.head().to_json(), description=self.document_description)
- # Call LLM and extract response
- llm_response = self.llm.invoke(prompt)
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- response_text = llm_response.content
- elif isinstance(llm_response, str):
- response_text = llm_response
- else:
- response_text = str(llm_response)
-
- result: List[dict] = json.loads(response_text)
-
- _alter_description(data, self._get_low_cardinality_columns(data), result)
-
- return result
-
- def get_vectorstore(self):
- """
-
- :return:
- """
- return VectorStoreOperator(
- vector_store=self.vectorstore, documents=self.documents, embedding_model=self.embedding_model
- ).vector_store
-
- def as_runnable(self) -> RunnableRetriever:
- """
- Return a custom self-query retriever
- :return: CustomSelfQueryRetriever instance
- """
- vectorstore = self.get_vectorstore()
- metadata_field_info = self.get_metadata_field_info()
-
- return CustomSelfQueryRetriever(
- llm=self.llm,
- vectorstore=vectorstore,
- document_contents=self.document_description,
- metadata_field_info=metadata_field_info,
- )
-
-
-class CustomSelfQueryRetriever:
- """
- Custom implementation of SelfQueryRetriever to replace langchain's SelfQueryRetriever.
- Uses LLM to generate metadata filters and queries vectorstore with those filters.
- """
-
- def __init__(self, llm: Any, vectorstore: Any, document_contents: str, metadata_field_info: List[dict]):
- """
- Initialize CustomSelfQueryRetriever
-
- Args:
- llm: LLM instance with invoke method
- vectorstore: Vector store with similarity_search_with_score method
- document_contents: Description of document contents
- metadata_field_info: List of metadata field information dicts
- """
- self.llm = llm
- self.vectorstore = vectorstore
- self.document_contents = document_contents
- self.metadata_field_info = metadata_field_info
-
- def _generate_metadata_filters(self, query: str) -> dict:
- """
- Use LLM to generate metadata filters from query
-
- Args:
- query: User query string
-
- Returns:
- Dictionary of metadata filters
- """
- # Create prompt for LLM to generate metadata filters
- metadata_info_str = json.dumps(self.metadata_field_info, indent=2)
- prompt = f"""Given the following query and metadata field information, generate a structured query with metadata filters.
-
-Query: {query}
-
-Document contents description: {self.document_contents}
-
-Available metadata fields:
-{metadata_info_str}
-
-Generate a JSON object with the query string and any applicable metadata filters.
-Format: {{"query": "extracted query", "filters": {{"field_name": "value"}}}}
-"""
-
- try:
- llm_response = self.llm.invoke(prompt)
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- response_text = llm_response.content
- elif isinstance(llm_response, str):
- response_text = llm_response
- else:
- response_text = str(llm_response)
-
- # Parse JSON response
- parsed = json.loads(response_text)
- return parsed.get("filters", {})
- except Exception as e:
- logger.warning(f"Error generating metadata filters: {e}")
- return {}
-
- def _query_vectorstore(self, query: str, filters: dict) -> List[Any]:
- """
- Query vectorstore with query and metadata filters
-
- Args:
- query: Query string
- filters: Metadata filters dictionary
-
- Returns:
- List of documents
- """
- # Use vectorstore's similarity_search method
- # If vectorstore supports metadata filtering, apply filters
- if hasattr(self.vectorstore, "similarity_search"):
- # Try to pass filters if supported
- if filters:
- try:
- # Some vectorstores support filter parameter
- if hasattr(self.vectorstore, "similarity_search_with_score"):
- docs_with_scores = self.vectorstore.similarity_search_with_score(query, k=4, filter=filters)
- return [doc for doc, _ in docs_with_scores]
- else:
- return self.vectorstore.similarity_search(query, k=4, filter=filters)
- except TypeError:
- # If filter not supported, just do regular search
- return self.vectorstore.similarity_search(query, k=4)
- else:
- return self.vectorstore.similarity_search(query, k=4)
- else:
- raise ValueError("Vectorstore must have similarity_search method")
-
- def invoke(self, query: str) -> List[Any]:
- """Sync invocation - retrieve documents for a query"""
- # Generate metadata filters
- filters = self._generate_metadata_filters(query)
-
- # Extract query string (LLM might have rewritten it)
- # For now, use original query
- # In a full implementation, we'd extract the rewritten query from LLM response
-
- # Query vectorstore
- return self._query_vectorstore(query, filters)
-
- async def ainvoke(self, query: str) -> List[Any]:
- """Async invocation - retrieve documents for a query"""
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(None, self.invoke, query)
-
- def get_relevant_documents(self, query: str) -> List[Any]:
- """Get relevant documents (sync)"""
- return self.invoke(query)
diff --git a/mindsdb/integrations/utilities/rag/retrievers/base.py b/mindsdb/integrations/utilities/rag/retrievers/base.py
deleted file mode 100644
index 1c136436aec..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/base.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Protocol, List, Any
-
-
-class RunnableRetriever(Protocol):
- """Protocol for retriever runnable objects that can be invoked to retrieve documents"""
-
- def invoke(self, query: str) -> List[Any]:
- """Sync invocation - retrieve documents for a query"""
- ...
-
- async def ainvoke(self, query: str) -> List[Any]:
- """Async invocation - retrieve documents for a query"""
- ...
-
- def get_relevant_documents(self, query: str) -> List[Any]:
- """Get relevant documents (sync) - alternative interface"""
- ...
-
-
-class BaseRetriever(ABC):
- """Represents a base retriever for a RAG pipeline"""
-
- @abstractmethod
- def as_runnable(self) -> RunnableRetriever:
- """
- Return a runnable retriever object that can be invoked.
-
- Returns:
- RunnableRetriever: An object that implements invoke(), ainvoke(), or get_relevant_documents()
- """
- pass
diff --git a/mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py
deleted file mode 100644
index d5bca056e0a..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py
+++ /dev/null
@@ -1,130 +0,0 @@
-from typing import List, Optional, Any
-
-import json
-from pydantic import Field, PrivateAttr
-
-from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel, DEFAULT_QUESTION_REFORMULATION_TEMPLATE
-from mindsdb.integrations.utilities.rag.retrievers.retriever_factory import create_retriever
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class MultiHopRetriever(BaseRetriever):
- """A retriever that implements multi-hop question reformulation strategy.
-
- This retriever takes a base retriever and uses an LLM to generate follow-up
- questions based on the initial results. It then retrieves documents for each
- follow-up question and combines all results.
- """
-
- base_retriever: Any = Field(
- description="Base retriever to use for document lookup (must have get_relevant_documents or invoke method)"
- )
- llm: Any = Field(description="LLM to use for generating follow-up questions (must have invoke method)")
- max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate")
- reformulation_template: str = Field(
- default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE, description="Template for reformulating questions"
- )
-
- _asked_questions: set = PrivateAttr(default_factory=set)
-
- @classmethod
- def from_config(cls, config: RAGPipelineModel) -> "MultiHopRetriever":
- """Create a MultiHopRetriever from a RAGPipelineModel config."""
- if config.multi_hop_config is None:
- raise ValueError("multi_hop_config must be set for MultiHopRetriever")
-
- # Create base retriever based on type
- base_retriever = create_retriever(config, config.multi_hop_config.base_retriever_type)
-
- return cls(
- base_retriever=base_retriever,
- llm=config.llm,
- max_hops=config.multi_hop_config.max_hops,
- reformulation_template=config.multi_hop_config.reformulation_template,
- )
-
- def _get_relevant_documents(self, query: str, *, run_manager: Optional[Any] = None) -> List[Any]:
- """
- Get relevant documents using multi-hop retrieval.
-
- Args:
- query: Query string
- run_manager: Optional callback manager (not used, kept for compatibility)
-
- Returns:
- List of documents with page_content and metadata attributes
- """
- if query in self._asked_questions:
- return []
-
- self._asked_questions.add(query)
-
- # Get initial documents using duck typing
- docs = self._retrieve_from_base_retriever(query)
- if not docs or len(self._asked_questions) >= self.max_hops:
- return docs
-
- # Generate follow-up questions
- context = "\n".join(doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in docs)
- prompt = self.reformulation_template.format(question=query, context=context)
-
- try:
- # Call LLM - handle both string and message formats
- llm_response = self.llm.invoke(prompt)
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- response_text = llm_response.content
- elif isinstance(llm_response, str):
- response_text = llm_response
- else:
- response_text = str(llm_response)
-
- follow_up_questions = json.loads(response_text)
- if not isinstance(follow_up_questions, list):
- return docs
- except (json.JSONDecodeError, TypeError, Exception) as e:
- logger.warning(f"Error parsing follow-up questions: {e}")
- return docs
-
- # Get documents for follow-up questions
- for question in follow_up_questions:
- if isinstance(question, str):
- follow_up_docs = self._get_relevant_documents(question)
- docs.extend(follow_up_docs)
-
- return docs
-
- def _retrieve_from_base_retriever(self, query: str) -> List[Any]:
- """Retrieve documents from base retriever using duck typing"""
- if hasattr(self.base_retriever, "_get_relevant_documents"):
- return self.base_retriever._get_relevant_documents(query)
- elif hasattr(self.base_retriever, "get_relevant_documents"):
- return self.base_retriever.get_relevant_documents(query)
- elif hasattr(self.base_retriever, "invoke"):
- return self.base_retriever.invoke(query)
- else:
- raise ValueError(
- "Base retriever must have _get_relevant_documents, get_relevant_documents, or invoke method"
- )
-
- def invoke(self, query: str) -> List[Any]:
- """Sync invocation - retrieve documents for a query"""
- return self._get_relevant_documents(query)
-
- async def ainvoke(self, query: str) -> List[Any]:
- """Async invocation - retrieve documents for a query"""
- import asyncio
-
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(None, self._get_relevant_documents, query)
-
- def get_relevant_documents(self, query: str) -> List[Any]:
- """Get relevant documents (sync)"""
- return self._get_relevant_documents(query)
-
- def as_runnable(self) -> RunnableRetriever:
- """Return self as a runnable retriever"""
- return self
diff --git a/mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py
deleted file mode 100644
index c7d2c5918bd..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py
+++ /dev/null
@@ -1,193 +0,0 @@
-from typing import List, Tuple, Any
-import uuid
-import asyncio
-
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.integrations.utilities.rag.settings import DEFAULT_LLM_MODEL, MultiVectorRetrieverMode, RAGPipelineModel
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.integrations.utilities.rag.retrievers.safe_output_parser import SafeOutputParser
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class MultiVectorRetriever(BaseRetriever):
- """
- MultiVectorRetriever stores multiple vectors per document.
- """
-
- def __init__(self, config: RAGPipelineModel):
- self.vectorstore = config.vector_store
- self.parent_store = config.parent_store
- self.id_key = config.id_key
- self.documents = config.documents
- self.text_splitter = config.text_splitter
- self.embedding_model = config.embedding_model
- self.max_concurrency = config.max_concurrency
- self.mode = config.multi_retriever_mode
-
- def _generate_id_and_split_document(self, doc: Any) -> Tuple[str, List[Any]]:
- """
- Generate a unique id for the document and split it into sub-documents.
- :param doc: Document with page_content and metadata
- :return: Tuple of (doc_id, list of sub_documents)
- """
- doc_id = str(uuid.uuid4())
- sub_docs = self.text_splitter.split_documents([doc])
- for sub_doc in sub_docs:
- # Use duck typing to access metadata
- if not hasattr(sub_doc, "metadata"):
- sub_doc.metadata = {}
- sub_doc.metadata[self.id_key] = doc_id
- return doc_id, sub_docs
-
- def _split_documents(self) -> Tuple[List[Any], List[str]]:
- """
- Split the documents into sub-documents and generate unique ids for each document.
- :return: Tuple of (list of split_docs, list of doc_ids)
- """
- split_info = list(map(self._generate_id_and_split_document, self.documents))
- doc_ids, split_docs_lists = zip(*split_info)
- split_docs = [doc for sublist in split_docs_lists for doc in sublist]
- return split_docs, list(doc_ids)
-
- def _create_retriever_and_vs_operator(
- self, docs: List[Any]
- ) -> Tuple["CustomMultiVectorRetriever", VectorStoreOperator]:
- vstore_operator = VectorStoreOperator(
- vector_store=self.vectorstore,
- documents=docs,
- embedding_model=self.embedding_model,
- )
- retriever = CustomMultiVectorRetriever(
- vectorstore=vstore_operator.vector_store, byte_store=self.parent_store, id_key=self.id_key
- )
- return retriever, vstore_operator
-
- def _get_document_summaries(self, llm: Any) -> List[str]:
- """
- Get document summaries using LLM
-
- Args:
- llm: LLM instance with invoke method
-
- Returns:
- List of summary strings
- """
- summaries = []
- prompt_template = "Summarize the following document:\n\n{doc}"
-
- for doc in self.documents:
- # Extract page_content using duck typing
- page_content = doc.page_content if hasattr(doc, "page_content") else str(doc)
- prompt = prompt_template.format(doc=page_content)
-
- try:
- # Call LLM
- llm_response = llm.invoke(prompt)
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- summary = llm_response.content
- elif isinstance(llm_response, str):
- summary = llm_response
- else:
- summary = str(llm_response)
-
- # Use SafeOutputParser to clean the output (extract actual text from parse result)
- parser = SafeOutputParser()
- parsed_result = parser.parse(summary)
- summary = parser.extract_output(parsed_result)
- summaries.append(summary)
- except Exception as e:
- logger.warning(f"Error generating summary for document: {e}")
- # Fallback to empty summary or first part of content
- summaries.append(page_content[:200] if len(page_content) > 200 else page_content)
-
- return summaries
-
- def as_runnable(self) -> RunnableRetriever:
- # Get LLM from config - need to check how it's passed
- # For now, assume we need to get it from somewhere
- # This might need to be passed in config
- llm = getattr(self, "llm", None)
- if llm is None:
- # Try to create a default LLM - this might need adjustment
- from mindsdb.interfaces.knowledge_base.llm_wrapper import create_chat_model
-
- llm = create_chat_model({"model_name": DEFAULT_LLM_MODEL, "provider": "openai"})
-
- if self.mode in {MultiVectorRetrieverMode.SPLIT, MultiVectorRetrieverMode.BOTH}:
- split_docs, doc_ids = self._split_documents()
- retriever, vstore_operator = self._create_retriever_and_vs_operator(split_docs)
- summaries = self._get_document_summaries(llm)
- summary_docs = [
- SimpleDocument(page_content=s, metadata={self.id_key: doc_ids[i]}) for i, s in enumerate(summaries)
- ]
- vstore_operator.add_documents(summary_docs)
- retriever.docstore.mset(list(zip(doc_ids, self.documents)))
- return retriever
-
- elif self.mode == MultiVectorRetrieverMode.SUMMARIZE:
- summaries = self._get_document_summaries(llm)
- doc_ids = [str(uuid.uuid4()) for _ in self.documents]
- summary_docs = [
- SimpleDocument(page_content=s, metadata={self.id_key: doc_ids[i]}) for i, s in enumerate(summaries)
- ]
- retriever, vstore_operator = self._create_retriever_and_vs_operator(summary_docs)
- retriever.docstore.mset(list(zip(doc_ids, self.documents)))
- return retriever
-
- else:
- raise ValueError(f"Invalid mode: {self.mode}")
-
-
-class CustomMultiVectorRetriever:
- """
- Custom implementation of MultiVectorRetriever to replace langchain's MultiVectorRetriever.
- Stores parent documents in docstore and sub-documents/summaries in vectorstore.
- """
-
- def __init__(self, vectorstore: Any, byte_store: Any, id_key: str = "doc_id"):
- """
- Initialize CustomMultiVectorRetriever
-
- Args:
- vectorstore: Vector store for storing sub-documents/summaries
- byte_store: Store for parent documents (must have mset and mget methods)
- id_key: Key used to link sub-documents to parent documents
- """
- self.vectorstore = vectorstore
- self.docstore = byte_store
- self.id_key = id_key
-
- def invoke(self, query: str) -> List[Any]:
- """Sync invocation - retrieve documents for a query"""
- # Get sub-documents from vectorstore
- sub_docs = self.vectorstore.similarity_search(query, k=4)
-
- # Get parent document IDs from sub-documents
- parent_ids = []
- for doc in sub_docs:
- metadata = getattr(doc, "metadata", {})
- if self.id_key in metadata:
- parent_ids.append(metadata[self.id_key])
-
- # Get parent documents from docstore
- parent_docs = []
- if parent_ids and hasattr(self.docstore, "mget"):
- parent_docs = self.docstore.mget(parent_ids)
- elif parent_ids and hasattr(self.docstore, "get"):
- parent_docs = [self.docstore.get(pid) for pid in parent_ids if self.docstore.get(pid) is not None]
-
- # Return parent documents (or sub-docs if no parent store)
- return parent_docs if parent_docs else sub_docs
-
- async def ainvoke(self, query: str) -> List[Any]:
- """Async invocation - retrieve documents for a query"""
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(None, self.invoke, query)
-
- def get_relevant_documents(self, query: str) -> List[Any]:
- """Get relevant documents (sync)"""
- return self.invoke(query)
diff --git a/mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py b/mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py
deleted file mode 100644
index 15fec83adcf..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Factory functions for creating retrievers."""
-
-from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel, RetrieverType
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.integrations.utilities.rag.retrievers.auto_retriever import AutoRetriever
-from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever
-
-
-def create_vector_store_retriever(config: RAGPipelineModel):
- """Create a vector store retriever."""
- if getattr(config.vector_store, '_mock_return_value', None) is not None:
- # If vector_store is mocked, return a simple mock retriever for testing
- from unittest.mock import MagicMock
- mock_retriever = MagicMock()
- mock_retriever._get_relevant_documents.return_value = [
- {"page_content": "The Wright brothers invented the airplane."}
- ]
- return mock_retriever
-
- vector_store_operator = VectorStoreOperator(
- vector_store=config.vector_store,
- documents=config.documents,
- embedding_model=config.embedding_model,
- vector_store_config=config.vector_store_config
- )
- return vector_store_operator.vector_store.as_retriever()
-
-
-def create_auto_retriever(config: RAGPipelineModel):
- """Create an auto retriever."""
- return AutoRetriever(
- vector_store=config.vector_store,
- documents=config.documents,
- embedding_model=config.embedding_model
- )
-
-
-def create_sql_retriever(config: RAGPipelineModel):
- """Create a SQL retriever."""
- return SQLRetriever(
- sql_source=config.sql_source,
- llm=config.llm
- )
-
-
-def create_retriever(config: RAGPipelineModel, retriever_type: RetrieverType = None):
- """Create a retriever based on type."""
- retriever_type = retriever_type or config.retriever_type
-
- if retriever_type == RetrieverType.VECTOR_STORE:
- return create_vector_store_retriever(config)
- elif retriever_type == RetrieverType.AUTO:
- return create_auto_retriever(config)
- elif retriever_type == RetrieverType.SQL:
- return create_sql_retriever(config)
- else:
- raise ValueError(f"Unsupported retriever type: {retriever_type}")
diff --git a/mindsdb/integrations/utilities/rag/retrievers/safe_output_parser.py b/mindsdb/integrations/utilities/rag/retrievers/safe_output_parser.py
deleted file mode 100644
index 0460714eb12..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/safe_output_parser.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import re
-from typing import Union
-from dataclasses import dataclass
-
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-# Default format instructions for conversational agent
-# This is a simplified version - can be customized if needed
-FORMAT_INSTRUCTIONS = """Use the following format:
-
-Question: the input question you must answer
-Thought: you should think about what to do
-Action: the action to take, should be one of the available tools
-Action Input: the input to the action
-Observation: the result of the action
-... (this Thought/Action/Action Input/Observation can repeat N times)
-Thought: I now know the final answer
-Final Answer: the final answer to the original input question"""
-
-
-@dataclass
-class AgentAction:
- """Custom AgentAction class to replace langchain AgentAction"""
-
- tool: str
- tool_input: str
- log: str
-
-
-@dataclass
-class AgentFinish:
- """Custom AgentFinish class to replace langchain AgentFinish"""
-
- return_values: dict
- log: str
-
-
-class SafeOutputParser:
- """Output parser for the conversational agent that does not throw OutputParserException."""
-
- def __init__(self, ai_prefix: str = "AI", format_instructions: str = FORMAT_INSTRUCTIONS):
- self.ai_prefix = ai_prefix
- self.format_instructions = format_instructions
-
- def get_format_instructions(self) -> str:
- """Returns formatting instructions for the given output parser."""
- return self.format_instructions
-
- def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
- """Parses outputted text from an LLM.
-
- Args:
- text (str): Outputted text to parse.
-
- Returns:
- Union[AgentAction, AgentFinish]: Parsed agent action or finish result
- """
- regex = r"Action: (.*?)[\n]*Action Input:([\s\S]*)"
- match = re.search(regex, text, re.DOTALL)
- if match is not None:
- action = match.group(1)
- action_input = match.group(2)
- return AgentAction(action.strip(), action_input.strip(" ").strip('"'), text)
- output = text
- if f"{self.ai_prefix}:" in text:
- output = text.split(f"{self.ai_prefix}:")[-1].strip()
- return AgentFinish({"output": output}, text)
-
- def extract_output(self, result: Union[AgentAction, AgentFinish, str]) -> str:
- """Extract the actual output text from a parse result.
-
- Args:
- result: Result from parse() method or a string
-
- Returns:
- str: The actual output text
- """
- if isinstance(result, str):
- return result
- elif isinstance(result, AgentFinish):
- return result.return_values.get("output", result.log)
- elif isinstance(result, AgentAction):
- # For AgentAction, return the log or tool_input
- return result.tool_input if result.tool_input else result.log
- else:
- return str(result)
-
- @property
- def _type(self) -> str:
- return "conversational"
diff --git a/mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py
deleted file mode 100644
index 34d7a1e0b89..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py
+++ /dev/null
@@ -1,949 +0,0 @@
-import re
-import math
-import logging
-import collections
-import json
-from typing import List, Any, Optional, Dict, Tuple, Union, Callable
-
-from pydantic import BaseModel, Field
-
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-from mindsdb.integrations.libs.response import HandlerResponse
-from mindsdb.integrations.libs.vectordatabase_handler import (
- DistanceFunction,
- VectorStoreHandler,
-)
-from mindsdb.integrations.utilities.rag.settings import (
- DatabaseSchema,
- TableSchema,
- ColumnSchema,
- ValueSchema,
- SearchKwargs,
-)
-from mindsdb.utilities import log
-
-import numpy as np
-
-logger = log.getLogger(__name__)
-
-
-class MetadataFilter(BaseModel):
- """Represents an LLM generated metadata filter to apply to a PostgreSQL query."""
-
- attribute: str = Field(description="Database column to apply filter to")
- comparator: str = Field(description="PostgreSQL comparator to use to filter database column")
- value: Any = Field(description="Value to use to filter database column")
-
-
-class AblativeMetadataFilter(MetadataFilter):
- """Adds additional fields to support ablation."""
-
- schema_table: str = Field(description="schema name of the table for this filter")
- schema_column: str = Field(description="schema name of the column for this filter")
- schema_value: str = Field(description="schema name of the value for this filter")
-
-
-class MetadataFilters(BaseModel):
- """List of LLM generated metadata filters to apply to a PostgreSQL query."""
-
- filters: List[MetadataFilter] = Field(description="List of PostgreSQL metadata filters to apply for user query")
-
-
-class SQLRetriever(BaseRetriever):
- """Retriever that uses a LLM to generate pgvector queries to do similarity search with metadata filters.
-
- How it works:
-
- 1. Use a LLM to rewrite the user input to something more suitable for retrieval. For example:
- "Show me documents containing how to finetune a LLM please" --> "how to finetune a LLM"
-
- 2. Use a LLM to generate structured metadata filters based on the user input. Provided
- metadata schemas & examples are used as additional context.
-
- 3. Generate a prepared PostgreSQL query from the structured metadata filters.
-
- 4. Actually execute the query against our vector database to retrieve documents & return them.
- """
-
- fallback_retriever: Any # Must have get_relevant_documents or invoke method
- vector_store_handler: VectorStoreHandler
- # search parameters
- max_filters: int
- filter_threshold: float
- min_k: int
-
- # Schema description
- database_schema: Optional[DatabaseSchema] = None
-
- # Embeddings
- embeddings_model: Any # Must have embed_query method
- search_kwargs: SearchKwargs
-
- # prompt templates
- rewrite_prompt_template: str
-
- # schema templates
- table_prompt_template: str
- column_prompt_template: str
- value_prompt_template: str
-
- # formatting templates
- boolean_system_prompt: str
- generative_system_prompt: str
-
- # SQL search config
- num_retries: int
- embeddings_table: str
- source_table: str
- source_id_column: str
- distance_function: DistanceFunction
-
- # Re-rank and metadata generation model.
- llm: Any # Must have invoke method
-
- def _sort_schema_by_priority_key(
- self,
- schema_dict_item: Tuple[str, Union[TableSchema, ColumnSchema, ValueSchema]],
- ):
- return schema_dict_item[1].priority
-
- def _sort_schema_by_relevance_key(
- self,
- schema_dict_item: Tuple[str, Union[TableSchema, ColumnSchema, ValueSchema]],
- ):
- if schema_dict_item[1].relevance is not None:
- return schema_dict_item[1].relevance
- else:
- return 0
-
- def _sort_schema_by_key(
- self,
- schema: Union[DatabaseSchema, TableSchema, ColumnSchema],
- key: Callable,
- update: Dict[str, Any] = None,
- ) -> Union[DatabaseSchema, TableSchema, ColumnSchema]:
- """Takes a schema and converts its dict into an OrderedDict"""
- if isinstance(schema, DatabaseSchema):
- collection_key = "tables"
- elif isinstance(schema, TableSchema):
- collection_key = "columns"
- elif isinstance(schema, ColumnSchema):
- collection_key = "values"
- else:
- raise Exception("schema must be either a DatabaseSchema, TableSchema, or ColumnSchema.")
-
- if update is not None:
- ordered = collections.OrderedDict(sorted(update.items(), key=key, reverse=True))
- else:
- ordered = collections.OrderedDict(sorted(getattr(schema, collection_key).items(), key=key, reverse=True))
- schema = schema.model_copy(update={collection_key: ordered})
-
- return schema
-
- def _sort_database_schema_by_key(self, database_schema: DatabaseSchema, key: Callable) -> DatabaseSchema:
- """Re-build schema with OrderedDicts"""
- tables = {}
- # build new tables dict
- for table_key, table_schema in database_schema.tables.items():
- columns = {}
- # build new column dict
- for column_key, column_schema in table_schema.columns.items():
- # sort values directly and update column schema
- columns[column_key] = self._sort_schema_by_key(schema=column_schema, key=key)
- # update table schema and sort
- tables[table_key] = self._sort_schema_by_key(schema=table_schema, key=key, update=columns)
- # update table schema and sort
- database_schema = self._sort_schema_by_key(schema=database_schema, key=key, update=tables)
-
- return database_schema
-
- def _prepare_value_prompt(
- self,
- value_schema: ValueSchema,
- column_schema: ColumnSchema,
- table_schema: TableSchema,
- boolean_system_prompt: bool = True,
- format_instructions: Optional[str] = None,
- ) -> str:
- if boolean_system_prompt is True:
- system_prompt = self.boolean_system_prompt
- else:
- system_prompt = self.generative_system_prompt
-
- prepared_column_prompt = self._prepare_column_prompt(column_schema=column_schema, table_schema=table_schema)
- # Extract column schema string from prepared prompt (it's now a string)
- column_schema_str = (
- prepared_column_prompt.split("Query:")[0] if "Query:" in prepared_column_prompt else prepared_column_prompt
- )
-
- value_str = ""
- header_str = ""
- if type(value_schema.value) in [str, int, float, bool]:
- header_str = f"This schema describes a single value in the {column_schema.column} column."
-
- value_str = f"""
- -**Value**: {value_schema.value}
-"""
-
- elif type(value_schema.value) is dict:
- header_str = f"This schema describes enumerated values in the {column_schema.column} column."
-
- value_str = """
-## **Enumerated Values**
-
-The values in the column are an enumeration of named values. These are listed below with format **[Column Value]**: [named value].
-"""
- for value, value_name in value_schema.value.items():
- value_str += f"""
-- **{value}:** {value_name}"""
-
- elif type(value_schema.value) is list:
- header_str = f"This schema describes some of the values in the {column_schema.column} column."
-
- value_str = """
-## **Sample Values**
-
-There are too many values in this column to list exhaustively. Below is a sampling of values found in the column:
-"""
- for value in value_schema.value:
- value_str += f"""
-- {value}"""
-
- if getattr(value_schema, "comparator", None) is not None:
- comparator_str = """
-
-## **Comparators**
-
-Below is a list of comparison operators for constructing filters for this value schema:
-"""
- if type(value_schema.comparator) is str:
- comparator_str += f"""- {value_schema.comparator}
-"""
- else:
- for comp in value_schema.comparator:
- comparator_str += f"""- {comp}
-"""
- else:
- comparator_str = ""
-
- if getattr(value_schema, "example_questions", None) is not None:
- example_str = """## **Example Questions**
-"""
- for i, example in enumerate(value_schema.example_questions):
- example_str += f"""{i}. **Query:** {example.input} **Answer:** {example.output}
-"""
- else:
- example_str = ""
-
- # Format prompt as string instead of ChatPromptTemplate
- format_instructions_str = format_instructions or ""
- prompt = f"""{system_prompt}
-
-{self.value_prompt_template}
-
-Format Instructions:
-{format_instructions_str}
-
-Header:
-{header_str}
-
-Column Schema:
-{column_schema_str}
-
-Value:
-{value_str}
-
-Comparator:
-{comparator_str}
-
-Type: {value_schema.type}
-Description: {value_schema.description}
-Usage: {value_schema.usage}
-
-Examples:
-{example_str}
-
-Query: {{query}}"""
- return prompt
-
- def _prepare_column_prompt(
- self,
- column_schema: ColumnSchema,
- table_schema: TableSchema,
- boolean_system_prompt: bool = True,
- ) -> str:
- if boolean_system_prompt is True:
- system_prompt = self.boolean_system_prompt
- else:
- system_prompt = self.generative_system_prompt
-
- prepared_table_prompt = self._prepare_table_prompt(
- table_schema=table_schema, boolean_system_prompt=boolean_system_prompt
- )
- # Extract table schema string from prepared prompt (it's now a string)
- table_schema_str = (
- prepared_table_prompt.split("Query:")[0] if "Query:" in prepared_table_prompt else prepared_table_prompt
- )
-
- header_str = f"This schema describes a column in the {table_schema.table} table."
-
- value_str = """
-## **Content**
-
-Below is a description of the contents in this column in list format:
-"""
- for value_schema in column_schema.values.values():
- value_str += f"""
-- {value_schema.description}
-"""
- value_str += """
-**Important:** The above descriptions are not the actual values stored in this column. See the Value schema for actual values.
-"""
-
- if getattr(column_schema, "examples", None) is not None:
- example_str = """## **Example Questions**
-"""
- for example in column_schema.examples:
- example_str += f"""- {example}
-"""
- else:
- example_str = ""
-
- # Format prompt as string instead of ChatPromptTemplate
- prompt = f"""{system_prompt}
-
-{self.column_prompt_template}
-
-Header:
-{header_str}
-
-Table Schema:
-{table_schema_str}
-
-Column: {column_schema.column}
-Type: {column_schema.type}
-Description: {column_schema.description}
-Usage: {column_schema.usage}
-
-Values:
-{value_str}
-
-Examples:
-{example_str}
-
-Query: {{query}}"""
- return prompt
-
- def _prepare_table_prompt(self, table_schema: TableSchema, boolean_system_prompt: bool = True) -> str:
- if boolean_system_prompt is True:
- system_prompt = self.boolean_system_prompt
- else:
- system_prompt = self.generative_system_prompt
-
- header_str = "This schema describes a table in the database."
-
- columns_str = ""
- for column_key, column_schema in table_schema.columns.items():
- columns_str += f"""
-- **{column_schema.column}:** {column_schema.description}
-"""
-
- if getattr(table_schema, "examples", None) is not None:
- example_str = """## **Example Questions**
-"""
- for example in table_schema.examples:
- example_str += f"""- {example}
-"""
- else:
- example_str = ""
-
- # Format prompt as string instead of ChatPromptTemplate
- prompt = f"""{system_prompt}
-
-{self.table_prompt_template}
-
-Header:
-{header_str}
-
-Table: {table_schema.table}
-Description: {table_schema.description}
-Usage: {table_schema.usage}
-
-Columns:
-{columns_str}
-
-Examples:
-{example_str}
-
-Query: {{query}}"""
- return prompt
-
- def _rank_schema(self, prompt: str, query: str) -> float:
- """
- Rank schema by calling LLM with prompt and query.
-
- Args:
- prompt: Prompt template string with {query} placeholder
- query: Query string
-
- Returns:
- Relevance score between 0 and 1
- """
- # Format prompt with query
- formatted_prompt = prompt.format(query=query)
-
- try:
- # Call LLM - try to get logprobs if supported
- if hasattr(self.llm, "bind") and hasattr(self.llm.bind(logprobs=True), "invoke"):
- llm_with_logprobs = self.llm.bind(logprobs=True)
- output = llm_with_logprobs.invoke(formatted_prompt)
- else:
- # Fallback to regular invoke
- output = self.llm.invoke(formatted_prompt)
-
- # Try to extract logprobs from response
- score = None
- if hasattr(output, "response_metadata") and "logprobs" in output.response_metadata:
- logprobs = output.response_metadata["logprobs"]
- if "content" in logprobs:
- for content in logprobs["content"]:
- token = content.get("token", "").lower().strip()
- logprob = content.get("logprob", 0.0)
- if token == "yes":
- score = (1 + math.exp(logprob)) / 2
- break
- elif token == "no":
- score = (1 - math.exp(logprob)) / 2
- break
-
- # If no logprobs, try to parse yes/no from content
- if score is None:
- content_text = ""
- if hasattr(output, "content"):
- content_text = output.content.lower().strip()
- elif isinstance(output, str):
- content_text = output.lower().strip()
- else:
- content_text = str(output).lower().strip()
-
- if "yes" in content_text:
- score = 0.75 # Default positive score
- elif "no" in content_text:
- score = 0.25 # Default negative score
- else:
- score = 0.5 # Neutral score
-
- if score is None:
- score = 0.0
-
- except Exception as e:
- logger.warning(f"Error ranking schema: {e}")
- score = 0.0
-
- return score
-
- def _breadth_first_search(self, query: str, greedy: bool = False) -> Tuple:
- """Search breadth wise through Tables, then Columns, then Values.Uses a greedy strategy to maximize quota if greedy=True, otherwise a dynamic strategy."""
-
- # sort based on priority
- ordered_database_schema = self._sort_database_schema_by_key(
- database_schema=self.database_schema, key=self._sort_schema_by_priority_key
- )
-
- # Rank Tables ########################################################
- greedy_count = 0
- tables = {}
- # rank tables by relevance
- for table_key, table_schema in ordered_database_schema.tables.items():
- prompt: str = self._prepare_table_prompt(table_schema=table_schema, boolean_system_prompt=True)
- table_schema.relevance = self._rank_schema(prompt=prompt, query=query)
-
- # only keep greedy tables
- tables[table_key] = table_schema
-
- if greedy:
- if table_schema.relevance >= ordered_database_schema.filter_threshold:
- greedy_count += 1
- if greedy_count >= ordered_database_schema.max_filters:
- break
-
- # sort tables
- ordered_database_schema = self._sort_schema_by_key(
- schema=ordered_database_schema,
- key=self._sort_schema_by_relevance_key,
- update=tables,
- )
-
- # Rank Columns #######################################################
- # iterate through tables to rank columns
- tables = {}
- table_count = 0 # take only the top n number of tables specified by the databases max filters
- for table_key, table_schema in ordered_database_schema.tables.items():
- # only drop into tables above the filter threshold
- if table_schema.relevance >= ordered_database_schema.filter_threshold:
- greedy_count = 0
- # rank columns by relevance
- columns = {}
- for column_key, column_schema in table_schema.columns.items():
- prompt: str = self._prepare_column_prompt(
- column_schema=column_schema,
- table_schema=table_schema,
- boolean_system_prompt=True,
- )
- column_schema.relevance = self._rank_schema(prompt=prompt, query=query)
-
- columns[column_key] = column_schema
-
- if greedy:
- if column_schema.relevance >= table_schema.filter_threshold:
- greedy_count += 1
- if greedy_count >= table_schema.max_filters:
- break
-
- # sort columns and keep only columns that made the cut.
- tables[table_key] = self._sort_schema_by_key(
- table_schema, key=self._sort_schema_by_relevance_key, update=columns
- )
-
- table_count += 1
- if table_count >= ordered_database_schema.max_filters:
- break
-
- # sort tables and keep only tables that made the cut.
- ordered_database_schema = self._sort_schema_by_key(
- ordered_database_schema,
- key=self._sort_schema_by_relevance_key,
- update=tables,
- )
-
- # Rank Values ########################################################
- # iterate through tables to rank values
- tables = {}
- for table_key, table_schema in ordered_database_schema.tables.items():
- columns = {}
- column_count = 0
- # iterate through columns to rank values
- for column_key, column_schema in table_schema.columns.items():
- if column_schema.relevance >= table_schema.filter_threshold:
- greedy_count = 0
- values = {}
- # rank values by relevance
- for value_key, value_schema in column_schema.values.items():
- prompt: str = self._prepare_value_prompt(
- value_schema=value_schema,
- column_schema=column_schema,
- table_schema=table_schema,
- boolean_system_prompt=True,
- )
- value_schema.relevance = self._rank_schema(prompt=prompt, query=query)
-
- values[value_key] = value_schema
-
- if greedy:
- if value_schema.relevance >= column_schema.filter_threshold:
- greedy_count += 1
- if greedy_count >= column_schema.max_filters:
- break
-
- # sort values and keep only values that make the cut
- columns[column_key] = self._sort_schema_by_key(
- column_schema,
- key=self._sort_schema_by_relevance_key,
- update=values,
- )
-
- column_count += 1
- if column_count >= table_schema.max_filters:
- break
-
- # sort columns and keep only columns that made the cut
- tables[table_key] = self._sort_schema_by_key(
- table_schema, key=self._sort_schema_by_relevance_key, update=columns
- )
-
- # sort tables and keep only tables that made the cut.
- ordered_database_schema = self._sort_schema_by_key(
- ordered_database_schema,
- key=self._sort_schema_by_relevance_key,
- update=tables,
- )
-
- # discard low ranked values ###################################################################################
- tables = {}
- for table_key, table_schema in ordered_database_schema.tables.items():
- columns = {}
- # iterate through columns to rank values
- for column_key, column_schema in table_schema.columns.items():
- value_count = 0
- values = {}
- # rank values by relevance
- for value_key, value_schema in column_schema.values.items():
- if value_schema.relevance >= column_schema.filter_threshold:
- values[value_key] = value_schema
-
- value_count += 1
- if value_count >= column_schema.max_filters:
- break
-
- # sort values and keep only values that make the cut
- columns[column_key] = self._sort_schema_by_key(
- column_schema,
- key=self._sort_schema_by_relevance_key,
- update=values,
- )
-
- # sort columns and keep only columns that made the cut
- tables[table_key] = self._sort_schema_by_key(
- table_schema, key=self._sort_schema_by_relevance_key, update=columns
- )
-
- # sort tables and keep only tables that made the cut.
- ordered_database_schema = self._sort_schema_by_key(
- ordered_database_schema,
- key=self._sort_schema_by_relevance_key,
- update=tables,
- )
-
- ranked_database_schema = ordered_database_schema
-
- # Build Ablation #####################################################
-
- ablation_value_dict = {}
- # assemble a relevance dictionary
- for table_key, table_schema in ordered_database_schema.tables.items():
- for column_key, column_schema in table_schema.columns.items():
- for value_key, value_schema in column_schema.values.items():
- ablation_value_dict[(table_key, column_key, value_key)] = value_schema.relevance
-
- ablation_value_dict = collections.OrderedDict(sorted(ablation_value_dict.items(), key=lambda x: x[1]))
-
- relevance_scores = list(ablation_value_dict.values())
- if len(relevance_scores) > 0:
- ablation_quantiles = np.quantile(relevance_scores, np.linspace(0, 1, self.num_retries + 2)[1:-1])
- else:
- ablation_quantiles = None
-
- return ranked_database_schema, ablation_value_dict, ablation_quantiles
-
- def _dynamic_ablation(
- self,
- metadata_filters: List[AblativeMetadataFilter],
- ablation_value_dict,
- ablation_quantiles,
- retry: int,
- ):
- """Ablate metadata filters in aggregate by quantiles until the required minimum number of documents are returned."""
-
- ablated_dict = {}
- for key, value in ablation_value_dict.items():
- if value >= ablation_quantiles[retry]:
- ablated_dict[key] = value
-
- # discard low ranked filters ##################################################################################
- ablated_filters = []
- for filter in metadata_filters:
- for key in ablated_dict.keys():
- if filter.schema_table in key and filter.schema_column in key and filter.schema_value in key:
- ablated_filters.append(filter)
-
- return ablated_filters
-
- def depth_first_search(self, greedy=True):
- """Search depth wise through Tables, then Columns, then Values. Uses a greedy strategy to maximize quota if greedy=True, otherwise a dynamic strategy."""
- pass
-
- def depth_first_ablation(self):
- """Ablate metadata filters in reverse depth first search until the required minimum number of documents are returned."""
- pass
-
- def _prepare_retrieval_query(self, query: str) -> str:
- """Rewrite query to be suitable for retrieval using LLM"""
- # Format prompt with query
- formatted_prompt = self.rewrite_prompt_template.format(input=query)
-
- # Call LLM
- llm_response = self.llm.invoke(formatted_prompt)
-
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- return llm_response.content
- elif isinstance(llm_response, str):
- return llm_response
- else:
- return str(llm_response)
-
- def _prepare_pgvector_query(
- self,
- ranked_database_schema: DatabaseSchema,
- metadata_filters: List[AblativeMetadataFilter],
- retry: int = 0,
- ) -> str:
- # Base select JOINed with document source table.
- base_query = f"""SELECT * FROM {self.embeddings_table} AS e INNER JOIN {self.source_table} AS s ON (e.metadata->>'original_row_id')::int = s."{self.source_id_column}" """
-
- # return an empty string if schema has not been ranked
- if not ranked_database_schema:
- return ""
-
- # Add Table JOIN statements
- join_clauses = set()
- for metadata_filter in metadata_filters:
- join_clause = ranked_database_schema.tables[metadata_filter.schema_table].join
- if join_clause in join_clauses:
- continue
- else:
- join_clauses.add(join_clause)
- base_query += join_clause + " "
-
- # Add WHERE conditions from metadata filters
- if metadata_filters:
- base_query += "WHERE "
- for i, filter in enumerate(metadata_filters):
- value = filter.value
- if isinstance(value, str):
- value = f"'{value}'"
- base_query += f'"{filter.attribute}" {filter.comparator} {value}'
- if i < len(metadata_filters) - 1:
- base_query += " AND "
-
- base_query += (
- f" ORDER BY e.embeddings {self.distance_function.value[0]} '{{embeddings}}' LIMIT {self.search_kwargs.k};"
- )
- return base_query
-
- def _generate_filter(self, prompt: str, query: str) -> MetadataFilter:
- """Generate metadata filter using LLM"""
- # Format prompt with query
- formatted_prompt = prompt.format(query=query)
-
- # Call LLM
- llm_response = self.llm.invoke(formatted_prompt)
-
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- response_text = llm_response.content
- elif isinstance(llm_response, str):
- response_text = llm_response
- else:
- response_text = str(llm_response)
-
- # Parse JSON response to get MetadataFilter
- try:
- parsed = json.loads(response_text)
- # If it's a dict, try to create MetadataFilter
- if isinstance(parsed, dict):
- return MetadataFilter(**parsed)
- else:
- # If it's already a MetadataFilter-like object
- return parsed
- except (json.JSONDecodeError, TypeError, Exception) as e:
- logger.warning(f"Error parsing filter response: {e}")
- # Return empty filter on error
- return MetadataFilter(attribute="", comparator="=", value="")
-
- def _generate_metadata_filters(
- self, query: str, ranked_database_schema
- ) -> Union[List[AblativeMetadataFilter], HandlerResponse]:
- """Generate metadata filters using LLM"""
-
- metadata_filter_list = []
- # iterate through tables to rank values
- for table_key, table_schema in ranked_database_schema.tables.items():
- # iterate through columns to rank values
- for column_key, column_schema in table_schema.columns.items():
- if column_schema.relevance >= table_schema.filter_threshold:
- # generate filters
- for value_key, value_schema in column_schema.values.items():
- # must use generation if field is a dictionary of tuples or a list
- if type(value_schema.value) in [list, dict]:
- try:
- # Create format instructions for JSON output
- format_instructions = """Return a JSON object with the following structure:
-{
- "attribute": "column_name",
- "comparator": "comparison_operator",
- "value": "filter_value"
-}"""
-
- metadata_prompt: str = self._prepare_value_prompt(
- format_instructions=format_instructions,
- value_schema=value_schema,
- column_schema=column_schema,
- table_schema=table_schema,
- boolean_system_prompt=False,
- )
-
- # Call LLM directly
- formatted_prompt = metadata_prompt.format(query=query)
- llm_response = self.llm.invoke(formatted_prompt)
-
- # Extract content from LLM response
- if hasattr(llm_response, "content"):
- metadata_filter_output = llm_response.content
- elif isinstance(llm_response, str):
- metadata_filter_output = llm_response
- else:
- metadata_filter_output = str(llm_response)
-
- # If the LLM outputs raw JSON, use it as-is.
- # If the LLM outputs anything including a json markdown section, use the last one.
- json_markdown_output = re.findall(r"```json.*?```", metadata_filter_output, re.DOTALL)
- if json_markdown_output:
- metadata_filter_output = json_markdown_output[-1]
- # Clean the json tags.
- metadata_filter_output = metadata_filter_output[7:]
- metadata_filter_output = metadata_filter_output[:-3]
-
- # Parse JSON directly instead of using PydanticOutputParser
- parsed = json.loads(metadata_filter_output.strip())
- model_dump = {
- "attribute": parsed.get("attribute", ""),
- "comparator": parsed.get("comparator", "="),
- "value": parsed.get("value", ""),
- "schema_table": table_key,
- "schema_column": column_key,
- "schema_value": value_key,
- }
- metadata_filter = AblativeMetadataFilter(**model_dump)
- except (json.JSONDecodeError, TypeError, Exception) as e:
- logger.warning(
- f"LLM failed to generate structured metadata filters: {e}",
- exc_info=logger.isEnabledFor(logging.DEBUG),
- )
- return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
- else:
- metadata_filter = AblativeMetadataFilter(
- attribute=column_schema.column,
- comparator=value_schema.comparator,
- value=value_schema.value,
- schema_table=table_key,
- schema_column=column_key,
- schema_value=value_key,
- )
- metadata_filter_list.append(metadata_filter)
-
- return metadata_filter_list
-
- def _prepare_and_execute_query(
- self,
- ranked_database_schema: DatabaseSchema,
- metadata_filters: List[AblativeMetadataFilter],
- embeddings_str: str,
- ) -> HandlerResponse:
- try:
- checked_sql_query = self._prepare_pgvector_query(ranked_database_schema, metadata_filters)
- checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=embeddings_str)
- return self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
- except Exception as e:
- logger.warning(
- f"Failed to prepare and execute SQL query from structured metadata: {e}",
- exc_info=logger.isEnabledFor(logging.DEBUG),
- )
- return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
-
- def _get_relevant_documents(self, query: str, *, run_manager: Optional[Any] = None) -> List[Any]:
- # Rewrite query to be suitable for retrieval.
- retrieval_query = self._prepare_retrieval_query(query)
-
- # Embed the rewritten retrieval query & include it in the similarity search pgvector query.
- embedded_query = self.embeddings_model.embed_query(retrieval_query)
-
- # Search for relevant filters
- ranked_database_schema, ablation_value_dict, ablation_quantiles = self._breadth_first_search(query=query)
-
- # Generate metadata filters
- metadata_filters = self._generate_metadata_filters(query=query, ranked_database_schema=ranked_database_schema)
-
- if type(metadata_filters) is list:
- # Initial Execution of the similarity search with metadata filters.
- document_response = self._prepare_and_execute_query(
- ranked_database_schema=ranked_database_schema,
- metadata_filters=metadata_filters,
- embeddings_str=str(embedded_query),
- )
- num_retries = 0
- while num_retries < self.num_retries:
- if (
- document_response.resp_type != RESPONSE_TYPE.ERROR
- and len(document_response.data_frame) >= self.min_k
- ):
- # Successfully retrieved k documents to send to re-ranker.
- break
- elif document_response.resp_type == RESPONSE_TYPE.ERROR:
- # LLMs won't always generate structured metadata so we should have a fallback after retrying.
- logger.info(f"SQL Retriever query failed with error {document_response.error_message}")
- else:
- logger.info(
- f"SQL Retriever did not retrieve {self.min_k} documents: {len(document_response.data_frame)} documents retrieved."
- )
-
- ablated_metadata_filters = self._dynamic_ablation(
- metadata_filters=metadata_filters,
- ablation_value_dict=ablation_value_dict,
- ablation_quantiles=ablation_quantiles,
- retry=num_retries,
- )
-
- document_response = self._prepare_and_execute_query(
- ranked_database_schema=ranked_database_schema,
- metadata_filters=ablated_metadata_filters,
- embeddings_str=str(embedded_query),
- )
-
- num_retries += 1
-
- retrieved_documents = []
- if document_response.resp_type != RESPONSE_TYPE.ERROR:
- document_df = document_response.data_frame
- for _, document_row in document_df.iterrows():
- retrieved_documents.append(
- SimpleDocument(
- page_content=document_row.get("content", ""),
- metadata=document_row.get("metadata", {}),
- )
- )
- if retrieved_documents:
- return retrieved_documents
-
- # If the SQL query constructed did not return any documents, fallback.
- logger.info("No documents returned from SQL retriever, using fallback retriever.")
- return self._retrieve_from_fallback_retriever(retrieval_query)
- else:
- # If no metadata fields could be generated fallback.
- logger.info("No metadata fields were successfully generated, using fallback retriever.")
- return self._retrieve_from_fallback_retriever(retrieval_query)
-
- def _retrieve_from_fallback_retriever(self, query: str) -> List[Any]:
- """Retrieve documents from fallback retriever using duck typing"""
- if hasattr(self.fallback_retriever, "_get_relevant_documents"):
- return self.fallback_retriever._get_relevant_documents(query)
- elif hasattr(self.fallback_retriever, "get_relevant_documents"):
- return self.fallback_retriever.get_relevant_documents(query)
- elif hasattr(self.fallback_retriever, "invoke"):
- return self.fallback_retriever.invoke(query)
- else:
- raise ValueError(
- "Fallback retriever must have _get_relevant_documents, get_relevant_documents, or invoke method"
- )
-
- def invoke(self, query: str) -> List[Any]:
- """Sync invocation - retrieve documents for a query"""
- return self._get_relevant_documents(query)
-
- async def ainvoke(self, query: str) -> List[Any]:
- """Async invocation - retrieve documents for a query"""
- import asyncio
-
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(None, self._get_relevant_documents, query)
-
- def get_relevant_documents(self, query: str) -> List[Any]:
- """Get relevant documents (sync)"""
- return self._get_relevant_documents(query)
-
- def as_runnable(self) -> RunnableRetriever:
- """Return self as a runnable retriever"""
- return self
diff --git a/mindsdb/integrations/utilities/rag/settings.py b/mindsdb/integrations/utilities/rag/settings.py
index 56a8306295f..c4eb9a6a162 100644
--- a/mindsdb/integrations/utilities/rag/settings.py
+++ b/mindsdb/integrations/utilities/rag/settings.py
@@ -1,366 +1,20 @@
from enum import Enum
-from typing import List, Union, Any, Optional, Dict, OrderedDict
+from typing import List, Any, Optional, Dict
-from pydantic import BaseModel, Field, field_validator, ConfigDict
-from mindsdb.integrations.utilities.rag.splitters.custom_splitters import RecursiveCharacterTextSplitter as TextSplitter
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
+from pydantic import BaseModel, Field, ConfigDict
-DEFAULT_COLLECTION_NAME = "default_collection"
-# Multi retriever specific
-DEFAULT_ID_KEY = "doc_id"
-DEFAULT_MAX_CONCURRENCY = 5
-DEFAULT_K = 20
-
-DEFAULT_CARDINALITY_THRESHOLD = 40
-DEFAULT_MAX_SUMMARIZATION_TOKENS = 4000
DEFAULT_CHUNK_SIZE = 1000
DEFAULT_CHUNK_OVERLAP = 200
-DEFAULT_POOL_RECYCLE = 3600
DEFAULT_LLM_MODEL = "gpt-4o"
+DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1"
DEFAULT_LLM_MODEL_PROVIDER = "openai"
-DEFAULT_CONTENT_COLUMN_NAME = "body"
-DEFAULT_DATASET_DESCRIPTION = "email inbox"
-DEFAULT_TEST_TABLE_NAME = "test_email"
-DEFAULT_RERANKER_FLAG = False
DEFAULT_RERANKING_MODEL = "gpt-4o"
-DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1"
DEFAULT_RERANKER_N = 1
DEFAULT_RERANKER_LOGPROBS = True
DEFAULT_RERANKER_TOP_LOGPROBS = 4
DEFAULT_RERANKER_MAX_TOKENS = 100
DEFAULT_VALID_CLASS_TOKENS = ["1", "2", "3", "4"]
-DEFAULT_AUTO_META_PROMPT_TEMPLATE = """
-Below is a json representation of a table with information about {description}.
-Return a JSON list with an entry for each column. Each entry should have
-{{"name": "column name", "description": "column description", "type": "column data type"}}
-\n\n{dataframe}\n\nJSON:\n
-"""
-DEFAULT_RAG_PROMPT_TEMPLATE = """You are an assistant for
-question-answering tasks. Use the following pieces of retrieved context
-to answer the question. If you don't know the answer, just say that you
-don't know. Use two sentences maximum and keep the answer concise.
-Question: {question}
-Context: {context}
-Answer:"""
-
-DEFAULT_QA_GENERATION_PROMPT_TEMPLATE = """You are an assistant for
-generating sample questions and answers from the given document and metadata. Given
-a document and its metadata as context, generate a question and answer from that document and its metadata.
-
-The document will be a string. The metadata will be a JSON string. You need
-to parse the JSON to understand it.
-
-Generate a question that requires BOTH the document and metadata to answer, if possible.
-Otherwise, generate a question that requires ONLY the document to answer.
-
-Return a JSON dictionary with the question and answer like this:
-{{ "question": , "answer": }}
-
-Make sure the JSON string is valid before returning it. You must return the question and answer
-in the specified JSON format no matter what.
-
-Document: {document}
-Metadata: {metadata}
-Answer:"""
-
-DEFAULT_MAP_PROMPT_TEMPLATE = """The following is a set of documents
-{docs}
-Based on this list of docs, please summarize based on the user input.
-
-User input: {input}
-
-Helpful Answer:"""
-
-DEFAULT_REDUCE_PROMPT_TEMPLATE = """The following is set of summaries:
-{docs}
-Take these and distill it into a final, consolidated summary related to the user input.
-
-User input: {input}
-
-Helpful Answer:"""
-
-DEFAULT_SEMANTIC_PROMPT_TEMPLATE = """Provide a better search query for web search engine to answer the given question.
-
-<< EXAMPLES >>
-1. Input: "Show me documents containing how to finetune a LLM please"
-Output: "how to finetune a LLM"
-
-Output only a single better search query and nothing else like in the example.
-
-Here is the user input: {input}
-"""
-
-DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE = """Construct a list of PostgreSQL metadata filters to filter documents in the database based on the user input.
-
-<< INSTRUCTIONS >>
-{format_instructions}
-
-RETURN ONLY THE FINAL JSON. DO NOT EXPLAIN, JUST RETURN THE FINAL JSON.
-
-<< TABLES YOU HAVE ACCESS TO >>
-
-{schema}
-
-<< EXAMPLES >>
-
-{examples}
-
-Here is the user input:
-{input}
-"""
-
-DEFAULT_BOOLEAN_PROMPT_TEMPLATE = """**Task:** Determine Schema Relevance for Database Search Queries
-
-As an expert in constructing database search queries, you are provided with database schemas detailing tables, columns, and values. Your task is to assess whether these elements can be used to effectively search the database in relation to a given user query.
-
-**Instructions:**
-
-- **Evaluate the Schema**:
- - Analyze the tables, columns, and values described.
- - Consider their potential usefulness in retrieving information pertinent to the user query.
-
-- **Decision Criteria**:
- - Determine if any part of the schema could assist in forming a relevant search query for the information requested.
-
-- **Response**:
- - Reply with a single word: 'yes' if the schema components are useful, otherwise 'no'.
-
-**Note:** Provide your answer based solely on the relevance of the described schema to the user query."""
-
-DEFAULT_GENERATIVE_SYSTEM_PROMPT = """You are an expert database analyst that can assist in building SQL queries by providing structured output. Follow these format instructions precisely to generate a metadata filter given the provided schema description.
-
-## Format instructions:
-{format_instructions}
- """
-
-DEFAULT_VALUE_PROMPT_TEMPLATE = """
-{column_schema}
-
-# **Value Schema**
-{header}
-
-- The type of the value: {type}
-
-## **Description**
-{description}
-
-{value}{comparator}
-
-## **Usage**
-{usage}
-
-{examples}
-
-## **Query**
-{query}
-
-"""
-
-DEFAULT_COLUMN_PROMPT_TEMPLATE = """
-{table_schema}
-
-# **Column Schema**
-{header}
-
-- The column name in the database table: {column}
-- The type of the values in this column: {type}
-
-## **Description**
-{description}
-
-## **Usage**
-{usage}
-
-{examples}
-
-## **Query**
-{query}
-"""
-
-DEFAULT_TABLE_PROMPT_TEMPLATE = """# **Table Schema**
-{header}
-
-- The name of this table in the database: {table}
-
-## **Description**
-{description}
-
-## **Usage**
-{usage}
-
-## **Column Descriptions**
-Below are descriptions of each column in this table:
-
-{columns}
-
-{examples}
-
-## **Query**
-{query}
-"""
-
-DEFAULT_SQL_PROMPT_TEMPLATE = """
-Construct a valid {dialect} SQL query to select documents relevant to the user input.
-Source documents are found in the {source_table} table. You may need to join with other tables to get additional document metadata.
-
-The JSON col "metadata" in the {embeddings_table} has a string field called "original_row_id". This "original_row_id" string field in the
-"metadata" col is the document ID associated with a row in the {embeddings_table} table.
-You MUST always join with the {embeddings_table} table containing vector embeddings for the documents. For example, for a table named sd with an id column "Id":
-JOIN {embeddings_table} v ON (v."metadata"->>'original_row_id')::int = sd."Id"
-
-You MUST always order the embeddings by the {distance_function} comparator with '{{embeddings}}'.
-You MUST always limit by {k} returned documents.
-For example:
-ORDER BY v.embeddings {distance_function} '{{embeddings}}' LIMIT {k};
-
-
-<< TABLES YOU HAVE ACCESS TO >>
-1. {embeddings_table} - Contains document chunks, vector embeddings, and metadata for documents.
-You MUST always include the metadata column in your SELECT statement.
-You MUST always join with the {embeddings_table} table containing vector embeddings for the documents.
-You MUST always order by the provided embeddings vector using the {distance_function} comparator.
-You MUST always limit by {k} returned documents.
-
-Columns:
-```json
-{{
- "id": {{
- "type": "string",
- "description": "Unique ID for this document chunk"
- }},
- "content": {{
- "type": "string",
- "description": "A document chunk (subset of the original document)"
- }},
- "embeddings": {{
- "type": "vector",
- "description": "Vector embeddings for the document chunk. ALWAYS order by the provided embeddings vector using the {distance_function} comparator."
- }},
- "metadata": {{
- "type": "jsonb",
- "description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
- }}
-}}
-
-{schema}
-
-<< EXAMPLES >>
-
-{examples}
-
-Output the {dialect} SQL query that is ready to be executed only WITHOUT ANY DELIMITERS. Make sure to properly quote identifiers.
-
-Here is the user input:
-{input}
-"""
-
-DEFAULT_QUESTION_REFORMULATION_TEMPLATE = """Given the original question and the retrieved context,
-analyze what additional information is needed for a complete, accurate answer.
-
-Original Question: {question}
-
-Retrieved Context:
-{context}
-
-Analysis Instructions:
-1. Evaluate Context Coverage:
- - Identify key entities and concepts from the question
- - Check for temporal information (dates, periods, sequences)
- - Verify causal relationships are explained
- - Confirm presence of requested quantitative data
- - Assess if geographic or spatial context is sufficient
-
-2. Quality Assessment:
- If the retrieved context is:
- - Irrelevant or tangential
- - Too general or vague
- - Potentially contradictory
- - Missing key perspectives
- - Lacking proper evidence
- Generate questions to address these specific gaps.
-
-3. Follow-up Question Requirements:
- - Questions must directly contribute to answering the original query
- - Break complex relationships into simpler, sequential steps
- - Maintain specificity rather than broad inquiries
- - Avoid questions answerable from existing context
- - Ensure questions build on each other logically
- - Limit questions to 150 characters each
- - Each question must be self-contained
- - Questions must end with a question mark
-
-4. Response Format:
- - Return a JSON array of strings
- - Use square brackets and double quotes
- - Questions must be unique (no duplicates)
- - If context is sufficient, return empty array []
- - Maximum 3 follow-up questions
- - Minimum length per question: 30 characters
- - No null values or empty strings
-
-Example:
-Original: "How did the development of antibiotics affect military casualties in WWII?"
-
-Invalid responses:
-{'questions': ['What are antibiotics?']} // Wrong format
-['What is WWII?'] // Too basic
-['How did it impact things?'] // Too vague
-['', 'Question 2'] // Contains empty string
-['Same question?', 'Same question?'] // Duplicate
-
-Valid response:
-["What were military casualty rates from infections before widespread antibiotic use in 1942?",
- "How did penicillin availability change throughout different stages of WWII?",
- "What were the primary battlefield infections treated with antibiotics during WWII?"]
-
-or [] if context fully answers the original question.
-
-Your task: Based on the analysis of the original question and context,
-output ONLY a JSON array of follow-up questions needed to provide a complete answer.
-If no additional information is needed, output an empty array [].
-
-Follow-up Questions:"""
-
-DEFAULT_QUERY_RETRY_PROMPT_TEMPLATE = """
-{query}
-
-The {dialect} query above failed with the error message: {error}.
-
-<< TABLES YOU HAVE ACCESS TO >>
-1. {embeddings_table} - Contains document chunks, vector embeddings, and metadata for documents.
-
-Columns:
-```json
-{{
- "id": {{
- "type": "string",
- "description": "Unique ID for this document chunk"
- }},
- "content": {{
- "type": "string",
- "description": "A document chunk (subset of the original document)"
- }},
- "embeddings": {{
- "type": "vector",
- "description": "Vector embeddings for the document chunk."
- }},
- "metadata": {{
- "type": "jsonb",
- "description": "Metadata for the document chunk."
- }}
-}}
-
-{schema}
-
-Rewrite the query so it works.
-
-Output the final SQL query only.
-
-SQL Query:
-"""
-
-DEFAULT_NUM_QUERY_RETRIES = 2
class LLMConfig(BaseModel):
@@ -373,313 +27,6 @@ class LLMConfig(BaseModel):
model_config = ConfigDict(protected_namespaces=())
-class MultiVectorRetrieverMode(Enum):
- """
- Enum for MultiVectorRetriever types.
- """
-
- SPLIT = "split"
- SUMMARIZE = "summarize"
- BOTH = "both"
-
-
-class VectorStoreType(Enum):
- CHROMA = "chromadb"
- PGVECTOR = "pgvector"
-
-
-class VectorStoreConfig(BaseModel):
- vector_store_type: VectorStoreType = VectorStoreType.CHROMA
- persist_directory: str = None
- collection_name: str = DEFAULT_COLLECTION_NAME
- connection_string: str = None
- kb_table: Any = None
- is_sparse: bool = False
- vector_size: Optional[int] = None
-
- class Config:
- arbitrary_types_allowed = True
- extra = "forbid"
-
-
-class RetrieverType(str, Enum):
- """Retriever type for RAG pipeline"""
-
- VECTOR_STORE = "vector_store"
- AUTO = "auto"
- MULTI = "multi"
- SQL = "sql"
- MULTI_HOP = "multi_hop"
-
-
-class SearchType(Enum):
- """
- Enum for vector store search types.
- """
-
- SIMILARITY = "similarity"
- MMR = "mmr"
- SIMILARITY_SCORE_THRESHOLD = "similarity_score_threshold"
-
-
-class SearchKwargs(BaseModel):
- k: int = Field(default=DEFAULT_K, description="Amount of documents to return", ge=1)
- filter: Optional[Dict[str, Any]] = Field(default=None, description="Filter by document metadata")
- # For similarity_score_threshold search type
- score_threshold: Optional[float] = Field(
- default=None,
- description="Minimum relevance threshold for similarity_score_threshold search",
- ge=0.0,
- le=1.0,
- )
- # For MMR search type
- fetch_k: Optional[int] = Field(default=None, description="Amount of documents to pass to MMR algorithm", ge=1)
- lambda_mult: Optional[float] = Field(
- default=None,
- description="Diversity of results returned by MMR (1=min diversity, 0=max)",
- ge=0.0,
- le=1.0,
- )
-
- def model_dump(self, *args, **kwargs):
- # Override model_dump to exclude None values by default
- kwargs["exclude_none"] = True
- return super().model_dump(*args, **kwargs)
-
-
-class LLMExample(BaseModel):
- input: str = Field(description="User input for the example")
- output: str = Field(description="What the LLM should generate for this example's input")
-
-
-class ValueSchema(BaseModel):
- value: Union[
- Union[str, int, float],
- Dict[Union[str, int, float], str],
- List[Union[str, int, float]],
- ] = Field(
- description="One of the following. The value as it exists in the table column. A dict of {table_value: descriptive value, ...}, where table_value is the value in the table. A list of sample values taken from the column."
- )
- comparator: Optional[Union[str, List[str]]] = Field(
- description="The posgtres sql operators used to compare two values. For example: `>`, `<`, `=`, or `%`.",
- default="=",
- )
- type: str = Field(
- description="A valid postgres type for this value. One of: int, string, float, or bool. When numbers appear they should be of type int or float."
- )
- description: str = Field(description="Description of what the value represents.")
- usage: str = Field(description="How and when to use this value for search.")
- example_questions: Optional[List[LLMExample]] = Field(
- default=None, description="Example questions where this value is set."
- )
- filter_threshold: Optional[float] = Field(
- default=0.0,
- description="Minimum relevance threshold to include metadata filters from this column.",
- exclude=True,
- )
- priority: Optional[int] = Field(
- default=0,
- description="Priority level for this column, lower numbers will be processed first.",
- )
- relevance: Optional[float] = Field(
- default=None,
- description="Relevance computed during search. Should not be set by the end user.",
- exclude=True,
- )
-
-
-class MetadataConfig(BaseModel):
- """Class to configure metadata for retrieval. Only supports very basic document name lookup at the moment."""
-
- table: str = Field(description="Source table for metadata.")
- max_document_context: int = Field(
- # To work well with models with context window of 32768.
- default=16384,
- description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens",
- )
- embeddings_table: str = Field(default="embeddings", description="Source table for embeddings")
- id_column: str = Field(default="Id", description="Name of ID column in metadata table")
- name_column: str = Field(default="Title", description="Name of column containing name or title of document")
- name_column_index: Optional[str] = Field(default=None, description="Name of GIN index to use when looking up name.")
- content_column: str = Field(
- default="content", description="Name of column in embeddings table containing chunk content"
- )
- embeddings_metadata_column: str = Field(
- default="metadata", description="Name of column in embeddings table containing chunk metadata"
- )
- doc_id_key: str = Field(
- default="original_row_id", description="Metadata field that links an embedded chunk back to source document ID"
- )
-
-
-class ColumnSchema(BaseModel):
- column: str = Field(description="Name of the column in the database")
- type: str = Field(description="Type of the column (e.g. int, string, datetime)")
- description: str = Field(description="Description of what the column represents")
- usage: str = Field(description="How and when to use this Table for search.")
- values: Optional[
- Union[
- OrderedDict[Union[str, int, float], ValueSchema],
- Dict[Union[str, int, float], ValueSchema],
- ]
- ] = Field(
- default=None,
- description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema.",
- )
- example_questions: Optional[List[LLMExample]] = Field(
- default=None, description="Example questions where this table is useful."
- )
- max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this column.")
- filter_threshold: Optional[float] = Field(
- default=0.0,
- description="Minimum relevance threshold to include metadata filters from this column.",
- )
- priority: Optional[int] = Field(
- default=1,
- description="Priority level for this column, lower numbers will be processed first.",
- )
- relevance: Optional[float] = Field(
- default=None,
- description="Relevance computed during search. Should not be set by the end user.",
- )
-
-
-class TableSchema(BaseModel):
- table: str = Field(description="Name of table in the database")
- description: str = Field(description="Description of what the table represents")
- usage: str = Field(description="How and when to use this Table for search.")
- columns: Optional[Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]] = Field(
- description="Dict or Ordered Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
- )
- example_questions: Optional[List[LLMExample]] = Field(
- default=None, description="Example questions where this table is useful."
- )
- join: str = Field(
- description="SQL join string to join this table with source documents table",
- default="",
- )
- max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this table.")
- filter_threshold: Optional[float] = Field(
- default=0.0,
- description="Minimum relevance required to use this table to generate filters.",
- )
- priority: Optional[int] = Field(
- default=1,
- description="Priority level for this table, lower numbers will be processed first.",
- )
- relevance: Optional[float] = Field(
- default=None,
- description="Relevance computed during search. Should not be set by the end user.",
- )
-
-
-class DatabaseSchema(BaseModel):
- database: str = Field(description="Name of database in the Database")
- description: str = Field(description="Description of what the Database represents")
- usage: str = Field(description="How and when to use this Database for search.")
- tables: Union[OrderedDict[str, TableSchema], Dict[str, TableSchema]] = Field(
- description="Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
- )
- example_questions: Optional[List[LLMExample]] = Field(
- default=None, description="Example questions where this Database is useful."
- )
- max_filters: Optional[int] = Field(
- default=1,
- description="Maximum number of filters to generate for this Database.",
- )
- filter_threshold: Optional[float] = Field(
- default=0.0,
- description="Minimum relevance required to use this Database to generate filters.",
- )
- priority: Optional[int] = Field(
- default=0,
- description="Priority level for this Database, lower numbers will be processed first.",
- )
- relevance: Optional[float] = Field(
- default=None,
- description="Relevance computed during search. Should not be set by the end user.",
- )
-
-
-class SQLRetrieverConfig(BaseModel):
- llm_config: LLMConfig = Field(
- default_factory=LLMConfig,
- description="LLM configuration to use for generating the final SQL query for retrieval",
- )
- metadata_filters_prompt_template: str = Field(
- default=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
- description="Prompt template to generate PostgreSQL metadata filters. Has 'format_instructions', 'schema', 'examples', and 'input' input variables",
- )
- num_retries: int = Field(
- default=DEFAULT_NUM_QUERY_RETRIES,
- description="How many times for an LLM to try rewriting a failed SQL query before using the fallback retriever.",
- )
- rewrite_prompt_template: str = Field(
- default=DEFAULT_SEMANTIC_PROMPT_TEMPLATE,
- description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
- )
- table_prompt_template: str = Field(
- default=DEFAULT_TABLE_PROMPT_TEMPLATE,
- description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
- )
- column_prompt_template: str = Field(
- default=DEFAULT_COLUMN_PROMPT_TEMPLATE,
- description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
- )
- value_prompt_template: str = Field(
- default=DEFAULT_VALUE_PROMPT_TEMPLATE,
- description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
- )
- boolean_system_prompt: str = Field(
- default=DEFAULT_BOOLEAN_PROMPT_TEMPLATE,
- description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
- )
- generative_system_prompt: str = Field(
- default=DEFAULT_GENERATIVE_SYSTEM_PROMPT,
- description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
- )
- source_table: str = Field(
- description="Name of the source table containing the original documents that were embedded"
- )
- source_id_column: str = Field(description="Name of the column containing the UUID.", default="Id")
- max_filters: Optional[int] = Field(description="Maximum number of filters to generate for sql queries.", default=10)
- filter_threshold: Optional[float] = Field(
- description="Minimum relevance required to use this Database to generate filters.",
- default=0.0,
- )
- min_k: Optional[int] = Field(
- description="Minimum number of documents accepted from a generated sql query.",
- default=10,
- )
- database_schema: Optional[DatabaseSchema] = Field(
- default=None,
- description="DatabaseSchema describing the database.",
- )
- examples: Optional[List[LLMExample]] = Field(
- default=None,
- description="Optional examples of final generated pgvector queries based on user input.",
- )
-
-
-class SummarizationConfig(BaseModel):
- llm_config: LLMConfig = Field(
- default_factory=LLMConfig,
- description="LLM configuration to use for summarization",
- )
- map_prompt_template: str = Field(
- default=DEFAULT_MAP_PROMPT_TEMPLATE,
- description="Prompt for an LLM to summarize a single document",
- )
- reduce_prompt_template: str = Field(
- default=DEFAULT_REDUCE_PROMPT_TEMPLATE,
- description="Prompt for an LLM to summarize a set of summaries of documents into one",
- )
- max_summarization_tokens: int = Field(
- default=DEFAULT_MAX_SUMMARIZATION_TOKENS,
- description="Max number of tokens for summarized documents",
- )
-
-
class RerankerMode(str, Enum):
POINTWISE = "pointwise"
LISTWISE = "listwise"
@@ -696,7 +43,7 @@ def _missing_(cls, value):
class RerankerConfig(BaseModel):
model: str = DEFAULT_RERANKING_MODEL
- base_url: str = DEFAULT_LLM_ENDPOINT
+ base_url: Optional[str] = None
filtering_threshold: float = 0.5
num_docs_to_keep: Optional[int] = None
mode: RerankerMode = Field(
@@ -714,144 +61,3 @@ class RerankerConfig(BaseModel):
top_logprobs: int = DEFAULT_RERANKER_TOP_LOGPROBS # Number of top log probabilities to include
max_tokens: int = DEFAULT_RERANKER_MAX_TOKENS # Maximum tokens to generate
valid_class_tokens: List[str] = DEFAULT_VALID_CLASS_TOKENS # Valid class tokens to look for in the response
-
-
-class MultiHopRetrieverConfig(BaseModel):
- """Configuration for multi-hop retrieval"""
-
- base_retriever_type: RetrieverType = Field(
- default=RetrieverType.VECTOR_STORE,
- description="Type of base retriever to use for multi-hop retrieval",
- )
- max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate", ge=1)
- reformulation_template: str = Field(
- default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE,
- description="Template for reformulating questions",
- )
- llm_config: LLMConfig = Field(
- default_factory=LLMConfig,
- description="LLM configuration to use for generating follow-up questions",
- )
-
-
-class RAGPipelineModel(BaseModel):
- documents: Optional[List[Any]] = Field(default=None, description="List of documents")
-
- vector_store_config: VectorStoreConfig = Field(
- default_factory=VectorStoreConfig, description="Vector store configuration"
- )
-
- llm: Optional[Any] = Field(default=None, description="Language model")
- llm_model_name: str = Field(default=DEFAULT_LLM_MODEL, description="Language model name")
- llm_provider: Optional[str] = Field(default=None, description="Language model provider")
- vector_store: Optional[VectorStore] = Field(
- default=None,
- description="Vector store",
- )
- db_connection_string: Optional[str] = Field(default=None, description="Database connection string")
- metadata_config: Optional[MetadataConfig] = Field(
- default=None, description="Configuration for metadata to be used for retrieval"
- )
- table_name: str = Field(default=DEFAULT_TEST_TABLE_NAME, description="Table name")
- embedding_model: Optional[Any] = Field(default=None, description="Embedding model")
- rag_prompt_template: str = Field(default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template")
- retriever_prompt_template: Optional[Union[str, dict]] = Field(default=None, description="Retriever prompt template")
- retriever_type: RetrieverType = Field(default=RetrieverType.VECTOR_STORE, description="Retriever type")
- search_type: SearchType = Field(default=SearchType.SIMILARITY, description="Type of search to perform")
- search_kwargs: SearchKwargs = Field(
- default_factory=SearchKwargs,
- description="Search configuration for the retriever",
- )
- summarization_config: Optional[SummarizationConfig] = Field(
- default=None,
- description="Configuration for summarizing retrieved documents as context",
- )
- # SQL retriever specific.
- sql_retriever_config: Optional[SQLRetrieverConfig] = Field(
- default=None,
- description="Configuration for retrieving documents by generating SQL to filter by metadata & order by distance function",
- )
-
- # Multi retriever specific
- multi_retriever_mode: MultiVectorRetrieverMode = Field(
- default=MultiVectorRetrieverMode.BOTH, description="Multi retriever mode"
- )
- max_concurrency: int = Field(default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency")
- id_key: int = Field(default=DEFAULT_ID_KEY, description="ID key")
- parent_store: Optional[Any] = Field(default=None, description="Parent store")
- text_splitter: Optional[TextSplitter] = Field(default=None, description="Text splitter")
- chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Chunk size")
- chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap")
-
- # Auto retriever specific
- auto_retriever_filter_columns: Optional[List[str]] = Field(default=None, description="Filter columns")
- cardinality_threshold: int = Field(default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold")
- content_column_name: str = Field(
- default=DEFAULT_CONTENT_COLUMN_NAME,
- description="Content column name (the column we will get embeddings)",
- )
- dataset_description: str = Field(default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset")
- reranker: bool = Field(default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker")
- reranker_config: RerankerConfig = Field(default_factory=RerankerConfig, description="Reranker configuration")
-
- multi_hop_config: Optional[MultiHopRetrieverConfig] = Field(
- default=None,
- description="Configuration for multi-hop retrieval. Required when retriever_type is MULTI_HOP.",
- )
-
- @field_validator("multi_hop_config")
- @classmethod
- def validate_multi_hop_config(cls, v: Optional[MultiHopRetrieverConfig], info):
- """Validate that multi_hop_config is set when using multi-hop retrieval."""
- values = info.data
- if values.get("retriever_type") == RetrieverType.MULTI_HOP and v is None:
- raise ValueError("multi_hop_config must be set when using multi-hop retrieval")
- return v
-
- class Config:
- arbitrary_types_allowed = True
- extra = "forbid"
-
- json_schema_extra = {
- "example": {
- "retriever_type": RetrieverType.VECTOR_STORE.value,
- "multi_retriever_mode": MultiVectorRetrieverMode.BOTH.value,
- # add more examples here
- }
- }
-
- @classmethod
- def get_field_names(cls):
- return list(cls.model_fields.keys())
-
- @field_validator("search_kwargs")
- @classmethod
- def validate_search_kwargs(cls, v: SearchKwargs, info) -> SearchKwargs:
- search_type = info.data.get("search_type", SearchType.SIMILARITY)
-
- # Validate MMR-specific parameters
- if search_type == SearchType.MMR:
- if v.fetch_k is not None and v.fetch_k <= v.k:
- raise ValueError("fetch_k must be greater than k")
- if v.lambda_mult is not None and (v.lambda_mult < 0 or v.lambda_mult > 1):
- raise ValueError("lambda_mult must be between 0 and 1")
- if v.fetch_k is None and v.lambda_mult is not None:
- raise ValueError("fetch_k is required when using lambda_mult with MMR search type")
- if v.lambda_mult is None and v.fetch_k is not None:
- raise ValueError("lambda_mult is required when using fetch_k with MMR search type")
- elif search_type != SearchType.MMR:
- if v.fetch_k is not None:
- raise ValueError("fetch_k is only valid for MMR search type")
- if v.lambda_mult is not None:
- raise ValueError("lambda_mult is only valid for MMR search type")
-
- # Validate similarity_score_threshold parameters
- if search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
- if v.score_threshold is not None and (v.score_threshold < 0 or v.score_threshold > 1):
- raise ValueError("score_threshold must be between 0 and 1")
- if v.score_threshold is None:
- raise ValueError("score_threshold is required for similarity_score_threshold search type")
- elif search_type != SearchType.SIMILARITY_SCORE_THRESHOLD and v.score_threshold is not None:
- raise ValueError("score_threshold is only valid for similarity_score_threshold search type")
-
- return v
diff --git a/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py b/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py
index 0d932f40f23..525bcafc99b 100644
--- a/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py
+++ b/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py
@@ -44,7 +44,7 @@ def split_text(self, text: str) -> List[str]:
Split text into chunks
Args:
- text: Text to split
+ text (str): Text to split
Returns:
List of text chunks
diff --git a/mindsdb/integrations/utilities/rag/storage/__init__.py b/mindsdb/integrations/utilities/rag/storage/__init__.py
deleted file mode 100644
index 0a80c3fc8cb..00000000000
--- a/mindsdb/integrations/utilities/rag/storage/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Storage utilities for RAG pipeline"""
diff --git a/mindsdb/integrations/utilities/rag/storage/in_memory_byte_store.py b/mindsdb/integrations/utilities/rag/storage/in_memory_byte_store.py
deleted file mode 100644
index 825fd2f1a47..00000000000
--- a/mindsdb/integrations/utilities/rag/storage/in_memory_byte_store.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Custom in-memory byte store implementation to replace langchain's InMemoryByteStore"""
-
-from typing import List, Tuple, Any, Dict
-
-
-class InMemoryByteStore:
- """
- Custom implementation of InMemoryByteStore to replace langchain's version.
- Stores key-value pairs in memory using a dictionary.
- """
-
- def __init__(self):
- """Initialize the in-memory store with an empty dictionary"""
- self._store: Dict[str, Any] = {}
-
- def mset(self, key_value_pairs: List[Tuple[str, Any]]) -> None:
- """
- Store multiple key-value pairs
-
- Args:
- key_value_pairs: List of (key, value) tuples to store
- """
- for key, value in key_value_pairs:
- self._store[str(key)] = value
-
- def mget(self, keys: List[str]) -> List[Any]:
- """
- Retrieve multiple values by keys
-
- Args:
- keys: List of keys to retrieve
-
- Returns:
- List of values corresponding to keys (None for missing keys)
- """
- return [self._store.get(str(key)) for key in keys]
-
- def get(self, key: str, default: Any = None) -> Any:
- """
- Retrieve a single value by key
-
- Args:
- key: Key to retrieve
- default: Default value to return if key is not found
-
- Returns:
- Value associated with key, or default if not found
- """
- return self._store.get(str(key), default)
-
- def set(self, key: str, value: Any) -> None:
- """
- Store a single key-value pair
-
- Args:
- key: Key to store
- value: Value to store
- """
- self._store[str(key)] = value
-
- def delete(self, key: str) -> bool:
- """
- Delete a key-value pair
-
- Args:
- key: Key to delete
-
- Returns:
- True if key was found and deleted, False otherwise
- """
- key_str = str(key)
- if key_str in self._store:
- del self._store[key_str]
- return True
- return False
-
- def clear(self) -> None:
- """Clear all stored key-value pairs"""
- self._store.clear()
-
- def keys(self) -> List[str]:
- """
- Get all keys in the store
-
- Returns:
- List of all keys
- """
- return list(self._store.keys())
-
- def __contains__(self, key: str) -> bool:
- """Check if a key exists in the store"""
- return str(key) in self._store
-
- def __len__(self) -> int:
- """Get the number of key-value pairs in the store"""
- return len(self._store)
diff --git a/mindsdb/integrations/utilities/rag/utils.py b/mindsdb/integrations/utilities/rag/utils.py
deleted file mode 100644
index 7461eed1a05..00000000000
--- a/mindsdb/integrations/utilities/rag/utils.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from typing import List, Any
-
-import pandas as pd
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-
-
-def df_to_documents(df: pd.DataFrame, content_column_name: str) -> List[SimpleDocument]:
- """
- Given a dataframe, convert it to a list of documents.
-
- :param df: pd.DataFrame
- :param content_column_name: str
-
- :return: List[SimpleDocument]
- """
- documents = []
- for _, row in df.iterrows():
- metadata = row.to_dict()
- page_content = metadata.pop(content_column_name)
- documents.append(SimpleDocument(page_content=page_content, metadata=metadata))
- return documents
-
-
-def documents_to_df(
- content_column_name: str, documents: List[Any], embedding_model: Any = None, with_embeddings: bool = False
-) -> pd.DataFrame:
- """
- Given a list of documents, convert it to a dataframe.
-
- :param content_column_name: str
- :param documents: List of document-like objects with page_content and metadata attributes
- :param embedding_model: Embedding model with embed_documents method
- :param with_embeddings: bool
-
- :return: pd.DataFrame
- """
- df = pd.DataFrame([doc.metadata for doc in documents])
-
- df[content_column_name] = [doc.page_content for doc in documents]
-
- if "date" in df.columns:
- df["date"] = pd.to_datetime(df["date"], errors="coerce")
-
- # Reordering the columns to have the content column first.
- df = df[[content_column_name] + [col for col in df.columns if col != content_column_name]]
-
- if with_embeddings and embedding_model is not None:
- df["embeddings"] = embedding_model.embed_documents(df[content_column_name].tolist())
-
- return df
diff --git a/mindsdb/integrations/utilities/rag/vector_store.py b/mindsdb/integrations/utilities/rag/vector_store.py
deleted file mode 100644
index 69f860d29b1..00000000000
--- a/mindsdb/integrations/utilities/rag/vector_store.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import time
-from datetime import timedelta
-from typing import List, Any, Optional
-
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.vector_store_loader import VectorStoreLoader
-from mindsdb.integrations.utilities.rag.settings import VectorStoreConfig, SearchKwargs
-
-# gpt-3.5-turbo
-_DEFAULT_TPM_LIMIT = 60000
-_DEFAULT_RATE_LIMIT_INTERVAL = timedelta(seconds=10)
-_INITIAL_TOKEN_USAGE = 0
-
-
-class VectorStoreOperator:
- """
- Encapsulates the logic for adding documents to a vector store with rate limiting.
- """
-
- def __init__(
- self,
- vector_store: VectorStore,
- embedding_model: Any,
- documents: Optional[List[Any]] = None,
- vector_store_config: VectorStoreConfig = None,
- token_per_minute_limit: int = _DEFAULT_TPM_LIMIT,
- rate_limit_interval: timedelta = _DEFAULT_RATE_LIMIT_INTERVAL,
- search_kwargs: SearchKwargs = None,
- ):
- self.documents = documents
- self.embedding_model = embedding_model
- self.token_per_minute_limit = token_per_minute_limit
- self.rate_limit_interval = rate_limit_interval
- self.current_token_usage = _INITIAL_TOKEN_USAGE
- self._vector_store = None
- self.vector_store_config = vector_store_config
- self.search_kwargs = search_kwargs or SearchKwargs()
-
- self.verify_vector_store(vector_store, documents)
-
- def verify_vector_store(self, vector_store, documents):
- if documents:
- self._add_documents_to_store(documents, vector_store)
- elif isinstance(vector_store, VectorStore):
- # checking is it instance or subclass instance
- self._vector_store = vector_store
- elif issubclass(vector_store, VectorStore):
- # if it is subclass instance, then create instance of it using vector_store_config
- self._vector_store = load_vector_store(self.embedding_model, self.vector_store_config)
-
- @property
- def vector_store(self):
- return self._vector_store
-
- @staticmethod
- def _calculate_token_usage(document):
- return len(document.page_content)
-
- def _rate_limit(self):
- if self.current_token_usage >= self.token_per_minute_limit:
- time.sleep(self.rate_limit_interval.total_seconds())
- self.current_token_usage = _INITIAL_TOKEN_USAGE
-
- def _update_token_usage(self, document: Any):
- self._rate_limit()
- self.current_token_usage += self._calculate_token_usage(document)
-
- def _add_document(self, document: Any):
- self._update_token_usage(document)
- self.vector_store.add_documents([document])
-
- def _add_documents_to_store(self, documents: List[Any], vector_store: VectorStore):
- self._init_vector_store(documents, vector_store)
- self.add_documents(documents)
-
- def _init_vector_store(self, documents: List[Any], vector_store: VectorStore):
- if len(documents) > 0:
- self._vector_store = vector_store.from_documents(documents=[documents[0]], embedding=self.embedding_model)
-
- def add_documents(self, documents: List[Any]):
- for document in documents:
- self._add_document(document)
-
-
-def load_vector_store(embedding_model: Any, config: VectorStoreConfig) -> VectorStore:
- """
- Loads the vector store based on the provided config and embeddings model
- :param embedding_model:
- :param config:
- :return:
- """
- loader = VectorStoreLoader(embedding_model=embedding_model, config=config)
- return loader.load()
diff --git a/mindsdb/interfaces/agents/agents_controller.py b/mindsdb/interfaces/agents/agents_controller.py
index 90e809ab568..504c2891af6 100644
--- a/mindsdb/interfaces/agents/agents_controller.py
+++ b/mindsdb/interfaces/agents/agents_controller.py
@@ -1,7 +1,9 @@
import datetime
-from typing import Dict, Iterator, List, Union, Tuple, Optional, Any
+from typing import Dict, Iterator, List, Union, Tuple, Optional, Any, Text
import copy
+from enum import Enum
+from pydantic import BaseModel
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy import null
import pandas as pd
@@ -13,19 +15,58 @@
from mindsdb.interfaces.model.functions import PredictorRecordNotFound
from mindsdb.interfaces.model.model_controller import ModelController
from mindsdb.utilities.config import config
+from mindsdb.utilities.utils import validate_pydantic_params
from mindsdb.utilities import log
+from mindsdb.interfaces.agents.utils.sql_toolkit import MindsDBQuery
from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
from .utils.constants import ASSISTANT_COLUMN, SUPPORTED_PROVIDERS, PROVIDER_TO_MODELS
from .utils.pydantic_ai_model_factory import get_llm_provider
-
+from .pydantic_ai_agent import check_agent_llm
logger = log.getLogger(__name__)
default_project = config.get("default_project")
+def check_agent_data(data):
+ tables = data.get("tables", [])
+ knowledge_bases = data.get("knowledge_bases", [])
+ if tables or knowledge_bases:
+ sql_toolkit = MindsDBQuery(tables=tables, knowledge_bases=knowledge_bases)
+
+ if tables and len(sql_toolkit.get_usable_table_names(lazy=False)) == 0:
+ raise ValueError(f"No tables found: {tables}")
+
+ if knowledge_bases and len(sql_toolkit.get_usable_knowledge_base_names(lazy=False)) == 0:
+ raise ValueError(f"No knowledge bases found: {knowledge_bases}")
+
+
+class AgentParamsData(BaseModel):
+ knowledge_bases: List[str] | None = None
+ tables: List[str] | None = None
+
+ class Config:
+ extra = "forbid"
+
+
+class AgentMode(Enum):
+ TEXT = "text"
+ SQL = "sql"
+
+
+class AgentParams(BaseModel):
+ prompt_template: str | None = None
+ model: Dict[Text, Any] | None = None
+ data: AgentParamsData | None = None
+ timeout: int | None = None
+ mode: AgentMode = AgentMode.TEXT
+
+ class Config:
+ extra = "forbid"
+
+
class AgentsController:
"""Handles CRUD operations at the database level for Agents"""
@@ -149,8 +190,7 @@ def add_agent(
self,
name: str,
project_name: str = None,
- model_name: Union[str, dict] = None,
- provider: str = None,
+ model: dict = None,
params: Dict[str, Any] = None,
) -> db.Agents:
"""
@@ -159,25 +199,16 @@ def add_agent(
Parameters:
name (str): The name of the new agent
project_name (str): The containing project
- model_name (str | dict): The name of the existing ML model the agent will use
- provider (str): The provider of the model
+ model: Dict, parameters for the model to use
+ - provider: The provider of the model (e.g., 'openai', 'google')
+ - Other model-specific parameters like 'api_key', 'model_name', etc.
+
params (Dict[str, str]): Parameters to use when running the agent
data: Dict, data sources for an agent, keys:
- knowledge_bases: List of KBs to use
- tables: list of tables to use
- model: Dict, parameters for the model to use
- - provider: The provider of the model (e.g., 'openai', 'google')
- - Other model-specific parameters like 'api_key', 'model_name', etc.
_api_key: API key for the provider (e.g., openai_api_key)
- # Deprecated parameters:
- database: The database to use (default is 'mindsdb')
- knowledge_base_database: The database to use for knowledge base queries (default is 'mindsdb')
- include_tables: List of tables to include
- ignore_tables: List of tables to ignore
- include_knowledge_bases: List of knowledge bases to include
- ignore_knowledge_bases: List of knowledge bases to ignore
-
Returns:
agent (db.Agents): The created agent
@@ -195,61 +226,19 @@ def add_agent(
# No need to copy params since we're not preserving the original reference
params = params or {}
+ params["model"] = model
- if isinstance(model_name, dict):
- # move into params
- params["model"] = model_name
- model_name = None
+ # check agent params
+ validate_pydantic_params(params, AgentParams, "agent")
- if model_name is not None:
- _, provider = self.check_model_provider(model_name, provider)
+ # check llm works
+ llm_params = self.get_agent_llm_params(model)
+ check_agent_llm(llm_params)
- if model_name is None:
- logger.warning("'model_name' param is not provided. Using default global llm model at runtime.")
-
- # If model_name is not provided, we use default global llm model at runtime
- # Default parameters will be applied at runtime via get_agent_llm_params
- # This allows global default updates to apply to all agents immediately
-
- # Extract API key if provided in the format _api_key
- if provider is not None:
- provider_api_key_param = f"{provider.lower()}_api_key"
- if provider_api_key_param in params:
- # Keep the API key in params for the agent to use
- # It will be picked up by get_api_key() in handler_utils.py
- pass
-
- # Handle generic api_key parameter if provided
- if "api_key" in params:
- # Keep the generic API key in params for the agent to use
- # It will be picked up by get_api_key() in handler_utils.py
- pass
-
- depreciated_params = [
- "database",
- "knowledge_base_database",
- "include_tables",
- "ignore_tables",
- "include_knowledge_bases",
- "ignore_knowledge_bases",
- ]
- if any(param in params for param in depreciated_params):
- raise ValueError(
- f"Parameters {', '.join(depreciated_params)} are deprecated. "
- "Use 'data' parameter with 'tables' and 'knowledge_bases' keys instead."
- )
-
- include_tables = None
- include_knowledge_bases = None
- if "data" in params:
- include_knowledge_bases = params["data"].get("knowledge_bases")
- include_tables = params["data"].get("tables")
-
- # Convert string parameters to lists if needed
- if isinstance(include_tables, str):
- include_tables = [t.strip() for t in include_tables.split(",")]
- if isinstance(include_knowledge_bases, str):
- include_knowledge_bases = [kb.strip() for kb in include_knowledge_bases.split(",")]
+ # check data
+ data = params.get("data", {})
+ if data:
+ check_agent_data(data)
agent = db.Agents(
name=name,
@@ -257,8 +246,6 @@ def add_agent(
company_id=ctx.company_id,
user_id=ctx.user_id,
user_class=ctx.user_class,
- model_name=model_name,
- provider=provider,
params=params,
)
@@ -272,9 +259,8 @@ def update_agent(
agent_name: str,
project_name: str = default_project,
name: str = None,
- model_name: Union[str, dict] = None,
- provider: str = None,
- params: Dict[str, str] = None,
+ model: dict = None,
+ params: Dict[str, Any] = None,
):
"""
Updates an agent in the database.
@@ -283,8 +269,7 @@ def update_agent(
agent_name (str): The name of the new agent, or existing agent to update
project_name (str): The containing project
name (str): The updated name of the agent
- model_name (str | dict): The name of the existing ML model the agent will use
- provider (str): The provider of the model
+ model dict: model parameters
params: (Dict[str, str]): Parameters to use when running the agent
Returns:
@@ -301,12 +286,7 @@ def update_agent(
existing_params = existing_agent.params or {}
is_demo = (existing_agent.params or {}).get("is_demo", False)
- if is_demo and (
- (name is not None and name != agent_name)
- or (model_name is not None and existing_agent.model_name != model_name)
- or (provider is not None and existing_agent.provider != provider)
- or (isinstance(params, dict) and len(params) > 0 and "prompt_template" not in params)
- ):
+ if is_demo:
raise ValueError("It is forbidden to change properties of the demo object")
if name is not None and name != agent_name:
@@ -316,27 +296,34 @@ def update_agent(
raise EntityExistsError(f"Agent with updated name already exists: {name}")
existing_agent.name = name
- if model_name or provider:
- if isinstance(model_name, dict):
- # move into params
- existing_params["model"] = model_name
- model_name = None
-
- # check model and provider
- _, provider = self.check_model_provider(model_name, provider)
- # Update model and provider
- existing_agent.model_name = model_name
- existing_agent.provider = provider
-
- if params is not None:
- # Merge params on update
- existing_params.update(params)
- # Remove None values entirely.
- params = {k: v for k, v in existing_params.items() if v is not None}
- existing_agent.params = params
- # Some versions of SQL Alchemy won't handle JSON updates correctly without this.
- # See: https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.attributes.flag_modified
- flag_modified(existing_agent, "params")
+ params = params or {}
+
+ if model:
+ params["model"] = model
+
+ if params:
+ validate_pydantic_params(params, AgentParams, "agent")
+ else:
+ # do nothing
+ return existing_agent
+
+ if model:
+ # check llm works
+ llm_params = self.get_agent_llm_params(model)
+ check_agent_llm(llm_params)
+
+ data = params.get("data", {})
+ if data:
+ check_agent_data(data)
+
+ # Merge params on update
+ existing_params.update(params)
+ # Remove None values entirely.
+ params = {k: v for k, v in existing_params.items() if v is not None}
+ existing_agent.params = params
+ # Some versions of SQL Alchemy won't handle JSON updates correctly without this.
+ # See: https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.attributes.flag_modified
+ flag_modified(existing_agent, "params")
db.session.commit()
return existing_agent
@@ -362,32 +349,12 @@ def delete_agent(self, agent_name: str, project_name: str = default_project):
agent.deleted_at = datetime.datetime.now()
db.session.commit()
- def get_agent_llm_params(self, agent):
+ def get_agent_llm_params(self, model_params):
"""
Get agent LLM parameters by combining default config with user provided parameters.
Uses the same pattern as knowledge bases get_model_params function.
"""
- agent_params = agent.params
-
- # Get model params from agent params (same structure as knowledge bases)
- if "model" in agent_params:
- model_params = agent_params.get("model", {})
- if not isinstance(model_params, dict):
- raise ValueError("Model parameters must be passed as a JSON object")
- else:
- # params for LLM can be arbitrary (backward compatibility)
- model_params = copy.deepcopy(agent_params)
- model_params.pop("mode", None)
- model_params.pop("prompt_template", None)
-
- _, provider = self.check_model_provider(agent.model_name, agent.provider)
-
- if agent.model_name is not None:
- model_params["model_name"] = agent.model_name
- if provider is not None:
- model_params["provider"] = provider
-
combined_model_params = copy.deepcopy(config.get("default_llm", {}))
if model_params:
@@ -433,7 +400,7 @@ def get_completion(
from .pydantic_ai_agent import PydanticAIAgent
# Get agent parameters and combine with default LLM parameters at runtime
- llm_params = self.get_agent_llm_params(agent)
+ llm_params = self.get_agent_llm_params(agent.params.get("model"))
pydantic_agent = PydanticAIAgent(agent, llm_params=llm_params)
diff --git a/mindsdb/interfaces/agents/callback_handlers.py b/mindsdb/interfaces/agents/callback_handlers.py
deleted file mode 100644
index f4735b737c7..00000000000
--- a/mindsdb/interfaces/agents/callback_handlers.py
+++ /dev/null
@@ -1,177 +0,0 @@
-import io
-import logging
-import contextlib
-from typing import Any, Dict, List, Union, Callable
-
-from langchain_core.agents import AgentAction, AgentFinish
-from langchain_core.callbacks.base import BaseCallbackHandler
-from langchain_core.messages.base import BaseMessage
-from langchain_core.outputs import LLMResult
-from langchain_core.callbacks import StdOutCallbackHandler
-
-
-class ContextCaptureCallback(BaseCallbackHandler):
- def __init__(self):
- self.context = None
-
- def on_retriever_end(self, documents: List[Any], *, run_id: str, parent_run_id: Union[str, None] = None, **kwargs: Any) -> Any:
- self.context = [{
- 'page_content': doc.page_content,
- 'metadata': doc.metadata
- } for doc in documents]
-
- def get_contexts(self):
- return self.context
-
-
-class VerboseLogCallbackHandler(StdOutCallbackHandler):
- def __init__(self, logger: logging.Logger, verbose: bool):
- self.logger = logger
- self.verbose = verbose
- super().__init__()
-
- def __call(self, method: Callable, *args: List[Any], **kwargs: Any) -> Any:
- if self.verbose is False:
- return
- f = io.StringIO()
- with contextlib.redirect_stdout(f):
- method(*args, **kwargs)
- output = f.getvalue()
- self.logger.info(output)
-
- def on_chain_start(self, *args: List[Any], **kwargs: Any) -> None:
- self.__call(super().on_chain_start, *args, **kwargs)
-
- def on_chain_end(self, *args: List[Any], **kwargs: Any) -> None:
- self.__call(super().on_chain_end, *args, **kwargs)
-
- def on_agent_action(self, *args: List[Any], **kwargs: Any) -> None:
- self.__call(super().on_agent_action, *args, **kwargs)
-
- def on_tool_end(self, *args: List[Any], **kwargs: Any) -> None:
- self.__call(super().on_tool_end, *args, **kwargs)
-
- def on_text(self, *args: List[Any], **kwargs: Any) -> None:
- self.__call(super().on_text, *args, **kwargs)
-
- def on_agent_finish(self, *args: List[Any], **kwargs: Any) -> None:
- self.__call(super().on_agent_finish, *args, **kwargs)
-
-
-class LogCallbackHandler(BaseCallbackHandler):
- '''Langchain callback handler that logs agent and chain executions.'''
-
- def __init__(self, logger: logging.Logger, verbose: bool = True):
- logger.setLevel('DEBUG')
- self.logger = logger
- self._num_running_chains = 0
- self.generated_sql = None
- self.verbose_log_handler = VerboseLogCallbackHandler(logger, verbose)
-
- def on_llm_start(
- self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
- ) -> Any:
- '''Run when LLM starts running.'''
- self.logger.debug('LLM started with prompts:')
- for prompt in prompts:
- self.logger.debug(prompt[:50])
- self.verbose_log_handler.on_llm_start(serialized, prompts, **kwargs)
-
- def on_chat_model_start(
- self,
- serialized: Dict[str, Any],
- messages: List[List[BaseMessage]], **kwargs: Any
- ) -> Any:
- '''Run when Chat Model starts running.'''
- self.logger.debug('Chat model started with messages:')
- for message_list in messages:
- for message in message_list:
- self.logger.debug(message.pretty_repr())
-
- def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
- '''Run on new LLM token. Only available when streaming is enabled.'''
- pass
-
- def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:
- '''Run when LLM ends running.'''
- self.logger.debug('LLM ended with response:')
- self.logger.debug(str(response.llm_output))
-
- def on_llm_error(
- self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
- ) -> Any:
- '''Run when LLM errors.'''
- self.logger.debug(f'LLM encountered an error: {str(error)}')
-
- def on_chain_start(
- self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
- ) -> Any:
- '''Run when chain starts running.'''
- self._num_running_chains += 1
- self.logger.info('Entering new LLM chain ({} total)'.format(
- self._num_running_chains))
- self.logger.debug('Inputs: {}'.format(inputs))
-
- self.verbose_log_handler.on_chain_start(serialized=serialized, inputs=inputs, **kwargs)
-
- def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
- '''Run when chain ends running.'''
- self._num_running_chains -= 1
- self.logger.info('Ended LLM chain ({} total)'.format(
- self._num_running_chains))
- self.logger.debug('Outputs: {}'.format(outputs))
-
- self.verbose_log_handler.on_chain_end(outputs=outputs, **kwargs)
-
- def on_chain_error(
- self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
- ) -> Any:
- '''Run when chain errors.'''
- self._num_running_chains -= 1
- self.logger.error(
- 'LLM chain encountered an error ({} running): {}'.format(
- self._num_running_chains, error))
-
- def on_tool_start(
- self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
- ) -> Any:
- '''Run when tool starts running.'''
- pass
-
- def on_tool_end(self, output: str, **kwargs: Any) -> Any:
- '''Run when tool ends running.'''
- self.verbose_log_handler.on_tool_end(output=output, **kwargs)
-
- def on_tool_error(
- self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
- ) -> Any:
- '''Run when tool errors.'''
- pass
-
- def on_text(self, text: str, **kwargs: Any) -> Any:
- '''Run on arbitrary text.'''
- self.verbose_log_handler.on_text(text=text, **kwargs)
-
- def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
- '''Run on agent action.'''
- self.logger.debug(f'Running tool {action.tool} with input:')
- self.logger.debug(action.tool_input)
-
- stop_block = 'Observation: '
- if stop_block in action.tool_input:
- action.tool_input = action.tool_input[: action.tool_input.find(stop_block)]
-
- if action.tool.startswith("sql_db_query"):
- # Save the generated SQL query
- self.generated_sql = action.tool_input
-
- # fix for mistral
- action.tool = action.tool.replace('\\', '')
-
- self.verbose_log_handler.on_agent_action(action=action, **kwargs)
-
- def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
- '''Run on agent end.'''
- self.logger.debug('Agent finished with return values:')
- self.logger.debug(str(finish.return_values))
- self.verbose_log_handler.on_agent_finish(finish=finish, **kwargs)
diff --git a/mindsdb/interfaces/agents/event_dispatch_callback_handler.py b/mindsdb/interfaces/agents/event_dispatch_callback_handler.py
deleted file mode 100644
index 7446ba2adaa..00000000000
--- a/mindsdb/interfaces/agents/event_dispatch_callback_handler.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import queue
-from typing import Any, Dict, List, Optional, Sequence
-from uuid import UUID
-
-from langchain_core.callbacks import BaseCallbackHandler
-from langchain_core.documents import Document
-
-
-class EventDispatchCallbackHandler(BaseCallbackHandler):
- '''Puts dispatched events onto an event queue to be processed as a streaming chunk'''
- def __init__(self, queue: queue.Queue):
- self.queue = queue
-
- def on_custom_event(
- self,
- name: str,
- data: Any,
- *,
- run_id: UUID,
- tags: Optional[List[str]] = None,
- metadata: Optional[Dict[str, Any]] = None,
- **kwargs
- ):
- self.queue.put({
- 'type': 'event',
- 'name': name,
- 'data': data
- })
-
- def on_retriever_end(
- self,
- documents: Sequence[Document],
- *,
- run_id: UUID,
- parent_run_id: Optional[UUID] = None,
- **kwargs: Any,
- ) -> Any:
- document_objects = []
- for d in documents:
- document_objects.append({
- 'content': d.page_content,
- 'metadata': d.metadata
- })
- self.queue.put({
- 'type': 'event',
- 'name': 'retriever_end',
- 'data': {
- 'documents': document_objects
- }
- })
diff --git a/mindsdb/interfaces/agents/langfuse_callback_handler.py b/mindsdb/interfaces/agents/langfuse_callback_handler.py
deleted file mode 100644
index 948eadda6b8..00000000000
--- a/mindsdb/interfaces/agents/langfuse_callback_handler.py
+++ /dev/null
@@ -1,308 +0,0 @@
-from typing import Any, Dict, Union, Optional, List
-from uuid import uuid4
-import datetime
-import json
-
-from langchain_core.callbacks.base import BaseCallbackHandler
-
-from mindsdb.utilities import log
-from mindsdb.interfaces.storage import db
-
-logger = log.getLogger(__name__)
-logger.setLevel('DEBUG')
-
-
-class LangfuseCallbackHandler(BaseCallbackHandler):
- """Langchain callback handler that traces tool & chain executions using Langfuse."""
-
- def __init__(self, langfuse, trace_id: Optional[str] = None, observation_id: Optional[str] = None):
- self.langfuse = langfuse
- self.chain_uuid_to_span = {}
- self.action_uuid_to_span = {}
- # if these are not available, we generate some UUIDs
- self.trace_id = trace_id or uuid4().hex
- self.observation_id = observation_id or uuid4().hex
- # Track metrics about tools and chains
- self.tool_metrics = {}
- self.chain_metrics = {}
- self.current_chain = None
-
- def on_tool_start(
- self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
- ) -> Any:
- """Run when tool starts running."""
- parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex
- action_span = self.action_uuid_to_span.get(parent_run_uuid)
- if action_span is None:
- return
-
- tool_name = serialized.get("name", "tool")
- start_time = datetime.datetime.now()
-
- # Initialize or update tool metrics
- if tool_name not in self.tool_metrics:
- self.tool_metrics[tool_name] = {
- 'count': 0,
- 'total_time': 0,
- 'errors': 0,
- 'last_error': None,
- 'inputs': []
- }
-
- self.tool_metrics[tool_name]['count'] += 1
- self.tool_metrics[tool_name]['inputs'].append(input_str)
-
- metadata = {
- 'tool_name': tool_name,
- 'started': start_time.isoformat(),
- 'start_timestamp': start_time.timestamp(),
- 'input_length': len(input_str) if input_str else 0
- }
- action_span.update(metadata=metadata)
-
- def on_tool_end(self, output: str, **kwargs: Any) -> Any:
- """Run when tool ends running."""
- parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex
- action_span = self.action_uuid_to_span.get(parent_run_uuid)
- if action_span is None:
- return
-
- end_time = datetime.datetime.now()
- tool_name = action_span.metadata.get('tool_name', 'unknown')
- start_timestamp = action_span.metadata.get('start_timestamp')
-
- if start_timestamp:
- duration = end_time.timestamp() - start_timestamp
- if tool_name in self.tool_metrics:
- self.tool_metrics[tool_name]['total_time'] += duration
-
- metadata = {
- 'finished': end_time.isoformat(),
- 'duration_seconds': duration if start_timestamp else None,
- 'output_length': len(output) if output else 0
- }
-
- action_span.update(
- output=output, # tool output is action output (unless superseded by a global action output)
- metadata=metadata
- )
-
- def on_tool_error(
- self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
- ) -> Any:
- """Run when tool errors."""
- parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex
- action_span = self.action_uuid_to_span.get(parent_run_uuid)
- if action_span is None:
- return
-
- try:
- error_str = str(error)
- except Exception:
- error_str = "Couldn't get error string."
-
- tool_name = action_span.metadata.get('tool_name', 'unknown')
- if tool_name in self.tool_metrics:
- self.tool_metrics[tool_name]['errors'] += 1
- self.tool_metrics[tool_name]['last_error'] = error_str
-
- metadata = {
- 'error_description': error_str,
- 'error_type': error.__class__.__name__,
- 'error_time': datetime.datetime.now().isoformat()
- }
- action_span.update(metadata=metadata)
-
- def on_chain_start(
- self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
- ) -> Any:
- """Run when chain starts running."""
- if self.langfuse is None:
- return
-
- run_uuid = kwargs.get('run_id', uuid4()).hex
-
- if serialized is None:
- serialized = {}
-
- chain_name = serialized.get("name", "chain")
- start_time = datetime.datetime.now()
-
- # Initialize or update chain metrics
- if chain_name not in self.chain_metrics:
- self.chain_metrics[chain_name] = {
- 'count': 0,
- 'total_time': 0,
- 'errors': 0,
- 'last_error': None
- }
-
- self.chain_metrics[chain_name]['count'] += 1
- self.current_chain = chain_name
-
- try:
- chain_span = self.langfuse.span(
- name=f'{chain_name}-{run_uuid}',
- trace_id=self.trace_id,
- parent_observation_id=self.observation_id,
- input=json.dumps(inputs, indent=2)
- )
-
- metadata = {
- 'chain_name': chain_name,
- 'started': start_time.isoformat(),
- 'start_timestamp': start_time.timestamp(),
- 'input_keys': list(inputs.keys()) if isinstance(inputs, dict) else None,
- 'input_size': len(inputs) if isinstance(inputs, dict) else len(str(inputs))
- }
- chain_span.update(metadata=metadata)
- self.chain_uuid_to_span[run_uuid] = chain_span
- except Exception as e:
- logger.warning(f"Error creating Langfuse span: {str(e)}")
-
- def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
- """Run when chain ends running."""
- if self.langfuse is None:
- return
-
- chain_uuid = kwargs.get('run_id', uuid4()).hex
- if chain_uuid not in self.chain_uuid_to_span:
- return
- chain_span = self.chain_uuid_to_span.pop(chain_uuid)
- if chain_span is None:
- return
-
- try:
- end_time = datetime.datetime.now()
- chain_name = chain_span.metadata.get('chain_name', 'unknown')
- start_timestamp = chain_span.metadata.get('start_timestamp')
-
- if start_timestamp and chain_name in self.chain_metrics:
- duration = end_time.timestamp() - start_timestamp
- self.chain_metrics[chain_name]['total_time'] += duration
-
- metadata = {
- 'finished': end_time.isoformat(),
- 'duration_seconds': duration if start_timestamp else None,
- 'output_keys': list(outputs.keys()) if isinstance(outputs, dict) else None,
- 'output_size': len(outputs) if isinstance(outputs, dict) else len(str(outputs))
- }
- chain_span.update(output=json.dumps(outputs, indent=2), metadata=metadata)
- chain_span.end()
- except Exception as e:
- logger.warning(f"Error updating Langfuse span: {str(e)}")
-
- def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
- """Run when chain errors."""
- chain_uuid = kwargs.get('run_id', uuid4()).hex
- if chain_uuid not in self.chain_uuid_to_span:
- return
- chain_span = self.chain_uuid_to_span.get(chain_uuid)
- if chain_span is None:
- return
-
- try:
- error_str = str(error)
- except Exception:
- error_str = "Couldn't get error string."
-
- chain_name = chain_span.metadata.get('chain_name', 'unknown')
- if chain_name in self.chain_metrics:
- self.chain_metrics[chain_name]['errors'] += 1
- self.chain_metrics[chain_name]['last_error'] = error_str
-
- metadata = {
- 'error_description': error_str,
- 'error_type': error.__class__.__name__,
- 'error_time': datetime.datetime.now().isoformat()
- }
- chain_span.update(metadata=metadata)
-
- def on_agent_action(self, action, **kwargs: Any) -> Any:
- """Run on agent action."""
- if self.langfuse is None:
- return
-
- run_uuid = kwargs.get('run_id', uuid4()).hex
- try:
- action_span = self.langfuse.span(
- name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
- trace_id=self.trace_id,
- parent_observation_id=self.observation_id,
- input=str(action)
- )
- self.action_uuid_to_span[run_uuid] = action_span
- except Exception as e:
- logger.warning(f"Error creating Langfuse span for agent action: {str(e)}")
-
- def on_agent_finish(self, finish, **kwargs: Any) -> Any:
- """Run on agent end."""
- if self.langfuse is None:
- return
-
- run_uuid = kwargs.get('run_id', uuid4()).hex
- if run_uuid not in self.action_uuid_to_span:
- return
- action_span = self.action_uuid_to_span.pop(run_uuid)
- if action_span is None:
- return
-
- try:
- if finish is not None:
- action_span.update(output=finish) # supersedes tool output
- action_span.end()
- except Exception as e:
- logger.warning(f"Error updating Langfuse span: {str(e)}")
-
- def auth_check(self):
- if self.langfuse is not None:
- return self.langfuse.auth_check()
- return False
-
- def get_metrics(self) -> Dict[str, Any]:
- """Get collected metrics about tools and chains.
-
- Returns:
- Dict containing:
- - tool_metrics: Statistics about tool usage, errors, and timing
- - chain_metrics: Statistics about chain execution, errors, and timing
- For each tool/chain, includes:
- - count: Number of times used
- - total_time: Total execution time
- - errors: Number of errors
- - last_error: Most recent error message
- - avg_duration: Average execution time
- """
- metrics = {
- 'tool_metrics': {},
- 'chain_metrics': {}
- }
-
- # Process tool metrics
- for tool_name, data in self.tool_metrics.items():
- metrics['tool_metrics'][tool_name] = {
- 'count': data['count'],
- 'total_time': data['total_time'],
- 'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
- 'errors': data['errors'],
- 'last_error': data['last_error'],
- 'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
- }
-
- # Process chain metrics
- for chain_name, data in self.chain_metrics.items():
- metrics['chain_metrics'][chain_name] = {
- 'count': data['count'],
- 'total_time': data['total_time'],
- 'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
- 'errors': data['errors'],
- 'last_error': data['last_error'],
- 'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
- }
-
- return metrics
-
-
-def get_skills(agent: db.Agents) -> List:
- """ Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
- return [rel.skill.type for rel in agent.skills_relationships]
diff --git a/mindsdb/interfaces/agents/modes/base.py b/mindsdb/interfaces/agents/modes/base.py
index 97376b2a2af..1ec13d9242c 100644
--- a/mindsdb/interfaces/agents/modes/base.py
+++ b/mindsdb/interfaces/agents/modes/base.py
@@ -8,6 +8,10 @@ class PlanResponse(BaseModel):
estimated_steps: int = Field(..., description="Estimated number of steps needed to solve the question")
+class TestResponse(BaseModel):
+ text: str = Field(..., description="Text response to the user")
+
+
class ResponseType:
FINAL_QUERY = "final_query" # this is the final query
EXPLORATORY = "exploratory_query" # this is a query to explore and collect info to solve the challenge (e.g., distinct values of a categorical column, schema inference, etc.)
diff --git a/mindsdb/interfaces/agents/provider_utils.py b/mindsdb/interfaces/agents/provider_utils.py
deleted file mode 100644
index 8447102fb1b..00000000000
--- a/mindsdb/interfaces/agents/provider_utils.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""Utilities for working with agent providers.
-
-These helpers are intentionally free of heavy optional dependencies so they can
-be imported in lightweight builds where LangChain is not installed.
-"""
-
-from typing import Dict
-
-from mindsdb.interfaces.agents.constants import (
- ANTHROPIC_CHAT_MODELS,
- GOOGLE_GEMINI_CHAT_MODELS,
- NVIDIA_NIM_CHAT_MODELS,
- OLLAMA_CHAT_MODELS,
- OPEN_AI_CHAT_MODELS,
- WRITER_CHAT_MODELS,
-)
-
-
-def get_llm_provider(args: Dict) -> str:
- """Infer the LLM provider from the supplied arguments."""
-
- # Prefer an explicitly provided provider.
- if "provider" in args:
- return args["provider"]
-
- model_name = args.get("model_name")
- if model_name in ANTHROPIC_CHAT_MODELS:
- return "anthropic"
- if model_name in OPEN_AI_CHAT_MODELS:
- return "openai"
- if model_name in OLLAMA_CHAT_MODELS:
- return "ollama"
- if model_name in NVIDIA_NIM_CHAT_MODELS:
- return "nvidia_nim"
- if model_name in GOOGLE_GEMINI_CHAT_MODELS:
- return "google"
- if model_name in WRITER_CHAT_MODELS:
- return "writer"
-
- raise ValueError("Invalid model name. Please define a supported llm provider")
diff --git a/mindsdb/interfaces/agents/pydantic_ai_agent.py b/mindsdb/interfaces/agents/pydantic_ai_agent.py
index f32fb3d8ed3..7a38c57b165 100644
--- a/mindsdb/interfaces/agents/pydantic_ai_agent.py
+++ b/mindsdb/interfaces/agents/pydantic_ai_agent.py
@@ -25,7 +25,7 @@
from mindsdb.utilities.context import context as ctx
from mindsdb.utilities.langfuse import LangfuseClientWrapper
from mindsdb.interfaces.agents.modes import sql as sql_mode, text_sql as text_sql_mode
-from mindsdb.interfaces.agents.modes.base import ResponseType, PlanResponse
+from mindsdb.interfaces.agents.modes.base import ResponseType, PlanResponse, TestResponse
logger = log.getLogger(__name__)
DEBUG_LOGGER = logger.debug
@@ -65,6 +65,12 @@ def wrapper(self, messages, *args, **kwargs):
return decorator
+def check_agent_llm(llm_params):
+ model = get_model_instance_from_kwargs(llm_params)
+ agent = Agent(model, output_type=TestResponse)
+ agent.run_sync("Say 'hi'")
+
+
class PydanticAIAgent:
"""Pydantic AI-based agent to replace LangchainAgent"""
@@ -87,10 +93,6 @@ def __init__(
self.llm: Optional[object] = None
self.embedding_model: Optional[object] = None
- self.log_callback_handler: Optional[object] = None
- self.langfuse_callback_handler: Optional[object] = None
- self.mdb_langfuse_callback_handler: Optional[object] = None
-
self.langfuse_client_wrapper = LangfuseClientWrapper()
self.agent_mode = self.agent.params.get("mode", "text")
diff --git a/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py b/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py
index 8189542312e..aa72d5e2f99 100644
--- a/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py
+++ b/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py
@@ -52,7 +52,7 @@ def get_llm_provider(args: Dict) -> str:
return "writer"
# For vLLM, require explicit provider specification
- raise ValueError("Invalid model name. Please define a supported llm provider")
+ raise ValueError(f"Invalid model name: {model_name}. Please define a supported llm provider")
def get_embedding_model_provider(args: Dict) -> str:
diff --git a/mindsdb/interfaces/agents/utils/sql_toolkit.py b/mindsdb/interfaces/agents/utils/sql_toolkit.py
index 1468bc2fb75..9588ad6ecba 100644
--- a/mindsdb/interfaces/agents/utils/sql_toolkit.py
+++ b/mindsdb/interfaces/agents/utils/sql_toolkit.py
@@ -281,11 +281,11 @@ def _check_f(node, is_table=None, **kwargs):
query_traversal(ast_query, _check_f)
- def get_usable_table_names(self):
+ def get_usable_table_names(self, lazy=True):
if not self.tables:
# no tables allowed
return []
- if not self.tables.has_wildcard:
+ if not self.tables.has_wildcard and lazy:
return self.tables.items
result_tables = []
@@ -330,16 +330,19 @@ def get_usable_table_names(self):
return result_tables
- def get_usable_knowledge_base_names(self):
+ def get_usable_knowledge_base_names(self, lazy=True):
if not self.knowledge_bases:
# no tables allowed
return []
- if not self.knowledge_bases.has_wildcard:
+ if not self.knowledge_bases.has_wildcard and lazy:
return self.knowledge_bases.items
try:
# Query to get all knowledge bases
- ast_query = Show(category="Knowledge Bases")
+ ast_query = Select(
+ targets=[Identifier("PROJECT"), Identifier("NAME")],
+ from_table=Identifier(parts=["information_schema", "knowledge_bases"]),
+ )
result = self.command_executor.execute_command(ast_query)
kb_names = []
diff --git a/mindsdb/interfaces/database/database.py b/mindsdb/interfaces/database/database.py
index bbacc9c256a..3f0fb602ace 100644
--- a/mindsdb/interfaces/database/database.py
+++ b/mindsdb/interfaces/database/database.py
@@ -101,11 +101,15 @@ def get_dict(self, filter_type: Optional[str] = None, lowercase: bool = True):
def get_integration(self, integration_id):
# get integration by id
-
- # TODO get directly from db?
- for rec in self.get_list():
- if rec["id"] == integration_id and rec["type"] == "data":
- return {"name": rec["name"], "type": rec["type"], "engine": rec["engine"], "id": rec["id"]}
+ integration = self.integration_controller.get_by_id(integration_id)
+ if integration and integration.get("type", "data") == "data":
+ return {
+ "name": integration["name"],
+ "type": integration["type"],
+ "engine": integration["engine"],
+ "id": integration["id"],
+ }
+ return None
def exists(self, db_name: str) -> bool:
return db_name.lower() in self.get_dict()
diff --git a/mindsdb/interfaces/database/integrations.py b/mindsdb/interfaces/database/integrations.py
index 867686aad02..5605ee04af5 100644
--- a/mindsdb/interfaces/database/integrations.py
+++ b/mindsdb/interfaces/database/integrations.py
@@ -1,11 +1,13 @@
import os
import sys
+import types
import base64
import shutil
import ast
import time
import tempfile
import importlib
+import importlib.util as iutil
import threading
from pathlib import Path
from copy import deepcopy
@@ -33,6 +35,13 @@
from mindsdb.integrations.libs.base import BaseHandler
import mindsdb.utilities.profiler as profiler
+from mindsdb.integrations.utilities.community_handler_fetcher import (
+ community_handlers_enabled,
+ fetch_handler,
+ get_community_handlers_storage_dir,
+ list_available_handlers,
+)
+
logger = log.getLogger(__name__)
@@ -152,9 +161,19 @@ def _is_not_empty_str(s):
def __init__(self):
self._import_lock = threading.Lock()
+ self._community_handlers_dir = None
self._load_handler_modules()
self.handlers_cache = HandlersCache()
+ @property
+ def community_handlers_dir(self) -> Path:
+ """Returns (and creates) the community handlers directory under MINDSDB_STORAGE_DIR."""
+ if self._community_handlers_dir is None:
+ config = Config()
+ storage_root = Path(config["paths"]["root"])
+ self._community_handlers_dir = get_community_handlers_storage_dir(storage_root)
+ return self._community_handlers_dir
+
def _add_integration_record(self, name, engine, connection_args):
integration_record = db.Integration(
name=name, engine=engine, data=connection_args or {}, company_id=ctx.company_id
@@ -698,45 +717,95 @@ def _get_handler_icon(self, handler_dir, icon_path):
logger.error(f"Error reading icon for {handler_dir}, {e}!")
return icon
+ def _register_handler_dir(self, handler_dir: Path, is_community: bool = False):
+ """
+ Parse a handler directory and register it in handlers_import_status.
+ If the handler can be imported, its module is added to handler_modules and import status is set to success.
+ If the handler cannot be imported, import status is set to failed with error message."""
+ handler_info = self._get_handler_info(handler_dir)
+ if "name" not in handler_info:
+ return
+ handler_name = handler_info["name"]
+ dependencies = self._read_dependencies(handler_dir)
+ handler_meta = {
+ "path": handler_dir,
+ "import": {
+ "success": None,
+ "error_message": None,
+ "folder": handler_dir.name,
+ "dependencies": dependencies,
+ },
+ "name": handler_name,
+ "permanent": handler_info.get("permanent", False),
+ "connection_args": handler_info.get("connection_args", None),
+ "class_type": handler_info.get("class_type", None),
+ "type": handler_info.get("type"),
+ "support_level": handler_info.get("support_level"),
+ "community": is_community,
+ }
+ if "icon_path" in handler_info:
+ icon = self._get_handler_icon(handler_dir, handler_info["icon_path"])
+ if icon:
+ handler_meta["icon"] = icon
+ self.handlers_import_status[handler_name] = handler_meta
+
def _load_handler_modules(self):
- mindsdb_path = Path(importlib.util.find_spec("mindsdb").origin).parent
+ mindsdb_path = Path(iutil.find_spec("mindsdb").origin).parent
handlers_path = mindsdb_path.joinpath("integrations/handlers")
# edge case: running from tests directory, find_spec finds the base folder instead of actual package
if not os.path.isdir(handlers_path):
- mindsdb_path = Path(importlib.util.find_spec("mindsdb").origin).parent.joinpath("mindsdb")
+ mindsdb_path = Path(iutil.find_spec("mindsdb").origin).parent.joinpath("mindsdb")
handlers_path = mindsdb_path.joinpath("integrations/handlers")
self.handler_modules = {}
self.handlers_import_status = {}
+
+ # Built-in handlers always present in the mindsdb package
for handler_dir in handlers_path.iterdir():
if handler_dir.is_dir() is False or handler_dir.name.startswith("__"):
continue
+ # TODO: do we need this for always present built-in handlers?
+ # maybe we can just import them without scanning the directory and parsing the metadata?
+ self._register_handler_dir(handler_dir, is_community=False)
- handler_info = self._get_handler_info(handler_dir)
- if "name" not in handler_info:
- continue
- handler_name = handler_info["name"]
- dependencies = self._read_dependencies(handler_dir)
- handler_meta = {
- "path": handler_dir,
- "import": {
- "success": None,
- "error_message": None,
- "folder": handler_dir.name,
- "dependencies": dependencies,
- },
- "name": handler_name,
- "permanent": handler_info.get("permanent", False),
- "connection_args": handler_info.get("connection_args", None),
- "class_type": handler_info.get("class_type", None),
- "type": handler_info.get("type"),
- }
- if "icon_path" in handler_info:
- icon = self._get_handler_icon(handler_dir, handler_info["icon_path"])
- if icon:
- handler_meta["icon"] = icon
- self.handlers_import_status[handler_name] = handler_meta
+ if not community_handlers_enabled():
+ logger.debug("Community handlers disabled (set MINDSDB_COMMUNITY_HANDLERS=true to enable)")
+ return
+
+ # Community handlers already fetched in a previous session (on disk)
+ try:
+ for handler_dir in self.community_handlers_dir.iterdir():
+ if handler_dir.is_dir() and not handler_dir.name.startswith(("__", ".")):
+ self._register_handler_dir(handler_dir, is_community=True)
+ except Exception as e:
+ logger.warning("Could not scan community handlers dir: %s", e)
+
+ try:
+ for entry in list_available_handlers():
+ handler_name = entry.get("name")
+ if not handler_name or handler_name in self.handlers_import_status:
+ continue
+ handler_type = HANDLER_TYPE.ML if entry.get("type") == "ml" else HANDLER_TYPE.DATA
+ self.handlers_import_status[handler_name] = {
+ "path": None,
+ "import": {
+ "success": None,
+ "error_message": None,
+ "folder": entry.get("folder", f"{handler_name}_handler"),
+ "dependencies": [],
+ },
+ "name": handler_name,
+ "title": entry.get("title", handler_name),
+ "description": entry.get("description", ""),
+ "permanent": False,
+ "connection_args": None,
+ "class_type": None,
+ "type": handler_type,
+ "support_level": HANDLER_SUPPORT_LEVEL.COMMUNITY,
+ }
+ except Exception as e:
+ logger.warning("Could not load community handlers index: %s", e)
def _get_connection_args(self, args_file: Path, param_name: str) -> dict:
"""
@@ -858,32 +927,115 @@ def _get_handler_info(self, handler_dir: Path) -> dict:
return info
+ def _fetch_community_handler(self, handler_name: str, handler_folder) -> Optional[dict]:
+ """
+ Attempt to fetch a community handler from GitHub by its logical name.
+
+ Maps the logical handler name (e.g. "github") to the directory name,
+ downloads it from the external repository,
+ registers it in handlers_import_status, and returns the handler_meta.
+
+ Returns None if the handler does not exist in the community repo or
+ if a network/API error occurs (logged as a warning).
+ """
+
+ logger.debug(
+ "Handler '%s' not found locally, attempting on-demand fetch from community repo...",
+ handler_name,
+ )
+
+ try:
+ handler_dir = fetch_handler(handler_folder, self.community_handlers_dir)
+ except RuntimeError as e:
+ logger.warning("Failed to fetch community handler '%s': %s", handler_name, e)
+ return None
+
+ if handler_dir is None:
+ logger.debug("Handler '%s' does not exist in the community repo", handler_name)
+ return None
+
+ self._register_handler_dir(handler_dir, is_community=True)
+ handler_meta = self.handlers_import_status.get(handler_name)
+ if handler_meta is None:
+ logger.warning(
+ "Fetched community handler dir '%s' but could not determine handler name "
+ "(missing or malformed __init__.py)",
+ handler_folder,
+ )
+ return handler_meta
+
def import_handler(self, handler_name: str, base_import: str = None):
with self._import_lock:
+ time_before_import = time.perf_counter()
handler_meta = self.handlers_import_status[handler_name]
handler_dir = handler_meta["path"]
- handler_folder_name = str(handler_dir.name)
- if base_import is None:
- base_import = "mindsdb.integrations.handlers."
+ # Community handlers live outside the mindsdb package tree.
+ # They need spec_from_file_location so relative imports inside the handler.
+ if handler_meta.get("community") and handler_dir is not None:
+ handler_folder_name = str(handler_dir.name)
+ try:
+ parent_pkg = "mindsdb_community_handlers"
+ if parent_pkg not in sys.modules:
+ parent_mod = types.ModuleType(parent_pkg)
+ parent_mod.__path__ = [str(self.community_handlers_dir)]
+ parent_mod.__package__ = parent_pkg
+ sys.modules[parent_pkg] = parent_mod
+
+ module_name = f"{parent_pkg}.{handler_folder_name}"
+ init_file = handler_dir / "__init__.py"
+ # We need to use spec_from_file_location with submodule_search_locations
+ # to make relative imports work inside the handler's __init__.py
+ spec = iutil.spec_from_file_location(
+ module_name,
+ init_file,
+ submodule_search_locations=[str(handler_dir)],
+ )
+ if spec is None or spec.loader is None:
+ raise ImportError(f"Could not create module spec for community handler '{handler_name}'")
+ handler_module = iutil.module_from_spec(spec)
+ handler_module.__package__ = module_name
+ # Insert the module so that imports can wokr inside the mindsdb handler
+ sys.modules[module_name] = handler_module
+ spec.loader.exec_module(handler_module)
+ self.handler_modules[handler_name] = handler_module
+ handler_meta = self._get_handler_meta(handler_name)
+ logger.debug(f"Community handler '{handler_name}' imported successfully!")
+ except Exception as e:
+ # Remove the module from sys.modules in case of import failure to avoid inconsistent state
+ sys.modules.pop(module_name, None)
+ handler_meta["import"]["success"] = False
+ handler_meta["import"]["error_message"] = str(e)
+ logger.debug(f"Failed to import community handler '{handler_name}': {e}")
+ else:
+ handler_folder_name = str(handler_dir.name) if handler_dir else ""
+ if base_import is None:
+ base_import = "mindsdb.integrations.handlers."
- try:
- handler_module = importlib.import_module(f"{base_import}{handler_folder_name}")
- self.handler_modules[handler_name] = handler_module
- handler_meta = self._get_handler_meta(handler_name)
- except Exception as e:
- handler_meta["import"]["success"] = False
- handler_meta["import"]["error_message"] = str(e)
+ try:
+ handler_module = importlib.import_module(f"{base_import}{handler_folder_name}")
+ self.handler_modules[handler_name] = handler_module
+ handler_meta = self._get_handler_meta(handler_name)
+ logger.debug(
+ f"Handler '{handler_name}' imported successfully in {(time.perf_counter() - time_before_import):.3f} seconds"
+ )
+ except Exception as e:
+ handler_meta["import"]["success"] = False
+ handler_meta["import"]["error_message"] = str(e)
+ logger.debug(f"Failed to import handler '{handler_name}': {e}")
self.handlers_import_status[handler_meta["name"]] = handler_meta
return handler_meta
def get_handlers_import_status(self):
- # tries to import all not imported yet
-
result = {}
for handler_name in list(self.handlers_import_status.keys()):
- handler_meta = self.get_handler_meta(handler_name)
+ handler_meta = self.handlers_import_status[handler_name]
+ if handler_meta.get("support_level") == "community" and handler_meta.get("path") is None:
+ result[handler_name] = handler_meta
+ continue
+ handler_folder = handler_meta.get("import", {}).get("folder")
+ handler_meta = self.get_handler_meta(handler_name, handler_folder)
result[handler_name] = handler_meta
return result
@@ -895,11 +1047,26 @@ def get_handler_metadata(self, handler_name):
# returns metadata
return self.handlers_import_status.get(handler_name)
- def get_handler_meta(self, handler_name):
+ def get_handler_meta(self, handler_name, handler_folder=None):
# returns metadata and tries to import it
handler_meta = self.handlers_import_status.get(handler_name)
if handler_meta is None:
- return
+ return None
+ # Stub from the index: path=None means the handler hasn't been
+ # fetched yet β download it on demand from the community repo.
+ if handler_meta.get("support_level") == "community" and handler_meta["path"] is None:
+ # Derive folder from stub metadata if not explicitly provided.
+ if handler_folder is None:
+ handler_folder = handler_meta.get("import", {}).get("folder")
+ if handler_folder is None:
+ logger.warning(
+ "Community handler '%s' has no folder in metadata, skipping fetch",
+ handler_name,
+ )
+ return None
+ handler_meta = self._fetch_community_handler(handler_name, handler_folder)
+ if handler_meta is None:
+ return None
if handler_meta["import"]["success"] is None:
handler_meta = self.import_handler(handler_name)
return handler_meta
diff --git a/mindsdb/interfaces/database/log.py b/mindsdb/interfaces/database/log.py
index bda24fa9f6b..4a3b9a15af9 100644
--- a/mindsdb/interfaces/database/log.py
+++ b/mindsdb/interfaces/database/log.py
@@ -9,13 +9,14 @@
from mindsdb_sql_parser.utils import JoinType
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.integrations.utilities.query_traversal import query_traversal
from mindsdb.utilities.functions import resolve_table_identifier
-from mindsdb.api.executor.utilities.sql import get_query_tables
from mindsdb.utilities.exception import EntityNotExistsError
-import mindsdb.interfaces.storage.db as db
from mindsdb.utilities.context import context as ctx
-from mindsdb.api.executor.datahub.classes.response import DataHubResponse
+from mindsdb.utilities.types.column import Column
+from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb.integrations.libs.response import TableResponse
+import mindsdb.interfaces.storage.db as db
+from mindsdb.api.executor.utilities.sql import get_query_tables
from mindsdb.api.executor.datahub.classes.tables_row import (
TABLES_ROW_TYPE,
TablesRow,
@@ -228,7 +229,7 @@ def get_tables_rows(self) -> List[TablesRow]:
for table_name in self._tables.keys()
]
- def query(self, query: Select = None, native_query: str = None, session=None) -> DataHubResponse:
+ def query(self, query: Select = None, native_query: str = None, session=None) -> TableResponse:
if native_query is not None:
if query is not None:
raise Exception("'query' and 'native_query' arguments can not be used together")
@@ -290,6 +291,5 @@ def check_columns(node, is_table, **kwargs):
df[df_column_name] = df[df_column_name].astype(column_type)
# endregion
- columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()]
-
- return DataHubResponse(data_frame=df, columns=columns_info)
+ columns = [Column(name=k, dtype=v) for k, v in df.dtypes.items()]
+ return TableResponse(data=df, columns=columns, affected_rows=0)
diff --git a/mindsdb/interfaces/database/projects.py b/mindsdb/interfaces/database/projects.py
index 28b38e5b847..144d633b497 100644
--- a/mindsdb/interfaces/database/projects.py
+++ b/mindsdb/interfaces/database/projects.py
@@ -8,7 +8,7 @@
import numpy as np
from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation, Function
+from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation, Function, Join
from mindsdb_sql_parser import parse_sql
from mindsdb.interfaces.storage import db
@@ -125,7 +125,17 @@ def create_view(self, name: str, query: str, session):
query_context_controller.set_context(query_context_controller.IGNORE_CONTEXT)
try:
- SQLQuery(ast_query, session=session, database=self.name)
+ resp = SQLQuery(ast_query, session=session, database=self.name)
+ columns = [col.name for col in resp.fetched_data.columns]
+ seen, duplicates = set(), set()
+ for col in columns:
+ if col in seen:
+ duplicates.add(col)
+ else:
+ seen.add(col)
+ if len(duplicates) > 0:
+ raise ValueError(f"Found duplicated columns in the view: {', '.join(duplicates)}")
+
finally:
query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
@@ -234,29 +244,35 @@ def get_conditions_to_move(node):
# column is not in black list AND (query has star(*) OR column in white list)
has_star = False
- white_list, black_list = [], []
+ white_list, black_list = {}, []
for target in view_query.targets:
if isinstance(target, Star):
has_star = True
if isinstance(target, Identifier):
name = target.parts[-1].lower()
if target.alias is None or target.alias.parts[-1].lower() == name:
- white_list.append(name)
+ white_list[name] = target
elif target.alias is not None:
black_list.append(target.alias.parts[-1].lower())
+ is_join = isinstance(view_query.from_table, Join)
view_where = view_query.where
for condition in conditions:
arg1, arg2 = condition.args
if isinstance(arg1, Identifier):
name = arg1.parts[-1].lower()
- if name in black_list or not (has_star or name in white_list):
+ # don't move condition for join with Star
+ if name in black_list or not (has_star and not is_join):
continue
+ elif name in white_list:
+ arg1 = white_list[name]
if isinstance(arg2, Identifier):
name = arg2.parts[-1].lower()
- if name in black_list or not (has_star or name in white_list):
+ if name in black_list or not (has_star and not is_join):
continue
+ elif name in white_list:
+ arg2 = white_list[name]
# condition can be moved into view
condition2 = BinaryOperation(condition.op, [arg1, arg2])
@@ -312,7 +328,13 @@ def get_conditions_to_move(node):
# combine outer query with view's query
view_query.parentheses = True
+
+ # keep alias (column of the query might relate to it)
+ alias = query.from_table.alias if query.from_table.alias is not None else query.from_table
+ view_query.alias = Identifier(parts=[alias.parts[-1]])
+
query.from_table = view_query
+
return query
def query_view(self, query: Select, session) -> pd.DataFrame:
diff --git a/mindsdb/interfaces/file/file_controller.py b/mindsdb/interfaces/file/file_controller.py
index 5dfa7c05360..cb1308a952f 100644
--- a/mindsdb/interfaces/file/file_controller.py
+++ b/mindsdb/interfaces/file/file_controller.py
@@ -169,6 +169,7 @@ def get_file_pages(self, source_path: str):
"""
file_reader = FileReader(path=source_path)
tables = file_reader.get_contents()
+ file_reader.close()
pages_files = {}
pages_index = {}
diff --git a/mindsdb/interfaces/functions/controller.py b/mindsdb/interfaces/functions/controller.py
index 6503e6af402..7f63fa8b2de 100644
--- a/mindsdb/interfaces/functions/controller.py
+++ b/mindsdb/interfaces/functions/controller.py
@@ -4,7 +4,6 @@
from duckdb.typing import BIGINT, DOUBLE, VARCHAR, BLOB, BOOLEAN
from mindsdb.interfaces.storage.model_fs import HandlerStorage
-from mindsdb.integrations.libs.llm.utils import get_llm_config
from mindsdb.utilities.config import config
@@ -140,10 +139,7 @@ def llm_call_function(self, node):
try:
from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
- llm_config = get_llm_config(chat_model_params["provider"], chat_model_params)
- chat_model_params = llm_config.model_dump(by_alias=True)
- chat_model_params = {k: v for k, v in chat_model_params.items() if v is not None}
-
+ chat_model_params.pop("api_keys", None)
llm = LLMClient(chat_model_params, session=self.session)
except Exception as e:
raise RuntimeError(f"Unable to use LLM function, check ENV variables: {e}") from e
diff --git a/mindsdb/interfaces/jobs/jobs_controller.py b/mindsdb/interfaces/jobs/jobs_controller.py
index 31382daedf1..5c85372ffb1 100644
--- a/mindsdb/interfaces/jobs/jobs_controller.py
+++ b/mindsdb/interfaces/jobs/jobs_controller.py
@@ -16,6 +16,7 @@
from mindsdb.interfaces.database.projects import ProjectController
from mindsdb.interfaces.query_context.context_controller import query_context_controller
from mindsdb.interfaces.database.log import LogDBController
+from mindsdb.integrations.libs.response import TableResponse
from mindsdb.utilities import log
@@ -346,9 +347,9 @@ def get_history(self, name: str, project_name: str) -> List[dict]:
],
),
)
- response = logs_db_controller.query(query)
+ response: TableResponse = logs_db_controller.query(query)
- names = [i["name"] for i in response.columns]
+ names = [i.name for i in response.columns]
return response.data_frame[names].to_dict(orient="records")
diff --git a/mindsdb/interfaces/knowledge_base/controller.py b/mindsdb/interfaces/knowledge_base/controller.py
index 4e551024225..d158917f72c 100644
--- a/mindsdb/interfaces/knowledge_base/controller.py
+++ b/mindsdb/interfaces/knowledge_base/controller.py
@@ -1,4 +1,3 @@
-import os
import copy
from typing import Dict, List, Optional, Any, Text, Tuple, Union
import json
@@ -6,7 +5,7 @@
import pandas as pd
import numpy as np
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel
from sqlalchemy.orm.attributes import flag_modified
from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
@@ -17,45 +16,36 @@
import mindsdb.interfaces.storage.db as db
from mindsdb.integrations.libs.vectordatabase_handler import (
- DistanceFunction,
TableField,
VectorStoreHandler,
)
from mindsdb.integrations.utilities.handler_utils import get_api_key
from mindsdb.integrations.utilities.handlers.auth_utilities.snowflake import get_validated_jwt
-from mindsdb.integrations.utilities.rag.settings import RerankerMode
-
-from mindsdb.interfaces.agents.utils.constants import DEFAULT_EMBEDDINGS_MODEL_PROVIDER, MAX_INSERT_BATCH_SIZE
+from mindsdb.interfaces.agents.utils.constants import MAX_INSERT_BATCH_SIZE
from mindsdb.interfaces.database.projects import ProjectController
from mindsdb.interfaces.knowledge_base.preprocessing.models import PreprocessingConfig, Document
from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import PreprocessorFactory
from mindsdb.interfaces.knowledge_base.evaluate import EvaluateBase
from mindsdb.interfaces.knowledge_base.executor import KnowledgeBaseQueryExecutor
+from mindsdb.interfaces.knowledge_base.default_storage_resolver import resolve_default_storage_engines
from mindsdb.interfaces.model.functions import PredictorRecordNotFound
from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
from mindsdb.utilities.config import config
from mindsdb.utilities.context import context as ctx
-from mindsdb.interfaces.agents.utils.pydantic_ai_model_factory import get_llm_provider
-from mindsdb.interfaces.knowledge_base.llm_wrapper import create_chat_model
+from mindsdb.utilities.utils import validate_pydantic_params
+from mindsdb.utilities import log
from mindsdb.api.executor.command_executor import ExecuteCommands
from mindsdb.api.executor.utilities.sql import query_df
-from mindsdb.utilities import log
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
+from mindsdb.integrations.utilities.rag.settings import RerankerMode, RerankerConfig
from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMReranker, ListwiseLLMReranker
from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
logger = log.getLogger(__name__)
-def _require_agent_extra(feature: str):
- if create_chat_model is None:
- raise ImportError(
- f"{feature} requires the optional agent dependencies. Install them via `pip install mindsdb[kb]`."
- )
-
-
class KnowledgeBaseInputParams(BaseModel):
metadata_columns: List[str] | None = None
content_columns: List[str] | None = None
@@ -93,37 +83,10 @@ def get_model_params(model_params: dict, default_config_key: str):
return combined_model_params
-def adapt_embedding_model_params(embedding_model_params: dict):
- """
- Prepare parameters for embedding model.
- """
- params_copy = copy.deepcopy(embedding_model_params)
- provider = params_copy.pop("provider", None).lower()
- api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get("api_key")
- # Underscores are replaced because the provider name ultimately gets mapped to a class name.
- # This is mostly to support Azure OpenAI (azure_openai); the mapped class name is 'AzureOpenAIEmbeddings'.
- params_copy["class"] = provider.replace("_", "")
- if provider == "azure_openai":
- # Azure OpenAI expects the api_key to be passed as 'openai_api_key'.
- params_copy["openai_api_key"] = api_key
- params_copy["azure_endpoint"] = params_copy.pop("base_url")
- if "chunk_size" not in params_copy:
- params_copy["chunk_size"] = 2048
- if "api_version" in params_copy:
- params_copy["openai_api_version"] = params_copy["api_version"]
- else:
- params_copy[f"{provider}_api_key"] = api_key
- params_copy.pop("api_key", None)
- params_copy["model"] = params_copy.pop("model_name", None)
-
- return params_copy
-
-
def get_reranking_model_from_params(reranking_model_params: dict):
"""
Create reranking model from parameters.
"""
- from mindsdb.integrations.utilities.rag.settings import RerankerConfig
# Work on a copy; do not mutate caller's dict
params_copy = copy.deepcopy(reranking_model_params)
@@ -179,7 +142,7 @@ def rotate_provider_api_key(params):
:param params: input params, can be modified by this function
:return: a new api key if it is refreshed
"""
- provider = params.get("provider").lower()
+ provider = params.get("provider", "").lower()
if provider == "snowflake":
if "snowflake_account_id" in params:
@@ -726,30 +689,6 @@ def delete_query(self, query: Delete):
self.addapt_conditions_columns(conditions)
db_handler.dispatch_delete(query, conditions)
- def hybrid_search(
- self,
- query: str,
- keywords: List[str] = None,
- metadata: Dict[str, str] = None,
- distance_function=DistanceFunction.COSINE_DISTANCE,
- ) -> pd.DataFrame:
- query_df = pd.DataFrame.from_records([{TableField.CONTENT.value: query}])
- embeddings_df = self._df_to_embeddings(query_df)
- if embeddings_df.empty:
- return pd.DataFrame([])
- embeddings = embeddings_df.iloc[0][TableField.EMBEDDINGS.value]
- keywords_query = None
- if keywords is not None:
- keywords_query = " ".join(keywords)
- db_handler = self.get_vector_db()
- return db_handler.hybrid_search(
- self._kb.vector_database_table,
- embeddings,
- query=keywords_query,
- metadata=metadata,
- distance_function=distance_function,
- )
-
def clear(self):
"""
Clear data in KB table
@@ -1102,91 +1041,6 @@ def _content_to_embeddings(self, content: str) -> List[float]:
res = self._df_to_embeddings(df)
return res[TableField.EMBEDDINGS.value][0]
- @staticmethod
- def call_litellm_embedding(session, model_params, messages):
- args = copy.deepcopy(model_params)
-
- if "model_name" not in args:
- raise ValueError("'model_name' must be provided for embedding model")
-
- llm_model = args.pop("model_name")
- engine = args.pop("provider")
-
- module = session.integration_controller.get_handler_module("litellm")
- if module is None or module.Handler is None:
- raise ValueError(f'Unable to use "{engine}" provider. Litellm handler is not installed')
- return module.Handler.embeddings(engine, llm_model, messages, args)
-
- def build_rag_pipeline(self, retrieval_config: dict):
- """
- Builds a RAG pipeline with returned sources
-
- Args:
- retrieval_config: dict with retrieval config
-
- Returns:
- RAG: Configured RAG pipeline instance
-
- Raises:
- ValueError: If the configuration is invalid or required components are missing
- """
- # Get embedding model from knowledge base
- from mindsdb.interfaces.knowledge_base.embedding_model_utils import construct_embedding_model_from_args
- from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
- from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
-
- embedding_model_params = get_model_params(self._kb.params.get("embedding_model", {}), "default_embedding_model")
- if self._kb.embedding_model:
- # Extract embedding model args from knowledge base table
- embedding_args = self._kb.embedding_model.learn_args.get("using", {})
- # Construct the embedding model directly
- embeddings_model = construct_embedding_model_from_args(embedding_args, session=self.session)
- logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
- elif embedding_model_params:
- embeddings_model = construct_embedding_model_from_args(
- adapt_embedding_model_params(embedding_model_params), session=self.session
- )
- logger.debug(f"Using knowledge base embedding model from params: {self._kb.params['embedding_model']}")
- else:
- # Use default embedding model with default provider
- # Default to OpenAI's text-embedding-3-small for OpenAI provider, otherwise let the provider choose
- default_model_name = "text-embedding-3-small" if DEFAULT_EMBEDDINGS_MODEL_PROVIDER == "openai" else None
- default_embedding_args = {
- "provider": DEFAULT_EMBEDDINGS_MODEL_PROVIDER,
- }
- if default_model_name:
- default_embedding_args["model_name"] = default_model_name
- embeddings_model = construct_embedding_model_from_args(default_embedding_args, session=self.session)
- logger.debug(
- f"Using default embedding model ({DEFAULT_EMBEDDINGS_MODEL_PROVIDER}) as knowledge base has no embedding model"
- )
-
- # Update retrieval config with knowledge base parameters
- kb_params = {"vector_store_config": {"kb_table": self}}
-
- # Load and validate config
- try:
- rag_config = load_rag_config(retrieval_config, kb_params, embeddings_model)
-
- # Build LLM if specified
- if "llm_model_name" in rag_config:
- llm_args = {"model_name": rag_config.llm_model_name}
- if not rag_config.llm_provider:
- llm_args["provider"] = get_llm_provider(llm_args)
- else:
- llm_args["provider"] = rag_config.llm_provider
- _require_agent_extra("Building knowledge base retrieval pipelines")
- rag_config.llm = create_chat_model(llm_args)
-
- # Create RAG pipeline
- rag = RAG(rag_config)
- logger.debug(f"RAG pipeline created with config: {rag_config}")
- return rag
-
- except Exception as e:
- logger.exception("Error building RAG pipeline:")
- raise ValueError(f"Failed to build RAG pipeline: {str(e)}") from e
-
def _parse_metadata(self, base_metadata):
"""Helper function to robustly parse metadata string to dict"""
if isinstance(base_metadata, dict):
@@ -1207,36 +1061,6 @@ def _generate_document_id(self, content: str, content_column: str, provided_id:
return generate_document_id(content=content, provided_id=provided_id)
- def _convert_metadata_value(self, value):
- """
- Convert metadata value to appropriate Python type.
-
- Args:
- value: The value to convert
-
- Returns:
- Converted value in appropriate Python type
- """
- if pd.isna(value):
- return None
-
- # Handle pandas/numpy types
- if pd.api.types.is_datetime64_any_dtype(value) or isinstance(value, pd.Timestamp):
- return str(value)
- elif pd.api.types.is_integer_dtype(type(value)):
- return int(value)
- elif pd.api.types.is_float_dtype(type(value)):
- return float(value)
- elif pd.api.types.is_bool_dtype(type(value)):
- return bool(value)
-
- # Handle basic Python types
- if isinstance(value, (int, float, bool)):
- return value
-
- # Convert everything else to string
- return str(value)
-
def create_index(self, params: dict = None):
"""
Create an index on the knowledge base table
@@ -1258,26 +1082,6 @@ class KnowledgeBaseController:
def __init__(self, session) -> None:
self.session = session
- def _check_kb_input_params(self, params):
- # check names and types KB params
- try:
- KnowledgeBaseInputParams.model_validate(params)
- except ValidationError as e:
- problems = []
- for error in e.errors():
- parameter = ".".join([str(i) for i in error["loc"]])
- param_type = error["type"]
- if param_type == "extra_forbidden":
- msg = f"Parameter '{parameter}' is not allowed"
- else:
- msg = f"Error in '{parameter}' (type: {param_type}): {error['msg']}. Input: {repr(error['input'])}"
- problems.append(msg)
-
- msg = "\n".join(problems)
- if len(problems) > 1:
- msg = "\n" + msg
- raise ValueError(f"Problem with knowledge base parameters: {msg}") from e
-
def add(
self,
name: str,
@@ -1299,10 +1103,9 @@ def add(
# Validate preprocessing config first if provided
if preprocessing_config is not None:
PreprocessingConfig(**preprocessing_config) # Validate before storing
- params = params or {}
params["preprocessing"] = preprocessing_config
- self._check_kb_input_params(params)
+ validate_pydantic_params(params, KnowledgeBaseInputParams, "knowledge base")
# Check if vector_size is provided when using sparse vectors
is_sparse = params.get("is_sparse")
@@ -1322,6 +1125,9 @@ def add(
raise EntityExistsError("Knowledge base already exists", name)
embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
+ if not bool(embedding_params):
+ raise ValueError("No embedding model parameters provided")
+
params["embedding_model"] = embedding_params
rotate_provider_api_key(embedding_params)
@@ -1349,24 +1155,12 @@ def add(
# search for the vector database table
if storage is None:
- cloud_pg_vector = os.environ.get("KB_PGVECTOR_URL")
- if cloud_pg_vector:
- vector_table_name = name
- # Add sparse vector support for pgvector
- vector_db_params = {}
- # Check both explicit parameter and model configuration
- if is_sparse:
- vector_db_params["is_sparse"] = True
- if vector_size is not None:
- vector_db_params["vector_size"] = vector_size
- vector_db_name = self._create_persistent_pgvector(vector_db_params)
- params["default_vector_storage"] = vector_db_name
- else:
- raise ValueError(
- "Vector table is not defined. Set it by `storage=vector_db.vector_table`. "
- "One of the options is to use pgvector: "
- "https://docs.mindsdb.com/integrations/vector-db-integrations/pgvector"
- )
+ vector_db_name, vector_table_name = self._resolve_default_vector_storage(
+ kb_name=name,
+ is_sparse=is_sparse,
+ vector_size=vector_size,
+ )
+ params["default_vector_storage"] = vector_db_name
elif len(storage.parts) != 2:
raise ValueError("Storage param has to be vector db with table")
else:
@@ -1455,7 +1249,7 @@ def update(
params = params or {}
params["preprocessing"] = preprocessing_config
- self._check_kb_input_params(params)
+ validate_pydantic_params(params, KnowledgeBaseInputParams, "knowledge base")
# get project id
project = self.session.database_controller.get_project(project_name)
@@ -1544,21 +1338,44 @@ def _create_persistent_pgvector(self, params=None):
self.session.integration_controller.add(vector_store_name, "pgvector", params or {})
return vector_store_name
- def _create_persistent_chroma(self, kb_name, engine="chromadb"):
- """Create default vector database for knowledge base, if not specified"""
-
- vector_store_name = f"{kb_name}_{engine}"
-
- vector_store_folder_name = f"{vector_store_name}"
- connection_args = {"persist_directory": vector_store_folder_name}
+ def _create_persistent_faiss(self, kb_name: str):
+ vector_store_name = f"store_{kb_name}"
# check if exists
if self.session.integration_controller.get(vector_store_name):
return vector_store_name
- self.session.integration_controller.add(vector_store_name, engine, connection_args)
+ self.session.integration_controller.add(vector_store_name, "duckdb_faiss", {})
return vector_store_name
+ def _resolve_default_vector_storage(self, kb_name: str, is_sparse: bool = False, vector_size: int = None):
+ resolved_storage = resolve_default_storage_engines(config)
+ default_engine = resolved_storage["default_storage"]
+
+ if default_engine is None:
+ raise ValueError(
+ "Vector table is not defined. Set it by `storage=vector_db.vector_table` or configure "
+ "`knowledge_bases.storage` as one of: pgvector, faiss."
+ )
+
+ if default_engine == "pgvector":
+ vector_db_params = {}
+ if is_sparse:
+ vector_db_params["is_sparse"] = True
+ if vector_size is not None:
+ vector_db_params["vector_size"] = vector_size
+ vector_db_name = self._create_persistent_pgvector(vector_db_params)
+ return vector_db_name, kb_name
+
+ if default_engine in ("duckdb_faiss", "faiss"):
+ vector_db_name = self._create_persistent_faiss(kb_name)
+ return vector_db_name, kb_name
+
+ raise ValueError(
+ f"Automatic default storage creation is not supported for engine '{default_engine}'. "
+ "Set `storage=vector_db.vector_table` explicitly."
+ )
+
def _check_embedding_model(self, project_name, params: dict = None, kb_name="") -> dict:
"""check embedding model for knowledge base, return embedding model info"""
@@ -1581,15 +1398,15 @@ def _check_embedding_model(self, project_name, params: dict = None, kb_name="")
f"Wrong embedding provider: {params['provider']}. Available providers: {', '.join(avail_providers)}"
)
- llm_client = LLMClient(params, session=self.session)
-
try:
+ llm_client = LLMClient(params, session=self.session)
+
resp = llm_client.embeddings(["test"])
return {"dimension": len(resp[0])}
except Exception as e:
raise RuntimeError(f"Problem with embedding model config: {e}") from e
- def delete(self, name: str, project_name: int, if_exists: bool = False) -> None:
+ def delete(self, name: str, project_name: str, if_exists: bool = False) -> None:
"""
Delete a knowledge base from the database
"""
@@ -1708,3 +1525,26 @@ def evaluate(self, table_name: str, project_name: str, params: dict = None) -> p
scores = EvaluateBase.run(self.session, kb_table, params)
return scores
+
+ def release_lock(self, knowledge_base: Identifier, project_name):
+ # works only for FAISS dbs.
+ # if FAISS vector db is used in KB: remove this db from handlers cache.
+ # it will clear internal cache of tables in faiss handler and release locks for faiss files
+ # return unloaded database name
+
+ if len(knowledge_base.parts) > 1:
+ project_name, kb_name = knowledge_base.parts[-2:]
+ else:
+ kb_name = knowledge_base.parts[-1]
+
+ project_id = self.session.database_controller.get_project(project_name).id
+ kb = self.get(kb_name, project_id)
+ if kb is None or kb.vector_database_id is None:
+ return
+ database = db.Integration.query.get(kb.vector_database_id)
+ if database is None:
+ return
+
+ if database.engine == "duckdb_faiss":
+ self.session.integration_controller.handlers_cache.delete(database.name)
+ return database.name
diff --git a/mindsdb/interfaces/knowledge_base/default_storage_resolver.py b/mindsdb/interfaces/knowledge_base/default_storage_resolver.py
new file mode 100644
index 00000000000..93a4c364054
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/default_storage_resolver.py
@@ -0,0 +1,90 @@
+import os
+from typing import Any
+
+from mindsdb.utilities.config import config
+
+
+def _normalize_engine_name(engine: str | None) -> str | None:
+ if engine is None:
+ return None
+ normalized = engine.strip().lower()
+ if normalized in ("duckdb_faiss", "faiss"):
+ return "faiss"
+ if normalized == "pgvector":
+ return "pgvector"
+ return normalized or None
+
+
+def _get_env_available_engines() -> list[str]:
+ engines: list[str] = ["faiss"]
+ if os.environ.get("KB_PGVECTOR_URL"):
+ engines.append("pgvector")
+ return engines
+
+
+def get_env_available_engines() -> list[str]:
+ return _get_env_available_engines()
+
+
+def get_knowledge_base_storage_config(config_obj=None) -> str | None:
+ config_obj = config_obj or config
+ storage = config_obj.get("knowledge_bases", {}).get("storage", None)
+
+ if storage is None:
+ return None
+
+ if isinstance(storage, list):
+ if len(storage) == 0:
+ return None
+ storage = storage[0]
+
+ if not isinstance(storage, str):
+ raise ValueError("knowledge_bases.storage must be a string value")
+
+ return _normalize_engine_name(storage)
+
+
+def _unique_default_first(default: str | None, ordered: list[str]) -> list[str]:
+ """Return `ordered` with `default` first if set, dropping later duplicates."""
+ out: list[str] = []
+ seen: set[str] = set()
+ for engine in ([default] if default else []) + ordered:
+ if engine not in seen:
+ seen.add(engine)
+ out.append(engine)
+ return out
+
+
+def resolve_default_storage_engines(config_obj=None) -> dict[str, Any]:
+ configured = get_knowledge_base_storage_config(config_obj)
+ pgvector_enabled = os.environ.get("KB_PGVECTOR_URL") is not None
+ available = _get_env_available_engines()
+
+ if configured and configured not in available:
+ available = [configured, *available]
+
+ default = configured
+ if default is None:
+ default = "pgvector" if pgvector_enabled else None
+ if default is None and available:
+ default = available[0]
+
+ candidates = _unique_default_first(default, available)
+ available_set = set(available)
+ resolved_storage = [
+ {
+ "engine": name,
+ "available": name in available_set,
+ "default": name == default,
+ "source": "config" if configured == name else "fallback",
+ }
+ for name in candidates
+ ]
+
+ return {
+ "storage": configured,
+ "resolved_storage": resolved_storage,
+ "default_storage": default,
+ "available_vector_engines": available,
+ "pgvector_enabled": pgvector_enabled,
+ }
diff --git a/mindsdb/interfaces/knowledge_base/embedding_model_utils.py b/mindsdb/interfaces/knowledge_base/embedding_model_utils.py
deleted file mode 100644
index f8f151d7863..00000000000
--- a/mindsdb/interfaces/knowledge_base/embedding_model_utils.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""Custom embedding model utilities to replace langchain construct_model_from_args"""
-
-import copy
-from typing import Dict, Any, List
-
-from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class CustomEmbeddingModel:
- """
- Custom embedding model wrapper that uses LLMClient for embeddings.
- This replaces langchain embedding models for use in knowledge_base.
- """
-
- def __init__(self, args: Dict[str, Any], session=None):
- """
- Initialize the embedding model
-
- Args:
- args: Dictionary with model parameters (model_name, provider, etc.)
- session: Optional session for LLMClient
- """
- # Prepare params for LLMClient
- # Handle model_name -> model mapping if needed
- params = {
- "model_name": args.get("model", args.get("model_name")),
- "provider": args.get("provider", "openai"),
- **{k: v for k, v in args.items() if k not in ["model", "model_name", "provider", "class", "target"]},
- }
-
- self.llm_client = LLMClient(params=params, session=session)
- self.model_name = params["model_name"]
-
- def embed_query(self, text: str) -> List[float]:
- """
- Embed a single query string
-
- Args:
- text: Text to embed
-
- Returns:
- List of floats representing the embedding vector
- """
- embeddings = self.llm_client.embeddings([text])
- return embeddings[0] if embeddings else []
-
- def embed_documents(self, texts: List[str]) -> List[List[float]]:
- """
- Embed a list of documents
-
- Args:
- texts: List of text strings to embed
-
- Returns:
- List of embedding vectors (each is a list of floats)
- """
- return self.llm_client.embeddings(texts)
-
-
-def construct_embedding_model_from_args(args: Dict[str, Any], session=None):
- """
- Construct an embedding model from arguments (replacement for langchain's construct_model_from_args)
-
- Args:
- args: Dictionary with embedding model parameters
- - class: Embedding class name (for compatibility, but not used)
- - model or model_name: Model name to use
- - provider: Provider name (openai, etc.)
- - Other provider-specific parameters
- session: Optional session for LLMClient
-
- Returns:
- CustomEmbeddingModel instance
- """
- # Work on a copy to avoid mutating the original
- args_copy = copy.deepcopy(args)
-
- # Extract class name for logging (but we don't use it)
- class_name = args_copy.pop("class", "OpenAIEmbeddings")
- target = args_copy.pop("target", None)
-
- logger.debug(f"Constructing embedding model with class: {class_name}, args: {args_copy}")
-
- # Create the custom embedding model
- model = CustomEmbeddingModel(args_copy, session=session)
-
- # Restore args for compatibility (in case caller expects them)
- if target is not None:
- args["target"] = target
- args["class"] = class_name
-
- return model
diff --git a/mindsdb/interfaces/knowledge_base/llm_client.py b/mindsdb/interfaces/knowledge_base/llm_client.py
index ab044811b94..61591b62c17 100644
--- a/mindsdb/interfaces/knowledge_base/llm_client.py
+++ b/mindsdb/interfaces/knowledge_base/llm_client.py
@@ -7,6 +7,10 @@
from mindsdb.integrations.utilities.handler_utils import get_api_key
+from mindsdb.interfaces.knowledge_base.providers.bedrock import BedrockClient
+from mindsdb.interfaces.knowledge_base.providers.gemini import GeminiClient
+from mindsdb.interfaces.knowledge_base.providers.snowflake import SnowflakeClient
+
def retry_with_exponential_backoff(func):
def decorator(*args, **kwargs):
@@ -60,22 +64,23 @@ def wrapper(self, messages, *args, **kwargs):
class LLMClient:
"""
Class for accession to LLM.
- It chooses openai client or litellm handler depending on the config
+ It chooses provider client depending on the config
"""
def __init__(self, params: dict = None, session=None):
self._session = session
- self.params = params
+ params = params.copy()
- self.provider = params.get("provider", "openai")
+ self.provider = params.pop("provider", "openai")
+ self.model_name = params.pop("model_name")
+ if self.provider == "google":
+ self.provider = "gemini"
if "api_key" not in params:
api_key = get_api_key(self.provider, params, strict=False)
if api_key is not None:
params["api_key"] = api_key
- self.engine = "openai"
-
if self.provider == "azure_openai":
azure_api_key = params.get("api_key") or os.getenv("AZURE_OPENAI_API_KEY")
azure_api_endpoint = params.get("base_url") or os.environ.get("AZURE_OPENAI_ENDPOINT")
@@ -91,62 +96,46 @@ def __init__(self, params: dict = None, session=None):
kwargs["base_url"] = base_url
self.client = OpenAI(**kwargs)
elif self.provider == "ollama":
- kwargs = params.copy()
- kwargs.pop("model_name")
- kwargs.pop("provider", None)
- if kwargs.get("api_key") is None:
- kwargs["api_key"] = "n/a"
- self.client = OpenAI(**kwargs)
+ if params.get("api_key") is None:
+ params["api_key"] = "n/a"
+ self.client = OpenAI(**params)
+ elif self.provider == "bedrock":
+ if "aws_region" in params:
+ params["aws_region_name"] = params.pop("aws_region")
+ self.client = BedrockClient(**params)
+ elif self.provider == "gemini":
+ self.client = GeminiClient(**params)
+ elif self.provider == "snowflake":
+ self.client = SnowflakeClient(**params)
else:
- # try to use litellm
- if self._session is None:
- from mindsdb.api.executor.controllers.session_controller import SessionController
-
- self._session = SessionController()
- module = self._session.integration_controller.get_handler_module("litellm")
-
- if module is None or module.Handler is None:
- raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
-
- self.client = module.Handler
- self.engine = "litellm"
+ raise NotImplementedError(f'Provider "{self.provider}" is not supported')
@run_in_batches(1000)
@retry_with_exponential_backoff
def embeddings(self, messages: List[str]):
- params = self.params
- if self.engine == "openai":
+ if self.provider in ("openai", "azure_openai", "ollama"):
response = self.client.embeddings.create(
- model=params["model_name"],
+ model=self.model_name,
input=messages,
)
return [item.embedding for item in response.data]
else:
- kwargs = params.copy()
- model = kwargs.pop("model_name")
- kwargs.pop("provider", None)
-
- return self.client.embeddings(self.provider, model=model, messages=messages, args=kwargs)
+ return self.client.embeddings(self.model_name, messages)
@run_in_batches(100)
def completion(self, messages: List[dict], json_output: bool = False) -> List[str]:
"""
Call LLM completion and get response
"""
- params = self.params
- params["json_output"] = json_output
- if self.engine == "openai":
+
+ if self.provider in ("openai", "azure_openai", "ollama"):
response = self.client.chat.completions.create(
- model=params["model_name"],
+ model=self.model_name,
messages=messages,
)
return [item.message.content for item in response.choices]
else:
- kwargs = params.copy()
- model = kwargs.pop("model_name")
- kwargs.pop("provider", None)
- response = self.client.completion(self.provider, model=model, messages=messages, args=kwargs)
- return [item.message.content for item in response.choices]
+ return [self.client.completion(self.model_name, messages)]
async def abatch(self, messages_list: List[List[dict]], json_output: bool = False) -> List[List[str]]:
"""
diff --git a/mindsdb/interfaces/knowledge_base/preprocessing/constants.py b/mindsdb/interfaces/knowledge_base/preprocessing/constants.py
index 0e984c2c67a..47332c37b8b 100644
--- a/mindsdb/interfaces/knowledge_base/preprocessing/constants.py
+++ b/mindsdb/interfaces/knowledge_base/preprocessing/constants.py
@@ -1,13 +1,4 @@
-# Default settings for markdown header splitting
-DEFAULT_MARKDOWN_HEADERS = [
- ("#", "Header 1"),
- ("##", "Header 2"),
- ("###", "Header 3"),
-]
-
# Limits for web crawling
DEFAULT_CRAWL_DEPTH = None
DEFAULT_WEB_CRAWL_LIMIT = 1
DEFAULT_WEB_FILTERS = []
-
-DEFAULT_CONTEXT_DOCUMENT_LIMIT = 50
diff --git a/mindsdb/interfaces/knowledge_base/providers/__init__.py b/mindsdb/interfaces/knowledge_base/providers/__init__.py
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/__init__.py
@@ -0,0 +1 @@
+
diff --git a/mindsdb/interfaces/knowledge_base/providers/bedrock.py b/mindsdb/interfaces/knowledge_base/providers/bedrock.py
new file mode 100644
index 00000000000..2f50c5c4c87
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/bedrock.py
@@ -0,0 +1,147 @@
+import json
+from typing import Dict, List, Optional
+
+
+def prepare_conversation(messages: List[dict]) -> List[dict]:
+ """Convert chat messages to Bedrock `converse` message payload format."""
+ conversation = []
+ for message in messages:
+ content = message["content"]
+ role = message["role"]
+ if role == "system":
+ role = "assistant"
+ if role != "user":
+ if len(conversation) == 0:
+ # the first message has to be user message
+ content = message["role"] + ":\n" + content
+ role = "user"
+
+ conversation.append(
+ {
+ "role": role,
+ "content": [{"text": content}],
+ }
+ )
+ return conversation
+
+
+class AsyncBedrockClient:
+ """Async Bedrock runtime client wrapper"""
+
+ def __init__(
+ self,
+ aws_access_key_id: Optional[str] = None,
+ aws_secret_access_key: Optional[str] = None,
+ aws_region_name: Optional[str] = None,
+ aws_session_token: Optional[str] = None,
+ ):
+ try:
+ from aiobotocore.session import get_session
+ except ImportError as exc:
+ raise ImportError(
+ "aiobotocore is required for the Bedrock reranker client. Install it with `pip install aiobotocore`."
+ ) from exc
+
+ self.aws_access_key_id = aws_access_key_id
+ self.aws_secret_access_key = aws_secret_access_key
+ self.aws_session_token = aws_session_token
+ self.region_name = aws_region_name
+
+ self._session = get_session()
+
+ async def acompletion(
+ self,
+ model_name: str,
+ messages: List[dict],
+ temperature: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ ) -> str:
+ """Generate a chat completion asynchronously via Bedrock."""
+ inference_config = {}
+ if temperature is not None:
+ inference_config["temperature"] = temperature
+ if max_tokens is not None:
+ inference_config["max_tokens"] = max_tokens
+ if top_p is not None:
+ inference_config["top_p"] = top_p
+
+ conversation = prepare_conversation(messages)
+
+ # Create client with credentials
+ client_kwargs = {
+ "service_name": "bedrock-runtime",
+ "region_name": self.region_name,
+ "aws_access_key_id": self.aws_access_key_id,
+ "aws_secret_access_key": self.aws_secret_access_key,
+ "aws_session_token": self.aws_session_token,
+ }
+
+ async with self._session.create_client(**client_kwargs) as client:
+ response = await client.converse(
+ modelId=model_name, messages=conversation, inferenceConfig=inference_config
+ )
+
+ return response["output"]["message"]["content"][0]["text"]
+
+
+class BedrockClient:
+ """Synchronous Bedrock runtime client wrapper"""
+
+ def __init__(
+ self,
+ aws_access_key_id: Optional[str] = None,
+ aws_secret_access_key: Optional[str] = None,
+ aws_region_name: Optional[str] = None,
+ aws_session_token: Optional[str] = None,
+ ):
+ try:
+ import boto3
+ except ImportError as exc:
+ raise ImportError("boto3 is required for the Bedrock client. Install it with `pip install boto3`.") from exc
+
+ self.client = boto3.client(
+ "bedrock-runtime",
+ aws_access_key_id=aws_access_key_id,
+ aws_secret_access_key=aws_secret_access_key,
+ aws_session_token=aws_session_token,
+ region_name=aws_region_name,
+ )
+
+ def embeddings(self, model_name: str, messages: List[str]) -> List[List[float]]:
+ """Request embedding vectors for each text in `messages`."""
+ embeddings = []
+ for message in messages:
+ native_request = {"inputText": message}
+ request = json.dumps(native_request)
+
+ response = self.client.invoke_model(modelId=model_name, body=request)
+ model_response = json.loads(response["body"].read())
+
+ # Extract and print the generated embedding and the input text token count.
+ embeddings.append(model_response["embedding"])
+
+ return embeddings
+
+ def completion(
+ self,
+ model_name: str,
+ messages: List[dict],
+ temperature: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ ) -> str:
+ """Generate a chat completion synchronously via Bedrock."""
+ inference_config: Dict[str, float | int] = {}
+ if temperature is not None:
+ inference_config["temperature"] = temperature
+ if max_tokens is not None:
+ inference_config["max_tokens"] = max_tokens
+ if top_p is not None:
+ inference_config["top_p"] = top_p
+
+ conversation = prepare_conversation(messages)
+
+ response = self.client.converse(modelId=model_name, messages=conversation, inferenceConfig=inference_config)
+
+ return response["output"]["message"]["content"][0]["text"]
diff --git a/mindsdb/interfaces/knowledge_base/providers/gemini.py b/mindsdb/interfaces/knowledge_base/providers/gemini.py
new file mode 100644
index 00000000000..33e2bc314d0
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/gemini.py
@@ -0,0 +1,79 @@
+from typing import Any, List, Optional
+
+
+class GeminiClient:
+ """Wrapper around google-genai SDK"""
+
+ def __init__(self, api_key: str):
+ try:
+ from google import genai
+ from google.genai import types
+ except ImportError as exc:
+ raise ImportError("google.genai is required. Install it with `pip install google-genai`.") from exc
+
+ self.client = genai.Client(api_key=api_key)
+ self.types = types
+
+ def embeddings(self, model_name: str, messages: List[str]) -> List[List[float]]:
+ """Generate embedding vectors for each text in `messages`."""
+ result = self.client.models.embed_content(model=model_name, contents=messages)
+
+ return [item.values for item in result.embeddings]
+
+ def _prepare_messages(self, messages: List[dict]) -> List[Any]:
+ """Convert chat messages into google-genai content payloads."""
+ contents = []
+ for message in messages:
+ role = message["role"]
+ # system role is not supported
+ if role != "user":
+ role = "model"
+
+ contents.append(self.types.Content(role=role, parts=[self.types.Part(text=message["content"])]))
+ return contents
+
+ def completion(
+ self,
+ model_name: str,
+ messages: List[dict],
+ temperature: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ ) -> str:
+ """Produce a chat response"""
+ config = {}
+ if temperature:
+ config["temperature"] = temperature
+ if max_tokens:
+ config["max_output_tokens"] = max_tokens
+ if top_p:
+ config["top_p"] = top_p
+
+ contents = self._prepare_messages(messages)
+
+ result = self.client.models.generate_content(model=model_name, contents=contents, config=config)
+
+ return result.text
+
+ async def acompletion(
+ self,
+ model_name: str,
+ messages: List[dict],
+ temperature: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ ) -> str:
+ """Async variant of `completion` using the SDK aio client."""
+ config = {}
+ if temperature:
+ config["temperature"] = temperature
+ if max_tokens:
+ config["max_output_tokens"] = max_tokens
+ if top_p:
+ config["top_p"] = top_p
+
+ contents = self._prepare_messages(messages)
+
+ result = await self.client.aio.models.generate_content(model=model_name, contents=contents, config=config)
+
+ return result.text
diff --git a/mindsdb/interfaces/knowledge_base/providers/snowflake.py b/mindsdb/interfaces/knowledge_base/providers/snowflake.py
new file mode 100644
index 00000000000..7bcc3fc6382
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/snowflake.py
@@ -0,0 +1,118 @@
+from typing import Dict, List, Optional, Union
+
+import requests
+import httpx
+
+
+def _raise_for_status(response: Union[requests.Response, httpx.Response]) -> None:
+ """Raise an informative HTTPError when Snowflake responds with an error."""
+ if 400 <= response.status_code < 600:
+ if hasattr(response, "reason"):
+ reason = response.reason
+ elif hasattr(response, "reason_phrase"):
+ reason = response.reason_phrase
+ else:
+ reason = "Error"
+ raise requests.HTTPError(f"{reason}: {response.text}", response=response)
+
+
+class SnowflakeClient:
+ """Wrapper over Snowflake Cortex REST endpoints."""
+
+ def __init__(self, account_id: Optional[str] = None, api_key: Optional[str] = None):
+ if account_id is None:
+ raise ValueError("account_id must be provided")
+ if api_key is None:
+ raise ValueError("api_key must be provided")
+
+ self.account_id = account_id.lower()
+ self.api_key = api_key
+
+ self.auth_type = "KEYPAIR_JWT"
+ if self.api_key.startswith("pat/"):
+ self.api_key = self.api_key[4:]
+ self.auth_type = "PROGRAMMATIC_ACCESS_TOKEN"
+
+ def _get_base_url(self) -> str:
+ return f"https://{self.account_id}.snowflakecomputing.com/api/v2"
+
+ def _get_headers(self) -> Dict[str, str]:
+ return {
+ "Content-Type": "application/json",
+ "Accept": "application/json",
+ "Authorization": "Bearer " + self.api_key,
+ "X-Snowflake-Authorization-Token-Type": self.auth_type,
+ }
+
+ def embeddings(self, model_name: str, messages: List[str]) -> List[List[float]]:
+ """Request embedding vectors for the provided `messages`."""
+ url = f"{self._get_base_url()}/cortex/inference:embed"
+
+ payload = {"text": messages, "model": model_name}
+
+ response = requests.post(url, json=payload, headers=self._get_headers())
+ _raise_for_status(response)
+
+ embeddings = []
+ for item in response.json()["data"]:
+ embeddings.append(item["embedding"][0])
+ return embeddings
+
+ def completion(
+ self,
+ model_name: str,
+ messages: List[dict],
+ temperature: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ ) -> str:
+ """Generate a chat completion with the Cortex complete endpoint."""
+ url = f"{self._get_base_url()}/cortex/inference:complete"
+
+ payload = {
+ "model": model_name,
+ "stream": False,
+ "messages": messages,
+ }
+
+ if temperature:
+ payload["temperature"] = temperature
+ if max_tokens:
+ payload["max_tokens"] = max_tokens
+ if top_p:
+ payload["top_p"] = top_p
+
+ response = requests.post(url, json=payload, headers=self._get_headers())
+ _raise_for_status(response)
+ data = response.json()
+ return data["choices"][0]["message"]["content"]
+
+ async def acompletion(
+ self,
+ model_name: str,
+ messages: List[dict],
+ temperature: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ ) -> str:
+ """Async variant of `completion` using httpx."""
+ url = f"{self._get_base_url()}/cortex/inference:complete"
+
+ payload = {
+ "model": model_name,
+ "stream": False,
+ "messages": messages,
+ }
+
+ if temperature:
+ payload["temperature"] = temperature
+ if max_tokens:
+ payload["max_tokens"] = max_tokens
+ if top_p:
+ payload["top_p"] = top_p
+
+ async with httpx.AsyncClient() as client:
+ response = await client.post(url, json=payload, headers=self._get_headers())
+ _raise_for_status(response)
+ data = response.json()
+ return data["choices"][0]["message"]["content"]
diff --git a/mindsdb/interfaces/query_context/context_controller.py b/mindsdb/interfaces/query_context/context_controller.py
index 97a1ec83189..08188c8ed66 100644
--- a/mindsdb/interfaces/query_context/context_controller.py
+++ b/mindsdb/interfaces/query_context/context_controller.py
@@ -1,9 +1,9 @@
-from typing import List, Optional, Iterable
import pickle
import datetime as dt
+from typing import List, Optional, Iterable
-from sqlalchemy.orm.attributes import flag_modified
import pandas as pd
+from sqlalchemy.orm.attributes import flag_modified
from mindsdb_sql_parser import Select, Star, OrderBy
@@ -17,7 +17,6 @@
)
from mindsdb.integrations.utilities.query_traversal import query_traversal
from mindsdb.utilities.cache import get_cache
-
from mindsdb.interfaces.storage import db
from mindsdb.utilities.context import context as ctx
from mindsdb.utilities.config import config
@@ -70,14 +69,14 @@ def get_partitions(self, dn, step_call, query: Select) -> Iterable:
:param query: AST query to execute
:return: generator with query results
"""
- if hasattr(dn, "has_support_stream") and dn.has_support_stream():
+ if dn.has_support_stream():
query2 = self.get_partition_query(step_call.current_step_num, query, stream=True)
- for df in dn.query_stream(query2, fetch_size=self.batch_size):
+ response = dn.query(query=query2, session=step_call.session)
+ for df in response.iterate_no_save():
max_track_value = self.get_max_track_value(df)
yield df
self.set_progress(max_track_value=max_track_value)
-
else:
while True:
query2 = self.get_partition_query(step_call.current_step_num, query, stream=False)
@@ -457,7 +456,7 @@ def _get_init_last_values(self, l_query: LastQuery, dn, session) -> dict:
idx = None
for i, col in enumerate(columns_info):
- if col["name"].upper() == info["column_name"].upper():
+ if col.name.upper() == info["column_name"].upper():
idx = i
break
diff --git a/mindsdb/interfaces/query_context/last_query.py b/mindsdb/interfaces/query_context/last_query.py
index 1df233d4405..7e00a08c846 100644
--- a/mindsdb/interfaces/query_context/last_query.py
+++ b/mindsdb/interfaces/query_context/last_query.py
@@ -3,7 +3,17 @@
from collections import defaultdict
from mindsdb_sql_parser.ast import (
- Identifier, Select, BinaryOperation, Last, Constant, Star, ASTNode, NullConstant, OrderBy, Function, TypeCast
+ Identifier,
+ Select,
+ BinaryOperation,
+ Last,
+ Constant,
+ Star,
+ ASTNode,
+ NullConstant,
+ OrderBy,
+ Function,
+ TypeCast,
)
from mindsdb.integrations.utilities.query_traversal import query_traversal
@@ -34,21 +44,21 @@ def __init__(self, query: ASTNode):
def _find_last_columns(self, query: ASTNode) -> Union[dict, None]:
"""
- This function:
- - Searches LAST column in the input query
- - Replaces it with constants and memorises link to these constants
- - Link to constants will be used to inject values to query instead of LAST
- - Provide checks:
- - if it is possible to find the table for column
- - if column in select target
- - Generates and returns last_column variable which is dict
- last_columns[table_name] = {
- 'table':
,
- 'column': ,
- 'links': [, ... ],
- 'target_idx': ,
- 'gen_init_query': if true: to generate query to initial values for LAST
- }
+ This function:
+ - Searches LAST column in the input query
+ - Replaces it with constants and memorises link to these constants
+ - Link to constants will be used to inject values to query instead of LAST
+ - Provide checks:
+ - if it is possible to find the table for column
+ - if column in select target
+ - Generates and returns last_column variable which is dict
+ last_columns[table_name] = {
+ 'table':
,
+ 'column': ,
+ 'links': [, ... ],
+ 'target_idx': ,
+ 'gen_init_query': if true: to generate query to initial values for LAST
+ }
"""
# index last variables in query
@@ -76,7 +86,6 @@ def replace_last_in_tree(node: ASTNode, injected: Constant):
return found
def index_query(node, is_table, parent_query, **kwargs):
-
parent_query_id = id(parent_query)
last = None
if is_table and isinstance(node, Identifier):
@@ -105,13 +114,15 @@ def index_query(node, is_table, parent_query, **kwargs):
if last is not None:
# memorize
- conditions.append({
- 'query_id': parent_query_id,
- 'condition': node,
- 'last': last,
- 'column': col,
- 'gen_init_query': gen_init_query # generate query to fetch initial last values from table
- })
+ conditions.append(
+ {
+ "query_id": parent_query_id,
+ "condition": node,
+ "last": last,
+ "column": col,
+ "gen_init_query": gen_init_query, # generate query to fetch initial last values from table
+ }
+ )
# find lasts
query_traversal(query, index_query)
@@ -122,7 +133,7 @@ def index_query(node, is_table, parent_query, **kwargs):
self.query_orig = copy.deepcopy(query)
for info in conditions:
- self.last_idx[info['query_id']].append(info)
+ self.last_idx[info["query_id"]].append(info)
# index query targets
query_id = id(query)
@@ -152,21 +163,20 @@ def index_query(node, is_table, parent_query, **kwargs):
last_columns = {}
for parent_query_id, items in self.last_idx.items():
for info in items:
- col = info['column']
- last = info['last']
+ col = info["column"]
+ last = info["last"]
tables = tables_idx[parent_query_id]
uniq_tables = len(set([id(v) for v in tables.values()]))
if len(col.parts) > 1:
-
table = tables.get(col.parts[-2])
if table is None:
- raise ValueError('cant find table')
+ raise ValueError("cant find table")
elif uniq_tables == 1:
table = list(tables.values())[0]
else:
# or just skip it?
- raise ValueError('cant find table')
+ raise ValueError("cant find table")
col_name = col.parts[-1]
@@ -179,29 +189,46 @@ def index_query(node, is_table, parent_query, **kwargs):
# will try to get by name
...
else:
- raise ValueError('Last value should be in query target')
+ raise ValueError("Last value should be in query target")
last_columns[table_name] = {
- 'table': table,
- 'column': col_name,
- 'links': [last],
- 'target_idx': target_idx,
- 'gen_init_query': info['gen_init_query']
+ "table": table,
+ "column": col_name,
+ "links": [last],
+ "target_idx": target_idx,
+ "gen_init_query": info["gen_init_query"],
}
- elif last_columns[table_name]['column'] == col_name:
- last_columns[table_name]['column'].append(last)
+ elif last_columns[table_name]["column"] == col_name:
+ last_columns[table_name]["column"].append(last)
else:
- raise ValueError('possible to use only one column')
+ raise ValueError("possible to use only one column")
return last_columns
def to_string(self) -> str:
"""
- String representation of the query
- Used to identify query in query_context table
+ String representation of the query
+ Used to identify query in query_context table
"""
- return self.query_orig.to_string()
+ query = self.query_orig
+ if isinstance(query.from_table, Select) and query.targets == [Star()]:
+ # simplify nested query
+ if (
+ query.group_by is None
+ and query.order_by is None
+ and query.having is None
+ and query.distinct is False
+ and query.where is None
+ and query.limit is None
+ and query.offset is None
+ and query.cte is None
+ ):
+ query = copy.deepcopy(query.from_table)
+ query.parentheses = False
+ query.alias = None
+
+ return query.to_string()
def get_last_columns(self) -> List[dict]:
"""
@@ -210,11 +237,11 @@ def get_last_columns(self) -> List[dict]:
"""
return [
{
- 'table': info['table'],
- 'table_name': table_name,
- 'column_name': info['column'],
- 'target_idx': info['target_idx'],
- 'gen_init_query': info['gen_init_query'],
+ "table": info["table"],
+ "table_name": table_name,
+ "column_name": info["column"],
+ "target_idx": info["target_idx"],
+ "gen_init_query": info["gen_init_query"],
}
for table_name, info in self.last_tables.items()
]
@@ -224,8 +251,8 @@ def apply_values(self, values: dict) -> ASTNode:
Fills query with new values and return it
"""
for table_name, info in self.last_tables.items():
- value = values.get(table_name, {}).get(info['column'])
- for last in info['links']:
+ value = values.get(table_name, {}).get(info["column"])
+ for last in info["links"]:
last.value = value
return self.query
@@ -239,9 +266,9 @@ def get_init_queries(self):
# replace values
for items in self.last_idx.values():
for info in items:
- node = info['condition']
+ node = info["condition"]
back_up_values.append([node.op, node.args[1]])
- node.op = 'is not'
+ node.op = "is not"
node.args[1] = NullConstant()
query2 = copy.deepcopy(self.query)
@@ -249,18 +276,16 @@ def get_init_queries(self):
# return values
for items in self.last_idx.values():
for info in items:
- node = info['condition']
+ node = info["condition"]
op, arg1 = back_up_values.pop(0)
node.op = op
node.args[1] = arg1
for info in self.get_last_columns():
- if not info['gen_init_query']:
+ if not info["gen_init_query"]:
continue
- col = Identifier(info['column_name'])
+ col = Identifier(info["column_name"])
query2.targets = [col]
- query2.order_by = [
- OrderBy(col, direction='DESC')
- ]
+ query2.order_by = [OrderBy(col, direction="DESC")]
query2.limit = Constant(1)
yield query2, info
diff --git a/mindsdb/interfaces/query_context/query_task.py b/mindsdb/interfaces/query_context/query_task.py
index 57cc62d7f81..97cbbdcbf26 100644
--- a/mindsdb/interfaces/query_context/query_task.py
+++ b/mindsdb/interfaces/query_context/query_task.py
@@ -10,7 +10,6 @@ def __init__(self, *args, **kwargs):
self.query_id = self.object_id
def run(self, stop_event):
-
try:
session = SessionController()
SQLQuery(None, query_id=self.query_id, session=session, stop_event=stop_event)
diff --git a/mindsdb/interfaces/tasks/task_thread.py b/mindsdb/interfaces/tasks/task_thread.py
index f753a59928a..8b9eb7ca9e5 100644
--- a/mindsdb/interfaces/tasks/task_thread.py
+++ b/mindsdb/interfaces/tasks/task_thread.py
@@ -23,6 +23,9 @@ def run(self):
# create context and session
task_record = db.Tasks.query.get(self.task_id)
+ if task_record is None:
+ logger.error(f"Task record not found: {self.task_id}")
+ return
ctx.set_default()
ctx.company_id = task_record.company_id
diff --git a/mindsdb/utilities/config.py b/mindsdb/utilities/config.py
index 82a857b00c7..e3fff3bf023 100644
--- a/mindsdb/utilities/config.py
+++ b/mindsdb/utilities/config.py
@@ -13,6 +13,49 @@
# NOTE do not `import from mindsdb` here
+def get_bool_env_var(env_name: str) -> bool:
+ """Read an environment variable and return its value as a boolean.
+
+ Args:
+ env_name (str): name of the environment variable to read.
+
+ Returns:
+ bool: True or False, or None if the variable is not set or empty.
+
+ Raises:
+ ValueError: if the value is set but does not match any known boolean representation.
+ """
+ value = os.environ.get(env_name)
+ if value is None or value == "":
+ return None
+ match value.lower():
+ case "1" | "true" | "on" | "yes" | "y":
+ value = True
+ case "0" | "false" | "off" | "no" | "n":
+ value = False
+ case _:
+ raise ValueError(f"Expected a boolean value for the environment variable '{env_name}', but got '{value}'")
+ return value
+
+
+def get_list_env_var(env_name: str) -> list[str]:
+ """Read an environment variable and return its value as a list of strings.
+
+ The value is expected to be a comma-separated string. Whitespace around
+ each item is stripped, and empty items are ignored.
+
+ Args:
+ env_name (str): name of the environment variable to read.
+
+ Returns:
+ list[str]: list of non-empty strings, or None if the variable is not set or empty.
+ """
+ value = os.environ.get(env_name)
+ if value is None or value.strip() == "":
+ return None
+ return [item.strip() for item in value.split(",") if item.strip()]
+
+
def _merge_key_recursive(target_dict, source_dict, key):
if key not in target_dict:
target_dict[key] = source_dict[key]
@@ -155,6 +198,7 @@ def __new__(cls, *args, **kwargs) -> "Config":
"http_permanent_session_lifetime": datetime.timedelta(days=31),
"username": "mindsdb",
"password": "",
+ "token": None, # MINDSDB_AUTH_TOKEN
},
"logging": {
"handlers": {
@@ -199,6 +243,27 @@ def __new__(cls, *args, **kwargs) -> "Config":
"host": "0.0.0.0", # API server binds to all interfaces by default
"port": "8000",
},
+ "mcp": {
+ "cors": {
+ "enabled": True,
+ "allow_origins": [],
+ "allow_origin_regex": r"https?://(localhost|127\.0\.0\.1)(:\d+)?",
+ "allow_headers": ["*"],
+ },
+ "rate_limit": {
+ "enabled": False,
+ "requests_per_minute": 60,
+ },
+ "oauth": {
+ "enabled": False, # MINDSDB_MCP_OAUTH_ENABLED
+ "issuer_url": "", # MINDSDB_MCP_OAUTH_ISSUER_URL
+ "client_id": "", # MINDSDB_MCP_OAUTH_CLIENT_ID
+ "client_secret": "", # MINDSDB_MCP_OAUTH_CLIENT_SECRET
+ "scope": "mcp:tools", # MINDSDB_MCP_OAUTH_SCOPE
+ "public_url": "", # MINDSDB_MCP_OAUTH_PUBLIC_URL
+ },
+ "dns_rebinding_protection": False, # MINDSDB_MCP_DNS_REBINDING_PROTECTION
+ },
},
"cache": {"type": "local"},
"ml_task_queue": {"type": "local"},
@@ -215,6 +280,9 @@ def __new__(cls, *args, **kwargs) -> "Config":
"data_catalog": {
"enabled": False,
},
+ "data_stream": {
+ "fetch_size": 10000,
+ },
"byom": {
"enabled": False,
},
@@ -223,6 +291,7 @@ def __new__(cls, *args, **kwargs) -> "Config":
"knowledge_bases": {
"disable_autobatch": False,
"disable_pgvector_autobatch": True,
+ "storage": None,
},
}
# endregion
@@ -246,13 +315,17 @@ def prepare_env_config(self) -> None:
"""Collect config values from env vars to self._env_config"""
self._env_config = {
"logging": {"handlers": {"console": {}, "file": {}}},
- "api": {"http": {}},
+ "api": {
+ "http": {},
+ "mcp": {"cors": {}, "rate_limit": {}, "oauth": {}},
+ },
"auth": {},
"paths": {},
"permanent_storage": {},
"ml_task_queue": {},
"gui": {},
"byom": {},
+ "knowledge_bases": {},
}
# region storage root path
@@ -312,6 +385,10 @@ def prepare_env_config(self) -> None:
elif http_auth_type != "":
raise ValueError(f"Wrong value of env var MINDSDB_HTTP_AUTH_TYPE={http_auth_type}")
+ mindsdb_auth_token = os.environ.get("MINDSDB_AUTH_TOKEN", "")
+ if mindsdb_auth_token != "":
+ self._env_config["auth"]["token"] = mindsdb_auth_token
+
# region logging
if os.environ.get("MINDSDB_LOG_LEVEL", "") != "":
self._env_config["logging"]["handlers"]["console"]["level"] = os.environ["MINDSDB_LOG_LEVEL"]
@@ -398,20 +475,16 @@ def prepare_env_config(self) -> None:
if "default_reranking_model" not in self._env_config:
self._env_config["default_reranking_model"] = {}
self._env_config["default_reranking_model"].update(reranker_config)
- if os.environ.get("MINDSDB_DATA_CATALOG_ENABLED", "").lower() in ("1", "true"):
+ if get_bool_env_var("MINDSDB_DATA_CATALOG_ENABLED") is True:
self._env_config["data_catalog"] = {"enabled": True}
- if os.environ.get("MINDSDB_NO_STUDIO", "").lower() in ("1", "true"):
+ if get_bool_env_var("MINDSDB_NO_STUDIO") is True:
self._env_config["gui"]["open_on_start"] = False
self._env_config["gui"]["autoupdate"] = False
- mindsdb_gui_autoupdate = os.environ.get("MINDSDB_GUI_AUTOUPDATE", "").lower()
- if mindsdb_gui_autoupdate in ("0", "false"):
- self._env_config["gui"]["autoupdate"] = False
- elif mindsdb_gui_autoupdate in ("1", "true"):
- self._env_config["gui"]["autoupdate"] = True
- elif mindsdb_gui_autoupdate != "":
- raise ValueError(f"Wrong value of env var MINDSDB_GUI_AUTOUPDATE={mindsdb_gui_autoupdate}")
+ mindsdb_gui_autoupdate = get_bool_env_var("MINDSDB_GUI_AUTOUPDATE")
+ if mindsdb_gui_autoupdate is not None:
+ self._env_config["gui"]["autoupdate"] = mindsdb_gui_autoupdate
if os.environ.get("MINDSDB_PID_FILE_CONTENT", "") != "":
try:
@@ -427,6 +500,51 @@ def prepare_env_config(self) -> None:
elif mindsdb_byom_enabled != "":
raise ValueError(f"Wrong value of env var MINDSDB_BYOM_ENABLED={mindsdb_byom_enabled}")
+ # region MCP config
+ mindsdb_mcp_enabled = get_bool_env_var("MINDSDB_MCP_CORS_ENABLED")
+ if mindsdb_mcp_enabled is not None:
+ self._env_config["api"]["mcp"]["cors"]["enabled"] = mindsdb_mcp_enabled
+ mindsdb_mcp_allow_origins = get_list_env_var("MINDSDB_MCP_ALLOW_ORIGINS")
+ if isinstance(mindsdb_mcp_allow_origins, list):
+ self._env_config["api"]["mcp"]["cors"]["allow_origins"] = mindsdb_mcp_allow_origins
+ mindsdb_mcp_allow_headers = get_list_env_var("MINDSDB_MCP_ALLOW_HEADERS")
+ if isinstance(mindsdb_mcp_allow_headers, list):
+ self._env_config["api"]["mcp"]["cors"]["allow_headers"] = mindsdb_mcp_allow_headers
+ mindsdb_mcp_allow_origin_regex = os.environ.get("MINDSDB_MCP_ALLOW_ORIGIN_REGEXP", "")
+ if mindsdb_mcp_allow_origin_regex != "":
+ self._env_config["api"]["mcp"]["cors"]["allow_origin_regex"] = mindsdb_mcp_allow_origin_regex
+ mindsdb_mcp_rate_limit_enabled = get_bool_env_var("MINDSDB_MCP_RATE_LIMIT_ENABLED")
+ if mindsdb_mcp_rate_limit_enabled is not None:
+ self._env_config["api"]["mcp"]["rate_limit"]["enabled"] = mindsdb_mcp_rate_limit_enabled
+ mindsdb_mcp_rate_limit_rpm = os.environ.get("MINDSDB_MCP_RATE_LIMIT_RPM", "")
+ if mindsdb_mcp_rate_limit_rpm != "":
+ self._env_config["api"]["mcp"]["rate_limit"]["requests_per_minute"] = int(mindsdb_mcp_rate_limit_rpm)
+
+ mindsdb_mcp_oauth_enabled = get_bool_env_var("MINDSDB_MCP_OAUTH_ENABLED")
+ if mindsdb_mcp_oauth_enabled is not None:
+ self._env_config["api"]["mcp"]["oauth"]["enabled"] = mindsdb_mcp_oauth_enabled
+ mindsdb_mcp_oauth_issuer_url = os.environ.get("MINDSDB_MCP_OAUTH_ISSUER_URL", "")
+ if mindsdb_mcp_oauth_issuer_url != "":
+ self._env_config["api"]["mcp"]["oauth"]["issuer_url"] = mindsdb_mcp_oauth_issuer_url
+ mindsdb_mcp_oauth_client_id = os.environ.get("MINDSDB_MCP_OAUTH_CLIENT_ID", "")
+ if mindsdb_mcp_oauth_client_id != "":
+ self._env_config["api"]["mcp"]["oauth"]["client_id"] = mindsdb_mcp_oauth_client_id
+ mindsdb_mcp_oauth_client_secret = os.environ.get("MINDSDB_MCP_OAUTH_CLIENT_SECRET", "")
+ if mindsdb_mcp_oauth_client_secret != "":
+ self._env_config["api"]["mcp"]["oauth"]["client_secret"] = mindsdb_mcp_oauth_client_secret
+ mindsdb_mcp_oauth_scope = os.environ.get("MINDSDB_MCP_OAUTH_SCOPE", "")
+ if mindsdb_mcp_oauth_scope != "":
+ self._env_config["api"]["mcp"]["oauth"]["scope"] = mindsdb_mcp_oauth_scope
+ mindsdb_mcp_oauth_public_url = os.environ.get("MINDSDB_MCP_OAUTH_PUBLIC_URL", "")
+ if mindsdb_mcp_oauth_public_url != "":
+ self._env_config["api"]["mcp"]["oauth"]["public_url"] = mindsdb_mcp_oauth_public_url
+ mindsdb_mcp_dns_rebinding_protection = get_bool_env_var("MINDSDB_MCP_DNS_REBINDING_PROTECTION")
+ if mindsdb_mcp_dns_rebinding_protection is not None:
+ self._env_config["api"]["mcp"]["dns_rebinding_protection"] = mindsdb_mcp_dns_rebinding_protection
+ # endregion
+
+ # Keep env-based KB defaults out of config.auto.json overrides.
+
def fetch_auto_config(self) -> bool:
"""Load dict readed from config.auto.json to `auto_config`.
Do it only if `auto_config` was not loaded before or config.auto.json been changed.
@@ -589,6 +707,7 @@ def parse_cmd_args(self) -> None:
agent=None,
project=None,
update_gui=False,
+ mcp_stdio=False,
)
return
@@ -615,7 +734,7 @@ def parse_cmd_args(self) -> None:
parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name")
parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit")
- parser.add_argument("--load-tokenizer", action="store_true", default=False, help="Preload tokenizer and exit")
+ parser.add_argument("--mcp-stdio", action="store_true", default=False, help="Run MCP with STDIO transport")
self._cmd_args = parser.parse_args()
diff --git a/mindsdb/utilities/fs.py b/mindsdb/utilities/fs.py
index 2462960acca..dbed99cc8be 100644
--- a/mindsdb/utilities/fs.py
+++ b/mindsdb/utilities/fs.py
@@ -1,4 +1,5 @@
import os
+import sys
import json
import time
import tempfile
@@ -6,6 +7,9 @@
from pathlib import Path
from typing import Generator
+import tarfile
+import zipfile
+
import psutil
from mindsdb.utilities import log
@@ -127,6 +131,70 @@ def clean_unlinked_process_marks() -> list[int]:
return deleted_pids
+class PidFileLock:
+ """Cross-platform exclusive file lock context manager.
+ Uses fcntl.flock on Unix and msvcrt.locking on Windows.
+
+ Attributes:
+ _lock_file_path (Path): path to lock file
+ _blocking (bool): if True, waits until the lock becomes available, otherwise raises OSError immediately if lock is held
+ _fh (int): lock file descriptor
+ """
+
+ def __init__(self, lock_file_path: Path, blocking: bool = True):
+ self._lock_file_path = lock_file_path
+ self._blocking = blocking
+ self._fh = None
+
+ def __enter__(self):
+ self._lock_file_path.parent.mkdir(parents=True, exist_ok=True)
+ self._fh = open(self._lock_file_path, "a+")
+ try:
+ if sys.platform == "win32":
+ import msvcrt
+
+ # NOTE if file is locked, LK_LOCK will raise OSError after 10 seconds, LK_NBLCK immediately
+ mode = msvcrt.LK_LOCK if self._blocking else msvcrt.LK_NBLCK
+ self._fh.seek(0)
+ msvcrt.locking(self._fh.fileno(), mode, 1)
+ else:
+ import fcntl
+
+ flags = fcntl.LOCK_EX
+ if not self._blocking:
+ flags |= fcntl.LOCK_NB
+ fcntl.flock(self._fh.fileno(), flags)
+ except (OSError, IOError):
+ self._fh.close()
+ self._fh = None
+ logger.error(f"Failed to acquire lock on {self._lock_file_path}")
+ raise
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if self._fh is None:
+ return False
+ try:
+ if sys.platform == "win32":
+ import msvcrt
+
+ self._fh.seek(0)
+ msvcrt.locking(self._fh.fileno(), msvcrt.LK_UNLCK, 1)
+ else:
+ import fcntl
+
+ fcntl.flock(self._fh.fileno(), fcntl.LOCK_UN)
+ except (OSError, IOError):
+ pass
+ finally:
+ try:
+ self._fh.close()
+ except (OSError, IOError):
+ pass
+ self._fh = None
+ return False
+
+
def create_pid_file(config):
"""
Create mindsdb process pid file. Check if previous process exists and is running
@@ -140,48 +208,49 @@ def create_pid_file(config):
p = get_tmp_dir()
p.mkdir(parents=True, exist_ok=True)
pid_file = p.joinpath("pid")
- if pid_file.exists():
- # if process exists raise exception
- pid_file_data_str = pid_file.read_text().strip()
- pid = None
- try:
- pid_file_data = json.loads(pid_file_data_str)
- if isinstance(pid_file_data, dict):
- pid = pid_file_data.get("pid")
- else:
- pid = pid_file_data
- except json.JSONDecodeError:
- # is it just pid number (old approach)?
- try:
- pid = int(pid_file_data_str)
- except Exception:
- pass
- logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON, removing")
+ lock_file = p.joinpath("pid.lock")
- if pid is not None:
+ with PidFileLock(lock_file):
+ if pid_file.exists():
+ pid_file_data_str = pid_file.read_text().strip()
+ pid = None
try:
- psutil.Process(int(pid))
- raise Exception(f"Found PID file with existing process: {pid} {pid_file}")
- except (psutil.Error, ValueError):
- pass
- logger.warning(f"Found existing PID file {pid_file}({pid}), removing")
-
- pid_file.unlink(missing_ok=True)
-
- pid_file_content = config["pid_file_content"]
- if pid_file_content is None or len(pid_file_content) == 0:
- pid_file_data_str = str(os.getpid())
- else:
- pid_file_data = {"pid": os.getpid()}
- for key, value in pid_file_content.items():
- value_path = value.split(".")
- value_obj = config
- for path_part in value_path:
- value_obj = value_obj.get(path_part) if value_obj else None
- pid_file_data[key] = value_obj
+ pid_file_data = json.loads(pid_file_data_str)
+ if isinstance(pid_file_data, dict):
+ pid = pid_file_data.get("pid")
+ else:
+ pid = pid_file_data
+ except json.JSONDecodeError:
+ try:
+ pid = int(pid_file_data_str)
+ except Exception:
+ pass
+ logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON, removing")
+
+ if pid is not None:
+ try:
+ psutil.Process(int(pid))
+ raise Exception(f"Found PID file with existing process: {pid} {pid_file}")
+ except (psutil.Error, ValueError):
+ pass
+ logger.warning(f"Found existing PID file {pid_file}({pid}), removing")
+
+ pid_file.unlink(missing_ok=True)
+
+ pid_file_content = config["pid_file_content"]
+ if pid_file_content is None or len(pid_file_content) == 0:
+ pid_file_data_str = str(os.getpid())
+ else:
+ pid_file_data = {"pid": os.getpid()}
+ for key, value in pid_file_content.items():
+ value_path = value.split(".")
+ value_obj = config
+ for path_part in value_path:
+ value_obj = value_obj.get(path_part) if value_obj else None
+ pid_file_data[key] = value_obj
- pid_file_data_str = json.dumps(pid_file_data)
- pid_file.write_text(pid_file_data_str)
+ pid_file_data_str = json.dumps(pid_file_data)
+ pid_file.write_text(pid_file_data_str)
def delete_pid_file():
@@ -193,45 +262,84 @@ def delete_pid_file():
return
pid_file = get_tmp_dir().joinpath("pid")
+ lock_file = get_tmp_dir().joinpath("pid.lock")
- if not pid_file.exists():
- return
+ with PidFileLock(lock_file):
+ if not pid_file.exists():
+ return
- pid_file_data_str = pid_file.read_text().strip()
- pid = None
- try:
- pid_file_data = json.loads(pid_file_data_str)
- if isinstance(pid_file_data, dict):
- pid = pid_file_data.get("pid")
- else:
- # It's a simple number (old format or pid_file_content=None format)
- pid = pid_file_data
- except json.JSONDecodeError:
- logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON")
+ pid_file_data_str = pid_file.read_text().strip()
+ pid = None
+ try:
+ pid_file_data = json.loads(pid_file_data_str)
+ if isinstance(pid_file_data, dict):
+ pid = pid_file_data.get("pid")
+ else:
+ # It's a simple number (old format or pid_file_content=None format)
+ pid = pid_file_data
+ except json.JSONDecodeError:
+ logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON")
- if pid is not None and str(pid) != str(os.getpid()):
- logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid")
- return
+ if pid is not None and str(pid) != str(os.getpid()):
+ logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid")
+ return
- pid_file.unlink(missing_ok=True)
+ pid_file.unlink(missing_ok=True)
def __is_within_directory(directory, target):
- abs_directory = os.path.abspath(directory)
- abs_target = os.path.abspath(target)
- prefix = os.path.commonprefix([abs_directory, abs_target])
- return prefix == abs_directory
+ abs_directory = os.path.realpath(directory)
+ abs_target = os.path.realpath(target)
+ try:
+ return os.path.commonpath([abs_directory, abs_target]) == abs_directory
+ except ValueError:
+ # can be raised on windows
+ return False
+
+
+def __get_tar_members(archivefile, members):
+ if members is None:
+ return archivefile.getmembers()
+
+ resolved_members = []
+ for member in members:
+ if isinstance(member, tarfile.TarInfo):
+ resolved_members.append(member)
+ else:
+ resolved_members.append(archivefile.getmember(member))
+ return resolved_members
-def safe_extract(tarfile, path=".", members=None, *, numeric_owner=False):
- # for py >= 3.12
- if hasattr(tarfile, "data_filter"):
- tarfile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data")
+def safe_extract(archivefile, path=".", members=None, *, numeric_owner=False):
+ """
+ Safely extract an archivefile, preventing path traversal attacks.
+ """
+ if isinstance(archivefile, zipfile.ZipFile):
+ for member in archivefile.namelist():
+ member_path = os.path.join(path, member)
+ if not __is_within_directory(path, member_path):
+ raise Exception("Attempted Path Traversal in Zip File")
+ archivefile.extractall(path, members)
return
- # for py < 3.12
- for member in tarfile.getmembers():
- member_path = os.path.join(path, member.name)
- if not __is_within_directory(path, member_path):
- raise Exception("Attempted Path Traversal in Tar File")
- tarfile.extractall(path, members=members, numeric_owner=numeric_owner)
+ if isinstance(archivefile, tarfile.TarFile):
+ # for py >= 3.12
+ if hasattr(archivefile, "data_filter"):
+ archivefile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data")
+ return
+
+ # for py < 3.12
+ for member in __get_tar_members(archivefile, members):
+ if member.issym() or member.islnk():
+ raise Exception(f"Security Alert: Link entries are not allowed in tar file: {member.name}")
+
+ if not (member.isfile() or member.isdir()):
+ raise Exception(f"Security Alert: Unsupported tar member type detected for member: {member.name}")
+
+ member_path = os.path.join(path, member.name)
+ if not __is_within_directory(path, member_path):
+ raise Exception(
+ f"Security Alert: Attempted path traversal in tar file detected for member: {member.name}"
+ )
+
+ archivefile.extract(member, path=path, numeric_owner=numeric_owner)
diff --git a/mindsdb/utilities/langfuse.py b/mindsdb/utilities/langfuse.py
index def4ec98c7e..92320c48d5e 100644
--- a/mindsdb/utilities/langfuse.py
+++ b/mindsdb/utilities/langfuse.py
@@ -5,8 +5,8 @@
from mindsdb.utilities import log
if TYPE_CHECKING:
- from langfuse.callback import CallbackHandler
- from langfuse.client import StatefulSpanClient
+ from langfuse._client.span import LangfuseSpan
+ from langfuse.langchain import CallbackHandler
logger = log.getLogger(__name__)
@@ -111,6 +111,7 @@ def __init__(
public_key=public_key,
secret_key=secret_key,
host=host,
+ environment=environment,
release=release,
debug=debug,
timeout=timeout,
@@ -145,13 +146,14 @@ def setup_trace(
self.set_tags(tags)
try:
- self.trace = self.client.trace(
- name=name, input=input, metadata=self.metadata, tags=self.tags, user_id=user_id, session_id=session_id
- )
+ # SDK v3+: root observation is a span; trace attributes are set via update_trace.
+ self.trace = self.client.start_span(name=name, input=input, metadata=self.metadata)
+ self.trace.update_trace(tags=self.tags, user_id=user_id, session_id=session_id)
except Exception:
- logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.id}:")
+ logger.exception("Something went wrong while creating Langfuse trace")
+ return
- logger.info(f"Langfuse trace configured with ID: {self.trace.id}")
+ logger.info(f"Langfuse trace configured with ID: {self.trace.trace_id}")
def get_trace_id(self) -> typing.Optional[str]:
"""
@@ -166,9 +168,9 @@ def get_trace_id(self) -> typing.Optional[str]:
logger.debug("Langfuse trace is not setup.")
return ""
- return self.trace.id
+ return self.trace.trace_id
- def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> typing.Optional["StatefulSpanClient"]:
+ def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> typing.Optional["LangfuseSpan"]:
"""
Create span. If Langfuse is disabled, nothing will be done.
@@ -181,9 +183,9 @@ def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> ty
logger.debug("Langfuse is disabled.")
return None
- return self.trace.span(name=name, input=input)
+ return self.trace.start_span(name=name, input=input)
- def end_span_stream(self, span: typing.Optional["StatefulSpanClient"] = None) -> None:
+ def end_span_stream(self, span: typing.Optional["LangfuseSpan"] = None) -> None:
"""
End span. If Langfuse is disabled, nothing will happen.
Args:
@@ -195,10 +197,10 @@ def end_span_stream(self, span: typing.Optional["StatefulSpanClient"] = None) ->
return
span.end()
- self.trace.update()
+ self.client.flush()
def end_span(
- self, span: typing.Optional["StatefulSpanClient"] = None, output: typing.Optional[typing.Any] = None
+ self, span: typing.Optional["LangfuseSpan"] = None, output: typing.Optional[typing.Any] = None
) -> None:
"""
End trace. If Langfuse is disabled, nothing will be done.
@@ -216,8 +218,10 @@ def end_span(
logger.debug("Langfuse span is not created.")
return
- span.end(output=output)
- self.trace.update(output=output)
+ if output is not None:
+ span.update(output=output)
+ span.end()
+ self.trace.update_trace(output=output)
metadata = self.metadata or {}
@@ -225,9 +229,9 @@ def end_span(
# Ensure all batched traces are sent before fetching.
self.client.flush()
metadata["tool_usage"] = self._get_tool_usage()
- self.trace.update(metadata=metadata)
+ self.trace.update_trace(metadata=metadata)
except Exception:
- logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.id}:")
+ logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.trace_id}:")
def get_langchain_handler(self) -> typing.Optional["CallbackHandler"]:
"""
@@ -238,7 +242,13 @@ def get_langchain_handler(self) -> typing.Optional["CallbackHandler"]:
logger.debug("Langfuse is disabled.")
return None
- return self.trace.get_langchain_handler()
+ try:
+ from langfuse.langchain import CallbackHandler
+ except ImportError:
+ logger.debug("langfuse.langchain CallbackHandler is not available (install langchain extra if needed).")
+ return None
+
+ return CallbackHandler(public_key=self.public_key)
def set_metadata(self, custom_metadata: dict = None) -> None:
"""
@@ -267,8 +277,8 @@ def _get_tool_usage(self) -> typing.Dict:
tool_usage = {}
try:
- fetched_trace = self.client.get_trace(self.trace.id)
- steps = [s.name for s in fetched_trace.observations]
+ fetched_trace = self.client.api.trace.get(self.trace.trace_id)
+ steps = [s.name for s in fetched_trace.observations if s.name]
for step in steps:
if "AgentAction" in step:
tool_name = step.split("-")[1]
@@ -276,8 +286,8 @@ def _get_tool_usage(self) -> typing.Dict:
tool_usage[tool_name] = 0
tool_usage[tool_name] += 1
except TraceNotFoundError:
- logger.warning(f"Langfuse trace {self.trace.id} not found")
+ logger.warning(f"Langfuse trace {self.trace.trace_id} not found")
except Exception:
- logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.id}:")
+ logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.trace_id}:")
return tool_usage
diff --git a/mindsdb/utilities/log.py b/mindsdb/utilities/log.py
index 8c76ad9d4ea..2ae311a61da 100644
--- a/mindsdb/utilities/log.py
+++ b/mindsdb/utilities/log.py
@@ -4,10 +4,15 @@
import logging
import threading
from typing import Any
+import warnings
from logging.config import dictConfig
from mindsdb.utilities.config import config as app_config
+# Suppress Pydantic warnings for third-party libraries
+# TODO: Work on a better solution to this
+warnings.filterwarnings("ignore", message="Field.*has conflict with protected namespace.*", category=UserWarning)
+
logging_initialized = False
@@ -205,6 +210,7 @@ def get_handlers_config(process_name: str) -> dict:
"class": "mindsdb.utilities.log.StreamSanitizingHandler",
"formatter": console_handler_config.get("formatter", "default"),
"level": console_handler_config_level,
+ "stream": console_handler_config.get("stream", "ext://sys.stderr"),
}
file_handler_config = app_config["logging"]["handlers"]["file"]
diff --git a/mindsdb/integrations/handlers/aerospike_handler/tests/__init__.py b/mindsdb/utilities/types/__init__.py
similarity index 100%
rename from mindsdb/integrations/handlers/aerospike_handler/tests/__init__.py
rename to mindsdb/utilities/types/__init__.py
diff --git a/mindsdb/utilities/types/column.py b/mindsdb/utilities/types/column.py
new file mode 100644
index 00000000000..e8d258468d3
--- /dev/null
+++ b/mindsdb/utilities/types/column.py
@@ -0,0 +1,30 @@
+from dataclasses import dataclass, field, MISSING
+
+from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
+
+
+@dataclass(kw_only=True, slots=True)
+class Column:
+ name: str = field(default=MISSING)
+ alias: str | None = None
+ table_name: str | None = None
+ table_alias: str | None = None
+ type: MYSQL_DATA_TYPE | None = None
+ database: str | None = None
+ flags: dict = None
+ charset: str | None = None
+ original_type: str | None = None
+ dtype: str | None = None
+
+ def __post_init__(self):
+ if self.alias is None:
+ self.alias = self.name
+ if self.table_alias is None:
+ self.table_alias = self.table_name
+
+ def get_hash_name(self, prefix):
+ table_name = self.table_name if self.table_alias is None else self.table_alias
+ name = self.name if self.alias is None else self.alias
+
+ name = f"{prefix}_{table_name}_{name}"
+ return name
diff --git a/mindsdb/utilities/utils.py b/mindsdb/utilities/utils.py
index 3c9bd09162c..160b03fe79c 100644
--- a/mindsdb/utilities/utils.py
+++ b/mindsdb/utilities/utils.py
@@ -2,6 +2,8 @@
import re
import typing
+from pydantic import BaseModel, ValidationError
+
def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Dict[str, str]:
"""
@@ -32,3 +34,24 @@ def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Di
raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}") from e
return attributes
+
+
+def validate_pydantic_params(params: dict, schema: type[BaseModel], subject: str):
+ # check names and types
+ try:
+ schema.model_validate(params)
+ except ValidationError as e:
+ problems = []
+ for error in e.errors():
+ parameter = ".".join([str(i) for i in error["loc"]])
+ param_type = error["type"]
+ if param_type == "extra_forbidden":
+ msg = f"Parameter '{parameter}' is not allowed"
+ else:
+ msg = f"Error in '{parameter}' (type: {param_type}): {error['msg']}. Input: {repr(error['input'])}"
+ problems.append(msg)
+
+ msg = "\n".join(problems)
+ if len(problems) > 1:
+ msg = "\n" + msg
+ raise ValueError(f"Problem with {subject} parameters: {msg}") from e
diff --git a/requirements/requirements-agents.txt b/requirements/requirements-agents.txt
index 83b60d9f496..e96657bb724 100644
--- a/requirements/requirements-agents.txt
+++ b/requirements/requirements-agents.txt
@@ -1,19 +1,14 @@
-openai<3.0.0,>=2.9.0
-
-langchain-community==0.3.27
-langchain-core==0.3.77
-langchain-experimental==0.3.4
-
+openai<3.0.0,>=2.11.0
# When using agents, some LLMs may require the 'transformers' library (like Ollama):
-transformers >= 4.42.4
+transformers==5.5.0
# Required for KB
mindsdb-evaluator == 0.0.21
-litellm==1.63.14
-mcp~=1.10.1 # Required for MCP server
+mcp~=1.26.0 # Required for MCP server
# A2A requirements
httpx==0.28.1
jwcrypto==1.5.6
-typing-extensions==4.14.1
+# fastmcp (via pydantic-ai) requires typing-extensions>=4.15.0 (py-key-value-aio chain)
+typing-extensions>=4.15.0,<5
diff --git a/requirements/requirements-kb.txt b/requirements/requirements-kb.txt
index eb5adbfaefb..334e7c0f352 100644
--- a/requirements/requirements-kb.txt
+++ b/requirements/requirements-kb.txt
@@ -1,4 +1,2 @@
lxml==5.3.0 # Is this transitive dependency?
-pgvector==0.3.6 # Required for knowledge bases
-langchain-core==0.3.77
-litellm==1.63.14
\ No newline at end of file
+faiss-cpu==1.13.2 # default vector storage
diff --git a/requirements/requirements-langfuse.txt b/requirements/requirements-langfuse.txt
index fffecd7da86..7cd73e32d75 100644
--- a/requirements/requirements-langfuse.txt
+++ b/requirements/requirements-langfuse.txt
@@ -1 +1 @@
-langfuse==2.53.3 # Latest as of November 4, 2024
\ No newline at end of file
+langfuse==3.2.5
\ No newline at end of file
diff --git a/requirements/requirements-opentelemetry.txt b/requirements/requirements-opentelemetry.txt
index eae7c0601c4..0b262f9b35a 100644
--- a/requirements/requirements-opentelemetry.txt
+++ b/requirements/requirements-opentelemetry.txt
@@ -1,6 +1,6 @@
-opentelemetry-api==1.27.0
-opentelemetry-sdk==1.27.0
-opentelemetry-exporter-otlp==1.27.0
-opentelemetry-instrumentation-requests==0.48b0
-opentelemetry-instrumentation-flask==0.48b0
-opentelemetry-distro==0.48b0
\ No newline at end of file
+opentelemetry-api==1.39.1
+opentelemetry-sdk==1.39.1
+opentelemetry-exporter-otlp==1.39.1
+opentelemetry-instrumentation-requests==0.60b1
+opentelemetry-instrumentation-flask==0.60b1
+opentelemetry-distro==0.60b1
\ No newline at end of file
diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt
index 58f887f4881..60607799622 100644
--- a/requirements/requirements-test.txt
+++ b/requirements/requirements-test.txt
@@ -15,9 +15,10 @@ ollama >= 0.1.7 # Langchain tests
anthropic >= 0.21.3 # Langchain tests
langchain-google-genai>=2.0.0 # Langchain tests
mindsdb-sdk
-filelock==3.20.1
+filelock==3.20.3
mysql-connector-python==9.1.0
walrus==0.9.3
pymongo == 4.8.0
pytest-json-report==1.5.0
-appdirs >= 1.0.0
\ No newline at end of file
+appdirs >= 1.0.0
+pgvector==0.3.6 # Required for knowledge bases tests
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 57be65faec2..fbeea1e13b1 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,16 +1,16 @@
packaging
-flask == 3.0.3
-werkzeug == 3.0.6
+flask == 3.1.3
+werkzeug == 3.1.6
flask-restx >= 1.3.0, < 2.0.0
-pandas == 2.2.3
-python-multipart == 0.0.20
-cryptography>=35.0
+pandas==2.3.1
+python-multipart == 0.0.26
+cryptography>=46.0.5
psycopg[binary]
psutil~=7.0
sqlalchemy >= 2.0.0, < 3.0.0
psycopg2-binary # This is required for using sqlalchemy with postgres
alembic >= 1.3.3
-redis >=5.0.0, < 6.0.0
+redis==6.4.0
walrus==0.9.3
flask-compress >= 1.0.0
appdirs >= 1.0.0
@@ -51,7 +51,7 @@ a2wsgi ~= 1.10.10 # WSGI wrapper for flask+starlette
starlette>=0.49.1
sse-starlette==2.3.3
pydantic_core>=2.33.2
-pyjwt==2.10.1
+pyjwt==2.12.0
# files reading
pymupdf==1.25.2
python-docx>=1.1.0
@@ -59,12 +59,17 @@ python-pptx>=0.6.0
filetype
charset-normalizer
openpyxl # used by pandas to read txt and xlsx files
+xlrd>=2.0.1 # used by pandas to read legacy .xls files
aipdf==0.0.6.3 # 0.0.7.0 requires openai>=2.0.0, conflicts with langchain-openai
pyarrow<=19.0.0 # used by pandas to read feather files in Files handler
-orjson==3.11.3
+orjson==3.11.6
-mind-castle >= 0.4.9
-pydantic-ai>=0.0.14 # Required for Pydantic AI agents
+mind-castle==0.5.0
+pydantic-ai>=0.0.14 # Required for Pydantic AI agents (kept <2.0-openai-compatible: our stack uses langchain-openai==0.3.6, engine dropped langchain)
bs4 # for rag HTMLDocumentLoader
urllib3>=2.6.3 # not directly required, pinned by Snyk to avoid a vulnerability
+
+# kb providers
+aiobotocore==3.4.0
+google-genai==1.70.0
diff --git a/scripts/run_unit_tests.sh b/scripts/run_unit_tests.sh
index 28d3dc1c33c..1764d33c231 100755
--- a/scripts/run_unit_tests.sh
+++ b/scripts/run_unit_tests.sh
@@ -35,18 +35,12 @@ HANDLERS_TO_INSTALL=(
timescaledb
mssql
oracle
- slack
redshift
bigquery
- clickhouse
web
databricks
- github
- ms_teams
statsforecast
chromadb
- confluence
- elasticsearch
agents
kb
)
@@ -217,10 +211,6 @@ for handler in "${HANDLERS_TO_INSTALL[@]}"; do
-r requirements/requirements-test.txt \
"${HANDLER_EXTRAS[@]}"
- # Install onnxruntime for ChromaDB
- echo "Installing onnxruntime..."
- uv pip install --force-reinstall onnxruntime==1.20.1
-
# Clone parser tests
PARSER_VERSION=$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2)
if [[ ! -d "parser_tests" ]]; then
diff --git a/scripts/test-artifacts.sh b/scripts/test-artifacts.sh
new file mode 100755
index 00000000000..f1769663beb
--- /dev/null
+++ b/scripts/test-artifacts.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "${REPO_ROOT}"
+
+HANDLERS_TO_INSTALL='
+postgres
+mysql
+salesforce
+snowflake
+timescaledb
+mssql
+oracle
+slack
+redshift
+bigquery
+clickhouse
+web
+databricks
+github
+ms_teams
+statsforecast
+chromadb
+confluence
+'
+
+HANDLERS_TO_VERIFY='
+mysql
+salesforce
+postgres
+snowflake
+timescaledb
+mssql
+oracle
+slack
+file
+redshift
+bigquery
+confluence
+'
+
+INSTALL_HANDLERS=()
+while IFS= read -r handler; do
+ handler=${handler//$'\r'/}
+ [[ -z "${handler}" || "${handler}" =~ ^[[:space:]]*# ]] && continue
+ INSTALL_HANDLERS+=("${handler}")
+done <<< "${HANDLERS_TO_INSTALL}"
+
+HANDLER_EXTRAS=()
+for handler in "${INSTALL_HANDLERS[@]}"; do
+ HANDLER_EXTRAS+=(".[${handler}]")
+done
+
+uv pip install ".[agents,kb]" \
+ -r requirements/requirements-test.txt \
+ "${HANDLER_EXTRAS[@]}"
+
+uv pip install --force-reinstall onnxruntime==1.20.1
+
+# Ensure parser tests are present (required for render tests when --runslow)
+if [[ ! -d parser_tests ]]; then
+ git clone --branch v$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) \
+ https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests
+fi
+
+# Run the exact test target used in CI
+make unit_tests_slow
+
+# Generate the extra artifacts produced in CI
+HANDLERS_TO_INSTALL="${HANDLERS_TO_INSTALL}" \
+HANDLERS_TO_VERIFY="${HANDLERS_TO_VERIFY}" \
+COVERAGE_FAIL_UNDER="80" \
+COVERAGE_FILE=.coverage.unit \
+ uv run tests/scripts/check_handler_coverage.py > pytest-coverage-handlers.txt
+COVERAGE_FILE=.coverage.unit uv run coverage html -d reports/htmlcov
+
+# Collect artifacts in a single directory
+ARTIFACT_DIR="tests_artifacts"
+mkdir -p "${ARTIFACT_DIR}"
+
+for artifact in pytest.xml coverage.xml .coverage.unit pytest-coverage.txt pytest-coverage-handlers.txt; do
+ if [[ -f "${artifact}" ]]; then
+ mv "${artifact}" "${ARTIFACT_DIR}/"
+ fi
+done
+
+if [[ -d reports/htmlcov ]]; then
+ rm -rf "${ARTIFACT_DIR}/htmlcov"
+ mv reports/htmlcov "${ARTIFACT_DIR}/htmlcov"
+fi
diff --git a/tests/scripts/check_requirements.py b/tests/scripts/check_requirements.py
index 800a7fa1d4c..f3ec6de303e 100644
--- a/tests/scripts/check_requirements.py
+++ b/tests/scripts/check_requirements.py
@@ -104,10 +104,13 @@ def get_requirements_with_DEP002(path):
"langchain-experimental",
"lxml",
"openpyxl",
+ "xlrd",
"onnxruntime",
"litellm",
"numba", # required in a few files for the hierarchicalforecast. Otherwise, uv may install an old version.
"urllib3", # pinned by Snyk to avoid a vulnerability
+ "faiss-cpu",
+ "pyopenssl",
],
}
@@ -135,7 +138,7 @@ def get_requirements_with_DEP002(path):
HUGGINGFACE_DEP002_IGNORE_HANDLER_DEPS = ["torch"]
-RAG_DEP002_IGNORE_HANDLER_DEPS = ["sentence-transformers", "faiss-cpu"]
+RAG_DEP002_IGNORE_HANDLER_DEPS = ["sentence-transformers"]
SOLR_DEP002_IGNORE_HANDLER_DEPS = ["sqlalchemy-solr"]
@@ -143,6 +146,8 @@ def get_requirements_with_DEP002(path):
CHROMADB_EP002_IGNORE_HANDLER_DEPS = ["onnxruntime"]
+FRESHDESK_EP002_IGNORE_HANDLER_DEPS = ["python-freshdesk"]
+
# The `pyarrow` package is used only if it is installed.
# The handler can work without it.
SNOWFLAKE_DEP003_IGNORE_HANDLER_DEPS = ["pyarrow"]
@@ -158,6 +163,7 @@ def get_requirements_with_DEP002(path):
+ SOLR_DEP002_IGNORE_HANDLER_DEPS
+ OPENAI_DEP002_IGNORE_HANDLER_DEPS
+ CHROMADB_EP002_IGNORE_HANDLER_DEPS
+ + FRESHDESK_EP002_IGNORE_HANDLER_DEPS
)
)
@@ -175,6 +181,7 @@ def get_requirements_with_DEP002(path):
"IfxPyDbi",
"ingres_sa_dialect",
"pyodbc",
+ "freshdesk",
], # 'tests' is the mindsdb tests folder in the repo root, 'pyarrow' used in snowflake handler
"DEP003": DEP003_IGNORE_HANDLER_DEPS,
}
@@ -195,6 +202,7 @@ def get_requirements_with_DEP002(path):
"google-analytics-admin": ["google"],
"google-auth": ["google"],
"google-cloud-storage": ["google"],
+ "google-genai": ["google"],
"google-auth-oauthlib": ["google_auth_oauthlib"],
"google-api-python-client": ["googleapiclient"],
"ibm-cos-sdk": ["ibm_boto3", "ibm_botocore"],
@@ -252,6 +260,8 @@ def get_requirements_with_DEP002(path):
"python-dotenv": ["dotenv"],
"pyjwt": ["jwt"],
"sklearn": ["scikit-learn"],
+ "types-aioboto3": ["aioboto3"],
+ "ag2": ["autogen"],
}
# We use this to exit with a non-zero status code if any check fails
diff --git a/tests/unit/api/http/agents_test.py b/tests/unit/api/http/agents_test.py
index a6253132384..bd2532bcd78 100644
--- a/tests/unit/api/http/agents_test.py
+++ b/tests/unit/api/http/agents_test.py
@@ -27,14 +27,13 @@ def test_prepare(client):
@pytest.mark.deprecated(
"MindsDB models are no longer used with agents. However, Minds still uses models, so this test is kept for now"
)
-def test_post_agent_depreciated(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+def test_post_agent_depreciated(check_agent_llm, client):
create_request = {
"agent": {
"name": "test_post_agent_depreciated",
- "model_name": "test_model",
- "params": {"k1": "v1"},
- "provider": "mindsdb",
- "skills": ["test_skill"],
+ "model": {"provider": "openai", "model_name": "test_model"},
+ "params": {"timeout": 10},
}
}
@@ -45,9 +44,8 @@ def test_post_agent_depreciated(client):
expected_agent = {
"name": "test_post_agent_depreciated",
- "model_name": "test_model",
- "provider": "mindsdb",
- "params": {"k1": "v1"},
+ "model": {"provider": "openai", "model_name": "test_model"},
+ "params": {"timeout": 10},
"id": created_agent["id"],
"project_id": created_agent["project_id"],
"created_at": created_agent["created_at"],
@@ -57,7 +55,9 @@ def test_post_agent_depreciated(client):
assert created_agent == expected_agent
-def test_post_agent(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data")
+def test_post_agent(check_agent_data, check_agent_llm, client):
create_request = {
"agent": {
"name": "TEST_post_agent",
@@ -161,7 +161,9 @@ def test_get_agents_project_not_found(client):
assert get_response.status_code == HTTPStatus.NOT_FOUND
-def test_get_agent(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data")
+def test_get_agent(check_agent_data, check_agent_llm, client):
create_request = {
"agent": {
"name": "test_get_agent",
@@ -236,13 +238,13 @@ def test_get_agent_project_not_found(client):
@pytest.mark.deprecated(
"MindsDB models are no longer used with agents. However, Minds still uses models, so this test is kept for now"
)
-def test_put_agent_update_depreciated(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+def test_put_agent_update_depreciated(check_agent_llm, client):
create_request = {
"agent": {
"name": "test_put_agent_update_depreciated",
- "model_name": "test_model",
- "params": {"k1": "v1", "k2": "v2"},
- "provider": "mindsdb",
+ "model": {"provider": "openai", "model_name": "test_model"},
+ "params": {"timeout": 10},
}
}
@@ -251,7 +253,7 @@ def test_put_agent_update_depreciated(client):
update_request = {
"agent": {
- "params": {"k1": "v1.1", "k2": None, "k3": "v3"},
+ "params": {"timeout": 20},
}
}
@@ -262,9 +264,8 @@ def test_put_agent_update_depreciated(client):
expected_agent = {
"name": "test_put_agent_update_depreciated",
- "model_name": "test_model",
- "params": {"k1": "v1.1", "k3": "v3"},
- "provider": "mindsdb",
+ "model": {"provider": "openai", "model_name": "test_model"},
+ "params": {"timeout": 20},
"id": updated_agent["id"],
"project_id": updated_agent["project_id"],
"created_at": updated_agent["created_at"],
@@ -277,7 +278,9 @@ def test_put_agent_update_depreciated(client):
@pytest.mark.deprecated(
"MindsDB models are no longer used with agents. However, Minds still uses models, so this test is kept for now"
)
-def test_put_agent_update(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data")
+def test_put_agent_update(check_agent_data, check_agent_llm, client):
create_request = {
"agent": {
"name": "test_put_agent_update",
@@ -292,7 +295,7 @@ def test_put_agent_update(client):
update_request = {
"agent": {
- "params": {"k1": "v1.1", "k2": None, "k3": "v3"},
+ "params": {"timeout": 5},
"data": {
"tables": ["example_db.customers", "example_db.orders"],
"knowledge_bases": ["example_kb"],
@@ -307,7 +310,7 @@ def test_put_agent_update(client):
expected_agent = {
"name": "test_put_agent_update",
- "params": {"k1": "v1.1", "k3": "v3"},
+ "params": {"timeout": 5},
"id": updated_agent["id"],
"project_id": updated_agent["project_id"],
"created_at": updated_agent["created_at"],
@@ -356,7 +359,9 @@ def test_put_agent_no_agent(client):
# assert '404' in response.status
-def test_delete_agent(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data")
+def test_delete_agent(check_agent_data, check_agent_llm, client):
create_request = {
"agent": {
"name": "test_delete_agent",
@@ -385,13 +390,14 @@ def test_delete_agent_not_found(client):
assert delete_response.status_code == HTTPStatus.NOT_FOUND
-def test_agent_completions(client):
+@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+def test_agent_completions(check_agent_llm, client):
create_request = {
"agent": {
"name": "test_agent",
"model_name": "test_model",
"provider": "mindsdb",
- "params": {"prompt_template": "Test message!", "user_column": "content"},
+ "params": {"prompt_template": "Test message!"},
}
}
diff --git a/tests/unit/api/http/byom_test.py b/tests/unit/api/http/byom_test.py
index f8870a699d4..3259482ee1a 100644
--- a/tests/unit/api/http/byom_test.py
+++ b/tests/unit/api/http/byom_test.py
@@ -20,6 +20,7 @@ def predict(self, df):
)
+@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled")
def test_disabled_byom(client):
"""Test disabled byom"""
config._config["byom"]["enabled"] = False
@@ -35,6 +36,7 @@ def test_disabled_byom(client):
assert response.status_code == HTTPStatus.FORBIDDEN
+@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled")
def test_path_traversal(client):
"""Test uploading a file"""
config._config["byom"]["enabled"] = True
@@ -53,6 +55,7 @@ def test_path_traversal(client):
@pytest.mark.slow
+@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled")
def test_conflict(client):
"""Test that it is not possible to create two engins with the same name"""
config._config["byom"]["enabled"] = True
diff --git a/tests/unit/api/http/config_test.py b/tests/unit/api/http/config_test.py
new file mode 100644
index 00000000000..672d7d31cd0
--- /dev/null
+++ b/tests/unit/api/http/config_test.py
@@ -0,0 +1,9 @@
+def test_get_config_returns_knowledge_bases_storage(client):
+ response = client.get("/api/config/")
+
+ assert response.status_code == 200
+ payload = response.get_json()
+ assert "knowledge_bases" in payload
+ assert "storage" in payload["knowledge_bases"]
+ assert "available_vector_engines" in payload["knowledge_bases"]
+ assert "pgvector_enabled" in payload["knowledge_bases"]
diff --git a/tests/unit/api/http/files_test.py b/tests/unit/api/http/files_test.py
index 7fd56c878ac..c2eecce5e81 100644
--- a/tests/unit/api/http/files_test.py
+++ b/tests/unit/api/http/files_test.py
@@ -1,5 +1,6 @@
import io
import os.path
+import os
from http import HTTPStatus
@@ -172,22 +173,27 @@ def test_archive_file_with_extension_upload(client):
assert "File name cannot contain extension." in data["detail"]
-def test_put_file_with_path_in_filename_multipart(client):
- """Test uploading a file with path traversal in the filename via multipart form data"""
- file = io.BytesIO(b"Hello, World!")
+def test_zipfile_traversal(client):
+ """Test uploading a zip archive with path traversal filenames"""
+ import zipfile
+ import io
- data = {
- "file": (file, "../test.txt"),
- "source_type": "file",
- }
+ # Create a zip file in memory with a symlink
+ zip_buffer = io.BytesIO()
+ with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zf:
+ zf.writestr("../../../../etc/passwd", "malicious content")
+ zip_buffer.seek(0)
+ data = {"file": (zip_buffer, "archive.zip")}
response = client.put(
- "/api/files/testfile",
+ "/api/files/archive",
data=data,
content_type="multipart/form-data",
follow_redirects=True,
)
# Should fail due to path validation (ValueError is raised)
assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+ data = response.get_json()
+ assert "Attempted Path Traversal in Zip File" in data["detail"]
def test_put_file_with_invalid_parameters_multipart(client):
diff --git a/tests/unit/api/http/handlers_test.py b/tests/unit/api/http/handlers_test.py
new file mode 100644
index 00000000000..0b586bcd7e4
--- /dev/null
+++ b/tests/unit/api/http/handlers_test.py
@@ -0,0 +1,153 @@
+import tempfile
+from http import HTTPStatus
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_icon_builtin_handler(client):
+ """
+ A built-in handler with a registered icon and a valid local path must
+ return the icon file (HTTP 200).
+ """
+ with tempfile.TemporaryDirectory() as tmp:
+ icon_file = Path(tmp) / "icon.svg"
+ icon_file.write_text("")
+
+ meta = {
+ "path": Path(tmp),
+ "icon": {"name": "icon.svg", "type": "svg", "data": ""},
+ "import": {"success": True, "error_message": None},
+ }
+
+ with patch.object(
+ client.application.integration_controller,
+ "get_handlers_metadata",
+ return_value={"mysql": meta},
+ ):
+ response = client.get("/api/handlers/mysql/icon", follow_redirects=True)
+
+ status_code = response.status_code
+ response.close()
+
+ assert status_code == HTTPStatus.OK
+
+
+def test_icon_community_stub_no_path(client):
+ """
+ An unfetched community handler stub (path=None, no 'icon' key) must
+ return HTTP 404 cleanly β no exception should propagate.
+ """
+ meta = {
+ "path": None,
+ "import": {
+ "success": None,
+ "error_message": None,
+ "folder": "github_handler",
+ },
+ "name": "github",
+ "support_level": "community",
+ }
+
+ with patch.object(
+ client.application.integration_controller,
+ "get_handlers_metadata",
+ return_value={"github": meta},
+ ):
+ response = client.get("/api/handlers/github/icon", follow_redirects=True)
+
+ assert response.status_code == HTTPStatus.NOT_FOUND
+
+
+def test_icon_unknown_handler(client):
+ """
+ A request for an icon of an unknown handler must return HTTP 404.
+ """
+ with patch.object(
+ client.application.integration_controller,
+ "get_handlers_metadata",
+ return_value={},
+ ):
+ response = client.get("/api/handlers/does_not_exist/icon", follow_redirects=True)
+
+ assert response.status_code == HTTPStatus.NOT_FOUND
+
+
+def test_handler_info_returns_404_when_not_found(client):
+ """
+ GET /handlers/ must return HTTP 404 when get_handler_meta() returns
+ None (unknown handler or failed fetch) instead of crashing with TypeError.
+ """
+ with patch.object(
+ client.application.integration_controller,
+ "get_handler_meta",
+ return_value=None,
+ ):
+ response = client.get("/api/handlers/nonexistent", follow_redirects=True)
+
+ assert response.status_code == HTTPStatus.NOT_FOUND
+
+
+def test_handler_info_returns_200_without_icon_key(client):
+ """
+ GET /handlers/ must not raise KeyError when the handler metadata has
+ no 'icon' key (community stub or handler without an icon).
+ """
+ meta = {
+ "path": None,
+ "import": {"success": None, "error_message": None, "folder": "github_handler"},
+ "name": "github",
+ "title": "GitHub",
+ "description": "GitHub handler",
+ "permanent": False,
+ "connection_args": None,
+ "class_type": None,
+ "type": "data",
+ "support_level": "community",
+ }
+
+ with patch.object(
+ client.application.integration_controller,
+ "get_handler_meta",
+ return_value=meta,
+ ):
+ response = client.get("/api/handlers/github", follow_redirects=True)
+
+ assert response.status_code == HTTPStatus.OK
+ body = response.get_json()
+ assert body["name"] == "github"
+ assert "path" not in body
+ assert "icon" not in body
+
+
+def test_handlers_list_skips_none_meta(client):
+ """
+ The listing endpoint must not crash when get_handlers_import_status()
+ returns None for a handler (e.g. an unfetched community handler that
+ failed to load). The None entry is silently skipped and the remaining
+ handlers are returned normally.
+ """
+ mysql_meta = {
+ "path": None,
+ "import": {"success": True, "error_message": None, "folder": "mysql_handler"},
+ "name": "mysql",
+ "type": "data",
+ "title": "MySQL",
+ "description": "MySQL handler",
+ "permanent": False,
+ "connection_args": None,
+ "class_type": "sql",
+ "support_level": "community",
+ "icon": None,
+ }
+
+ with patch.object(
+ client.application.integration_controller,
+ "get_handlers_import_status",
+ return_value={"broken_community": None, "mysql": mysql_meta},
+ ):
+ response = client.get("/api/handlers/", follow_redirects=True)
+
+ assert response.status_code == HTTPStatus.OK
+ names = [h["name"] for h in response.get_json()]
+ assert "mysql" in names
+ assert "broken_community" not in names
diff --git a/tests/unit/api/http/knowledge_bases_test.py b/tests/unit/api/http/knowledge_bases_test.py
index b4bd4f3488d..4ccfccfe7a7 100644
--- a/tests/unit/api/http/knowledge_bases_test.py
+++ b/tests/unit/api/http/knowledge_bases_test.py
@@ -3,17 +3,17 @@
from unittest.mock import patch
-@patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler")
-@patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
-def test_update_kb_embeddings(mock_embedding, chroma, client):
+@patch("mindsdb.integrations.handlers.duckdb_faiss_handler.duckdb_faiss_handler.DuckDBFaissHandler")
+@patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+def test_update_kb_embeddings(mock_embedding, handler, client):
# for test of embeddings
- mock_embedding().data = [{"embedding": [0.1, 0.2]}]
+ mock_embedding().embeddings.return_value = [{"embedding": [0.1, 0.2]}]
integration_data = {
"database": {
"name": "kb_vector_db",
- "engine": "chromadb",
- "parameters": {"persist_directory": "kb_vector_db"},
+ "engine": "duckdb_faiss",
+ "parameters": {},
}
}
response = client.post("/api/databases", json=integration_data, follow_redirects=True)
@@ -54,5 +54,5 @@ def test_update_kb_embeddings(mock_embedding, chroma, client):
)
assert update_response.status_code == HTTPStatus.OK
- kwargs = mock_embedding.call_args_list[0][1]
+ kwargs = mock_embedding.call_args_list[0][0][0]
assert kwargs["api_key"] == "embed-key-2"
diff --git a/tests/unit/api/http/test_integrations_passthrough.py b/tests/unit/api/http/test_integrations_passthrough.py
new file mode 100644
index 00000000000..8785926fb78
--- /dev/null
+++ b/tests/unit/api/http/test_integrations_passthrough.py
@@ -0,0 +1,198 @@
+"""HTTP-layer tests for the /api/integrations//passthrough routes.
+
+Exercises the Flask blueprint in isolation: the session's integration
+controller is mocked to return handlers that satisfy
+PassthroughProtocol, so these tests do not touch real handlers and do
+not make network calls.
+"""
+
+from http import HTTPStatus
+from unittest.mock import MagicMock, patch
+
+from mindsdb.integrations.libs.passthrough import PassthroughMixin
+from mindsdb.integrations.libs.passthrough_types import PassthroughResponse
+
+
+class _StubPassthroughHandler(PassthroughMixin):
+ """Handler double: the HTTP layer checks the PassthroughProtocol, then
+ calls `api_passthrough`. We bypass all mixin internals by overriding
+ `api_passthrough` directly so the endpoint test does not depend on
+ connection_data, base_url resolution, or the requests library."""
+
+ def __init__(self, response: PassthroughResponse):
+ self._response = response
+ self.calls: list = []
+
+ def api_passthrough(self, req): # type: ignore[override]
+ self.calls.append(req)
+ return self._response
+
+ def test_passthrough(self):
+ return {"ok": True, "status_code": self._response.status_code}
+
+
+def _patch_handler(handler):
+ """Patch FakeMysqlProxy so the endpoint resolves `name` to `handler`."""
+ proxy = MagicMock()
+ proxy.session.integration_controller.get_data_handler.return_value = handler
+ return patch(
+ "mindsdb.api.http.namespaces.integrations.FakeMysqlProxy",
+ return_value=proxy,
+ )
+
+
+def test_passthrough_happy_path_returns_200_and_serialized_body(client):
+ handler = _StubPassthroughHandler(
+ PassthroughResponse(
+ status_code=200,
+ headers={"X-Safe": "1"},
+ body={"hello": "world"},
+ content_type="application/json",
+ )
+ )
+
+ with _patch_handler(handler):
+ response = client.post(
+ "/api/integrations/any_ds/passthrough",
+ json={"method": "GET", "path": "/me"},
+ )
+
+ assert response.status_code == HTTPStatus.OK
+ payload = response.get_json()
+ assert payload == {
+ "status_code": 200,
+ "headers": {"X-Safe": "1"},
+ "body": {"hello": "world"},
+ "content_type": "application/json",
+ }
+ # Request actually reached the mixin with the parsed PassthroughRequest.
+ assert len(handler.calls) == 1
+ assert handler.calls[0].method == "GET"
+ assert handler.calls[0].path == "/me"
+
+
+def test_passthrough_returns_501_when_handler_does_not_support_mixin(client):
+ # A bare object does not satisfy PassthroughProtocol, so the endpoint
+ # should surface passthrough_not_supported (501) instead of a 500.
+ with _patch_handler(object()):
+ response = client.post(
+ "/api/integrations/mysql/passthrough",
+ json={"method": "GET", "path": "/anything"},
+ )
+
+ assert response.status_code == HTTPStatus.NOT_IMPLEMENTED
+ payload = response.get_json()
+ assert payload["error_code"] == "passthrough_not_supported"
+ assert "mysql" in payload["message"]
+
+
+def test_passthrough_returns_400_on_invalid_method(client):
+ handler = _StubPassthroughHandler(PassthroughResponse(status_code=200, headers={}, body=None, content_type=None))
+
+ with _patch_handler(handler):
+ response = client.post(
+ "/api/integrations/any_ds/passthrough",
+ json={"method": "TRACE", "path": "/me"},
+ )
+
+ assert response.status_code == HTTPStatus.BAD_REQUEST
+ payload = response.get_json()
+ assert payload["error_code"] == "invalid_request"
+ # The handler must not have been invoked when validation fails up front.
+ assert handler.calls == []
+
+
+def _patch_handler_modules(modules: dict):
+ return patch(
+ "mindsdb.api.http.namespaces.integrations.integration_controller.handler_modules",
+ modules,
+ create=True,
+ )
+
+
+def test_capabilities_returns_handlers_dict_and_legacy_list(client):
+ # Two opted-in handlers covering both auth modes, one non-opt-in, and
+ # one broken module that lacks a Handler attribute. auth_modes is
+ # surfaced from the handler's declarative `_auth_mode` class attr β
+ # not inferred from header format.
+ class _BearerHandler(PassthroughMixin):
+ pass # inherits _auth_mode = "bearer"
+
+ class _CustomHeaderHandler(PassthroughMixin):
+ _auth_header_name = "X-Shopify-Access-Token"
+ _auth_header_format = "{token}"
+ _auth_mode = "custom"
+
+ class _NotOptedIn:
+ pass
+
+ bearer_mod = MagicMock()
+ bearer_mod.Handler = _BearerHandler
+ custom_mod = MagicMock()
+ custom_mod.Handler = _CustomHeaderHandler
+ plain_mod = MagicMock()
+ plain_mod.Handler = _NotOptedIn
+ no_handler_mod = MagicMock(spec=[])
+
+ fake_modules = {
+ "hubspot": bearer_mod,
+ "shopify": custom_mod,
+ "mysql": plain_mod,
+ "broken": no_handler_mod,
+ }
+
+ with _patch_handler_modules(fake_modules):
+ response = client.get("/api/integrations/capabilities")
+
+ assert response.status_code == HTTPStatus.OK
+ payload = response.get_json()
+
+ # New structured shape: every opted-in handler appears with auth_modes
+ # and operations metadata.
+ assert payload["handlers"] == {
+ "hubspot": {"auth_modes": ["bearer"], "operations": ["passthrough"]},
+ "shopify": {"auth_modes": ["custom"], "operations": ["passthrough"]},
+ }
+
+ # Legacy flat list: only bearer-auth handlers (Minds migration compat).
+ assert payload["bearer_passthrough"] == ["hubspot"]
+
+
+def test_capabilities_auth_mode_is_declarative_not_format_derived(client):
+ # Handler keeps the default "Bearer {token}" header format but flags
+ # itself as oauth_refresh. The old format-matching heuristic would
+ # have bucketed this as "bearer"; the new declarative path returns
+ # the explicit mode and correctly omits it from the legacy list.
+ class _OAuthRefreshHandler(PassthroughMixin):
+ _auth_mode = "oauth_refresh"
+ # _auth_header_format intentionally left as the default.
+
+ oauth_mod = MagicMock()
+ oauth_mod.Handler = _OAuthRefreshHandler
+
+ with _patch_handler_modules({"hubspot_oauth": oauth_mod}):
+ response = client.get("/api/integrations/capabilities")
+
+ assert response.status_code == HTTPStatus.OK
+ payload = response.get_json()
+ assert payload["handlers"] == {
+ "hubspot_oauth": {"auth_modes": ["oauth_refresh"], "operations": ["passthrough"]},
+ }
+ # oauth_refresh is NOT surfaced in the legacy bearer-only list even
+ # though the underlying header format is still "Bearer {token}".
+ assert payload["bearer_passthrough"] == []
+
+
+def test_capabilities_empty_when_no_handlers_opted_in(client):
+ class _NotOptedIn:
+ pass
+
+ plain_mod = MagicMock()
+ plain_mod.Handler = _NotOptedIn
+
+ with _patch_handler_modules({"mysql": plain_mod}):
+ response = client.get("/api/integrations/capabilities")
+
+ assert response.status_code == HTTPStatus.OK
+ payload = response.get_json()
+ assert payload == {"handlers": {}, "bearer_passthrough": []}
diff --git a/tests/unit/api/http/test_sql_query.py b/tests/unit/api/http/test_sql_query.py
new file mode 100644
index 00000000000..b40096ecdcc
--- /dev/null
+++ b/tests/unit/api/http/test_sql_query.py
@@ -0,0 +1,145 @@
+"""
+Tests for POST /sql/query endpoint with different response_format values:
+1. DEFAULT (None) - returns JSON response
+2. SSE ("sse") - returns Server-Sent Events stream
+3. JSONLINES ("jsonlines") - returns JSON Lines stream
+"""
+
+import json
+from http import HTTPStatus
+from unittest.mock import patch, MagicMock
+
+import pandas as pd
+
+from mindsdb.api.executor.data_types.sql_answer import SQLAnswer
+from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
+from mindsdb.api.executor.sql_query.result_set import ResultSet
+from mindsdb.utilities.types.column import Column
+
+
+def create_mock_sql_answer():
+ """Create a mock SQLAnswer with table data for testing."""
+ columns = [
+ Column(name="id", alias="id"),
+ Column(name="name", alias="name"),
+ Column(name="value", alias="value"),
+ ]
+
+ df = pd.DataFrame(
+ [
+ [1, "test1", 100],
+ [2, "test2", 200],
+ [3, "test3", 300],
+ ]
+ )
+
+ result_set = ResultSet(columns=columns, df=df)
+
+ return SQLAnswer(
+ resp_type=RESPONSE_TYPE.TABLE,
+ result_set=result_set,
+ )
+
+
+def check_response(response_data: dict):
+ # Check response structure for default format
+ assert response_data["type"] == "table"
+ assert "data" in response_data
+ assert "column_names" in response_data
+ assert "context" in response_data
+
+ # Check data content
+ assert response_data["column_names"] == ["id", "name", "value"]
+ assert len(response_data["data"]) == 3
+ assert response_data["data"][0] == [1, "test1", 100]
+ assert response_data["data"][1] == [2, "test2", 200]
+ assert response_data["data"][2] == [3, "test3", 300]
+
+
+def setup_mock_proxy(mock_proxy_class):
+ """Configure mock proxy with default behavior."""
+ mock_proxy = MagicMock()
+ mock_proxy_class.return_value = mock_proxy
+ mock_proxy.process_query.return_value = create_mock_sql_answer()
+ mock_proxy.get_context.return_value = {}
+ return mock_proxy
+
+
+class TestSQLQueryResponseFormat:
+ @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy")
+ def test_query_default_format(self, mock_proxy_class, client):
+ """Test POST /sql/query with default response format (no response_format parameter)."""
+ setup_mock_proxy(mock_proxy_class)
+
+ response = client.post(
+ "/api/sql/query",
+ json={"query": "SELECT * FROM table"},
+ )
+
+ assert response.status_code == HTTPStatus.OK
+ response_data = response.json
+ check_response(response_data)
+
+ @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy")
+ def test_query_sse_format(self, mock_proxy_class, client):
+ """Test POST /sql/query with SSE response format (response_format="sse")."""
+ setup_mock_proxy(mock_proxy_class)
+
+ response = client.post(
+ "/api/sql/query",
+ json={
+ "query": "SELECT * FROM table",
+ "response_format": "sse",
+ },
+ )
+
+ assert response.status_code == HTTPStatus.OK
+ assert "text/event-stream" in response.content_type
+
+ # Parse SSE response and build unified response dict
+ response_text = response.get_data(as_text=True)
+ lines = [line.replace("data: ", "") for line in response_text.split("\n") if line.startswith("data: ")]
+
+ assert len(lines) > 1
+ header = json.loads(lines[0])
+ data_rows = json.loads(lines[1])
+
+ response_data = {
+ "type": header["type"],
+ "column_names": header["column_names"],
+ "data": data_rows,
+ "context": {},
+ }
+ check_response(response_data)
+
+ @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy")
+ def test_query_jsonlines_format(self, mock_proxy_class, client):
+ """Test POST /sql/query with JSONLINES response format (response_format="jsonlines")."""
+ setup_mock_proxy(mock_proxy_class)
+
+ response = client.post(
+ "/api/sql/query",
+ json={
+ "query": "SELECT * FROM table",
+ "response_format": "jsonlines",
+ },
+ )
+
+ assert response.status_code == HTTPStatus.OK
+ assert response.content_type == "application/jsonlines"
+
+ # Parse JSONLINES response and build unified response dict
+ response_text = response.get_data(as_text=True)
+ lines = [line for line in response_text.split("\n") if line.strip()]
+
+ assert len(lines) > 1
+ header = json.loads(lines[0])
+ data_rows = json.loads(lines[1])
+
+ response_data = {
+ "type": header["type"],
+ "column_names": header["column_names"],
+ "data": data_rows,
+ "context": {},
+ }
+ check_response(response_data)
diff --git a/mindsdb/integrations/handlers/airtable_handler/tests/__init__.py b/tests/unit/api/mcp/__init__.py
similarity index 100%
rename from mindsdb/integrations/handlers/airtable_handler/tests/__init__.py
rename to tests/unit/api/mcp/__init__.py
diff --git a/tests/unit/api/mcp/test_completions.py b/tests/unit/api/mcp/test_completions.py
new file mode 100644
index 00000000000..ab03ecff049
--- /dev/null
+++ b/tests/unit/api/mcp/test_completions.py
@@ -0,0 +1,135 @@
+"""
+Unit tests for the MCP completion handler (mindsdb/api/mcp/completions.py).
+"""
+
+import asyncio
+from unittest.mock import MagicMock, patch
+
+from mcp.types import PromptReference, ResourceTemplateReference
+from mcp.shared.memory import create_connected_server_and_client_session
+
+from mindsdb.api.mcp.mcp_instance import mcp
+
+# ---------------------------------------------------------------------------
+# Patch targets
+# ---------------------------------------------------------------------------
+
+_PATCH_GET_DB_NAMES = "mindsdb.api.mcp.completions._get_database_names"
+_PATCH_CTX = "mindsdb.api.mcp.completions.ctx"
+_PATCH_SESSION = "mindsdb.api.mcp.completions.SessionController"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _run(coro):
+ return asyncio.run(coro)
+
+
+def _complete(ref, argument: dict, context_arguments: dict | None = None) -> list[str]:
+ """Run a completion request and return the list of completion values."""
+
+ async def _inner():
+ async with create_connected_server_and_client_session(mcp) as client:
+ result = await client.complete(
+ ref=ref,
+ argument=argument,
+ context_arguments=context_arguments,
+ )
+ return result.completion.values
+
+ return _run(_inner())
+
+
+_PROMPT_REF = PromptReference(type="ref/prompt", name="sample_table")
+_RESOURCE_REF = ResourceTemplateReference(
+ type="ref/resource",
+ uri="schema://databases/{database_name}/tables",
+)
+
+
+def _make_table_mock(name: str) -> MagicMock:
+ t = MagicMock()
+ t.TABLE_NAME = name
+ return t
+
+
+class TestDatabaseNameCompletion:
+ def test_returns_matching_databases(self):
+ with patch(_PATCH_GET_DB_NAMES, return_value=["pg_prod", "pg_staging", "mysql_db"]):
+ values = _complete(_PROMPT_REF, {"name": "database_name", "value": "pg"})
+
+ assert values == ["pg_prod", "pg_staging"]
+
+ def test_prefix_filters_case_sensitively(self):
+ with patch(_PATCH_GET_DB_NAMES, return_value=["Postgres", "postgres"]):
+ values = _complete(_PROMPT_REF, {"name": "database_name", "value": "post"})
+
+ assert values == ["postgres"]
+
+ def test_empty_prefix_returns_all_databases(self):
+ db_names = ["pg", "mysql", "mongo"]
+ with patch(_PATCH_GET_DB_NAMES, return_value=db_names):
+ values = _complete(_PROMPT_REF, {"name": "database_name", "value": ""})
+
+ assert values == db_names
+
+ def test_no_match_returns_empty_list(self):
+ with patch(_PATCH_GET_DB_NAMES, return_value=["pg", "mysql"]):
+ values = _complete(_PROMPT_REF, {"name": "database_name", "value": "oracle"})
+
+ assert values == []
+
+
+class TestTableNameCompletion:
+ def test_returns_matching_tables(self):
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.datahub.get.return_value.get_tables.return_value = [
+ _make_table_mock("orders"),
+ _make_table_mock("order_items"),
+ _make_table_mock("users"),
+ ]
+
+ # match 2/3
+ values = _complete(
+ _RESOURCE_REF,
+ {"name": "table_name", "value": "ord"},
+ context_arguments={"database_name": "pg"},
+ )
+
+ SC.return_value.datahub.get.assert_called_with("pg")
+ assert values == ["orders", "order_items"]
+
+ # match all
+ values = _complete(
+ _RESOURCE_REF,
+ {"name": "table_name", "value": ""},
+ context_arguments={"database_name": "pg"},
+ )
+
+ assert values == ["orders", "order_items", "users"]
+
+ # match 0
+ values = _complete(
+ _RESOURCE_REF,
+ {"name": "table_name", "value": "qwerty"},
+ context_arguments={"database_name": "pg"},
+ )
+
+ assert values == []
+
+ def test_missing_database_name_context_returns_empty(self):
+ """When database_name is not in context_arguments, return empty."""
+ with patch(_PATCH_SESSION):
+ values = _complete(
+ _RESOURCE_REF,
+ {"name": "table_name", "value": "ord"},
+ context_arguments=None,
+ )
+
+ assert values == []
+
+ def test_unknown_argument_name_returns_empty(self):
+ values = _complete(_PROMPT_REF, {"name": "unknown_param", "value": "foo"})
+ assert values == []
diff --git a/tests/unit/api/mcp/test_prompts.py b/tests/unit/api/mcp/test_prompts.py
new file mode 100644
index 00000000000..2e7ea7b5d60
--- /dev/null
+++ b/tests/unit/api/mcp/test_prompts.py
@@ -0,0 +1,45 @@
+"""
+Unit tests for MCP prompts (mindsdb/api/mcp/prompts/*).
+
+mcp.get_prompt() is async; tests run it with asyncio.run().
+"""
+
+import json
+import asyncio
+
+from mindsdb.api.mcp.mcp_instance import mcp
+
+
+def _run(coro):
+ return asyncio.run(coro)
+
+
+def _get_sample_table_prompt(database_name: str, table_name: str):
+ """Call sample_table prompt and return the GetPromptResult."""
+ return _run(mcp.get_prompt("sample_table", {"database_name": database_name, "table_name": table_name}))
+
+
+def _get_first_message_text(prompt: object) -> str:
+ """Return the text content of the first message."""
+ raw = prompt.messages[0].content.text
+ # FastMCP serialises the TextContent to JSON inside the PromptMessage
+ return json.loads(raw)["text"]
+
+
+class TestPrompt:
+ def test_sample_table_exists(self):
+ # sample_table exists and has description
+ prompts = _run(mcp.list_prompts())
+ prompt = next(p for p in prompts if p.name == "sample_table")
+ assert prompt.description # non-empty
+
+ def test_sample_table_content(self):
+ # test content of the prompt
+ result = _get_sample_table_prompt("MyDB", "mytable")
+ assert len(result.messages) == 1
+ assert result.messages[0].role == "user"
+ assert result.messages[0].content.type == "text"
+
+ text = _get_first_message_text(result)
+ assert "`MyDB`.`mytable`" in text
+ assert "limit 5" in text.lower()
diff --git a/tests/unit/api/mcp/test_query_tool.py b/tests/unit/api/mcp/test_query_tool.py
new file mode 100644
index 00000000000..bd4d0bcd430
--- /dev/null
+++ b/tests/unit/api/mcp/test_query_tool.py
@@ -0,0 +1,129 @@
+"""
+Unit tests for the MCP tools (mindsdb/api/mcp/tools/*).
+"""
+
+import asyncio
+import json
+from unittest.mock import patch
+
+
+_PATCH_PROXY = "mindsdb.api.mcp.tools.query.FakeMysqlProxy"
+
+
+def _run(coro):
+ """Run an async coroutine synchronously."""
+ return asyncio.run(coro)
+
+
+def _call_tool(sql: str, context=None):
+ """Call the MCP query tool synchronously and return parsed JSON."""
+ args = {"query": sql}
+ if context is not None:
+ args["context"] = context
+
+ from mindsdb.api.mcp.mcp_instance import mcp
+
+ content, _ = _run(mcp.call_tool("query", args))
+ return json.loads(content[0].text)
+
+
+def _make_proxy_ok(mock_proxy_cls, affected_rows=0):
+ """Configure mock proxy to return an OK response."""
+ mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = {
+ "type": "ok",
+ "affected_rows": affected_rows,
+ }
+ return mock_proxy_cls.return_value
+
+
+def _make_proxy_table(mock_proxy_cls, column_names, data):
+ """Configure mock proxy to return a table response."""
+ mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = {
+ "type": "table",
+ "column_names": column_names,
+ "data": data,
+ }
+ return mock_proxy_cls.return_value
+
+
+def _make_proxy_error(mock_proxy_cls, error_message, error_code=0):
+ """Configure mock proxy to return an error response."""
+ mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = {
+ "type": "error",
+ "error_code": error_code,
+ "error_message": error_message,
+ }
+ return mock_proxy_cls.return_value
+
+
+class TestResponseTypes:
+ def test_select_returns_table_type(self):
+ expected_data = [[1, "alice"], [2, "bob"]]
+ columns_list = ["id", "name"]
+ with patch(_PATCH_PROXY) as MockProxy:
+ _make_proxy_table(MockProxy, columns_list, expected_data)
+ result = _call_tool("SELECT * FROM mydb.users")
+
+ assert result["type"] == "table"
+ assert result["column_names"] == columns_list
+ assert result["data"] == expected_data
+
+ def test_select_empty_result(self):
+ columns_list = ["id", "name"]
+ with patch(_PATCH_PROXY) as MockProxy:
+ _make_proxy_table(MockProxy, columns_list, [])
+ result = _call_tool("SELECT * FROM mydb.users WHERE 1=0")
+
+ assert result["type"] == "table"
+ assert result["column_names"] == columns_list
+ assert result["data"] == []
+
+ def test_insert_returns_ok_type(self):
+ with patch(_PATCH_PROXY) as MockProxy:
+ _make_proxy_ok(MockProxy, affected_rows=1)
+ result = _call_tool("INSERT INTO mydb.t (id) VALUES (1)")
+
+ assert result["type"] == "ok"
+ assert result["affected_rows"] == 1
+
+ def test_proxy_error_response_returns_error_type(self):
+ error_message = "Table 'x' doesn't exist"
+ with patch(_PATCH_PROXY) as MockProxy:
+ _make_proxy_error(MockProxy, error_message, error_code=123)
+ result = _call_tool("SELECT * FROM mydb.x")
+
+ assert result["type"] == "error"
+ assert result["error_message"] == error_message
+ assert result["error_code"] == 123
+
+ def test_exception_in_process_query_returns_error_type(self):
+ error_message = "connection refused"
+ with patch(_PATCH_PROXY) as MockProxy:
+ MockProxy.return_value.process_query.side_effect = Exception(error_message)
+ result = _call_tool("SELECT 1")
+
+ assert result["type"] == "error"
+ assert result["error_message"] == error_message
+
+
+class TestContextParameter:
+ def test_context_is_passed_to_set_context(self):
+ with patch(_PATCH_PROXY) as MockProxy:
+ proxy = _make_proxy_ok(MockProxy)
+ _call_tool("SELECT 1", context={"db": "my_postgres"})
+
+ proxy.set_context.assert_called_once_with({"db": "my_postgres"})
+
+ def test_omitted_context_defaults_to_empty_dict(self):
+ with patch(_PATCH_PROXY) as MockProxy:
+ proxy = _make_proxy_ok(MockProxy)
+ _call_tool("SELECT 1") # no context argument
+
+ proxy.set_context.assert_called_once_with({})
+
+ def test_explicit_none_context_defaults_to_empty_dict(self):
+ with patch(_PATCH_PROXY) as MockProxy:
+ proxy = _make_proxy_ok(MockProxy)
+ _call_tool("SELECT 1", context=None)
+
+ proxy.set_context.assert_called_once_with({})
diff --git a/tests/unit/api/mcp/test_resources.py b/tests/unit/api/mcp/test_resources.py
new file mode 100644
index 00000000000..6bac3891875
--- /dev/null
+++ b/tests/unit/api/mcp/test_resources.py
@@ -0,0 +1,177 @@
+"""
+Unit tests for MCP resources (mindsdb/api/mcp/resources/*)
+"""
+
+import asyncio
+import json
+from unittest.mock import MagicMock, patch
+
+import pandas as pd
+
+from mindsdb.integrations.libs.response import TableResponse as HandlerTableResponse
+from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
+
+
+_PATCH_SESSION = "mindsdb.api.mcp.resources.schema.SessionController"
+_PATCH_TABLE_RESPONSE = "mindsdb.api.mcp.resources.schema.TableResponse"
+_PATCH_RESPONSE_TYPE = "mindsdb.api.mcp.resources.schema.RESPONSE_TYPE"
+
+
+def _run(coro):
+ return asyncio.run(coro)
+
+
+def _read(uri: str) -> list:
+ """Read a resource and return parsed JSON payload."""
+ from mindsdb.api.mcp.mcp_instance import mcp
+
+ contents = list(_run(mcp.read_resource(uri)))
+ return json.loads(contents[0].content)
+
+
+def _make_table_mock(name: str, table_type: str = "BASE TABLE", schema: str = "public") -> MagicMock:
+ t = MagicMock()
+ t.TABLE_NAME = name
+ t.TABLE_TYPE = table_type
+ t.TABLE_SCHEMA = schema
+ return t
+
+
+def _make_columns_table_response(rows: list[dict]) -> MagicMock:
+ """Build a mock HandlerTableResponse with COLUMNS_TABLE type."""
+ tr = MagicMock(spec=HandlerTableResponse)
+ tr.type = RESPONSE_TYPE.COLUMNS_TABLE
+ tr.fetchall.return_value = pd.DataFrame(rows)
+ return tr
+
+
+def _make_kb(name, project, metadata_cols=None, content_cols=None, id_col="id"):
+ return {
+ "name": name,
+ "project": project,
+ "metadata_columns": metadata_cols or [],
+ "content_columns": content_cols or ["body"],
+ "id_column": id_col,
+ }
+
+
+class TestListDatabases:
+ def test_returns_only_data_type_databases(self):
+ from mindsdb.api.mcp.mcp_instance import mcp
+
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.database_controller.get_list.return_value = [
+ {"name": "pg_prod", "type": "data"},
+ {"name": "mindsdb", "type": "project"},
+ {"name": "mysql_db", "type": "data"},
+ ]
+
+ result = list(_run(mcp.read_resource("schema://databases")))
+
+ assert len(result) == 1
+ assert json.loads(result[0].content) == ["pg_prod", "mysql_db"]
+ assert result[0].mime_type == "application/json"
+
+ def test_filters_out_all_non_data_types(self):
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.database_controller.get_list.return_value = [
+ {"name": "mindsdb", "type": "project"},
+ {"name": "files", "type": "files"},
+ ]
+ result = _read("schema://databases")
+
+ assert result == []
+
+
+class TestDbTables:
+ def test_returns_table_names(self):
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.datahub.get.return_value.get_tables.return_value = [
+ _make_table_mock("orders"),
+ _make_table_mock("users"),
+ ]
+ result = _read("schema://databases/mydb/tables")
+
+ SC.return_value.datahub.get.assert_called_once_with("mydb")
+
+ names = [t["TABLE_NAME"] for t in result]
+ assert names == ["orders", "users"]
+ assert set(result[0].keys()) == {"TABLE_NAME", "TABLE_TYPE", "TABLE_SCHEMA"}
+
+ def test_returns_table_type_and_schema(self):
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.datahub.get.return_value.get_tables.return_value = [
+ _make_table_mock("orders", table_type="VIEW", schema="myschema"),
+ ]
+ result = _read("schema://databases/mydb/tables")
+
+ assert result[0]["TABLE_TYPE"] == "VIEW"
+ assert result[0]["TABLE_SCHEMA"] == "myschema"
+
+ def test_empty_database_returns_empty_list(self):
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.datahub.get.return_value.get_tables.return_value = []
+ result = _read("schema://databases/emptydb/tables")
+
+ assert result == []
+
+
+class TestDbTableColumns:
+ def test_returns_column_names_and_types(self):
+ rows = [
+ {"COLUMN_NAME": "id", "MYSQL_DATA_TYPE": "int"},
+ {"COLUMN_NAME": "email", "MYSQL_DATA_TYPE": "varchar(255)"},
+ ]
+ with (
+ patch(_PATCH_SESSION) as SC,
+ patch(_PATCH_TABLE_RESPONSE, HandlerTableResponse),
+ patch(_PATCH_RESPONSE_TYPE, RESPONSE_TYPE),
+ ):
+ SC.return_value.integration_controller.get_data_handler.return_value.get_columns.return_value = (
+ _make_columns_table_response(rows)
+ )
+
+ result = _read("schema://databases/mydb/tables/orders/columns")
+ SC.return_value.integration_controller.get_data_handler.assert_called_once_with("mydb")
+ SC.return_value.integration_controller.get_data_handler.return_value.get_columns.assert_called_once_with(
+ "orders"
+ )
+
+ assert result[0] == {"COLUMN_NAME": "id", "MYSQL_DATA_TYPE": "int"}
+ assert result[1] == {"COLUMN_NAME": "email", "MYSQL_DATA_TYPE": "varchar(255)"}
+
+
+class TestListKnowledgeBases:
+ def test_returns_knowledge_bases_from_all_projects(self):
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.datahub.get_projects_names.return_value = ["mindsdb", "my_project"]
+ SC.return_value.kb_controller.list.side_effect = [
+ [_make_kb("kb1", "mindsdb")],
+ [_make_kb("kb2", "my_project")],
+ ]
+ result = _read("schema://knowledge_bases")
+
+ assert len(result) == 2
+ assert result[0]["name"] == "kb1"
+ assert result[1]["name"] == "kb2"
+
+ def test_returns_correct_kb_fields(self):
+ kb = _make_kb(
+ "docs_kb",
+ "mindsdb",
+ metadata_cols=["source", "date"],
+ content_cols=["body"],
+ id_col="doc_id",
+ )
+ with patch(_PATCH_SESSION) as SC:
+ SC.return_value.datahub.get_projects_names.return_value = ["mindsdb"]
+ SC.return_value.kb_controller.list.return_value = [kb]
+ result = _read("schema://knowledge_bases")
+
+ assert result[0] == {
+ "name": "docs_kb",
+ "project": "mindsdb",
+ "metadata_columns": ["source", "date"],
+ "content_columns": ["body"],
+ "id_column": "doc_id",
+ }
diff --git a/tests/unit/executor/test_agent.py b/tests/unit/executor/test_agent.py
index 88306a748f9..a41d36f0a6c 100644
--- a/tests/unit/executor/test_agent.py
+++ b/tests/unit/executor/test_agent.py
@@ -1,15 +1,15 @@
-import time
import os
import json
from unittest.mock import patch, AsyncMock
+from sqlalchemy.orm.attributes import flag_modified
import pandas as pd
import pytest
import sys
from openai.types.chat import ChatCompletion
from tests.unit.executor_test_base import BaseExecutorDummyML
-from tests.unit.executor.test_knowledge_base import set_litellm_embedding
+from tests.unit.executor.test_knowledge_base import set_embedding
def action_response(type="final_query", sql="", text=""):
@@ -18,16 +18,19 @@ def action_response(type="final_query", sql="", text=""):
return json.dumps({"sql_query": sql, "type": type, "text": text, "short_description": "a tool"})
-def set_openai_completion(mock_openai, llm_response):
+def set_openai_completion(mock_openai, llm_response, add_planning=True):
if isinstance(llm_response, str):
llm_responses = [
action_response(sql=f"select '{llm_response}' as answer"),
]
+ elif not isinstance(llm_response, list):
+ llm_responses = [llm_response]
else:
llm_responses = llm_response
- # always add plan response
- llm_responses.insert(0, '{"plan":"my plan is ...", "estimated_steps":3}')
+ if add_planning:
+ # add plan response
+ llm_responses.insert(0, '{"plan":"my plan is ...", "estimated_steps":3}')
mock_openai.agent_calls = []
calls = []
@@ -104,84 +107,10 @@ def setup_method(self):
config["knowledge_bases"]["disable_autobatch"] = True
- @pytest.mark.slow
- def unused_test_mindsdb_provider(self):
- # pydantic agent doesn't support using mindsdb model
- from mindsdb.api.executor.exceptions import ExecutorException
-
- agent_response = "how can I help you"
- # model
- self.run_sql(
- f"""
- CREATE model base_model
- PREDICT output
- using
- column='question',
- output='{agent_response}',
- engine='dummy_ml',
- join_learn_process=true
- """
- )
-
- self.run_sql("CREATE ML_ENGINE langchain FROM langchain")
-
- agent_params = """
- USING
- provider='mindsdb',
- model = "base_model", -- <
- prompt_template="Answer the user input in a helpful way"
- """
- self.run_sql(f"""
- CREATE AGENT my_agent {agent_params}
- """)
- with pytest.raises(ExecutorException):
- self.run_sql(f"""
- CREATE AGENT my_agent {agent_params}
- """)
- self.run_sql(f"""
- CREATE AGENT IF NOT EXISTS my_agent {agent_params}
- """)
-
- ret = self.run_sql("select * from my_agent where question = 'hi'")
-
- assert agent_response in ret.answer[0]
-
- @pytest.mark.skipif(
- sys.platform in ["darwin", "win32"], reason="Mocking doesn't work on Windows or macOS for some reason"
- )
- @patch("openai.OpenAI")
- def unused_test_openai_provider_with_model(self, mock_openai):
- # pydantic agent doesn't support using mindsdb model
-
- agent_response = "how can I assist you today?"
- set_openai_completion(mock_openai, agent_response)
-
- self.run_sql("CREATE ML_ENGINE langchain FROM langchain")
-
- self.run_sql("""
- CREATE MODEL lang_model
- PREDICT answer USING
- engine = "langchain",
- model = "gpt-3.5-turbo",
- openai_api_key='--',
- prompt_template="Answer the user input in a helpful way";
- """)
-
- time.sleep(5)
-
- self.run_sql("""
- CREATE AGENT my_agent
- USING
- model='lang_model'
- """)
- ret = self.run_sql("select * from my_agent where question = 'hi'")
-
- assert agent_response in ret.answer[0]
-
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
def test_openai_provider(self, mock_openai):
- agent_response = "how can I assist you today?"
- set_openai_completion(mock_openai, agent_response)
+ # test response
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_agent
@@ -193,6 +122,10 @@ def test_openai_provider(self, mock_openai):
},
prompt_template="Answer the user input in a helpful way"
""")
+
+ agent_response = "how can I assist you today?"
+ set_openai_completion(mock_openai, agent_response)
+
ret = self.run_sql("select * from my_agent where question = 'hi'")
# check model params
@@ -252,10 +185,8 @@ def config_get_side_effect(key, default=None):
mock_config_get.side_effect = config_get_side_effect
- agent_response = "how can I assist you today?"
- set_openai_completion(mock_openai, agent_response)
-
# Create an agent with only provider specified - should use default LLM params
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT default_params_agent
USING
@@ -266,6 +197,8 @@ def config_get_side_effect(key, default=None):
},
prompt_template="Answer the user input in a helpful way"
""")
+ agent_response = "how can I assist you today?"
+ set_openai_completion(mock_openai, agent_response)
# Check that the agent was created with the default parameters
agent_info = self.run_sql("SELECT * FROM information_schema.agents WHERE name = 'default_params_agent'")
@@ -273,7 +206,7 @@ def config_get_side_effect(key, default=None):
# Verify the agent has the user-specified parameters but not default parameters
agent_params = json.loads(agent_info["PARAMS"].iloc[0])
assert agent_params.get("prompt_template") == "Answer the user input in a helpful way"
- assert agent_params["model"]["model_name"] == "gpt-3"
+ assert "gpt-3" in agent_info["MODEL"][0]
# Default parameters should NOT be stored in the database
# They will be applied at runtime via get_agent_llm_params
@@ -291,19 +224,18 @@ def config_get_side_effect(key, default=None):
# --- Test that agent creation works with minimal syntax using default_llm config ---
- mock_openai.reset_mock()
- agent_response = "how can I assist you today?"
- set_openai_completion(mock_openai, agent_response)
-
# Create an agent with minimal syntax - should use all default LLM params
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT minimal_syntax_agent
USING
- data = {
- "tables": ['test.table1', 'test.table2']
- }
+ data = { }
""")
+ mock_openai.reset_mock()
+ agent_response = "how can I assist you today?"
+ set_openai_completion(mock_openai, agent_response)
+
ret = self.run_sql("select * from minimal_syntax_agent where question = 'hi'")
assert agent_response in ret.answer[0]
@@ -314,18 +246,21 @@ def config_get_side_effect(key, default=None):
@pytest.mark.skipif(sys.platform == "darwin", reason="Fails on macOS")
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
def test_agent_stream(self, mock_openai):
- agent_response = "how can I assist you today?"
- set_openai_completion(mock_openai, agent_response)
-
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_agent
USING
- provider='openai',
- model = "gpt-3.5-turbo",
- openai_api_key='--',
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "provider": "openai",
+ "api_key": "--"
+ },
prompt_template="Answer the user input in a helpful way"
""")
+ agent_response = "how can I assist you today?"
+ set_openai_completion(mock_openai, agent_response)
+
agents_controller = self.command_executor.session.agents_controller
agent = agents_controller.get_agent("my_agent")
@@ -340,11 +275,7 @@ def test_agent_stream(self, mock_openai):
def _create_kb_storage(self, kb_name):
self.run_sql(f"""
create database db_{kb_name}
- with
- engine='chromadb',
- PARAMETERS = {{
- 'persist_directory': '{kb_name}'
- }}
+ with engine='duckdb_faiss'
""")
return f"db_{kb_name}.default_collection"
@@ -355,10 +286,10 @@ def _drop_kb_storage(self, vector_table_name):
self.run_sql(f"drop database {db_name}")
- @patch("litellm.embedding")
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
- def test_agent_retrieval(self, mock_openai, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ def test_agent_retrieval(self, mock_openai, mock_embedding):
+ set_embedding(mock_embedding)
vector_table_name = self._create_kb_storage("kb_review")
self.run_sql(f"""
@@ -374,16 +305,18 @@ def test_agent_retrieval(self, mock_openai, mock_litellm_embedding):
os.environ["OPENAI_API_KEY"] = "--"
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
create agent retrieve_agent
using
- model='gpt-3.5-turbo',
- provider='openai',
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "provider": "openai"
+ },
prompt_template='Answer the user input in a helpful way using tools',
data = {
"knowledge_bases": ["kb_review"]
- },
- mode='retrieval'
+ }
""")
agent_response = "the answer is yes"
@@ -417,10 +350,12 @@ def test_agent_retrieval(self, mock_openai, mock_litellm_embedding):
self._drop_kb_storage(vector_table_name)
# should not be possible to drop demo agent
- def test_drop_demo_agent(self):
+ @patch("pydantic_ai.providers.openai.AsyncOpenAI")
+ def test_drop_demo_agent(self, mock_openai):
"""should not be possible to drop demo agent"""
from mindsdb.api.executor.exceptions import ExecutorException
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_demo_agent
USING
@@ -429,37 +364,49 @@ def test_drop_demo_agent(self):
'model_name': "gpt-3.5-turbo",
'api_key': '-key-'
},
- prompt_template="--",
- is_demo=true;
+ prompt_template="--"
""")
+
+ # mark as demo in db
+ agent = self.db.Agents.query.filter_by(name="my_demo_agent").first()
+ agent.params["is_demo"] = True
+ flag_modified(agent, "params")
+ self.db.session.commit()
with pytest.raises(ExecutorException):
- self.run_sql("drop agent my_agent")
+ self.run_sql("drop agent my_demo_agent")
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
def test_agent_default_prompt_template(self, mock_openai):
"""Test that agents work correctly with default prompt templates in different modes"""
- agent_response = "default prompt template response"
- set_openai_completion(mock_openai, agent_response)
# Test non-retrieval mode with no prompt_template (should use default)
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT default_prompt_agent
USING
- provider='openai',
- model = "gpt-3.5-turbo",
- openai_api_key='--'
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "provider": "openai",
+ "api_key": "--"
+ }
""")
+
+ agent_response = "default prompt template response"
+ set_openai_completion(mock_openai, agent_response)
+
ret = self.run_sql("select * from default_prompt_agent where question = 'test question'")
assert agent_response in ret.answer[0]
# Test retrieval mode with no prompt_template (should use default retrieval template)
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT default_retrieval_agent
USING
- provider='openai',
- model = "gpt-3.5-turbo",
- openai_api_key='--',
- mode='retrieval'
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "provider": "openai",
+ "api_key": "--"
+ }
""")
mock_openai.reset_mock()
@@ -468,9 +415,9 @@ def test_agent_default_prompt_template(self, mock_openai):
assert agent_response in ret.answer[0]
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_agent_permissions(self, mock_litellm_embedding, mock_openai):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_agent_permissions(self, mock_embedding, mock_openai):
+ set_embedding(mock_embedding)
vector_table_name = self._create_kb_storage("kb_show")
@@ -495,11 +442,14 @@ def test_agent_permissions(self, mock_litellm_embedding, mock_openai):
select id, planet_name content from files.show1
""")
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_agent
USING
- model = "gpt-3.5-turbo",
- openai_api_key='--',
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "api_key": '--'
+ },
data = {
"knowledge_bases": ["kb_show*"],
"tables": ["files.show*"]
@@ -585,9 +535,9 @@ def test_agent_permissions(self, mock_litellm_embedding, mock_openai):
self._drop_kb_storage(vector_table_name)
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_agent_new_syntax(self, mock_embedding, mock_openai):
+ set_embedding(mock_embedding)
vector_table_name = self._create_kb_storage("kb")
df = get_dataset_planets()
# create 2 files and KBs
@@ -605,6 +555,7 @@ def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai):
select id, planet_name content from files.file{i} where id != 1000
""")
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_agent
USING
@@ -657,6 +608,7 @@ def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai):
assert "important user instruction β42" in mock_openai.agent_calls[0]
# --- ALTER AGENT ---
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
ALTER AGENT my_agent
USING
@@ -713,9 +665,9 @@ def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai):
self._drop_kb_storage(vector_table_name)
@patch("pydantic_ai.providers.openai.AsyncOpenAI")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_agent_accept_wrong_quoting(self, mock_litellm_embedding, mock_openai):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_agent_accept_wrong_quoting(self, mock_embedding, mock_openai):
+ set_embedding(mock_embedding)
vector_table_name = self._create_kb_storage("kb1")
self.run_sql(f"""
create knowledge base kb1
@@ -727,11 +679,14 @@ def test_agent_accept_wrong_quoting(self, mock_litellm_embedding, mock_openai):
self.save_file("file1", df)
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_agent
USING
- model = "gpt-3.5-turbo",
- openai_api_key='--',
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "api_key": '--'
+ },
data = {
"knowledge_bases": ["kb1"],
"tables": ["files.file1", "files.file2.*"]
@@ -765,11 +720,14 @@ def test_3_part_table(self, mock_pg, mock_openai):
df = get_dataset_planets()
self.set_handler(mock_pg, name="pg", tables={"planets": df}, schema="public")
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql("""
CREATE AGENT my_agent
USING
- model = "gpt-3.5-turbo",
- openai_api_key='--',
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "api_key": '--'
+ },
data = {
"tables": ["pg.public.*"]
}
@@ -792,19 +750,23 @@ def test_3_part_table(self, mock_pg, mock_openai):
assert "Moon" in mock_openai.agent_calls[3]
assert "Moon" in mock_openai.agent_calls[4]
+ @patch("pydantic_ai.providers.openai.AsyncOpenAI")
@patch("mindsdb.interfaces.agents.pydantic_ai_agent.PydanticAIAgent._get_completion_stream")
- def test_agent_query_param_override(self, mock_get_completion):
+ def test_agent_query_param_override(self, mock_get_completion, mock_openai):
"""
Test that agent parameters can be overridden per-query using the USING clause in SELECT.
"""
mock_get_completion.return_value = [{"type": "data", "content": "-"}]
+ set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False)
self.run_sql(
"""
CREATE AGENT override_agent
USING
- model = 'gpt-4o',
- openai_api_key = 'sk-override',
+ model={
+ "model_name": "gpt-4o",
+ "api_key": 'sk-override'
+ },
prompt_template = 'Answer questions',
timeout = 60;
"""
diff --git a/tests/unit/executor/test_api_handler.py b/tests/unit/executor/test_api_handler.py
index cbc6a8ff862..2e0ee4e9582 100644
--- a/tests/unit/executor/test_api_handler.py
+++ b/tests/unit/executor/test_api_handler.py
@@ -1,15 +1,14 @@
import sys
import types
-from unittest.mock import patch
import datetime as dt
+from unittest.mock import patch
+from dataclasses import dataclass
import pandas as pd
from tests.unit.executor_test_base import BaseExecutorDummyML
-from dataclasses import dataclass
-
# import modules virtually if it is not installed
try:
@@ -26,6 +25,11 @@
class TestApiHandler(BaseExecutorDummyML):
+ def setup_method(self):
+ super().setup_method()
+ self.setup_community_handler("github")
+ self.setup_community_handler("email")
+
@patch("github.Github")
def test_github(self, Github):
"""
@@ -115,7 +119,7 @@ class Issue:
assert args[0] == "feature"
assert kwargs["body"] == "do better"
- @patch("mindsdb.integrations.handlers.email_handler.email_handler.EmailClient")
+ @patch("mindsdb_community_handlers.email_handler.email_handler.EmailClient")
def test_email(self, EmailClient):
"""
Test for APITable
diff --git a/tests/unit/executor/test_base_queires.py b/tests/unit/executor/test_base_queires.py
index 5fbece5c4d3..0a0e3c2ab79 100644
--- a/tests/unit/executor/test_base_queires.py
+++ b/tests/unit/executor/test_base_queires.py
@@ -899,6 +899,40 @@ def test_subselect_1row_aggregate(self, data_handler):
assert len(ret) == 1
assert ret["result"][0] == 1
+ @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
+ def test_cte_join(self, data_handler):
+ self.set_handler(data_handler, name="pg", tables={"stores": get_stores_df()})
+ self.save_file("regions", get_regions_df())
+
+ ret = self.run_sql("""
+ WITH regions AS (
+ SELECT DISTINCT id, name FROM files.regions
+ ),
+ stores AS (
+ SELECT * FROM pg.stores
+ LIMIT 10
+ )
+ SELECT format, region_id FROM pg.stores s
+ JOIN regions r on r.id = s.region_id
+ WHERE s.format IN (SELECT format FROM stores WHERE format='a')
+ LIMIT 100;
+ """)
+ assert len(ret) > 1
+ assert ret["format"][0] == "a"
+
+ @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
+ def test_view_duplicated_cols(self, data_handler):
+ self.set_handler(data_handler, name="pg", tables={"stores": get_stores_df(), "regions": get_regions_df()})
+
+ with pytest.raises(Exception):
+ # `id` exists in both tables, should raise an exception
+ self.run_sql("""
+ create view v1 (
+ select * from pg.stores s
+ join pg.regions r on r.id = s.region_id
+ )
+ """)
+
class TestSet(BaseExecutorTest):
@pytest.mark.parametrize("var", ["var", "@@var", "@@session.var", "session var"])
diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py
index c901e7bab55..89a4acdfda5 100644
--- a/tests/unit/executor/test_executor.py
+++ b/tests/unit/executor/test_executor.py
@@ -11,7 +11,9 @@
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.api.executor.utilities.sql import query_df
+from mindsdb_sql_parser import parse_sql
+
+from mindsdb.api.executor.utilities.sql import query_df, query_dfs
# How to run:
# env PYTHONPATH=./ pytest tests/unit/test_executor.py
@@ -1618,6 +1620,75 @@ def test_query_df_functions(self):
result = query_df(df, query)["result"][0]
assert isinstance(result, dt.time)
+ def test_not_exists_correlated_subquery(self):
+ a = pd.DataFrame(
+ [
+ {"tab_num": 1, "shop": 1},
+ {"tab_num": 1, "shop": 2},
+ {"tab_num": 1, "shop": 3},
+ {"tab_num": 2, "shop": 1},
+ {"tab_num": 2, "shop": 2},
+ {"tab_num": 3, "shop": 1},
+ ]
+ )
+ b = pd.DataFrame([{"shop": 1}, {"shop": 2}, {"shop": 3}])
+
+ result = query_dfs(
+ {"A": a, "B": b},
+ parse_sql(
+ """
+ SELECT DISTINCT a1.tab_num
+ FROM A a1
+ WHERE NOT EXISTS (
+ SELECT * FROM B b
+ WHERE NOT EXISTS (
+ SELECT * FROM A a2
+ WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop
+ )
+ )
+ """,
+ dialect="mindsdb",
+ ),
+ )
+
+ # Only tab_num=1 covers all shops {1, 2, 3}
+ assert list(result["tab_num"]) == [1]
+
+ def test_exists_correlated_subquery(self):
+ # EXISTS version: find tab_num values missing at least one shop.
+ # tab_num=2 misses shop=3, tab_num=3 misses shops 2 and 3.
+ a = pd.DataFrame(
+ [
+ {"tab_num": 1, "shop": 1},
+ {"tab_num": 1, "shop": 2},
+ {"tab_num": 1, "shop": 3},
+ {"tab_num": 2, "shop": 1},
+ {"tab_num": 2, "shop": 2},
+ {"tab_num": 3, "shop": 1},
+ ]
+ )
+ b = pd.DataFrame([{"shop": 1}, {"shop": 2}, {"shop": 3}])
+
+ result = query_dfs(
+ {"A": a, "B": b},
+ parse_sql(
+ """
+ SELECT DISTINCT a1.tab_num
+ FROM A a1
+ WHERE EXISTS (
+ SELECT * FROM B b
+ WHERE NOT EXISTS (
+ SELECT * FROM A a2
+ WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop
+ )
+ )
+ """,
+ dialect="mindsdb",
+ ),
+ )
+
+ assert sorted(result["tab_num"].tolist()) == [2, 3]
+
class TestIfExistsIfNotExists(BaseExecutorMockPredictor):
def setup_method(self, method):
diff --git a/tests/unit/executor/test_files.py b/tests/unit/executor/test_files.py
index 0181da273fa..cdbee61fbdb 100644
--- a/tests/unit/executor/test_files.py
+++ b/tests/unit/executor/test_files.py
@@ -152,8 +152,8 @@ def test_multi_table_relational_division(self):
"""
)
- assert len(result) == 3
- assert sorted(result["tab_num"].tolist()) == [1, 2, 3]
+ assert len(result) == 2
+ assert sorted(result["tab_num"].tolist()) == [1, 2]
def test_multi_table_join_with_aliases(self):
"""Test JOIN with aliases and database prefixes"""
diff --git a/tests/unused/unit/handler_tests/test_handler_metrics.py b/tests/unit/executor/test_handler_metrics.py
similarity index 55%
rename from tests/unused/unit/handler_tests/test_handler_metrics.py
rename to tests/unit/executor/test_handler_metrics.py
index 26ee07040ed..40ae9813a39 100644
--- a/tests/unused/unit/handler_tests/test_handler_metrics.py
+++ b/tests/unit/executor/test_handler_metrics.py
@@ -1,52 +1,58 @@
import datetime
import pandas as pd
-from unit.executor_test_base import BaseExecutorDummyML
+from tests.unit.executor_test_base import BaseExecutorDummyML
class TestHandlerMetrics(BaseExecutorDummyML):
def test_handler_query_time(self):
- self.set_data('tasks', pd.DataFrame([
- {'a': 1, 'b': datetime.datetime(2020, 1, 1)},
- {'a': 2, 'b': datetime.datetime(2020, 1, 2)},
- {'a': 1, 'b': datetime.datetime(2020, 1, 3)},
- ]))
+ self.set_data(
+ "tasks",
+ pd.DataFrame(
+ [
+ {"a": 1, "b": datetime.datetime(2020, 1, 1)},
+ {"a": 2, "b": datetime.datetime(2020, 1, 2)},
+ {"a": 1, "b": datetime.datetime(2020, 1, 3)},
+ ]
+ ),
+ )
# Create & predict a simple model.
- self.run_sql('create database proj')
+ self.run_sql("create database proj")
self.run_sql(
- '''
+ """
CREATE model proj.task_model
from dummy_data (select * from tasks)
PREDICT a
using engine='dummy_ml',
tag = 'first',
join_learn_process=true
- '''
+ """
)
- self.wait_predictor('proj', 'task_model')
- self.run_sql('''
+ self.wait_predictor("proj", "task_model")
+ self.run_sql("""
SELECT m.*
FROM dummy_data.tasks as t
JOIN proj.task_model as m
- ''')
+ """)
# Import here so we don't reuse registry across test functions.
from mindsdb.metrics import metrics
+
query_time_metric = list(metrics.INTEGRATION_HANDLER_QUERY_TIME.collect())[0]
query_size_metric = list(metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.collect())[0]
assert len(query_time_metric.samples) == 3
assert len(query_size_metric.samples) == 3
for sample in query_time_metric.samples:
- assert sample.name.startswith('mindsdb_integration_handler_query_seconds')
- if sample.name.endswith('count'):
+ assert sample.name.startswith("mindsdb_integration_handler_query_seconds")
+ if sample.name.endswith("count"):
assert sample.value == 1.0
- elif sample.name.endswith('sum'):
+ elif sample.name.endswith("sum"):
assert sample.value > 0.0
- elif sample.name.endswith('created'):
+ elif sample.name.endswith("created"):
assert sample.value > 0.0
for sample in query_size_metric.samples:
- assert sample.name.startswith('mindsdb_integration_handler_response_size')
- if sample.name.endswith('count'):
+ assert sample.name.startswith("mindsdb_integration_handler_response_size")
+ if sample.name.endswith("count"):
assert sample.value == 1.0
- elif sample.name.endswith('sum'):
+ elif sample.name.endswith("sum"):
assert sample.value > 0.0
- elif sample.name.endswith('created'):
+ elif sample.name.endswith("created"):
assert sample.value > 0.0
diff --git a/tests/unit/executor/test_knowledge_base.py b/tests/unit/executor/test_knowledge_base.py
index 485e9bb9e20..7646caca9ae 100644
--- a/tests/unit/executor/test_knowledge_base.py
+++ b/tests/unit/executor/test_knowledge_base.py
@@ -1,6 +1,7 @@
import time
import json
import tempfile
+import datetime as dt
from unittest.mock import patch, MagicMock
import threading
@@ -8,7 +9,6 @@
import pandas as pd
import pytest
-import sys
from tests.unit.executor_test_base import BaseExecutorDummyML
from mindsdb.integrations.utilities.rag.rerankers.base_reranker import (
@@ -32,12 +32,13 @@ def task_monitor():
worker.join()
-def dummy_embeddings(string, dimension=None):
+def dummy_embeddings(string, dimension=None, base=None):
# Imitates embedding generation: create vectors which are similar for similar words in inputs
if dimension is None:
dimension = 25**2
embeds = [0] * dimension
- base = 25
+ if base is None:
+ base = 25
string = string.lower().replace(",", " ").replace(".", " ")
for word in string.split():
@@ -60,13 +61,11 @@ def dummy_embeddings(string, dimension=None):
return embeds
-def set_litellm_embedding(mock_litellm_embedding, dimension=None):
+def set_embedding(mock_embedding, dimension=None, base=None):
def resp_f(input, *args, **kwargs):
- mock_response = MagicMock()
- mock_response.data = [{"embedding": dummy_embeddings(s, dimension)} for s in input]
- return mock_response
+ return [dummy_embeddings(s, dimension, base) for s in input]
- mock_litellm_embedding.side_effect = resp_f
+ mock_embedding().embeddings.side_effect = resp_f
class BaseTestKB(BaseExecutorDummyML):
@@ -94,7 +93,7 @@ def _create_kb(
if embedding_model is None:
embedding_model = {
- "provider": "bedrock",
+ "provider": "openai",
"model_name": "dummy_model",
"api_key": "dummy_key",
}
@@ -133,7 +132,6 @@ def _create_kb(
)
def _get_storage_table(self, kb_name):
- # default chromadb
db_name = f"db_{kb_name}"
self._drop_storage_db(db_name)
@@ -141,10 +139,7 @@ def _get_storage_table(self, kb_name):
self.run_sql(f"""
create database {db_name}
with
- engine='chromadb',
- PARAMETERS = {{
- 'persist_directory': '{kb_name}'
- }}
+ engine='duckdb_faiss'
""")
self.storages.append(db_name)
@@ -172,16 +167,16 @@ def _get_ral_table(self):
return pd.DataFrame(data, columns=["ral", "english", "italian"])
-class TestKB(BaseTestKB):
+class TestKBNOAutoBatch(BaseTestKB):
def setup_method(self):
super().setup_method()
from mindsdb.utilities.config import config
config["knowledge_bases"]["disable_autobatch"] = True
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_kb(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_kb(self, mock_embedding):
+ set_embedding(mock_embedding)
self._create_kb("kb_review")
@@ -191,7 +186,7 @@ def test_kb(self, mock_litellm_embedding):
ret = self.run_sql("select * from kb_review")
assert len(ret) == 1
- # show tables in default chromadb
+ # show tables in default vectordb
ret = self.run_sql("show knowledge bases")
db_name = ret.STORAGE[0].split(".")[0]
@@ -199,9 +194,9 @@ def test_kb(self, mock_litellm_embedding):
# only one default collection there
assert len(ret) == 1
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_kb_metadata(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_kb_metadata(self, mock_embedding):
+ set_embedding(mock_embedding)
record = {
"review": "all is good, haven't used yet",
@@ -330,13 +325,8 @@ class _Choice:
def __init__(self, content):
self.message = _Msg(content)
- class _Resp:
- def __init__(self, content):
- self.choices = [_Choice(content)]
-
async def _fake_call_llm(messages):
- content = '{"ranking": [{"doc_index": 2, "score": 0.9}, {"doc_index": 1, "score": 0.6}, {"doc_index": 3, "score": 0.1}]}'
- return _Resp(content)
+ return '{"ranking": [{"doc_index": 2, "score": 0.9}, {"doc_index": 1, "score": 0.6}, {"doc_index": 3, "score": 0.1}]}'
# Bind the async method to this reranker instance
reranker._call_llm = _fake_call_llm # type: ignore
@@ -361,16 +351,11 @@ class _Choice:
def __init__(self, content):
self.message = _Msg(content)
- class _Resp:
- def __init__(self, content):
- self.choices = [_Choice(content)]
-
async def _fake_call_llm(messages):
# Returns code-fenced JSON, includes only two entries, one without score
- content = """```json
+ return """```json
{"ranking": [1, {"doc_index": 3, "score": 0.8}]}
```"""
- return _Resp(content)
reranker._call_llm = _fake_call_llm # type: ignore
@@ -394,14 +379,9 @@ class _Choice:
def __init__(self, content):
self.message = _Msg(content)
- class _Resp:
- def __init__(self, content):
- self.choices = [_Choice(content)]
-
async def _fake_call_llm(messages):
# Invalid JSON forces fallback
- content = "not-json"
- return _Resp(content)
+ return "not-json"
reranker._call_llm = _fake_call_llm # type: ignore
@@ -412,9 +392,9 @@ async def _fake_call_llm(messages):
# Fallback pattern should be descending
assert scores[0] > scores[1] > scores[2]
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_join_kb_table(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_join_kb_table(self, mock_embedding):
+ set_embedding(mock_embedding)
df = self._get_ral_table()
self.save_file("ral", df)
@@ -480,15 +460,12 @@ def test_join_kb_table(self, mock_litellm_embedding):
assert set(ret["id"]) == {"9016", "9023"}
@pytest.mark.slow
- @pytest.mark.skipif(sys.platform == "win32", reason="Causes hard crash on windows.")
- @pytest.mark.skipif(sys.platform == "darwin", reason="Causes hard crash on mac.")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
@patch("mindsdb.integrations.handlers.postgres_handler.Handler")
- def test_kb_partitions(self, mock_handler, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ def test_kb_partitions(self, mock_handler, mock_embedding):
+ set_embedding(mock_embedding)
df = self._get_ral_table()
- self.save_file("ral", df)
df = pd.concat([df] * 30)
# unique ids
@@ -555,7 +532,15 @@ def stream_f(*args, **kwargs):
yield df[chunk_size * i : chunk_size * (i + 1) :]
# --- stream mode ---
- mock_handler().query_stream.side_effect = stream_f
+ # Mock native_query to return TableResponse with generator
+ mock_handler().stream_response = True
+
+ def native_query_with_generator(*args, **kwargs):
+ from mindsdb.integrations.libs.response import TableResponse
+
+ return TableResponse(data_generator=stream_f())
+
+ mock_handler().native_query.side_effect = native_query_with_generator
# test iterate
check_partition(
@@ -573,13 +558,14 @@ def stream_f(*args, **kwargs):
"""
)
- # test threads
- check_partition(
- """
- insert into kb_part SELECT id, english FROM pg.ral
- using batch_size=20, track_column=id, threads = 3
- """
- )
+ # switched off for faiss
+ # # test threads
+ # check_partition(
+ # """
+ # insert into kb_part SELECT id, english FROM pg.ral
+ # using batch_size=20, track_column=id, threads = 3
+ # """
+ # )
# without track column
check_partition(
@@ -590,7 +576,15 @@ def stream_f(*args, **kwargs):
)
# --- general mode ---
- mock_handler().query_stream = None
+ # Mock native_query to return TableResponse with full data
+ mock_handler().stream_response = False
+
+ def native_query_without_generator(*args, **kwargs):
+ from mindsdb.integrations.libs.response import TableResponse
+
+ return TableResponse(data=df)
+
+ mock_handler().native_query.side_effect = native_query_without_generator
# test iterate
check_partition(
@@ -600,25 +594,26 @@ def stream_f(*args, **kwargs):
"""
)
- # test threads
- check_partition(
- """
- insert into kb_part SELECT id, english FROM pg.ral
- using batch_size=20, track_column=id, threads = 3
- """
- )
+ # switched off for faiss
+ # # test threads
+ # check_partition(
+ # """
+ # insert into kb_part SELECT id, english FROM pg.ral
+ # using batch_size=20, track_column=id, threads = 3
+ # """
+ # )
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_kb_algebra(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_kb_algebra(self, mock_embedding):
+ set_embedding(mock_embedding)
lines, i = [], 0
for color in ("white", "red", "green"):
for size in ("big", "middle", "small"):
for shape in ("square", "triangle", "circle"):
i += 1
- lines.append([i, i, f"{color} {size} {shape}", color, size, shape])
- df = pd.DataFrame(lines, columns=["id", "num", "content", "color", "size", "shape"])
+ lines.append([i, i, f"{color} {size} {shape}", color, size, shape, dt.date(2000, 1, i)])
+ df = pd.DataFrame(lines, columns=["id", "num", "content", "color", "size", "shape", "valid_date"])
self.save_file("items", df)
@@ -727,9 +722,46 @@ def test_kb_algebra(self, mock_litellm_embedding):
else:
assert "small" in content
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_select_allowed_columns(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ # -- metadata: like, not like
+ for query in ("trian%", "%riangl%", "%angle"):
+ ret = self.run_sql(f"select * from kb_alg where shape like '{query}'")
+
+ # only triangle
+ assert set(ret["shape"]) == {"triangle"}
+
+ # -- metadata: '>=', '>', '<=', '<'
+
+ ret = self.run_sql("select * from kb_alg where color > 'red'")
+ # only white
+ assert set(ret["color"]) == {"white"}
+
+ ret = self.run_sql("select * from kb_alg where color < 'red'")
+ # only green
+ assert set(ret["color"]) == {"green"}
+
+ ret = self.run_sql("select * from kb_alg where color <= 'red' and color > 'green'")
+ # only red
+ assert set(ret["color"]) == {"red"}
+
+ # filter by int
+ ret = self.run_sql("select * from kb_alg where num >= 10")
+ assert ret["num"].min() == 10
+
+ # filter by date
+ ret = self.run_sql("select * from kb_alg where valid_date >= '2000-01-15'")
+ assert ret["valid_date"].min() > "2000-01-14" and ret["valid_date"].min() < "2000-01-16"
+
+ ret = self.run_sql("select * from kb_alg where valid_date < '2000-01-15'")
+ assert ret["valid_date"].max() > "2000-01-13" and ret["valid_date"].min() < "2000-01-15"
+
+ # -- filter by id and content
+ ret = self.run_sql("select * from kb_alg where content = 'green' and id < 22")
+ assert ret["color"][0] == "green"
+ assert ret["id"].max() < 22
+
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_select_allowed_columns(self, mock_embedding):
+ set_embedding(mock_embedding)
# -- no metadata are specified, generated from inserts --
self._create_kb("kb1")
@@ -772,9 +804,9 @@ def test_select_allowed_columns(self, mock_litellm_embedding):
@patch("mindsdb.interfaces.knowledge_base.llm_client.OpenAI")
@patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_evaluate(self, mock_litellm_embedding, mock_get_scores, mock_openai):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_evaluate(self, mock_embedding, mock_get_scores, mock_openai):
+ set_embedding(mock_embedding)
question, answer = "2+2", "4"
agent_response = f"""
@@ -892,13 +924,13 @@ def test_evaluate(self, mock_litellm_embedding, mock_get_scores, mock_openai):
assert len(df) > 0
@patch("mindsdb.utilities.config.Config.get")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
@patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores")
- def test_save_default_params(self, mock_get_scores, mock_litellm_embedding, mock_config_get):
+ def test_save_default_params(self, mock_get_scores, mock_embedding, mock_config_get):
# reranking result
mock_get_scores.side_effect = lambda query, docs: [0.8 for _ in docs]
- set_litellm_embedding(mock_litellm_embedding)
+ set_embedding(mock_embedding)
def config_get_side_effect(key, default=None):
if key == "default_embedding_model":
@@ -932,10 +964,10 @@ def config_get_side_effect(key, default=None):
assert "openai_model" not in ret["RERANKING_MODEL"][0]
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_relevance_filtering_gt_operator(self, mock_litellm_embedding):
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_relevance_filtering_gt_operator(self, mock_embedding):
"""Test relevance filtering with GREATER_THAN operator"""
- set_litellm_embedding(mock_litellm_embedding)
+ set_embedding(mock_embedding)
test_data = [
{"id": "1", "content": "This is about machine learning and AI"},
@@ -966,9 +998,9 @@ def test_relevance_filtering_gt_operator(self, mock_litellm_embedding):
assert isinstance(ret, pd.DataFrame)
@patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_alter_kb(self, mock_litellm_embedding, mock_get_scores):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_alter_kb(self, mock_embedding, mock_get_scores):
+ set_embedding(mock_embedding)
self._create_kb(
"kb1",
@@ -1022,6 +1054,11 @@ def test_alter_kb(self, mock_litellm_embedding, mock_get_scores):
assert kb.params["reranking_model"]["provider"] == "ollama"
assert "api_key" not in kb.params["reranking_model"]
+ # disable reranking model and ensure config is cleared
+ self.run_sql("ALTER KNOWLEDGE BASE kb1 USING reranking_model = false")
+ kb = self.db.KnowledgeBase.query.filter_by(name="kb1").first()
+ assert kb.params["reranking_model"] == {}
+
@patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores")
@patch("mindsdb.interfaces.knowledge_base.llm_client.OpenAI")
def test_ollama(self, mock_openai, mock_get_scores):
@@ -1042,9 +1079,9 @@ def test_ollama(self, mock_openai, mock_get_scores):
assert "api_key" not in ret["EMBEDDING_MODEL"][0]
assert "api_key" not in ret["RERANKING_MODEL"][0]
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_kb_uppercase_source_columns(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_kb_uppercase_source_columns(self, mock_embedding):
+ set_embedding(mock_embedding)
df = pd.DataFrame(
[
@@ -1116,37 +1153,37 @@ def test_kb_uppercase_source_columns(self, mock_litellm_embedding):
assert len(ret) == 2
assert ret["category"][0] == "Home"
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_dimension_mismatch(self, mock_litellm_embedding):
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_dimension_mismatch(self, mock_embedding):
temp_dir = tempfile.mkdtemp()
self.run_sql(f"""
- create database my_chroma
- with
- engine='chromadb',
+ create database my_faiss
+ with
+ engine='duckdb_faiss',
PARAMETERS = {{
'persist_directory': '{temp_dir}'
}}
""")
- set_litellm_embedding(mock_litellm_embedding, dimension=1000)
- self._create_kb("kb1", storage="my_chroma.table1")
+ set_embedding(mock_embedding, dimension=1000)
+ self._create_kb("kb1", storage="my_faiss.table1")
self.run_sql("insert into kb1 (content) values ('review')")
# change dimension
- set_litellm_embedding(mock_litellm_embedding, dimension=1500)
+ set_embedding(mock_embedding, dimension=1500)
with pytest.raises(ValueError):
- self._create_kb("kb2", storage="my_chroma.table1")
+ self._create_kb("kb2", storage="my_faiss.table1")
self.run_sql("drop knowledge base kb1")
- self.run_sql("drop table my_chroma.table1")
- self.run_sql("drop database my_chroma")
+ self.run_sql("drop table my_faiss.table1")
+ self.run_sql("drop database my_faiss")
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_duplicated_ids(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_duplicated_ids(self, mock_embedding):
+ set_embedding(mock_embedding)
self._create_kb("kb1")
@@ -1176,9 +1213,9 @@ def test_duplicated_ids(self, mock_litellm_embedding):
ret = self.run_sql("select * from kb1 where id = 2")
assert len(ret) == 1
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_update(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_update(self, mock_embedding):
+ set_embedding(mock_embedding)
self._create_kb("kb1")
@@ -1195,11 +1232,157 @@ def test_update(self, mock_litellm_embedding):
assert len(ret) == 1
assert ret["chunk_content"][0] == "dog"
+ @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores")
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_reranking(self, mock_embedding, mock_get_scores):
+ set_embedding(mock_embedding)
+
+ self._create_kb(
+ "kb_ral",
+ content_columns=["english"],
+ reranking_model={
+ "provider": "openai",
+ "model_name": "gpt-3",
+ "api_key": "embed-key-1",
+ },
+ )
+
+ df = self._get_ral_table()
+ self.save_file("ral", df)
+
+ self.run_sql(
+ """
+ insert into kb_ral
+ select * from files.ral
+ """
+ )
+
+ # rank from greater to lower
+ mock_get_scores.side_effect = lambda query, docs: [1 - i / 4 for i in range(len(docs))]
+ ret = self.run_sql("select * from kb_ral where content='white'")
+ assert "white" in ret["chunk_content"].iloc[0]
+
+ # reverse rank: from lower to greater. the most semantic result have to be moved back
+ mock_get_scores.side_effect = lambda query, docs: [i / 4 for i in range(len(docs))]
+ ret = self.run_sql("select * from kb_ral where content='white'")
+ assert "white" not in ret["chunk_content"].iloc[0]
+
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_hybrid_search(self, mock_embedding):
+ df = self._get_ral_table()
+ self.save_file("ral", df)
+
+ set_embedding(mock_embedding)
+
+ self._create_kb("kb_hybrid", content_columns=["english"])
+
+ self.run_sql("insert into kb_hybrid select * from files.ral")
+
+ # changing embedding config, making semantic search irrelevant
+ set_embedding(mock_embedding, base=20)
+
+ # white is not at the top
+ ret = self.run_sql("select * from kb_hybrid where content='white'")
+ assert "white" not in ret["chunk_content"].iloc[0]
+
+ # but it is when hybrid search is used
+ ret = self.run_sql("""
+ select * from kb_hybrid where content='white'
+ and hybrid_search_alpha = 0
+ """)
+ assert "white" in ret["chunk_content"].iloc[0]
+
+ # checking alpha=0.5
+ ret = self.run_sql("""
+ select * from kb_hybrid where content='white'
+ and hybrid_search = true
+ """)
+ assert "white" in ret["chunk_content"].iloc[0]
+
+ # @pytest.mark.slow
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_create_index(self, mock_embedding):
+ set_embedding(mock_embedding)
+
+ df = self._get_ral_table()
+
+ df = pd.concat([df] * 30)
+ # unique ids
+ df["id"] = list(map(str, range(len(df))))
+ self.save_file("ral", df)
+
+ # create kb, fill it
+ self._create_kb("kb_ral", content_columns=["english"])
+
+ self.run_sql("insert into kb_ral select * from files.ral")
+
+ # create index default index (ivf_file, for windows it is ivf)
+ self.run_sql(
+ """
+ CREATE INDEX ON KNOWLEDGE_BASE kb_ral WITH (nlist=1)
+ """
+ )
+
+ # check kb works after index was created
+ ret = self.run_sql("select * from kb_ral where content='white'")
+ assert "white" in ret["chunk_content"].iloc[0]
+
+ # specified index
+ self.run_sql(
+ """
+ CREATE INDEX ON KNOWLEDGE_BASE kb_ral
+ WITH (nlist=1, type='ivf', train_count=50)
+ """
+ )
+ ret = self.run_sql("select * from kb_ral where content='white'")
+ assert "white" in ret["chunk_content"].iloc[0]
+
+ def test_providers(self):
+ with patch("mindsdb.interfaces.knowledge_base.llm_client.BedrockClient.embeddings") as embed:
+ with patch(
+ "mindsdb.integrations.utilities.rag.rerankers.base_reranker.AsyncBedrockClient.acompletion"
+ ) as rerank:
+ embed.return_value = [[1, 1, 1]]
+ rerank.return_value = "100"
+ self._create_kb(
+ "kb_test",
+ embedding_model={
+ "provider": "bedrock",
+ "model_name": "amazon.titan",
+ "aws_access_key_id": "-",
+ "aws_region_name": "us-east-2",
+ "aws_secret_access_key": "-",
+ },
+ reranking_model={
+ "provider": "bedrock",
+ "model_name": "llama3",
+ "aws_access_key_id": "-",
+ "aws_region_name": "us-east-2",
+ "aws_secret_access_key": "-",
+ },
+ )
+ assert embed.call_args_list[0][0][0] == "amazon.titan"
+ assert rerank.call_args_list[0][1]["model_name"] == "llama3"
+
+ with patch("mindsdb.interfaces.knowledge_base.llm_client.SnowflakeClient.embeddings") as embed:
+ embed.return_value = [[1, 1, 1]]
+ self._create_kb(
+ "kb_test",
+ embedding_model={"provider": "snowflake", "model_name": "arctic", "account_id": "ABC", "api_key": "-"},
+ )
+ assert embed.call_args_list[0][0][0] == "arctic"
+ with patch("mindsdb.interfaces.knowledge_base.llm_client.GeminiClient.embeddings") as embed:
+ embed.return_value = [[1, 1, 1]]
+ self._create_kb(
+ "kb_test", embedding_model={"provider": "gemini", "model_name": "gemini-embedding", "api_key": "-"}
+ )
+ assert embed.call_args_list[0][0][0] == "gemini-embedding"
+
class TestKBAutoBatch(BaseTestKB):
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_no_autobatch(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_no_autobatch(self, mock_embedding):
+ set_embedding(mock_embedding)
df = self._get_ral_table()
self.save_file("ral", df)
@@ -1219,9 +1402,9 @@ def test_no_autobatch(self, mock_litellm_embedding):
ret = self.run_sql("select * from kb_ral limit 1")
assert len(ret) == 1
- @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding")
- def test_autobatch(self, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
+ def test_autobatch(self, mock_embedding):
+ set_embedding(mock_embedding)
df = self._get_ral_table()
self.save_file("ral", df)
diff --git a/tests/unit/executor/test_lowercase.py b/tests/unit/executor/test_lowercase.py
index 8f8e0a74870..d7e9d2a32b0 100644
--- a/tests/unit/executor/test_lowercase.py
+++ b/tests/unit/executor/test_lowercase.py
@@ -4,7 +4,7 @@
import pandas as pd
from tests.unit.executor_test_base import BaseExecutorDummyML
-from tests.unit.executor.test_agent import set_litellm_embedding
+from tests.unit.executor.test_agent import set_embedding
class TestLowercase(BaseExecutorDummyML):
@@ -166,13 +166,15 @@ def test_model_name_lowercase(self):
self.run_sql(f"DROP MODEL `{another_name}`")
self.run_sql(f"DROP MODEL {another_name}")
- def test_agent_name_lowercase(self):
+ @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+ def test_agent_name_lowercase(self, check_agent_llm):
agent_params = """
- model='gpt-3.5-turbo',
- provider='openai',
+ model={
+ "model_name": "gpt-3.5-turbo",
+ "provider": "openai"
+ },
prompt_template='Answer the user input in a helpful way using tools',
- max_iterations=5,
- mode='retrieval'
+ mode='text'
"""
# mixed case: agent
@@ -204,18 +206,14 @@ def test_agent_name_lowercase(self):
self.run_sql(f"drop agent `{another_agent_name}`")
self.run_sql(f"drop agent {another_agent_name}")
- @patch("litellm.embedding")
+ @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient")
@patch("openai.OpenAI")
- def test_knowledgebase_name_lowercase(self, mock_openai, mock_litellm_embedding):
- set_litellm_embedding(mock_litellm_embedding)
+ def test_knowledgebase_name_lowercase(self, mock_openai, mock_embedding):
+ set_embedding(mock_embedding)
self.run_sql("""
create database my_kb_storage
- with
- engine='chromadb',
- PARAMETERS = {
- 'persist_directory': 'my_kb_storage'
- }
+ with engine='duckdb_faiss'
""")
kb_params = """
@@ -278,7 +276,8 @@ def test_job_name_lowercase(self):
self.run_sql(f"DROP JOB {another_name}")
- def test_chatbot_lowercase(self):
+ @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+ def test_chatbot_lowercase(self, check_agent_llm):
self.run_sql("create agent my_agent using model={'provider': 'openai', 'model_name': 'gpt-3.5'}")
self.run_sql("create database my_db using engine='dummy_data'")
diff --git a/tests/unit/executor/test_mongodb_handler.py b/tests/unit/executor/test_mongodb_handler.py
index fac9b4de9e0..9d0a7382084 100644
--- a/tests/unit/executor/test_mongodb_handler.py
+++ b/tests/unit/executor/test_mongodb_handler.py
@@ -1,13 +1,21 @@
import unittest
+
+import pytest
from mindsdb_sql_parser import parse_sql
-from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_render import MongodbRender
-from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_parser import MongodbParser
+try:
+ from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_render import MongodbRender
+ from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_parser import MongodbParser
+
+ MONGODB_HANDLER_AVAILABLE = True
+except ImportError:
+ MONGODB_HANDLER_AVAILABLE = False
# How to run:
# env PYTHONPATH=./ pytest tests/unit/test_mongodb_handler.py
+@pytest.mark.skipif(not MONGODB_HANDLER_AVAILABLE, reason="mongodb_handler not installed (community handler)")
class TestMongoDBConverters(unittest.TestCase):
def test_ast_to_mongo(self):
sql = """
@@ -105,6 +113,7 @@ def test_mongo_parser(self):
assert MongodbParser().from_string(mql).to_string() == expected_mql
+@pytest.mark.skipif(not MONGODB_HANDLER_AVAILABLE, reason="mongodb_handler not installed (community handler)")
class TestMongoDBHandler(unittest.TestCase):
def test_mongo_handler(self):
# TODO how to test mongo handler
diff --git a/tests/unit/executor/test_schema.py b/tests/unit/executor/test_schema.py
index 8c80177c006..540467a9c78 100644
--- a/tests/unit/executor/test_schema.py
+++ b/tests/unit/executor/test_schema.py
@@ -12,7 +12,8 @@ def test_show(self):
self.run_sql(f"show {item}")
@pytest.mark.slow
- def test_schema(self):
+ @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm")
+ def test_schema(self, check_agent):
# --- create objects + describe ---
# todo: create knowledge base (requires chromadb)
@@ -91,15 +92,15 @@ def test_schema(self):
# agent
self.run_sql("""
CREATE AGENT agent1
- USING model = 'pred1'
+ USING model = {'model_name': "pred1", "provider": "openai"}
""")
self.run_sql("""
CREATE AGENT proj2.agent2
- USING model = 'pred2' -- it looks up in agent's project
+ USING model = {'model_name': "pred2", "provider": "openai"} -- it looks up in agent's project
""")
df = self.run_sql("describe agent agent1")
- assert df.NAME[0] == "agent1" and df.MODEL_NAME[0] == "pred1"
+ assert df.NAME[0] == "agent1" and "pred1" in df.MODEL[0]
# chatbot
self.run_sql("""
diff --git a/tests/unused/unit/executor/test_udf.py b/tests/unit/executor/test_udf.py
similarity index 60%
rename from tests/unused/unit/executor/test_udf.py
rename to tests/unit/executor/test_udf.py
index 95a8ea7be0d..fa320e188b3 100644
--- a/tests/unused/unit/executor/test_udf.py
+++ b/tests/unit/executor/test_udf.py
@@ -1,51 +1,45 @@
import os
-from textwrap import dedent
from tempfile import TemporaryDirectory
-
+from textwrap import dedent
from unittest.mock import patch
import pandas as pd
import pytest
-
-from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine
from mindsdb_sql_parser.ast import Identifier
-
+from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine
from tests.unit.executor_test_base import BaseExecutorDummyML
-@pytest.mark.parametrize('byom_type', ['inhouse', 'venv'])
+@pytest.mark.skip(reason="BYOM feature is currently disabled in MindsDB")
+@pytest.mark.parametrize("byom_type", ["inhouse", "venv"])
class TestBYOM(BaseExecutorDummyML):
-
def _create_engine(self, name, code, **kwargs):
- with TemporaryDirectory(prefix='udf_test_') as temp_dir:
- code_path = os.path.join(temp_dir, 'code.py')
- reqs_path = os.path.join(temp_dir, 'reqs.py')
+ with TemporaryDirectory(prefix="udf_test_") as temp_dir:
+ code_path = os.path.join(temp_dir, "code.py")
+ reqs_path = os.path.join(temp_dir, "reqs.py")
- open(code_path, 'w').write(code)
- open(reqs_path, 'w').write('')
+ open(code_path, "w").write(code)
+ open(reqs_path, "w").write("")
params = {
- 'code': code_path,
- 'modules': reqs_path,
+ "code": code_path,
+ "modules": reqs_path,
}
params.update(kwargs)
ret = self.command_executor.execute_command(
- CreateMLEngine(
- name=Identifier(name),
- handler='byom',
- params=params
- )
+ CreateMLEngine(name=Identifier(name), handler="byom", params=params)
)
assert ret.error_code is None
- @patch('mindsdb.integrations.handlers.postgres_handler.Handler')
+ @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
def test_udf(self, data_handler, byom_type):
-
- df = pd.DataFrame([
- {'a': 3, 'b': 4, 'c': 'a', 'd': 'b'},
- ])
- self.set_handler(data_handler, name='pg', tables={'sample': df})
+ df = pd.DataFrame(
+ [
+ {"a": 3, "b": 4, "c": "a", "d": "b"},
+ ]
+ )
+ self.set_handler(data_handler, name="pg", tables={"sample": df})
code = dedent("""
from os import listdir # imported function
@@ -64,27 +58,25 @@ def add2(a: int, b: int) -> int:
return a + b
""")
- self._create_engine(name='myml', code=code,
- type=byom_type, mode='custom_function')
+ self._create_engine(name="myml", code=code, type=byom_type, mode="custom_function")
# convert to explicit types, because duckdb doesn't convert it and fails
- ret = self.run_sql('''
+ ret = self.run_sql("""
select myml.fibo(b) x,
myml.add1(a::char,b::char) y,
myml.add2(a,b) z
from pg.sample
- ''')
- assert ret['x'][0] == 3
- assert ret['y'][0] == '34'
- assert ret['z'][0] == 7
+ """)
+ assert ret["x"][0] == 3
+ assert ret["y"][0] == "34"
+ assert ret["z"][0] == 7
# test without table
- ret = self.run_sql('''
+ ret = self.run_sql("""
select myml.fibo(4) x
- ''')
- assert ret['x'][0] == 3
+ """)
+ assert ret["x"][0] == 3
def test_byom(self, byom_type):
-
code = dedent("""
from datetime import datetime
import pandas as pd
@@ -101,17 +93,17 @@ def predict(self, df):
return df[[self.target_col]]
""")
- self._create_engine(name='myml', code=code, type=byom_type)
+ self._create_engine(name="myml", code=code, type=byom_type)
- self.run_sql('''
+ self.run_sql("""
create model m1
predict output_col
using engine='myml',
join_learn_process=true
- ''')
+ """)
- ret = self.run_sql('''
+ ret = self.run_sql("""
select * from m1
where input_col = 'my_input'
- ''')
- assert ret['output_col'][0] == 'my_input>my_response'
+ """)
+ assert ret["output_col"][0] == "my_input>my_response"
diff --git a/tests/unit/executor_test_base.py b/tests/unit/executor_test_base.py
index d305cd9d90f..3cb3e2e8640 100644
--- a/tests/unit/executor_test_base.py
+++ b/tests/unit/executor_test_base.py
@@ -2,6 +2,7 @@
import datetime as dt
import json
import os
+import pytest
import sys
import tempfile
import shutil
@@ -15,10 +16,19 @@
from prometheus_client import REGISTRY
from mindsdb_sql_parser import parse_sql
+from mindsdb.interfaces.database.integrations import integration_controller
+from mindsdb.utilities.config import Config
+
from mindsdb.utilities import log
from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
+from mindsdb.integrations.utilities.community_handler_fetcher import (
+ community_handlers_enabled,
+ fetch_handler,
+ get_community_handlers_storage_dir,
+)
+
logger = log.getLogger(__name__)
@@ -59,6 +69,8 @@ def setup_class(cls):
with open(cfg_file, "w") as fd:
json.dump(config, fd)
+ cls._original_storage_dir_env = os.environ.get("MINDSDB_STORAGE_DIR")
+ cls._original_config_path_env = os.environ.get("MINDSDB_CONFIG_PATH")
os.environ["MINDSDB_STORAGE_DIR"] = cls.storage_dir
os.environ["MINDSDB_CONFIG_PATH"] = cfg_file
@@ -83,6 +95,11 @@ def teardown_class(cls):
if env_var_name in os.environ:
del os.environ[env_var_name]
+ if cls._original_storage_dir_env is not None:
+ os.environ["MINDSDB_STORAGE_DIR"] = cls._original_storage_dir_env
+ if cls._original_config_path_env is not None:
+ os.environ["MINDSDB_CONFIG_PATH"] = cls._original_config_path_env
+
# remove import of mindsdb for next tests
unload_module("mindsdb")
@@ -205,7 +222,7 @@ def setup_method(self, import_dummy_ml=False):
super().setup_method()
self.set_executor(import_dummy_ml=import_dummy_ml)
- def _import_handler(self, integration_controller, handler_name, handler_dir):
+ def _import_handler(self, integration_controller, handler_name, handler_dir, is_community=False):
handler_meta = {
"import": {
"success": None,
@@ -216,9 +233,45 @@ def _import_handler(self, integration_controller, handler_name, handler_dir):
"path": handler_dir,
"name": handler_name,
"permanent": False,
+ "community": is_community,
}
integration_controller.handlers_import_status[handler_name] = handler_meta
- integration_controller.import_handler(handler_name, "")
+ # For community handlers: import_handler uses spec_from_file_location (path-based).
+ # For built-in handlers: pass "" as base_import so importlib resolves from sys.path.
+ if is_community:
+ integration_controller.import_handler(handler_name)
+ else:
+ integration_controller.import_handler(handler_name, "")
+
+ def setup_community_handler(self, handler_name: str):
+ """
+ Register and import a community handler for testing.
+
+ Requires MINDSDB_COMMUNITY_HANDLERS=true β this mirrors the production
+ gate and also exercises the fetch mechanism when the env var is set.
+ Skips the test if the env var is not set or the handler cannot be fetched.
+ """
+
+ if not community_handlers_enabled():
+ pytest.skip(
+ f"Community handler '{handler_name}' test skipped: set MINDSDB_COMMUNITY_HANDLERS=true to enable"
+ )
+
+ storage_root = Path(Config()["paths"]["root"])
+ storage_dir = get_community_handlers_storage_dir(storage_root)
+ handler_dir_name = f"{handler_name}_handler"
+ handler_dir = storage_dir / handler_dir_name
+
+ if not (handler_dir / "__init__.py").exists():
+ try:
+ handler_dir = fetch_handler(handler_dir_name, storage_dir)
+ except Exception as e:
+ pytest.skip(f"Could not fetch community handler '{handler_name}': {e}")
+
+ if handler_dir is None or not (handler_dir / "__init__.py").exists():
+ pytest.skip(f"Community handler '{handler_name}' not available")
+
+ self._import_handler(integration_controller, handler_name, handler_dir, is_community=True)
def set_executor(
self,
@@ -339,11 +392,10 @@ def set_handler(self, mock_handler, name, tables, engine="postgres", schema=None
self.db.session.add(r)
self.db.session.commit()
- from mindsdb.integrations.libs.response import RESPONSE_TYPE
- from mindsdb.integrations.libs.response import HandlerResponse as Response
+ from mindsdb.integrations.libs.response import TableResponse
def handler_response(df, affected_rows: None | int = None):
- response = Response(RESPONSE_TYPE.TABLE, df, affected_rows=affected_rows)
+ response = TableResponse(data=df, affected_rows=affected_rows)
return response
def get_tables_f():
diff --git a/tests/unit/handlers/base_handler_test.py b/tests/unit/handlers/base_handler_test.py
index 85e4133fbfc..be54f494402 100644
--- a/tests/unit/handlers/base_handler_test.py
+++ b/tests/unit/handlers/base_handler_test.py
@@ -2,7 +2,7 @@
from unittest.mock import MagicMock, Mock
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
+ DataHandlerResponse as Response,
HandlerStatusResponse as StatusResponse,
)
@@ -167,22 +167,6 @@ def get_columns_query(self):
"""
pass
- def test_native_query(self):
- """
- Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object.
- """
- mock_conn = MagicMock()
- mock_cursor = MockCursorContextManager()
-
- self.handler.connect = MagicMock(return_value=mock_conn)
- mock_conn.cursor = MagicMock(return_value=mock_cursor)
-
- query_str = f"SELECT * FROM {self.mock_table}"
- data = self.handler.native_query(query_str)
-
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
-
def test_get_columns(self):
"""
Tests if the `get_tables` method calls `native_query` with the correct SQL query.
diff --git a/tests/unit/handlers/test_access_handler.py b/tests/unit/handlers/community_handlers/test_access_handler.py
similarity index 96%
rename from tests/unit/handlers/test_access_handler.py
rename to tests/unit/handlers/community_handlers/test_access_handler.py
index 212832091fb..b360d3ef602 100644
--- a/tests/unit/handlers/test_access_handler.py
+++ b/tests/unit/handlers/community_handlers/test_access_handler.py
@@ -3,6 +3,8 @@
import pandas as pd
import sys
+import pytest
+
from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, RESPONSE_TYPE
# Mock pyodbc and sqlalchemy_access before importing the handler
@@ -13,7 +15,10 @@
sys.modules["sqlalchemy_access"] = MagicMock()
sys.modules["sqlalchemy_access.base"] = MagicMock()
-from mindsdb.integrations.handlers.access_handler.access_handler import AccessHandler
+try:
+ from mindsdb.integrations.handlers.access_handler.access_handler import AccessHandler
+except ImportError:
+ pytestmark = pytest.mark.skip("access_handler not installed (community handler)")
class BaseAccessHandlerTest(unittest.TestCase):
diff --git a/tests/unit/handlers/test_clickhouse.py b/tests/unit/handlers/community_handlers/test_clickhouse.py
similarity index 79%
rename from tests/unit/handlers/test_clickhouse.py
rename to tests/unit/handlers/community_handlers/test_clickhouse.py
index 404c888a4d7..68ec1d895fd 100644
--- a/tests/unit/handlers/test_clickhouse.py
+++ b/tests/unit/handlers/community_handlers/test_clickhouse.py
@@ -6,7 +6,8 @@
from sqlalchemy.exc import SQLAlchemyError
from mindsdb_sql_parser import parse_sql
-from base_handler_test import BaseDatabaseHandlerTest
+from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager
+from mindsdb.integrations.libs.response import TableResponse
try:
from mindsdb.integrations.handlers.clickhouse_handler.clickhouse_handler import ClickHouseHandler
@@ -67,6 +68,21 @@ def test_connect_success(self):
f"clickhouse+{self.dummy_connection_data['protocol']}://{self.dummy_connection_data['user']}:{self.dummy_connection_data['password']}@{self.dummy_connection_data['host']}:{self.dummy_connection_data['port']}/{self.dummy_connection_data['database']}"
)
+ def test_native_query(self):
+ """
+ Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object.
+ """
+ mock_conn = MagicMock()
+ mock_cursor = MockCursorContextManager()
+
+ self.handler.connect = MagicMock(return_value=mock_conn)
+ mock_conn.cursor = MagicMock(return_value=mock_cursor)
+
+ query_str = f"SELECT * FROM {self.mock_table}"
+ data = self.handler.native_query(query_str)
+
+ assert isinstance(data, TableResponse)
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_confluence.py b/tests/unit/handlers/community_handlers/test_confluence.py
similarity index 94%
rename from tests/unit/handlers/test_confluence.py
rename to tests/unit/handlers/community_handlers/test_confluence.py
index f5af306caff..0a56a9a68a5 100644
--- a/tests/unit/handlers/test_confluence.py
+++ b/tests/unit/handlers/community_handlers/test_confluence.py
@@ -2,29 +2,28 @@
import unittest
from unittest.mock import MagicMock, call, patch
+import pytest
import pandas as pd
from base_handler_test import BaseHandlerTestSetup, BaseAPIResourceTestSetup
-from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient
-from mindsdb.integrations.handlers.confluence_handler.confluence_handler import ConfluenceHandler
-from mindsdb.integrations.handlers.confluence_handler.confluence_tables import (
- ConfluenceBlogPostsTable,
- ConfluenceDatabasesTable,
- ConfluencePagesTable,
- ConfluenceSpacesTable,
- ConfluenceWhiteboardsTable,
- ConfluenceTasksTable,
-)
-from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
- HandlerStatusResponse as StatusResponse,
- RESPONSE_TYPE,
-)
-from mindsdb.integrations.utilities.sql_utils import (
- FilterCondition,
- FilterOperator,
- SortColumn,
-)
+
+try:
+ from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient
+ from mindsdb.integrations.handlers.confluence_handler.confluence_handler import ConfluenceHandler
+ from mindsdb.integrations.handlers.confluence_handler.confluence_tables import (
+ ConfluenceBlogPostsTable,
+ ConfluenceDatabasesTable,
+ ConfluencePagesTable,
+ ConfluenceSpacesTable,
+ ConfluenceWhiteboardsTable,
+ ConfluenceTasksTable,
+ )
+
+except ImportError:
+ pytestmark = pytest.mark.skip("Confluence handler not installed")
+
+from mindsdb.integrations.libs.response import TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn
class TestConfluenceHandler(BaseHandlerTestSetup, unittest.TestCase):
@@ -103,21 +102,21 @@ def test_check_connection_failure(self):
def test_get_tables(self):
"""
- Test that the `get_tables` method returns a list of table names.
+ Test that the `get_tables` method returns a TableResponse with a list of table names.
"""
response = self.handler.get_tables()
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
self.assertEqual(response.data_frame.columns.tolist(), ["table_name", "table_type"])
def test_get_columns(self):
"""
- Test that the `get_columns` method returns a list of columns for a table.
+ Test that the `get_columns` method returns a TableResponse with a list of columns for a table.
"""
response = self.handler.get_columns("spaces")
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
self.assertEqual(response.data_frame.columns.tolist(), ["Field", "Type"])
diff --git a/tests/unit/handlers/test_databricks.py b/tests/unit/handlers/community_handlers/test_databricks.py
similarity index 89%
rename from tests/unit/handlers/test_databricks.py
rename to tests/unit/handlers/community_handlers/test_databricks.py
index df976cc4ce6..3b968ada2de 100644
--- a/tests/unit/handlers/test_databricks.py
+++ b/tests/unit/handlers/community_handlers/test_databricks.py
@@ -13,13 +13,14 @@
DatabricksHandler,
)
- DATABRICKS_AVAILABLE = True
except ImportError:
pytestmark = pytest.mark.skip("Databricks handler not installed")
- DATABRICKS_AVAILABLE = False
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
+ TableResponse,
+ ErrorResponse,
+ OkResponse,
+ DataHandlerResponse,
RESPONSE_TYPE,
HandlerStatusResponse as StatusResponse,
)
@@ -52,7 +53,6 @@ def set_results(self, results: List[tuple], columns: List[str]):
CONNECT_PATCH_PATH = "mindsdb.integrations.handlers.databricks_handler.databricks_handler.connect"
-@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed")
class TestInstallationCheck(unittest.TestCase):
"""Test handler installation and information schema."""
@@ -77,7 +77,6 @@ def test_connection_args_validation(self):
self.assertIn("access_token", str(ctx.exception))
-@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed")
class TestDatabricksHandler(unittest.TestCase):
dummy_connection_data = OrderedDict(
server_hostname="adb-1234567890123456.7.azuredatabricks.net",
@@ -146,7 +145,6 @@ def test_check_connection_failure(self):
self.assertTrue(response.error_message)
-@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed")
class TestTableOperations(unittest.TestCase):
"""Test table operations (DDL & DML)."""
@@ -171,7 +169,7 @@ def tearDown(self):
def test_native_query(self):
"""
- Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object.
+ Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a TableResponse object.
"""
self.mock_cursor.set_results([], [])
@@ -179,8 +177,17 @@ def test_native_query(self):
data = self.handler.native_query(query_str)
self.mock_cursor.execute.assert_called_once_with(query_str)
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, DataHandlerResponse)
+ self.assertNotIsInstance(data, ErrorResponse)
+
+ def test_native_query_empty_select_returns_table(self):
+ self.mock_cursor.set_results([], ["id", "name"])
+
+ response = self.handler.native_query("SELECT id, name FROM table WHERE 1 = 0")
+
+ self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
+ self.assertEqual(list(response.data_frame.columns), ["id", "name"])
+ self.assertEqual(len(response.data_frame), 0)
def test_get_tables(self):
"""
@@ -202,6 +209,14 @@ def test_get_tables(self):
"""
self.handler.native_query.assert_called_once_with(expected_query)
+ def test_get_tables_returns_non_table_response_without_transform(self):
+ expected = ErrorResponse(error_message="boom")
+ self.handler.native_query = MagicMock(return_value=expected)
+
+ result = self.handler.get_tables()
+
+ self.assertIs(result, expected)
+
def test_get_columns(self):
"""
Tests if the `get_columns` method correctly constructs the SQL query and if it calls `native_query` with the correct query.
@@ -241,14 +256,12 @@ def test_native_query_server_error(self):
result = self.handler.native_query("SELECT * FROM test_table")
- self.assertEqual(result.type, RESPONSE_TYPE.ERROR)
+ self.assertIsInstance(result, ErrorResponse)
self.assertIn("Server error", result.error_message)
def test_get_tables_all_schemas(self):
"""Test get_tables with all=True."""
- self.handler.native_query = MagicMock(
- return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([{"table_name": "t1"}]))
- )
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame([{"table_name": "t1"}])))
self.handler.get_tables(all=True)
@@ -276,7 +289,7 @@ def test_get_columns_with_schema(self):
]
)
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=mock_df))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=mock_df))
self.handler.get_columns("test_table", schema_name="my_schema")
@@ -284,7 +297,6 @@ def test_get_columns_with_schema(self):
self.assertIn("'my_schema'", query)
-@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed")
class TestAdvancedQueries(unittest.TestCase):
dummy_connection_data = OrderedDict(
server_hostname="test.azuredatabricks.net",
@@ -357,7 +369,6 @@ def test_join_query(self):
self.assertEqual(result.type, RESPONSE_TYPE.TABLE)
-@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed")
class TestDateTimeFunctions(unittest.TestCase):
"""Test date/time functions and INTERVAL transformations."""
@@ -415,7 +426,7 @@ def test_query_transforms_date_add_day_interval(self):
"""Test DATE_ADD with INTERVAL DAY is transformed to integer argument."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '30' DAY) AS due_date FROM orders LIMIT 1")
# breakpoint()
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -426,7 +437,7 @@ def test_query_transforms_date_add_day_interval(self):
def test_query_transforms_date_add_days_plural(self):
"""Test DATE_ADD with INTERVAL DAYS (plural) is transformed correctly."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL 7 DAYS) AS due_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -437,7 +448,7 @@ def test_query_transforms_date_add_days_plural(self):
def test_query_transforms_date_sub_day_interval(self):
"""Test DATE_SUB with INTERVAL DAY is transformed to integer argument."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '5' DAY) AS past_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -448,7 +459,7 @@ def test_query_transforms_date_sub_day_interval(self):
def test_query_transforms_date_add_week_interval(self):
"""Test DATE_ADD with INTERVAL WEEK is converted to days."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' WEEK) AS future_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -459,7 +470,7 @@ def test_query_transforms_date_add_week_interval(self):
def test_query_transforms_date_sub_week_interval(self):
"""Test DATE_SUB with INTERVAL WEEK is converted to days."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '2' WEEK) AS past_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -470,7 +481,7 @@ def test_query_transforms_date_sub_week_interval(self):
def test_query_transforms_date_add_month_interval(self):
"""Test DATE_ADD with INTERVAL MONTH uses ADD_MONTHS function."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' MONTH) AS future_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -481,7 +492,7 @@ def test_query_transforms_date_add_month_interval(self):
def test_query_transforms_date_sub_month_interval(self):
"""Test DATE_SUB with INTERVAL MONTH uses ADD_MONTHS with negative value."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '3' MONTH) AS past_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -492,7 +503,7 @@ def test_query_transforms_date_sub_month_interval(self):
def test_query_transforms_date_add_year_interval(self):
"""Test DATE_ADD with INTERVAL YEAR uses ADD_MONTHS with 12x multiplier."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '1' YEAR) AS future_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -503,7 +514,7 @@ def test_query_transforms_date_add_year_interval(self):
def test_query_transforms_date_sub_year_interval(self):
"""Test DATE_SUB with INTERVAL YEAR uses ADD_MONTHS with negative 12x value."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '2' YEAR) AS past_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -514,7 +525,7 @@ def test_query_transforms_date_sub_year_interval(self):
def test_query_transforms_date_add_hour_interval(self):
"""Test DATE_ADD with INTERVAL HOUR uses TIMESTAMPADD function."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '6' HOUR) AS future_time FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -525,7 +536,7 @@ def test_query_transforms_date_add_hour_interval(self):
def test_query_transforms_date_sub_hour_interval(self):
"""Test DATE_SUB with INTERVAL HOUR uses TIMESTAMPADD with negative value."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '3' HOUR) AS past_time FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -536,7 +547,7 @@ def test_query_transforms_date_sub_hour_interval(self):
def test_query_transforms_date_add_minute_interval(self):
"""Test DATE_ADD with INTERVAL MINUTE uses TIMESTAMPADD function."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '30' MINUTE) AS future_time FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -547,7 +558,7 @@ def test_query_transforms_date_add_minute_interval(self):
def test_query_transforms_date_add_second_interval(self):
"""Test DATE_ADD with INTERVAL SECOND uses TIMESTAMPADD function."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '45' SECOND) AS future_time FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -558,7 +569,7 @@ def test_query_transforms_date_add_second_interval(self):
def test_query_without_interval_unchanged(self):
"""Test that queries without INTERVAL pass through unchanged."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, 10) AS future_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -569,7 +580,7 @@ def test_query_without_interval_unchanged(self):
def test_query_transforms_date_add_quarter_interval(self):
"""Test DATE_ADD with INTERVAL QUARTER uses ADD_MONTHS with 3x multiplier."""
query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' QUARTER) AS future_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -580,7 +591,7 @@ def test_query_transforms_date_add_quarter_interval(self):
def test_query_transforms_date_sub_quarter_interval(self):
"""Test DATE_SUB with INTERVAL QUARTER uses ADD_MONTHS with negative 3x value."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '1' QUARTER) AS past_date FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -591,7 +602,7 @@ def test_query_transforms_date_sub_quarter_interval(self):
def test_query_transforms_date_sub_minute_interval(self):
"""Test DATE_SUB with INTERVAL MINUTE uses TIMESTAMPADD with negative value."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '15' MINUTE) AS past_time FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
@@ -602,7 +613,7 @@ def test_query_transforms_date_sub_minute_interval(self):
def test_query_transforms_date_sub_second_interval(self):
"""Test DATE_SUB with INTERVAL SECOND uses TIMESTAMPADD with negative value."""
query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '30' SECOND) AS past_time FROM orders")
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK))
+ self.handler.native_query = MagicMock(return_value=OkResponse())
self.handler.query(query)
diff --git a/tests/unit/handlers/test_dynamodb.py b/tests/unit/handlers/community_handlers/test_dynamodb.py
similarity index 64%
rename from tests/unit/handlers/test_dynamodb.py
rename to tests/unit/handlers/community_handlers/test_dynamodb.py
index f1aef2481b1..535d11548ab 100644
--- a/tests/unit/handlers/test_dynamodb.py
+++ b/tests/unit/handlers/community_handlers/test_dynamodb.py
@@ -3,34 +3,36 @@
from botocore.client import ClientError
from unittest.mock import patch, MagicMock, Mock
+import pytest
+
from mindsdb_sql_parser import ast
from mindsdb_sql_parser.ast.select.star import Star
from mindsdb_sql_parser.ast.select.identifier import Identifier
from base_handler_test import BaseHandlerTestSetup
-from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
- HandlerStatusResponse as StatusResponse,
- RESPONSE_TYPE
-)
-from mindsdb.integrations.handlers.dynamodb_handler.dynamodb_handler import DynamoDBHandler
+from mindsdb.integrations.libs.response import TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE
+try:
+ from mindsdb.integrations.handlers.dynamodb_handler.dynamodb_handler import DynamoDBHandler
+
+except ImportError:
+ pytestmark = pytest.mark.skip("DynamoDB handler not installed")
-class TestDynamoDBHandler(BaseHandlerTestSetup, unittest.TestCase):
+class TestDynamoDBHandler(BaseHandlerTestSetup, unittest.TestCase):
@property
def dummy_connection_data(self):
return OrderedDict(
- aws_access_key_id='AQAXEQK89OX07YS34OP',
- aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
- region_name='us-east-2',
+ aws_access_key_id="AQAXEQK89OX07YS34OP",
+ aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ region_name="us-east-2",
)
def create_handler(self):
- return DynamoDBHandler('dynamodb', connection_data=self.dummy_connection_data)
+ return DynamoDBHandler("dynamodb", connection_data=self.dummy_connection_data)
def create_patcher(self):
- return patch('boto3.client')
+ return patch("boto3.client")
def test_connect_failure_with_missing_connection_data(self):
"""
@@ -58,8 +60,8 @@ def test_check_connection_failure_with_incorrect_credentials(self):
Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to incorrect credentials.
"""
self.mock_connect.return_value.list_tables.side_effect = ClientError(
- error_response={'Error': {'Code': 'AccessDeniedException', 'Message': 'Access Denied'}},
- operation_name='list_tables'
+ error_response={"Error": {"Code": "AccessDeniedException", "Message": "Access Denied"}},
+ operation_name="list_tables",
)
response = self.handler.check_connection()
@@ -72,7 +74,7 @@ def test_check_connection_success(self):
"""
Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection.
"""
- self.mock_connect.return_value.list_tables.return_value = {'TableNames': ['table1', 'table2']}
+ self.mock_connect.return_value.list_tables.return_value = {"TableNames": ["table1", "table2"]}
response = self.handler.check_connection()
self.assertTrue(response.success)
@@ -81,15 +83,12 @@ def test_check_connection_success(self):
def test_query_select_success(self):
"""
- Test if the `query` method returns a response object with a data frame containing the query result.
+ Test if the `query` method returns a TableResponse object with a data frame containing the query result.
`native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method.
"""
mock_boto3_client = Mock()
mock_boto3_client.execute_statement.return_value = {
- 'Items': [
- {'id': {'N': '1'}, 'name': {'S': 'Alice'}},
- {'id': {'N': '2'}, 'name': {'S': 'Bob'}}
- ]
+ "Items": [{"id": {"N": "1"}, "name": {"S": "Alice"}}, {"id": {"N": "2"}, "name": {"S": "Bob"}}]
}
self.handler.connect = MagicMock(return_value=mock_boto3_client)
@@ -97,18 +96,18 @@ def test_query_select_success(self):
targets=[
Star(),
],
- from_table=ast.Identifier('table1')
+ from_table=ast.Identifier("table1"),
)
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
self.assertEqual(len(df), 2)
- self.assertEqual(df.columns.tolist(), ['id', 'name'])
- self.assertEqual(df['id'].tolist(), [1, 2])
- self.assertEqual(df['name'].tolist(), ['Alice', 'Bob'])
+ self.assertEqual(df.columns.tolist(), ["id", "name"])
+ self.assertEqual(df["id"].tolist(), [1, 2])
+ self.assertEqual(df["name"].tolist(), ["Alice", "Bob"])
def test_query_select_failure_with_unsupported_clause(self):
"""
@@ -118,8 +117,8 @@ def test_query_select_failure_with_unsupported_clause(self):
targets=[
Star(),
],
- from_table=ast.Identifier('table1'),
- limit=10
+ from_table=ast.Identifier("table1"),
+ limit=10,
)
with self.assertRaises(ValueError):
self.handler.query(query)
@@ -132,62 +131,58 @@ def test_query_insert_failure(self):
mock_boto3_client.execute_statement.return_value = {}
self.handler.connect = MagicMock(return_value=mock_boto3_client)
- query = ast.Insert(
- table=Identifier('table1'),
- columns=['id', 'name'],
- values=[[1, 'Alice']]
- )
+ query = ast.Insert(table=Identifier("table1"), columns=["id", "name"], values=[[1, "Alice"]])
with self.assertRaises(ValueError):
self.handler.query(query)
def test_get_tables(self):
"""
- Test if the `get_tables` method returns a response object with a list of tables.
+ Test if the `get_tables` method returns a TableResponse object with a list of tables.
"""
mock_boto3_client = Mock()
- mock_boto3_client.list_tables.return_value = {'TableNames': ['table1', 'table2']}
+ mock_boto3_client.list_tables.return_value = {"TableNames": ["table1", "table2"]}
self.handler.connection = mock_boto3_client
response = self.handler.get_tables()
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
self.assertEqual(len(df), 2)
- self.assertEqual(df.columns.tolist(), ['table_name'])
- self.assertEqual(df['table_name'].tolist(), ['table1', 'table2'])
+ self.assertEqual(df.columns.tolist(), ["table_name"])
+ self.assertEqual(df["table_name"].tolist(), ["table1", "table2"])
def test_get_columns(self):
"""
- Test if the `get_columns` method returns a response object with a list of columns for a given table.
+ Test if the `get_columns` method returns a TableResponse object with a list of columns for a given table.
"""
mock_boto3_client = Mock()
mock_boto3_client.describe_table.return_value = {
- 'Table': {
- 'KeySchema': [
- {'AttributeName': 'id', 'KeyType': 'HASH'},
- {'AttributeName': 'name', 'KeyType': 'RANGE'}
+ "Table": {
+ "KeySchema": [
+ {"AttributeName": "id", "KeyType": "HASH"},
+ {"AttributeName": "name", "KeyType": "RANGE"},
+ ],
+ "AttributeDefinitions": [
+ {"AttributeName": "id", "AttributeType": "N"},
+ {"AttributeName": "name", "AttributeType": "S"},
],
- 'AttributeDefinitions': [
- {'AttributeName': 'id', 'AttributeType': 'N'},
- {'AttributeName': 'name', 'AttributeType': 'S'}
- ]
}
}
self.handler.connection = mock_boto3_client
- response = self.handler.get_columns('table1')
+ response = self.handler.get_columns("table1")
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
self.assertEqual(len(df), 2)
- self.assertEqual(df.columns.tolist(), ['column_name', 'data_type'])
- self.assertEqual(df['column_name'].tolist(), ['id', 'name'])
- self.assertEqual(df['data_type'].tolist(), ['N', 'S'])
+ self.assertEqual(df.columns.tolist(), ["column_name", "data_type"])
+ self.assertEqual(df["column_name"].tolist(), ["id", "name"])
+ self.assertEqual(df["data_type"].tolist(), ["N", "S"])
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/community_handlers/test_jira.py b/tests/unit/handlers/community_handlers/test_jira.py
new file mode 100644
index 00000000000..dd1e4d8f6a0
--- /dev/null
+++ b/tests/unit/handlers/community_handlers/test_jira.py
@@ -0,0 +1,397 @@
+import pytest
+import unittest
+
+from unittest.mock import patch, MagicMock
+from requests.exceptions import HTTPError
+
+import pandas as pd
+
+
+from base_handler_test import BaseHandlerTestSetup
+from mindsdb.integrations.libs.response import (
+ HandlerResponse as Response,
+ HandlerStatusResponse as StatusResponse,
+ RESPONSE_TYPE,
+)
+
+try:
+ from mindsdb.integrations.handlers.jira_handler.jira_handler import JiraHandler
+ from mindsdb.integrations.handlers.jira_handler.jira_tables import (
+ JiraAttachmentsTable,
+ JiraCommentsTable,
+ JiraIssuesTable,
+ JiraUsersTable,
+ JiraProjectsTable,
+ JiraGroupsTable,
+ SERVER_COLUMNS,
+ )
+except ImportError:
+ pytestmark = pytest.mark.skip("Jira handler not installed")
+
+
+class TestJiraHandler(BaseHandlerTestSetup, unittest.TestCase):
+ @property
+ def dummy_connection_data(self):
+ return {
+ "jira_url": "https://your-domain.atlassian.net",
+ "jira_username": "username",
+ "jira_api_token": "your_api_token",
+ "is_cloud": False,
+ }
+
+ @property
+ def err_to_raise_on_connect_failure(self):
+ return HTTPError("Failed to connect to Jira")
+
+ def create_handler(self):
+ return JiraHandler("jira", self.dummy_connection_data)
+
+ def create_patcher(self):
+ return patch("mindsdb.integrations.handlers.jira_handler.jira_handler.Jira")
+
+ def test_connect_cloud_success(self):
+ """Ensure cloud connections normalize credentials and reuse Jira constructor correctly."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ connection = self.handler.connect()
+
+ self.assertIs(connection, mock_client)
+ self.assertTrue(self.handler.is_connected)
+ self.mock_connect.assert_called_once_with(
+ username=self.dummy_connection_data["jira_username"],
+ password=self.dummy_connection_data["jira_api_token"],
+ url=self.dummy_connection_data["jira_url"],
+ cloud=True,
+ )
+
+ def test_connect_reuse_existing_connection(self):
+ """If already connected, connect should reuse the existing client."""
+ cached_connection = MagicMock()
+ self.handler.connection = cached_connection
+ self.handler.is_connected = True
+
+ connection = self.handler.connect()
+
+ self.assertIs(connection, cached_connection)
+ self.mock_connect.assert_not_called()
+
+ def test_connect_runtime_error_on_missing_cached_connection(self):
+ """Marking the handler as connected without a cached client should raise."""
+ self.handler.is_connected = True
+ self.handler.connection = None
+
+ with self.assertRaises(RuntimeError):
+ self.handler.connect()
+
+ def test_check_connection_http_error(self):
+ """check_connection should surface HTTP errors from the Jira client."""
+ mock_client = MagicMock()
+ mock_client.myself.side_effect = HTTPError("Unauthorized")
+ self.mock_connect.return_value = mock_client
+
+ response = self.handler.check_connection()
+
+ assert isinstance(response, StatusResponse)
+ self.assertFalse(response.success)
+ self.assertIn("Unauthorized", response.error_message)
+ self.assertFalse(self.handler.is_connected)
+
+ def test_native_query_http_error(self):
+ """native_query should return an error response when Jira raises HTTPError."""
+ mock_client = MagicMock()
+ mock_client.jql.side_effect = HTTPError("Bad JQL")
+ self.mock_connect.return_value = mock_client
+
+ response = self.handler.native_query("project = TEST")
+
+ assert isinstance(response, Response)
+ self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
+ self.assertIn("Bad JQL", response.error_message)
+
+ def test_native_query_returns_empty_dataframe_when_no_issues(self):
+ """Ensure native_query returns an empty dataframe with expected columns."""
+ mock_client = MagicMock()
+ mock_client.jql.return_value = {}
+ self.mock_connect.return_value = mock_client
+
+ response = self.handler.native_query("project = TEST")
+
+ assert isinstance(response, Response)
+ self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
+ self.assertTrue(response.data_frame.empty)
+ issues_columns = JiraIssuesTable(self.handler).get_columns()
+ self.assertListEqual(list(response.data_frame.columns), issues_columns)
+
+ def test_attachments_table_fetches_missing_fields(self):
+ """Attachments table should refresh issues to retrieve missing attachment fields."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ issue_without_attachments = {"id": "1", "key": "ISSUE-1", "fields": {}}
+ mock_client.get_all_projects.return_value = [{"id": "100"}]
+ mock_client.get_all_project_issues.return_value = [issue_without_attachments]
+ mock_client.get_issue.return_value = {
+ "fields": {"attachment": [{"id": "att-1", "filename": "log.txt", "size": 10, "mimeType": "text/plain"}]}
+ }
+
+ attachments_table = JiraAttachmentsTable(self.handler)
+ result_df = attachments_table.list(limit=1)
+
+ self.assertEqual(len(result_df), 1)
+ self.assertEqual(result_df.loc[0, "attachment_id"], "att-1")
+ self.assertEqual(result_df.loc[0, "issue_key"], "ISSUE-1")
+ self.assertEqual(result_df.loc[0, "filename"], "log.txt")
+
+ def test_issues_table_missing_assignee(self):
+ """Test that issues without assignee are handled correctly."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ mock_issues = [
+ {
+ "id": "1",
+ "key": "TEST-1",
+ "fields": {
+ "project": {"id": "10001", "key": "TEST", "name": "Test Project"},
+ "summary": "Issue with assignee",
+ "priority": {"name": "High"},
+ "creator": {"displayName": "John Doe"},
+ "assignee": {"displayName": "Jane Smith"},
+ "status": {"name": "In Progress"},
+ },
+ },
+ {
+ "id": "2",
+ "key": "TEST-2",
+ "fields": {
+ "project": {"id": "10001", "key": "TEST", "name": "Test Project"},
+ "summary": "Unassigned issue",
+ "priority": {"name": "Medium"},
+ "creator": {"displayName": "John Doe"},
+ "status": {"name": "Open"},
+ },
+ },
+ {
+ "id": "3",
+ "key": "TEST-3",
+ "fields": {
+ "project": {"id": "10001", "key": "TEST", "name": "Test Project"},
+ "summary": "Issue without priority",
+ "creator": {"displayName": "John Doe"},
+ "status": {"name": "Done"},
+ },
+ },
+ ]
+
+ mock_client.get_all_projects.return_value = [{"id": "10001"}]
+ mock_client.get_all_project_issues.return_value = mock_issues
+
+ issues_table = JiraIssuesTable(self.handler)
+ result_df = issues_table.list(conditions=[])
+
+ self.assertEqual(len(result_df), 3)
+ self.assertIsNotNone(result_df)
+
+ expected_columns = issues_table.get_columns()
+ for col in expected_columns:
+ self.assertIn(col, result_df.columns)
+
+ self.assertEqual(result_df.loc[0, "assignee"], "Jane Smith")
+ self.assertTrue(pd.isna(result_df.loc[1, "assignee"]))
+ self.assertTrue(pd.isna(result_df.loc[2, "assignee"]))
+
+ self.assertEqual(result_df.loc[0, "priority"], "High")
+ self.assertEqual(result_df.loc[1, "priority"], "Medium")
+ self.assertTrue(pd.isna(result_df.loc[2, "priority"]))
+
+ def test_users_table_missing_timezone(self):
+ """Test that users without timeZone field are handled correctly."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ mock_users = [
+ {
+ "accountId": "user1",
+ "accountType": "atlassian",
+ "emailAddress": "user1@example.com",
+ "displayName": "User One",
+ "active": True,
+ "timeZone": "America/New_York",
+ "locale": "en_US",
+ },
+ {
+ "accountId": "user2",
+ "accountType": "atlassian",
+ "emailAddress": "user2@example.com",
+ "displayName": "User Two",
+ "active": True,
+ "locale": "en_US",
+ },
+ {
+ "accountId": "user3",
+ "accountType": "atlassian",
+ "displayName": "User Three",
+ "active": False,
+ },
+ ]
+
+ mock_client.users_get_all.return_value = mock_users
+
+ users_table = JiraUsersTable(self.handler)
+ result_df = users_table.list(conditions=[])
+
+ self.assertEqual(len(result_df), 3)
+ self.assertIsNotNone(result_df)
+
+ expected_columns = users_table.get_columns()
+ for col in expected_columns:
+ self.assertIn(col, result_df.columns)
+
+ self.assertEqual(result_df.loc[0, "timeZone"], "America/New_York")
+ self.assertTrue(pd.isna(result_df.loc[1, "timeZone"]))
+ self.assertTrue(pd.isna(result_df.loc[2, "timeZone"]))
+
+ self.assertEqual(result_df.loc[0, "emailAddress"], "user1@example.com")
+ self.assertEqual(result_df.loc[1, "emailAddress"], "user2@example.com")
+ self.assertTrue(pd.isna(result_df.loc[2, "emailAddress"]))
+
+ def test_projects_table_missing_optional_fields(self):
+ """Test that projects with missing optional fields are handled correctly."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ mock_projects = [
+ {
+ "id": "10001",
+ "key": "PROJ1",
+ "name": "Project One",
+ "projectTypeKey": "software",
+ "simplified": True,
+ "style": "classic",
+ "isPrivate": False,
+ "entityId": "entity1",
+ "uuid": "uuid1",
+ },
+ {
+ "id": "10002",
+ "key": "PROJ2",
+ "name": "Project Two",
+ },
+ ]
+
+ mock_client.get_all_projects.return_value = mock_projects
+
+ projects_table = JiraProjectsTable(self.handler)
+ result_df = projects_table.list(conditions=[])
+
+ self.assertEqual(len(result_df), 2)
+ self.assertIsNotNone(result_df)
+
+ expected_columns = projects_table.get_columns()
+ for col in expected_columns:
+ self.assertIn(col, result_df.columns)
+
+ self.assertEqual(result_df.loc[0, "projectTypeKey"], "software")
+ self.assertTrue(pd.isna(result_df.loc[1, "projectTypeKey"]))
+
+ def test_groups_table_missing_fields(self):
+ """Test that groups with missing fields are handled correctly."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ mock_groups = {
+ "groups": [
+ {
+ "groupId": "group1",
+ "name": "Developers",
+ "html": "Developers",
+ },
+ {
+ "groupId": "group2",
+ "name": "Managers",
+ },
+ ]
+ }
+
+ mock_client.get_groups.return_value = mock_groups
+
+ groups_table = JiraGroupsTable(self.handler)
+ result_df = groups_table.list(conditions=[])
+
+ self.assertEqual(len(result_df), 2)
+ self.assertIsNotNone(result_df)
+
+ expected_columns = groups_table.get_columns()
+ for col in expected_columns:
+ self.assertIn(col, result_df.columns)
+
+ self.assertEqual(result_df.loc[0, "html"], "Developers")
+ self.assertTrue(pd.isna(result_df.loc[1, "html"]))
+
+ def test_comments_table_fetches_missing_fields(self):
+ """Comments table should refresh issues to retrieve missing comment fields."""
+ mock_client = MagicMock()
+ self.mock_connect.return_value = mock_client
+
+ issue_without_comments = {"id": "1", "key": "ISSUE-1", "fields": {}}
+ mock_client.get_all_projects.return_value = [{"id": "100"}]
+ mock_client.get_all_project_issues.return_value = [issue_without_comments]
+ mock_client.get_issue.return_value = {
+ "fields": {
+ "comment": {
+ "comments": [
+ {
+ "id": "c-1",
+ "body": "First comment",
+ "created": "2024-01-01",
+ "updated": "2024-01-02",
+ "author": {
+ "displayName": "Commenter",
+ "accountId": "acc-1",
+ },
+ "visibility": {
+ "type": "role",
+ "value": "admin",
+ },
+ }
+ ]
+ }
+ }
+ }
+
+ comments_table = JiraCommentsTable(self.handler)
+ result_df = comments_table.list(limit=1)
+
+ self.assertEqual(len(result_df), 1)
+ self.assertEqual(result_df.loc[0, "comment_id"], "c-1")
+ self.assertEqual(result_df.loc[0, "issue_key"], "ISSUE-1")
+ self.assertEqual(result_df.loc[0, "body"], "First comment")
+ self.assertEqual(result_df.loc[0, "author"], "Commenter")
+ self.assertEqual(result_df.loc[0, "visibility_type"], "role")
+ self.assertEqual(result_df.loc[0, "visibility_value"], "admin")
+
+ def test_users_table_server_mode_columns(self):
+ """Users table should switch to server columns when client.cloud is False."""
+ mock_client = MagicMock()
+ mock_client.cloud = False
+ self.mock_connect.return_value = mock_client
+
+ mock_client.user.return_value = {
+ "name": "serveruser",
+ "displayName": "Server User",
+ "emailAddress": "server@example.com",
+ }
+
+ users_table = JiraUsersTable(self.handler)
+ result_df = users_table.list()
+
+ self.assertEqual(len(result_df), 1)
+ self.assertListEqual(list(result_df.columns), SERVER_COLUMNS)
+ self.assertEqual(result_df.loc[0, "name"], "serveruser")
+ self.assertEqual(result_df.loc[0, "displayName"], "Server User")
+ self.assertEqual(result_df.loc[0, "emailAddress"], "server@example.com")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit/handlers/test_jira_handler.py b/tests/unit/handlers/community_handlers/test_jira_handler.py
similarity index 100%
rename from tests/unit/handlers/test_jira_handler.py
rename to tests/unit/handlers/community_handlers/test_jira_handler.py
diff --git a/tests/unit/handlers/test_mongodb.py b/tests/unit/handlers/community_handlers/test_mongodb.py
similarity index 93%
rename from tests/unit/handlers/test_mongodb.py
rename to tests/unit/handlers/community_handlers/test_mongodb.py
index 36ae12c8479..81232adea01 100644
--- a/tests/unit/handlers/test_mongodb.py
+++ b/tests/unit/handlers/community_handlers/test_mongodb.py
@@ -2,6 +2,8 @@
from collections import OrderedDict
from unittest.mock import patch, MagicMock
+import pytest
+
from bson import ObjectId
from mindsdb_sql_parser import ast
from mindsdb_sql_parser.ast.select.star import Star
@@ -11,11 +13,17 @@
from base_handler_test import BaseHandlerTestSetup
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
+ TableResponse,
+ OkResponse,
+ ErrorResponse,
HandlerStatusResponse as StatusResponse,
RESPONSE_TYPE,
)
-from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler
+
+try:
+ from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler
+except ImportError:
+ pytestmark = pytest.mark.skip("mongodb_handler not installed (community handler)")
class TestMongoDBHandler(BaseHandlerTestSetup, unittest.TestCase):
@@ -88,7 +96,7 @@ def test_check_connection_success(self):
def test_query_failure_with_non_existent_collection(self):
"""
- Test if the `query` method returns a response object with an error message on failed query due to non-existent collection.
+ Test if the `query` method returns an ErrorResponse object with an error message on failed query due to non-existent collection.
"""
self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [
"movies"
@@ -103,7 +111,7 @@ def test_query_failure_with_non_existent_collection(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, ErrorResponse)
self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
self.assertTrue(response.error_message)
@@ -139,7 +147,7 @@ def test_query_failure_with_unsupported_operation(self):
def test_query_select_success(self):
"""
- Test if the `query` method returns a response object with a data frame containing the query result.
+ Test if the `query` method returns a TableResponse object with a data frame containing the query result.
`native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method.
"""
self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [
@@ -164,7 +172,7 @@ def test_query_select_success(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -174,7 +182,7 @@ def test_query_select_success(self):
def test_query_update_success(self):
"""
- Test if the `query` method returns a response object with a 'OK' status.
+ Test if the `query` method returns an OkResponse object with a 'OK' status.
`native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method.
"""
self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [
@@ -201,12 +209,12 @@ def test_query_update_success(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, OkResponse)
self.assertEqual(response.type, RESPONSE_TYPE.OK)
def test_get_tables(self):
"""
- Tests the `get_tables` method returns a response object with a list of tables (collections) in the database.
+ Tests the `get_tables` method returns a TableResponse object with a list of tables (collections) in the database.
"""
self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [
"theaters",
@@ -219,7 +227,7 @@ def test_get_tables(self):
response = self.handler.get_tables()
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -232,7 +240,7 @@ def test_get_tables(self):
def test_get_columns(self):
"""
- Tests the `get_columns` method returns a response object with a list of columns (fields) for a given table (collection).
+ Tests the `get_columns` method returns a TableResponse object with a list of columns (fields) for a given table (collection).
"""
self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].find_one.return_value = {
"_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"),
@@ -243,7 +251,7 @@ def test_get_columns(self):
response = self.handler.get_columns("movies")
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -287,7 +295,7 @@ def test_query_select_with_subquery_success(self):
response = self.handler.query(main_query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -353,7 +361,7 @@ def test_query_select_with_complex_subquery_success(self):
response = self.handler.query(main_query)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -388,7 +396,7 @@ def test_query_select_with_where_operators(self):
response = self.handler.query(query)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -431,7 +439,7 @@ def test_query_select_with_and_or_conditions(self):
response = self.handler.query(query)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -494,7 +502,7 @@ def test_select_with_match_and_projection(self):
response = self.handler.query(query)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -525,7 +533,7 @@ def test_select_constant_with_alias(self):
response = self.handler.query(query)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -557,7 +565,7 @@ def test_select_with_constant_no_alias(self):
response = self.handler.query(query)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -604,7 +612,7 @@ def test_query_select_with_subquery_and_where(self):
response = self.handler.query(main_query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -643,7 +651,7 @@ def test_query_select_nested_field_projection(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -689,7 +697,7 @@ def test_query_select_nested_field_with_where(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -725,7 +733,7 @@ def test_query_aggregation_on_nested_field(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -772,7 +780,7 @@ def test_query_group_by_with_nested_aggregation(self):
response = self.handler.query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
diff --git a/tests/unit/handlers/test_ms_teams.py b/tests/unit/handlers/community_handlers/test_ms_teams.py
similarity index 100%
rename from tests/unit/handlers/test_ms_teams.py
rename to tests/unit/handlers/community_handlers/test_ms_teams.py
diff --git a/tests/unit/handlers/community_handlers/test_openbb_tables.py b/tests/unit/handlers/community_handlers/test_openbb_tables.py
new file mode 100644
index 00000000000..50f0d270a5a
--- /dev/null
+++ b/tests/unit/handlers/community_handlers/test_openbb_tables.py
@@ -0,0 +1,101 @@
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pandas as pd
+import pytest
+
+try:
+ from mindsdb.integrations.handlers.openbb_handler.openbb_tables import OpenBBtable
+except ImportError:
+ OpenBBtable = None
+
+pytestmark = pytest.mark.skipif(OpenBBtable is None, reason="openbb_handler not installed")
+
+
+class _DummyOpenBBResponse:
+ def __init__(self, payload):
+ self.payload = payload
+
+ def to_df(self):
+ return pd.DataFrame([self.payload])
+
+
+class _DummyPrice:
+ def historical(self, **kwargs):
+ return _DummyOpenBBResponse(kwargs)
+
+
+class _DummyEquity:
+ def __init__(self):
+ self.price = _DummyPrice()
+
+
+class _DummyCoverage:
+ def __init__(self):
+ self.commands = {".equity.price.historical": {}}
+
+
+class _DummyObb:
+ def __init__(self):
+ self.equity = _DummyEquity()
+ self.coverage = _DummyCoverage()
+
+
+class _DummyHandler:
+ def __init__(self):
+ self.obb = _DummyObb()
+
+
+def test_openbb_command_resolution_returns_callable():
+ table = OpenBBtable(_DummyHandler())
+
+ function = table._resolve_openbb_command("obb.equity.price.historical")
+ result = function(symbol="AAPL").to_df()
+
+ assert result.iloc[0]["symbol"] == "AAPL"
+
+
+def test_openbb_select_treats_params_as_data():
+ table = OpenBBtable(_DummyHandler())
+ malicious_value = "__import__('os').system('echo hacked')"
+ query = SimpleNamespace(where=object())
+
+ with patch(
+ "mindsdb.integrations.handlers.openbb_handler.openbb_tables.extract_comparison_conditions",
+ return_value=[["=", "cmd", "obb.equity.price.historical"], ["=", "symbol", malicious_value]],
+ ):
+ result = table.select(query)
+
+ assert result.iloc[0]["symbol"] == malicious_value
+
+
+def test_openbb_command_resolution_rejects_private_segments():
+ table = OpenBBtable(_DummyHandler())
+
+ with pytest.raises(ValueError, match="Invalid OpenBB command segment"):
+ table._resolve_openbb_command("obb.__class__")
+
+
+def test_openbb_select_coerces_literal_string_params():
+ table = OpenBBtable(_DummyHandler())
+ query = SimpleNamespace(where=object())
+
+ with patch(
+ "mindsdb.integrations.handlers.openbb_handler.openbb_tables.extract_comparison_conditions",
+ return_value=[
+ ["=", "cmd", "obb.equity.price.historical"],
+ ["=", "limit", "123"],
+ ["=", "adjusted", "true"],
+ ["=", "symbol", "'AAPL'"],
+ ["=", "ids", "[1, 2]"],
+ ["=", "raw_symbol", "AAPL"],
+ ],
+ ):
+ result = table.select(query)
+
+ row = result.iloc[0]
+ assert row["limit"] == 123
+ assert bool(row["adjusted"]) is True
+ assert row["symbol"] == "AAPL"
+ assert row["ids"] == [1, 2]
+ assert row["raw_symbol"] == "AAPL"
diff --git a/tests/unit/handlers/test_s3.py b/tests/unit/handlers/community_handlers/test_s3.py
similarity index 71%
rename from tests/unit/handlers/test_s3.py
rename to tests/unit/handlers/community_handlers/test_s3.py
index 16f3e7e64b2..443da7fc776 100644
--- a/tests/unit/handlers/test_s3.py
+++ b/tests/unit/handlers/community_handlers/test_s3.py
@@ -2,6 +2,8 @@
import unittest
from unittest.mock import patch, MagicMock
+import pytest
+
from botocore.client import ClientError
from mindsdb_sql_parser import ast
from mindsdb_sql_parser.ast import Select, Identifier, Star, Constant
@@ -9,34 +11,39 @@
import pandas as pd
from base_handler_test import BaseHandlerTestSetup
-from mindsdb.integrations.handlers.s3_handler.s3_handler import S3Handler
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
+ OkResponse,
+ TableResponse,
+ DataHandlerResponse as Response,
HandlerStatusResponse as StatusResponse,
- RESPONSE_TYPE
+ RESPONSE_TYPE,
)
+try:
+ from mindsdb.integrations.handlers.s3_handler.s3_handler import S3Handler
+except ImportError:
+ pytestmark = pytest.mark.skip("s3_handler not installed (community handler)")
-class TestS3Handler(BaseHandlerTestSetup, unittest.TestCase):
+class TestS3Handler(BaseHandlerTestSetup, unittest.TestCase):
@property
def object_name(self):
- return '`my-bucket/my-file.csv`'
+ return "`my-bucket/my-file.csv`"
@property
def dummy_connection_data(self):
return OrderedDict(
- aws_access_key_id='AQAXEQK89OX07YS34OP',
- aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
- bucket='mindsdb-bucket',
- region_name='us-east-2',
+ aws_access_key_id="AQAXEQK89OX07YS34OP",
+ aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ bucket="mindsdb-bucket",
+ region_name="us-east-2",
)
def create_handler(self):
- return S3Handler('s3', connection_data=self.dummy_connection_data)
+ return S3Handler("s3", connection_data=self.dummy_connection_data)
def create_patcher(self):
- return patch('boto3.client')
+ return patch("boto3.client")
def test_connect(self):
"""
@@ -51,7 +58,7 @@ def test_connect(self):
self.assertTrue(self.handler.is_connected)
self.mock_connect.assert_called_once()
- @patch('boto3.client')
+ @patch("boto3.client")
def test_check_connection_success(self, mock_boto3_client):
"""
Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection.
@@ -66,7 +73,7 @@ def test_check_connection_success(self, mock_boto3_client):
assert isinstance(response, StatusResponse)
self.assertFalse(response.error_message)
- @patch('boto3.client')
+ @patch("boto3.client")
def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_client):
"""
Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to invalid bucket or lack of access permissions.
@@ -76,12 +83,12 @@ def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_c
mock_boto3_client.return_value = mock_boto3_client_instance
mock_boto3_client_instance.head_bucket.side_effect = ClientError(
error_response={
- 'Error': {
- 'Code': '404',
- 'Message': 'Not Found',
+ "Error": {
+ "Code": "404",
+ "Message": "Not Found",
}
},
- operation_name='HeadBucket'
+ operation_name="HeadBucket",
)
response = self.handler.check_connection()
@@ -90,7 +97,7 @@ def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_c
assert isinstance(response, StatusResponse)
self.assertTrue(response.error_message)
- @patch('boto3.client')
+ @patch("boto3.client")
def test_query_select(self, mock_boto3_client):
"""
Tests the `query` method to ensure it executes a SELECT SQL query using a mock cursor and returns a Response object.
@@ -104,18 +111,11 @@ def test_query_select(self, mock_boto3_client):
duckdb_connect = MagicMock()
self.handler._connect_duckdb = duckdb_connect
duckdb_execute = duckdb_connect().__enter__().execute
- duckdb_execute().fetchdf.return_value = pd.DataFrame([], columns=['col_2'])
+ duckdb_execute().fetchdf.return_value = pd.DataFrame([], columns=["col_2"])
# Craft the SELECT query and execute it.
- object_name = 'my-bucket/my-file.csv'
- select = ast.Select(
- targets=[
- Star()
- ],
- from_table=Identifier(
- parts=[object_name]
- )
- )
+ object_name = "my-bucket/my-file.csv"
+ select = ast.Select(targets=[Star()], from_table=Identifier(parts=[object_name]))
duckdb_execute.reset_mock()
response = self.handler.query(select)
@@ -124,10 +124,9 @@ def test_query_select(self, mock_boto3_client):
f"SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{object_name.replace('`', '')}'"
)
- assert isinstance(response, Response)
- self.assertFalse(response.error_code)
+ assert isinstance(response, TableResponse)
- @patch('boto3.client')
+ @patch("boto3.client")
def test_query_insert(self, mock_boto3_client):
"""
Tests the `query` method to ensure it executes a INSERT SQL query using a mock cursor and returns a Response object.
@@ -145,29 +144,25 @@ def test_query_insert(self, mock_boto3_client):
duckdb_execute().fetchdf.return_value = None
# Craft the INSERT query and execute it.
- columns = ['col_1', 'col_2']
- values = [('val_1', 'val_2')]
- insert = ast.Insert(
- table=Identifier(
- parts=[self.object_name]
- ),
- columns=columns,
- values=values
- )
+ columns = ["col_1", "col_2"]
+ values = [("val_1", "val_2")]
+ insert = ast.Insert(table=Identifier(parts=[self.object_name]), columns=columns, values=values)
duckdb_execute.reset_mock()
response = self.handler.query(insert)
sqls = [i[0][0] for i in duckdb_execute.call_args_list]
- assert sqls[0] == f"CREATE TABLE tmp_table AS SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'"
+ assert (
+ sqls[0]
+ == f"CREATE TABLE tmp_table AS SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'"
+ )
assert sqls[1] == "INSERT INTO tmp_table BY NAME SELECT * FROM df"
assert sqls[2] == f"COPY tmp_table TO 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'"
- assert isinstance(response, Response)
- self.assertFalse(response.error_code)
+ assert isinstance(response, OkResponse)
- @patch('boto3.client')
+ @patch("boto3.client")
def test_get_tables(self, mock_boto3_client):
"""
Test that the `get_tables` method correctly calls the `list_objects_v2` method and returns a Response object with the supported objects (files).
@@ -176,12 +171,12 @@ def test_get_tables(self, mock_boto3_client):
mock_boto3_client_instance = MagicMock()
mock_boto3_client.return_value = mock_boto3_client_instance
mock_boto3_client_instance.list_objects_v2.return_value = {
- 'Contents': [
- {'Key': 'file1.csv'},
- {'Key': 'file2.tsv'},
- {'Key': 'file3.json'},
- {'Key': 'file4.parquet'},
- {'Key': 'file5.xlsx'},
+ "Contents": [
+ {"Key": "file1.csv"},
+ {"Key": "file2.tsv"},
+ {"Key": "file3.json"},
+ {"Key": "file4.parquet"},
+ {"Key": "file5.xlsx"},
]
}
@@ -192,37 +187,32 @@ def test_get_tables(self, mock_boto3_client):
df = response.data_frame
self.assertEqual(len(df), 5) # +1 table is 'files'
- self.assertNotIn('file5.xlsx', df['table_name'].values)
+ self.assertNotIn("file5.xlsx", df["table_name"].values)
- @patch('mindsdb.integrations.handlers.s3_handler.s3_handler.S3Handler.query')
+ @patch("mindsdb.integrations.handlers.s3_handler.s3_handler.S3Handler.query")
def test_get_columns(self, mock_query):
"""
Test that the `get_columns` method correctly constructs the SQL query and calls `native_query` with the correct query.
"""
- mock_query.return_value = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=pd.DataFrame(
+ mock_query.return_value = TableResponse(
+ data=pd.DataFrame(
data={
- 'col_1': ['row_1', 'row_2', 'row_3'],
- 'col_2': [1, 2, 3],
+ "col_1": ["row_1", "row_2", "row_3"],
+ "col_2": [1, 2, 3],
},
- )
+ ),
)
- table_name = 'mock_table'
+ table_name = "mock_table"
response = self.handler.get_columns(table_name)
- expected_query = Select(
- targets=[Star()],
- from_table=Identifier(parts=[table_name]),
- limit=Constant(1)
- )
+ expected_query = Select(targets=[Star()], from_table=Identifier(parts=[table_name]), limit=Constant(1))
self.handler.query.assert_called_once_with(expected_query)
df = response.data_frame
- self.assertEqual(df.columns.tolist(), ['column_name', 'data_type'])
- self.assertEqual(df['data_type'].values.tolist(), ['string', 'int64'])
+ self.assertEqual(df.columns.tolist(), ["column_name", "data_type"])
+ self.assertEqual(df["data_type"].values.tolist(), ["string", "int64"])
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_slack.py b/tests/unit/handlers/community_handlers/test_slack.py
similarity index 99%
rename from tests/unit/handlers/test_slack.py
rename to tests/unit/handlers/community_handlers/test_slack.py
index 59c64de18b0..62a9ada8bc8 100644
--- a/tests/unit/handlers/test_slack.py
+++ b/tests/unit/handlers/community_handlers/test_slack.py
@@ -12,7 +12,7 @@
import pandas as pd
from base_handler_test import BaseAPIChatHandlerTest, BaseAPIResourceTestSetup
-from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, HandlerResponse as Response
+from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, TableResponse
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
try:
@@ -431,7 +431,7 @@ def test_native_query(self):
response = self.handler.native_query(query)
self.mock_connect.return_value.conversations_info.assert_called_once_with(channel="C1234567890")
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
expected_df = pd.DataFrame([MOCK_RESPONSE_CONV_INFO_1["channel"]])
pd.testing.assert_frame_equal(response.data_frame, expected_df)
@@ -451,7 +451,7 @@ def test_native_query_with_pagination(self):
self.mock_connect.return_value.conversations_list.assert_any_call()
self.mock_connect.return_value.conversations_list.assert_any_call(cursor="dGVhbTpDMDYxRkE1UEI=")
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
expected_df = pd.DataFrame(MOCK_RESPONSE_CONV_LIST_1["channels"] + MOCK_RESPONSE_CONV_LIST_2["channels"])
pd.testing.assert_frame_equal(response.data_frame, expected_df)
diff --git a/tests/unit/handlers/test_bigquery.py b/tests/unit/handlers/test_bigquery.py
index 1bb69de1a11..448af57d609 100644
--- a/tests/unit/handlers/test_bigquery.py
+++ b/tests/unit/handlers/test_bigquery.py
@@ -6,9 +6,10 @@
from google.api_core.exceptions import BadRequest
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
HandlerStatusResponse as StatusResponse,
RESPONSE_TYPE,
+ TableResponse,
+ ErrorResponse,
)
try:
@@ -76,7 +77,7 @@ def test_native_query(self):
self.handler.connect = MagicMock(return_value=mock_conn)
mock_query = MagicMock()
- mock_query.to_dataframe.return_value = None
+ mock_query.to_dataframe.return_value = pd.DataFrame({"col": [1, 2, 3]})
mock_conn.query.return_value = mock_query
query_str = "SELECT * FROM table"
@@ -87,8 +88,35 @@ def test_native_query(self):
mock_query_job_config_instance = mock_query_job_config.return_value
data = self.handler.native_query(query_str)
mock_conn.query.assert_called_once_with(query_str, job_config=mock_query_job_config_instance)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
+ assert isinstance(data, TableResponse)
+
+ def test_native_query_empty_select_returns_table(self):
+ mock_conn = MagicMock()
+ self.handler.connect = MagicMock(return_value=mock_conn)
+
+ mock_query = MagicMock()
+ mock_query.to_dataframe.return_value = pd.DataFrame(columns=["id"])
+ mock_conn.query.return_value = mock_query
+
+ with patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig"):
+ response = self.handler.native_query("SELECT id FROM table WHERE 1 = 0")
+
+ self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
+ self.assertEqual(list(response.data_frame.columns), ["id"])
+ self.assertTrue(response.data_frame.empty)
+
+ def test_native_query_empty_dataframe_without_columns_returns_ok(self):
+ mock_conn = MagicMock()
+ self.handler.connect = MagicMock(return_value=mock_conn)
+
+ mock_query = MagicMock()
+ mock_query.to_dataframe.return_value = pd.DataFrame()
+ mock_conn.query.return_value = mock_query
+
+ with patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig"):
+ response = self.handler.native_query("UPDATE table SET col = 1")
+
+ self.assertEqual(response.type, RESPONSE_TYPE.OK)
def test_get_tables(self):
"""
@@ -124,7 +152,7 @@ def test_get_columns(self):
self.handler.native_query.assert_called_once_with(expected_query)
def test_meta_get_tables_filters(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_tables(table_names=["orders"])
@@ -132,7 +160,7 @@ def test_meta_get_tables_filters(self):
self.assertIn("AND t.table_name IN ('orders')", query)
def test_meta_get_columns_filters(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_columns(table_names=["orders"])
@@ -176,9 +204,9 @@ def test_meta_get_column_statistics_batches_results(self):
self.handler.native_query = MagicMock(
side_effect=[
- Response(RESPONSE_TYPE.TABLE, data_frame=column_types_result),
- Response(RESPONSE_TYPE.TABLE, data_frame=first_batch_result),
- Response(RESPONSE_TYPE.TABLE, data_frame=second_batch_result),
+ TableResponse(data=column_types_result),
+ TableResponse(data=first_batch_result),
+ TableResponse(data=second_batch_result),
]
)
@@ -189,20 +217,21 @@ def test_meta_get_column_statistics_batches_results(self):
self.assertEqual(self.handler.native_query.call_count, 3) # 1 for column types + 2 for batches
def test_meta_get_column_statistics_returns_error_when_empty(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.ERROR, error_message="boom"))
+ self.handler.native_query = MagicMock(return_value=ErrorResponse(error_message="boom"))
response = self.handler.meta_get_column_statistics_for_table("table", ["col"])
self.assertEqual(response.resp_type, RESPONSE_TYPE.ERROR)
def test_meta_get_primary_keys_filters(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_primary_keys(table_names=["orders"])
query = self.handler.native_query.call_args[0][0]
self.assertIn("AND tc.table_name IN ('orders')", query)
+ self.assertNotIn("tc.constraint_name,", query)
def test_meta_get_foreign_keys_filters(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_foreign_keys(table_names=["orders"])
query = self.handler.native_query.call_args[0][0]
self.assertIn("AND tc.table_name IN ('orders')", query)
diff --git a/tests/unit/handlers/test_file.py b/tests/unit/handlers/test_file.py
index 9df2ee28415..7c54c8cbbc7 100644
--- a/tests/unit/handlers/test_file.py
+++ b/tests/unit/handlers/test_file.py
@@ -17,8 +17,8 @@
)
from mindsdb.integrations.handlers.file_handler.file_handler import FileHandler
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-
+from mindsdb.integrations.libs.response import RESPONSE_TYPE, INF_SCHEMA_COLUMNS_NAMES_SET, INF_SCHEMA_COLUMNS_NAMES
+from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
from mindsdb.integrations.utilities.files.file_reader import (
FileReader,
FileProcessingError,
@@ -406,8 +406,25 @@ def test_get_columns():
file_handler = FileHandler(file_controller=MockFileController())
response = file_handler.get_columns("mock")
- assert response.type == RESPONSE_TYPE.TABLE
-
- expected_df = pandas.DataFrame([{"Field": x, "Type": "str"} for x in file_records[0][2]])
-
- assert response.data_frame.equals(expected_df)
+ assert response.type == RESPONSE_TYPE.COLUMNS_TABLE
+
+ data = []
+ for name in file_records[0][2]:
+ row = {}
+ for key_name in INF_SCHEMA_COLUMNS_NAMES_SET:
+ if key_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME:
+ row[key_name] = name
+ elif key_name == INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE:
+ row[key_name] = "str"
+ elif key_name == INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE:
+ row[key_name] = MYSQL_DATA_TYPE.TEXT
+ else:
+ row[key_name] = None
+ data.append(row)
+
+ expected_df = pandas.DataFrame(data)
+ assert set(response.data_frame.columns) == set(expected_df.columns)
+ expected_df = expected_df[response.data_frame.columns]
+
+ # Use 'compare' to ignore dtypes (object != string)
+ assert response.data_frame.compare(expected_df).empty
diff --git a/tests/unit/handlers/test_hubspot.py b/tests/unit/handlers/test_hubspot.py
index df7ee1cdd04..edbb3732ecc 100644
--- a/tests/unit/handlers/test_hubspot.py
+++ b/tests/unit/handlers/test_hubspot.py
@@ -2,6 +2,7 @@
import pytest
import unittest
from unittest.mock import patch, MagicMock
+import pandas as pd
try:
from hubspot.crm.objects import SimplePublicObject
@@ -9,6 +10,8 @@
HubspotHandler,
)
from mindsdb.integrations.handlers.hubspot_handler.hubspot_tables import (
+ CompaniesTable,
+ ContactsTable,
DealsTable,
canonical_op,
to_hubspot_property,
@@ -17,6 +20,7 @@
_normalize_filter_conditions,
)
from mindsdb_sql_parser.ast import Select, Identifier, Function
+ from mindsdb_sql_parser import parse_sql
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
except ImportError:
pytestmark = pytest.mark.skip("HubSpot handler not installed")
@@ -24,8 +28,8 @@
from base_handler_test import BaseHandlerTestSetup
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
HandlerStatusResponse as StatusResponse,
+ DataHandlerResponse,
RESPONSE_TYPE,
)
@@ -154,31 +158,10 @@ def test_native_query(self):
query = "SELECT * FROM companies LIMIT 1"
response = self.handler.native_query(query)
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
self.assertIsNotNone(response.data_frame)
- def test_get_tables(self):
- """Test get_tables method returns registered tables."""
- response = self.handler.get_tables()
-
- assert isinstance(response, Response)
- self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
-
- df = response.data_frame
-
- self.assertEqual(len(df), len(self.EXPECTED_TABLES))
- self.assertIn("TABLE_NAME", df.columns)
- self.assertIn("TABLE_TYPE", df.columns)
-
- table_names = df["TABLE_NAME"].tolist()
- for table_name in self.EXPECTED_TABLES:
- self.assertIn(table_name, table_names)
-
- # All should be BASE TABLE type
- table_types = df["TABLE_TYPE"].unique().tolist()
- self.assertEqual(table_types, ["BASE TABLE"])
-
def test_get_columns_companies(self):
"""Test get_columns method for companies table."""
mock_hubspot_client = MagicMock()
@@ -203,7 +186,7 @@ def test_get_columns_companies(self):
response = self.handler.get_columns("companies")
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE)
df = response.data_frame
@@ -251,7 +234,7 @@ def test_get_columns_contacts(self):
response = self.handler.get_columns("contacts")
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE)
df = response.data_frame
@@ -298,7 +281,7 @@ def test_get_columns_deals(self):
response = self.handler.get_columns("deals")
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE)
df = response.data_frame
@@ -338,7 +321,7 @@ def test_get_columns_invalid_table(self):
"""Test get_columns method with invalid table name."""
response = self.handler.get_columns("nonexistent_table")
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
self.assertIsNotNone(response.error_message)
@@ -354,7 +337,7 @@ def test_native_query_with_insert(self):
insert_query = "INSERT INTO companies (name, city) VALUES ('New Company', 'Boston')"
response = self.handler.native_query(insert_query)
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR)
@@ -386,7 +369,7 @@ def test_native_query_with_update(self):
update_query = "UPDATE companies SET city='Boston' WHERE name='Test Company'"
response = self.handler.native_query(update_query)
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR)
def test_native_query_with_delete(self):
@@ -414,7 +397,7 @@ def test_native_query_with_delete(self):
delete_query = "DELETE FROM companies WHERE name='Test Company'"
response = self.handler.native_query(delete_query)
- assert isinstance(response, Response)
+ assert isinstance(response, DataHandlerResponse)
self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR)
def test_handler_name(self):
@@ -468,41 +451,6 @@ def test_native_query_invalid_sql(self):
self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
self.assertIn("Query execution failed", response.error_message)
- def test_get_tables_success(self):
- """Test get_tables method returns table metadata."""
- mock_hubspot_client = MagicMock()
- mock_companies_data = [
- SimplePublicObject(
- id="123",
- properties={
- "name": "Test Company",
- "createdate": "2023-01-01T00:00:00Z",
- "hs_lastmodifieddate": "2023-01-01T00:00:00Z",
- },
- )
- ]
-
- self.mock_connect.return_value = mock_hubspot_client
- mock_hubspot_client.crm.companies.get_all.return_value = mock_companies_data
- mock_hubspot_client.crm.contacts.get_all.return_value = []
- mock_hubspot_client.crm.deals.get_all.return_value = []
-
- response = self.handler.get_tables()
-
- self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
- self.assertIsNotNone(response.data_frame)
-
- df = response.data_frame
-
- self.assertEqual(len(df), len(self.EXPECTED_TABLES))
- self.assertIn("TABLE_NAME", df.columns)
- self.assertIn("TABLE_TYPE", df.columns)
- self.assertIn("TABLE_SCHEMA", df.columns)
-
- table_names = df["TABLE_NAME"].tolist()
- for table_name in self.EXPECTED_TABLES:
- self.assertIn(table_name, table_names)
-
def test_get_tables_connection_failure(self):
"""Test get_tables method with connection failure."""
self.mock_connect.side_effect = Exception("Connection failed")
@@ -615,15 +563,22 @@ def test_oauth_connection(self):
handler = HubspotHandler("hubspot", connection_data=oauth_data)
mock_hubspot_client = MagicMock()
-
- with patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") as mock_hubspot:
+ mock_access_token = "oauth_access_token_123"
+
+ with (
+ patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") as mock_hubspot,
+ patch(
+ "mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpotOAuth2Manager"
+ ) as mock_oauth_manager_cls,
+ ):
mock_hubspot.return_value = mock_hubspot_client
+ mock_oauth_manager_cls.return_value.get_access_token.return_value = mock_access_token
connection = handler.connect()
self.assertIsNotNone(connection)
self.assertTrue(handler.is_connected)
- mock_hubspot.assert_called_with(client_id="test_client_id", client_secret="test_client_secret")
+ mock_hubspot.assert_called_with(access_token=mock_access_token)
def test_comprehensive_error_handling(self):
"""Test comprehensive error handling in various scenarios."""
@@ -744,40 +699,6 @@ def test_get_columns_with_standard_schema(self):
self.assertEqual(id_row.iloc[0]["ORDINAL_POSITION"], 1)
self.assertEqual(id_row.iloc[0]["IS_NULLABLE"], "NO")
- def test_comprehensive_table_metadata(self):
- """Test that get_tables returns comprehensive metadata."""
- mock_hubspot_client = MagicMock()
-
- mock_companies_search_result = MagicMock()
- mock_companies_search_result.total = 1250
-
- mock_contacts_search_result = MagicMock()
- mock_contacts_search_result.total = 850
-
- mock_deals_search_result = MagicMock()
- mock_deals_search_result.total = 320
-
- self.mock_connect.return_value = mock_hubspot_client
- mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_companies_search_result
- mock_hubspot_client.crm.contacts.search_api.do_search.return_value = mock_contacts_search_result
- mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_deals_search_result
-
- response = self.handler.get_tables()
-
- self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
- df = response.data_frame
-
- # Check only the 3 required metadata columns (following postgres handler pattern)
- required_columns = ["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"]
- for col in required_columns:
- self.assertIn(col, df.columns)
-
- # Verify all three tables are present
- table_names = df["TABLE_NAME"].tolist()
- self.assertEqual(len(table_names), len(self.EXPECTED_TABLES))
- for table_name in self.EXPECTED_TABLES:
- self.assertIn(table_name, table_names)
-
def test_estimate_table_rows_with_search_api(self):
"""Test that _estimate_table_rows uses search API for accurate counts."""
mock_hubspot_client = MagicMock()
@@ -930,7 +851,11 @@ def test_meta_get_columns_all_tables(self):
self.assertIn("deals", tables_present)
def test_select_companies_with_in_clause_uses_search_api(self):
- """Test that SELECT with IN clause uses HubSpot Search API."""
+ """
+ MindsDB calls table.select(query_ast) directly β not native_query.
+ Verify that a WHERE city IN (...) query routes to the HubSpot Search API.
+ """
+
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = [
@@ -943,33 +868,26 @@ def test_select_companies_with_in_clause_uses_search_api(self):
"hs_lastmodifieddate": "2023-01-01T00:00:00Z",
},
),
- SimplePublicObject(
- id="2",
- properties={
- "name": "Austin Company",
- "city": "Austin",
- "createdate": "2023-01-01T00:00:00Z",
- "hs_lastmodifieddate": "2023-01-01T00:00:00Z",
- },
- ),
]
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result
- query = "SELECT * FROM companies WHERE city IN ('New York', 'Austin')"
- response = self.handler.native_query(query)
+ table = CompaniesTable(handler)
+ query = parse_sql("SELECT * FROM companies WHERE city IN ('New York', 'Austin')", dialect="mindsdb")
+ result = table.select(query)
- # Verify search API was called (not get_all)
mock_hubspot_client.crm.companies.search_api.do_search.assert_called()
mock_hubspot_client.crm.companies.get_all.assert_not_called()
-
- self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
- self.assertIsNotNone(response.data_frame)
+ self.assertIsNotNone(result)
def test_select_contacts_with_in_clause(self):
- """Test SELECT contacts with IN clause."""
+ """
+ MindsDB calls ContactsTable.select(query_ast) directly.
+ Verify city IN (...) routes to HubSpot Search API.
+ """
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = [
@@ -985,17 +903,22 @@ def test_select_contacts_with_in_clause(self):
]
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
mock_hubspot_client.crm.contacts.search_api.do_search.return_value = mock_search_result
- query = "SELECT * FROM contacts WHERE city IN ('Boston', 'Chicago')"
- response = self.handler.native_query(query)
+ table = ContactsTable(handler)
+ query = parse_sql("SELECT * FROM contacts WHERE city IN ('Boston', 'Chicago')", dialect="mindsdb")
+ result = table.select(query)
mock_hubspot_client.crm.contacts.search_api.do_search.assert_called()
- self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
+ self.assertIsNotNone(result)
def test_select_deals_with_in_clause(self):
- """Test SELECT deals with IN clause on dealstage."""
+ """
+ MindsDB calls DealsTable.select(query_ast) directly.
+ Verify dealstage IN (...) routes to HubSpot Search API.
+ """
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = [
@@ -1010,33 +933,43 @@ def test_select_deals_with_in_clause(self):
]
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
+ handler._hubspot_deal_stage_map_cache = ({}, {})
+ handler._hubspot_deal_stage_rows_cache = []
+ handler._hubspot_owner_rows_cache = []
+ handler._hubspot_owner_map_cache = {}
mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_search_result
+ mock_hubspot_client.crm.pipelines.pipelines_api.get_all.return_value = MagicMock(results=[])
- query = "SELECT * FROM deals WHERE dealstage IN ('closedwon', 'closedlost')"
- response = self.handler.native_query(query)
+ table = DealsTable(handler)
+ query = parse_sql("SELECT * FROM deals WHERE dealstage IN ('closedwon', 'closedlost')", dialect="mindsdb")
+ result = table.select(query)
mock_hubspot_client.crm.deals.search_api.do_search.assert_called()
- self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
+ self.assertIsNotNone(result)
def test_select_with_in_clause_verifies_filter_structure(self):
- """Test that IN clause generates correct HubSpot filter structure."""
+ """
+ Verify that city IN (...) generates the correct HubSpot Search API filter payload.
+ Tests via CompaniesTable.select() β the actual call path MindsDB uses.
+ """
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = []
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result
- query = "SELECT * FROM companies WHERE city IN ('NYC', 'LA', 'Chicago')"
- self.handler.native_query(query)
+ table = CompaniesTable(handler)
+ query = parse_sql("SELECT * FROM companies WHERE city IN ('NYC', 'LA', 'Chicago')", dialect="mindsdb")
+ table.select(query)
- # Capture the call arguments
call_args = mock_hubspot_client.crm.companies.search_api.do_search.call_args
search_request = call_args.kwargs.get("public_object_search_request", {})
- # Verify filter structure
self.assertIn("filterGroups", search_request)
filter_groups = search_request["filterGroups"]
self.assertEqual(len(filter_groups), 1)
@@ -1051,17 +984,19 @@ def test_select_with_in_clause_verifies_filter_structure(self):
self.assertEqual(set(in_filter["values"]), {"NYC", "LA", "Chicago"})
def test_select_with_not_in_clause(self):
- """Test SELECT with NOT IN clause."""
+ """Verify industry NOT IN (...) generates NOT_IN operator in HubSpot filter."""
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = []
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result
- query = "SELECT * FROM companies WHERE industry NOT IN ('Retail', 'Healthcare')"
- self.handler.native_query(query)
+ table = CompaniesTable(handler)
+ query = parse_sql("SELECT * FROM companies WHERE industry NOT IN ('Retail', 'Healthcare')", dialect="mindsdb")
+ table.select(query)
call_args = mock_hubspot_client.crm.companies.search_api.do_search.call_args
search_request = call_args.kwargs.get("public_object_search_request", {})
@@ -1070,17 +1005,22 @@ def test_select_with_not_in_clause(self):
self.assertEqual(filters[0]["operator"], "NOT_IN")
def test_select_with_in_and_equality_combined(self):
- """Test SELECT combining IN clause with equality filter."""
+ """Verify city IN (...) AND industry = '...' both push down to the Search API."""
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = []
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result
- query = "SELECT * FROM companies WHERE city IN ('NYC', 'LA') AND industry = 'Technology'"
- self.handler.native_query(query)
+ table = CompaniesTable(handler)
+ query = parse_sql(
+ "SELECT * FROM companies WHERE city IN ('NYC', 'LA') AND industry = 'Technology'",
+ dialect="mindsdb",
+ )
+ table.select(query)
mock_hubspot_client.crm.companies.search_api.do_search.assert_called()
@@ -1151,17 +1091,30 @@ def test_meta_get_column_statistics_multiple_tables(self):
self.assertIn("contacts", table_names)
def test_search_pushdown_builds_sorts_and_properties(self):
- """Test search API payload includes sorts and properties when pushdown is used."""
+ """
+ Verify that ORDER BY and SELECT columns are pushed down to the HubSpot Search API.
+ Uses DealsTable.select() β the actual call path MindsDB uses.
+ """
mock_hubspot_client = MagicMock()
mock_search_result = MagicMock()
mock_search_result.results = []
mock_search_result.paging = None
- self.mock_connect.return_value = mock_hubspot_client
+ handler = MagicMock()
+ handler.connect.return_value = mock_hubspot_client
+ handler._hubspot_deal_stage_map_cache = ({}, {})
+ handler._hubspot_deal_stage_rows_cache = []
+ handler._hubspot_owner_rows_cache = []
+ handler._hubspot_owner_map_cache = {}
mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_search_result
+ mock_hubspot_client.crm.pipelines.pipelines_api.get_all.return_value = MagicMock(results=[])
- query = "SELECT dealname FROM deals WHERE pipeline='default' ORDER BY closedate DESC LIMIT 5"
- self.handler.native_query(query)
+ table = DealsTable(handler)
+ query = parse_sql(
+ "SELECT dealname FROM deals WHERE pipeline='default' ORDER BY closedate DESC LIMIT 5",
+ dialect="mindsdb",
+ )
+ table.select(query)
call_args = mock_hubspot_client.crm.deals.search_api.do_search.call_args
search_request = call_args.kwargs.get("public_object_search_request", {})
@@ -1171,6 +1124,130 @@ def test_search_pushdown_builds_sorts_and_properties(self):
self.assertEqual(search_request["sorts"][0]["direction"], "DESCENDING")
self.assertEqual(search_request["properties"], ["dealname"])
+ def test_three_table_join_on_clause_orientations(self):
+ """
+ Verify CORE JOIN ASSOC JOIN CORE resolves left_assoc_col / right_assoc_col
+ correctly regardless of which side of the ON each table appears on.
+ """
+
+ company_df = pd.DataFrame({"id": ["1"], "name": ["Acme"]})
+ assoc_df = pd.DataFrame({"company_id": ["1"], "contact_id": ["42"]})
+ contact_df = pd.DataFrame({"id": ["42"], "firstname": ["Alice"]})
+
+ orientations = [
+ ("c.id = cc.company_id", "cc.contact_id = ct.id"), # A
+ ("cc.company_id = c.id", "cc.contact_id = ct.id"), # B
+ ("c.id = cc.company_id", "ct.id = cc.contact_id"), # C
+ ("cc.company_id = c.id", "ct.id = cc.contact_id"), # D
+ ]
+
+ handler: HubspotHandler = self.create_handler()
+
+ for left_on, right_on in orientations:
+ with self.subTest(left_on=left_on, right_on=right_on):
+ companies_mock = MagicMock()
+ companies_mock.select.return_value = company_df.copy()
+ assoc_mock = MagicMock()
+ assoc_mock.list.return_value = assoc_df.copy()
+ contacts_mock = MagicMock()
+ contacts_mock.list.return_value = contact_df.copy()
+
+ handler._tables["companies"] = companies_mock
+ handler._tables["company_contacts"] = assoc_mock
+ handler._tables["contacts"] = contacts_mock
+
+ query = f"""
+ SELECT *
+ FROM companies c
+ JOIN company_contacts cc ON {left_on}
+ JOIN contacts ct ON {right_on}
+ """
+ response = handler.native_query(query)
+
+ self.assertEqual(
+ response.type,
+ RESPONSE_TYPE.TABLE,
+ msg=f"orientation ({left_on!r}, {right_on!r}) returned ERROR: "
+ f"{getattr(response, 'error_message', '')}",
+ )
+ self.assertFalse(response.data_frame.empty)
+
+ # The assoc table must be queried by company_id (left_assoc_col), not
+ # by some other column β this is the column the bug inverted.
+ assoc_conditions = assoc_mock.list.call_args.kwargs.get("conditions", [])
+ assoc_filter_cols = [fc.column for fc in assoc_conditions]
+ self.assertIn(
+ "company_id",
+ assoc_filter_cols,
+ msg=f"assoc.list not filtered on company_id for orientation "
+ f"({left_on!r}, {right_on!r}); got {assoc_filter_cols}",
+ )
+
+ def test_multijoin_query_handling(self):
+ """Test that multijoin queries return appropriate error since not supported."""
+ query = """
+ SELECT c.name, o.dealname
+ FROM companies c
+ JOIN deals o ON c.id = o.company_id
+ """
+ response = self.handler.native_query(query)
+
+ self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
+ self.assertIn("not supported", response.error_message)
+
+
+class TestHubspotPassthrough(unittest.TestCase):
+ """Exercise the PassthroughMixin retrofit (PAT path)."""
+
+ def _mock_response(self, status_code=200):
+ resp = MagicMock()
+ resp.status_code = status_code
+ resp.headers = {"Content-Type": "application/json"}
+ resp.iter_content = MagicMock(return_value=iter([b'{"results":[]}']))
+ resp.close = MagicMock()
+ return resp
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_passthrough_uses_bearer_and_hubspot_base_url(self, mock_request):
+ mock_request.return_value = self._mock_response()
+ handler = HubspotHandler(
+ "hubspot",
+ connection_data={"access_token": "pat-abc123xyz"},
+ )
+ from mindsdb.integrations.libs.passthrough_types import PassthroughRequest
+
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/crm/v3/owners"))
+
+ self.assertEqual(resp.status_code, 200)
+ args, kwargs = mock_request.call_args
+ self.assertEqual(args[0], "GET")
+ self.assertEqual(args[1], "https://api.hubapi.com/crm/v3/owners")
+ self.assertEqual(kwargs["headers"]["Authorization"], "Bearer pat-abc123xyz")
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_returns_ok_on_200(self, mock_request):
+ mock_request.return_value = self._mock_response(status_code=200)
+ handler = HubspotHandler("hubspot", connection_data={"access_token": "pat"})
+
+ result = handler.test_passthrough()
+
+ self.assertTrue(result["ok"])
+ self.assertEqual(result["status_code"], 200)
+ self.assertEqual(result["host"], "api.hubapi.com")
+ self.assertIsInstance(result["latency_ms"], int)
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_returns_auth_failed_on_401(self, mock_request):
+ mock_request.return_value = self._mock_response(status_code=401)
+ handler = HubspotHandler("hubspot", connection_data={"access_token": "pat"})
+
+ result = handler.test_passthrough()
+
+ self.assertFalse(result["ok"])
+ self.assertEqual(result["error_code"], "auth_failed")
+ self.assertEqual(result["status_code"], 401)
+ self.assertEqual(result["host"], "api.hubapi.com")
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_mariadb.py b/tests/unit/handlers/test_mariadb.py
index be2cc4f6120..9d75a8dce72 100644
--- a/tests/unit/handlers/test_mariadb.py
+++ b/tests/unit/handlers/test_mariadb.py
@@ -6,19 +6,18 @@
from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager
from mindsdb.integrations.handlers.mariadb_handler.mariadb_handler import MariaDBHandler
-from mindsdb.integrations.libs.response import HandlerResponse as Response
+from mindsdb.integrations.libs.response import TableResponse
class TestMariaDBHandler(BaseDatabaseHandlerTest, unittest.TestCase):
-
@property
def dummy_connection_data(self):
return OrderedDict(
- host='127.0.0.1',
+ host="127.0.0.1",
port=3307,
- user='example_user',
- password='example_pass',
- database='example_db',
+ user="example_user",
+ password="example_pass",
+ database="example_db",
)
@property
@@ -60,22 +59,21 @@ def get_columns_query(self):
from
information_schema.columns
where
- table_name = '{self.mock_table}';
+ table_name = '{self.mock_table}'
+ and table_schema = DATABASE();
"""
def create_handler(self):
- return MariaDBHandler('mariadb', connection_data=self.dummy_connection_data)
+ return MariaDBHandler("mariadb", connection_data=self.dummy_connection_data)
def create_patcher(self):
- return patch('mysql.connector.connect')
+ return patch("mysql.connector.connect")
def test_native_query(self):
- """Test that native_query returns a Response object with no error
- """
+ """Test that native_query returns a TableResponse object with no error"""
mock_conn = MagicMock()
mock_cursor = MockCursorContextManager(
- data=[{'id': 1}],
- description=[('id', 3, None, None, None, None, 1, 0, 45)]
+ data=[{"id": 1}], description=[("id", 3, None, None, None, None, 1, 0, 45)]
)
self.handler.connect = MagicMock(return_value=mock_conn)
@@ -84,9 +82,8 @@ def test_native_query(self):
query_str = f"SELECT * FROM {self.mock_table}"
data = self.handler.native_query(query_str)
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, TableResponse)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_mssql.py b/tests/unit/handlers/test_mssql.py
index dbb097754f9..d7024d51359 100644
--- a/tests/unit/handlers/test_mssql.py
+++ b/tests/unit/handlers/test_mssql.py
@@ -17,7 +17,13 @@
from pandas import DataFrame
from base_handler_test import BaseDatabaseHandlerTest
-from mindsdb.integrations.libs.response import HandlerResponse as Response, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE
+from mindsdb.integrations.libs.response import (
+ OkResponse,
+ TableResponse,
+ ErrorResponse,
+ INF_SCHEMA_COLUMNS_NAMES_SET,
+ RESPONSE_TYPE,
+)
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
@@ -91,8 +97,7 @@ def test_native_query_with_results(self):
mock_conn.cursor.assert_called_once_with(as_dict=True)
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
+ assert isinstance(data, TableResponse)
self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
self.assertIsInstance(data.data_frame, DataFrame)
expected_columns = ["id", "name"]
@@ -121,8 +126,7 @@ def test_native_query_no_results(self):
mock_conn.cursor.assert_called_once_with(as_dict=True)
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
+ assert isinstance(data, OkResponse)
self.assertEqual(data.type, RESPONSE_TYPE.OK)
mock_conn.commit.assert_called_once()
@@ -149,7 +153,7 @@ def test_native_query_error(self):
mock_conn.cursor.assert_called_once_with(as_dict=True)
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
+ assert isinstance(data, ErrorResponse)
self.assertEqual(data.type, RESPONSE_TYPE.ERROR)
self.assertEqual(data.error_message, str(error))
@@ -166,7 +170,7 @@ def test_query_method(self):
try:
self.handler.renderer = renderer_mock
self.handler.native_query = MagicMock()
- self.handler.native_query.return_value = Response(RESPONSE_TYPE.OK)
+ self.handler.native_query.return_value = OkResponse()
mock_ast = MagicMock()
result = self.handler.query(mock_ast)
@@ -180,7 +184,7 @@ def test_get_tables(self):
"""
Tests that get_tables calls native_query with the correct SQL
"""
- expected_response = Response(RESPONSE_TYPE.OK)
+ expected_response = OkResponse()
self.handler.native_query = MagicMock(return_value=expected_response)
response = self.handler.get_tables()
@@ -199,9 +203,7 @@ def test_get_columns(self):
"""
Tests that get_columns calls native_query with the correct SQL
"""
- expected_response = Response(
- RESPONSE_TYPE.TABLE, data_frame=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
- )
+ expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)))
self.handler.native_query = MagicMock(return_value=expected_response)
table_name = "test_table"
@@ -259,7 +261,7 @@ def test_meta_get_tables_returns_response(self):
},
]
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
# without filter
@@ -271,7 +273,7 @@ def test_meta_get_tables_returns_response(self):
self.handler.native_query.reset_mock()
tables = ["customers", "orders"]
filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True)
- filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df)
+ filtered_response = TableResponse(data=filtered_df)
self.handler.native_query = MagicMock(return_value=filtered_response)
response = self.handler.meta_get_tables(table_names=tables)
self.handler.native_query.assert_called_once()
@@ -307,7 +309,7 @@ def test_meta_get_columns_returns_response(self):
},
]
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
# without filter
@@ -319,7 +321,7 @@ def test_meta_get_columns_returns_response(self):
self.handler.native_query.reset_mock()
tables = ["customers"]
filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True)
- filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df)
+ filtered_response = TableResponse(data=filtered_df)
self.handler.native_query = MagicMock(return_value=filtered_response)
response = self.handler.meta_get_columns(table_names=tables)
self.handler.native_query.assert_called_once()
@@ -351,7 +353,7 @@ def test_meta_get_column_statistics_returns_response(self):
},
]
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
# without filter
@@ -363,7 +365,7 @@ def test_meta_get_column_statistics_returns_response(self):
self.handler.native_query.reset_mock()
tables = ["customers"]
filtered_df = df[df["TABLE_NAME"].isin(tables)].reset_index(drop=True)
- filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df)
+ filtered_response = TableResponse(data=filtered_df)
self.handler.native_query = MagicMock(return_value=filtered_response)
response = self.handler.meta_get_column_statistics(table_names=tables)
self.handler.native_query.assert_called_once()
@@ -382,7 +384,7 @@ def test_meta_get_primary_keys_returns_response(self):
{"table_name": "orders", "column_name": "id", "ordinal_position": 1, "constraint_name": "pk_orders"},
]
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
# without filter
@@ -394,7 +396,7 @@ def test_meta_get_primary_keys_returns_response(self):
self.handler.native_query.reset_mock()
tables = ["customers"]
filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True)
- filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df)
+ filtered_response = TableResponse(data=filtered_df)
self.handler.native_query = MagicMock(return_value=filtered_response)
response = self.handler.meta_get_primary_keys(table_names=tables)
self.handler.native_query.assert_called_once()
@@ -420,7 +422,7 @@ def test_meta_get_foreign_keys_returns_response(self):
},
]
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
# without filter
@@ -432,7 +434,7 @@ def test_meta_get_foreign_keys_returns_response(self):
self.handler.native_query.reset_mock()
tables = ["orders"]
filtered_df = df[df["child_table_name"].isin(tables)].reset_index(drop=True)
- filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df)
+ filtered_response = TableResponse(data=filtered_df)
self.handler.native_query = MagicMock(return_value=filtered_response)
response = self.handler.meta_get_foreign_keys(table_names=tables)
self.handler.native_query.assert_called_once()
@@ -521,7 +523,7 @@ def test_meta_methods_result_shape_and_exceptions(self):
for name, df_factory, method in methods:
with self.subTest(method=name, case="no_filter"):
df = df_factory()
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
res = method()
self.handler.native_query.assert_called_once()
@@ -533,7 +535,7 @@ def test_meta_methods_result_shape_and_exceptions(self):
with self.subTest(method=name, case="with_filter"):
df = df_factory()
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
res = (
method(table_names=["A", "B"])
@@ -639,6 +641,13 @@ def test_check_connection(self):
self.assertFalse(response.success)
self.assertEqual(response.error_message, "Connection error")
+ self.handler.connect.side_effect = ValueError("Invalid connection args")
+
+ response = self.handler.check_connection()
+
+ self.assertFalse(response.success)
+ self.assertEqual(response.error_message, "Invalid connection args")
+
def test_types_casting(self):
"""Test that types are casted correctly"""
query_str = "SELECT * FROM test_table"
@@ -726,7 +735,7 @@ def test_types_casting(self):
("n_real", 3, None, None, None, None, None),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str)
excepted_mysql_types = [
MYSQL_DATA_TYPE.TINYINT,
MYSQL_DATA_TYPE.INT,
@@ -741,7 +750,7 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.FLOAT,
MYSQL_DATA_TYPE.FLOAT,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for columns_name, input_value in input_row.items():
result_value = response.data_frame[columns_name][0]
self.assertEqual(result_value, input_value)
@@ -818,7 +827,7 @@ def test_types_casting(self):
("t_uniqueidentifier", 2, None, None, None, None, None),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str)
excepted_mysql_types = [
MYSQL_DATA_TYPE.TEXT,
MYSQL_DATA_TYPE.TEXT,
@@ -832,7 +841,7 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.TEXT,
MYSQL_DATA_TYPE.BINARY,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for columns_name, input_value in input_row.items():
result_value = response.data_frame[columns_name][0]
self.assertEqual(result_value, input_value)
@@ -901,7 +910,7 @@ def test_types_casting(self):
("d_datetimeoffset_p", 2, None, None, None, None, None),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str)
excepted_mysql_types = [
# DATE and TIME is not possible to infer, so they are BINARY
MYSQL_DATA_TYPE.BINARY,
@@ -914,7 +923,7 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.DATETIME,
MYSQL_DATA_TYPE.DATETIME,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for columns_name, input_value in input_row.items():
result_value = response.data_frame[columns_name][0]
if columns_name == "d_datetimeoffset_p":
@@ -1099,7 +1108,7 @@ def __getitem__(self, idx):
mock_conn.cursor.assert_called_once_with()
mock_cursor.execute.assert_called_once_with(query_str)
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
self.assertIsInstance(response.data_frame, DataFrame)
self.assertEqual(list(response.data_frame.columns), ["id", "name"])
@@ -1168,10 +1177,10 @@ def __getitem__(self, idx):
response = handler.native_query("SELECT * FROM test")
- self.assertIsInstance(response, Response)
+ self.assertIsInstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
- self.assertIsNotNone(response.mysql_types)
- self.assertTrue(len(response.mysql_types) > 0)
+ self.assertIsNotNone(response.columns)
+ self.assertTrue(len(response.columns) > 0)
finally:
if "pyodbc" in sys.modules:
del sys.modules["pyodbc"]
diff --git a/tests/unit/handlers/test_mysql.py b/tests/unit/handlers/test_mysql.py
index bbb3ab93e56..a506e0ba844 100644
--- a/tests/unit/handlers/test_mysql.py
+++ b/tests/unit/handlers/test_mysql.py
@@ -12,7 +12,13 @@
from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager
from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler
-from mindsdb.integrations.libs.response import HandlerResponse as Response, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE
+from mindsdb.integrations.libs.response import (
+ OkResponse,
+ TableResponse,
+ DataHandlerResponse as Response,
+ INF_SCHEMA_COLUMNS_NAMES_SET,
+ RESPONSE_TYPE,
+)
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
@@ -67,7 +73,8 @@ def get_columns_query(self):
from
information_schema.columns
where
- table_name = '{self.mock_table}';
+ table_name = '{self.mock_table}'
+ and table_schema = DATABASE();
"""
def create_handler(self):
@@ -89,13 +96,12 @@ def test_native_query(self):
query_str = f"SELECT * FROM {self.mock_table}"
data = self.handler.native_query(query_str)
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, TableResponse)
def test_native_query_with_results(self):
"""
Tests the `native_query` method to ensure it executes a SQL query and handles the case
- where the query returns a result set
+ where the query returns a result set, streaming data via fetchmany
"""
mock_conn = MagicMock()
mock_cursor = MagicMock()
@@ -106,7 +112,11 @@ def test_native_query_with_results(self):
mock_conn.cursor = MagicMock(return_value=mock_cursor)
mock_conn.is_connected = MagicMock(return_value=True)
- mock_cursor.fetchall.return_value = [{"id": 1, "name": "test1"}, {"id": 2, "name": "test2"}]
+ # fetchmany returns tuples (non-dictionary cursor), then empty list to signal end
+ mock_cursor.fetchmany.side_effect = [
+ [(1, "test1"), (2, "test2")],
+ [],
+ ]
# MySQL cursor provides column info via description attribute
mock_cursor.description = [
@@ -119,12 +129,10 @@ def test_native_query_with_results(self):
query_str = "SELECT * FROM test_table"
data = self.handler.native_query(query_str)
- mock_conn.cursor.assert_called_once_with(dictionary=True, buffered=True)
+ mock_conn.cursor.assert_called_once_with(buffered=False)
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
- self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
+ assert isinstance(data, TableResponse)
self.assertIsInstance(data.data_frame, DataFrame)
expected_columns = ["id", "name"]
@@ -150,12 +158,10 @@ def test_native_query_no_results(self):
query_str = "INSERT INTO test_table VALUES (1, 'test')"
data = self.handler.native_query(query_str)
- mock_conn.cursor.assert_called_once_with(dictionary=True, buffered=True)
+ mock_conn.cursor.assert_called_once_with(buffered=False)
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
- self.assertEqual(data.type, RESPONSE_TYPE.OK)
+ assert isinstance(data, OkResponse)
self.assertEqual(data.affected_rows, 1)
def test_native_query_error(self):
@@ -178,7 +184,7 @@ def test_native_query_error(self):
query_str = "INVALID SQL"
data = self.handler.native_query(query_str)
- mock_conn.cursor.assert_called_once_with(dictionary=True, buffered=True)
+ mock_conn.cursor.assert_called_once_with(buffered=False)
mock_cursor.execute.assert_called_once_with(query_str)
assert isinstance(data, Response)
@@ -377,7 +383,7 @@ def test_query_method(self):
mock_renderer_class.return_value = mock_renderer
self.handler.native_query = MagicMock()
- self.handler.native_query.return_value = Response(RESPONSE_TYPE.OK)
+ self.handler.native_query.return_value = OkResponse()
mock_ast = MagicMock()
@@ -406,7 +412,7 @@ def test_get_tables(self):
"""
Tests that get_tables calls native_query with the correct SQL
"""
- expected_response = Response(RESPONSE_TYPE.OK)
+ expected_response = OkResponse()
self.handler.native_query = MagicMock(return_value=expected_response)
response = self.handler.get_tables()
@@ -425,9 +431,7 @@ def test_get_columns(self):
"""
Tests that get_columns calls native_query with the correct SQL
"""
- expected_response = Response(
- RESPONSE_TYPE.TABLE, data_frame=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
- )
+ expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)))
self.handler.native_query = MagicMock(return_value=expected_response)
table_name = "test_table"
@@ -454,7 +458,8 @@ def test_get_columns(self):
from
information_schema.columns
where
- table_name = '{table_name}';
+ table_name = '{table_name}'
+ and table_schema = DATABASE();
"""
self.assertEqual(call_args, expected_sql)
self.assertEqual(response, expected_response)
@@ -473,19 +478,19 @@ def test_types_casting(self):
mock_conn.is_connected = MagicMock(return_value=True)
# region test TEXT/BLOB types and sub-types
- input_row = {
- "t_varchar": "v_varchar",
- "t_tinytext": "v_tinytext",
- "t_text": "v_text",
- "t_mediumtext": "v_mediumtext",
- "t_longtext": "v_longtext",
- "t_tinyblon": "v_tinyblon",
- "t_blob": "v_blob",
- "t_mediumblob": "v_mediumblob",
- "t_longblob": "v_longblob",
- "t_json": '{"key": "value"}',
- }
- mock_cursor.fetchall.return_value = [input_row]
+ input_row = OrderedDict(
+ t_varchar="v_varchar",
+ t_tinytext="v_tinytext",
+ t_text="v_text",
+ t_mediumtext="v_mediumtext",
+ t_longtext="v_longtext",
+ t_tinyblon="v_tinyblon",
+ t_blob="v_blob",
+ t_mediumblob="v_mediumblob",
+ t_longblob="v_longblob",
+ t_json='{"key": "value"}',
+ )
+ mock_cursor.fetchall.return_value = [list(input_row.values())]
mock_cursor.description = [
("t_varchar", 253, None, None, None, None, 1, 0, 45),
@@ -500,7 +505,7 @@ def test_types_casting(self):
("t_json", 245, None, None, None, None, 1, 144, 63),
]
- response: Response = self.handler.native_query(query_str)
+ response: Response = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [
MYSQL_DATA_TYPE.VARBINARY,
MYSQL_DATA_TYPE.TEXT,
@@ -513,7 +518,8 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.BLOB,
MYSQL_DATA_TYPE.JSON,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for key, input_value in input_row.items():
result_value = response.data_frame[key][0]
self.assertEqual(type(result_value), type(input_value))
@@ -521,17 +527,18 @@ def test_types_casting(self):
# endregion
# region test TINYINT/BOOL/BOOLEAN types
- input_row = {"t_tinyint": 1, "t_bool": 1, "t_boolean": 1}
- mock_cursor.fetchall.return_value = [input_row]
+ input_row = OrderedDict(t_tinyint=1, t_bool=1, t_boolean=1)
+ mock_cursor.fetchall.return_value = [list(input_row.values())]
mock_cursor.description = [
("t_tinyint", 1, None, None, None, None, 1, 0, 63),
("t_bool", 1, None, None, None, None, 1, 0, 63),
("t_boolean", 1, None, None, None, None, 1, 0, 63),
]
- response: Response = self.handler.native_query(query_str)
+ response: Response = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for key, input_value in input_row.items():
result_value = response.data_frame[key][0]
# without None values in result columns types will be one of pandas types
@@ -540,19 +547,19 @@ def test_types_casting(self):
# endregion
# region test numeric types
- input_row = {
- "t_tinyint": 1,
- "t_bool": 0,
- "t_smallint": 2,
- "t_year": 2025,
- "t_mediumint": 3,
- "t_int": 4,
- "t_bigint": 5,
- "t_float": 1.1,
- "t_double": 2.2,
- "t_decimal": Decimal("3.3"),
- }
- mock_cursor.fetchall.return_value = [input_row]
+ input_row = OrderedDict(
+ t_tinyint=1,
+ t_bool=0,
+ t_smallint=2,
+ t_year=2025,
+ t_mediumint=3,
+ t_int=4,
+ t_bigint=5,
+ t_float=1.1,
+ t_double=2.2,
+ t_decimal=Decimal("3.3"),
+ )
+ mock_cursor.fetchall.return_value = [list(input_row.values())]
mock_cursor.description = [
("t_tinyint", 1, None, None, None, None, 1, 0, 63),
("t_bool", 1, None, None, None, None, 1, 0, 63),
@@ -565,7 +572,7 @@ def test_types_casting(self):
("t_double", 5, None, None, None, None, 1, 0, 63),
("t_decimal", 246, None, None, None, None, 1, 0, 63),
]
- response: Response = self.handler.native_query(query_str)
+ response: Response = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [
MYSQL_DATA_TYPE.TINYINT,
MYSQL_DATA_TYPE.TINYINT,
@@ -579,21 +586,22 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.DECIMAL,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for key, input_value in input_row.items():
result_value = response.data_frame[key][0]
self.assertEqual(result_value, input_value)
# endregion
# test date/time types
- input_row = {
- "t_date": datetime.date(2025, 4, 16),
- "t_time": datetime.timedelta(seconds=45600),
- "t_year": 2025,
- "t_datetime": datetime.datetime(2025, 4, 16, 12, 30, 15),
- "t_timestamp": datetime.datetime(2025, 4, 16, 12, 30, 15),
- }
- mock_cursor.fetchall.return_value = [input_row]
+ input_row = OrderedDict(
+ t_date=datetime.date(2025, 4, 16),
+ t_time=datetime.timedelta(seconds=45600),
+ t_year=2025,
+ t_datetime=datetime.datetime(2025, 4, 16, 12, 30, 15),
+ t_timestamp=datetime.datetime(2025, 4, 16, 12, 30, 15),
+ )
+ mock_cursor.fetchall.return_value = [list(input_row.values())]
mock_cursor.description = [
("t_date", 10, None, None, None, None, 1, 128, 63),
@@ -603,7 +611,7 @@ def test_types_casting(self):
("t_timestamp", 7, None, None, None, None, 1, 128, 63),
]
- response: Response = self.handler.native_query(query_str)
+ response: Response = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [
MYSQL_DATA_TYPE.DATE,
MYSQL_DATA_TYPE.TIME,
@@ -611,7 +619,8 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.DATETIME,
MYSQL_DATA_TYPE.TIMESTAMP,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for key, input_value in input_row.items():
result_value = response.data_frame[key][0]
self.assertEqual(result_value, input_value)
@@ -619,14 +628,14 @@ def test_types_casting(self):
# region test casting of nullable types
bigint_val = 9223372036854775807
- input_rows = [{"t_bigint": bigint_val, "t_boolean": 1}, {"t_bigint": None, "t_boolean": None}]
- mock_cursor.fetchall.return_value = input_rows
+ input_rows = [OrderedDict(t_bigint=bigint_val, t_boolean=1), OrderedDict(t_bigint=None, t_boolean=None)]
+ mock_cursor.fetchall.return_value = [list(row.values()) for row in input_rows]
description = [
("t_bigint", 8, None, None, None, None, 1, 0, 63),
("t_boolean", 1, None, None, None, None, 1, 0, 63),
]
mock_cursor.description = description
- response: Response = self.handler.native_query(query_str)
+ response: Response = self.handler.native_query(query_str, stream=False)
self.assertEqual(response.data_frame.dtypes.iloc[0], "Int64")
self.assertEqual(response.data_frame.dtypes.iloc[1], "Int64")
self.assertEqual(response.data_frame.iloc[0, 0], bigint_val)
@@ -636,16 +645,17 @@ def test_types_casting(self):
# endregion
# region test vector type
- input_row = {
- "t_vector": array("f", [1.1, 2.2, 3.3]),
- }
- mock_cursor.fetchall.return_value = [input_row]
+ input_row = OrderedDict(
+ t_vector=array("f", [1.1, 2.2, 3.3]),
+ )
+ mock_cursor.fetchall.return_value = [list(input_row.values())]
mock_cursor.description = [("t_vector", 242, None, None, None, None, 1, 144, 63)]
- response: Response = self.handler.native_query(query_str)
+ response: Response = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [MYSQL_DATA_TYPE.VECTOR]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
self.assertEqual(input_row["t_vector"], response.data_frame["t_vector"][0])
# endregion
@@ -661,7 +671,7 @@ def _test_meta_method_with_filter(self, method, sample_data, filter_column, filt
"""
# Test without filter
df = DataFrame(sample_data)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ expected_response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=expected_response)
response = method()
@@ -671,7 +681,7 @@ def _test_meta_method_with_filter(self, method, sample_data, filter_column, filt
# Test with filter
self.handler.native_query.reset_mock()
filtered_df = df[df[filter_column].isin(filter_values)].reset_index(drop=True)
- filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df)
+ filtered_response = TableResponse(data=filtered_df)
self.handler.native_query = MagicMock(return_value=filtered_response)
response = method(table_names=filter_values)
diff --git a/tests/unit/handlers/test_oracle.py b/tests/unit/handlers/test_oracle.py
index cfd8dd7423f..fb18a57fcc6 100644
--- a/tests/unit/handlers/test_oracle.py
+++ b/tests/unit/handlers/test_oracle.py
@@ -18,9 +18,11 @@
import pandas as pd
from pandas import DataFrame
-from base_handler_test import BaseDatabaseHandlerTest
+from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
+ TableResponse,
+ OkResponse,
+ ErrorResponse,
INF_SCHEMA_COLUMNS_NAMES_SET,
RESPONSE_TYPE,
)
@@ -165,9 +167,42 @@ def test_thick_mode_connection(self):
handler.connect()
mock_init.assert_called_once_with(lib_dir="/path/to/oracle/client/lib")
- def test_native_query_with_results(self):
+ def test_native_query_with_results_streaming(self):
"""
- Tests the `native_query` method for a SELECT statement returning results.
+ Tests the `native_query` method for a SELECT statement returning results at server side execution.
+ """
+ mock_conn = MagicMock()
+ mock_cursor = MockCursorContextManager()
+
+ self.handler.connect = MagicMock(return_value=mock_conn)
+ mock_conn.cursor = MagicMock(return_value=mock_cursor)
+
+ # Server-side execution uses fetchmany, not fetchall
+ mock_cursor.fetchmany = MagicMock(side_effect=[[(1, "test1"), (2, "test2")], []])
+ mock_cursor.description = [
+ ("ID", None, None, None, None, None, None),
+ ("NAME", None, None, None, None, None, None),
+ ]
+
+ query_str = "SELECT ID, NAME FROM test_table"
+ data = self.handler.native_query(query_str, stream=True)
+
+ mock_conn.cursor.assert_called_once()
+ mock_cursor.execute.assert_called_once_with(query_str)
+
+ # Verify the response
+ self.assertIsInstance(data, TableResponse)
+ self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
+ self.assertIsNone(data._data)
+ data.fetchall()
+ self.assertIsInstance(data._data, DataFrame)
+ expected_columns = ["ID", "NAME"]
+ self.assertListEqual(list(data.data_frame.columns), expected_columns)
+ self.assertEqual(len(data.data_frame), 2)
+
+ def test_native_query_with_no_streaming(self):
+ """
+ Tests the `native_query` method for a SELECT statement returning results at client side execution.
"""
mock_conn = MagicMock()
mock_cursor = MagicMock()
@@ -177,22 +212,21 @@ def test_native_query_with_results(self):
self.handler.connect = MagicMock(return_value=mock_conn)
mock_conn.cursor = MagicMock(return_value=mock_cursor)
- mock_cursor.fetchall.return_value = [(1, "test1"), (2, "test2")]
+ mock_cursor.fetchall = MagicMock(return_value=[(1, "test1"), (2, "test2")])
mock_cursor.description = [
("ID", None, None, None, None, None, None),
("NAME", None, None, None, None, None, None),
]
query_str = "SELECT ID, NAME FROM test_table"
- data = self.handler.native_query(query_str)
+ data = self.handler.native_query(query_str, stream=False)
mock_conn.cursor.assert_called_once()
mock_cursor.execute.assert_called_once_with(query_str)
mock_cursor.fetchall.assert_called_once()
mock_conn.commit.assert_called_once()
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, TableResponse)
self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
self.assertIsInstance(data.data_frame, DataFrame)
expected_columns = ["ID", "NAME"]
@@ -222,8 +256,7 @@ def test_native_query_no_results(self):
mock_cursor.fetchall.assert_not_called()
mock_conn.commit.assert_called_once()
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, OkResponse)
self.assertEqual(data.type, RESPONSE_TYPE.OK)
self.assertEqual(data.affected_rows, 1)
@@ -252,7 +285,7 @@ def test_native_query_error(self):
mock_conn.rollback.assert_called_once()
mock_conn.commit.assert_not_called()
- self.assertIsInstance(data, Response)
+ self.assertIsInstance(data, ErrorResponse)
self.assertEqual(data.type, RESPONSE_TYPE.ERROR)
self.assertEqual(data.error_message, error_msg)
@@ -265,7 +298,7 @@ def test_query_method(self):
orig_renderer = self.handler.renderer
self.handler.native_query = MagicMock()
- expected_response = Response(RESPONSE_TYPE.TABLE)
+ expected_response = TableResponse()
self.handler.native_query.return_value = expected_response
mock_ast = MagicMock()
@@ -299,7 +332,7 @@ def test_get_tables(self):
],
columns=["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"],
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ expected_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=expected_response)
@@ -364,7 +397,7 @@ def test_get_tables_multiple_schemas(self):
],
columns=["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"],
)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ expected_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=expected_response)
@@ -448,7 +481,7 @@ def test_get_columns(self):
]
expected_df = DataFrame(expected_df_data, columns=query_columns)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ expected_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=expected_response)
table_name = "test_table"
@@ -573,7 +606,7 @@ def test_types_casting(self):
("N_BINARY_DOUBLE", oracledb.DB_TYPE_NUMBER, 127, None, None, None, True),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [
MYSQL_DATA_TYPE.FLOAT,
MYSQL_DATA_TYPE.DECIMAL,
@@ -590,7 +623,7 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.FLOAT,
MYSQL_DATA_TYPE.FLOAT,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[response.data_frame.columns[i]][0]
self.assertEqual(result_value, input_value)
@@ -612,9 +645,9 @@ def test_types_casting(self):
("T_BOOLEAN", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True),
("T_BOOL", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [MYSQL_DATA_TYPE.BOOLEAN, MYSQL_DATA_TYPE.BOOLEAN]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[response.data_frame.columns[i]][0]
self.assertEqual(result_value, input_value)
@@ -680,7 +713,7 @@ def test_types_casting(self):
("T_RAW", oracledb.DB_TYPE_RAW, 100, 100, None, None, True),
("T_BLOB", oracledb.DB_TYPE_LONG_RAW, None, None, None, None, True),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [
MYSQL_DATA_TYPE.TEXT,
MYSQL_DATA_TYPE.TEXT,
@@ -692,7 +725,7 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.BINARY,
MYSQL_DATA_TYPE.BINARY,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[response.data_frame.columns[i]][0]
self.assertEqual(result_value, input_value)
@@ -739,13 +772,13 @@ def test_types_casting(self):
("D_TIMESTAMP", oracledb.DB_TYPE_TIMESTAMP, 23, None, 0, 6, True),
("D_TIMESTAMP_P", oracledb.DB_TYPE_TIMESTAMP, 23, None, 0, 9, True),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [
MYSQL_DATA_TYPE.DATE,
MYSQL_DATA_TYPE.TIMESTAMP,
MYSQL_DATA_TYPE.TIMESTAMP,
]
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[response.data_frame.columns[i]][0]
self.assertEqual(result_value, input_value)
@@ -767,7 +800,7 @@ def test_types_casting(self):
), # set 17 just to force cast to Int64
("T_BOOLEAN", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
self.assertEqual(response.data_frame.dtypes[0], "Int64")
self.assertEqual(response.data_frame.dtypes[1], "boolean")
self.assertEqual(response.data_frame.iloc[0, 0], bigint_val)
@@ -800,12 +833,13 @@ def test_types_casting(self):
("T_EMBEDDING", oracledb.DB_TYPE_VECTOR, None, None, None, None, True),
("T_JSON", oracledb.DB_TYPE_JSON, None, None, None, None, True),
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
excepted_mysql_types = [MYSQL_DATA_TYPE.VECTOR, MYSQL_DATA_TYPE.JSON]
+ self.assertEqual([col.type for col in response.columns], excepted_mysql_types)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[response.data_frame.columns[i]][0]
self.assertEqual(result_value, input_value)
- # endreion
+ # endregion
def test_insert(self):
"""
@@ -813,9 +847,7 @@ def test_insert(self):
using insertmany for batch inserts.
"""
mock_conn = MagicMock()
- mock_cursor = MagicMock()
- mock_cursor.__enter__ = MagicMock(return_value=mock_cursor)
- mock_cursor.__exit__ = MagicMock(return_value=None)
+ mock_cursor = MockCursorContextManager()
self.handler.connect = MagicMock(return_value=mock_conn)
mock_conn.cursor = MagicMock(return_value=mock_cursor)
@@ -837,9 +869,7 @@ def test_insert_error(self):
Tests the insert method to ensure it correctly handles errors
"""
mock_conn = MagicMock()
- mock_cursor = MagicMock()
- mock_cursor.__enter__ = MagicMock(return_value=mock_cursor)
- mock_cursor.__exit__ = MagicMock(return_value=None)
+ mock_cursor = MockCursorContextManager()
self.handler.connect = MagicMock(return_value=mock_conn)
mock_conn.cursor = MagicMock(return_value=mock_cursor)
@@ -869,7 +899,7 @@ def test_meta_get_tables(self, table_names=None):
"row_count",
],
)
- mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ mock_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=mock_response)
response = self.handler.meta_get_tables(table_names=table_names)
@@ -900,7 +930,7 @@ def test_meta_get_columns(self, table_names=None):
],
)
- mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ mock_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=mock_response)
table_name = "TABLE1"
@@ -934,7 +964,7 @@ def test_meta_get_column_statistics(self, table_names=None):
],
)
- mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ mock_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=mock_response)
table_names = ["STATS_TABLE"]
response = self.handler.meta_get_column_statistics(table_names=table_names)
@@ -975,7 +1005,7 @@ def test_meta_get_primary_keys(self):
],
)
- mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ mock_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=mock_response)
table_names = ["USERS", "ORDERS"]
@@ -1024,7 +1054,7 @@ def test_meta_get_foreign_keys(self, table_names=None):
],
)
- mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ mock_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=mock_response)
table_names = ["ORDERS", "ORDER_ITEMS"]
diff --git a/tests/unit/handlers/test_postgres.py b/tests/unit/handlers/test_postgres.py
index 8ad5be6d414..a0e3adc1335 100644
--- a/tests/unit/handlers/test_postgres.py
+++ b/tests/unit/handlers/test_postgres.py
@@ -17,7 +17,12 @@
from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager
from mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler, _map_type
-from mindsdb.integrations.libs.response import HandlerResponse as Response, RESPONSE_TYPE
+from mindsdb.integrations.libs.response import (
+ RESPONSE_TYPE,
+ TableResponse,
+ OkResponse,
+ ErrorResponse,
+)
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
@@ -96,35 +101,64 @@ def create_handler(self):
def create_patcher(self):
return patch("psycopg.connect")
- def test_native_query_command_ok(self):
+ def test_native_query_command_ok_stream(self):
"""
Tests the `native_query` method to ensure it executes a SQL query and handles the case
where the query doesn't return a result set (ExecStatus.COMMAND_OK)
"""
mock_conn = MagicMock()
- # Use MockCursorContextManager for simplified mocking
- mock_cursor = MockCursorContextManager()
+ mock_cursor_server = MockCursorContextManager()
+ mock_cursor_client = MockCursorContextManager()
self.handler.connect = MagicMock(return_value=mock_conn)
- mock_conn.cursor = MagicMock(return_value=mock_cursor)
+ mock_conn.cursor = MagicMock(side_effect=[mock_cursor_server, mock_cursor_client])
- mock_cursor.execute.return_value = None
+ syntax_error = psycopg.errors.SyntaxError('syntax error at or near "insert"')
+ mock_cursor_server.execute.side_effect = syntax_error
+ mock_cursor_client.execute.return_value = None
# Setup pgresult
mock_pgresult = MagicMock()
mock_pgresult.status = ExecStatus.COMMAND_OK
- mock_cursor.pgresult = mock_pgresult
- mock_cursor.rowcount = 1
+ mock_cursor_client.pgresult = mock_pgresult
+ mock_cursor_client.rowcount = 1
query_str = "INSERT INTO table VALUES (1, 2, 3)"
- data = self.handler.native_query(query_str)
- mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
- self.assertEqual(data.type, RESPONSE_TYPE.OK)
+ data = self.handler.native_query(query_str, stream=True)
+ mock_cursor_server.execute.assert_called_once_with(query_str)
+ mock_cursor_client.execute.assert_called_once_with(query_str)
+ assert isinstance(data, OkResponse)
self.assertEqual(data.affected_rows, 1)
- def test_native_query_with_results(self):
+ def test_native_query_command_ok_no_stream(self):
+ """
+ Tests the `native_query` at client side execution
+ """
+ mock_conn = MagicMock()
+ # mock_cursor_server = MockCursorContextManager()
+ mock_cursor_client = MockCursorContextManager()
+
+ self.handler.connect = MagicMock(return_value=mock_conn)
+ mock_conn.cursor = MagicMock(side_effect=[mock_cursor_client])
+
+ # syntax_error = psycopg.errors.SyntaxError('syntax error at or near "insert"')
+ # mock_cursor_server.execute.side_effect = syntax_error
+ mock_cursor_client.execute.return_value = None
+
+ # Setup pgresult
+ mock_pgresult = MagicMock()
+ mock_pgresult.status = ExecStatus.COMMAND_OK
+ mock_cursor_client.pgresult = mock_pgresult
+ mock_cursor_client.rowcount = 1
+
+ query_str = "INSERT INTO table VALUES (1, 2, 3)"
+ data = self.handler.native_query(query_str, stream=False)
+ # mock_cursor_server.execute.assert_called_once_with(query_str)
+ mock_cursor_client.execute.assert_called_once_with(query_str)
+ assert isinstance(data, OkResponse)
+ self.assertEqual(data.affected_rows, 1)
+
+ def test_native_query_with_results_client_side(self):
"""
Tests the `native_query` method to ensure it executes a SQL query and handles the case
where the query returns a result set
@@ -135,7 +169,7 @@ def test_native_query_with_results(self):
self.handler.connect = MagicMock(return_value=mock_conn)
mock_conn.cursor = MagicMock(return_value=mock_cursor)
- mock_cursor.fetchall = MagicMock(return_value=[[1, "name1"], [2, "name2"]])
+ mock_cursor.fetchall = MagicMock(side_effect=[[[1, "name1"], [2, "name2"]], []])
# Create proper description objects with necessary type_code for _cast_dtypes
mock_cursor.description = [
@@ -149,14 +183,51 @@ def test_native_query_with_results(self):
mock_cursor.pgresult = mock_pgresult
query_str = "SELECT * FROM table"
- data = self.handler.native_query(query_str)
+ data = self.handler.native_query(query_str, stream=False)
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
+ assert isinstance(data, TableResponse)
+ assert getattr(data, "error_code", None) is None
self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
self.assertIsInstance(data.data_frame, DataFrame)
self.assertEqual(list(data.data_frame.columns), ["id", "name"])
+ def test_native_query_with_results_stream(self):
+ """
+ Tests the `native_query` method to ensure it executes a SQL query and handles the case
+ where the query returns a result set at server side execution
+ """
+ mock_conn = MagicMock()
+ mock_cursor = MockCursorContextManager()
+
+ self.handler.connect = MagicMock(return_value=mock_conn)
+ mock_conn.cursor = MagicMock(return_value=mock_cursor)
+
+ # Server-side execution uses fetchmany, not fetchall
+ mock_cursor.fetchmany = MagicMock(side_effect=[[[1, "name1"], [2, "name2"]], []])
+
+ mock_cursor.description = [
+ ColumnDescription(name="id", type_code=regtype_to_oid["integer"]), # int4 type code
+ ColumnDescription(name="name", type_code=regtype_to_oid["text"]), # text type code
+ ]
+
+ query_str = "SELECT * FROM table"
+ data = self.handler.native_query(query_str, stream=True)
+ mock_cursor.execute.assert_called_once_with(query_str)
+
+ # Verify the response
+ assert isinstance(data, TableResponse)
+ assert getattr(data, "error_code", None) is None
+ self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
+ self.assertIsNone(data._data)
+ data.fetchall()
+ self.assertIsInstance(data._data, DataFrame)
+ self.assertEqual(list(data.data_frame.columns), ["id", "name"])
+
+ # Verify DataFrame contains all expected rows
+ self.assertEqual(len(data.data_frame), 2)
+ self.assertEqual(data.data_frame["id"].tolist(), [1, 2])
+ self.assertEqual(data.data_frame["name"].tolist(), ["name1", "name2"])
+
def test_native_query_with_params(self):
"""
Tests the `native_query` method with parameters to ensure executemany is called correctly
@@ -175,8 +246,7 @@ def test_native_query_with_params(self):
params = [(1, "a"), (2, "b")]
data = self.handler.native_query(query_str, params=params)
mock_cursor.executemany.assert_called_once_with(query_str, params)
- assert isinstance(data, Response)
- self.assertFalse(data.error_code)
+ assert isinstance(data, OkResponse)
def test_native_query_error(self):
"""
@@ -198,8 +268,7 @@ def test_native_query_error(self):
mock_cursor.execute.assert_called_once_with(query_str)
- assert isinstance(data, Response)
- self.assertEqual(data.type, RESPONSE_TYPE.ERROR)
+ assert isinstance(data, ErrorResponse)
# The handler implementation sets error_code to 0, check error_message instead
self.assertEqual(data.error_code, 0)
@@ -260,30 +329,7 @@ def test_query_method_uses_renderer_params(self):
self.assertEqual(result, "ok")
self.handler.renderer.get_exec_params.assert_called_once_with(query_node, with_failback=True)
- self.handler.native_query.assert_called_once_with("SELECT 1", ["foo"])
-
- def test_query_stream_yields_batches(self):
- mock_conn = MagicMock()
- mock_cursor = MockCursorContextManager()
- mock_cursor.pgresult = MagicMock(status=ExecStatus.TUPLES_OK)
- mock_cursor.fetchmany = MagicMock(side_effect=[[(1, "name")], []])
- mock_cursor.description = [
- ColumnDescription(name="id", type_code=regtype_to_oid["integer"]),
- ColumnDescription(name="name", type_code=regtype_to_oid["text"]),
- ]
-
- self.handler.connect = MagicMock(return_value=mock_conn)
- mock_conn.cursor = MagicMock(return_value=mock_cursor)
- self.handler.renderer.get_exec_params = MagicMock(return_value=("SELECT * FROM table", None))
- self.handler.disconnect = MagicMock()
-
- batches = list(self.handler.query_stream(MagicMock(), fetch_size=1))
-
- self.assertEqual(len(batches), 1)
- self.assertListEqual(list(batches[0].columns), ["id", "name"])
- mock_conn.commit.assert_called_once()
- mock_conn.rollback.assert_called_once()
- self.handler.disconnect.assert_called_once()
+ self.handler.native_query.assert_called_once_with("SELECT 1", ["foo"], stream=False)
def test_insert_respects_existing_column_case(self):
if getattr(self.handler, "name", None) != "postgres":
@@ -299,9 +345,8 @@ def test_insert_respects_existing_column_case(self):
mock_conn.cursor = MagicMock(return_value=mock_cursor)
self.handler.disconnect = MagicMock()
self.handler.get_columns = MagicMock(
- return_value=Response(
- RESPONSE_TYPE.TABLE,
- data_frame=pd.DataFrame({"COLUMN_NAME": ["Id", "Amount"]}),
+ return_value=TableResponse(
+ data=pd.DataFrame({"COLUMN_NAME": ["Id", "Amount"]}),
)
)
@@ -318,6 +363,15 @@ def test_insert_respects_existing_column_case(self):
self.assertIn('"Id"', executed_copy)
self.assertIn('"Amount"', executed_copy)
+ def test_meta_get_column_statistics_returns_non_table_response(self):
+ error_response = ErrorResponse(error_message="boom")
+ self.handler.native_query = MagicMock(return_value=error_response)
+
+ result = self.handler.meta_get_column_statistics()
+
+ self.assertIs(result, error_response)
+ self.handler.native_query.assert_called_once()
+
def test_cast_dtypes(self):
"""
Tests the _cast_dtypes method to ensure it correctly converts PostgreSQL types to pandas types
@@ -444,13 +498,13 @@ def test_insert(self):
mock_pgresult.status = ExecStatus.TUPLES_OK
mock_cursor.pgresult = mock_pgresult
mock_cursor.rowcount = 1
- mock_cursor.fetchall = MagicMock(
- return_value=[
- ["a", "int", 1, None, "YES", None, None, None, None, None, None, None],
- ["b", "int", 2, None, "YES", None, None, None, None, None, None, None],
- ["c", "int", 3, None, "YES", None, None, None, None, None, None, None],
- ]
- )
+
+ get_columns_result = [
+ ["id", "int", 1, None, "YES", None, None, None, None, None, None, None],
+ ["name", "text", 2, None, "YES", None, None, None, None, None, None, None],
+ ]
+ mock_cursor.fetchmany = MagicMock(side_effect=[get_columns_result, []])
+
information_schema_description = [
ColumnDescription(name="COLUMN_NAME", type_code=regtype_to_oid["text"]),
ColumnDescription(name="DATA_TYPE", type_code=regtype_to_oid["text"]),
@@ -474,19 +528,6 @@ def test_insert(self):
copy_obj.__enter__ = MagicMock(return_value=copy_obj)
copy_obj.__exit__ = MagicMock(return_value=None)
- # region add result for 'get_columns' call
- mock_pgresult = MagicMock()
- mock_pgresult.status = ExecStatus.TUPLES_OK
- mock_cursor.pgresult = mock_pgresult
- mock_cursor.fetchall = MagicMock(
- return_value=[
- ["id", "int", 1, None, "YES", None, None, None, None, None, None, None],
- ["name", "text", 2, None, "YES", None, None, None, None, None, None, None],
- ]
- )
- mock_cursor.description = information_schema_description
- # endregino
-
df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
self.handler.insert("test_table", df)
@@ -643,9 +684,11 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.VARCHAR,
MYSQL_DATA_TYPE.VARCHAR,
]
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
+
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[description[i].name][0]
self.assertEqual(type(result_value), type(input_value), f"type mismatch: {result_value} != {input_value}")
@@ -657,8 +700,9 @@ def test_types_casting(self):
mock_cursor.fetchall.return_value = input_rows
mock_cursor.description = [ColumnDescription(name="t_boolean", type_code=16)]
excepted_mysql_types = [MYSQL_DATA_TYPE.BOOL]
- response: Response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
self.assertTrue(pd_types.is_bool_dtype(response.data_frame["t_boolean"][0]))
self.assertTrue(bool(response.data_frame["t_boolean"][0]) is True)
self.assertTrue(bool(response.data_frame["t_boolean"][1]) is False)
@@ -774,8 +818,9 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.FLOAT, # n_float4
MYSQL_DATA_TYPE.DOUBLE, # n_float8
]
- response: Response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[description[i].name][0]
self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}")
@@ -850,8 +895,9 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.TIME, # TIMETZ
]
- response: Response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[description[i].name][0]
self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}")
@@ -866,7 +912,7 @@ def test_types_casting(self):
ColumnDescription(name="t_boolean", type_code=16),
]
mock_cursor.description = description
- response: Response = self.handler.native_query(query_str)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
self.assertEqual(response.data_frame.dtypes[0], "Int64")
self.assertEqual(response.data_frame.dtypes[1], "boolean")
self.assertEqual(response.data_frame.iloc[0, 0], bigint_val)
@@ -921,8 +967,9 @@ def test_types_casting(self):
MYSQL_DATA_TYPE.VECTOR,
]
- response: Response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ response: TableResponse = self.handler.native_query(query_str, stream=False)
+ for column, mysql_type in zip(response.columns, excepted_mysql_types):
+ self.assertEqual(column.type, mysql_type)
for i, input_value in enumerate(input_row):
result_value = response.data_frame[description[i].name][0]
self.assertEqual(type(result_value), type(input_value), f"type mismatch: {result_value} != {input_value}")
@@ -933,7 +980,7 @@ def test_types_casting(self):
# endregion
def test_get_tables_all_flag(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.get_tables(all=True)
query = self.handler.native_query.call_args[0][0]
self.assertNotIn("current_schema()", query.split("table_schema")[-1])
@@ -955,19 +1002,19 @@ def test_get_columns_with_schema_name(self):
"COLLATION_NAME": [None],
}
)
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=df))
self.handler.get_columns("customers", schema_name="analytics")
query = self.handler.native_query.call_args[0][0]
self.assertIn("table_schema = 'analytics'", query)
def test_meta_get_tables_filters_by_list(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_tables(table_names=["orders"])
query = self.handler.native_query.call_args[0][0]
self.assertIn("IN ('orders')", query)
def test_meta_get_columns_filters_by_list(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_columns(table_names=["orders"])
query = self.handler.native_query.call_args[0][0]
self.assertIn("IN ('orders')", query)
@@ -984,7 +1031,7 @@ def test_meta_get_column_statistics_transforms_histogram(self):
"histogram_bounds": ["{1,5,10}"],
}
)
- response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
+ response = TableResponse(data=df)
self.handler.native_query = MagicMock(return_value=response)
result = self.handler.meta_get_column_statistics(table_names=["orders"])
@@ -995,13 +1042,13 @@ def test_meta_get_column_statistics_transforms_histogram(self):
self.assertEqual(result.data_frame.loc[0, "MOST_COMMON_VALUES"], ["A", "B"])
def test_meta_get_primary_keys_with_filter(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_primary_keys(table_names=["orders"])
query = self.handler.native_query.call_args[0][0]
self.assertIn("AND tc.table_name IN ('orders')", query)
def test_meta_get_foreign_keys_with_filter(self):
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame()))
self.handler.meta_get_foreign_keys(table_names=["orders"])
query = self.handler.native_query.call_args[0][0]
self.assertIn("AND tc.table_name IN ('orders')", query)
diff --git a/tests/unit/handlers/test_redshift.py b/tests/unit/handlers/test_redshift.py
index 8ee9a4f7e27..1d40b93fb4d 100644
--- a/tests/unit/handlers/test_redshift.py
+++ b/tests/unit/handlers/test_redshift.py
@@ -5,18 +5,21 @@
import pandas as pd
import psycopg
-from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
- RESPONSE_TYPE
-)
+from mindsdb.integrations.libs.response import OkResponse, ErrorResponse, RESPONSE_TYPE
from mindsdb.integrations.handlers.redshift_handler.redshift_handler import RedshiftHandler
from test_postgres import TestPostgresHandler
class TestRedshiftHandler(TestPostgresHandler):
-
def create_handler(self):
- return RedshiftHandler('redshift', connection_data=self.dummy_connection_data)
+ return RedshiftHandler("redshift", connection_data=self.dummy_connection_data)
+
+ def test_native_query(self):
+ """
+ This test is overridden to avoid issues with the generic MockCursorContextManager not being compatible with Postgres/Redshift cursor behavior.
+ More specific tests (test_native_query_with_results, test_native_query_command_ok, test_native_query_error) cover this functionality.
+ """
+ pass
def test_insert(self):
"""
@@ -32,20 +35,17 @@ def test_insert(self):
mock_cursor.executemany.return_value = None
- df = pd.DataFrame({
- 'column1': [1, 2, 3, np.nan],
- 'column2': ['a', 'b', 'c', None]
- })
+ df = pd.DataFrame({"column1": [1, 2, 3, np.nan], "column2": ["a", "b", "c", None]})
- table_name = 'mock_table'
+ table_name = "mock_table"
response = self.handler.insert(table_name, df)
- columns = ', '.join([f'"{col}"' if ' ' in col else col for col in df.columns])
- values = ', '.join(['%s' for _ in range(len(df.columns))])
- expected_query = f'INSERT INTO {table_name} ({columns}) VALUES ({values})'
+ columns = ", ".join([f'"{col}"' if " " in col else col for col in df.columns])
+ values = ", ".join(["%s" for _ in range(len(df.columns))])
+ expected_query = f"INSERT INTO {table_name} ({columns}) VALUES ({values})"
mock_cursor.executemany.assert_called_once_with(expected_query, df.replace({np.nan: None}).values.tolist())
- assert isinstance(response, Response)
+ assert isinstance(response, OkResponse)
self.assertEqual(response.type, RESPONSE_TYPE.OK)
mock_conn.commit.assert_called_once()
@@ -65,17 +65,14 @@ def test_insert_error(self):
error = psycopg.Error(error_msg)
mock_cursor.executemany.side_effect = error
- df = pd.DataFrame({
- 'column1': [1, 2, 3, np.nan],
- 'column2': ['a', 'b', 'c', None]
- })
+ df = pd.DataFrame({"column1": [1, 2, 3, np.nan], "column2": ["a", "b", "c", None]})
- response = self.handler.insert('nonexistent_table', df)
+ response = self.handler.insert("nonexistent_table", df)
mock_cursor.executemany.assert_called_once()
mock_conn.rollback.assert_called_once()
- assert isinstance(response, Response)
+ assert isinstance(response, ErrorResponse)
self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
self.assertEqual(response.error_message, error_msg)
@@ -91,21 +88,21 @@ def test_insert_with_empty_dataframe(self):
self.handler.connect = MagicMock(return_value=mock_conn)
mock_conn.cursor = MagicMock(return_value=mock_cursor)
- df = pd.DataFrame(columns=['column1', 'column2'])
+ df = pd.DataFrame(columns=["column1", "column2"])
- table_name = 'mock_table'
+ table_name = "mock_table"
response = self.handler.insert(table_name, df)
- columns = ', '.join([f'"{col}"' if ' ' in col else col for col in df.columns])
- values = ', '.join(['%s' for _ in range(len(df.columns))])
- expected_query = f'INSERT INTO {table_name} ({columns}) VALUES ({values})'
+ columns = ", ".join([f'"{col}"' if " " in col else col for col in df.columns])
+ values = ", ".join(["%s" for _ in range(len(df.columns))])
+ expected_query = f"INSERT INTO {table_name} ({columns}) VALUES ({values})"
mock_cursor.executemany.assert_called_once()
call_args, call_kwargs = mock_cursor.executemany.call_args
self.assertEqual(call_args[0], expected_query)
self.assertEqual(len(call_args[1]), 0)
- assert isinstance(response, Response)
+ assert isinstance(response, OkResponse)
self.assertEqual(response.type, RESPONSE_TYPE.OK)
mock_conn.commit.assert_called_once()
@@ -123,25 +120,27 @@ def test_insert_with_special_column_names(self):
self.handler.connect = MagicMock(return_value=mock_conn)
mock_conn.cursor = MagicMock(return_value=mock_cursor)
- df = pd.DataFrame({
- 'normal_column': [1, 2],
- 'column with spaces': ['a', 'b'],
- 'column-with-hyphens': [True, False],
- 'mixed@column#123': [3.14, 2.71]
- })
+ df = pd.DataFrame(
+ {
+ "normal_column": [1, 2],
+ "column with spaces": ["a", "b"],
+ "column-with-hyphens": [True, False],
+ "mixed@column#123": [3.14, 2.71],
+ }
+ )
- table_name = 'mock_table'
+ table_name = "mock_table"
response = self.handler.insert(table_name, df)
call_args = mock_cursor.executemany.call_args[0][0]
for col in df.columns:
- if ' ' in col:
+ if " " in col:
self.assertIn(f'"{col}"', call_args)
else:
self.assertTrue(col in call_args or f'"{col}"' in call_args)
- assert isinstance(response, Response)
+ assert isinstance(response, OkResponse)
self.assertEqual(response.type, RESPONSE_TYPE.OK)
def test_insert_disconnect_when_needed(self):
@@ -159,15 +158,15 @@ def test_insert_disconnect_when_needed(self):
self.handler.disconnect = MagicMock()
mock_conn.cursor = MagicMock(return_value=mock_cursor)
- df = pd.DataFrame({'column1': [1, 2, 3]})
- self.handler.insert('mock_table', df)
+ df = pd.DataFrame({"column1": [1, 2, 3]})
+ self.handler.insert("mock_table", df)
self.handler.disconnect.assert_called_once()
self.handler.connect.reset_mock()
self.handler.disconnect.reset_mock()
self.handler.is_connected = True
- self.handler.insert('mock_table', df)
+ self.handler.insert("mock_table", df)
self.handler.disconnect.assert_not_called()
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_rest_api.py b/tests/unit/handlers/test_rest_api.py
new file mode 100644
index 00000000000..ff10b02d67f
--- /dev/null
+++ b/tests/unit/handlers/test_rest_api.py
@@ -0,0 +1,165 @@
+"""Unit tests for the generic REST API passthrough handler."""
+
+from unittest.mock import patch, MagicMock
+
+from mindsdb.integrations.handlers.rest_api_handler.rest_api_handler import RestApiHandler
+from mindsdb.integrations.libs.passthrough import PassthroughProtocol
+from mindsdb.integrations.libs.passthrough_types import PassthroughRequest, PassthroughResponse
+from mindsdb.integrations.libs.response import (
+ HandlerStatusResponse as StatusResponse,
+)
+
+
+VALID_DATA = {
+ "base_url": "https://api.example.com",
+ "bearer_token": "test-token-123",
+}
+
+
+def _make_handler(connection_data=None):
+ if connection_data is None:
+ connection_data = dict(VALID_DATA)
+ return RestApiHandler("test_rest", connection_data=connection_data)
+
+
+class TestRestApiHandlerInit:
+ def test_satisfies_passthrough_protocol(self):
+ assert issubclass(RestApiHandler, PassthroughProtocol)
+
+ def test_stores_connection_data(self):
+ data = {"base_url": "https://x.com", "bearer_token": "tok"}
+ handler = _make_handler(data)
+ assert handler.connection_data == data
+
+ def test_default_test_request_path(self):
+ handler = _make_handler()
+ assert handler._test_request.method == "GET"
+ assert handler._test_request.path == "/"
+
+ def test_custom_test_path(self):
+ handler = _make_handler(
+ {
+ "base_url": "https://api.example.com",
+ "bearer_token": "tok",
+ "test_path": "/health",
+ }
+ )
+ assert handler._test_request.path == "/health"
+
+ def test_custom_test_path_without_slash(self):
+ handler = _make_handler(
+ {
+ "base_url": "https://api.example.com",
+ "bearer_token": "tok",
+ "test_path": "status",
+ }
+ )
+ assert handler._test_request.path == "/status"
+
+
+class TestCheckConnection:
+ def test_success(self):
+ handler = _make_handler()
+ response = handler.check_connection()
+ assert isinstance(response, StatusResponse)
+ assert response.success is True
+ assert not response.error_message
+
+ def test_missing_base_url(self):
+ handler = _make_handler({"bearer_token": "tok"})
+ response = handler.check_connection()
+ assert response.success is False
+ assert "base_url" in response.error_message
+
+ def test_missing_bearer_token(self):
+ handler = _make_handler({"base_url": "https://api.example.com"})
+ response = handler.check_connection()
+ assert response.success is False
+ assert "bearer_token" in response.error_message
+
+ def test_empty_connection_data(self):
+ handler = _make_handler({})
+ response = handler.check_connection()
+ assert response.success is False
+
+
+class TestPassthroughIntegration:
+ """Test that the mixin methods work correctly on RestApiHandler."""
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_api_passthrough_injects_bearer(self, mock_request):
+ mock_resp = MagicMock()
+ mock_resp.status_code = 200
+ mock_resp.headers = {"Content-Type": "application/json"}
+ mock_resp.iter_content.return_value = [b'{"ok": true}']
+ mock_resp.close = MagicMock()
+ mock_request.return_value = mock_resp
+
+ handler = _make_handler()
+ result = handler.api_passthrough(PassthroughRequest(method="GET", path="/v1/users"))
+
+ assert isinstance(result, PassthroughResponse)
+ assert result.status_code == 200
+ headers = mock_request.call_args.kwargs["headers"]
+ assert headers["Authorization"] == "Bearer test-token-123"
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_api_passthrough_uses_base_url(self, mock_request):
+ mock_resp = MagicMock()
+ mock_resp.status_code = 200
+ mock_resp.headers = {}
+ mock_resp.iter_content.return_value = [b""]
+ mock_resp.close = MagicMock()
+ mock_request.return_value = mock_resp
+
+ handler = _make_handler()
+ handler.api_passthrough(PassthroughRequest(method="GET", path="/foo"))
+
+ called_url = mock_request.call_args.args[1]
+ assert called_url == "https://api.example.com/foo"
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_api_passthrough_includes_default_headers(self, mock_request):
+ mock_resp = MagicMock()
+ mock_resp.status_code = 200
+ mock_resp.headers = {}
+ mock_resp.iter_content.return_value = [b""]
+ mock_resp.close = MagicMock()
+ mock_request.return_value = mock_resp
+
+ handler = _make_handler(
+ {
+ "base_url": "https://api.example.com",
+ "bearer_token": "tok",
+ "default_headers": {"Accept": "application/json", "X-Team": "data"},
+ }
+ )
+ handler.api_passthrough(PassthroughRequest(method="GET", path="/"))
+
+ headers = mock_request.call_args.kwargs["headers"]
+ assert headers["Accept"] == "application/json"
+ assert headers["X-Team"] == "data"
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_success(self, mock_request):
+ mock_resp = MagicMock()
+ mock_resp.status_code = 200
+ mock_resp.headers = {"Content-Type": "application/json"}
+ mock_resp.iter_content.return_value = [b'{"ok": true}']
+ mock_resp.close = MagicMock()
+ mock_request.return_value = mock_resp
+
+ handler = _make_handler()
+ result = handler.test_passthrough()
+
+ assert isinstance(result, dict)
+ assert result["ok"] is True
+ assert result["status_code"] == 200
+
+ def test_test_passthrough_with_no_network(self):
+ """test_passthrough catches connection errors gracefully."""
+ handler = _make_handler()
+ result = handler.test_passthrough()
+ assert isinstance(result, dict)
+ assert result["ok"] is False
+ assert result["error_code"] in ("network", "unknown")
diff --git a/tests/unit/handlers/test_salesforce.py b/tests/unit/handlers/test_salesforce.py
index 54253f3eef8..6df5580ba27 100644
--- a/tests/unit/handlers/test_salesforce.py
+++ b/tests/unit/handlers/test_salesforce.py
@@ -16,7 +16,7 @@
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
from mindsdb.integrations.libs.response import (
- HandlerResponse as Response,
+ TableResponse,
HandlerStatusResponse as StatusResponse,
RESPONSE_TYPE,
)
@@ -157,7 +157,7 @@ def test_check_connection_failure(self):
def test_get_tables(self):
"""
- Test that the `get_tables` method returns a list of tables mapped from the Salesforce API.
+ Test that the `get_tables` method returns a TableResponse with a list of tables mapped from the Salesforce API.
"""
mock_tables = ["Account", "Contact"]
self.mock_connect.return_value = MagicMock(
@@ -168,7 +168,7 @@ def test_get_tables(self):
self.handler.connect()
response = self.handler.get_tables()
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -177,7 +177,7 @@ def test_get_tables(self):
def test_get_columns(self):
"""
- Test that the `get_columns` method returns a list of columns for a given table.
+ Test that the `get_columns` method returns a TableResponse with a list of columns for a given table.
"""
mock_columns = ["Id", "Name", "Email"]
mock_table = "Contact"
@@ -203,7 +203,7 @@ def test_get_columns(self):
self.handler.connect()
response = self.handler.get_columns(mock_table)
- assert isinstance(response, Response)
+ assert isinstance(response, TableResponse)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
df = response.data_frame
@@ -435,7 +435,7 @@ def test_meta_get_tables_filters_requested_tables(self):
with patch(
"mindsdb.integrations.handlers.salesforce_handler.salesforce_handler.MetaAPIHandler.meta_get_tables",
- return_value=Response(RESPONSE_TYPE.TABLE, None),
+ return_value=TableResponse(),
) as mock_meta:
response = self.handler.meta_get_tables(table_names=["contact"])
@@ -680,5 +680,64 @@ def test_meta_get_columns_builds_schema(self):
self.assertEqual(columns[0]["data_type"], "string")
+class TestSalesforcePassthrough(unittest.TestCase):
+ """Exercise the PassthroughMixin retrofit (per-instance base URL)."""
+
+ CONNECTION_DATA = {
+ "username": "u",
+ "password": "p",
+ "client_id": "cid",
+ "client_secret": "csec",
+ "access_token": "sf_access_tok",
+ "instance_url": "https://my-org.my.salesforce.com",
+ }
+
+ def _mock_response(self, status_code=200):
+ resp = MagicMock()
+ resp.status_code = status_code
+ resp.headers = {"Content-Type": "application/json"}
+ resp.iter_content = MagicMock(return_value=iter([b'{"sobjects":[]}']))
+ resp.close = MagicMock()
+ return resp
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_passthrough_uses_bearer_and_instance_url(self, mock_request):
+ mock_request.return_value = self._mock_response()
+ handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA)
+ from mindsdb.integrations.libs.passthrough_types import PassthroughRequest
+
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/services/data/v60.0/"))
+
+ self.assertEqual(resp.status_code, 200)
+ args, kwargs = mock_request.call_args
+ self.assertEqual(args[0], "GET")
+ self.assertEqual(args[1], "https://my-org.my.salesforce.com/services/data/v60.0/")
+ self.assertEqual(kwargs["headers"]["Authorization"], "Bearer sf_access_tok")
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_returns_ok_on_200(self, mock_request):
+ mock_request.return_value = self._mock_response(status_code=200)
+ handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA)
+
+ result = handler.test_passthrough()
+
+ self.assertTrue(result["ok"])
+ self.assertEqual(result["status_code"], 200)
+ self.assertEqual(result["host"], "my-org.my.salesforce.com")
+ self.assertIsInstance(result["latency_ms"], int)
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_returns_auth_failed_on_401(self, mock_request):
+ mock_request.return_value = self._mock_response(status_code=401)
+ handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA)
+
+ result = handler.test_passthrough()
+
+ self.assertFalse(result["ok"])
+ self.assertEqual(result["error_code"], "auth_failed")
+ self.assertEqual(result["status_code"], 401)
+ self.assertEqual(result["host"], "my-org.my.salesforce.com")
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_shopify_handler.py b/tests/unit/handlers/test_shopify_handler.py
index 277ec09a638..b1ecc6e8811 100644
--- a/tests/unit/handlers/test_shopify_handler.py
+++ b/tests/unit/handlers/test_shopify_handler.py
@@ -817,5 +817,67 @@ def test_limit_large_than_max_page_limit(self, mock_shopify_query):
self.assertEqual(len(result), 300)
+class TestShopifyPassthrough(unittest.TestCase):
+ """Exercise the PassthroughMixin retrofit (X-Shopify-Access-Token auth)."""
+
+ CONNECTION_DATA = {
+ "shop_url": "test-shop.myshopify.com",
+ "client_id": "cid",
+ "client_secret": "csec",
+ "access_token": "shpat_tokenvalue",
+ }
+
+ def _mock_response(self, status_code=200):
+ resp = MagicMock()
+ resp.status_code = status_code
+ resp.headers = {"Content-Type": "application/json"}
+ resp.iter_content = MagicMock(return_value=iter([b'{"shop":{"id":1}}']))
+ resp.close = MagicMock()
+ return resp
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_passthrough_uses_shopify_header_and_per_shop_base_url(self, mock_request):
+ mock_request.return_value = self._mock_response()
+ handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA)
+ from mindsdb.integrations.libs.passthrough_types import PassthroughRequest
+
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/admin/api/2024-01/shop.json"))
+
+ self.assertEqual(resp.status_code, 200)
+ args, kwargs = mock_request.call_args
+ self.assertEqual(args[0], "GET")
+ self.assertEqual(args[1], "https://test-shop.myshopify.com/admin/api/2024-01/shop.json")
+ # Custom Shopify auth header; no bearer Authorization.
+ self.assertEqual(kwargs["headers"]["X-Shopify-Access-Token"], "shpat_tokenvalue")
+ self.assertNotIn("Authorization", kwargs["headers"])
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_returns_ok_on_200(self, mock_request):
+ mock_request.return_value = self._mock_response(status_code=200)
+ handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA)
+
+ result = handler.test_passthrough()
+
+ self.assertTrue(result["ok"])
+ self.assertEqual(result["status_code"], 200)
+ self.assertEqual(result["host"], "test-shop.myshopify.com")
+ self.assertIsInstance(result["latency_ms"], int)
+ # The probe should hit the version-less endpoint so it survives
+ # Shopify's quarterly Admin API version retirements.
+ self.assertEqual(mock_request.call_args[0][1], "https://test-shop.myshopify.com/admin/shop.json")
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_test_passthrough_returns_auth_failed_on_401(self, mock_request):
+ mock_request.return_value = self._mock_response(status_code=401)
+ handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA)
+
+ result = handler.test_passthrough()
+
+ self.assertFalse(result["ok"])
+ self.assertEqual(result["error_code"], "auth_failed")
+ self.assertEqual(result["status_code"], 401)
+ self.assertEqual(result["host"], "test-shop.myshopify.com")
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_snowflake.py b/tests/unit/handlers/test_snowflake.py
index e43aec5eac5..07c0c87b040 100644
--- a/tests/unit/handlers/test_snowflake.py
+++ b/tests/unit/handlers/test_snowflake.py
@@ -16,11 +16,16 @@
import numpy as np
import pandas as pd
from pandas import DataFrame
-from types import SimpleNamespace
from base_handler_test import BaseDatabaseHandlerTest
-from mindsdb.integrations.libs.response import HandlerResponse as Response, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE
+from mindsdb.integrations.libs.response import (
+ OkResponse,
+ TableResponse,
+ ErrorResponse,
+ INF_SCHEMA_COLUMNS_NAMES_SET,
+ RESPONSE_TYPE,
+)
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
@@ -246,8 +251,7 @@ def test_native_query_with_results(self):
mock_cursor.fetch_pandas_batches.assert_called_once()
mock_cursor.fetchall.assert_not_called()
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, TableResponse)
self.assertEqual(data.type, RESPONSE_TYPE.TABLE)
self.assertIsInstance(data.data_frame, DataFrame)
self.assertListEqual(list(data.data_frame.columns), expected_columns)
@@ -285,8 +289,7 @@ def test_native_query_no_results(self):
mock_cursor.execute.assert_called_once_with(query_str)
mock_cursor.fetch_pandas_batches.assert_called_once()
- self.assertIsInstance(data, Response)
- self.assertFalse(data.error_code)
+ self.assertIsInstance(data, OkResponse)
self.assertEqual(data.type, RESPONSE_TYPE.OK)
self.assertEqual(data.affected_rows, 1)
@@ -350,7 +353,7 @@ def test_native_query_error(self):
mock_conn.cursor.assert_called_once()
mock_cursor.execute.assert_called_once_with(query_str)
- self.assertIsInstance(data, Response)
+ self.assertIsInstance(data, ErrorResponse)
self.assertEqual(data.type, RESPONSE_TYPE.ERROR)
self.assertIn(error_msg, data.error_message)
@@ -376,33 +379,11 @@ def test_native_query_releases_memory_pool_when_jemalloc(self):
mock_pool.backend_name = "jemalloc"
mock_pool.release_unused = MagicMock()
- response = self.handler.native_query("SELECT 1")
+ response = self.handler.native_query("SELECT 1", stream=False)
self.assertEqual(response.type, RESPONSE_TYPE.TABLE)
mock_pool.release_unused.assert_called_once()
- def test_native_query_memory_estimation_error(self):
- mock_conn = MagicMock()
- mock_cursor = MagicMock()
- mock_cursor.__enter__.return_value = mock_cursor
- mock_cursor.__exit__.return_value = None
- large_df = DataFrame({"ID": range(1500)})
- mock_cursor.fetch_pandas_batches.return_value = iter([large_df])
- mock_cursor.description = [ColumnDescription(name="ID", type_code=0, scale=0)]
- mock_cursor.rowcount = 10000
-
- self.handler.connect = MagicMock(return_value=mock_conn)
- mock_conn.cursor.return_value = mock_cursor
-
- with patch(
- "mindsdb.integrations.handlers.snowflake_handler.snowflake_handler.psutil.virtual_memory",
- return_value=SimpleNamespace(available=512),
- ):
- response = self.handler.native_query("SELECT * FROM big_table")
-
- self.assertEqual(response.type, RESPONSE_TYPE.ERROR)
- self.assertIn("query result is too large", response.error_message)
-
def test_key_pair_authentication_success(self):
"""
Tests successful connection using key pair authentication
@@ -645,7 +626,7 @@ def test_query_method(self):
renderer_mock.get_string.return_value = "SELECT * FROM test_table_rendered"
self.handler.native_query = MagicMock()
- expected_response = Response(RESPONSE_TYPE.TABLE)
+ expected_response = TableResponse(data=DataFrame())
self.handler.native_query.return_value = expected_response
try:
@@ -673,11 +654,8 @@ def test_get_tables(self):
"""
Tests that get_tables calls native_query with the correct SQL for Snowflake
"""
- expected_response = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=DataFrame(
- [("table1", "SCHEMA1", "BASE TABLE")], columns=["TABLE_NAME", "TABLE_SCHEMA", "TABLE_TYPE"]
- ),
+ expected_response = TableResponse(
+ data=DataFrame([("table1", "SCHEMA1", "BASE TABLE")], columns=["TABLE_NAME", "TABLE_SCHEMA", "TABLE_TYPE"])
)
self.handler.native_query = MagicMock(return_value=expected_response)
@@ -751,7 +729,7 @@ def test_get_columns(self):
]
expected_df = DataFrame(expected_df_data, columns=query_columns)
- expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df)
+ expected_response = TableResponse(data=expected_df)
self.handler.native_query = MagicMock(return_value=expected_response)
table_name = "test_table"
@@ -794,7 +772,7 @@ def test_meta_get_tables_casts_rowcount(self):
}
]
)
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=df))
result = self.handler.meta_get_tables(table_names=["orders"])
@@ -815,7 +793,7 @@ def test_meta_get_columns_filters(self):
}
]
)
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=df))
result = self.handler.meta_get_columns(table_names=["orders"])
@@ -849,8 +827,8 @@ def test_meta_get_column_statistics_success(self):
)
self.handler.native_query = MagicMock(
side_effect=[
- Response(RESPONSE_TYPE.TABLE, data_frame=columns_df),
- Response(RESPONSE_TYPE.TABLE, data_frame=stats_df),
+ TableResponse(data=columns_df),
+ TableResponse(data=stats_df),
]
)
@@ -864,9 +842,7 @@ def test_meta_get_column_statistics_success(self):
self.assertEqual(id_stats["maximum_value"], 10)
def test_meta_get_column_statistics_handles_error_response(self):
- self.handler.native_query = MagicMock(
- return_value=Response(RESPONSE_TYPE.ERROR, error_message="boom", data_frame=None)
- )
+ self.handler.native_query = MagicMock(return_value=ErrorResponse(error_message="boom"))
result = self.handler.meta_get_column_statistics(table_names=["orders"])
self.assertEqual(result.type, RESPONSE_TYPE.ERROR)
@@ -877,7 +853,7 @@ def test_meta_get_primary_keys_filters(self):
{"table_name": "CUSTOMERS", "column_name": "ID", "key_sequence": 1, "constraint_name": "PK_CUSTOMERS"},
]
)
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=df))
result = self.handler.meta_get_primary_keys(table_names=["ORDERS"])
@@ -909,12 +885,17 @@ def test_meta_get_foreign_keys_filters(self):
},
]
)
- self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df))
+ self.handler.native_query = MagicMock(return_value=TableResponse(data=df))
result = self.handler.meta_get_foreign_keys(table_names=["ORDERS", "CUSTOMERS"])
self.assertEqual(len(result.data_frame), 1)
self.assertIn("child_table_name", result.data_frame.columns)
+ row = result.data_frame.iloc[0]
+ self.assertEqual(row["parent_table_name"], "ORDERS")
+ self.assertEqual(row["parent_column_name"], "CUSTOMER_ID")
+ self.assertEqual(row["child_table_name"], "CUSTOMERS")
+ self.assertEqual(row["child_column_name"], "ID")
def test_meta_get_foreign_keys_handles_exception(self):
self.handler.native_query = MagicMock(side_effect=Exception("boom"))
@@ -1195,7 +1176,8 @@ def test_types_casting(self):
]
response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ actual_mysql_types = [col.type for col in response.columns]
+ self.assertEqual(actual_mysql_types, excepted_mysql_types)
for column_name in input_data.columns:
result_value = response.data_frame[column_name][0]
self.assertEqual(result_value, input_data[column_name][0])
@@ -1346,7 +1328,8 @@ def test_types_casting(self):
]
response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ actual_mysql_types = [col.type for col in response.columns]
+ self.assertEqual(actual_mysql_types, excepted_mysql_types)
for column_name in input_data.columns:
result_value = response.data_frame[column_name][0]
self.assertEqual(result_value, input_data[column_name][0])
@@ -1380,7 +1363,8 @@ def test_types_casting(self):
excepted_mysql_types = [MYSQL_DATA_TYPE.BOOLEAN]
response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ actual_mysql_types = [col.type for col in response.columns]
+ self.assertEqual(actual_mysql_types, excepted_mysql_types)
for column_name in input_data.columns:
result_value = response.data_frame[column_name][0]
self.assertEqual(result_value, input_data[column_name][0])
@@ -1616,7 +1600,8 @@ def test_types_casting(self):
}
)
response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ actual_mysql_types = [col.type for col in response.columns]
+ self.assertEqual(actual_mysql_types, excepted_mysql_types)
self.assertTrue(response.data_frame.equals(expected_result_df))
# endregion
@@ -1679,7 +1664,8 @@ def test_types_casting(self):
}
)
response = self.handler.native_query(query_str)
- self.assertEqual(response.mysql_types, excepted_mysql_types)
+ actual_mysql_types = [col.type for col in response.columns]
+ self.assertEqual(actual_mysql_types, excepted_mysql_types)
self.assertTrue(response.data_frame.equals(expected_result_df))
# endregion
diff --git a/tests/unit/handlers/test_timescaledb.py b/tests/unit/handlers/test_timescaledb.py
index 32c3efb46de..52cbd771908 100644
--- a/tests/unit/handlers/test_timescaledb.py
+++ b/tests/unit/handlers/test_timescaledb.py
@@ -7,22 +7,19 @@
from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager
from mindsdb.integrations.handlers.timescaledb_handler.timescaledb_handler import TimeScaleDBHandler
-from mindsdb.integrations.libs.response import (
- HandlerResponse as Response
-)
+from mindsdb.integrations.libs.response import DataHandlerResponse as Response
class TestTimescaleHandler(BaseDatabaseHandlerTest, unittest.TestCase):
-
@property
def dummy_connection_data(self):
return OrderedDict(
- host='127.0.0.1',
+ host="127.0.0.1",
port=5432,
- user='example_user',
- schema='public',
- password='example_pass',
- database='example_db'
+ user="example_user",
+ schema="public",
+ password="example_pass",
+ database="example_db",
)
@property
@@ -69,10 +66,10 @@ def get_columns_query(self):
"""
def create_handler(self):
- return TimeScaleDBHandler('timescaledb', connection_data=self.dummy_connection_data)
+ return TimeScaleDBHandler("timescaledb", connection_data=self.dummy_connection_data)
def create_patcher(self):
- return patch('psycopg.connect')
+ return patch("psycopg.connect")
def test_native_query(self):
"""
@@ -99,5 +96,5 @@ def test_native_query(self):
self.assertFalse(data.error_code)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/handlers/test_web.py b/tests/unit/handlers/test_web.py
index 78c1a9e2617..cb34256e15d 100644
--- a/tests/unit/handlers/test_web.py
+++ b/tests/unit/handlers/test_web.py
@@ -9,22 +9,27 @@
from bs4 import BeautifulSoup
from mindsdb.integrations.libs.api_handler_exceptions import TableAlreadyExists
-from mindsdb.integrations.handlers.web_handler.web_handler import WebHandler
-from mindsdb.integrations.handlers.web_handler.web_handler import CrawlerTable
-from mindsdb.integrations.handlers.web_handler import urlcrawl_helpers as helpers
+try:
+ from mindsdb.integrations.handlers.web_handler.web_handler import WebHandler
+ from mindsdb.integrations.handlers.web_handler.web_handler import CrawlerTable
+ from mindsdb.integrations.handlers.web_handler import urlcrawl_helpers as helpers
-from mindsdb.integrations.utilities.sql_utils import (FilterCondition, FilterOperator)
+ WEB_HANDLER_AVAILABLE = True
+except ImportError:
+ WEB_HANDLER_AVAILABLE = False
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
-class TestWebsHandler(unittest.TestCase):
+@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)")
+class TestWebsHandler(unittest.TestCase):
def setUp(self) -> None:
- self.handler = WebHandler(name='test_web_handler')
+ self.handler = WebHandler(name="test_web_handler")
def test_crawler_already_registered(self):
with self.assertRaises(TableAlreadyExists):
- self.handler._register_table('crawler', CrawlerTable)
+ self.handler._register_table("crawler", CrawlerTable)
PDF_CONTENT = (
@@ -43,6 +48,7 @@ def test_crawler_already_registered(self):
MARKDOWN_SAMPLE_1 = "# Heading One \n\n ## Heading Two"
+@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)")
class TestWebHelpers(unittest.TestCase):
@patch("requests.Response")
def test_pdf_to_markdown(self, mock_response) -> None:
@@ -60,14 +66,15 @@ def test_broken_pdf_to_markdown(self, mock_response) -> None:
helpers.pdf_to_markdown(response)
def test_url_validation(self):
- assert helpers.is_valid('https://google.com') is True
- assert helpers.is_valid('google.com') is False
+ assert helpers.is_valid("https://google.com") is True
+ assert helpers.is_valid("google.com") is False
def test_get_readable_text_from_soup(self) -> None:
soup = BeautifulSoup(HTML_SAMPLE_1, "html.parser")
import re
- expected = re.sub(r'\s+', ' ', MARKDOWN_SAMPLE_1).strip()
- actual = re.sub(r'\s+', ' ', helpers.get_readable_text_from_soup(soup)).strip()
+
+ expected = re.sub(r"\s+", " ", MARKDOWN_SAMPLE_1).strip()
+ actual = re.sub(r"\s+", " ", helpers.get_readable_text_from_soup(soup)).strip()
assert expected == actual
@@ -104,12 +111,9 @@ def test_parallel_get_all_website_links(self, mock_executor, mock_get_links):
def html_get(url, **kwargs):
# generate html page with 10 sub-links in the same domain
- if not url.endswith('/'):
- url = url + '/'
- links = [
- f"link {i}\n"
- for i in range(10)
- ]
+ if not url.endswith("/"):
+ url = url + "/"
+ links = [f"link {i}\n" for i in range(10)]
html = f"""
@@ -128,24 +132,23 @@ def html_get(url, **kwargs):
return resp
+@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)")
class TestWebHandler(unittest.TestCase):
-
- @patch('requests.Session.get')
+ @patch("requests.Session.get")
def test_web_cases(self, mock_get):
-
mock_get.side_effect = html_get
crawler_table = CrawlerTable(handler=MagicMock())
# filters
- single_url = FilterCondition('url', FilterOperator.EQUAL, 'https://docs.mindsdb.com/')
- two_urls = FilterCondition('url', FilterOperator.IN, ('https://docs.mindsdb.com/', 'https://docs.python.org/'))
+ single_url = FilterCondition("url", FilterOperator.EQUAL, "https://docs.mindsdb.com/")
+ two_urls = FilterCondition("url", FilterOperator.IN, ("https://docs.mindsdb.com/", "https://docs.python.org/"))
- depth_0 = FilterCondition('crawl_depth', FilterOperator.EQUAL, 0)
- depth_1 = FilterCondition('crawl_depth', FilterOperator.EQUAL, 1)
- depth_2 = FilterCondition('crawl_depth', FilterOperator.EQUAL, 2)
+ depth_0 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 0)
+ depth_1 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 1)
+ depth_2 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 2)
- per_url_2 = FilterCondition('per_url_limit', FilterOperator.EQUAL, 2)
+ per_url_2 = FilterCondition("per_url_limit", FilterOperator.EQUAL, 2)
# ---- single url -----
diff --git a/mindsdb/integrations/handlers/altibase_handler/tests/__init__.py b/tests/unit/integrations/__init__.py
similarity index 100%
rename from mindsdb/integrations/handlers/altibase_handler/tests/__init__.py
rename to tests/unit/integrations/__init__.py
diff --git a/mindsdb/integrations/handlers/aqicn_handler/tests/__init__.py b/tests/unit/integrations/libs/__init__.py
similarity index 100%
rename from mindsdb/integrations/handlers/aqicn_handler/tests/__init__.py
rename to tests/unit/integrations/libs/__init__.py
diff --git a/tests/unit/integrations/libs/test_response.py b/tests/unit/integrations/libs/test_response.py
new file mode 100644
index 00000000000..18aa870d939
--- /dev/null
+++ b/tests/unit/integrations/libs/test_response.py
@@ -0,0 +1,671 @@
+"""Unit tests for response classes in mindsdb.integrations.libs.response module.
+
+This module tests all response types used by handlers:
+- TableResponse: for queries that return data (SELECT, SHOW, etc.)
+- OkResponse: for successful operations without data (CREATE, DROP, etc.)
+- ErrorResponse: for error cases
+- HandlerStatusResponse: for connection status checks
+- normalize_response: for converting legacy HandlerResponse to new types
+- _safe_pandas_concat: memory-safe DataFrame concatenation
+"""
+
+from unittest.mock import patch, MagicMock
+
+import pandas as pd
+import pytest
+
+from mindsdb.integrations.libs.response import (
+ TableResponse,
+ OkResponse,
+ ErrorResponse,
+ HandlerStatusResponse,
+ HandlerResponse,
+ normalize_response,
+ _safe_pandas_concat,
+ RESPONSE_TYPE,
+ DataHandlerResponse,
+)
+from mindsdb.utilities.types.column import Column
+from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
+
+
+def _mock_virtual_memory(available_kb: int):
+ """Create a mock for psutil.virtual_memory() with given available memory in KB."""
+ mock_mem = MagicMock()
+ mock_mem.available = available_kb << 10 # convert KB back to bytes
+ return mock_mem
+
+
+class TestHandlerStatusResponse:
+ """Tests for HandlerStatusResponse class."""
+
+ def test_init_success(self):
+ """Test initialization with success status."""
+ redirect_url = "https://example.com/auth"
+ copy_storage = "s3://bucket/path"
+ response = HandlerStatusResponse(success=True, redirect_url=redirect_url, copy_storage=copy_storage)
+
+ assert response.success is True
+ assert response.error_message is None
+ assert response.redirect_url == redirect_url
+ assert response.copy_storage == copy_storage
+
+ json_data = response.to_json()
+ assert json_data["success"] is True
+ assert json_data["error"] is None
+ assert json_data["redirect_url"] == redirect_url
+ assert json_data["copy_storage"] == copy_storage
+
+ def test_init_failure(self):
+ """Test initialization with failure status."""
+ error_msg = "Connection failed"
+ response = HandlerStatusResponse(success=False, error_message=error_msg)
+
+ assert response.success is False
+ assert response.error_message == error_msg
+ assert response.redirect_url is None
+ assert response.copy_storage is None
+
+ json_data = response.to_json()
+ assert json_data["success"] is False
+ assert json_data["error"] == error_msg
+ assert "redirect_url" not in json_data
+ assert "copy_storage" not in json_data
+
+
+class TestErrorResponse:
+ """Unit tests for ErrorResponse class."""
+
+ def test_init_basic(self):
+ """Test basic initialization."""
+ response = ErrorResponse(error_code=1, error_message="Test error", is_expected_error=True)
+
+ assert response.type == RESPONSE_TYPE.ERROR
+ assert response.resp_type == RESPONSE_TYPE.ERROR
+ assert response.error_code == 1
+ assert response.error_message == "Test error"
+ assert response.is_expected_error is True
+ assert response.exception is None
+ assert isinstance(response, DataHandlerResponse)
+
+ def test_exception_capture(self):
+ """Test that exception is captured from current context."""
+ try:
+ raise ValueError("Test exception")
+ except ValueError:
+ response = ErrorResponse(error_message="Caught exception")
+ assert response.exception is not None
+ assert isinstance(response.exception, ValueError)
+
+
+class TestOkResponse:
+ """Unit tests for OkResponse class."""
+
+ def test_init(self):
+ """Test initialization with affected rows count."""
+ response = OkResponse(affected_rows=5)
+
+ assert response.type == RESPONSE_TYPE.OK
+ assert response.resp_type == RESPONSE_TYPE.OK
+ assert response.affected_rows == 5
+ assert isinstance(response, DataHandlerResponse)
+
+ def test_init_without_affected_rows(self):
+ """Test initialization without affected rows."""
+ response = OkResponse()
+
+ assert response.affected_rows is None
+
+
+class TestTableResponse:
+ """Unit tests for TableResponse class."""
+
+ def test_init_with_data(self):
+ """Test initialization with DataFrame."""
+ df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
+ response = TableResponse(data=df)
+
+ assert response.type == RESPONSE_TYPE.TABLE
+ assert response.resp_type == RESPONSE_TYPE.TABLE
+ assert response._fetched is True
+ pd.testing.assert_frame_equal(response._data, df)
+ # 'columns' was not provided as attr, so should be as in df
+ assert [c.name for c in response.columns] == ["id", "name"]
+
+ def test_complex_init_with_generator(self):
+ """Test initialization with data generator."""
+ column1 = Column(name="id", type=MYSQL_DATA_TYPE.INT)
+ column2 = Column(name="name", type=MYSQL_DATA_TYPE.VARCHAR)
+ columns = [column1, column2]
+ df = pd.DataFrame({"id": [0, 1], "name": ["a", "b"]})
+ df1 = pd.DataFrame({"id": [2, 3], "name": ["d", "e"]})
+ df2 = pd.DataFrame({"id": [4, 5], "name": ["f", "g"]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ response = TableResponse(data=df, data_generator=data_gen(), columns=columns)
+
+ assert response.columns[0] is column1
+ assert response.columns[1] is column2
+ assert response.data_generator is not None
+ pd.testing.assert_frame_equal(response._data, df)
+ assert response._fetched is False
+ pieces = []
+ while isinstance(el := response.fetchmany(), pd.DataFrame):
+ pieces.append(el)
+ pd.testing.assert_frame_equal(pieces[0], df1)
+ pd.testing.assert_frame_equal(pieces[1], df2)
+ pd.testing.assert_frame_equal(response._data, pd.concat([df, df1, df2]))
+ assert response._fetched is True
+ assert response.data_generator is None
+
+ def test_data_frame_property(self):
+ """Test initialization with explicit columns."""
+ columns = [Column(name="id", type=MYSQL_DATA_TYPE.INT), Column(name="name", type=MYSQL_DATA_TYPE.VARCHAR)]
+ df = pd.DataFrame({"id": [0, 1], "name": ["a", "b"]})
+ df1 = pd.DataFrame({"id": [2, 3], "name": ["d", "e"]})
+ df2 = pd.DataFrame({"id": [4, 5], "name": ["f", "g"]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ response = TableResponse(data=df, data_generator=data_gen(), columns=columns)
+ assert response._fetched is False
+ pd.testing.assert_frame_equal(response._data, df)
+ pd.testing.assert_frame_equal(response.data_frame, pd.concat([df, df1, df2]))
+ assert response._fetched is True
+
+ # should not change result
+ response.fetchall()
+ pd.testing.assert_frame_equal(response.data_frame, pd.concat([df, df1, df2]))
+
+ def test_init_with_affected_rows(self):
+ """Test initialization with affected_rows."""
+ df = pd.DataFrame({"id": [1, 2, 3]})
+ response = TableResponse(data=df, affected_rows=100)
+
+ assert response.affected_rows == 100
+
+ def test_iterate_no_save_no_generator(self):
+ """Test iterate_no_save yields existing data."""
+ df = pd.DataFrame({"id": [1, 2, 3]})
+ # Need to provide a generator (even empty) to avoid TypeError
+ response = TableResponse(data=df, data_generator=iter([]))
+
+ chunks = list(response.iterate_no_save())
+
+ assert len(chunks) == 1
+ pd.testing.assert_frame_equal(chunks[0], df)
+
+ # after `iterate_no_save` result should be invalid
+ with pytest.raises(ValueError):
+ pd.testing.assert_frame_equal(response.data_frame, df)
+
+ def test_iterate_no_save_with_generator(self):
+ """Test iterate_no_save yields all chunks without saving."""
+ df1 = pd.DataFrame({"id": [4, 5]})
+ df2 = pd.DataFrame({"id": [6, 7]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ df = pd.DataFrame({"id": [1, 2, 3]})
+ response = TableResponse(data=df, data_generator=data_gen())
+ chunks = list(response.iterate_no_save())
+
+ assert len(chunks) == 3
+ pd.testing.assert_frame_equal(chunks[0], df)
+ pd.testing.assert_frame_equal(chunks[1], df1)
+ pd.testing.assert_frame_equal(chunks[2], df2)
+
+ # after `iterate_no_save` result should be invalid
+ with pytest.raises(ValueError):
+ pd.testing.assert_frame_equal(response.data_frame, df)
+
+
+class TestNormalizeResponse:
+ """Unit tests for normalize_response function."""
+
+ def test_normalize_table_response(self):
+ """Test that TableResponse is returned as-is."""
+ original = TableResponse(data=pd.DataFrame({"id": [1, 2]}))
+ result = normalize_response(original)
+
+ assert result is original
+
+ def test_normalize_ok_response(self):
+ """Test that OkResponse is returned as-is."""
+ original = OkResponse(affected_rows=5)
+ result = normalize_response(original)
+
+ assert result is original
+
+ def test_normalize_error_response(self):
+ """Test that ErrorResponse is returned as-is."""
+ original = ErrorResponse(error_message="Test error")
+ result = normalize_response(original)
+
+ assert result is original
+
+ def test_normalize_legacy_error_response(self):
+ """Test conversion of legacy HandlerResponse with ERROR type."""
+ legacy = HandlerResponse(resp_type=RESPONSE_TYPE.ERROR, error_code=1, error_message="Legacy error")
+ result = normalize_response(legacy)
+
+ assert isinstance(result, ErrorResponse)
+ assert result.error_code == 1
+ assert result.error_message == "Legacy error"
+
+ def test_normalize_legacy_ok_response(self):
+ """Test conversion of legacy HandlerResponse with OK type."""
+ legacy = HandlerResponse(resp_type=RESPONSE_TYPE.OK, affected_rows=10)
+ result = normalize_response(legacy)
+
+ assert isinstance(result, OkResponse)
+ assert result.affected_rows == 10
+
+ def test_normalize_legacy_table_response(self):
+ """Test conversion of legacy HandlerResponse with TABLE type."""
+ df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
+ legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df)
+ result = normalize_response(legacy)
+
+ assert isinstance(result, TableResponse)
+ pd.testing.assert_frame_equal(result.data_frame, df)
+
+ def test_normalize_legacy_table_response_with_mysql_types(self):
+ """Test conversion preserves mysql_types as column types."""
+ df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
+ mysql_types = [MYSQL_DATA_TYPE.INT, MYSQL_DATA_TYPE.VARCHAR]
+ legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df, mysql_types=mysql_types)
+ result = normalize_response(legacy)
+
+ assert isinstance(result, TableResponse)
+ assert len(result.columns) == 2
+ assert result.columns[0].type == MYSQL_DATA_TYPE.INT
+ assert result.columns[1].type == MYSQL_DATA_TYPE.VARCHAR
+
+ def test_normalize_legacy_table_response_empty_dataframe(self):
+ """Test conversion with empty DataFrame."""
+ df = pd.DataFrame()
+ legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df)
+ result = normalize_response(legacy)
+
+ assert isinstance(result, TableResponse)
+ assert len(result.columns) == 0
+
+
+class TestSafePandasConcat:
+ """Unit tests for _safe_pandas_concat function."""
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_concat_with_enough_memory(self, mock_psutil):
+ """Test successful concatenation when sufficient memory is available."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df1 = pd.DataFrame({"id": [1, 2]})
+ df2 = pd.DataFrame({"id": [3, 4]})
+ result = _safe_pandas_concat([df1, df2])
+
+ pd.testing.assert_frame_equal(result, pd.concat([df1, df2]))
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_concat_raises_memory_error_when_not_enough_memory(self, mock_psutil):
+ """Test MemoryError is raised when available memory is too low."""
+ # Set available memory to essentially 0
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=10)
+
+ df1 = pd.DataFrame({"x": list(range(1000))})
+ df2 = pd.DataFrame({"x": list(range(1000))})
+
+ with pytest.raises(MemoryError):
+ _safe_pandas_concat([df1, df2])
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_concat_single_piece(self, mock_psutil):
+ """Test concatenation with a single DataFrame."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df = pd.DataFrame({"id": [1, 2, 3]})
+ result = _safe_pandas_concat([df])
+
+ pd.testing.assert_frame_equal(result, df)
+
+
+class TestRaiseIfLowMemory:
+ """Unit tests for TableResponse._raise_if_low_memory method."""
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_with_known_affected_rows_enough_memory(self, mock_psutil):
+ """Test no error when affected_rows is known and memory is sufficient."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ response = TableResponse(data=pd.DataFrame({"id": [1, 2]}), affected_rows=100)
+ response._last_data_piece = pd.DataFrame({"id": list(range(10))})
+ response.rows_fetched = 10
+
+ # Should not raise
+ response._raise_if_low_memory()
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_with_known_affected_rows_not_enough_memory(self, mock_psutil):
+ """Test MemoryError when affected_rows is known and memory is insufficient."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1)
+
+ # Use strings to ensure DataFrame memory > 1KB after >> 10
+ large_piece = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]})
+ response = TableResponse(data=pd.DataFrame({"text": ["a"]}), affected_rows=1000)
+ response._last_data_piece = large_piece
+ response.rows_fetched = 100
+
+ with pytest.raises(MemoryError, match="Not enough memory"):
+ response._raise_if_low_memory()
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_with_unknown_affected_rows_enough_memory(self, mock_psutil):
+ """Test no error when affected_rows is None and memory is sufficient."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ response = TableResponse(data=pd.DataFrame({"id": [1, 2]}))
+ response._last_data_piece = pd.DataFrame({"id": list(range(10))})
+
+ # Should not raise
+ response._raise_if_low_memory()
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_with_unknown_affected_rows_not_enough_memory(self, mock_psutil):
+ """Test MemoryError when affected_rows is None and memory is insufficient."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1)
+
+ # Use strings to ensure DataFrame memory > 1KB after >> 10
+ large_piece = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]})
+ response = TableResponse(data=pd.DataFrame({"text": ["a"]}))
+ response._last_data_piece = large_piece
+
+ with pytest.raises(MemoryError, match="Not enough memory"):
+ response._raise_if_low_memory()
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_all_rows_already_fetched(self, mock_psutil):
+ """Test no error when all rows have been fetched (rows_expected = 0)."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=0)
+
+ response = TableResponse(data=pd.DataFrame({"id": [1, 2]}), affected_rows=10)
+ response._last_data_piece = pd.DataFrame({"id": list(range(10))})
+ response.rows_fetched = 10 # all rows fetched
+
+ # rows_expected = min(10 - 10, 10) = 0, should not raise
+ response._raise_if_low_memory()
+
+
+class TestIterateWithMemoryCheck:
+ """Unit tests for TableResponse._iterate_with_memory_check method."""
+
+ def test_none_generator_yields_nothing(self):
+ """Test that no chunks are yielded when data_generator is None."""
+ response = TableResponse(data=pd.DataFrame({"id": [1]}))
+ assert response._data_generator is None
+
+ chunks = list(response._iterate_with_memory_check())
+ assert chunks == []
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_normal_iteration(self, mock_psutil):
+ """Test that all chunks are yielded during normal iteration."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df1 = pd.DataFrame({"id": [1, 2]})
+ df2 = pd.DataFrame({"id": [3, 4]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="id")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+
+ chunks = list(response._iterate_with_memory_check())
+
+ assert len(chunks) == 2
+ pd.testing.assert_frame_equal(chunks[0], df1)
+ pd.testing.assert_frame_equal(chunks[1], df2)
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_memory_error_stops_iteration_after_first_chunk(self, mock_psutil):
+ """Test that MemoryError is raised after the first chunk when memory runs out.
+
+ The pre-loop _raise_if_low_memory() is a no-op (since _last_data_piece is None),
+ so the first real psutil.virtual_memory() call happens at the post-yield check.
+ """
+ # Use strings to ensure DataFrame memory > 1KB after >> 10
+ df1 = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]})
+ df2 = pd.DataFrame({"text": ["y" * 200 for _ in range(100)]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="text")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+
+ gen = response._iterate_with_memory_check()
+
+ # First chunk succeeds β post-yield check will be the first real psutil call
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1)
+ first = next(gen)
+ pd.testing.assert_frame_equal(first, df1)
+
+ # Resuming the generator triggers _raise_if_low_memory with 0 available memory
+ with pytest.raises(MemoryError):
+ next(gen)
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_updates_last_data_piece_and_rows_fetched(self, mock_psutil):
+ """Test that _last_data_piece and rows_fetched are updated during iteration."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df1 = pd.DataFrame({"id": [1, 2, 3]})
+ df2 = pd.DataFrame({"id": [4, 5]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="id")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+ assert response.rows_fetched == 0
+
+ list(response._iterate_with_memory_check())
+
+ pd.testing.assert_frame_equal(response._last_data_piece, df2)
+ assert response.rows_fetched == 5
+
+
+class TestTableResponseFetchallEdgeCases:
+ """Additional edge-case tests for TableResponse.fetchall."""
+
+ def test_fetchall_no_generator_returns_existing_data(self):
+ """Test fetchall returns existing data when no generator is set."""
+ df = pd.DataFrame({"id": [1, 2, 3]})
+ response = TableResponse(data=df)
+
+ result = response.fetchall()
+ pd.testing.assert_frame_equal(result, df)
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchall_generator_only_no_initial_data(self, mock_psutil):
+ """Test fetchall with generator but no initial data."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df1 = pd.DataFrame({"id": [1, 2]})
+ df2 = pd.DataFrame({"id": [3, 4]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="id")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+
+ result = response.fetchall()
+ pd.testing.assert_frame_equal(result, pd.concat([df1, df2]))
+ assert response._fetched is True
+ assert response._data_generator is None
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchall_empty_generator_creates_empty_df(self, mock_psutil):
+ """Test fetchall with empty generator creates DataFrame with column names."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ columns = [Column(name="id"), Column(name="name")]
+ response = TableResponse(data_generator=iter([]), columns=columns)
+
+ result = response.fetchall()
+ assert list(result.columns) == ["id", "name"]
+ assert len(result) == 0
+
+ def test_fetchall_raises_if_invalid(self):
+ """Test fetchall raises ValueError if data was already consumed by iterate_no_save."""
+ df = pd.DataFrame({"id": [1]})
+ response = TableResponse(data=df, data_generator=iter([]))
+ list(response.iterate_no_save())
+
+ with pytest.raises(ValueError, match="Data has already been fetched"):
+ response.fetchall()
+
+
+class TestTableResponseFetchmanyEdgeCases:
+ """Additional edge-case tests for TableResponse.fetchmany."""
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchmany_first_piece_with_no_initial_data(self, mock_psutil):
+ """Test fetchmany sets _data directly when no initial data exists."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df1 = pd.DataFrame({"id": [1, 2]})
+ columns = [Column(name="id")]
+ response = TableResponse(data_generator=iter([df1]), columns=columns)
+
+ piece = response.fetchmany()
+ pd.testing.assert_frame_equal(piece, df1)
+ pd.testing.assert_frame_equal(response._data, df1)
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchmany_accumulates_data(self, mock_psutil):
+ """Test fetchmany accumulates pieces in _data."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df = pd.DataFrame({"id": [0]})
+ df1 = pd.DataFrame({"id": [1]})
+ df2 = pd.DataFrame({"id": [2]})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="id")]
+ response = TableResponse(data=df, data_generator=data_gen(), columns=columns)
+
+ response.fetchmany() # df1
+ response.fetchmany() # df2
+
+ pd.testing.assert_frame_equal(response._data, pd.concat([df, df1, df2]))
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchmany_returns_none_when_exhausted(self, mock_psutil):
+ """Test fetchmany returns None and marks response as fetched when generator is empty."""
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+
+ df1 = pd.DataFrame({"id": [1]})
+ columns = [Column(name="id")]
+ response = TableResponse(data_generator=iter([df1]), columns=columns)
+
+ piece1 = response.fetchmany()
+ assert isinstance(piece1, pd.DataFrame)
+
+ piece2 = response.fetchmany()
+ assert piece2 is None
+ assert response._fetched is True
+ assert response._data_generator is None
+
+ def test_fetchmany_raises_if_invalid(self):
+ """Test fetchmany raises ValueError after iterate_no_save."""
+ df = pd.DataFrame({"id": [1]})
+ response = TableResponse(data=df, data_generator=iter([]))
+ list(response.iterate_no_save())
+
+ with pytest.raises(ValueError, match="Data has already been fetched"):
+ response.fetchmany()
+
+
+class TestMemoryErrorPropagation:
+ """Tests for MemoryError propagation through fetchall, fetchmany, and iterate_no_save."""
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchall_raises_memory_error(self, mock_psutil):
+ """Test MemoryError propagates through fetchall."""
+ # Enough memory for first chunk, then out of memory
+ mock_psutil.virtual_memory.side_effect = [
+ _mock_virtual_memory(available_kb=1_000_000), # pre-loop check
+ _mock_virtual_memory(available_kb=0), # post-yield check
+ ]
+
+ df1 = pd.DataFrame({"x": list(range(1000))})
+ df2 = pd.DataFrame({"x": list(range(1000))})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="x")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+
+ with pytest.raises(MemoryError):
+ response.fetchall()
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_fetchmany_raises_memory_error(self, mock_psutil):
+ """Test MemoryError propagates through fetchmany on second call."""
+ df1 = pd.DataFrame({"x": list(range(1000))})
+ df2 = pd.DataFrame({"x": list(range(1000))})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="x")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+
+ # First fetchmany: enough memory (pre-loop check is no-op since _last_data_piece is None)
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000)
+ response.fetchmany()
+
+ # Second fetchmany: pre-loop check fails because we now have _last_data_piece set
+ mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=0)
+ with pytest.raises(MemoryError):
+ response.fetchmany()
+
+ @patch("mindsdb.integrations.libs.response.psutil")
+ def test_iterate_no_save_raises_memory_error(self, mock_psutil):
+ """Test MemoryError propagates through iterate_no_save."""
+ mock_psutil.virtual_memory.side_effect = [
+ _mock_virtual_memory(available_kb=1_000_000), # pre-loop check
+ _mock_virtual_memory(available_kb=0), # post-yield check after first chunk
+ ]
+
+ df1 = pd.DataFrame({"x": list(range(1000))})
+ df2 = pd.DataFrame({"x": list(range(1000))})
+
+ def data_gen():
+ yield df1
+ yield df2
+
+ columns = [Column(name="x")]
+ response = TableResponse(data_generator=data_gen(), columns=columns)
+
+ with pytest.raises(MemoryError):
+ list(response.iterate_no_save())
diff --git a/tests/unit/interfaces/agents/test_generic_api_key.py b/tests/unit/interfaces/agents/test_generic_api_key.py
index 8198b763a08..3473aa05c70 100644
--- a/tests/unit/interfaces/agents/test_generic_api_key.py
+++ b/tests/unit/interfaces/agents/test_generic_api_key.py
@@ -1,9 +1,8 @@
import os
import unittest
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
from mindsdb.integrations.utilities.handler_utils import get_api_key
-from mindsdb.interfaces.agents.agents_controller import AgentsController
class TestGenericApiKeyHandling(unittest.TestCase):
@@ -71,100 +70,6 @@ def test_get_generic_api_key_for_google_provider(self):
)
self.assertEqual(api_key, "test-specific-google-api-key")
- @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider")
- @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent")
- @patch("mindsdb.interfaces.agents.agents_controller.ProjectController")
- @patch("mindsdb.interfaces.storage.db.session")
- def test_add_agent_with_generic_api_key(
- self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider
- ):
- """Test adding an agent with a generic API key in params."""
- # Mock project controller
- mock_project = MagicMock()
- mock_project_controller.return_value.get.return_value = mock_project
-
- # Mock get_agent to return None (agent doesn't exist yet)
- mock_get_agent.return_value = None
-
- # Mock check_model_provider to return a provider
- mock_check_model_provider.return_value = (None, "openai")
-
- # Create an instance of AgentsController
- agent_controller = AgentsController()
-
- # Test adding an agent with a generic API key in params
- params = {"api_key": "test-generic-agent-api-key", "other_param": "value"}
-
- # Create a mock agent with proper params
- mock_agent = MagicMock()
- mock_agent.params = params.copy() # Set params directly
-
- # Mock db.Agents to return our prepared mock agent
- with patch("mindsdb.interfaces.storage.db.Agents", return_value=mock_agent):
- # Add the agent
- agent = agent_controller.add_agent(
- name="test_agent",
- project_name="mindsdb",
- model_name="gpt-4",
- provider="openai",
- params=params,
- )
-
- # Verify that the generic API key was preserved in the params
- self.assertEqual(agent.params["api_key"], "test-generic-agent-api-key")
-
- @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider")
- @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent")
- @patch("mindsdb.interfaces.agents.agents_controller.ProjectController")
- @patch("mindsdb.interfaces.storage.db.session")
- def test_add_agent_with_both_api_keys(
- self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider
- ):
- """Test adding an agent with both generic and provider-specific API keys."""
- # Mock project controller
- mock_project = MagicMock()
- mock_project_controller.return_value.get.return_value = mock_project
-
- # Mock get_agent to return None (agent doesn't exist yet)
- mock_get_agent.return_value = None
-
- # Mock check_model_provider to return a provider
- mock_check_model_provider.return_value = (None, "openai")
-
- # Create an instance of AgentsController
- agent_controller = AgentsController()
-
- # Test adding an agent with both generic and provider-specific API keys
- params = {
- "api_key": "test-generic-agent-api-key",
- "openai_api_key": "test-specific-agent-api-key",
- "other_param": "value",
- }
-
- # Create a mock agent with proper params
- mock_agent = MagicMock()
- mock_agent.params = params.copy() # Set params directly
-
- # Mock db.Agents to return our prepared mock agent
- with patch("mindsdb.interfaces.storage.db.Agents", return_value=mock_agent):
- # Add the agent
- agent = agent_controller.add_agent(
- name="test_agent",
- project_name="mindsdb",
- model_name="gpt-4",
- provider="openai",
- params=params,
- )
-
- # Verify that both API keys were preserved in the params
- self.assertEqual(agent.params["api_key"], "test-generic-agent-api-key")
- self.assertEqual(agent.params["openai_api_key"], "test-specific-agent-api-key")
-
- # Test that get_api_key returns the provider-specific key when both are present
- api_key = get_api_key("openai", {"params": params})
-
- self.assertEqual(api_key, "test-specific-agent-api-key")
-
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py b/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py
new file mode 100644
index 00000000000..6543ef28f4a
--- /dev/null
+++ b/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py
@@ -0,0 +1,79 @@
+import os
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseController
+from mindsdb.interfaces.knowledge_base.default_storage_resolver import resolve_default_storage_engines
+from mindsdb.utilities.config import config
+
+
+def _make_controller(handler_meta_by_name):
+ integration_controller = MagicMock()
+ integration_controller.get_handler_meta.side_effect = lambda name: handler_meta_by_name.get(name)
+ integration_controller.get.return_value = None
+
+ session = SimpleNamespace(integration_controller=integration_controller)
+ return KnowledgeBaseController(session), integration_controller
+
+
+def test_resolve_default_vector_storage_uses_pgvector_from_config():
+ previous_storage = config["knowledge_bases"].get("storage", None)
+ controller, _ = _make_controller({"pgvector": {"import": {"success": True}}})
+
+ try:
+ config.update({"knowledge_bases": {"storage": "pgvector"}})
+ vector_db_name = "kb_pgvector_store"
+ controller._create_persistent_pgvector = MagicMock(return_value=vector_db_name)
+
+ vector_db, vector_table = controller._resolve_default_vector_storage("kb_docs")
+
+ assert vector_db == vector_db_name
+ assert vector_table == "kb_docs"
+ controller._create_persistent_pgvector.assert_called_once_with({})
+ finally:
+ config.update({"knowledge_bases": {"storage": previous_storage}})
+
+
+def test_resolve_default_vector_storage_uses_faiss_from_config():
+ previous_storage = config["knowledge_bases"].get("storage", None)
+ controller, _ = _make_controller({"duckdb_faiss": {"import": {"success": True}}})
+
+ try:
+ config.update({"knowledge_bases": {"storage": "faiss"}})
+
+ vector_db_name = "store_kb_docs"
+ controller._create_persistent_faiss = MagicMock(return_value=vector_db_name)
+
+ vector_db, vector_table = controller._resolve_default_vector_storage("kb_docs")
+
+ assert vector_db == vector_db_name
+ assert vector_table == "kb_docs"
+ controller._create_persistent_faiss.assert_called_once_with("kb_docs")
+ finally:
+ config.update({"knowledge_bases": {"storage": previous_storage}})
+
+
+def test_create_persistent_pgvector_reuses_existing_store():
+ controller, integration_controller = _make_controller({})
+ integration_controller.get.return_value = {"name": "kb_pgvector_store"}
+
+ vector_store_name = controller._create_persistent_pgvector({"is_sparse": True, "vector_size": 30522})
+
+ assert vector_store_name == "kb_pgvector_store"
+ integration_controller.add.assert_not_called()
+
+
+def test_resolver_uses_pgvector_url_fallback_when_storage_is_empty():
+ previous_storage = config["knowledge_bases"].get("storage", None)
+ controller, _ = _make_controller({})
+
+ try:
+ config.update({"knowledge_bases": {"storage": None}})
+ with patch.dict(os.environ, {"KB_PGVECTOR_URL": "postgresql://user:pass@host/db"}, clear=False):
+ resolved = resolve_default_storage_engines(config)
+ assert resolved["default_storage"] == "pgvector"
+ assert resolved["available_vector_engines"] == ["faiss", "pgvector"]
+ assert resolved["pgvector_enabled"] is True
+ finally:
+ config.update({"knowledge_bases": {"storage": previous_storage}})
diff --git a/tests/unit/interfaces/test_get_handler_meta.py b/tests/unit/interfaces/test_get_handler_meta.py
new file mode 100644
index 00000000000..70f01fd6712
--- /dev/null
+++ b/tests/unit/interfaces/test_get_handler_meta.py
@@ -0,0 +1,235 @@
+"""
+Unit tests for IntegrationController.get_handler_meta() focusing on the
+handler_folder=None crash fix for community handler stubs.
+
+Covered scenarios:
+ 1. Community stub (path=None), no handler_folder passed β folder derived from stub metadata.
+ 2. Community stub (path=None), explicit handler_folder passed β explicit folder used as-is.
+ 3. Non-community (built-in) handler with path set β fetch path never triggered.
+ 4. Community stub whose "import.folder" is also None (malformed entry) β graceful None return.
+"""
+
+import threading
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+
+def _make_controller():
+ """
+ Return an IntegrationController instance with _load_handler_modules skipped
+ so no real filesystem / network access happens during construction.
+ """
+ from mindsdb.interfaces.database.integrations import IntegrationController
+
+ with patch.object(IntegrationController, "_load_handler_modules"):
+ ctrl = IntegrationController()
+
+ # Minimal attributes that other methods rely on.
+ ctrl.handler_modules = {}
+ ctrl.handlers_import_status = {}
+ ctrl.handlers_cache = MagicMock()
+ ctrl._import_lock = threading.Lock()
+ ctrl._community_handlers_dir = None
+ return ctrl
+
+
+def _community_stub(handler_name: str, folder: str | None = None):
+ """Build a community handler stub as created by _load_handler_modules."""
+ from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL
+
+ return {
+ "path": None,
+ "import": {
+ "success": None,
+ "error_message": None,
+ "folder": folder if folder is not None else f"{handler_name}_handler",
+ "dependencies": [],
+ },
+ "name": handler_name,
+ "title": handler_name.capitalize(),
+ "description": "",
+ "permanent": False,
+ "connection_args": None,
+ "class_type": None,
+ "type": None,
+ "support_level": HANDLER_SUPPORT_LEVEL.COMMUNITY,
+ }
+
+
+def _builtin_stub(handler_name: str, handler_path: Path):
+ """Build a built-in handler stub as created by _register_handler_dir."""
+ return {
+ "path": handler_path,
+ "import": {
+ "success": True,
+ "error_message": None,
+ "folder": handler_path.name,
+ "dependencies": [],
+ },
+ "name": handler_name,
+ "permanent": False,
+ "connection_args": None,
+ "class_type": None,
+ "type": None,
+ "support_level": None,
+ "community": False,
+ }
+
+
+class TestGetHandlerMetaCommunityFolderFallback(unittest.TestCase):
+ """get_handler_meta() derives handler_folder from stub metadata when None."""
+
+ def setUp(self):
+ self.ctrl = _make_controller()
+
+ def test_community_stub_folder_derived_from_metadata(self):
+ """
+ When handler_folder is not supplied, get_handler_meta() must read
+ "import.folder" from the stub and pass it to _fetch_community_handler.
+ """
+ stub = _community_stub("github", folder="github_handler")
+ self.ctrl.handlers_import_status["github"] = stub
+
+ fetched_meta = {**stub, "path": Path("/tmp/github_handler")}
+ fetched_meta["import"] = {**stub["import"], "success": True}
+
+ with patch.object(self.ctrl, "_fetch_community_handler", return_value=fetched_meta) as mock_fetch:
+ result = self.ctrl.get_handler_meta("github") # no handler_folder
+
+ mock_fetch.assert_called_once_with("github", "github_handler")
+ self.assertIsNotNone(result)
+
+ def test_community_stub_explicit_folder_not_overridden(self):
+ """
+ When handler_folder is explicitly provided, it must be forwarded as-is
+ and the stub metadata must not override it.
+ """
+ stub = _community_stub("github", folder="github_handler")
+ self.ctrl.handlers_import_status["github"] = stub
+
+ fetched_meta = {**stub, "path": Path("/tmp/custom_dir")}
+ fetched_meta["import"] = {**stub["import"], "success": True}
+
+ with patch.object(self.ctrl, "_fetch_community_handler", return_value=fetched_meta) as mock_fetch:
+ result = self.ctrl.get_handler_meta("github", handler_folder="custom_dir")
+
+ mock_fetch.assert_called_once_with("github", "custom_dir")
+ self.assertIsNotNone(result)
+
+ def test_builtin_handler_fetch_path_not_triggered(self):
+ """
+ A built-in handler with a real path must not trigger the community fetch
+ path regardless of the handler_folder argument.
+ """
+ stub = _builtin_stub("mysql", Path("/opt/mindsdb/handlers/mysql_handler"))
+ self.ctrl.handlers_import_status["mysql"] = stub
+
+ with (
+ patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch,
+ patch.object(self.ctrl, "import_handler", return_value=stub),
+ ):
+ result = self.ctrl.get_handler_meta("mysql")
+
+ mock_fetch.assert_not_called()
+ self.assertIsNotNone(result)
+
+ def test_community_stub_missing_folder_returns_none_gracefully(self):
+ """
+ If the stub's "import.folder" is also None (malformed index entry),
+ the guard in get_handler_meta() must return None immediately β before
+ _fetch_community_handler is ever called β to avoid a TypeError from
+ fetch_handler(None, storage_dir).
+ """
+ stub = _community_stub("broken")
+ stub["import"]["folder"] = None # simulate malformed entry
+ self.ctrl.handlers_import_status["broken"] = stub
+
+ with patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch:
+ result = self.ctrl.get_handler_meta("broken") # no handler_folder
+
+ mock_fetch.assert_not_called() # guard exits before reaching _fetch_community_handler
+ self.assertIsNone(result)
+
+ def test_unknown_handler_returns_none(self):
+ """get_handler_meta() for a completely unknown handler name returns None."""
+ result = self.ctrl.get_handler_meta("does_not_exist")
+ self.assertIsNone(result)
+
+
+class TestGetHandlersImportStatus(unittest.TestCase):
+ """get_handlers_import_status() must not fetch/import community stubs."""
+
+ def setUp(self):
+ self.ctrl = _make_controller()
+
+ def test_community_stub_not_fetched_during_listing(self):
+ """
+ Community stubs (support_level="community", path=None) must not trigger
+ _fetch_community_handler() or import_handler() during listing.
+ """
+ stub = _community_stub("github", folder="github_handler")
+ self.ctrl.handlers_import_status["github"] = stub
+
+ with (
+ patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch,
+ patch.object(self.ctrl, "import_handler") as mock_import,
+ ):
+ self.ctrl.get_handlers_import_status()
+
+ mock_fetch.assert_not_called()
+ mock_import.assert_not_called()
+
+ def test_community_stub_metadata_returned_in_listing(self):
+ """
+ Stub metadata must be present in the result so the UI can render the
+ handler entry without a fetch having occurred.
+ """
+ stub = _community_stub("github", folder="github_handler")
+ self.ctrl.handlers_import_status["github"] = stub
+
+ with (
+ patch.object(self.ctrl, "_fetch_community_handler"),
+ patch.object(self.ctrl, "import_handler"),
+ ):
+ result = self.ctrl.get_handlers_import_status()
+
+ self.assertIn("github", result)
+ meta = result["github"]
+ self.assertEqual(meta["name"], "github")
+ self.assertEqual(meta["support_level"], "community")
+ self.assertIsNone(meta["path"])
+ self.assertIsNotNone(meta["import"])
+
+ def test_non_community_handler_uses_get_handler_meta(self):
+ """
+ Built-in handlers (path != None) must still go through get_handler_meta()
+ so that lazy import is triggered if needed.
+ """
+ stub = _builtin_stub("mysql", Path("/opt/mindsdb/handlers/mysql_handler"))
+ self.ctrl.handlers_import_status["mysql"] = stub
+
+ with patch.object(self.ctrl, "get_handler_meta", return_value=stub) as mock_meta:
+ self.ctrl.get_handlers_import_status()
+
+ mock_meta.assert_called_once_with("mysql", stub["import"]["folder"])
+
+ def test_fetched_community_handler_uses_get_handler_meta(self):
+ """
+ A community handler that has already been fetched (path != None) must
+ also go through get_handler_meta() β the early-return guard only applies
+ when path is None.
+ """
+ stub = _community_stub("github", folder="github_handler")
+ stub["path"] = Path("/tmp/community_handlers/github_handler")
+ stub["import"]["success"] = True
+ self.ctrl.handlers_import_status["github"] = stub
+
+ with patch.object(self.ctrl, "get_handler_meta", return_value=stub) as mock_meta:
+ self.ctrl.get_handlers_import_status()
+
+ mock_meta.assert_called_once_with("github", "github_handler")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit/planner/test_join_tables.py b/tests/unit/planner/test_join_tables.py
index 24cef73b8fa..7bd8a463d7a 100644
--- a/tests/unit/planner/test_join_tables.py
+++ b/tests/unit/planner/test_join_tables.py
@@ -11,6 +11,7 @@
Star,
BinaryOperation,
Function,
+ Parameter,
)
from mindsdb_sql_parser.utils import JoinType
@@ -319,43 +320,71 @@ def test_join_tables_plan_limit_offset(self):
def test_join_tables_plan_order_by(self):
query = parse_sql("""
+ WITH tab2 AS (
+ SELECT * FROM int2.tab2 limit 100
+ ),
+ categories as (
+ SELECT * FROM int3.cats
+ )
SELECT
tab1.column1, tab2.column1, tab2.column2
- FROM int.tab1 INNER
- JOIN int2.tab2 ON tab1.column1 > tab2.column1
+ FROM int.tab1 tab1
+ INNER JOIN tab2 ON tab1.column1 > tab2.column1
+ WHERE tab2.category_id = (SELECT id FROM categories WHERE name='book')
ORDER BY tab1.column1
LIMIT 10
""")
subquery = copy.deepcopy(query)
+ subquery.cte = None
subquery.from_table = None
subquery.offset = None
+ subquery.where.args[1] = Parameter(Result(2))
- plan = plan_query(query, integrations=["int", "int2"])
+ plan = plan_query(query, integrations=["int", "int2", "int3"], default_namespace="mindsdb")
expected_plan = QueryPlan(
integrations=["int"],
steps=[
- FetchDataframeStepPartition(
+ FetchDataframeStep(
step_num=0,
+ integration="int2",
+ query=parse_sql("select * from tab2 limit 100"),
+ ),
+ FetchDataframeStep(
+ step_num=1,
+ integration="int3",
+ query=parse_sql("select * from cats"),
+ ),
+ SubSelectStep(
+ step_num=2,
+ query=Select(
+ targets=[Identifier("id")],
+ where=BinaryOperation(op="=", args=[Identifier("name"), Constant("book")]),
+ ),
+ dataframe=Result(1),
+ table_name="categories",
+ ),
+ FetchDataframeStepPartition(
+ step_num=3,
integration="int",
- query=parse_sql("select column1 AS column1 from tab1 order by column1"),
+ query=parse_sql("select column1 AS column1 from tab1 AS tab1 order by column1"),
condition={"limit": 10},
steps=[
- FetchDataframeStep(
- step_num=1,
- integration="int2",
+ SubSelectStep(
+ step_num=4,
+ dataframe=Result(0),
query=Select(
targets=[
- Identifier("column1", alias=Identifier("column1")),
- Identifier("column2", alias=Identifier("column2")),
+ Star(),
], # Column pruning
- from_table=Identifier("tab2"),
+ where=BinaryOperation(op="=", args=[Identifier("category_id"), Parameter(Result(2))]),
),
+ table_name="tab2",
),
JoinStep(
- step_num=2,
- left=Result(0),
- right=Result(1),
+ step_num=5,
+ left=Result(3),
+ right=Result(4),
query=Join(
left=Identifier("tab1"),
right=Identifier("tab2"),
@@ -367,7 +396,7 @@ def test_join_tables_plan_order_by(self):
),
],
),
- QueryStep(subquery, from_table=Result(0), strict_where=False),
+ QueryStep(subquery, from_table=Result(3), strict_where=False),
],
)
diff --git a/tests/unit/planner/test_select_from_predictor.py b/tests/unit/planner/test_select_from_predictor.py
index 38a1e65f1ff..85ccf4af365 100644
--- a/tests/unit/planner/test_select_from_predictor.py
+++ b/tests/unit/planner/test_select_from_predictor.py
@@ -1,14 +1,17 @@
import pytest
from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast import (Identifier, Select, Constant, Star, Parameter, BinaryOperation)
+from mindsdb_sql_parser.ast import Identifier, Select, Constant, Star, Parameter, BinaryOperation
from mindsdb.api.executor.planner.exceptions import PlanningException
from mindsdb.api.executor.planner import plan_query
from mindsdb.api.executor.planner.query_plan import QueryPlan
from mindsdb.api.executor.planner.step_result import Result
from mindsdb.api.executor.planner.steps import (
- ProjectStep, ApplyPredictorRowStep, GetPredictorColumns, FetchDataframeStep
+ ProjectStep,
+ ApplyPredictorRowStep,
+ GetPredictorColumns,
+ FetchDataframeStep,
)
@@ -16,347 +19,334 @@ class TestPlanSelectFromPredictor:
def test_select_from_predictor_plan(self):
query = Select(
targets=[Star()],
- from_table=Identifier('mindsdb.pred'),
+ from_table=Identifier("mindsdb.pred"),
where=BinaryOperation(
- op='and',
- args=[BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)]),
- BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])],
- )
+ op="and",
+ args=[
+ BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]),
+ ],
+ ),
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
- ApplyPredictorRowStep(
- namespace='mindsdb', predictor=Identifier('pred'),
- row_dict={'x1': 1, 'x2': '2'}
- ),
+ ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}),
],
-
)
- plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
assert plan.steps == expected_plan.steps
def test_select_from_predictor_negative_constant(self):
query = parse_sql(
- '''
+ """
select * from mindsdb.pred
where x1 = -1
- '''
+ """
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
- ApplyPredictorRowStep(namespace='mindsdb', predictor=Identifier('pred'), row_dict={'x1': -1, }),
+ ApplyPredictorRowStep(
+ namespace="mindsdb",
+ predictor=Identifier("pred"),
+ row_dict={
+ "x1": -1,
+ },
+ ),
],
)
- plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
assert plan.steps == expected_plan.steps
def test_select_from_predictor_plan_other_ml(self):
query = parse_sql(
- '''
+ """
select * from mlflow.pred
where x1 = 1 and x2 = '2'
- '''
+ """
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
- ApplyPredictorRowStep(
- namespace='mlflow', predictor=Identifier('pred'),
- row_dict={'x1': 1, 'x2': '2'}
- ),
+ ApplyPredictorRowStep(namespace="mlflow", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}),
],
-
)
- plan = plan_query(query, predictor_metadata=[{'name': 'pred', 'integration_name': 'mlflow'}])
+ plan = plan_query(query, predictor_metadata=[{"name": "pred", "integration_name": "mlflow"}])
assert plan.steps == expected_plan.steps
def test_select_from_predictor_aliases_in_project(self):
query = Select(
- targets=[Identifier('tb.x1', alias=Identifier('col1')),
- Identifier('tb.x2', alias=Identifier('col2')),
- Identifier('tb.y', alias=Identifier('predicted'))],
- from_table=Identifier('mindsdb.pred', alias=Identifier('tb')),
+ targets=[
+ Identifier("tb.x1", alias=Identifier("col1")),
+ Identifier("tb.x2", alias=Identifier("col2")),
+ Identifier("tb.y", alias=Identifier("predicted")),
+ ],
+ from_table=Identifier("mindsdb.pred", alias=Identifier("tb")),
where=BinaryOperation(
- op='and',
+ op="and",
args=[
- BinaryOperation(op='=', args=[Identifier('tb.x1'), Constant(1)]),
- BinaryOperation(op='=', args=[Identifier('tb.x2'), Constant('2')]),
+ BinaryOperation(op="=", args=[Identifier("tb.x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("tb.x2"), Constant("2")]),
],
- )
+ ),
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
ApplyPredictorRowStep(
- namespace='mindsdb',
- predictor=Identifier('pred', alias=Identifier('tb')),
- row_dict={'x1': 1, 'x2': '2'}
+ namespace="mindsdb",
+ predictor=Identifier("pred", alias=Identifier("tb")),
+ row_dict={"x1": 1, "x2": "2"},
),
ProjectStep(
dataframe=Result(0),
- columns=[Identifier('tb.x1', alias=Identifier('col1')),
- Identifier('tb.x2', alias=Identifier('col2')),
- Identifier('tb.y', alias=Identifier('predicted'))]
+ columns=[
+ Identifier("tb.x1", alias=Identifier("col1")),
+ Identifier("tb.x2", alias=Identifier("col2")),
+ Identifier("tb.y", alias=Identifier("predicted")),
+ ],
),
],
-
)
- plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
assert plan.steps == expected_plan.steps
def test_select_from_predictor_plan_predictor_alias(self):
query = Select(
targets=[Star()],
- from_table=Identifier('mindsdb.pred', alias=Identifier('pred_alias')),
+ from_table=Identifier("mindsdb.pred", alias=Identifier("pred_alias")),
where=BinaryOperation(
- op='and',
+ op="and",
args=[
- BinaryOperation(op='=', args=[Identifier('pred_alias.x1'), Constant(1)]),
- BinaryOperation(
- op='=',
- args=[Identifier('pred_alias.x2'), Constant('2')]
- )
+ BinaryOperation(op="=", args=[Identifier("pred_alias.x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("pred_alias.x2"), Constant("2")]),
],
- )
+ ),
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
ApplyPredictorRowStep(
- namespace='mindsdb', predictor=Identifier('pred', alias=Identifier('pred_alias')),
- row_dict={'x1': 1, 'x2': '2'}
+ namespace="mindsdb",
+ predictor=Identifier("pred", alias=Identifier("pred_alias")),
+ row_dict={"x1": 1, "x2": "2"},
),
],
)
- plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
assert plan.steps == expected_plan.steps
def test_select_from_predictor_plan_verbose_col_names(self):
query = Select(
targets=[Star()],
- from_table=Identifier('mindsdb.pred'),
+ from_table=Identifier("mindsdb.pred"),
where=BinaryOperation(
- op='and',
- args=[BinaryOperation(op='=', args=[Identifier('pred.x1'), Constant(1)]),
- BinaryOperation(op='=', args=[Identifier('pred.x2'), Constant('2')])],
- )
+ op="and",
+ args=[
+ BinaryOperation(op="=", args=[Identifier("pred.x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("pred.x2"), Constant("2")]),
+ ],
+ ),
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
- ApplyPredictorRowStep(
- namespace='mindsdb', predictor=Identifier('pred'),
- row_dict={'x1': 1, 'x2': '2'}
- ),
+ ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}),
ProjectStep(dataframe=Result(0), columns=[Star()]),
],
)
- plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
for i in range(len(plan.steps)):
assert plan.steps[i] == expected_plan.steps[i]
def test_select_from_predictor_plan_group_by_error(self):
query = Select(
- targets=[Identifier('x1'), Identifier('x2'), Identifier('pred.y')],
- from_table=Identifier('mindsdb.pred'),
- group_by=[Identifier('x1')]
+ targets=[Identifier("x1"), Identifier("x2"), Identifier("pred.y")],
+ from_table=Identifier("mindsdb.pred"),
+ group_by=[Identifier("x1")],
)
with pytest.raises(PlanningException):
- plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
def test_select_from_predictor_wrong_where_op_error(self):
query = Select(
targets=[Star()],
- from_table=Identifier('mindsdb.pred'),
+ from_table=Identifier("mindsdb.pred"),
where=BinaryOperation(
- op='and',
- args=[BinaryOperation(op='>', args=[Identifier('x1'), Constant(1)]),
- BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])],
- )
+ op="and",
+ args=[
+ BinaryOperation(op=">", args=[Identifier("x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]),
+ ],
+ ),
)
with pytest.raises(PlanningException):
- plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
def test_select_from_predictor_multiple_values_error(self):
query = Select(
targets=[Star()],
- from_table=Identifier('mindsdb.pred'),
+ from_table=Identifier("mindsdb.pred"),
where=BinaryOperation(
- op='and',
- args=[BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)]),
- BinaryOperation(op='=', args=[Identifier('x1'), Constant('2')])],
- )
+ op="and",
+ args=[
+ BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("x1"), Constant("2")]),
+ ],
+ ),
)
with pytest.raises(PlanningException):
- plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
def test_select_from_predictor_no_where_error(self):
- query = Select(
- targets=[Star()],
- from_table=Identifier('mindsdb.pred')
- )
+ query = Select(targets=[Star()], from_table=Identifier("mindsdb.pred"))
with pytest.raises(PlanningException):
- plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}})
+ plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}})
def test_select_from_predictor_default_namespace(self):
query = Select(
targets=[Star()],
- from_table=Identifier('pred'),
+ from_table=Identifier("pred"),
where=BinaryOperation(
- op='and',
- args=[BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)]),
- BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])],
- )
+ op="and",
+ args=[
+ BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]),
+ BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]),
+ ],
+ ),
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
- default_namespace='mindsdb',
+ predictor_namespace="mindsdb",
+ default_namespace="mindsdb",
steps=[
- ApplyPredictorRowStep(
- namespace='mindsdb', predictor=Identifier('pred'),
- row_dict={'x1': 1, 'x2': '2'}
- ),
+ ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}),
],
)
plan = plan_query(
- query, predictor_namespace='mindsdb', default_namespace='mindsdb', predictor_metadata={'pred': {}}
+ query, predictor_namespace="mindsdb", default_namespace="mindsdb", predictor_metadata={"pred": {}}
)
assert plan.steps == expected_plan.steps
def test_select_from_predictor_get_columns(self):
- sql = 'SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0'
+ sql = "SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0"
query = parse_sql(sql)
expected_query = Select(
- targets=[Identifier('GDP_per_capita_USD')],
- from_table=Identifier('hdi_predictor_external'),
- where=BinaryOperation(
- op="=",
- args=[Constant(1), Constant(0)]
- )
+ targets=[Identifier("GDP_per_capita_USD")],
+ from_table=Identifier("hdi_predictor_external"),
+ where=BinaryOperation(op="=", args=[Constant(1), Constant(0)]),
)
assert query.to_tree() == expected_query.to_tree()
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
- default_namespace='mindsdb',
+ predictor_namespace="mindsdb",
+ default_namespace="mindsdb",
steps=[
- GetPredictorColumns(
- namespace='mindsdb',
- predictor=Identifier('hdi_predictor_external')
- ),
- ProjectStep(dataframe=Result(0), columns=[Identifier('GDP_per_capita_USD')]),
+ GetPredictorColumns(namespace="mindsdb", predictor=Identifier("hdi_predictor_external")),
+ ProjectStep(dataframe=Result(0), columns=[Identifier("GDP_per_capita_USD")]),
],
)
plan = plan_query(
- query, predictor_namespace='mindsdb', default_namespace='mindsdb',
- predictor_metadata={'hdi_predictor_external': {}}
+ query,
+ predictor_namespace="mindsdb",
+ default_namespace="mindsdb",
+ predictor_metadata={"hdi_predictor_external": {}},
)
assert plan.steps == expected_plan.steps
def test_using_predictor_version(self):
query = parse_sql(
- '''
+ """
select * from mindsdb.pred.21
where x1 = 1
- '''
+ """
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
ApplyPredictorRowStep(
- namespace='mindsdb', predictor=Identifier(parts=['pred', '21']),
- row_dict={'x1': 1}
+ namespace="mindsdb", predictor=Identifier(parts=["pred", "21"]), row_dict={"x1": 1}
)
],
)
- plan = plan_query(query, predictor_metadata=[{'name': 'pred', 'integration_name': 'mindsdb'}])
+ plan = plan_query(query, predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}])
assert plan.steps == expected_plan.steps
def test_select_from_predictor_subselect(self):
query = parse_sql(
- '''
+ """
select * from mindsdb.pred.21
where x1 = (select id from int1.t1)
- '''
+ """
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
FetchDataframeStep(
- integration='int1',
- query=parse_sql('select id as id from t1'),
+ integration="int1",
+ query=parse_sql("select id as id from t1"),
),
ApplyPredictorRowStep(
- namespace='mindsdb',
- predictor=Identifier(parts=['pred', '21']),
- row_dict={'x1': Parameter(Result(0))}
- )
+ namespace="mindsdb",
+ predictor=Identifier(parts=["pred", "21"]),
+ row_dict={"x1": Parameter(Result(0))},
+ ),
],
)
plan = plan_query(
- query,
- integrations=['int1'],
- predictor_metadata=[{'name': 'pred', 'integration_name': 'mindsdb'}]
+ query, integrations=["int1"], predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}]
)
assert plan.steps == expected_plan.steps
def test_select_from_view_subselect(self):
query = parse_sql(
- '''
+ """
select * from v1
where x1 in (select id from int1.tab1)
- '''
+ """
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
FetchDataframeStep(
- integration='int1',
- query=parse_sql('select id as id from tab1'),
+ integration="int1",
+ query=parse_sql("select id as id from tab1"),
),
FetchDataframeStep(
- integration='mindsdb',
+ integration="mindsdb",
query=Select(
targets=[Star()],
- from_table=Identifier('v1'),
- where=BinaryOperation(
- op='in',
- args=[
- Identifier(parts=['x1']),
- Parameter(Result(0))
- ]
- )
+ from_table=Identifier("v1"),
+ where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]),
),
),
],
@@ -364,81 +354,66 @@ def test_select_from_view_subselect(self):
plan = plan_query(
query,
- integrations=['int1'],
- default_namespace='mindsdb',
- predictor_metadata=[{'name': 'pred', 'integration_name': 'mindsdb'}]
+ integrations=["int1"],
+ default_namespace="mindsdb",
+ predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}],
)
assert plan.steps == expected_plan.steps
def test_select_from_view_subselect_view(self):
query = parse_sql(
- '''
+ """
select * from v1
where x1 in (select v2.id from v2)
- '''
+ """
)
expected_plan = QueryPlan(
- predictor_namespace='mindsdb',
+ predictor_namespace="mindsdb",
steps=[
FetchDataframeStep(
- integration='mindsdb',
- query=parse_sql('select v2.id as id from v2'),
+ integration="mindsdb",
+ query=parse_sql("select v2.id as id from v2"),
),
FetchDataframeStep(
- integration='mindsdb',
+ integration="mindsdb",
query=Select(
targets=[Star()],
- from_table=Identifier('v1'),
- where=BinaryOperation(
- op='in',
- args=[
- Identifier(parts=['x1']),
- Parameter(Result(0))
- ]
- )
+ from_table=Identifier("v1"),
+ where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]),
),
),
],
)
- plan = plan_query(
- query,
- integrations=[],
- default_namespace='mindsdb',
- predictor_metadata=[]
- )
+ plan = plan_query(query, integrations=[], default_namespace="mindsdb", predictor_metadata=[])
assert plan.steps == expected_plan.steps
class TestMLSelect:
-
def test_select_from_predictor_plan_other_ml(self):
# sends to integrations
- query = parse_sql(''' select * from mlflow.predictors ''')
+ query = parse_sql(""" select * from mlflow.predictors """)
expected_plan = QueryPlan(
- steps=[
- FetchDataframeStep(step_num=0, integration='mlflow', query=parse_sql('SELECT * FROM predictors'))
- ],
+ steps=[FetchDataframeStep(step_num=0, integration="mlflow", query=parse_sql("SELECT * FROM predictors"))],
)
- plan = plan_query(query, predictor_metadata=[], integrations=['mlflow'])
+ plan = plan_query(query, predictor_metadata=[], integrations=["mlflow"])
assert plan.steps == expected_plan.steps
class TestNestedSelect:
-
def test_using_predictor_in_subselect(self):
"""
Use predictor in subselect when selecting from integration
"""
sql = """
SELECT *
- FROM chromadb.test_tabl
+ FROM vectordb.test_tabl
WHERE
search_vector = (
SELECT emebddings
@@ -450,37 +425,25 @@ def test_using_predictor_in_subselect(self):
ast_tree = parse_sql(sql)
plan = plan_query(
ast_tree,
- integrations=['chromadb'],
- predictor_metadata=[
- {'name': 'embedding_model', 'integration_name': 'mindsdb'}
- ]
+ integrations=["vectordb"],
+ predictor_metadata=[{"name": "embedding_model", "integration_name": "mindsdb"}],
)
expected_plan = [
ApplyPredictorRowStep(
step_num=0,
- namespace='mindsdb',
- predictor=Identifier(parts=['embedding_model']),
- row_dict={'content': 'some text'}
- ),
- ProjectStep(
- step_num=1,
- dataframe=Result(0),
- columns=[Identifier(parts=['emebddings'])]
+ namespace="mindsdb",
+ predictor=Identifier(parts=["embedding_model"]),
+ row_dict={"content": "some text"},
),
+ ProjectStep(step_num=1, dataframe=Result(0), columns=[Identifier(parts=["emebddings"])]),
FetchDataframeStep(
step_num=2,
- integration='chromadb',
+ integration="vectordb",
query=Select(
targets=[Star()],
- from_table=Identifier(parts=['test_tabl']),
- where=BinaryOperation(
- op='=',
- args=[
- Identifier(parts=['search_vector']),
- Parameter(Result(1))
- ]
- )
+ from_table=Identifier(parts=["test_tabl"]),
+ where=BinaryOperation(op="=", args=[Identifier(parts=["search_vector"]), Parameter(Result(1))]),
),
),
]
@@ -498,31 +461,27 @@ def test_using_integration_in_subselect(self):
WHERE
content = (
SELECT content
- FROM chromadb.test_tabl
+ FROM vectordb.test_tabl
LIMIT 1
)
"""
ast_tree = parse_sql(sql)
plan = plan_query(
ast_tree,
- integrations=['chromadb'],
- predictor_metadata=[
- {'name': 'embedding_model', 'integration_name': 'mindsdb'}
- ]
+ integrations=["vectordb"],
+ predictor_metadata=[{"name": "embedding_model", "integration_name": "mindsdb"}],
)
expected_plan = [
FetchDataframeStep(
- step_num=0,
- integration='chromadb',
- query=parse_sql('SELECT content AS content FROM test_tabl LIMIT 1')
+ step_num=0, integration="vectordb", query=parse_sql("SELECT content AS content FROM test_tabl LIMIT 1")
),
ApplyPredictorRowStep(
step_num=1,
- namespace='mindsdb',
- predictor=Identifier(parts=['embedding_model']),
- row_dict={'content': Parameter(Result(0))}
- )
+ namespace="mindsdb",
+ predictor=Identifier(parts=["embedding_model"]),
+ row_dict={"content": Parameter(Result(0))},
+ ),
]
assert plan.steps == expected_plan
diff --git a/tests/unit/test_passthrough.py b/tests/unit/test_passthrough.py
new file mode 100644
index 00000000000..9b86bc9f7ee
--- /dev/null
+++ b/tests/unit/test_passthrough.py
@@ -0,0 +1,400 @@
+"""Unit tests for PassthroughMixin."""
+
+import unittest
+from unittest.mock import MagicMock, patch
+
+from mindsdb.integrations.libs.passthrough import (
+ PassthroughMixin,
+ REDACTED_SENTINEL,
+)
+from mindsdb.integrations.libs.passthrough_types import (
+ HostNotAllowedError,
+ PassthroughConfigError,
+ PassthroughRequest,
+ PassthroughValidationError,
+)
+
+
+class _FakeHandler(PassthroughMixin):
+ """Minimal handler stub for exercising the mixin."""
+
+ _bearer_token_arg = "api_key"
+ _base_url_default = "https://api.example.com"
+ _test_request = PassthroughRequest(method="GET", path="/me")
+
+ def __init__(self, connection_data: dict):
+ self.name = "fake_ds"
+ self.connection_data = connection_data
+
+
+def _mock_response(status_code=200, body=b'{"ok":true}', headers=None, content_type="application/json"):
+ """Return a mock requests.Response exposing the bits the mixin uses."""
+ resp = MagicMock()
+ resp.status_code = status_code
+ resp.headers = {"Content-Type": content_type, **(headers or {})}
+ resp.iter_content = MagicMock(return_value=iter([body]))
+ resp.close = MagicMock()
+ return resp
+
+
+class PassthroughHappyPathTests(unittest.TestCase):
+ def setUp(self):
+ self.handler = _FakeHandler({"api_key": "secret-token-abcdef1234567890"})
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_injects_bearer_and_uses_default_base_url(self, mock_request):
+ mock_request.return_value = _mock_response()
+ resp = self.handler.api_passthrough(PassthroughRequest("GET", "/me"))
+
+ self.assertEqual(resp.status_code, 200)
+ self.assertEqual(resp.body, {"ok": True})
+
+ args, kwargs = mock_request.call_args
+ self.assertEqual(args[0], "GET")
+ self.assertEqual(args[1], "https://api.example.com/me")
+ self.assertEqual(kwargs["headers"]["Authorization"], "Bearer secret-token-abcdef1234567890")
+ self.assertEqual(kwargs["headers"]["X-Minds-Passthrough"], "1")
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_user_base_url_overrides_default(self, mock_request):
+ self.handler.connection_data["base_url"] = "https://api.eu.example.com"
+ mock_request.return_value = _mock_response()
+ self.handler.api_passthrough(PassthroughRequest("GET", "/me"))
+ self.assertEqual(mock_request.call_args[0][1], "https://api.eu.example.com/me")
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_query_params_forwarded(self, mock_request):
+ mock_request.return_value = _mock_response()
+ self.handler.api_passthrough(PassthroughRequest("GET", "/x", query={"a": "1"}))
+ self.assertEqual(mock_request.call_args.kwargs["params"], {"a": "1"})
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_json_body_forwarded(self, mock_request):
+ mock_request.return_value = _mock_response()
+ self.handler.api_passthrough(PassthroughRequest("POST", "/x", body={"name": "foo"}))
+ self.assertEqual(mock_request.call_args.kwargs["json"], {"name": "foo"})
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_default_headers_merged(self, mock_request):
+ self.handler.connection_data["default_headers"] = {"Accept": "application/json"}
+ mock_request.return_value = _mock_response()
+ self.handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ self.assertEqual(mock_request.call_args.kwargs["headers"]["Accept"], "application/json")
+
+
+class PassthroughHeaderFilteringTests(unittest.TestCase):
+ def setUp(self):
+ self.handler = _FakeHandler({"api_key": "secret-token-abcdef1234567890"})
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_caller_cannot_override_authorization(self, mock_request):
+ mock_request.return_value = _mock_response()
+ self.handler.api_passthrough(
+ PassthroughRequest("GET", "/x", headers={"Authorization": "Bearer hijack", "Cookie": "s=1"})
+ )
+ outgoing = mock_request.call_args.kwargs["headers"]
+ self.assertEqual(outgoing["Authorization"], "Bearer secret-token-abcdef1234567890")
+ self.assertNotIn("Cookie", outgoing)
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_proxy_headers_stripped(self, mock_request):
+ mock_request.return_value = _mock_response()
+ self.handler.api_passthrough(PassthroughRequest("GET", "/x", headers={"Proxy-Authorization": "hijack"}))
+ outgoing = mock_request.call_args.kwargs["headers"]
+ self.assertNotIn("Proxy-Authorization", outgoing)
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_hop_by_hop_response_headers_stripped(self, mock_request):
+ mock_request.return_value = _mock_response(
+ headers={"Connection": "close", "X-Safe": "1", "Transfer-Encoding": "chunked"}
+ )
+ resp = self.handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ self.assertNotIn("Connection", resp.headers)
+ self.assertNotIn("Transfer-Encoding", resp.headers)
+ self.assertEqual(resp.headers.get("X-Safe"), "1")
+
+
+class PassthroughHostAllowlistTests(unittest.TestCase):
+ def test_rejects_host_outside_allowlist(self):
+ handler = _FakeHandler(
+ {
+ "api_key": "t",
+ "base_url": "https://api.example.com",
+ "allowed_hosts": ["api.example.com"],
+ }
+ )
+ # Direct host check using a bad URL
+ with self.assertRaises(HostNotAllowedError):
+ handler._check_host_allowed("evil.com")
+
+ def test_wildcard_allows_any_host(self):
+ handler = _FakeHandler(
+ {
+ "api_key": "t",
+ "base_url": "https://api.example.com",
+ "allowed_hosts": ["*"],
+ }
+ )
+ handler._check_host_allowed("evil.com") # must not raise
+
+ def test_private_ip_rejected_by_default(self):
+ handler = _FakeHandler({"api_key": "t", "base_url": "http://10.0.0.1"})
+ with self.assertRaises(HostNotAllowedError):
+ handler._check_host_allowed("10.0.0.1")
+
+ def test_private_ip_allowed_when_explicitly_listed(self):
+ handler = _FakeHandler(
+ {
+ "api_key": "t",
+ "base_url": "http://10.0.0.1",
+ "allowed_hosts": ["10.0.0.1"],
+ }
+ )
+ # Explicitly allowlisted private IP should still be rejected β the
+ # mixin treats explicit private-IP allowlisting as a foot-gun that
+ # requires the "*" escape hatch. Document this behavior.
+ with self.assertRaises(HostNotAllowedError):
+ handler._check_host_allowed("10.0.0.1")
+
+ def test_loopback_rejected_with_wildcard_when_asterisk_not_used(self):
+ handler = _FakeHandler(
+ {
+ "api_key": "t",
+ "base_url": "http://127.0.0.1",
+ "allowed_hosts": ["127.0.0.1"],
+ }
+ )
+ with self.assertRaises(HostNotAllowedError):
+ handler._check_host_allowed("127.0.0.1")
+
+
+class PassthroughValidationTests(unittest.TestCase):
+ def test_missing_bearer_raises(self):
+ handler = _FakeHandler({}) # no api_key
+ with self.assertRaises(PassthroughConfigError):
+ handler.api_passthrough(PassthroughRequest("GET", "/me"))
+
+ def test_missing_base_url_raises(self):
+ class NoDefault(_FakeHandler):
+ _base_url_default = None
+
+ handler = NoDefault({"api_key": "t"})
+ with self.assertRaises(PassthroughConfigError):
+ handler.api_passthrough(PassthroughRequest("GET", "/me"))
+
+ def test_path_must_start_with_slash(self):
+ handler = _FakeHandler({"api_key": "t"})
+ with self.assertRaises(PassthroughValidationError):
+ handler.api_passthrough(PassthroughRequest("GET", "me"))
+
+ def test_method_allowlist(self):
+ handler = _FakeHandler({"api_key": "t"})
+ with self.assertRaises(PassthroughValidationError):
+ handler.api_passthrough(PassthroughRequest("TRACE", "/me"))
+
+
+class PassthroughSecretScrubTests(unittest.TestCase):
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_token_scrubbed_from_json_body(self, mock_request):
+ token = "secret-token-abcdef1234567890"
+ # Non-UTF-8 byte (0xFF) positioned adjacent to the token. Spec Β§7.6
+ # mandates byte-level scrubbing: if the scrub ran after a
+ # errors="replace" decode, U+FFFD insertions would risk fragmenting
+ # a token mid-match. Byte-level scrub avoids that entirely.
+ body = b'{"error":"Invalid token ' + token.encode("utf-8") + b' \xff trailing"}'
+ handler = _FakeHandler({"api_key": token})
+ # Use plain-text content-type so the non-UTF-8 body survives without
+ # a json.loads detour; the scrub is still invoked.
+ mock_request.return_value = _mock_response(body=body, content_type="text/plain")
+
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ # Token must not survive anywhere in the body.
+ self.assertNotIn(token, str(resp.body))
+ self.assertIn(REDACTED_SENTINEL, str(resp.body))
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_token_scrubbed_from_headers(self, mock_request):
+ token = "secret-token-abcdef1234567890"
+ handler = _FakeHandler({"api_key": token})
+ mock_request.return_value = _mock_response(
+ headers={"X-Debug-Auth": f"Bearer {token}"},
+ )
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ self.assertIn(REDACTED_SENTINEL, resp.headers["X-Debug-Auth"])
+ self.assertNotIn(token, resp.headers["X-Debug-Auth"])
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_long_default_header_values_scrubbed(self, mock_request):
+ token = "secret-token-abcdef1234567890"
+ long_secret = "x" * 32
+ handler = _FakeHandler(
+ {
+ "api_key": token,
+ "default_headers": {"X-Api-Secondary": long_secret},
+ }
+ )
+ mock_request.return_value = _mock_response(body=('{"echoed":"' + long_secret + '"}').encode("utf-8"))
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ self.assertEqual(resp.body["echoed"], REDACTED_SENTINEL)
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_token_scrubbed_in_nested_json_without_corrupting_structure(self, mock_request):
+ token = "secret-token-abcdef1234567890"
+ handler = _FakeHandler({"api_key": token})
+ body = ('{"data": {"nested": {"token": "' + token + '"}}}').encode("utf-8")
+ mock_request.return_value = _mock_response(body=body)
+
+ resp = handler.api_passthrough(PassthroughRequest("GET", "/x"))
+
+ # Structure preserved: dict-of-dict-of-dict with the expected keys.
+ self.assertIsInstance(resp.body, dict)
+ self.assertIsInstance(resp.body["data"], dict)
+ self.assertIsInstance(resp.body["data"]["nested"], dict)
+ self.assertEqual(set(resp.body.keys()), {"data"})
+ self.assertEqual(set(resp.body["data"].keys()), {"nested"})
+ self.assertEqual(set(resp.body["data"]["nested"].keys()), {"token"})
+ # Value redacted at the leaf; token does not survive anywhere.
+ self.assertEqual(resp.body["data"]["nested"]["token"], REDACTED_SENTINEL)
+ self.assertNotIn(token, str(resp.body))
+
+
+class PassthroughTestEndpointTests(unittest.TestCase):
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_returns_ok_on_200(self, mock_request):
+ handler = _FakeHandler({"api_key": "t"})
+ mock_request.return_value = _mock_response(status_code=200)
+ result = handler.test_passthrough()
+ self.assertTrue(result["ok"])
+ self.assertEqual(result["status_code"], 200)
+ self.assertEqual(result["host"], "api.example.com")
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_returns_auth_failed_on_401(self, mock_request):
+ handler = _FakeHandler({"api_key": "t"})
+ mock_request.return_value = _mock_response(status_code=401)
+ result = handler.test_passthrough()
+ self.assertFalse(result["ok"])
+ self.assertEqual(result["error_code"], "auth_failed")
+
+ def test_returns_not_implemented_when_no_test_request(self):
+ class NoTest(_FakeHandler):
+ _test_request = None
+
+ handler = NoTest({"api_key": "t"})
+ result = handler.test_passthrough()
+ self.assertFalse(result["ok"])
+ self.assertEqual(result["error_code"], "not_implemented")
+
+
+class PassthroughAllowedMethodsTests(unittest.TestCase):
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_rejects_method_not_in_allowed_methods(self, mock_request):
+ handler = _FakeHandler(
+ {
+ "api_key": "t",
+ "allowed_methods": ["GET"],
+ }
+ )
+ mock_request.return_value = _mock_response()
+
+ with self.assertRaises(PassthroughValidationError) as cm:
+ handler.api_passthrough(PassthroughRequest("POST", "/x"))
+
+ self.assertEqual(cm.exception.error_code, "method_not_allowed")
+ self.assertEqual(cm.exception.http_status, 405)
+ mock_request.assert_not_called()
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_all_methods_allowed_when_config_absent(self, mock_request):
+ handler = _FakeHandler({"api_key": "t"})
+ mock_request.return_value = _mock_response()
+
+ for method in ("GET", "POST", "PUT", "PATCH", "DELETE"):
+ mock_request.reset_mock()
+ mock_request.return_value = _mock_response()
+ handler.api_passthrough(PassthroughRequest(method, "/x"))
+ self.assertEqual(mock_request.call_args[0][0], method)
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_string_allowed_methods_raises_config_error(self, mock_request):
+ handler = _FakeHandler({"api_key": "t", "allowed_methods": "GET"})
+
+ with self.assertRaises(PassthroughConfigError):
+ handler.api_passthrough(PassthroughRequest("GET", "/x"))
+
+ mock_request.assert_not_called()
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_lowercase_allowed_methods_normalized(self, mock_request):
+ handler = _FakeHandler({"api_key": "t", "allowed_methods": ["get"]})
+ mock_request.return_value = _mock_response()
+
+ # GET passes after uppercase normalization.
+ handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ self.assertEqual(mock_request.call_args[0][0], "GET")
+
+ mock_request.reset_mock()
+ # POST is rejected with method_not_allowed.
+ with self.assertRaises(PassthroughValidationError) as cm:
+ handler.api_passthrough(PassthroughRequest("POST", "/x"))
+ self.assertEqual(cm.exception.error_code, "method_not_allowed")
+ self.assertEqual(cm.exception.http_status, 405)
+ mock_request.assert_not_called()
+
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_unknown_verb_in_allowed_methods_raises_config_error(self, mock_request):
+ handler = _FakeHandler({"api_key": "t", "allowed_methods": ["GET", "TRACE"]})
+
+ with self.assertRaises(PassthroughConfigError) as cm:
+ handler.api_passthrough(PassthroughRequest("GET", "/x"))
+ self.assertIn("TRACE", str(cm.exception))
+ mock_request.assert_not_called()
+
+
+class PassthroughAuthHeaderOverrideTests(unittest.TestCase):
+ @patch("mindsdb.integrations.libs.passthrough.requests.request")
+ def test_custom_auth_header_name_and_format(self, mock_request):
+ class ShopifyLikeHandler(_FakeHandler):
+ _auth_header_name = "X-Shopify-Access-Token"
+ _auth_header_format = "{token}"
+
+ handler = ShopifyLikeHandler({"api_key": "shpat_abc123"})
+ mock_request.return_value = _mock_response()
+
+ handler.api_passthrough(PassthroughRequest("GET", "/x"))
+
+ outgoing = mock_request.call_args.kwargs["headers"]
+ # Custom header present, with raw token (no "Bearer " prefix).
+ self.assertEqual(outgoing["X-Shopify-Access-Token"], "shpat_abc123")
+ # Default Authorization header must NOT be added when the handler
+ # overrides the auth header name.
+ self.assertNotIn("Authorization", outgoing)
+
+
+class PassthroughProtocolTests(unittest.TestCase):
+ def test_non_mixin_class_satisfies_protocol(self):
+ from mindsdb.integrations.libs.passthrough import PassthroughProtocol
+ from mindsdb.integrations.libs.passthrough_types import PassthroughResponse
+
+ class ManualHandler:
+ def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse:
+ return PassthroughResponse(status_code=200, headers={}, body=None, content_type=None)
+
+ def test_passthrough(self) -> dict:
+ return {"ok": True}
+
+ self.assertIsInstance(ManualHandler(), PassthroughProtocol)
+
+ def test_class_missing_methods_fails_protocol(self):
+ from mindsdb.integrations.libs.passthrough import PassthroughProtocol
+
+ class Incomplete:
+ def api_passthrough(self, req): ...
+
+ # missing test_passthrough
+
+ self.assertNotIsInstance(Incomplete(), PassthroughProtocol)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unused/integration/flows/test_ml_task_queue.py b/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py
similarity index 72%
rename from tests/unused/integration/flows/test_ml_task_queue.py
rename to tests/unit/utilities/ml_task_queue/test_ml_task_queue.py
index 8e17fca4d08..1880c90fc6d 100644
--- a/tests/unused/integration/flows/test_ml_task_queue.py
+++ b/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py
@@ -15,18 +15,17 @@
@pytest.mark.skipif("localhost" in HTTP_API_ROOT or "127.0.0.1" in HTTP_API_ROOT, reason="Requires redis")
class TestMLTaskQueue(HTTPHelperMixin):
-
def test_redis_connection(self):
db = Database(protocol=3, host=REDIS_HOST)
db.ping()
def test_create_model(self, train_finetune_lock):
- """ 1. create db connection
- 2. create test dataset
- 3. start to train model in 'async' mode: check status
- 4. start to train model in 'sync' mode: check status
- 5. await model 2 is finished
- 6. 2 messages in redis stream
+ """1. create db connection
+ 2. create test dataset
+ 3. start to train model in 'async' mode: check status
+ 4. start to train model in 'sync' mode: check status
+ 5. await model 2 is finished
+ 6. 2 messages in redis stream
"""
db_details = {
@@ -37,8 +36,8 @@ def test_create_model(self, train_finetune_lock):
"port": "5432",
"user": "demo_user",
"password": "demo_password",
- "database": "demo"
- }
+ "database": "demo",
+ },
}
self.sql_via_http("DROP MODEL IF EXISTS p_test_queue_async;", RESPONSE_TYPE.OK)
@@ -47,7 +46,7 @@ def test_create_model(self, train_finetune_lock):
query = f"""
CREATE DATABASE IF NOT EXISTS test_demo_queue
WITH ENGINE = 'postgres',
- PARAMETERS = {json.dumps(db_details['connection_data'])};
+ PARAMETERS = {json.dumps(db_details["connection_data"])};
"""
self.sql_via_http(query, RESPONSE_TYPE.OK)
@@ -58,8 +57,8 @@ def test_create_model(self, train_finetune_lock):
predict rental_price;
"""
response = self.sql_via_http(query, RESPONSE_TYPE.TABLE)
- status = response['data'][0][response['column_names'].index('STATUS')]
- assert status in ('generating', 'training')
+ status = response["data"][0][response["column_names"].index("STATUS")]
+ assert status in ("generating", "training")
query = """
create predictor p_test_queue_sync
@@ -68,16 +67,16 @@ def test_create_model(self, train_finetune_lock):
USING join_learn_process=true;
"""
response = self.sql_via_http(query, RESPONSE_TYPE.TABLE)
- status = response['data'][0][response['column_names'].index('STATUS')]
- assert status == 'complete'
+ status = response["data"][0][response["column_names"].index("STATUS")]
+ assert status == "complete"
- status = self.await_model('p_test_queue_async')
- assert status == 'complete'
+ status = self.await_model("p_test_queue_async")
+ assert status == "complete"
db = Database(protocol=3, host=REDIS_HOST)
assert TASKS_STREAM_NAME in db.keys()
- assert db.type(TASKS_STREAM_NAME) == b'stream'
+ assert db.type(TASKS_STREAM_NAME) == b"stream"
xlen = db.xlen(TASKS_STREAM_NAME)
if xlen != 0:
lol = db.xrange(TASKS_STREAM_NAME)
@@ -85,8 +84,7 @@ def test_create_model(self, train_finetune_lock):
assert db.xlen(TASKS_STREAM_NAME) == 0
def test_predict(self):
- """ make predict queries to both trained models
- """
+ """make predict queries to both trained models"""
query = """
SELECT rental_price,
@@ -95,8 +93,8 @@ def test_predict(self):
WHERE b = 10;
"""
response = self.sql_via_http(query, RESPONSE_TYPE.TABLE)
- assert len(response['data']) == 1
- assert len(response['data'][0]) == 2
+ assert len(response["data"]) == 1
+ assert len(response["data"][0]) == 2
query = """
SELECT rental_price,
@@ -106,15 +104,14 @@ def test_predict(self):
LIMIT 3;
"""
response = self.sql_via_http(query, RESPONSE_TYPE.TABLE)
- assert len(response['data']) == 3
- assert len(response['data'][0]) == 2
+ assert len(response["data"]) == 3
+ assert len(response["data"][0]) == 2
db = Database(protocol=3, host=REDIS_HOST)
assert db.xlen(TASKS_STREAM_NAME) == 0
def test_finetune(self, train_finetune_lock):
- """ check that finetune is working
- """
+ """check that finetune is working"""
with train_finetune_lock.acquire(timeout=600):
query = """
@@ -123,20 +120,20 @@ def test_finetune(self, train_finetune_lock):
USING join_learn_process=true;
"""
response = self.sql_via_http(query, RESPONSE_TYPE.TABLE)
- status = response['data'][0][response['column_names'].index('STATUS')]
- assert status == 'complete'
+ status = response["data"][0][response["column_names"].index("STATUS")]
+ assert status == "complete"
query = """
FINETUNE p_test_queue_async
FROM test_demo_queue (SELECT * FROM demo_data.home_rentals LIMIT 10);
"""
response = self.sql_via_http(query, RESPONSE_TYPE.TABLE)
- status = response['data'][0][response['column_names'].index('STATUS')]
+ status = response["data"][0][response["column_names"].index("STATUS")]
# FINETUNE in this case may be very fast, so add 'complete' to check
- assert status in ('generating', 'training', 'complete')
+ assert status in ("generating", "training", "complete")
- status = self.await_model('p_test_queue_async', version_number=2)
- assert status == 'complete'
+ status = self.await_model("p_test_queue_async", version_number=2)
+ assert status == "complete"
db = Database(protocol=3, host=REDIS_HOST)
assert db.xlen(TASKS_STREAM_NAME) == 0
diff --git a/tests/unit/utilities/test_community_handler_fetcher.py b/tests/unit/utilities/test_community_handler_fetcher.py
new file mode 100644
index 00000000000..3fd1ad3b942
--- /dev/null
+++ b/tests/unit/utilities/test_community_handler_fetcher.py
@@ -0,0 +1,302 @@
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import requests
+
+from mindsdb.integrations.utilities.community_handler_fetcher import (
+ _fetch_tree_recursive,
+ _resolve_tree_sha,
+ fetch_handler,
+)
+
+REPO = "mindsdb/mindsdb-community-handlers"
+BRANCH = "main"
+PATH_PREFIX = "community_handlers"
+HANDLER = "elasticsearch_handler"
+REMOTE_PREFIX = f"{PATH_PREFIX}/{HANDLER}"
+TREE_SHA = "abc123deadbeef"
+
+
+def _make_response(status_code=200, json_data=None, raise_for_status=None):
+ """Helper: build a MagicMock that looks like a requests.Response."""
+ resp = MagicMock()
+ resp.status_code = status_code
+ resp.json.return_value = json_data if json_data is not None else {}
+ resp.text = ""
+ if raise_for_status is not None:
+ resp.raise_for_status.side_effect = raise_for_status
+ else:
+ resp.raise_for_status.return_value = None
+ return resp
+
+
+def _make_get_side_effect(contents_resp, trees_resp, raw_resp=None):
+ """Return a side_effect callable that dispatches mocked responses by URL."""
+
+ def _get(url, **kwargs):
+ if "git/trees" in url:
+ return trees_resp
+ if "raw.githubusercontent.com" in url:
+ return raw_resp if raw_resp is not None else _make_response(200, b"")
+ # Contents API (resolve SHA or other)
+ return contents_resp
+
+ return _get
+
+
+class TestResolveTreSha(unittest.TestCase):
+ """Unit tests for _resolve_tree_sha()."""
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_returns_sha_when_found(self, mock_get):
+ parent_listing = [
+ {"name": "other_handler", "type": "dir", "sha": "000"},
+ {"name": HANDLER, "type": "dir", "sha": TREE_SHA},
+ ]
+ mock_get.return_value = _make_response(200, parent_listing)
+
+ result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {})
+
+ self.assertEqual(result, TREE_SHA)
+ mock_get.assert_called_once()
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_returns_none_on_404(self, mock_get):
+ mock_get.return_value = _make_response(404)
+
+ result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {})
+
+ self.assertIsNone(result)
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_returns_none_when_dir_not_in_listing(self, mock_get):
+ parent_listing = [{"name": "other_handler", "type": "dir", "sha": "000"}]
+ mock_get.return_value = _make_response(200, parent_listing)
+
+ result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {})
+
+ self.assertIsNone(result)
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_raises_on_non_200_non_404(self, mock_get):
+ mock_get.return_value = _make_response(503)
+
+ with self.assertRaises(RuntimeError):
+ _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {})
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_raises_on_network_error(self, mock_get):
+ mock_get.side_effect = requests.RequestException("timeout")
+
+ with self.assertRaises(RuntimeError):
+ _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {})
+
+
+class TestFetchTreeRecursive(unittest.TestCase):
+ """Unit tests for _fetch_tree_recursive()."""
+
+ def setUp(self):
+ self.tmp = Path(tempfile.mkdtemp())
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp, ignore_errors=True)
+
+ def _trees_response(self, entries, truncated=False):
+ return _make_response(200, {"sha": TREE_SHA, "tree": entries, "truncated": truncated})
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_downloads_flat_and_nested_files(self, mock_get):
+ tree_entries = [
+ {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10},
+ {"path": "elasticsearch_handler.py", "type": "blob", "sha": "s2", "size": 500},
+ {"path": "tests", "type": "tree", "sha": "s3"},
+ {"path": "tests/__init__.py", "type": "blob", "sha": "s4", "size": 0},
+ {"path": "tests/test_elasticsearch_handler.py", "type": "blob", "sha": "s5", "size": 1107},
+ ]
+ trees_resp = self._trees_response(tree_entries)
+ raw_resp = _make_response(200)
+ raw_resp.content = b"# file content"
+
+ def _get(url, **kwargs):
+ if "git/trees" in url:
+ return trees_resp
+ return raw_resp
+
+ mock_get.side_effect = _get
+
+ count = _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {})
+
+ self.assertEqual(count, 4)
+ self.assertTrue((self.tmp / "__init__.py").exists())
+ self.assertTrue((self.tmp / "elasticsearch_handler.py").exists())
+ self.assertTrue((self.tmp / "tests" / "__init__.py").exists())
+ self.assertTrue((self.tmp / "tests" / "test_elasticsearch_handler.py").exists())
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_max_depth_enforcement(self, mock_get):
+ # depth 4 means path has 4 slashes β 5 components β should be skipped
+ deep_path = "a/b/c/d/e.py"
+ self.assertEqual(deep_path.count("/"), 4) # 4 >= max_depth=4 β skipped
+
+ tree_entries = [
+ {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 0},
+ {"path": deep_path, "type": "blob", "sha": "s2", "size": 99},
+ ]
+ trees_resp = self._trees_response(tree_entries)
+ raw_resp = _make_response(200)
+ raw_resp.content = b""
+
+ def _get(url, **kwargs):
+ if "git/trees" in url:
+ return trees_resp
+ return raw_resp
+
+ mock_get.side_effect = _get
+
+ count = _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}, max_depth=4)
+
+ self.assertEqual(count, 1)
+ self.assertTrue((self.tmp / "__init__.py").exists())
+ self.assertFalse((self.tmp / deep_path).exists())
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_truncated_tree_logs_warning(self, mock_get):
+ trees_resp = self._trees_response([], truncated=True)
+ mock_get.return_value = trees_resp
+
+ with self.assertLogs("mindsdb.integrations.utilities.community_handler_fetcher", level="WARNING") as cm:
+ _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {})
+
+ self.assertTrue(any("truncated" in line for line in cm.output))
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_raises_on_file_download_failure(self, mock_get):
+ tree_entries = [{"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}]
+ trees_resp = self._trees_response(tree_entries)
+ raw_resp = _make_response(500)
+ raw_resp.raise_for_status.side_effect = requests.HTTPError("500 Server Error")
+
+ def _get(url, **kwargs):
+ if "git/trees" in url:
+ return trees_resp
+ return raw_resp
+
+ mock_get.side_effect = _get
+
+ with self.assertRaises(RuntimeError):
+ _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {})
+
+
+class TestFetchHandler(unittest.TestCase):
+ """Integration-style unit tests for fetch_handler()."""
+
+ def setUp(self):
+ self.storage = Path(tempfile.mkdtemp())
+
+ def tearDown(self):
+ shutil.rmtree(self.storage, ignore_errors=True)
+
+ def _parent_listing(self):
+ return [{"name": HANDLER, "type": "dir", "sha": TREE_SHA}]
+
+ def _tree_entries(self):
+ return [
+ {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10},
+ {"path": "tests/__init__.py", "type": "blob", "sha": "s2", "size": 0},
+ {"path": "tests/test_elasticsearch_handler.py", "type": "blob", "sha": "s3", "size": 1107},
+ ]
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_handler_with_subdirectories(self, mock_get):
+ contents_resp = _make_response(200, self._parent_listing())
+ trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": self._tree_entries(), "truncated": False})
+ raw_resp = _make_response(200)
+ raw_resp.content = b"# content"
+
+ mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp)
+
+ result = fetch_handler(HANDLER, self.storage)
+
+ dest = self.storage / HANDLER
+ self.assertEqual(result, dest)
+ self.assertTrue((dest / "__init__.py").exists())
+ self.assertTrue((dest / "tests" / "__init__.py").exists())
+ self.assertTrue((dest / "tests" / "test_elasticsearch_handler.py").exists())
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_404_handler_not_found(self, mock_get):
+ mock_get.return_value = _make_response(404)
+
+ result = fetch_handler(HANDLER, self.storage)
+
+ self.assertIsNone(result)
+ # tmp dir must not be left behind
+ self.assertFalse((self.storage / f".tmp_{HANDLER}").exists())
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_atomic_rename_cleanup_on_failure(self, mock_get):
+ contents_resp = _make_response(200, self._parent_listing())
+ trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": self._tree_entries(), "truncated": False})
+ # Simulate a download failure for raw files
+ raw_resp = _make_response(500)
+ raw_resp.raise_for_status.side_effect = requests.HTTPError("500")
+
+ mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp)
+
+ with self.assertRaises(RuntimeError):
+ fetch_handler(HANDLER, self.storage)
+
+ # tmp dir must be cleaned up after the exception
+ self.assertFalse((self.storage / f".tmp_{HANDLER}").exists())
+ # dest dir must not exist either
+ self.assertFalse((self.storage / HANDLER).exists())
+
+ def test_existing_handler_skips_fetch(self):
+ dest = self.storage / HANDLER
+ dest.mkdir(parents=True)
+ (dest / "__init__.py").write_text("# existing")
+
+ with patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") as mock_get:
+ result = fetch_handler(HANDLER, self.storage)
+
+ self.assertEqual(result, dest)
+ mock_get.assert_not_called()
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_max_depth_files_not_written(self, mock_get):
+ deep_path = "a/b/c/d/deep.py"
+ tree_entries = [
+ {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 0},
+ {"path": deep_path, "type": "blob", "sha": "s2", "size": 99},
+ ]
+ contents_resp = _make_response(200, self._parent_listing())
+ trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": tree_entries, "truncated": False})
+ raw_resp = _make_response(200)
+ raw_resp.content = b""
+
+ mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp)
+
+ fetch_handler(HANDLER, self.storage)
+
+ dest = self.storage / HANDLER
+ self.assertTrue((dest / "__init__.py").exists())
+ self.assertFalse((dest / deep_path).exists())
+
+ @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get")
+ def test_truncated_tree_warning_propagates(self, mock_get):
+ contents_resp = _make_response(200, self._parent_listing())
+ trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": [], "truncated": True})
+
+ mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp)
+
+ with self.assertLogs("mindsdb.integrations.utilities.community_handler_fetcher", level="WARNING") as cm:
+ fetch_handler(HANDLER, self.storage)
+
+ self.assertTrue(any("truncated" in line for line in cm.output))
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit/utilities/test_config.py b/tests/unit/utilities/test_config.py
index 88113161409..d5bd93d46b7 100644
--- a/tests/unit/utilities/test_config.py
+++ b/tests/unit/utilities/test_config.py
@@ -39,3 +39,23 @@ def test_invalid_mindsdb_db_con_raises_error(self):
error_message = str(exc_info.value)
assert "Invalid MINDSDB_DB_CON value" in error_message
assert invalid_db_con in error_message
+
+ def test_knowledge_bases_storage_env_does_not_override_storage_config(self):
+ Config._Config__instance = None
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ config_file = Path(tmpdir) / "config.json"
+ config_file.write_text(json.dumps({}))
+
+ with patch.dict(
+ os.environ,
+ {
+ "MINDSDB_CONFIG_PATH": str(config_file),
+ "MINDSDB_STORAGE_DIR": tmpdir,
+ "KNOWLEDGE_BASES_STORAGE": "faiss, pgvector",
+ },
+ clear=False,
+ ):
+ cfg = Config()
+
+ assert cfg["knowledge_bases"]["storage"] is None
diff --git a/tests/unit/various/test_llm_utils.py b/tests/unit/various/test_llm_utils.py
index 28c7f41f960..d5df7a77e42 100644
--- a/tests/unit/various/test_llm_utils.py
+++ b/tests/unit/various/test_llm_utils.py
@@ -1,86 +1,12 @@
import unittest
-from textwrap import dedent, indent
from numpy import int64
import pandas as pd
-from mindsdb.integrations.libs.llm.utils import ft_chat_formatter, ft_code_formatter, ft_cqa_formatter
-from mindsdb.integrations.libs.llm.utils import ft_jsonl_validation, ft_chat_format_validation
from mindsdb.integrations.libs.llm.utils import get_completed_prompts
class TestLLM(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- # used in `test_ft_chat_format_validation`
- cls.valid_chats = [
- # u/a pattern
- [
- {"role": "user", "content": "hi"},
- {"role": "assistant", "content": "hello"},
- {"role": "user", "content": "how are you?"},
- {"role": "assistant", "content": "I'm good, thanks"},
- ],
- # u/a pattern
- [
- {"role": "user", "content": "hi"},
- {"role": "assistant", "content": "hello"},
- {"role": "user", "content": "how are you?"},
- ],
- # s/u/a pattern
- [
- {"role": "system", "content": "you are a useful assistant."},
- {"role": "user", "content": "hello"},
- {"role": "assistant", "content": "how are you?"},
- ],
- # s/u/a pattern
- [
- {"role": "system", "content": "you are a useful assistant."},
- {"role": "user", "content": "hello"},
- {"role": "assistant", "content": "how are you?"},
- {"role": "user", "content": "I'm good, thanks"},
- ],
- ]
-
- # used in `test_ft_chat_format_validation`
- cls.invalid_chats = [
- # invalid - repeated user
- [
- {"role": "user", "content": "hi"},
- {"role": "user", "content": "hello"}, # this is invalid
- {"role": "assistant", "content": "how are you?"},
- {"role": "user", "content": "I'm good, thanks"},
- ],
- # invalid - repeated assistant
- [
- {"role": "user", "content": "hi"},
- {"role": "assistant", "content": "hello"},
- {"role": "assistant", "content": "how are you?"}, # this is invalid
- {"role": "user", "content": "I'm good, thanks"},
- ],
- # invalid - incorrect system prompt order
- [
- {"role": "user", "content": "hi"},
- {"role": "assistant", "content": "hello"},
- {"role": "system", "content": "you are a useful assistant."}, # this is invalid
- {"role": "user", "content": "I'm good, thanks"},
- ],
- # invalid roles
- [
- {"role": "user", "content": "hi"},
- {"role": "invalid", "content": "this is an invalid role"},
- ],
- # invalid content
- [
- {"role": "user", "content": "hi"},
- {"role": "assistant", "content": None}, # should always be a string
- ],
- # invalid - no assistant in the chat
- [
- {"role": "user", "content": "hi"},
- ],
- ]
-
def test_get_completed_prompts(self):
placeholder = "{{text}}"
prefix = "You are a helpful assistant. Here is the user's input:"
@@ -107,160 +33,3 @@ def test_get_completed_prompts(self):
df = pd.DataFrame({"text": user_inputs})
with self.assertRaises(Exception):
get_completed_prompts(base_template, df)
-
- def test_ft_chat_format_validation(self):
- for chat in self.valid_chats:
- ft_chat_format_validation(chat) # if chat is valid, returns `None`
-
- for chat in self.invalid_chats:
- with self.assertRaises(Exception):
- ft_chat_format_validation(chat) # all of these should raise an Exception
-
- def test_ft_chat_formatter(self):
- # 1a. long DF with required columns (`role` and `content`)
- df = pd.DataFrame(
- {
- "role": ["system", "user", "assistant", "user"],
- "content": ["you are a helpful assistant", "hello", "hi, how can I help?", "I'm good, thanks"],
- }
- )
- chats = ft_chat_formatter(df)
- assert list(chats[0].keys()) == ["messages"]
- ft_chat_format_validation(chats[0]["messages"]) # valid, returns None
-
- # 1b. add `chat_id` to df
- df = pd.DataFrame(
- {
- "chat_id": [1, 1, 1, 2, 2, 2],
- "role": ["system", "user", "assistant"] * 2,
- "content": ["you are a helpful assistant", "hello", "hi, how can I help?"] * 2,
- }
- )
- # add extra row at the end, belonging to first chat. This checks sorting.
- df = pd.concat([df, pd.DataFrame({"chat_id": [1], "role": ["user"], "content": ["I'm good, thanks"]})])
- chats = ft_chat_formatter(df)
- for chat in chats:
- assert list(chat.keys()) == ["messages"]
- ft_chat_format_validation(chat["messages"]) # valid, returns None
-
- # 1c. add `message_id` to df (scrambled to check sorting)
- df = pd.DataFrame(
- {
- "chat_id": [1, 2, 1, 2, 1, 2],
- "message_id": [1, 1, 2, 2, 3, 3],
- "role": ["system", "system", "user", "user", "assistant", "assistant"],
- "content": ["you are a helpful assistant"] * 2 + ["hello"] * 2 + ["hi, how can I help?"] * 2,
- }
- )
- chats = ft_chat_formatter(df)
- for chat in chats:
- assert list(chat.keys()) == ["messages"]
- ft_chat_format_validation(chat["messages"]) # valid, returns None
-
- # 2a. json format - df contains single column `chat_json`
- df = pd.DataFrame(
- {
- "chat_json": [
- '{"messages": [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]}'
- ]
- }
- )
- chats = ft_chat_formatter(df)
- assert list(chats[0].keys()) == ["messages"]
- ft_chat_format_validation(chats[0]["messages"]) # valid, returns None
-
- def test_ft_jsonl_validation(self):
- df = pd.DataFrame(
- {
- "role": ["system", "user", "assistant", "user"],
- "content": ["you are a helpful assistant", "hello", "hi, how can I help?", "I'm good, thanks"],
- }
- )
- chats = ft_chat_formatter(df)
-
- # when validated, this method won't return anything
- assert ft_jsonl_validation([line for line in chats]) is None
-
- # otherwise, it raises an Exception
- chats = ft_chat_formatter(df)
- chats[0]["messages"][1]["role"] = "invalid"
- with self.assertRaises(Exception):
- ft_jsonl_validation([line for line in chats])
-
- def test_ft_code_formatter(self):
- df = pd.DataFrame(
- {
- "code": [
- "".join(
- [
- indent(
- dedent("""\
- # format chunks into prompts
- roles = []
- contents = []
-
- for idx in range(0, len(chunks), 3):
- """),
- " " * 4 * 2,
- ), # mind the base indent level
- indent(
- dedent(
- """pre, mid, suf = chunks[idx:idx+3]
-
- interleaved = list(itertools.chain(*zip(templates, (pre, mid, suf))))
- """
- ),
- " " * 4 * 3,
- ), # mind the base indent level
- ]
- )
- ]
- }
- )
- df2 = ft_code_formatter(df, chunk_size=110)
-
- assert list(df2["role"]) == ["system", "user", "assistant"]
- assert (
- df2["content"].iloc[0]
- == "You are a powerful text to code model. Your job is to provide great code completions. As context, you are given code that is found immediately before and after the code you must generate.\n\nYou must output the code that should go in between the prefix and suffix.\n\n"
- ) # noqa
- assert (
- df2["content"].iloc[1]
- == "### Code prefix:\n # format chunks into prompts\n roles = []\n contents = []\n\n\n### Code suffix:\n interleaved = list(itertools.chain(*zip(templates, (pre, mid, suf))))\n\n### Completion:"
- ) # noqa
- assert (
- df2["content"].iloc[2]
- == " for idx in range(0, len(chunks), 3):\n pre, mid, suf = chunks[idx:idx+3]\n\n"
- ) # noqa
-
- df2 = ft_code_formatter(df, format="fim", chunk_size=110)
- assert list(df2["role"]) == ["system", "user", "assistant"]
- assert (
- df2["content"].iloc[0]
- == "You are a powerful text to code model. Your job is to provide great code completions. As context, you are given code that is found immediately before and after the code you must generate.\n\nYou must output the code that should go in between the prefix and suffix.\n\n"
- ) # noqa
- assert (
- df2["content"].iloc[1]
- == "
\n # format chunks into prompts\n roles = []\n contents = []\n\n\n\n interleaved = list(itertools.chain(*zip(templates, (pre, mid, suf))))\n\n"
- ) # noqa
- assert (
- df2["content"].iloc[2]
- == " for idx in range(0, len(chunks), 3):\n pre, mid, suf = chunks[idx:idx+3]\n\n"
- ) # noqa
-
- def test_ft_cqa_formatter(self):
- df = pd.DataFrame(
- {
- "instruction": ["Answer accurately."],
- "context": ["You are a helpful assistant."],
- "question": ["What is the capital of France?"],
- "answer": ["Paris"],
- }
- )
-
- df2 = ft_cqa_formatter(df)
-
- assert list(df2["role"]) == ["system", "user", "assistant"]
- assert df2["content"].iloc[0] == "Answer accurately.\nYou are a helpful assistant."
- assert df2["content"].iloc[1] == "What is the capital of France?"
- assert df2["content"].iloc[2] == "Paris"
diff --git a/tests/unit/various/test_main.py b/tests/unit/various/test_main.py
new file mode 100644
index 00000000000..689a9eb93af
--- /dev/null
+++ b/tests/unit/various/test_main.py
@@ -0,0 +1,183 @@
+import pathlib
+import shutil
+from unittest.mock import patch
+import pytest
+
+
+class TestMainCleanup:
+ @pytest.fixture
+ def patch_main_config(self, tmp_path, monkeypatch):
+ import mindsdb.__main__ as main_mod
+
+ monkeypatch.setattr(main_mod, "config", {"paths": {"tmp": tmp_path}})
+ return tmp_path, main_mod
+
+ @pytest.fixture
+ def errors(self, caplog):
+ """Capture only ERROR logs as concatenated text"""
+
+ class ErrorCapture:
+ @property
+ def text(self):
+ return "\n".join(r.getMessage() for r in caplog.records if r.levelname == "ERROR")
+
+ caplog.clear()
+ caplog.set_level("ERROR")
+ return ErrorCapture()
+
+ def test_cleans_files_and_dirs_but_keeps_tmp_path(self, patch_main_config):
+ tmp_path, main_mod = patch_main_config
+ (tmp_path / "a.txt").write_text("hello")
+ sub = tmp_path / "sub"
+ sub.mkdir()
+ (sub / "b.txt").write_text("world")
+
+ main_mod.clean_mindsdb_tmp_dir()
+
+ assert tmp_path.exists(), "tmp_path itself should not be deleted"
+ assert list(tmp_path.iterdir()) == [], "All content should be removed"
+
+ def test_empty_directory(self, patch_main_config):
+ tmp_path, main_mod = patch_main_config
+ main_mod.clean_mindsdb_tmp_dir()
+ assert tmp_path.exists()
+ assert list(tmp_path.iterdir()) == []
+
+ def test_deeply_nested_directories(self, patch_main_config):
+ tmp_path, main_mod = patch_main_config
+ deep = tmp_path / "a" / "b" / "c" / "d"
+ deep.mkdir(parents=True)
+ (deep / "file.txt").write_text("deep")
+
+ main_mod.clean_mindsdb_tmp_dir()
+
+ assert tmp_path.exists()
+ assert not (tmp_path / "a").exists()
+
+ def test_symlinks_are_handled(self, patch_main_config):
+ tmp_path, main_mod = patch_main_config
+
+ external_file = tmp_path.parent / "external.txt"
+ external_file.write_text("external")
+
+ (tmp_path / "link_to_external").symlink_to(external_file)
+
+ main_mod.clean_mindsdb_tmp_dir()
+
+ assert tmp_path.exists()
+ assert list(tmp_path.iterdir()) == []
+ assert external_file.exists()
+
+ external_file.unlink()
+
+ def test_unlink_failure_continues_and_logs(self, patch_main_config, errors):
+ tmp_path, main_mod = patch_main_config
+ (tmp_path / "ok1.txt").write_text("a")
+ (tmp_path / "failing_file.txt").write_text("b")
+ (tmp_path / "ok2.txt").write_text("c")
+
+ original_unlink = pathlib.Path.unlink
+
+ def mock_unlink(self, *args, **kwargs):
+ if self.name == "failing_file.txt":
+ raise PermissionError("Cannot delete file")
+ return original_unlink(self, *args, **kwargs)
+
+ with patch.object(pathlib.Path, "unlink", mock_unlink):
+ main_mod.clean_mindsdb_tmp_dir()
+
+ txt = errors.text
+ assert "Failed to clean" in txt
+ assert "Cannot delete file" in txt
+
+ assert not (tmp_path / "ok1.txt").exists()
+ assert not (tmp_path / "ok2.txt").exists()
+ assert (tmp_path / "failing_file.txt").exists()
+
+ def test_rmtree_failure_continues_and_logs(self, patch_main_config, errors):
+ tmp_path, main_mod = patch_main_config
+
+ (tmp_path / "file.txt").write_text("content")
+ (tmp_path / "failing_dir").mkdir()
+ (tmp_path / "another_file.txt").write_text("more content")
+ (tmp_path / "good_dir").mkdir()
+
+ original_rmtree = shutil.rmtree
+
+ def mock_rmtree(path, *args, **kwargs):
+ if "failing_dir" in str(path):
+ raise PermissionError("Cannot delete directory")
+ return original_rmtree(path, *args, **kwargs)
+
+ with patch("shutil.rmtree", mock_rmtree):
+ main_mod.clean_mindsdb_tmp_dir()
+
+ txt = errors.text
+ assert "Failed to clean" in txt
+ assert "Cannot delete directory" in txt
+
+ assert not (tmp_path / "file.txt").exists()
+ assert not (tmp_path / "another_file.txt").exists()
+ assert not (tmp_path / "good_dir").exists()
+ assert (tmp_path / "failing_dir").exists()
+
+ def test_mixed_failures_continue_cleanup(self, patch_main_config, errors):
+ tmp_path, main_mod = patch_main_config
+
+ (tmp_path / "good_file1.txt").write_text("a")
+ (tmp_path / "failing_file.txt").write_text("b")
+ (tmp_path / "good_file2.txt").write_text("c")
+ (tmp_path / "failing_dir").mkdir()
+ (tmp_path / "good_dir").mkdir()
+
+ original_unlink = pathlib.Path.unlink
+ original_rmtree = shutil.rmtree
+
+ def mock_unlink(self, *args, **kwargs):
+ if self.name == "failing_file.txt":
+ raise PermissionError("Cannot delete file")
+ return original_unlink(self, *args, **kwargs)
+
+ def mock_rmtree(path, *args, **kwargs):
+ if "failing_dir" in str(path):
+ raise PermissionError("Cannot delete directory")
+ return original_rmtree(path, *args, **kwargs)
+
+ with patch.object(pathlib.Path, "unlink", mock_unlink), patch("shutil.rmtree", mock_rmtree):
+ main_mod.clean_mindsdb_tmp_dir()
+
+ txt = errors.text
+ # We should have at least two "Failed to clean" lines (file + dir)
+ assert txt.count("Failed to clean") >= 2
+
+ assert not (tmp_path / "good_file1.txt").exists()
+ assert not (tmp_path / "good_file2.txt").exists()
+ assert not (tmp_path / "good_dir").exists()
+ assert (tmp_path / "failing_file.txt").exists()
+ assert (tmp_path / "failing_dir").exists()
+
+ def test_nonexistent_tmp_path(self, monkeypatch):
+ import mindsdb.__main__ as main_mod
+ from pathlib import Path
+
+ nonexistent = Path("/tmp/nonexistent_mindsdb_test_dir_12345")
+ assert not nonexistent.exists()
+
+ monkeypatch.setattr(main_mod, "config", {"paths": {"tmp": nonexistent}})
+ main_mod.clean_mindsdb_tmp_dir()
+ assert not nonexistent.exists()
+
+ def test_logger_called_with_correct_level(self, patch_main_config):
+ tmp_path, main_mod = patch_main_config
+ (tmp_path / "failing_file.txt").write_text("content")
+
+ original_unlink = pathlib.Path.unlink
+
+ def mock_unlink(self, *args, **kwargs):
+ if self.name == "failing_file.txt":
+ raise PermissionError("Test error")
+ return original_unlink(self, *args, **kwargs)
+
+ with patch.object(pathlib.Path, "unlink", mock_unlink), patch("mindsdb.__main__.logger") as mock_logger:
+ main_mod.clean_mindsdb_tmp_dir()
+ assert mock_logger.error.called or mock_logger.exception.called
diff --git a/tests/unit/various/test_rag_config_loader.py b/tests/unit/various/test_rag_config_loader.py
deleted file mode 100644
index fc555a553ff..00000000000
--- a/tests/unit/various/test_rag_config_loader.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from unittest.mock import Mock
-from mindsdb.integrations.utilities.rag.settings import (
- RetrieverType,
- MultiVectorRetrieverMode,
- SearchType,
- RAGPipelineModel,
-)
-from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
-
-
-def test_load_rag_config_empty():
- """Test loading RAG config with empty parameters"""
- config = load_rag_config({})
- assert isinstance(config, RAGPipelineModel)
-
-
-def test_load_rag_config_basic():
- """Test loading RAG config with basic parameters"""
- base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
- config = load_rag_config(base_config)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.retriever_type == RetrieverType.VECTOR_STORE
- assert config.search_type == SearchType.SIMILARITY
-
-
-def test_load_rag_config_with_search_kwargs():
- """Test loading RAG config with search kwargs"""
- base_config = {
- "retriever_type": RetrieverType.VECTOR_STORE.value,
- "search_type": SearchType.SIMILARITY.value,
- "search_kwargs": {"k": 5},
- }
- config = load_rag_config(base_config)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.search_kwargs.k == 5
-
-
-def test_load_rag_config_with_embedding_model():
- """Test loading RAG config with embedding model"""
- base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
-
- # Create a mock that's a subclass of Embeddings
- class MockEmbeddings:
- def embed_documents(self, texts):
- return [[0.0] * 10] * len(texts)
-
- def embed_query(self, text):
- return [0.0] * 10
-
- embedding_model = MockEmbeddings()
- config = load_rag_config(base_config, embedding_model=embedding_model)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.embedding_model == embedding_model
-
-
-def test_load_rag_config_with_multi_vector_mode():
- """Test loading RAG config with multi vector mode"""
- base_config = {
- "retriever_type": RetrieverType.VECTOR_STORE.value,
- "search_type": SearchType.SIMILARITY.value,
- "multi_retriever_mode": MultiVectorRetrieverMode.SPLIT.value, # Use correct enum value
- }
- config = load_rag_config(base_config)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.retriever_type == RetrieverType.VECTOR_STORE
- assert config.search_type == SearchType.SIMILARITY
- assert config.multi_retriever_mode == MultiVectorRetrieverMode.SPLIT
-
-
-def test_load_rag_config_with_kb_params():
- """Test loading RAG config with knowledge base parameters"""
- base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
- kb_params = {"search_kwargs": {"k": 5}}
- config = load_rag_config(base_config, kb_params)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.search_kwargs.k == 5
-
-
-def test_load_rag_config_with_vector_store_config():
- """Test loading RAG config with vector store config"""
- base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
- kb_params = {"vector_store_config": {"kb_table": Mock()}}
- config = load_rag_config(base_config, kb_params)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.vector_store_config.kb_table == kb_params["vector_store_config"]["kb_table"]
-
-
-def test_load_rag_config_from_knowledge_base():
- """Test RAG config loading in knowledge base context"""
- base_config = {
- "retriever_type": RetrieverType.VECTOR_STORE.value,
- "search_type": SearchType.SIMILARITY.value,
- "search_kwargs": {"k": 5},
- }
- kb_params = {"vector_store_config": {"kb_table": Mock()}}
- config = load_rag_config(base_config, kb_params)
-
- assert isinstance(config, RAGPipelineModel)
- assert config.retriever_type == RetrieverType.VECTOR_STORE
- assert config.search_type == SearchType.SIMILARITY
- assert config.search_kwargs.k == 5
- assert config.vector_store_config.kb_table == kb_params["vector_store_config"]["kb_table"]
diff --git a/tests/unit/various/test_retrieval_tool.py b/tests/unit/various/test_retrieval_tool.py
deleted file mode 100644
index 93f4b3c3296..00000000000
--- a/tests/unit/various/test_retrieval_tool.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import pytest
-from unittest.mock import Mock
-from mindsdb.integrations.utilities.rag.settings import (
- RetrieverType,
- MultiVectorRetrieverMode,
- VectorStoreConfig,
- DEFAULT_LLM_MODEL,
- DEFAULT_TEST_TABLE_NAME,
- DEFAULT_CHUNK_SIZE,
-)
-from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
-
-
-@pytest.fixture
-def mock_tools_config():
- return {
- "retriever_type": "vector_store",
- "multi_retriever_mode": "both",
- "embedding_model": Mock(),
- "documents": [Mock()],
- "vector_store_config": {"vector_store_type": "chromadb", "collection_name": "test"},
- "invalid_param": "should_be_filtered_out",
- }
-
-
-def test_rag_params_conversion():
- """Test that parameters are correctly converted to RAGPipelineModel"""
- tools_config = {
- "retriever_type": "vector_store",
- "multi_retriever_mode": "both",
- }
- rag_config = load_rag_config(tools_config)
- assert rag_config.retriever_type == RetrieverType.VECTOR_STORE
- assert rag_config.multi_retriever_mode == MultiVectorRetrieverMode.BOTH
-
-
-def test_invalid_params():
- """Test that invalid enum values raise appropriate errors"""
- tools_config = {
- "retriever_type": "invalid_type",
- }
- with pytest.raises(ValueError):
- load_rag_config(tools_config)
-
- tools_config = {"invalid_param": "invalid_type"}
- with pytest.raises(ValueError):
- load_rag_config(tools_config)
-
-
-def test_vector_store_config_conversion():
- """Test that vector store config is properly handled"""
- tools_config = {"vector_store_config": {"vector_store_type": "chromadb", "collection_name": "test"}}
- rag_config = load_rag_config(tools_config)
- assert isinstance(rag_config.vector_store_config, VectorStoreConfig)
- assert rag_config.vector_store_config.collection_name == "test"
-
-
-def test_default_values():
- """Test that default values are properly set"""
- tools_config = {}
- rag_config = load_rag_config(tools_config)
- # Test default enum values
- assert rag_config.retriever_type == RetrieverType.VECTOR_STORE
- assert rag_config.multi_retriever_mode == MultiVectorRetrieverMode.BOTH
- # Test other default values
- assert rag_config.llm_model_name == DEFAULT_LLM_MODEL
- assert rag_config.table_name == DEFAULT_TEST_TABLE_NAME
- assert rag_config.chunk_size == DEFAULT_CHUNK_SIZE
- assert isinstance(rag_config.vector_store_config, VectorStoreConfig)
-
-
-@pytest.mark.parametrize(
- "field,value,expected",
- [
- ("retriever_type", "auto", RetrieverType.AUTO),
- ("multi_retriever_mode", "split", MultiVectorRetrieverMode.SPLIT),
- ("chunk_size", 500, 500),
- ],
-)
-def test_field_assignments(field, value, expected):
- """Test various field assignments"""
- tools_config = {field: value}
- rag_config = load_rag_config(tools_config)
- assert getattr(rag_config, field) == expected
diff --git a/tests/unused/integration/a2a/__init__.py b/tests/unused/integration/a2a/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/unused/integration/a2a/test_a2a_streaming.py b/tests/unused/integration/a2a/test_a2a_streaming.py
deleted file mode 100644
index 71032a7e679..00000000000
--- a/tests/unused/integration/a2a/test_a2a_streaming.py
+++ /dev/null
@@ -1,436 +0,0 @@
-#!/usr/bin/env python
-import sys
-import json
-import time
-import requests
-import uuid
-import argparse
-import pytest
-import os
-from typing import Dict, List, Set
-
-# Default test configuration
-DEFAULT_HOST = "0.0.0.0"
-DEFAULT_PORT = 10002
-DEFAULT_TIMEOUT = 120 # seconds
-DEFAULT_QUERY = "What's the average price for a one bed?"
-DEFAULT_AGENT_NAME = "my_agent" # Default agent name to use for requests
-
-"""
-Run instructions:
-
-start A2A server and mindsdb
-
-You should have an agent created in MindsDB (this script does NOT create the agent)
-
-By default the agent is named "my_agent"
-
-e.g.
-
-CREATE AGENT my_agent
-USING
-
- model='gemini-2.0-flash',
- include_knowledge_bases=['mindsdb.kb_test'],
- include_tables=['postgresql_conn.home_rentals', 'postgresql_conn2.car_info'],
- prompt_template='
- mindsdb.kb_test knowledge base has info about cities
- postgresql_conn.home_rentals database tables has rental data
- postgresql_conn2.car_info contains info on cars specs
- ';
-
-source .venv/bin/activate
-python test_a2a_streaming.py --host 0.0.0.0 --port 10002 --timeout 180 "What's the average price of homes in berkeley_hills?"
-"""
-
-
-def generate_uuid() -> str:
- """Generate a random UUID string."""
- return str(uuid.uuid4()).replace("-", "")
-
-
-def stream_a2a_query(
- query: str,
- host: str = DEFAULT_HOST,
- port: int = DEFAULT_PORT,
- timeout: int = DEFAULT_TIMEOUT,
- verbose: bool = False,
- agent_name: str = DEFAULT_AGENT_NAME, # Added agent_name parameter with default
-):
- """
- Stream a query to the A2A server and yield the responses incrementally.
-
- Args:
- query: The text query to send to the agent
- host: A2A server host
- port: A2A server port
- timeout: Maximum time to wait for responses (seconds)
- verbose: Whether to print responses to stdout
- agent_name: Name of the agent to use (REQUIRED - the A2A API requires an explicit agent name)
-
- Yields:
- Dict: Response messages from the A2A server
- """
- # Generate unique IDs for the request
- task_id = generate_uuid()
- session_id = generate_uuid()
- request_id = generate_uuid()
-
- # Prepare the request payload
- payload = {
- "jsonrpc": "2.0",
- "id": request_id,
- "method": "tasks/sendSubscribe",
- "params": {
- "id": task_id,
- "sessionId": session_id,
- "message": {
- "role": "user",
- "parts": [
- {"type": "text", "text": query},
- ],
- "metadata": {"agent_name": agent_name}, # Using the provided agent_name
- },
- "acceptedOutputModes": ["text/plain"],
- },
- }
-
- url = f"http://{host}:{port}/a2a"
- if verbose:
- print(f"Sending streaming request to {url}")
- print(f"Query: {query}")
- print("Streaming responses:")
- print("-" * 60)
-
- # Set up headers for SSE
- headers = {
- "Content-Type": "application/json",
- "Accept": "text/event-stream",
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- }
-
- # Track seen messages to avoid duplicates
- seen_messages: Set[str] = set()
- all_responses: List[Dict] = []
- start_time = time.time()
-
- try:
- # Make the streaming request
- with requests.post(url, json=payload, headers=headers, stream=True) as response:
- if not response.ok:
- error_msg = f"Error: HTTP {response.status_code} - {response.text}"
- if verbose:
- print(error_msg)
- yield {"error": error_msg}
- return
-
- # Process the SSE stream
- buffer = ""
- for chunk in response.iter_content(chunk_size=1):
- # Check timeout
- if time.time() - start_time > timeout:
- error_msg = f"Timeout after {timeout} seconds"
- if verbose:
- print(error_msg)
- yield {"error": error_msg, "timeout": True}
- return
-
- if not chunk:
- continue
-
- # Decode the chunk and add to buffer
- buffer += chunk.decode("utf-8")
-
- # Process complete lines
- while "\n" in buffer:
- line, buffer = buffer.split("\n", 1)
- line = line.rstrip()
-
- # Skip empty lines
- if not line:
- continue
-
- # Process data lines
- if line.startswith("data:"):
- data_str = line[5:].strip()
- if not data_str:
- continue
-
- try:
- # Parse the JSON data
- data = json.loads(data_str)
-
- # Extract and display content
- if "result" in data:
- result = data["result"]
-
- # Handle status updates
- if "status" in result:
- message = result["status"].get("message", {})
- parts = message.get("parts", [])
-
- for part in parts:
- # Get content and metadata
- content = part.get("text", "")
- metadata = part.get("metadata", {})
- thought_type = metadata.get("thought_type", "")
-
- # Create a unique key for deduplication
- message_key = f"{thought_type}:{content}"
-
- # Skip if we've seen this message before
- if message_key in seen_messages:
- continue
-
- # Add to seen messages
- seen_messages.add(message_key)
-
- # Create response object
- response_obj = {
- "type": thought_type
- or part.get("type", "text"),
- "content": content,
- "metadata": metadata,
- }
-
- # Display based on thought type
- if verbose:
- if thought_type == "thought":
- print(f"Thought: {content}")
- elif thought_type == "observation":
- print(f"Observation: {content}")
- elif thought_type == "sql":
- print(f"SQL Query: {content}")
- elif part.get("type") == "text":
- print(content)
- sys.stdout.flush()
-
- # Yield the response
- yield response_obj
- all_responses.append(response_obj)
-
- # Handle artifact updates
- if "artifact" in result:
- artifact = result["artifact"]
- parts = artifact.get("parts", [])
-
- for part in parts:
- content = part.get("text", "")
- if content:
- response_obj = {
- "type": "answer",
- "content": content,
- }
- if verbose:
- print(content)
- sys.stdout.flush()
- yield response_obj
- all_responses.append(response_obj)
-
- # Handle completion
- if result.get("final"):
- response_obj = {"type": "completion", "final": True}
- if verbose:
- print("\n[Completed]")
- sys.stdout.flush()
- yield response_obj
- all_responses.append(response_obj)
-
- # Handle errors
- elif "error" in data:
- error_msg = data["error"].get("message", "")
- response_obj = {"error": error_msg}
- if verbose:
- print(f"Error: {error_msg}")
- sys.stdout.flush()
- yield response_obj
- all_responses.append(response_obj)
-
- except json.JSONDecodeError:
- if verbose:
- print(f"Warning: Invalid JSON: {data_str[:50]}...")
- sys.stdout.flush()
-
- # Process event end
- elif line == "":
- # Event boundary - process the event
- pass
-
- except KeyboardInterrupt:
- if verbose:
- print("\nInterrupted by user")
- yield {"error": "Interrupted by user"}
- except Exception as e:
- error_msg = f"Error: {str(e)}"
- if verbose:
- print(error_msg)
- yield {"error": error_msg}
-
- # Return all collected responses
- return all_responses
-
-
-def run_manual_test():
- """Run a manual test with command line arguments."""
- parser = argparse.ArgumentParser(
- description="Test A2A streaming with direct requests"
- )
- parser.add_argument("--host", default=DEFAULT_HOST, help="A2A server host")
- parser.add_argument(
- "--port", type=int, default=DEFAULT_PORT, help="A2A server port"
- )
- parser.add_argument(
- "--timeout", type=int, default=DEFAULT_TIMEOUT, help="Timeout in seconds"
- )
- parser.add_argument(
- "--agent-name", default=DEFAULT_AGENT_NAME, help="Name of the agent to use"
- )
- parser.add_argument(
- "query", nargs="?", default=DEFAULT_QUERY, help="Query to send to the agent"
- )
-
- args = parser.parse_args()
-
- # Run with verbose output for manual testing
- for _ in stream_a2a_query(
- args.query,
- args.host,
- args.port,
- args.timeout,
- verbose=True,
- agent_name=args.agent_name,
- ):
- pass # Just consume the generator to display output
-
-
-@pytest.mark.integration
-def test_a2a_streaming_integration():
- """
- Integration test for A2A streaming functionality.
-
- This test requires a running A2A server. It can be configured with environment variables:
- - A2A_TEST_HOST: A2A server host (default: 0.0.0.0)
- - A2A_TEST_PORT: A2A server port (default: 10002)
- - A2A_TEST_QUERY: Query to send (default: "What's the average price for a one bed?")
- - A2A_TEST_TIMEOUT: Timeout in seconds (default: 120)
- - A2A_TEST_AGENT_NAME: Agent name to use (default: my_agent)
-
- IMPORTANT: You must create the agent in MindsDB before running this test.
- """
- # Get configuration from environment variables
- host = os.environ.get("A2A_TEST_HOST", DEFAULT_HOST)
- port = int(os.environ.get("A2A_TEST_PORT", DEFAULT_PORT))
- query = os.environ.get("A2A_TEST_QUERY", DEFAULT_QUERY)
- timeout = int(os.environ.get("A2A_TEST_TIMEOUT", DEFAULT_TIMEOUT))
- agent_name = os.environ.get("A2A_TEST_AGENT_NAME", DEFAULT_AGENT_NAME)
-
- # Check if we should skip the test
- skip_test = os.environ.get("SKIP_A2A_TEST", "false").lower() == "true"
- if skip_test:
- pytest.skip(
- "Skipping A2A test as requested by SKIP_A2A_TEST environment variable"
- )
-
- # First, check if the agent exists by making a simple request
- try:
- # Make a simple request to check if the agent exists
- url = f"http://{host}:{port}/a2a"
- check_payload = {
- "jsonrpc": "2.0",
- "id": generate_uuid(),
- "method": "tasks/send",
- "params": {
- "id": generate_uuid(),
- "sessionId": generate_uuid(),
- "message": {
- "role": "user",
- "parts": [{"type": "text", "text": "test"}],
- "metadata": {"agent_name": agent_name},
- },
- },
- }
-
- response = requests.post(url, json=check_payload, timeout=10)
- if response.status_code == 404 or "not found" in response.text.lower():
- pytest.skip(
- f"Agent '{agent_name}' not found. Please create the agent before running this test."
- )
- except Exception as e:
- pytest.skip(f"Error checking if agent exists: {str(e)}")
-
- # Collect all responses
- responses = list(
- stream_a2a_query(
- query, host, port, timeout, verbose=False, agent_name=agent_name
- )
- )
-
- # Basic assertions
- assert len(responses) > 0, "No responses received from A2A server"
-
- # Check for error responses, but be more tolerant of task tracking errors and validation errors
- errors = [r for r in responses if "error" in r]
-
- # Identify different types of non-critical errors
- task_tracking_errors = [
- e
- for e in errors
- if "Task" in e.get("error", "") and "not found" in e.get("error", "")
- ]
- validation_errors = [e for e in errors if "validation error" in e.get("error", "")]
- non_critical_errors = task_tracking_errors + validation_errors
-
- # If all errors are non-critical, we can consider the test passed
- if len(errors) > 0 and len(non_critical_errors) == len(errors):
- print(f"Ignoring non-critical errors: {non_critical_errors}")
- else:
- # If there are other types of errors, fail the test
- real_errors = [e for e in errors if e not in non_critical_errors]
- assert len(real_errors) == 0, f"Errors in responses: {real_errors}"
-
- # Print response types for debugging
- response_types = set(r.get("type", "") for r in responses)
- print(f"Response types found: {response_types}")
-
- # Verify we have different types of responses (thoughts, observations, etc.)
- assert len(response_types) > 1, f"Only found response types: {response_types}"
-
- # Look for any kind of final/completion message
- # More flexible approach - check for any of these indicators
- final_messages = [
- r
- for r in responses
- if (
- # Original strict check
- (r.get("type") == "completion" and r.get("final"))
- or r.get("final") is True
- or r.get("type") == "answer"
- or (
- r.get("type") == "text"
- and r.get("content", "").strip()
- and len(r.get("content", "")) > 20
- )
- )
- ]
-
- print(f"Found {len(final_messages)} potential final/completion messages")
- if len(final_messages) == 0:
- # Print the last few responses for debugging
- print("Last 5 responses for debugging:")
- for r in responses[-5:]:
- print(f" {r}")
-
- # More lenient assertion - just check if we got any responses at all
- assert len(responses) > 5, "Not enough responses received"
-
- # Skip the completion check for now as the format may vary
- # assert len(final_messages) > 0, "No completion or final message received"
-
- print(f"β Integration test passed with {len(responses)} responses")
- return responses
-
-
-if __name__ == "__main__":
- # If run directly, use the manual test mode
- run_manual_test()
diff --git a/tests/unused/integration/knowledge_bases/data/seed.sql b/tests/unused/integration/knowledge_bases/data/seed.sql
deleted file mode 100644
index 69fe7847fc2..00000000000
--- a/tests/unused/integration/knowledge_bases/data/seed.sql
+++ /dev/null
@@ -1,14 +0,0 @@
--- Make sure pgvector extension is enabled
-DROP EXTENSION IF EXISTS vector;
-CREATE EXTENSION vector;
-
--- Create the table
-CREATE TABLE IF NOT EXISTS items (
- id text PRIMARY KEY,
- content text NOT NULL,
- embeddings vector NOT NULL,
- metadata jsonb
-);
-
--- Dummy data will be handled in tests themselves. Leave it empty.
-COMMIT;
diff --git a/tests/unused/integration/knowledge_bases/mindsdb_langchain_pgvector_integration_test.py b/tests/unused/integration/knowledge_bases/mindsdb_langchain_pgvector_integration_test.py
deleted file mode 100644
index 3a16dac44d0..00000000000
--- a/tests/unused/integration/knowledge_bases/mindsdb_langchain_pgvector_integration_test.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB
-from mindsdb.integrations.handlers.langchain_embedding_handler.fastapi_embeddings import FastAPIEmbeddings
-
-
-def setup_pgvector_database():
- """Setup pgvector database"""
- # Using port 15432 to avoid conflicts with local PostgreSQL
- connection_string = "postgresql://gateway:gateway@localhost:15432/gateway"
-
- print(f"Connecting to: {connection_string}")
-
- # Initialize FastAPI embeddings
- embeddings = FastAPIEmbeddings(
- api_base="http://localhost:8043/v1/embeddings",
- model="sparse_model"
- )
-
- # Initialize PGVectorMDB
- vector_db = PGVectorMDB(
- connection_string=connection_string,
- collection_name="test_dev_doc_vectors",
- embedding_function=embeddings,
- is_sparse=True, # Using sparse vectors
- vector_size=30522 # Size for sparse vectors
- )
-
- return vector_db
-
-
-def test_vector_queries(vector_db):
- """Test various vector queries"""
- print("\nTesting vector queries...")
-
- # Test text to be embedded
- test_text = "For the Bsecondaryl containment"
-
- # Get embeddings for the test text
- embedding = vector_db.embedding_function.embed_query(test_text)
-
- # Query similar vectors
- results = vector_db._query_collection(
- embedding=embedding,
- k=5
- )
-
- print("\nVector similarity search results:")
- for item, distance in results:
- print(f"Content: {item.content}")
- print(f"Metadata: {item.metadata}")
- print(f"Distance: {distance}")
- print("---")
-
-
-def main():
- # Setup vector database
- print("\nSetting up pgvector database...")
- vector_db = setup_pgvector_database()
-
- # Run tests
- test_vector_queries(vector_db)
-
-
-if __name__ == "__main__":
- main()
diff --git a/tests/unused/integration/knowledge_bases/test_knowledge_bases.py b/tests/unused/integration/knowledge_bases/test_knowledge_bases.py
deleted file mode 100644
index a43c1a77ce5..00000000000
--- a/tests/unused/integration/knowledge_bases/test_knowledge_bases.py
+++ /dev/null
@@ -1,337 +0,0 @@
-from http import HTTPStatus
-from tempfile import TemporaryDirectory
-from time import perf_counter, sleep
-from uuid import uuid4
-
-import os
-import psycopg2
-import pytest
-
-from mindsdb.api.http.initialize import initialize_app
-from mindsdb.migrations import migrate
-from mindsdb.interfaces.storage import db
-from mindsdb.utilities.config import config
-
-# Should match table name in data/seed.sql
-TEST_TABLE_NAME = 'items'
-# Should match column names in data/seed.sql
-COLUMN_NAMES = ['id', 'content', 'embeddings', 'metadata']
-
-CONNECTION_KWARGS = {
- 'connection_data': {
- 'host': os.environ.get('MDB_TEST_PGVECTOR_HOST', '127.0.0.1'),
- 'port': os.environ.get('MDB_TEST_PGVECTOR_PORT', '5432'),
- 'user': os.environ.get('MDB_TEST_PGVECTOR_USER', 'postgres'),
- 'password': os.environ.get('MDB_TEST_PGVECTOR_PASSWORD', 'supersecret'),
- 'database': None # Different for each test.
- }
-}
-
-MODEL_WAIT_DURATION_SECONDS = 5
-MODEL_WAIT_SLEEP_INTERVAL_SECONDS = 0.2
-
-
-@pytest.fixture(scope="session", autouse=True)
-def app():
- old_minds_db_con = ''
- if 'MINDSDB_DB_CON' in os.environ:
- old_minds_db_con = os.environ['MINDSDB_DB_CON']
- with TemporaryDirectory(prefix='knowledge_bases_integration_test_') as temp_dir:
- db_path = 'sqlite:///' + os.path.join(temp_dir, 'mindsdb.sqlite3.db')
- # Need to change env variable for migrate module, since it calls db.init().
- os.environ['MINDSDB_DB_CON'] = db_path
- db.init()
- migrate.migrate_to_head()
- config["gui"]["open_on_start"] = False
- config["gui"]["autoupdate"] = False
- app = initialize_app()
-
- test_client = app.test_client()
-
- # Create langchain embedding model to use in all tests.
- create_ml_engine_query = 'CREATE ML_ENGINE langchain_embedding FROM langchain_embedding;'
- create_ml_engine_data = {
- 'query': create_ml_engine_query
- }
- response = test_client.post('/api/sql/query', json=create_ml_engine_data, follow_redirects=True)
- assert '200' in response.status
-
- # Create model to use in all tests. Use OpenAI for embeddings.
- create_query = '''
- CREATE MODEL mindsdb.test_embedding_model
- PREDICT embeddings
- USING
- engine='langchain_embedding',
- class = 'OpenAIEmbeddings',
- input_columns = ['content'];
- '''
- train_data = {
- 'query': create_query
- }
- response = test_client.post('/api/projects/mindsdb/models', json=train_data, follow_redirects=True)
- assert '201' in response.status
-
- # Wait for model to complete.
- model_complete = False
- model_start_time = perf_counter()
- while not model_complete:
- if (perf_counter() - model_start_time) > MODEL_WAIT_DURATION_SECONDS:
- pytest.fail('Model test_embedding_model did not finish training in time')
- response = test_client.get('/api/projects/mindsdb/models/test_embedding_model')
- model_status = response.get_json().get('status', 'error')
- if model_status == 'complete':
- model_complete = True
- continue
- if model_status == 'error':
- pytest.fail('Model test_embedding_model encountered an error while training')
- sleep(MODEL_WAIT_SLEEP_INTERVAL_SECONDS)
- yield app
- os.environ['MINDSDB_DB_CON'] = old_minds_db_con
-
-
-@pytest.fixture()
-def client(app):
- return app.test_client()
-
-
-def init_db():
- '''Initialize a new DB for every test.'''
- conn_info = CONNECTION_KWARGS['connection_data'].copy()
- conn_info['database'] = 'postgres'
- db = psycopg2.connect(**conn_info)
- db.autocommit = True
- cursor = db.cursor()
-
- try:
- new_db_name = f'test_pgvector_{uuid4().hex}'
- # Create the test database if it does not exist.
- cursor.execute(f'DROP DATABASE IF EXISTS {new_db_name}')
- db.commit()
- cursor.execute(f'CREATE DATABASE {new_db_name};')
- db.commit()
-
- # Reconnect to the new database
- conn_info['database'] = new_db_name
- db = psycopg2.connect(**conn_info)
- db.autocommit = True
- cursor = db.cursor()
-
- # Seed the database with data
- curr_dir = os.path.dirname(os.path.realpath(__file__))
- seed_sql_path = os.path.join(curr_dir, 'data', 'seed.sql')
- with open(seed_sql_path, 'r') as sql_seed_file:
- cursor.execute(sql_seed_file.read())
- db.commit()
-
- finally:
- # Close the cursor and the connection
- cursor.close()
- db.close()
-
- return new_db_name
-
-
-@pytest.fixture(autouse=True)
-def pgvector_database_name(client):
- # Initialize a fresh DB for each test.
- new_db_name = init_db()
- # Connect new DB to MindsDB.
- conn_info = CONNECTION_KWARGS['connection_data'].copy()
- conn_info['database'] = new_db_name
- example_db_data = {
- 'database': {
- 'name': new_db_name,
- 'engine': 'pgvector',
- 'parameters': conn_info
- }
- }
- response = client.post('/api/databases', json=example_db_data, follow_redirects=True)
- assert '201' in response.status
- return new_db_name
-
-
-@pytest.mark.skipif(os.environ.get('MDB_TEST_PGVECTOR_HOST') is None, reason='MDB_TEST_PGVECTOR_HOST environment variable not set')
-@pytest.mark.skipif(os.environ.get('OPENAI_API_KEY') is None, reason='OPENAI_API_KEY environment variable not set')
-class TestKnowledgeBaseCompletion:
- def test_chat_completion(self, client, pgvector_database_name):
- test_kb_name = 'test_chat_completion_kb'
- create_request = {
- 'knowledge_base': {
- 'name': test_kb_name,
- 'model': 'test_embedding_model',
- 'storage': {
- 'table': TEST_TABLE_NAME,
- 'database': pgvector_database_name
- }
- }
- }
- create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_kb_response.status_code == HTTPStatus.CREATED
-
- # Insert documents to help answer the question.
- rows_to_insert = [
- {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB'}
- ]
- update_request = {
- 'knowledge_base': {
- 'rows': rows_to_insert
- }
- }
- update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
- json=update_request, follow_redirects=True)
- assert update_kb_response.status_code == HTTPStatus.OK
-
- completion_request = {
- 'query': 'What is the capital of Tyler Fantasy RAG Land?',
- 'llm_model': 'gpt-4o'
- }
- response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.OK
- response_data = response.get_json()
- assert 'message' in response_data
- assert 'content' in response_data['message']
- assert 'context' in response_data['message']
- assert response_data['message']['role'] == 'assistant'
- # Should get the right answer.
- assert 'mindsdb' in response_data['message']['content'].lower()
-
- def test_context_completion_with_keywords(self, client, pgvector_database_name):
- test_kb_name = 'test_context_completion_with_keywords'
- create_request = {
- 'knowledge_base': {
- 'name': test_kb_name,
- 'model': 'test_embedding_model',
- 'storage': {
- 'table': TEST_TABLE_NAME,
- 'database': pgvector_database_name
- }
- }
- }
- create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_kb_response.status_code == HTTPStatus.CREATED
-
- # Insert documents for context.
- rows_to_insert = [
- {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB'},
- {'content': 'The population of Tyler Fantasy RAG Land is 6'}
- ]
- update_request = {
- 'knowledge_base': {
- 'rows': rows_to_insert
- }
- }
- update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
- json=update_request, follow_redirects=True)
- assert update_kb_response.status_code == HTTPStatus.OK
-
- completion_request = {
- 'query': 'Population of rag land',
- 'keywords': 'population rag land',
- 'type': 'context'
- }
- response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.OK
- response_data = response.get_json()
- # Should have the most relevant document first.
- assert 'documents' in response_data
- assert len(response_data['documents']) == 2
- assert 'population' in response_data['documents'][0]['content']
-
- def test_context_completion_with_metadata(self, client, pgvector_database_name):
- test_kb_name = 'test_context_completion_with_metadata'
- create_request = {
- 'knowledge_base': {
- 'name': test_kb_name,
- 'model': 'test_embedding_model',
- 'storage': {
- 'table': TEST_TABLE_NAME,
- 'database': pgvector_database_name
- }
- }
- }
- create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_kb_response.status_code == HTTPStatus.CREATED
-
- # Insert documents for context.
- rows_to_insert = [
- {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB', 'author': 'Danya'},
- {'content': 'The population of Tyler Fantasy RAG Land is 6', 'author': 'Tyler'},
- {'content': 'Totally unrelated', 'author': 'Tyler'}
-
- ]
- update_request = {
- 'knowledge_base': {
- 'rows': rows_to_insert
- }
- }
- update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
- json=update_request, follow_redirects=True)
- assert update_kb_response.status_code == HTTPStatus.OK
-
- completion_request = {
- 'query': 'Population of rag land',
- 'metadata': {
- 'author': 'Tyler'
- },
- 'type': 'context'
- }
- response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.OK
- response_data = response.get_json()
- assert 'documents' in response_data
- # Only 2 have matching metadata.
- assert len(response_data['documents']) == 2
- # Should have the most relevant document first.
- assert 'population' in response_data['documents'][0]['content']
-
- def test_context_completion_with_keywords_and_metadata(self, client, pgvector_database_name):
- test_kb_name = 'test_context_completion_with_keywords_and_metadata'
- create_request = {
- 'knowledge_base': {
- 'name': test_kb_name,
- 'model': 'test_embedding_model',
- 'storage': {
- 'table': TEST_TABLE_NAME,
- 'database': pgvector_database_name
- }
- }
- }
- create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_kb_response.status_code == HTTPStatus.CREATED
-
- # Insert documents for context.
- rows_to_insert = [
- {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB', 'author': 'Danya'},
- {'content': 'The population of Tyler Fantasy RAG Land is 6', 'author': 'Tyler'},
- {'content': 'Totally unrelated', 'author': 'Tyler'}
-
- ]
- update_request = {
- 'knowledge_base': {
- 'rows': rows_to_insert
- }
- }
- update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
- json=update_request, follow_redirects=True)
- assert update_kb_response.status_code == HTTPStatus.OK
-
- completion_request = {
- 'query': 'Population of rag land',
- 'metadata': {
- 'author': 'Tyler'
- },
- 'keywords': 'rag land population',
- 'type': 'context'
- }
- response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.OK
- response_data = response.get_json()
- assert 'documents' in response_data
- # Only 2 have matching metadata.
- assert len(response_data['documents']) == 2
- # Should have the most relevant document first.
- assert 'population' in response_data['documents'][0]['content']
diff --git a/tests/unused/integration/metrics/test_metrics.py b/tests/unused/integration/metrics/test_metrics.py
deleted file mode 100644
index 970b592d834..00000000000
--- a/tests/unused/integration/metrics/test_metrics.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import datetime
-import os
-import pytest
-import requests
-import time
-from typing import Dict
-
-from tests.integration.utils.http_test_helpers import HTTPHelperMixin
-from tests.integration.conftest import HTTP_API_ROOT
-
-
-def _get_metrics():
- url = HTTP_API_ROOT.rstrip("api") + "metrics"
- print(f"Getting metrics from {url}")
- resp = requests.get(url).text
- print(f"Metrics response: {resp}")
- return resp
-
-
-def _wait_for_metric(name: str, labels: Dict[str, str], timeout: datetime.timedelta = None):
- if timeout is None:
- timeout = datetime.timedelta(seconds=30)
- start_time = datetime.datetime.now()
- while datetime.datetime.now() - start_time < timeout:
- metrics = _get_metrics()
- for metrics_line in metrics.split('\n'):
- if name not in metrics_line:
- continue
- # Check labels match metric.
- found = True
- for label_name, label_value in labels.items():
- if f'{label_name}="{label_value}"' not in metrics_line:
- found = False
- break
- if found:
- print(f"Found metric: {metrics_line}")
- metrics_value = metrics_line.split()[-1]
- return float(metrics_value)
- time.sleep(0.5)
- return -1
-
-
-class TestMetrics(HTTPHelperMixin):
- @pytest.mark.skipif(("localhost" in HTTP_API_ROOT or "127.0.0.1" in HTTP_API_ROOT) and os.getenv('PROMETHEUS_MULTIPROC_DIR') is None, reason="PROMETHEUS_MULTIPROC_DIR environment variable is not set")
- def test_http_metrics(self):
- # Make an HTTP request and check for updated metrics.
- api_metric_labels = {
- 'endpoint': '/util/ping_native',
- 'method': 'GET',
- 'status': '200'
- }
-
- before_metric = _wait_for_metric(
- 'mindsdb_rest_api_latency_seconds_count',
- api_metric_labels,
- )
- print(f"Before metric: {before_metric}")
- _ = self.api_request('get', '/util/ping_native')
- assert _wait_for_metric(
- 'mindsdb_rest_api_latency_seconds_count',
- api_metric_labels,
- ) == before_metric + 1
- # Check multiproc dir is populated.
- multiproc_dir = os.getenv('PROMETHEUS_MULTIPROC_DIR')
- # We can't check this dir if we're running against a remote env.
- if multiproc_dir is not None:
- assert os.path.isdir(multiproc_dir)
- assert len(os.listdir(os.getenv('PROMETHEUS_MULTIPROC_DIR'))) > 0
diff --git a/tests/unused/integration/rag/test_rag_search_kwargs.py b/tests/unused/integration/rag/test_rag_search_kwargs.py
deleted file mode 100644
index 7ee668790fd..00000000000
--- a/tests/unused/integration/rag/test_rag_search_kwargs.py
+++ /dev/null
@@ -1,271 +0,0 @@
-import os
-import uuid
-import pytest
-from unittest.mock import Mock, patch
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-from langchain_core.documents import Document
-from langchain.vectorstores.base import VectorStore
-import tempfile
-import shutil
-from langchain_community.vectorstores import Chroma
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-
-from mindsdb.integrations.utilities.rag.settings import (
- RAGPipelineModel,
- RetrieverType,
- SearchKwargs,
- SearchType,
- MultiVectorRetrieverMode,
- DEFAULT_LLM_MODEL,
- DEFAULT_LLM_ENDPOINT
-)
-from mindsdb.integrations.utilities.rag.pipelines.rag import LangChainRAGPipeline
-
-requires_openai = pytest.mark.skipif(
- not os.getenv("OPENAI_API_KEY"),
- reason="OPENAI_API_KEY environment variable not set"
-)
-
-
-@pytest.fixture
-def chat_llm():
- api_key = os.getenv("OPENAI_API_KEY")
- if not api_key:
- pytest.skip("OPENAI_API_KEY environment variable not set")
- return ChatOpenAI(
- model=DEFAULT_LLM_MODEL,
- openai_api_base=DEFAULT_LLM_ENDPOINT,
- api_key=api_key
- )
-
-
-@pytest.fixture
-def embeddings():
- api_key = os.getenv("OPENAI_API_KEY")
- if not api_key:
- pytest.skip("OPENAI_API_KEY environment variable not set")
- return OpenAIEmbeddings(api_key=api_key)
-
-
-class MockVectorStore(VectorStore):
- def add_texts(self, *args, **kwargs):
- pass
-
- def similarity_search(self, *args, **kwargs):
- pass
-
- def as_retriever(self, **kwargs):
- return Mock()
-
-
-@pytest.fixture
-def sample_documents():
- return [
- Document(page_content="Test document 1", metadata={"source": "test1"}),
- Document(page_content="Test document 2", metadata={"source": "test2"})
- ]
-
-
-@pytest.fixture
-def vector_store_path():
- temp_dir = tempfile.mkdtemp()
- yield temp_dir
- shutil.rmtree(temp_dir)
-
-
-@pytest.fixture
-def vector_store(embeddings, vector_store_path):
- return Chroma(
- embedding_function=embeddings,
- persist_directory=vector_store_path
- )
-
-
-@pytest.fixture
-def base_config(sample_documents, chat_llm, embeddings, vector_store):
- return RAGPipelineModel(
- documents=sample_documents,
- vector_store=vector_store,
- embedding_model=embeddings,
- llm=chat_llm
- )
-
-
-class TestRAGSearchKwargs:
- @pytest.fixture(autouse=True)
- def setup(self, base_config, sample_documents, chat_llm, embeddings, vector_store):
- """Setup test configuration with fixtures"""
- self.base_config = base_config
- self.sample_documents = sample_documents
- self.chat_llm = chat_llm
- self.embeddings = embeddings
- self.vector_store = vector_store
- self.base_dict = {
- 'documents': self.sample_documents,
- 'vector_store': self.vector_store,
- 'embedding_model': self.embeddings,
- 'llm': self.chat_llm
- }
-
- @requires_openai
- def test_vector_store_retriever_search_kwargs(self):
- config = RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.SIMILARITY_SCORE_THRESHOLD,
- search_kwargs=SearchKwargs(
- k=3,
- score_threshold=0.5
- ),
- retriever_type=RetrieverType.VECTOR_STORE
- )
- mock_retriever = Mock()
- mock_retriever.search_kwargs = {"k": 3, "score_threshold": 0.5}
- with patch('mindsdb.integrations.utilities.rag.vector_store.VectorStoreOperator') as mock_vs_op:
- mock_vs_op.return_value.vector_store.as_retriever.return_value = mock_retriever
- _ = LangChainRAGPipeline.from_retriever(config)
- assert mock_retriever.search_kwargs == {"k": 3, "score_threshold": 0.5}
-
- def test_auto_retriever_search_kwargs(self):
- config = RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.MMR,
- search_kwargs=SearchKwargs(
- k=2,
- fetch_k=4,
- lambda_mult=0.7
- ),
- retriever_type=RetrieverType.AUTO
- )
- mock_retriever = Mock()
- mock_retriever.search_kwargs = {"k": 2, "fetch_k": 4, "lambda_mult": 0.7}
- mock_llm_response = Mock()
- mock_llm_response.content = '[{"name": "source", "description": "Source field", "type": "string"}]'
- with patch('mindsdb.integrations.utilities.rag.retrievers.auto_retriever.AutoRetriever') as MockAutoRetriever, \
- patch('langchain_openai.chat_models.ChatOpenAI.invoke', return_value=mock_llm_response):
- mock_auto = Mock()
- mock_auto.as_runnable.return_value = mock_retriever
- MockAutoRetriever.return_value = mock_auto
- _ = LangChainRAGPipeline.from_auto_retriever(config)
- assert mock_retriever.search_kwargs == {"k": 2, "fetch_k": 4, "lambda_mult": 0.7}
-
- def test_search_kwargs_validation(self):
- """Test the validation rules for SearchKwargs"""
- # Test fetch_k validation for MMR search type
- with pytest.raises(ValueError, match="fetch_k must be greater than k"):
- RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.MMR,
- search_kwargs=SearchKwargs(
- k=5,
- fetch_k=3,
- lambda_mult=0.7
- )
- )
-
- # Test MMR parameter requirements
- with pytest.raises(ValueError, match="lambda_mult is required when using fetch_k"):
- RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.MMR,
- search_kwargs=SearchKwargs(
- k=3,
- fetch_k=5
- )
- )
-
- with pytest.raises(ValueError, match="fetch_k is required when using lambda_mult"):
- RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.MMR,
- search_kwargs=SearchKwargs(
- k=3,
- lambda_mult=0.7
- )
- )
-
- # Test score_threshold requirement for SIMILARITY_SCORE_THRESHOLD
- with pytest.raises(ValueError, match="score_threshold is required"):
- RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.SIMILARITY_SCORE_THRESHOLD,
- search_kwargs=SearchKwargs(
- k=3
- )
- )
-
- def test_search_type_compatibility(self):
- """Test that search kwargs match the search type"""
- # Test MMR search configuration
- config = RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.MMR,
- search_kwargs=SearchKwargs(
- k=3,
- fetch_k=6,
- lambda_mult=0.7
- )
- )
- assert config.search_kwargs.fetch_k == 6
- assert config.search_kwargs.lambda_mult == 0.7
-
- # Test similarity_score_threshold configuration
- config = RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.SIMILARITY_SCORE_THRESHOLD,
- search_kwargs=SearchKwargs(
- k=3,
- score_threshold=0.5
- )
- )
- assert config.search_kwargs.score_threshold == 0.5
-
- # Test basic similarity configuration
- config = RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.SIMILARITY,
- search_kwargs=SearchKwargs(
- k=3,
- filter={"source": "test1"}
- )
- )
- assert config.search_kwargs.filter == {"source": "test1"}
-
- def test_multi_vector_retriever_search_kwargs(self):
- """Test search kwargs for multi vector retriever"""
- config = RAGPipelineModel(
- **self.base_dict,
- search_type=SearchType.SIMILARITY,
- search_kwargs=SearchKwargs(
- k=5,
- filter={"source": "test1"}
- ),
- retriever_type=RetrieverType.MULTI,
- multi_retriever_mode=MultiVectorRetrieverMode.BOTH
- )
-
- mock_retriever = Mock()
- mock_retriever.search_kwargs = {"k": 5, "filter": {"source": "test1"}}
-
- with patch('mindsdb.integrations.utilities.rag.pipelines.rag.MultiVectorRetriever') as MockMultiRetrieverClass:
- class MockMultiRetriever:
- def __init__(self, config):
- self.text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=config.chunk_size,
- chunk_overlap=config.chunk_overlap
- )
- self.documents = config.documents
- self.config = config
-
- def as_runnable(self):
- return mock_retriever
-
- def _split_documents(self):
- return [], []
-
- def _generate_id_and_split_document(self, doc):
- return str(uuid.uuid4()), [doc]
-
- MockMultiRetrieverClass.side_effect = MockMultiRetriever
-
- _ = LangChainRAGPipeline.from_multi_vector_retriever(config)
- assert mock_retriever.search_kwargs == {"k": 5, "filter": {"source": "test1"}}
diff --git a/tests/unused/integrations/utilities/rag/rerankers/test_openai_reranker.py b/tests/unused/integrations/utilities/rag/rerankers/test_openai_reranker.py
deleted file mode 100644
index 7b0c5cafa91..00000000000
--- a/tests/unused/integrations/utilities/rag/rerankers/test_openai_reranker.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from langchain.schema import Document
-import pytest
-
-from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker
-from mindsdb.integrations.utilities.rag.settings import RerankerConfig
-
-
-@pytest.mark.asyncio
-async def test_openai_reranker():
- openai_reranker = LLMReranker()
- results = await openai_reranker.compress_documents(
- documents=[Document(page_content="Jack declared that he likes cats more than dogs"),
- Document(page_content="Jack declared that he likes AI")],
- query="Jack's opinion on animals",
- )
- assert len(results) == 1
- assert "cats" in results[0].page_content
-
-
-@pytest.mark.asyncio
-async def test_openai_reranker_diff_threshold():
- openai_reranker = LLMReranker(filtering_threshold=0.6)
- assert openai_reranker.filtering_threshold == 0.6
- results = await openai_reranker.compress_documents(
- documents=[Document(page_content="Jack declared that he likes cats more than dogs"),
- Document(page_content="Jack declared that he likes AI")],
- query="Jack's opinion on animals",
- )
- assert len(results) == 1
- assert "cats" in results[0].page_content
- assert openai_reranker.filtering_threshold == 0.6
-
-
-@pytest.mark.asyncio
-async def test_openai_reranker_config():
- config = RerankerConfig(filtering_threshold=0.6, model="gpt-3.5-turbo", base_url="https://api.openai.com/v1")
- openai_reranker = LLMReranker(filtering_threshold=config.filtering_threshold, model=config.model,
- base_url=config.base_url)
- assert openai_reranker.filtering_threshold == 0.6
- results = await openai_reranker.compress_documents(
- documents=[Document(page_content="Jack declared that he likes cats more than dogs"),
- Document(page_content="Jack declared that he likes AI")],
- query="Jack's opinion on animals",
- )
- assert len(results) == 1
- assert "cats" in results[0].page_content
- assert openai_reranker.filtering_threshold == 0.6
diff --git a/tests/unused/integrations/utilities/rag/retrievers/test_multi_hop_retriever.py b/tests/unused/integrations/utilities/rag/retrievers/test_multi_hop_retriever.py
deleted file mode 100644
index 9cbb199f966..00000000000
--- a/tests/unused/integrations/utilities/rag/retrievers/test_multi_hop_retriever.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from typing import List, Any, Optional
-
-import pytest
-from langchain_core.documents import Document
-from langchain_core.retrievers import BaseRetriever
-from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import BaseMessage
-
-from mindsdb.integrations.utilities.rag.retrievers import MultiHopRetriever
-
-
-# Simple template for testing
-TEST_TEMPLATE = """Question: {question}
-Context: {context}
-Generate follow-up questions:"""
-
-
-class MockRetriever(BaseRetriever):
- """Simple mock retriever that returns predefined documents"""
- def _get_relevant_documents(self, query: str, **kwargs) -> List[Document]:
- if "Wright brothers" in query:
- return [Document(page_content="The Wright brothers invented the airplane.")]
- if "World War 1" in query:
- return [Document(page_content="Airplanes were used extensively in WWI.")]
- return []
-
-
-class MockLLM(BaseChatModel):
- """Simple mock LLM that returns predefined responses"""
- @property
- def _llm_type(self) -> str:
- return "mock"
-
- def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[Any] = None, **kwargs) -> Any:
- raise NotImplementedError("Not needed for tests")
-
- def invoke(self, input_str: str, **kwargs) -> str:
- if "Wright brothers" in str(input_str):
- return '["How were airplanes used in World War 1?"]'
- return "[]"
-
-
-class InvalidOutputLLM(BaseChatModel):
- """Mock LLM that always returns invalid JSON"""
- @property
- def _llm_type(self) -> str:
- return "mock"
-
- def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[Any] = None, **kwargs) -> Any:
- raise NotImplementedError("Not needed for tests")
-
- def invoke(self, input_str: str, **kwargs) -> str:
- return "invalid json"
-
-
-@pytest.fixture
-def mock_retriever():
- return MockRetriever()
-
-
-@pytest.fixture
-def mock_llm():
- return MockLLM()
-
-
-def test_multi_hop_retriever_basic_functionality(mock_retriever, mock_llm):
- """Test the basic functionality of MultiHopRetriever"""
- retriever = MultiHopRetriever(
- base_retriever=mock_retriever,
- llm=mock_llm,
- max_hops=2,
- reformulation_template=TEST_TEMPLATE
- )
-
- # Test with a query that should trigger follow-up
- docs = retriever._get_relevant_documents("Tell me about the Wright brothers")
-
- # Should have documents from both queries
- assert len(docs) == 2
- assert any("Wright brothers" in doc.page_content for doc in docs)
- assert any("WWI" in doc.page_content for doc in docs)
-
-
-def test_multi_hop_retriever_no_results(mock_retriever, mock_llm):
- """Test behavior when no documents are found"""
- retriever = MultiHopRetriever(
- base_retriever=mock_retriever,
- llm=mock_llm,
- max_hops=2,
- reformulation_template=TEST_TEMPLATE
- )
-
- # Test with a query that won't find any documents
- docs = retriever._get_relevant_documents("Something unrelated")
-
- # Should have no documents
- assert len(docs) == 0
-
-
-def test_multi_hop_retriever_invalid_llm_output(mock_retriever):
- """Test handling of invalid LLM output"""
- retriever = MultiHopRetriever(
- base_retriever=mock_retriever,
- llm=InvalidOutputLLM(),
- max_hops=2,
- reformulation_template=TEST_TEMPLATE
- )
-
- # Should still work and return initial results
- docs = retriever._get_relevant_documents("Tell me about the Wright brothers")
- assert len(docs) == 1
- assert "Wright brothers" in docs[0].page_content
-
-
-def test_multi_hop_retriever_max_hops(mock_retriever, mock_llm):
- """Test that max_hops is respected"""
- retriever = MultiHopRetriever(
- base_retriever=mock_retriever,
- llm=mock_llm,
- max_hops=1, # Only allow 1 hop
- reformulation_template=TEST_TEMPLATE
- )
-
- # Should only get initial documents
- docs = retriever._get_relevant_documents("Tell me about the Wright brothers")
- assert len(docs) == 1
- assert "Wright brothers" in docs[0].page_content
diff --git a/tests/unused/integrations/utilities/rag/test_file_loader.py b/tests/unused/integrations/utilities/rag/test_file_loader.py
deleted file mode 100644
index 092ca0936f2..00000000000
--- a/tests/unused/integrations/utilities/rag/test_file_loader.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
-
-
-def test_load_pdf():
- loader = FileLoader('./tests/data/test.pdf')
- docs = loader.load()
- # Each page is a doc.
- assert len(docs) == 3
- assert 'THE CASE FOR MACHINE LEARNING' in docs[0].page_content
- assert 'INTRODUCTION' in docs[1].page_content
- assert 'THE CASE FOR \nDEMOCRATIZING \nMACHINE LEARNING' in docs[2].page_content
-
-
-def test_load_csv():
- loader = FileLoader('./tests/data/movies.csv')
- docs = loader.load()
- # Each row is a doc.
- assert len(docs) == 10
- assert 'Toy Story' in docs[0].page_content
- assert 'GoldenEye' in docs[9].page_content
-
-
-def test_load_html():
- loader = FileLoader('./tests/data/test.html')
- docs = loader.load()
- assert len(docs) == 1
- assert 'Some intro text about Foo' in docs[0].page_content
-
-
-def test_load_md():
- loader = FileLoader('./mindsdb/integrations/handlers/langchain_handler/README.md')
- docs = loader.load()
- assert len(docs) == 1
- assert 'This documentation describes the integration of MindsDB with LangChain' in docs[0].page_content
-
-
-def test_load_text():
- loader = FileLoader('./tests/data/test.txt')
- docs = loader.load()
- assert len(docs) == 1
- assert 'This is a test plaintext file' in docs[0].page_content
diff --git a/tests/unused/integrations/utilities/rag/test_file_splitter.py b/tests/unused/integrations/utilities/rag/test_file_splitter.py
deleted file mode 100644
index ce757284b4b..00000000000
--- a/tests/unused/integrations/utilities/rag/test_file_splitter.py
+++ /dev/null
@@ -1,164 +0,0 @@
-from unittest.mock import patch
-
-from langchain_core.documents import Document
-from langchain_text_splitters import MarkdownHeaderTextSplitter, HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter
-from mindsdb.integrations.utilities.rag.splitters.file_splitter import FileSplitter, FileSplitterConfig
-
-
-def test_split_documents_pdf():
- pdf_doc = Document(
- page_content='This is a test PDF file. Let us try to do some splitting!',
- metadata={'extension': '.pdf'}
- )
- recursive_splitter = RecursiveCharacterTextSplitter()
- file_splitter = FileSplitter(FileSplitterConfig(
- recursive_splitter=recursive_splitter
- ))
- split_pdf_docs = file_splitter.split_documents([pdf_doc])
- assert len(split_pdf_docs) > 0
-
-
-def test_split_documents_md():
- md_content = '''
- # Unit Testing for Dummies
- This MD document covers how to write basic unit tests.
- ## Introduction
- Unit testing helps ensure code works as expected and prevents regressions. Time to dive in!
- ## How to Write Tests
- To be continued!
-'''
- md_doc = Document(
- page_content=md_content,
- metadata={'extension': '.md'}
- )
- headers_to_split_on = [
- ('#', 'Header 1'),
- ('##', 'Header 2'),
- ]
- md_text_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
- file_splitter = FileSplitter(FileSplitterConfig(
- markdown_splitter=md_text_splitter
- ))
- split_md_docs = file_splitter.split_documents([md_doc])
- assert len(split_md_docs) == 3
- # Check we actually split on headers.
- assert 'This MD document covers how to write basic unit tests.' in split_md_docs[0].page_content
- assert 'Unit testing helps ensure code works as expected and prevents regressions. Time to dive in!' in split_md_docs[1].page_content
- assert 'To be continued!' in split_md_docs[2].page_content
-
-
-def test_split_documents_html():
- html_content = '''
-
-
-
-
-
Foo
-
Some intro text about Foo.
-
-
Bar main section
-
Some intro text about Bar.
-
Bar subsection 1
-
Some text about the first subtopic of Bar.
-
Bar subsection 2
-
Some text about the second subtopic of Bar.
-
-
-
Baz
-
Some text about Baz
-
-
-
Some concluding text about Foo
-
-
-
-'''
- headers_to_split_on = [
- ('h1', 'Header 1'),
- ('h2', 'Header 2'),
- ('h3', 'Header 3')
- ]
- html_text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
- file_splitter = FileSplitter(FileSplitterConfig(
- html_splitter=html_text_splitter
- ))
- html_doc = Document(
- page_content=html_content,
- metadata={'extension': '.html'}
- )
- split_html_docs = file_splitter.split_documents([html_doc])
- assert len(split_html_docs) == 8
- # # Check we actually split on headers.
- assert 'Foo' in split_html_docs[0].page_content
- assert 'Some intro text about Foo' in split_html_docs[1].page_content
- assert 'Some intro text about Bar' in split_html_docs[2].page_content
- assert 'Some text about the first subtopic of Bar' in split_html_docs[3].page_content
- assert 'Some text about the second subtopic of Bar' in split_html_docs[4].page_content
- assert 'Baz' in split_html_docs[5].page_content
- assert 'Some text about Baz' in split_html_docs[6].page_content
- assert 'Some concluding text about Foo' in split_html_docs[7].page_content
-
-
-def test_split_documents_default():
- recursive_splitter = RecursiveCharacterTextSplitter()
- file_splitter = FileSplitter(FileSplitterConfig(
- recursive_splitter=recursive_splitter
- ))
- txt_doc = Document(
- page_content='This is a text file!',
- metadata={'extension': '.txt'}
- )
- split_txt_docs = file_splitter.split_documents([txt_doc])
- assert len(split_txt_docs) == 1
- assert 'This is a text file!' in split_txt_docs[0].page_content
-
-
-@patch('mindsdb.integrations.utilities.rag.splitters.file_splitter.MarkdownHeaderTextSplitter')
-def test_split_documents_failover(mock_md_splitter):
- md_content = '''
- # Unit Testing for Dummies
- This MD document covers how to write basic unit tests.
- ## Introduction
- Unit testing helps ensure code works as expected and prevents regressions. Time to dive in!
- ## How to Write Tests
- To be continued!
-'''
- mock_md_splitter.split_text.side_effect = Exception('Something went wrong!')
- file_splitter = FileSplitter(FileSplitterConfig(
- markdown_splitter=mock_md_splitter
- ))
- md_doc = Document(
- page_content=md_content,
- metadata={'extension': '.md'}
- )
-
- # Should throw an exception and go to default.
- split_md_docs = file_splitter.split_documents([md_doc])
- assert len(split_md_docs) > 0
-
-
-@patch('mindsdb.integrations.utilities.rag.splitters.file_splitter.MarkdownHeaderTextSplitter')
-def test_split_documents_no_failover(mock_md_splitter):
- md_content = '''
- # Unit Testing for Dummies
- This MD document covers how to write basic unit tests.
- ## Introduction
- Unit testing helps ensure code works as expected and prevents regressions. Time to dive in!
- ## How to Write Tests
- To be continued!
-'''
- mock_md_splitter.split_text.side_effect = Exception('Something went wrong!')
- file_splitter = FileSplitter(FileSplitterConfig(
- markdown_splitter=mock_md_splitter
- ))
- md_doc = Document(
- page_content=md_content,
- metadata={'extension': '.md'}
- )
-
- # Should throw an exception.
- try:
- _ = file_splitter.split_documents([md_doc], default_failover=False)
- except Exception:
- return
- assert False
diff --git a/tests/unused/interfaces/agents/test_agents_controller.py b/tests/unused/interfaces/agents/test_agents_controller.py
deleted file mode 100644
index ba5c4b41028..00000000000
--- a/tests/unused/interfaces/agents/test_agents_controller.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from unittest.mock import patch
-
-import pandas as pd
-
-from mindsdb.interfaces.storage import db
-from mindsdb.interfaces.agents.agents_controller import AgentsController
-
-
-@patch('mindsdb.api.executor.datahub.datanodes.project_datanode.ProjectDataNode')
-@patch('mindsdb.api.executor.datahub.datahub.InformationSchemaDataNode')
-@patch('mindsdb.interfaces.model.model_controller.ModelController')
-def test_get_completion(mock_model_controller, mock_schema_datanode, mock_project_datanode):
- mock_project_datanode_instance = mock_project_datanode.return_value
- mock_datanode_instance = mock_schema_datanode.return_value
- mock_datanode_instance.get.return_value = mock_project_datanode_instance
- mock_model_controller_instance = mock_model_controller.return_value
- mock_model_controller_instance.get_model.return_value = {
- 'model_name': 'test_model',
- 'predict': 'answer',
- 'problem_definition': {
- 'using':
- {
- 'prompt_template': 'What is the meaning of life?'
- }
- }
- }
- agents_controller = AgentsController(
- mock_datanode_instance,
- model_controller=mock_model_controller_instance)
-
- completion_response = {'answer': '42'}
- df = pd.DataFrame.from_records([completion_response])
- mock_project_datanode_instance.predict.return_value = df
- agent = db.Agents()
- agent.model_name = 'test_model'
- agent.provider = 'mindsdb'
- agent.params = {}
- messages = [{'question': 'What is the meaning of life?', 'answer': None}]
- completion_df = agents_controller.get_completion(agent, messages)
-
- assert not completion_df.empty
- assert 'answer' in completion_df.columns
- assert completion_df['answer'].loc[0] == '42'
diff --git a/tests/unused/load/__init__.py b/tests/unused/load/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/unused/load/tasks.py b/tests/unused/load/tasks.py
deleted file mode 100644
index 9a4e364e9c2..00000000000
--- a/tests/unused/load/tasks.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from locust import SequentialTaskSet, task, events
-from tests.utils.query_generator import QueryGenerator as query
-from tests.utils.config import get_value_from_json_env_var, generate_random_db_name
-
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class BaseDBConnectionBehavior(SequentialTaskSet):
- def on_start(self):
- """This method is called once for each user when they start."""
- self.query_generator = query()
- self.random_db_name = generate_random_db_name(f"{self.db_type}_datasource")
- self.create_new_datasource()
-
- def __post_query(self, query):
- try:
- response = self.client.post('/api/sql/query', json={'query': query})
- response.raise_for_status()
- assert response.json()['type'] != 'error'
- return response
- except Exception as e:
- logger.error(f'Error running {query}: {e}')
- events.request.fire(request_type="POST", name="/api/sql/query", response_time=0, response_length=0, exception=e)
- self.interrupt(reschedule=True)
-
- def create_new_datasource(self):
- """This method creates a new data source."""
- db_config = get_value_from_json_env_var("INTEGRATIONS_CONFIG", self.db_type)
- query = self.query_generator.create_database_query(
- self.random_db_name,
- self.db_type,
- db_config
- )
- self.__post_query(query)
-
- @task
- def select_integration_query(self):
- """This task performs a SELECT query from integration."""
- query = f'SELECT * FROM {self.random_db_name}.{self.table_name} LIMIT 10'
- self.__post_query(query)
-
- @task
- def run_native_query(self):
- """This task runs a native DB select query."""
- for n_query in self.native_queries:
- query = f'SELECT * FROM {self.random_db_name}( {n_query})'
- self.__post_query(query)
diff --git a/tests/unused/load/test_postgresql.py b/tests/unused/load/test_postgresql.py
deleted file mode 100644
index d7e137ab5c8..00000000000
--- a/tests/unused/load/test_postgresql.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from tests.load.tasks import BaseDBConnectionBehavior
-
-
-class PostgreSQLConnectionBehavior(BaseDBConnectionBehavior):
- """
- This class defines the behavior of a PostgreSQL connection.
- @TODO: Read query values from sql_queries.json file
- """
- db_type = "postgres"
- table_name = "solar_flare_data"
- native_queries = ["native_query_average", "native_query_aggregation", "native_query_max", "native_query_grouping"]
- native_query_aggregation = f"SELECT COUNT(*) AS total_flares FROM tests.{table_name};"
- native_query_average = f"SELECT AVG(peak_c_per_s) AS avg_peak_counts FROM tests.{table_name};"
- native_query_max = f"SELECT MAX(energy_kev) AS max_energy FROM tests.{table_name};"
- native_query_grouping = f"SELECT active_region_ar, COUNT(*) AS flare_count FROM tests.{table_name} GROUP BY active_region_ar;"
diff --git a/tests/unused/load/tests_start.py b/tests/unused/load/tests_start.py
deleted file mode 100644
index 3eaed9bf111..00000000000
--- a/tests/unused/load/tests_start.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from locust import between, HttpUser
-from tests.load.test_postgresql import PostgreSQLConnectionBehavior
-from tests.utils.config import get_value_from_json_env_var
-
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class DBConnectionUser(HttpUser):
- tasks = [PostgreSQLConnectionBehavior]
- wait_time = between(5, 15)
- config = get_value_from_json_env_var("INTEGRATIONS_CONFIG", "mindsdb_cloud")
- host = config['host']
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- try:
- response = self.client.post('/cloud/login', json={
- 'email': self.config['user'],
- 'password': self.config['password']
- })
- response.raise_for_status()
- except Exception as e:
- logger.error(f'Logging to MindsDB failed: {e}')
diff --git a/tests/unused/unit/api/http/knowledge_bases_test.py b/tests/unused/unit/api/http/knowledge_bases_test.py
deleted file mode 100644
index 0e0f65b1fd7..00000000000
--- a/tests/unused/unit/api/http/knowledge_bases_test.py
+++ /dev/null
@@ -1,1049 +0,0 @@
-from http import HTTPStatus
-
-import pytest
-from unittest.mock import patch, MagicMock
-
-from mindsdb.api.mysql.mysql_proxy.mysql_proxy import SQLAnswer
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-
-def test_prepare(client):
- # Create langchain embedding model to use in all tests.
- create_ml_engine_query = 'CREATE ML_ENGINE langchain_embedding FROM langchain_embedding;'
- create_ml_engine_data = {
- 'query': create_ml_engine_query
- }
- response = client.post('/api/sql/query', json=create_ml_engine_data, follow_redirects=True)
- assert response.status_code == HTTPStatus.OK
-
- # Create model to use in all tests.
- create_query = '''
- CREATE MODEL mindsdb.test_embedding_model
- PREDICT embeddings
- USING
- engine='langchain_embedding',
- class = 'FakeEmbeddings',
- size = 512,
- input_columns = ['content'];
- '''
- train_data = {
- 'query': create_query
- }
- response = client.post('/api/projects/mindsdb/models', json=train_data, follow_redirects=True)
- assert response.status_code == HTTPStatus.CREATED
-
-
-def test_get_knowledge_base(client):
- get_knowledge_bases_response = client.get('/api/projects/mindsdb/knowledge_bases')
-
- assert len(get_knowledge_bases_response.get_json()) == 0
-
- create_request = {
- 'knowledge_base': {
- 'name': 'test_get_kb',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- get_knowledge_bases_response = client.get('/api/projects/mindsdb/knowledge_bases')
- assert get_knowledge_bases_response.status_code == HTTPStatus.OK
-
- all_knowledge_bases = get_knowledge_bases_response.get_json()
- assert len(all_knowledge_bases) == 1
-
- get_knowledge_base_response = client.get('/api/projects/mindsdb/knowledge_bases/test_get_kb')
- assert get_knowledge_base_response.status_code == HTTPStatus.OK
-
- created_knowledge_base = create_response.get_json()
- test_get_kb = get_knowledge_base_response.get_json()
- expected_create_knowledge_base = {
- 'name': 'test_get_kb',
- 'embedding_model': 'test_embedding_model',
- 'vector_database': 'test_get_kb_chromadb',
- 'vector_database_table': 'default_collection',
- 'id': created_knowledge_base['id'],
- 'project_id': created_knowledge_base['project_id'],
- 'params': created_knowledge_base['params'],
- 'created_at': created_knowledge_base['created_at'],
- 'updated_at': created_knowledge_base['updated_at']
- }
- assert created_knowledge_base == expected_create_knowledge_base
- assert test_get_kb == expected_create_knowledge_base
-
- # Returned fields are slightly different for GET all vs POST.
- fetched_knowledge_base = all_knowledge_bases[0]
- expected_get_knowledge_base = {
- 'name': 'test_get_kb',
- 'embedding_model': 'test_embedding_model',
- 'vector_database': 'test_get_kb_chromadb',
- 'vector_database_table': 'default_collection',
- 'id': created_knowledge_base['id'],
- 'project_name': 'mindsdb',
- 'project_id': created_knowledge_base['project_id'],
- 'params': created_knowledge_base['params']
- }
- assert fetched_knowledge_base == expected_get_knowledge_base
-
-
-def test_create_knowledge_base_no_storage(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- created_knowledge_base = create_response.get_json()
- expected_knowledge_base = {
- 'name': 'test_kb',
- 'embedding_model': 'test_embedding_model',
- 'vector_database': 'test_kb_chromadb',
- 'vector_database_table': 'default_collection',
- 'id': created_knowledge_base['id'],
- 'project_id': created_knowledge_base['project_id'],
- 'params': created_knowledge_base['params'],
- 'created_at': created_knowledge_base['created_at'],
- 'updated_at': created_knowledge_base['updated_at']
- }
- assert created_knowledge_base == expected_knowledge_base
-
-
-def test_create_knowledge_base_no_knowledge_base_param(client):
- create_request = {
- 'name': 'test_kb',
- 'model': 'test_embedding_model'
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_create_knowledge_base_no_name(client):
- create_request = {
- 'knowledge_base': {
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_create_knowledge_base_no_model(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_create_knowledge_base_no_storage_database(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb',
- 'model': 'test_embedding_model',
- 'storage': {
- 'table': 'vector_db_table'
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_create_knowledge_base_no_storage_table(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb',
- 'model': 'test_embedding_model',
- 'storage': {
- 'database': 'vector_db'
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_create_knowledge_base_project_not_found(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/buoluobao/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.NOT_FOUND
-
-
-def test_create_knowledge_base_already_exists(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_already_exists',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CONFLICT
-
-
-def test_delete_knowledge_base(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_delete_kb',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- delete_response = client.delete('/api/projects/mindsdb/knowledge_bases/test_delete_kb', follow_redirects=True)
- assert delete_response.status_code == HTTPStatus.NO_CONTENT
-
- get_response = client.get('/api/projects/mindsdb/knowledge_bases/test_delete_kb', follow_redirects=True)
- assert get_response.status_code == HTTPStatus.NOT_FOUND
-
-
-def test_delete_knowledge_base_project_not_found(client):
- delete_response = client.delete('/api/projects/chasiubao/knowledge_bases/test_kb', follow_redirects=True)
- assert delete_response.status_code == HTTPStatus.NOT_FOUND
-
-
-def test_delete_knowledge_base_not_found(client):
- delete_response = client.delete('/api/projects/mindsdb/knowledge_bases/xiaolongbao_kb', follow_redirects=True)
- assert delete_response.status_code == HTTPStatus.NOT_FOUND
-
-
-def test_put_knowledge_base_rows(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_update_rows',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- content_to_embed = '''To begin with a perfect Peking duck recipe at home, firstly choose head on (easy to hang for air drying out), clean and leaner ducks.
- Add around 1 teaspoon of white vinegar in clean water and soak the duck for 1 hour. Then prepare lines and tie the ducks from the top of the neck.
- Hang them on hooks. I hang the ducks on the top of kitchen pool.
- Please note: I make this peking duck in March when the room temperature is around 13-15 degree C, you will need to hang the duck in fridge or in a room with air conditioner in hot summer days.
- '''
-
- test_id = 0
- rows_to_insert = [
- {'id': test_id, 'content': content_to_embed}
- ]
- update_request = {
- 'knowledge_base': {
- 'rows': rows_to_insert
- }
- }
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- # Create a mock KB instance
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Setup the _kb attribute with required params
- mock_kb = MagicMock()
- mock_kb.params = {'preprocessing': None} # Initial state
- mock_table_instance._kb = mock_kb
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_update_rows',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify insert_rows was called with correct data
- mock_table_instance.insert_rows.assert_called_once()
- actual_rows = mock_table_instance.insert_rows.call_args[0][0]
- assert len(actual_rows) == 1
- assert actual_rows[0]['id'] == test_id
- assert actual_rows[0]['content'] == content_to_embed
-
-
-def test_put_knowledge_base_query(client):
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_update_query',
- 'model': 'test_embedding_model'
- }
- }
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- content_to_embed = '''To begin with a perfect Peking duck recipe at home, firstly choose head on (easy to hang for air drying out), clean and leaner ducks.
- Add around 1 teaspoon of white vinegar in clean water and soak the duck for 1 hour. Then prepare lines and tie the ducks from the top of the neck.
- Hang them on hooks. I hang the ducks on the top of kitchen pool.
- Please note: I make this peking duck in March when the room temperature is around 13-15 degree C, you will need to hang the duck in fridge or in a room with air conditioner in hot summer days.
- '''
- update_request = {
- 'knowledge_base': {
- 'query': 'SELECT * FROM mock_db.recipes'
- }
- }
-
- # Mock the FakeMysqlProxy
- mock_proxy = MagicMock()
- mock_proxy.process_query.return_value = SQLAnswer(
- resp_type=RESPONSE_TYPE.TABLE,
- columns=[{'alias': 'id'}, {'name': 'content'}],
- data=[(0, content_to_embed)]
- )
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Setup the mock table
- mock_kb = MagicMock()
- mock_kb.params = {'preprocessing': None}
- mock_table_instance._kb = mock_kb
- mock_table_instance.mysql_proxy = mock_proxy
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_update_query',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify insert was called with correct data
- mock_table_instance.insert_query_result.assert_called_once_with(
- 'SELECT * FROM mock_db.recipes',
- 'mindsdb'
- )
-
-
-@pytest.fixture
-def create_test_kb(client):
- kb_name = 'test_completions_kb'
-
- # First, try to delete the existing knowledge base if it exists
- client.delete(f'/api/projects/mindsdb/knowledge_bases/{kb_name}', follow_redirects=True)
- create_kb_request = {
- 'knowledge_base': {
- 'name': kb_name,
- 'model': 'test_embedding_model'
- }
- }
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_kb_request,
- follow_redirects=True)
-
- # Check if creation was successful
- assert create_response.status_code in [
- HTTPStatus.CREATED,
- HTTPStatus.OK
- ], f"Failed to create knowledge base. Status: {create_response.status}"
-
- return kb_name
-
-
-def test_successful_completion(client, create_test_kb):
- kb_name = create_test_kb
- completion_request = {
- 'query': 'What is the capital of France?',
- }
- response = client.post(f'/api/projects/mindsdb/knowledge_bases/{kb_name}/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.OK
- response_data = response.get_json()
- assert 'message' in response_data
- assert 'content' in response_data['message']
- assert 'context' in response_data['message']
- assert response_data['message']['role'] == 'assistant'
-
-
-def test_completion_missing_query_parameter(client, create_test_kb):
- kb_name = create_test_kb
- invalid_request = {
- 'knowledge_base': kb_name
- }
- response = client.post(f'/api/projects/mindsdb/knowledge_bases/{kb_name}/completions',
- json=invalid_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_completion_non_existent_project(client, create_test_kb):
- kb_name = create_test_kb
- completion_request = {
- 'query': 'What is the capital of France?',
- }
- response = client.post(f'/api/projects/nonexistent/knowledge_bases/{kb_name}/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.NOT_FOUND
-
-
-def test_completion_non_existent_knowledge_base(client):
- completion_request = {
- 'query': 'What is the capital of France?',
- }
- response = client.post('/api/projects/mindsdb/knowledge_bases/nonexistent_kb/completions',
- json=completion_request, follow_redirects=True)
- assert response.status_code == HTTPStatus.NOT_FOUND
-
-
-def test_create_knowledge_base_with_preprocessing(client):
- """Test creating a knowledge base with preprocessing configuration"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_preprocess',
- 'model': 'test_embedding_model',
- 'preprocessing': {
- 'type': 'contextual',
- 'contextual_config': {
- 'chunk_size': 1000,
- 'chunk_overlap': 200,
- 'llm_model': 'gpt-4'
- }
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- created_knowledge_base = create_response.get_json()
- assert 'preprocessing' in created_knowledge_base['params']
- assert created_knowledge_base['params']['preprocessing']['type'] == 'contextual'
-
- # Verify preprocessing config is preserved in GET
- get_response = client.get('/api/projects/mindsdb/knowledge_bases/test_kb_preprocess')
- assert get_response.status_code == HTTPStatus.OK
- kb_data = get_response.get_json()
- assert 'preprocessing' in kb_data['params']
- assert kb_data['params']['preprocessing'] == create_request['knowledge_base']['preprocessing']
-
-
-def test_create_knowledge_base_invalid_preprocessing(client):
- """Test creating a knowledge base with invalid preprocessing configuration"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_invalid_preprocess',
- 'model': 'test_embedding_model',
- 'preprocessing': {
- 'type': 'invalid_type',
- 'contextual_config': {
- 'chunk_size': 'invalid'
- }
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_put_knowledge_base_with_preprocessing(client):
- """Test updating knowledge base with preprocessing"""
- # First create a KB
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_update_preprocess',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Update with content and preprocessing
- content_to_embed = [
- "First document to be processed and chunked.",
- "Second document with different content for testing.",
- ]
- rows_to_insert = [
- {'content': content_to_embed[0]},
- {'content': content_to_embed[1]}
- ]
-
- update_request = {
- 'knowledge_base': {
- 'rows': rows_to_insert,
- 'preprocessing': {
- 'type': 'contextual',
- 'contextual_config': {
- 'chunk_size': 500,
- 'chunk_overlap': 100,
- 'llm_model': 'gpt-4'
- }
- }
- }
- }
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- # Create a mock KB instance
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Setup the _kb attribute with required params
- mock_kb = MagicMock()
- mock_kb.params = {'preprocessing': None} # Initial state
- mock_table_instance._kb = mock_kb
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_update_preprocess',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify preprocessing was configured
- mock_table_instance.configure_preprocessing.assert_called_with({
- 'type': 'contextual',
- 'contextual_config': {
- 'chunk_size': 500,
- 'chunk_overlap': 100,
- 'llm_model': 'gpt-4'
- }
- })
-
- # Verify rows were inserted
- mock_table_instance.insert_rows.assert_called_once()
- inserted_rows = mock_table_instance.insert_rows.call_args[0][0]
- assert len(inserted_rows) == 2
- assert inserted_rows[0]['content'] == content_to_embed[0]
- assert inserted_rows[1]['content'] == content_to_embed[1]
-
-
-def test_put_knowledge_base_invalid_preprocessing(client):
- """Test updating knowledge base with invalid preprocessing config"""
- # First create a KB
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_invalid_update_preprocess',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Update with invalid preprocessing config
- update_request = {
- 'knowledge_base': {
- 'rows': [{'content': 'test content'}],
- 'preprocessing': {
- 'type': 'invalid_type',
- 'invalid_config': {
- 'invalid_param': 'invalid_value'
- }
- }
- }
- }
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- # Create a mock KB instance
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Setup the _kb attribute with required params
- mock_kb = MagicMock()
- mock_kb.params = {'preprocessing': None}
- mock_table_instance._kb = mock_kb
-
- # Configure the mock to raise an error when invalid preprocessing config is provided
- mock_table_instance.configure_preprocessing.side_effect = ValueError("Invalid preprocessing type")
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_invalid_update_preprocess',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_put_knowledge_base_with_documents(client):
- """Test updating knowledge base with Document objects"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_documents',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Test data
- test_documents = [
- {
- 'content': 'First test document content',
- 'metadata': {'source': 'test1', 'category': 'A'}
- },
- {
- 'content': 'Second test document content',
- 'metadata': {'source': 'test2', 'category': 'B'}
- }
- ]
-
- update_request = {
- 'knowledge_base': {
- 'rows': test_documents
- }
- }
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Mock the dependencies
- mock_kb = MagicMock()
- mock_kb.params = {'preprocessing': None}
- mock_table_instance._kb = mock_kb
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_documents',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify insert_rows was called with correct Document objects
- mock_table_instance.insert_rows.assert_called_once()
- inserted_rows = mock_table_instance.insert_rows.call_args[0][0]
- assert len(inserted_rows) == 2
- assert all(isinstance(row, dict) for row in inserted_rows)
- assert inserted_rows[0]['content'] == test_documents[0]['content']
- assert inserted_rows[0]['metadata'] == test_documents[0]['metadata']
-
-
-def test_put_knowledge_base_mixed_content(client):
- """Test updating knowledge base with multiple content types"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_mixed',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- update_request = {
- 'knowledge_base': {
- 'rows': [{'content': 'Test content', 'metadata': {'source': 'manual'}}],
- 'files': ['test_file.txt'],
- 'urls': ['http: //example.com'],
- 'query': 'SELECT * FROM test_table',
- 'preprocessing': {
- 'type': 'contextual',
- 'contextual_config': {
- 'chunk_size': 500,
- 'chunk_overlap': 50
- }
- }
- }
- }
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Mock the dependencies
- mock_kb = MagicMock()
- mock_kb.params = {'preprocessing': None}
- mock_table_instance._kb = mock_kb
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_mixed',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
-
-def test_create_knowledge_base_with_text_chunking(client):
- """Test creating a knowledge base with text chunking preprocessing configuration"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_text_chunking',
- 'model': 'test_embedding_model',
- 'preprocessing': {
- 'type': 'text_chunking',
- 'text_chunking_config': {
- 'chunk_size': 500,
- 'chunk_overlap': 50,
- 'separators': ["\n\n", "\n", ".", " "]
- }
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- created_knowledge_base = create_response.get_json()
- assert 'preprocessing' in created_knowledge_base['params']
- assert created_knowledge_base['params']['preprocessing']['type'] == 'text_chunking'
-
- # Verify preprocessing config is preserved in GET
- get_response = client.get('/api/projects/mindsdb/knowledge_bases/test_kb_text_chunking')
- assert get_response.status_code == HTTPStatus.OK
- kb_data = get_response.get_json()
- assert 'preprocessing' in kb_data['params']
- assert kb_data['params']['preprocessing'] == create_request['knowledge_base']['preprocessing']
-
-
-def test_create_knowledge_base_invalid_text_chunking(client):
- """Test creating a knowledge base with invalid text chunking configuration"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_invalid_chunking',
- 'model': 'test_embedding_model',
- 'preprocessing': {
- 'type': 'text_chunking',
- 'text_chunking_config': {
- 'chunk_size': -100, # Invalid negative size
- 'chunk_overlap': "invalid", # Invalid type
- }
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_put_knowledge_base_default_preprocessing(client):
- """Test that text chunking is used as default preprocessing when none specified"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_default_preprocess',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- update_request = {
- 'knowledge_base': {
- 'rows': [{
- 'content': 'Test content for default preprocessing',
- 'source': 'test' # Flat metadata instead of nested
- }]
- }
- }
-
- # Send update request directly
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_default_preprocess',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
-
-def test_put_knowledge_base_missing_metadata(client):
- """Test proper error handling when metadata is missing"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_metadata',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Request with missing source
- update_request = {
- 'knowledge_base': {
- 'rows': [{
- 'content': 'Test content without metadata'
- }]
- }
- }
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_metadata',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK # Should succeed with default metadata
-
-
-def test_document_processing_with_valid_metadata(client):
- """Test document processing with valid metadata configuration"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_valid_metadata',
- 'model': 'test_embedding_model',
- 'preprocessing': {
- 'type': 'text_chunking',
- 'text_chunking_config': {
- 'chunk_size': 100,
- 'chunk_overlap': 20
- }
- }
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Various valid metadata configurations as flat key-value pairs
- test_rows = [
- {
- 'content': 'First test content',
- 'source': 'test1',
- 'category': 'A'
- },
- {
- 'content': 'Second test content',
- 'source': 'test2',
- 'doc_type': 'document'
- },
- {
- 'content': 'Third test content',
- 'source': 'test3',
- 'tag': 'tag1'
- }
- ]
-
- update_request = {
- 'knowledge_base': {
- 'rows': test_rows
- }
- }
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_valid_metadata',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
-
-def test_document_processing_with_default_metadata(client):
- """Test document processing where system adds default metadata"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_default_metadata',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Only provide content, system should add default metadata
- update_request = {
- 'knowledge_base': {
- 'rows': [{
- 'content': 'Test content with system metadata',
- 'source': 'api' # This will be moved to metadata
- }]
- }
- }
-
- # Need to patch both controller and preprocessor
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- mock_instance = MagicMock()
- mock_kb_table.return_value = mock_instance
-
- # Mock KB params
- mock_kb = MagicMock()
- mock_kb.params = {}
- mock_instance._kb = mock_kb
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_default_metadata',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify that insert_rows was called with the correct data
- mock_instance.insert_rows.assert_called_once()
- inserted_rows = mock_instance.insert_rows.call_args[0][0]
- assert len(inserted_rows) == 1
- assert inserted_rows[0]['content'] == 'Test content with system metadata'
- assert inserted_rows[0]['source'] == 'api'
-
-
-def test_document_loader_with_file_extensions(client):
- """Test document loader handling different file extensions"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_file_extensions',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Create test files
- with patch('mindsdb.interfaces.file.file_controller.FileController') as mock_file_controller:
- # Mock file existence checks
- mock_file_controller.return_value.get_file_path.return_value = MagicMock()
-
- update_request = {
- 'knowledge_base': {
- 'files': ['test.md', 'test.html', 'test.pdf']
- }
- }
-
- with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table:
- mock_table_instance = MagicMock()
- mock_kb_table.return_value = mock_table_instance
-
- # Mock KB params
- mock_kb = MagicMock()
- mock_kb.params = {}
- mock_table_instance._kb = mock_kb
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_file_extensions',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify document loader was used
- mock_table_instance.insert_files.assert_called_once_with(['test.md', 'test.html', 'test.pdf'])
-
-
-def test_document_loader_sql_error_handling(client):
- """Test document loader handling SQL query errors"""
- create_request = {
- 'knowledge_base': {
- 'name': 'test_kb_sql_errors',
- 'model': 'test_embedding_model'
- }
- }
-
- create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
- assert create_response.status_code == HTTPStatus.CREATED
-
- # This will not use the document loader.
- update_request = {
- 'knowledge_base': {
- 'query': 'INVALID SQL QUERY'
- }
- }
-
- update_response = client.put(
- '/api/projects/mindsdb/knowledge_bases/test_kb_sql_errors',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.BAD_REQUEST
-
-
-def test_preprocessing_update(client):
- """Test updating preprocessing configuration"""
- initial_request = {
- 'knowledge_base': {
- 'name': 'test_kb_preprocess_update',
- 'model': 'test_embedding_model'
- }
- }
-
- # First try to delete if exists
- client.delete(
- f'/api/projects/mindsdb/knowledge_bases/{initial_request["knowledge_base"]["name"]}',
- follow_redirects=True
- )
-
- with patch('mindsdb.api.http.namespaces.knowledge_bases.SessionController') as mock_session_class:
- mock_session = MagicMock()
- mock_session_class.return_value = mock_session
-
- # Setup KB controller mock
- mock_kb_controller = MagicMock()
- mock_session.kb_controller = mock_kb_controller
-
- # Setup table mock
- mock_table = MagicMock()
- mock_kb_controller.get_table.return_value = mock_table
-
- # Setup KB object mock
- mock_kb = MagicMock()
- mock_kb.params = {}
- mock_kb.id = 1
- mock_kb.name = initial_request['knowledge_base']['name']
- mock_kb.as_dict.return_value = {
- 'id': 1,
- 'name': initial_request['knowledge_base']['name'],
- 'params': mock_kb.params,
- 'created_at': '2024-11-07T12: 13: 46',
- 'updated_at': '2024-11-07T12: 13: 46'
- }
-
- # Setup controller methods
- mock_kb_controller.get.side_effect = [None, mock_kb]
- mock_kb_controller.add.return_value = mock_kb
-
- create_response = client.post(
- '/api/projects/mindsdb/knowledge_bases',
- json=initial_request,
- follow_redirects=True
- )
-
- assert create_response.status_code == HTTPStatus.CREATED
-
- # Now update with preprocessing config
- update_request = {
- 'knowledge_base': {
- 'preprocessing': {
- 'type': 'text_chunking',
- 'text_chunking_config': {
- 'chunk_size': 300,
- 'chunk_overlap': 30
- }
- }
- }
- }
-
- # Reset get to always return the KB now that it exists
- mock_kb_controller.get.reset_mock()
- mock_kb_controller.get.return_value = mock_kb
-
- update_response = client.put(
- f'/api/projects/mindsdb/knowledge_bases/{initial_request["knowledge_base"]["name"]}',
- json=update_request,
- follow_redirects=True
- )
-
- assert update_response.status_code == HTTPStatus.OK
-
- # Verify preprocessing was updated
- mock_table.configure_preprocessing.assert_called_with(
- update_request['knowledge_base']['preprocessing']
- )
diff --git a/tests/unused/unit/broken/test_knowledge_base.py b/tests/unused/unit/broken/test_knowledge_base.py
deleted file mode 100644
index 02e224003c6..00000000000
--- a/tests/unused/unit/broken/test_knowledge_base.py
+++ /dev/null
@@ -1,452 +0,0 @@
-import tempfile
-import time
-from unittest.mock import patch
-
-import pandas as pd
-import pytest
-from mindsdb_sql_parser import parse_sql
-
-from mindsdb.interfaces.storage.db import KnowledgeBase
-
-from tests.unit.executor_test_base import BaseExecutorTest
-
-
-class TestKnowledgeBase(BaseExecutorTest):
- def run_sql(self, sql):
- ret = self.command_executor.execute_command(parse_sql(sql))
- assert ret.error_code is None
- if ret.data is not None:
- return ret.data.to_df()
-
- def wait_predictor(self, project, name):
- # wait
- done = False
- for _ in range(200):
- ret = self.run_sql(f"select * from {project}.models where name='{name}'")
- if not ret.empty:
- if ret["STATUS"][0] == "complete":
- done = True
- break
- elif ret["STATUS"][0] == "error":
- break
- time.sleep(0.5)
- if not done:
- raise RuntimeError("predictor wasn't created")
-
- @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
- def setup_method(self, method, mock_handler):
- super().setup_method()
-
- vectordatabase_name = "chroma_test"
-
- # create a vector database table
- tmp_directory = tempfile.mkdtemp()
- self.run_sql(
- f"""
- CREATE DATABASE {vectordatabase_name}
- WITH ENGINE = "chromadb",
- PARAMETERS = {{
- "persist_directory" : "{tmp_directory}"
- }}
- """
- )
-
- # mock the data
- df = pd.DataFrame(
- {
- "id": ["id1", "id2", "id3"],
- "content": ["content1", "content2", "content3"],
- "metadata": [
- '{"datasource": "web", "some_field": "some_value"}',
- '{"datasource": "web"}',
- '{"datasource": "web"}',
- ],
- "embeddings": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
- }
- )
-
- self.save_file("df", df)
-
- # create the table
- vectordatabase_table_name = "test_table"
- sql = f"""
- CREATE TABLE chroma_test.{vectordatabase_table_name}
- (
- SELECT * FROM files.df
- )
- """
- self.run_sql(sql)
-
- # create an embedding model
- embedding_model_name = "test_dummy_embedding"
- self.run_sql(
- f"""
- CREATE MODEL {embedding_model_name}
- PREDICT embeddings
- USING
- engine='langchain_embedding',
- class = 'FakeEmbeddings',
- size = 3,
- input_columns = ['content']
- """
- )
-
- self.wait_predictor("mindsdb", embedding_model_name)
- self.vector_database_table_name = vectordatabase_table_name
- self.vector_database_name = vectordatabase_name
- self.embedding_model_name = embedding_model_name
- self.database_path = tmp_directory
-
- def teardown_method(self, method):
- # drop the vector database
- self.run_sql("DROP KNOWLEDGE_BASE IF EXISTS test_kb")
- self.run_sql(f"DROP TABLE {self.vector_database_name}.{self.vector_database_table_name}")
- self.run_sql(f"DROP DATABASE {self.vector_database_name}")
-
- def test_create_kb(self):
- # create knowledge base
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
- self.run_sql(sql)
-
- # verify the knowledge base is created
- kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb").first()
- assert kb_obj is not None
-
- # create a knowledge base from select
- # todo this should be supported but isn't yet
-
- # sql = f"""
- # CREATE KNOWLEDGE BASE test_kb2
- # FROM (
- # SELECT content, embeddings, metadata
- # FROM {self.vector_database_name}.{self.vector_database_table_name}
- # )
- # USING
- # MODEL = {self.embedding_model_name},
- # STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- # """
- #
- # self.run_sql(sql)
- #
- # # verify the knowledge base is created
- # kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb2").first()
- # assert kb_obj is not None
-
- # create a knowledge base with invalid model and storage name should throw an exception
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb3
- USING
- MODEL = invalid_model_name,
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
- with pytest.raises(Exception):
- self.run_sql(sql)
-
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb4
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = invalid_storage_name
- """
- with pytest.raises(Exception):
- self.run_sql(sql)
-
- # create a knowledge base without a storage name, default should be used
-
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb5
- USING
- MODEL = {self.embedding_model_name}
- """
-
- self.run_sql(sql)
-
- # verify the knowledge base is created
- kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb5").first()
- assert kb_obj is not None
- assert kb_obj.vector_database.name == "test_kb5_chromadb"
-
- # create a knowledge base without a model name, default should be used
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb6
- USING
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
-
- self.run_sql(sql)
-
- # verify the knowledge base is created
- kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb6").first()
- assert kb_obj is not None
- assert kb_obj.embedding_model.name == "kb_default_embedding_model"
- # clean up
- self.run_sql("DROP KNOWLEDGE BASE test_kb6")
-
- def test_drop_kb(self):
- # create a knowledge base
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb_delete
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
- self.run_sql(sql)
-
- # verify the knowledge base is created
- kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb_delete").first()
- assert kb_obj is not None
-
- # drop a knowledge base
- sql = """
- DROP KNOWLEDGE BASE test_kb_delete
- """
- self.run_sql(sql)
-
- # verify the knowledge base is dropped
- kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb").first()
- assert kb_obj is None
-
- def test_select_from_kb(self):
- # create the knowledge base
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
- self.run_sql(sql)
-
- # select from the knowledge base without any filters
- sql = """
- SELECT *
- FROM test_kb
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 3
-
- # select from the knowledge base with an id filter
- sql = """
- SELECT *
- FROM test_kb
- WHERE id = 'id1'
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 1
-
- # select from the knowledge base with a metadata filter
- sql = """
- SELECT *
- FROM test_kb
- WHERE
- `metadata.some_field` = 'some_value'
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 1
-
- # select with a search query
- sql = """
- SELECT *
- FROM test_kb
- WHERE
- content = 'some query'
- LIMIT 1
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 1
-
- @pytest.mark.skip(reason="need to cleanly mock embedding model predict")
- def test_insert_into_kb(self):
- # create the knowledge base
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
- self.run_sql(sql)
-
- # insert into the knowledge base using values
- sql = """
- INSERT INTO test_kb (id, content, embeddings, metadata)
- VALUES (
- 'id4',
- 'content4',
- '[4, 5, 6]',
- '{"d": 4}'
- )
-
- """
- self.run_sql(sql)
-
- # verify the knowledge base is updated
- sql = """
- SELECT *
- FROM test_kb
- WHERE id = 'id4'
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 1
-
- # insert into the knowledge base using a select
- sql = """
- INSERT INTO test_kb
- SELECT
- content, metadata
- FROM files.df
- """
- self.run_sql(sql)
-
- # verify the knowledge base is updated
- sql = """
- SELECT *
- FROM test_kb
- """
-
- df = self.run_sql(sql)
- assert df.shape[0] == 7
-
- @pytest.mark.skip(reason="Not implemented")
- def test_update_kb(self):
- ...
-
- def test_delete_from_kb(self):
- # create the knowledge base
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
-
- self.run_sql(sql)
-
- # delete with id filter
- sql = """
- DELETE FROM test_kb
- WHERE id = 'id1'
- """
- self.run_sql(sql)
-
- # verify the knowledge base is updated
- sql = """
- SELECT *
- FROM test_kb
- WHERE id = 'id1'
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 0
-
- # delete with metadata filter
- sql = """
- DELETE FROM test_kb
- WHERE `metadata.datasource` = 'web'
- """
- self.run_sql(sql)
-
- # verify the knowledge base is updated
- sql = """
- SELECT *
- FROM test_kb
- WHERE id = 'id2'
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 0
-
- # delete from the knowledge base without any filters is not allowed
- sql = """
- DELETE FROM test_kb
- """
- with pytest.raises(Exception):
- self.run_sql(sql)
-
- def test_show_knowledge_bases(self):
- # create the knowledge base
- sql = f"""
- CREATE KNOWLEDGE BASE test_kb
- USING
- MODEL = {self.embedding_model_name},
- STORAGE = {self.vector_database_name}.{self.vector_database_table_name}
- """
- self.run_sql(sql)
-
- # show knowledge bases
- sql = """
- SHOW KNOWLEDGE BASES
- """
- df = self.run_sql(sql)
- assert df.shape[0] == 1
-
- @pytest.mark.skip(reason="need to cleanly mock embedding model predict")
- def test_kb_params(self):
-
- df = pd.DataFrame([
- {'id': 1, 'ticket': 'NFLX', 'value': 532, 'created_at': '2020-01-01', 'ma': 100},
- {'id': 2, 'ticket': 'MSFT', 'value': 311, 'created_at': '2020-01-02', 'ma': 200},
- ])
-
- self.save_file('stock', df)
-
- # ---- default ----
- self.run_sql(f'create knowledge base kb_test USING MODEL = {self.embedding_model_name}')
- self.run_sql('INSERT INTO kb_test select * from files.stock')
- ret = self.run_sql("select * from kb_test where content='msft'")
- self.run_sql('drop knowledge base kb_test') # have to drop KB with model and vector sore before assertions
-
- # second row is the result, all columns in content
- content = ret.content[0]
- assert 'MSFT' in content and 'created_at' in content and '311' in content and '200' in content
-
- # metadata is empty
- assert ret.metadata[0] is None
-
- # id = 2
- assert ret.id[0] == '2'
-
- # ---- choose content ----
- self.run_sql('''
- create knowledge base kb_test
- using content_columns = ['ticket', 'value']
- ''')
- self.run_sql('INSERT INTO kb_test select * from files.stock')
- ret = self.run_sql("select * from kb_test where content='msft'")
- self.run_sql('drop knowledge base kb_test')
-
- metadata = ret.metadata[0]
- content = ret.content[0]
- # ticket and value in content
- assert 'MSFT' in content and '311' in content
- # created and ma in metadata
- assert 'created_at' in metadata and 'ma' in metadata
-
- # ---- choose metadata ----
- self.run_sql('''
- create knowledge base kb_test
- using metadata_columns = ['created_at', 'value']
- ''')
- self.run_sql('INSERT INTO kb_test select * from files.stock')
- ret = self.run_sql("select * from kb_test where content='msft'")
- self.run_sql('drop knowledge base kb_test')
-
- metadata = ret.metadata[0]
- content = ret.content[0]
- # ticket and ma in content
- assert 'MSFT' in content and '200' in content
- # created and value in metadata
- assert 'created_at' in metadata and 'value' in metadata
-
- # ---- choose id ----
- self.run_sql('''
- create knowledge base kb_test
- using id_column='ma'
- ''')
- self.run_sql('INSERT INTO kb_test select * from files.stock')
- ret = self.run_sql("select * from kb_test where content='msft'")
- self.run_sql('drop knowledge base kb_test')
-
- # id = 200
- assert ret.id[0] == '200'
diff --git a/tests/unused/unit/broken/test_map_reduce_summarizer_chain.py b/tests/unused/unit/broken/test_map_reduce_summarizer_chain.py
deleted file mode 100644
index 2953a94ad6f..00000000000
--- a/tests/unused/unit/broken/test_map_reduce_summarizer_chain.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from unittest.mock import AsyncMock, MagicMock
-
-import pandas as pd
-from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
-from langchain_core.documents import Document
-
-from mindsdb.integrations.libs.vectordatabase_handler import VectorStoreHandler
-from mindsdb.integrations.utilities.rag.chains.map_reduce_summarizer_chain import MapReduceSummarizerChain
-from mindsdb.integrations.utilities.rag.settings import SummarizationConfig
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
-
-
-class TestMapReduceSummarizerChain:
- def test_summarizes_documents(self):
- mock_vector_store_handler = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler)
- mock_vector_store_handler.select.side_effect = [
- pd.DataFrame.from_records([
- {'content': 'Chunk 1'},
- {'content': 'Chunk 2'},
- ]),
- pd.DataFrame.from_records([
- {'content': 'Chunk 3'}
- ])
- ]
- mock_map_reduce_documents_chain = AsyncMock(spec=MapReduceDocumentsChain, wraps=MapReduceDocumentsChain)
- mock_map_reduce_documents_chain.ainvoke.side_effect = [{'output_text': 'Final summary 1'}, {'output_text': 'Final summary 2'}]
- test_summarizer_chain = MapReduceSummarizerChain(
- vector_store_handler=mock_vector_store_handler,
- map_reduce_documents_chain=mock_map_reduce_documents_chain,
- summarization_config=SummarizationConfig()
- )
-
- chain_input = {
- 'context': [
- Document(page_content='Chunk 1', metadata={'original_row_id': '1'}),
- Document(page_content='Chunk 2', metadata={'original_row_id': '1'}),
- Document(page_content='Chunk 3', metadata={'original_row_id': '2'})
- ],
- 'question': 'What is the answer to life?',
- }
- actual_chain_output = test_summarizer_chain.invoke(chain_input)
-
- # Make sure we select from the vector store correctly.
- mock_vector_store_handler.select.assert_any_call(
- 'embeddings',
- columns=['content', 'metadata'],
- conditions=[FilterCondition(
- "metadata->>'original_row_id'",
- FilterOperator.EQUAL,
- '1'
- )]
- )
- mock_vector_store_handler.select.assert_any_call(
- 'embeddings',
- columns=['content', 'metadata'],
- conditions=[FilterCondition(
- "metadata->>'original_row_id'",
- FilterOperator.EQUAL,
- '2'
- )]
- )
-
- # Make sure we are calling the summarization chain with the right chunks.
- mock_map_reduce_documents_chain.ainvoke.assert_awaited()
-
- # Make sure the summary is actually added to the context.
- expected_chain_output = {
- 'context': [
- Document(page_content='Chunk 1', metadata={'original_row_id': '1', 'summary': 'Final summary 1'}),
- Document(page_content='Chunk 2', metadata={'original_row_id': '1', 'summary': 'Final summary 1'}),
- Document(page_content='Chunk 3', metadata={'original_row_id': '2', 'summary': 'Final summary 2'})
- ],
- 'question': 'What is the answer to life?',
- }
-
- assert actual_chain_output == expected_chain_output
diff --git a/tests/unused/unit/broken/test_sql_retriever.py b/tests/unused/unit/broken/test_sql_retriever.py
deleted file mode 100644
index 7734739540b..00000000000
--- a/tests/unused/unit/broken/test_sql_retriever.py
+++ /dev/null
@@ -1,313 +0,0 @@
-from unittest.mock import MagicMock
-
-import pandas as pd
-from langchain_core.documents import Document
-from langchain_core.embeddings import Embeddings
-from langchain_core.outputs.generation import Generation
-from langchain_core.outputs.llm_result import LLMResult
-from langchain_core.retrievers import BaseRetriever
-from langchain_openai.chat_models.base import ChatOpenAI
-
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-from mindsdb.integrations.libs.response import HandlerResponse
-from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction, VectorStoreHandler
-from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever
-from mindsdb.integrations.utilities.rag.settings import DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE, DEFAULT_SEMANTIC_PROMPT_TEMPLATE, ColumnSchema, MetadataSchema, SearchKwargs
-
-
-class TestSQLRetriever:
- def test_basic(self):
- llm = MagicMock(spec=ChatOpenAI, wraps=ChatOpenAI)
- llm_result = MagicMock(spec=LLMResult, wraps=LLMResult)
- llm_result.generations = [
- [
- Generation(
- text='''```json
-{
- "filters": [
- {
- "attribute": "ContributorName",
- "comparator": "=",
- "value": "Alfred"
- }
- ]
-}
-```'''
- )
- ]
- ]
- llm.generate_prompt.return_value = llm_result
- vector_db_mock = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler)
- series = pd.Series(
- [0, 'Chunk1', '[1.0, 2.0, 3.0]', {'key1': 'value1'}, 0, 1],
- index=['id', 'content', 'embeddings', 'metadata', 'Id', 'Type']
- )
- df = pd.DataFrame([series])
- vector_db_mock.native_query.return_value = HandlerResponse(
- RESPONSE_TYPE.TABLE,
- data_frame=df
- )
- embeddings_mock = MagicMock(spec=Embeddings, wraps=Embeddings)
- embeddings_mock.embed_query.return_value = list(range(768))
-
- source_schema = MetadataSchema(
- table='test_source_table',
- description='Contains source documents',
- columns=[
- ColumnSchema(name='Id', type='int', description='Unique ID as primary key of doc'),
- ColumnSchema(name='Type', type='int', description='Document Type', values={1: 'Unknown', 2: 'Site Audit'})
- ]
- )
- unit_schema = MetadataSchema(
- table='unit',
- description='Contains information about specific units of power plants. Several units can be part of a single plant.',
- columns=[
- ColumnSchema(name='UnitKey', type='int', description='Unique ID of the unit'),
- ColumnSchema(name='PlantKey', type='int', description='ID of the plant the unit belongs to')
- ]
- )
- plant_schema = MetadataSchema(
- table='plant',
- description='Contains information about specific power plants',
- columns=[
- ColumnSchema(name='PlantKey', type='int', description='The unique ID of the plant'),
- ColumnSchema(name='PlantName', type='str', description='The name of the plant')
- ]
- )
- document_unit_schema = MetadataSchema(
- table='document_unit',
- description='Links documents to the power plant they are relevant to',
- columns=[
- ColumnSchema(name='DocumentId', type='int', description='The ID of the document associated with the unit'),
- ColumnSchema(name='UnitKey', type='int', description='The ID of the unit the documnet is associated with')
- ]
- )
- all_schemas = [source_schema, unit_schema, plant_schema, document_unit_schema]
- fallback_retriever = MagicMock(spec=BaseRetriever, wraps=BaseRetriever)
- sql_retriever = SQLRetriever(
- fallback_retriever=fallback_retriever,
- vector_store_handler=vector_db_mock,
- metadata_schemas=all_schemas,
- embeddings_model=embeddings_mock,
- metadata_filters_prompt_template=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
- rewrite_prompt_template=DEFAULT_SEMANTIC_PROMPT_TEMPLATE,
- num_retries=2,
- embeddings_table='test_embeddings_table',
- source_table='test_source_table',
- distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE,
- search_kwargs=SearchKwargs(k=5),
- llm=llm
- )
-
- docs = sql_retriever.invoke('What are Beaver Valley plant documents for nuclear fuel waste?')
- # Make sure right doc was retrieved.
- assert len(docs) == 1
- assert docs[0].page_content == 'Chunk1'
- assert docs[0].metadata == {'key1': 'value1'}
-
- def test_retries(self):
- llm = MagicMock(spec=ChatOpenAI, wraps=ChatOpenAI)
- llm_result = MagicMock(spec=LLMResult, wraps=LLMResult)
- llm_result.generations = [
- [
- Generation(
- text='''```json
-{
- "filters": [
- {
- "attribute": "ContributorName",
- "comparator": "=",
- "value": "Alfred"
- }
- ]
-}
-```'''
- )
- ]
- ]
- llm.generate_prompt.return_value = llm_result
- vector_db_mock = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler)
- series = pd.Series(
- [0, 'Chunk1', '[1.0, 2.0, 3.0]', {'key1': 'value1'}, 0, 1],
- index=['id', 'content', 'embeddings', 'metadata', 'Id', 'Type']
- )
- df = pd.DataFrame([series])
- vector_db_mock.native_query.side_effect = [
- HandlerResponse(
- RESPONSE_TYPE.ERROR,
- error_message='Something went wrong I am in absolute shambles'
- ),
- HandlerResponse(
- RESPONSE_TYPE.ERROR,
- error_message='Something went wrong I am in absolute shambles'
- ),
- HandlerResponse(
- RESPONSE_TYPE.TABLE,
- data_frame=df
- )
- ]
- embeddings_mock = MagicMock(spec=Embeddings, wraps=Embeddings)
- embeddings_mock.embed_query.return_value = list(range(768))
-
- source_schema = MetadataSchema(
- table='test_source_table',
- description='Contains source documents',
- columns=[
- ColumnSchema(name='Id', type='int', description='Unique ID as primary key of doc'),
- ColumnSchema(name='Type', type='int', description='Document Type', values={1: 'Unknown', 2: 'Site Audit'})
- ]
- )
- unit_schema = MetadataSchema(
- table='unit',
- description='Contains information about specific units of power plants. Several units can be part of a single plant.',
- columns=[
- ColumnSchema(name='UnitKey', type='int', description='Unique ID of the unit'),
- ColumnSchema(name='PlantKey', type='int', description='ID of the plant the unit belongs to')
- ]
- )
- plant_schema = MetadataSchema(
- table='plant',
- description='Contains information about specific power plants',
- columns=[
- ColumnSchema(name='PlantKey', type='int', description='The unique ID of the plant'),
- ColumnSchema(name='PlantName', type='str', description='The name of the plant')
- ]
- )
- document_unit_schema = MetadataSchema(
- table='document_unit',
- description='Links documents to the power plant they are relevant to',
- columns=[
- ColumnSchema(name='DocumentId', type='int', description='The ID of the document associated with the unit'),
- ColumnSchema(name='UnitKey', type='int', description='The ID of the unit the documnet is associated with')
- ]
- )
- all_schemas = [source_schema, unit_schema, plant_schema, document_unit_schema]
- fallback_retriever = MagicMock(spec=BaseRetriever, wraps=BaseRetriever)
- sql_retriever = SQLRetriever(
- fallback_retriever=fallback_retriever,
- vector_store_handler=vector_db_mock,
- metadata_schemas=all_schemas,
- embeddings_model=embeddings_mock,
- metadata_filters_prompt_template=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
- rewrite_prompt_template=DEFAULT_SEMANTIC_PROMPT_TEMPLATE,
- num_retries=3,
- embeddings_table='test_embeddings_table',
- source_table='test_source_table',
- distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE,
- search_kwargs=SearchKwargs(k=5),
- llm=llm
- )
-
- docs = sql_retriever.invoke('What are Beaver Valley plant documents for nuclear fuel waste?')
- # Make sure we retried.
- assert len(vector_db_mock.native_query.mock_calls) == 3
- # Make sure right doc was retrieved.
- assert len(docs) == 1
- assert docs[0].page_content == 'Chunk1'
- assert docs[0].metadata == {'key1': 'value1'}
-
- def test_fallback(self):
- llm = MagicMock(spec=ChatOpenAI, wraps=ChatOpenAI)
- llm_result = MagicMock(spec=LLMResult, wraps=LLMResult)
- llm_result.generations = [
- [
- Generation(
- text='''```json
-{
- "filters": [
- {
- "attribute": "ContributorName",
- "comparator": "=",
- "value": "Alfred"
- }
- ]
-}
-```'''
- )
- ]
- ]
- llm.generate_prompt.return_value = llm_result
- vector_db_mock = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler)
- vector_db_mock.native_query.side_effect = [
- HandlerResponse(
- RESPONSE_TYPE.ERROR,
- error_message='Something went wrong I am in absolute shambles'
- ),
- HandlerResponse(
- RESPONSE_TYPE.ERROR,
- error_message='Something went wrong I am in absolute shambles'
- ),
- HandlerResponse(
- RESPONSE_TYPE.ERROR,
- error_message='Something went wrong I am in absolute shambles'
- ),
- ]
- embeddings_mock = MagicMock(spec=Embeddings, wraps=Embeddings)
- embeddings_mock.embed_query.return_value = list(range(768))
-
- source_schema = MetadataSchema(
- table='test_source_table',
- description='Contains source documents',
- columns=[
- ColumnSchema(name='Id', type='int', description='Unique ID as primary key of doc'),
- ColumnSchema(name='Type', type='int', description='Document Type', values={1: 'Unknown', 2: 'Site Audit'})
- ]
- )
- unit_schema = MetadataSchema(
- table='unit',
- description='Contains information about specific units of power plants. Several units can be part of a single plant.',
- columns=[
- ColumnSchema(name='UnitKey', type='int', description='Unique ID of the unit'),
- ColumnSchema(name='PlantKey', type='int', description='ID of the plant the unit belongs to')
- ]
- )
- plant_schema = MetadataSchema(
- table='plant',
- description='Contains information about specific power plants',
- columns=[
- ColumnSchema(name='PlantKey', type='int', description='The unique ID of the plant'),
- ColumnSchema(name='PlantName', type='str', description='The name of the plant')
- ]
- )
- document_unit_schema = MetadataSchema(
- table='document_unit',
- description='Links documents to the power plant they are relevant to',
- columns=[
- ColumnSchema(name='DocumentId', type='int', description='The ID of the document associated with the unit'),
- ColumnSchema(name='UnitKey', type='int', description='The ID of the unit the documnet is associated with')
- ]
- )
- all_schemas = [source_schema, unit_schema, plant_schema, document_unit_schema]
- fallback_retriever = MagicMock(spec=BaseRetriever, wraps=BaseRetriever)
- fallback_retriever._get_relevant_documents.return_value = [
- Document(
- page_content='Chunk1',
- metadata={
- 'key1': 'value1'
- }
- )
- ]
- sql_retriever = SQLRetriever(
- fallback_retriever=fallback_retriever,
- vector_store_handler=vector_db_mock,
- metadata_schemas=all_schemas,
- embeddings_model=embeddings_mock,
- metadata_filters_prompt_template=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
- rewrite_prompt_template=DEFAULT_SEMANTIC_PROMPT_TEMPLATE,
- num_retries=2,
- embeddings_table='test_embeddings_table',
- source_table='test_source_table',
- distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE,
- search_kwargs=SearchKwargs(k=5),
- llm=llm
- )
-
- docs = sql_retriever.invoke('What are Beaver Valley plant documents for nuclear fuel waste?')
- # Make sure we retried.
- assert len(vector_db_mock.native_query.mock_calls) == 3
- # Make sure we falled back.
- assert len(fallback_retriever._get_relevant_documents.mock_calls) == 1
- # Make sure right doc was retrieved.
- assert len(docs) == 1
- assert docs[0].page_content == 'Chunk1'
- assert docs[0].metadata == {'key1': 'value1'}
diff --git a/tests/unused/unit/handler_tests/__init__.py b/tests/unused/unit/handler_tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/unused/unit/handler_tests/data/pgvector/seed.sql b/tests/unused/unit/handler_tests/data/pgvector/seed.sql
deleted file mode 100644
index 0b1739b555d..00000000000
--- a/tests/unused/unit/handler_tests/data/pgvector/seed.sql
+++ /dev/null
@@ -1,20 +0,0 @@
--- Make sure pgvector extension is enabled
-DROP EXTENSION IF EXISTS vector;
-CREATE EXTENSION vector;
-
--- Create the table
-CREATE TABLE items (
- id bigserial PRIMARY KEY,
- content text NOT NULL,
- embeddings vector(3) NOT NULL, -- 3 dimensions
- metadata jsonb
-);
-
--- Dummy data
-INSERT INTO items (content, embeddings, metadata) VALUES
- ('a fat cat sat on a mat and ate a fat rat', '[1, 2, 3]', '{"location": "Wonderland", "author": "Taishan"}'),
- ('a fat dog sat on a mat and ate a fat rat', '[4, 5, 6]', '{"location": "Wonderland", "author": "Taishan"}'),
- ('a thin cat sat on a mat and ate a thin rat', '[7, 8, 9]', '{"location": "Zimbabwe", "author": "Taishan"}'),
- ('a thin dog sat on a mat and ate a thin rat', '[10, 11, 12]', '{"location": "Springfield", "author": "Muhammad"}');
-
-COMMIT;
diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/auto_retriever.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/auto_retriever.yml
deleted file mode 100644
index c3dada1b80b..00000000000
--- a/tests/unused/unit/handler_tests/data/rag_pipelines/auto_retriever.yml
+++ /dev/null
@@ -1 +0,0 @@
-retriever_type: auto
diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_both.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_both.yml
deleted file mode 100644
index 0974bdc14de..00000000000
--- a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_both.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-retriever_type: multi
-multi_retriever_mode: both
-
diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_split.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_split.yml
deleted file mode 100644
index 9daf6c19f88..00000000000
--- a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_split.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-retriever_type: multi
-multi_retriever_mode: split
-
diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_summarize.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_summarize.yml
deleted file mode 100644
index 9379c365ec2..00000000000
--- a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_summarize.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-retriever_type: multi
-multi_retriever_mode: summarize
-
diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_chroma.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_chroma.yml
deleted file mode 100644
index bc35cd3e998..00000000000
--- a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_chroma.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-retriever_type: vector_store
-vector_store_config:
- vector_store_type: chroma
diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_pgvector.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_pgvector.yml
deleted file mode 100644
index a75a9b33b71..00000000000
--- a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_pgvector.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-retriever_type: vector_store
-vector_store_config:
- vector_store_type: pgvector
\ No newline at end of file
diff --git a/tests/unused/unit/handler_tests/test_apache_doris_handler.py b/tests/unused/unit/handler_tests/test_apache_doris_handler.py
deleted file mode 100644
index f4b1f3ac861..00000000000
--- a/tests/unused/unit/handler_tests/test_apache_doris_handler.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import pytest
-
-from mindsdb.integrations.handlers.apache_doris_handler.apache_doris_handler import ApacheDorisHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-
-HANDLER_KWARGS = {
- "connection_data": {
- "host": "127.0.0.1",
- "port": 9030,
- "user": "root",
- "password": "password",
- "database": "mindstest"
- }
-}
-
-HANDLER_NAME = 'test_doris_handler'
-
-
-@pytest.fixture(scope="module")
-def handler(request):
- handler = ApacheDorisHandler(HANDLER_NAME, **HANDLER_KWARGS)
- yield handler
-
-
-class TestMySQLHandler:
-
- def check_valid_response(self, res):
- if res.resp_type == RESPONSE_TYPE.TABLE:
- assert res.data_frame is not None, "expected to have some data, but got None"
- assert res.error_code == 0, f"expected to have zero error_code, but got {res.error_code}"
- assert res.error_message is None, f"expected to have None in error message, but got {res.error_message}"
-
- def get_table_names(self, handler):
- res = handler.get_tables()
- tables = res.data_frame
- assert tables is not None, "expected to have some tables in the db, but got None"
- assert 'table_name' in tables, f"expected to get 'table_name' column in the response:\n{tables}"
- return list(tables['table_name'])
-
- def test_connect(self, handler):
- handler.connect()
- assert handler.is_connected, "connection error"
-
- def test_check_connection(self, handler):
- res = handler.check_connection()
- assert res.success, res.error_message
-
- def test_native_query_show_dbs(self, handler):
- dbs = handler.native_query("SHOW DATABASES;")
- dbs = dbs.data_frame
- assert dbs is not None, "expected to get some data, but got None"
- assert 'Database' in dbs, f"expected to get 'Database' column in response:\n{dbs}"
- dbs = list(dbs["Database"])
- expected_db = HANDLER_KWARGS["connection_data"]["database"]
- assert expected_db in dbs, f"expected to have {expected_db} db in response: {dbs}"
-
- def test_get_tables(self, handler):
- tables = self.get_table_names(handler)
- assert "example_tbl" in tables, f"expected to have 'example_tbl' table in the db but got: {tables}"
-
- def test_describe_table(self, handler):
- described = handler.get_columns("example_tbl")
- describe_data = described.data_frame
- self.check_valid_response(described)
- got_columns = list(describe_data.iloc[:, 0])
- want_columns = ["user_id", "date", "city", "age", "sex", "last_visit_date", "cost", "max_dwell_time", "min_dwell_time"]
- assert got_columns == want_columns, f"expected to have next columns in table:\n{want_columns}\nbut got:\n{got_columns}"
-
- def test_create_table(self, handler):
- new_table = "test_mdb"
- res = handler.native_query(f"""
- CREATE TABLE IF NOT EXISTS {new_table} (test_col INT)
- DISTRIBUTED BY HASH(test_col) BUCKETS 1
- PROPERTIES (
- "replication_allocation" = "tag.location.default: 1"
- );
- """)
- self.check_valid_response(res)
- tables = self.get_table_names(handler)
- assert new_table in tables, f"expected to have {new_table} in database, but got: {tables}"
-
- def test_drop_table(self, handler):
- drop_table = "test_mdb"
- res = handler.native_query(f"DROP TABLE IF EXISTS {drop_table}")
- self.check_valid_response(res)
- tables = self.get_table_names(handler)
- assert drop_table not in tables
-
- def test_select_query(self, handler):
- limit = 3
- query = f"SELECT * FROM example_tbl LIMIT {limit}"
- res = handler.query(query)
- self.check_valid_response(res)
- got_rows = res.data_frame.shape[0]
- want_rows = limit
- assert got_rows == want_rows, f"expected to have {want_rows} rows in response but got: {got_rows}"
-
- def test_select_where_query(self, handler):
- want_rows = 5
- query = "SELECT * FROM example_tbl WHERE sex = 0"
- res = handler.query(query)
- self.check_valid_response(res)
- got_rows = res.data_frame.shape[0]
- assert got_rows == want_rows, f"expected to have {want_rows} rows in response but got: {got_rows}"
diff --git a/tests/unused/unit/handler_tests/test_aqicn_handler.py b/tests/unused/unit/handler_tests/test_aqicn_handler.py
deleted file mode 100644
index 674e4f04799..00000000000
--- a/tests/unused/unit/handler_tests/test_aqicn_handler.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import importlib
-import os
-import pytest
-from mindsdb_sql_parser import parse_sql
-
-from ..unit.executor_test_base import BaseExecutorTest
-
-try:
- importlib.import_module("requests")
- REQUESTS_INSTALLED = True
-except ImportError:
- REQUESTS_INSTALLED = False
-
-
-@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed")
-class TestAQICNHandler(BaseExecutorTest):
-
- def run_sql(self, sql):
- ret = self.command_executor.execute_command(parse_sql(sql))
- assert ret.error_code is None
- if ret.data is not None:
- return ret.data.to_df()
-
- def setup_method(self):
- super().setup_method()
- self.api_key = os.environ.get("AQICN_KEY")
- self.run_sql(f"""
- CREATE DATABASE mindsdb_aqicn
- WITH ENGINE = 'aqicn',
- PARAMETERS = {
- "api_key": '{self.api_key}'
- };
- """)
-
- def test_basic_select_from(self):
- sql = 'SELECT * FROM mindsdb_aqicn.air_quality_city where city="Bangalore";'
- assert self.run_sql(sql).shape[0] == 1
-
- sql = 'SELECT * FROM mindsdb_aqicn.air_quality_lat_lng where lat="12.938539" AND lng="77.5901";'
- assert self.run_sql(sql).shape[0] == 1
-
- sql = 'SELECT * FROM mindsdb_aqicn.air_quality_user_location;'
- assert self.run_sql(sql).shape[0] == 1
-
- def test_complex_select(self):
- sql = 'SELECT data.city.url, data.city.name FROM mindsdb_aqicn.air_quality_lat_lng where lat="12.938539" AND lng="77.5901";'
- assert self.run_sql(sql).shape[1] == 2
-
- sql = 'SELECT * FROM mindsdb_aqicn.air_quality_user_location LIMIT 1;'
- assert self.run_sql(sql).shape[0] == 1
diff --git a/tests/unused/unit/handler_tests/test_binance_handler.py b/tests/unused/unit/handler_tests/test_binance_handler.py
deleted file mode 100644
index 456b9d7345b..00000000000
--- a/tests/unused/unit/handler_tests/test_binance_handler.py
+++ /dev/null
@@ -1,110 +0,0 @@
-from mindsdb.integrations.handlers.binance_handler.binance_tables import BinanceAggregatedTradesTable
-from mindsdb.integrations.handlers.binance_handler.binance_handler import BinanceHandler
-from mindsdb_sql_parser import ast
-from mindsdb_sql_parser.ast.select.star import Star
-from mindsdb_sql_parser.ast.select.identifier import Identifier
-
-from unittest.mock import Mock
-
-import pandas as pd
-import unittest
-
-
-class BinanceAggregatedTradesTableTest(unittest.TestCase):
- def test_get_columns_returns_all_columns(self):
- api_handler = Mock(BinanceHandler)
- trades_table = BinanceAggregatedTradesTable(api_handler)
- # Order matters.
- expected_columns = [
- 'symbol',
- 'open_time',
- 'open_price',
- 'high_price',
- 'low_price',
- 'close_price',
- 'volume',
- 'close_time',
- 'quote_asset_volume',
- 'number_of_trades',
- 'taker_buy_base_asset_volume',
- 'taker_buy_quote_asset_volume'
- ]
- self.assertListEqual(trades_table.get_columns(), expected_columns)
-
- def test_select_returns_all_columns(self):
- api_handler = Mock(BinanceHandler)
- api_handler.call_binance_api.return_value = pd.DataFrame([
- [
- 'symbol', # Symbol
- 1499040000000, # Kline open time
- '0.01634790', # Open price
- '0.80000000', # High price
- '0.01575800', # Low price
- '0.01577100', # Close price
- '148976.11427815', # Volume
- 1499644799999, # Kline Close time
- '2434.19055334', # Quote asset volume
- 308, # Number of trades
- '1756.87402397', # Taker buy base asset volume
- '28.46694368', # Taker buy quote asset volume
- ]
- ])
- trades_table = BinanceAggregatedTradesTable(api_handler)
-
- select_all = ast.Select(
- targets=[Star()],
- from_table='aggregated_trade_data',
- where='aggregated_trade_data.symbol = "symbol"'
- )
-
- all_trade_data = trades_table.select(select_all)
- first_trade_data = all_trade_data.iloc[0]
-
- self.assertEqual(all_trade_data.shape[1], 12)
- self.assertEqual(first_trade_data['symbol'], 'symbol')
- self.assertEqual(first_trade_data['open_time'], 1499040000000)
- self.assertEqual(first_trade_data['open_price'], '0.01634790')
- self.assertEqual(first_trade_data['high_price'], '0.80000000')
- self.assertEqual(first_trade_data['low_price'], '0.01575800')
- self.assertEqual(first_trade_data['close_price'], '0.01577100')
- self.assertEqual(first_trade_data['volume'], '148976.11427815')
- self.assertEqual(first_trade_data['close_time'], 1499644799999)
- self.assertEqual(first_trade_data['quote_asset_volume'], '2434.19055334')
- self.assertEqual(first_trade_data['number_of_trades'], 308)
- self.assertEqual(first_trade_data['taker_buy_base_asset_volume'], '1756.87402397')
- self.assertEqual(first_trade_data['taker_buy_quote_asset_volume'], '28.46694368')
-
- def test_select_returns_only_selected_columns(self):
- api_handler = Mock(BinanceHandler)
- api_handler.call_binance_api.return_value = pd.DataFrame([
- [
- 'symbol', # Symbol
- 1499040000000, # Kline open time
- '0.01634790', # Open price
- '0.80000000', # High price
- '0.01575800', # Low price
- '0.01577100', # Close price
- '148976.11427815', # Volume
- 1499644799999, # Kline Close time
- '2434.19055334', # Quote asset volume
- 308, # Number of trades
- '1756.87402397', # Taker buy base asset volume
- '28.46694368', # Taker buy quote asset volume
- ]
- ])
- trades_table = BinanceAggregatedTradesTable(api_handler)
-
- open_time_identifier = Identifier(path_str='open_time')
- close_time_identifier = Identifier(path_str='close_time')
- select_times = ast.Select(
- targets=[open_time_identifier, close_time_identifier],
- from_table='aggregated_trade_data',
- where='aggregated_trade_data.symbol = "symbol"'
- )
-
- all_trade_data = trades_table.select(select_times)
- first_trade_data = all_trade_data.iloc[0]
-
- self.assertEqual(all_trade_data.shape[1], 2)
- self.assertEqual(first_trade_data['open_time'], 1499040000000)
- self.assertEqual(first_trade_data['close_time'], 1499644799999)
diff --git a/tests/unused/unit/handler_tests/test_coinbase_handler.py b/tests/unused/unit/handler_tests/test_coinbase_handler.py
deleted file mode 100644
index 0a7b3067b82..00000000000
--- a/tests/unused/unit/handler_tests/test_coinbase_handler.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from mindsdb.integrations.handlers.coinbase_handler.coinbase_tables import CoinBaseAggregatedTradesTable
-from mindsdb.integrations.handlers.coinbase_handler.coinbase_handler import CoinBaseHandler
-from mindsdb_sql_parser import ast
-from mindsdb_sql_parser.ast.select.star import Star
-from mindsdb_sql_parser.ast.select.identifier import Identifier
-
-from unittest.mock import Mock
-
-import pandas as pd
-import unittest
-
-
-class CoinBaseAggregatedTradesTableTest(unittest.TestCase):
- def test_get_columns_returns_all_columns(self):
- api_handler = Mock(CoinBaseHandler)
- trades_table = CoinBaseAggregatedTradesTable(api_handler)
- # Order matters.
- expected_columns = [
- 'symbol',
- 'low',
- 'high',
- 'open',
- 'close',
- 'volume',
- 'timestamp',
- 'timestamp_iso'
- ]
- self.assertListEqual(trades_table.get_columns(), expected_columns)
-
- def test_select_returns_all_columns(self):
- api_handler = Mock(CoinBaseHandler)
- api_handler.call_coinbase_api.return_value = pd.DataFrame([
- [
- 'BTC-USD', # symbol
- 34330.01, # low
- 34623.21, # high
- 34493.51, # open
- 34349.16, # close
- 719.064133, # volume
- 1698710400, # timestamp
- "2023-10-30T20:00:00-04:00" # timestamp_iso
- ]
- ])
- trades_table = CoinBaseAggregatedTradesTable(api_handler)
-
- select_all = ast.Select(
- targets=[Star()],
- from_table='coinbase_candle_data',
- where='coinbase_candle_data.symbol = "BTC-USD"'
- )
-
- all_trade_data = trades_table.select(select_all)
- first_trade_data = all_trade_data.iloc[0]
-
- self.assertEqual(all_trade_data.shape[1], 8)
- self.assertEqual(first_trade_data['symbol'], 'BTC-USD')
- self.assertEqual(first_trade_data['low'], 34330.01)
- self.assertEqual(first_trade_data['high'], 34623.21)
- self.assertEqual(first_trade_data['open'], 34493.51)
- self.assertEqual(first_trade_data['close'], 34349.16)
- self.assertEqual(first_trade_data['volume'], 719.064133)
- self.assertEqual(first_trade_data['timestamp'], 1698710400)
- self.assertEqual(first_trade_data['timestamp_iso'], '2023-10-30T20:00:00-04:00')
-
- def test_select_returns_only_selected_columns(self):
- api_handler = Mock(CoinBaseHandler)
- api_handler.call_coinbase_api.return_value = pd.DataFrame([
- [
- 'BTC-USD', # symbol
- 34330.01, # low
- 34623.21, # high
- 34493.51, # open
- 34349.16, # close
- 719.064133, # volume
- 1698710400, # timestamp
- "2023-10-30T20:00:00-04:00" # timestamp_iso
- ]
- ])
- trades_table = CoinBaseAggregatedTradesTable(api_handler)
-
- open_time_identifier = Identifier(path_str='open')
- close_time_identifier = Identifier(path_str='close')
- select_times = ast.Select(
- targets=[open_time_identifier, close_time_identifier],
- from_table='coinbase_candle_data',
- where='coinbase_candle_data.symbol = "BTC-USD"'
- )
-
- all_trade_data = trades_table.select(select_times)
- first_trade_data = all_trade_data.iloc[0]
-
- self.assertEqual(all_trade_data.shape[1], 2)
- self.assertEqual(first_trade_data['open'], 34493.51)
- self.assertEqual(first_trade_data['close'], 34349.16)
diff --git a/tests/unused/unit/handler_tests/test_eventbrite_handler.py b/tests/unused/unit/handler_tests/test_eventbrite_handler.py
deleted file mode 100644
index b07e3f09cdd..00000000000
--- a/tests/unused/unit/handler_tests/test_eventbrite_handler.py
+++ /dev/null
@@ -1,405 +0,0 @@
-from mindsdb.integrations.handlers.eventbrite_handler.eventbrite_handler import (
- EventbriteHandler,
-)
-
-from mindsdb.integrations.handlers.eventbrite_handler.eventbrite_handler import (
- CategoryInfoTable,
- EventDetailsTable,
-)
-
-from mindsdb_sql_parser import ast
-from mindsdb_sql_parser.ast.select.identifier import Identifier
-
-from unittest.mock import Mock
-
-import pandas as pd
-import unittest
-
-
-class CategoryInfoTableTest(unittest.TestCase):
- def test_get_columns_returns_all_columns(self):
- api_handler = Mock(EventbriteHandler)
- trades_table = CategoryInfoTable(api_handler)
- # Order matters.
- expected_columns = [
- "resource_uri",
- "id",
- "name",
- "name_localized",
- "short_name",
- "short_name_localized",
- ]
- self.assertListEqual(trades_table.get_columns(), expected_columns)
-
- def test_select_returns_some_columns(self):
- api_handler = Mock()
- api_handler.api.list_categories.return_value = pd.DataFrame(
- {
- "locale": "en_US",
- "categories": [
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/103/",
- "id": "103",
- "name": "Music",
- "name_localized": "Music",
- "short_name": "Music",
- "short_name_localized": "Music",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/101/",
- "id": "101",
- "name": "Business & Professional",
- "name_localized": "Business & Professional",
- "short_name": "Business",
- "short_name_localized": "Business",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/110/",
- "id": "110",
- "name": "Food & Drink",
- "name_localized": "Food & Drink",
- "short_name": "Food & Drink",
- "short_name_localized": "Food & Drink",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/113/",
- "id": "113",
- "name": "Community & Culture",
- "name_localized": "Community & Culture",
- "short_name": "Community",
- "short_name_localized": "Community",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/105/",
- "id": "105",
- "name": "Performing & Visual Arts",
- "name_localized": "Performing & Visual Arts",
- "short_name": "Arts",
- "short_name_localized": "Arts",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/104/",
- "id": "104",
- "name": "Film, Media & Entertainment",
- "name_localized": "Film, Media & Entertainment",
- "short_name": "Film & Media",
- "short_name_localized": "Film & Media",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/108/",
- "id": "108",
- "name": "Sports & Fitness",
- "name_localized": "Sports & Fitness",
- "short_name": "Sports & Fitness",
- "short_name_localized": "Sports & Fitness",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/107/",
- "id": "107",
- "name": "Health & Wellness",
- "name_localized": "Health & Wellness",
- "short_name": "Health",
- "short_name_localized": "Health",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/102/",
- "id": "102",
- "name": "Science & Technology",
- "name_localized": "Science & Technology",
- "short_name": "Science & Tech",
- "short_name_localized": "Science & Tech",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/109/",
- "id": "109",
- "name": "Travel & Outdoor",
- "name_localized": "Travel & Outdoor",
- "short_name": "Travel & Outdoor",
- "short_name_localized": "Travel & Outdoor",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/111/",
- "id": "111",
- "name": "Charity & Causes",
- "name_localized": "Charity & Causes",
- "short_name": "Charity & Causes",
- "short_name_localized": "Charity & Causes",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/114/",
- "id": "114",
- "name": "Religion & Spirituality",
- "name_localized": "Religion & Spirituality",
- "short_name": "Spirituality",
- "short_name_localized": "Spirituality",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/115/",
- "id": "115",
- "name": "Family & Education",
- "name_localized": "Family & Education",
- "short_name": "Family & Education",
- "short_name_localized": "Family & Education",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/116/",
- "id": "116",
- "name": "Seasonal & Holiday",
- "name_localized": "Seasonal & Holiday",
- "short_name": "Holiday",
- "short_name_localized": "Holiday",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/112/",
- "id": "112",
- "name": "Government & Politics",
- "name_localized": "Government & Politics",
- "short_name": "Government",
- "short_name_localized": "Government",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/106/",
- "id": "106",
- "name": "Fashion & Beauty",
- "name_localized": "Fashion & Beauty",
- "short_name": "Fashion",
- "short_name_localized": "Fashion",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/117/",
- "id": "117",
- "name": "Home & Lifestyle",
- "name_localized": "Home & Lifestyle",
- "short_name": "Home & Lifestyle",
- "short_name_localized": "Home & Lifestyle",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/118/",
- "id": "118",
- "name": "Auto, Boat & Air",
- "name_localized": "Auto, Boat & Air",
- "short_name": "Auto, Boat & Air",
- "short_name_localized": "Auto, Boat & Air",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/119/",
- "id": "119",
- "name": "Hobbies & Special Interest",
- "name_localized": "Hobbies & Special Interest",
- "short_name": "Hobbies",
- "short_name_localized": "Hobbies",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/199/",
- "id": "199",
- "name": "Other",
- "name_localized": "Other",
- "short_name": "Other",
- "short_name_localized": "Other",
- },
- {
- "resource_uri": "https://www.eventbriteapi.com/v3/categories/120/",
- "id": "120",
- "name": "School Activities",
- "name_localized": "School Activities",
- "short_name": "School Activities",
- "short_name_localized": "School Activities",
- },
- ],
- }
- )
- eventbrite_table = CategoryInfoTable(api_handler)
-
- resource_uri_identifier = Identifier(path_str="resource_uri")
- id_identifier = Identifier(path_str="id")
- name_identifier = Identifier(path_str="name")
- name_localized_identifier = Identifier(path_str="name_localized")
-
- select_all = ast.Select(
- targets=[
- resource_uri_identifier,
- id_identifier,
- name_identifier,
- name_localized_identifier,
- ],
- from_table="categoryInfoTable",
- )
-
- all_category_data = eventbrite_table.select(select_all)
-
- self.assertEqual(all_category_data.shape[0], 20)
- self.assertEqual(all_category_data.shape[1], 4)
-
-
-class EventDetailsTableTest(unittest.TestCase):
- def test_get_columns_returns_all_columns(self):
- api_handler = Mock(EventbriteHandler)
- trades_table = EventDetailsTable(api_handler)
- # Order matters.
- expected_columns = [
- "name_text",
- "name_html",
- "description_text",
- "description_html",
- "url",
- "start_timezone",
- "start_local",
- "start_utc",
- "end_timezone",
- "end_local",
- "end_utc",
- "organization_id",
- "created",
- "changed",
- "published",
- "capacity",
- "capacity_is_custom",
- "status",
- "currency",
- "listed",
- "shareable",
- "online_event",
- "tx_time_limit",
- "hide_start_date",
- "hide_end_date",
- "locale",
- "is_locked",
- "privacy_setting",
- "is_series",
- "is_series_parent",
- "inventory_type",
- "is_reserved_seating",
- "show_pick_a_seat",
- "show_seatmap_thumbnail",
- "show_colors_in_seatmap_thumbnail",
- "source",
- "is_free",
- "version",
- "summary",
- "facebook_event_id",
- "logo_id",
- "organizer_id",
- "venue_id",
- "category_id",
- "subcategory_id",
- "format_id",
- "id",
- "resource_uri",
- "is_externally_ticketed",
- "logo_crop_mask",
- "logo_original",
- "logo_id",
- "logo_url",
- "logo_aspect_ratio",
- "logo_edge_color",
- "logo_edge_color_set",
- ]
- self.assertListEqual(trades_table.get_columns(), expected_columns)
-
- def test_select_returns_some_columns(self):
- api_handler = Mock()
- api_handler.api.get_event.return_value = pd.DataFrame(
- {
- "name": {
- "text": "AI Forum: Can AI Fix Climate Change?",
- "html": "AI Forum: Can AI Fix Climate Change?",
- },
- "description": {
- "text": "The third in a series of lunchtime presentations by King's researchers at Science Gallery London",
- "html": "The third in a series of lunchtime presentations by King's researchers at Science Gallery London",
- },
- "url": "https://www.eventbrite.co.uk/e/ai-forum-can-ai-fix-climate-change-tickets-717926867587",
- "start": {
- "timezone": "Europe/London",
- "local": "2023-11-01T13:15:00",
- "utc": "2023-11-01T13:15:00Z",
- },
- "end": {
- "timezone": "Europe/London",
- "local": "2023-11-01T14:00:00",
- "utc": "2023-11-01T14:00:00Z",
- },
- "organization_id": "112948679745",
- "created": "2023-09-12T15:38:12Z",
- "changed": "2023-10-11T20:01:50Z",
- "published": "2023-09-12T15:44:26Z",
- "capacity": None,
- "capacity_is_custom": None,
- "status": "live",
- "currency": "GBP",
- "listed": True,
- "shareable": True,
- "online_event": False,
- "tx_time_limit": 1200,
- "hide_start_date": False,
- "hide_end_date": False,
- "locale": "en_GB",
- "is_locked": False,
- "privacy_setting": "unlocked",
- "is_series": False,
- "is_series_parent": False,
- "inventory_type": "limited",
- "is_reserved_seating": False,
- "show_pick_a_seat": False,
- "show_seatmap_thumbnail": False,
- "show_colors_in_seatmap_thumbnail": False,
- "source": "coyote",
- "is_free": True,
- "version": None,
- "summary": "The third in a series of lunchtime presentations by King's researchers at Science Gallery London",
- "facebook_event_id": None,
- "logo_id": "596060689",
- "organizer_id": "7201076133",
- "venue_id": "173614819",
- "category_id": "102",
- "subcategory_id": None,
- "format_id": "2",
- "id": "717926867587",
- "resource_uri": "https://www.eventbriteapi.com/v3/events/717926867587/",
- "is_externally_ticketed": False,
- "logo": {
- "crop_mask": {
- "top_left": {"x": 0, "y": 0},
- "width": 2160,
- "height": 1080,
- },
- "original": {
- "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F596060689%2F112948679745%2F1%2Foriginal.20230912-154003?auto=format%2Ccompress&q=75&sharp=10&s=0bb5e81d009275e956f98c418a9a3025",
- "width": 2160,
- "height": 1080,
- },
- "id": "596060689",
- "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F596060689%2F112948679745%2F1%2Foriginal.20230912-154003?h=200&w=450&auto=format%2Ccompress&q=75&sharp=10&rect=0%2C0%2C2160%2C1080&s=282ef80475d468edff954cb243435432",
- "aspect_ratio": "2",
- "edge_color": "#d6b4be",
- "edge_color_set": True,
- },
- }
- )
- eventbrite_table = EventDetailsTable(api_handler)
-
- name_text_identifier = Identifier(path_str="name_text")
- description_text_identifier = Identifier(path_str="description_text")
- url_identifier = Identifier(path_str="url")
-
- select_all = ast.Select(
- targets=[name_text_identifier, description_text_identifier, url_identifier],
- from_table="eventDetailsTable",
- where='locationId = "717926867587"',
- )
-
- review_data = eventbrite_table.select(select_all)
- first_data = review_data.iloc[0]
-
- self.assertEqual(review_data.shape[1], 3)
- self.assertEqual(
- first_data["name_text"], "AI Forum: Can AI Fix Climate Change?"
- )
- self.assertEqual(
- first_data["description_text"],
- "The third in a series of lunchtime presentations by King's researchers at Science Gallery London",
- )
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/unused/unit/handler_tests/test_gmail_handler.py b/tests/unused/unit/handler_tests/test_gmail_handler.py
deleted file mode 100644
index 7e8fe86758d..00000000000
--- a/tests/unused/unit/handler_tests/test_gmail_handler.py
+++ /dev/null
@@ -1,240 +0,0 @@
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-from mindsdb.integrations.handlers.gmail_handler.gmail_handler import GmailHandler
-from mindsdb.integrations.handlers.gmail_handler.gmail_handler import EmailsTable
-from google.oauth2.credentials import Credentials
-from mindsdb_sql_parser import parse_sql
-import unittest
-from unittest.mock import Mock, patch
-from unittest import mock
-
-
-class GmailHandlerTest(unittest.TestCase):
- def setUp(self) -> None:
- self.credentials_file = 'test1_credentials.json'
- self.credentials_url = 's3://your-bucket/test_credentials.json'
- self.handler = GmailHandler(connection_data={
- 'credentials_file': self.credentials_file,
- 'credentials_url': self.credentials_url
- })
-
- @patch('mindsdb.integrations.handlers.gmail_handler.gmail_handler.requests.get') # Patching the requests.get method
- def test_has_creds_file_with_valid_s3_link(self, mock_get):
- # Configure the mock behavior
- mock_response = mock_get.return_value
- mock_response.status_code = 200
- mock_response.text = 'Mocked credentials file content'
-
- result = self.handler._has_creds_file(self.credentials_file)
- # Assert that the requests.get method was called with the correct URL
- mock_get.assert_called_once_with(self.credentials_url)
- # Assert that the method returns True
- self.assertTrue(result)
-
- @patch('mindsdb.integrations.handlers.gmail_handler.gmail_handler.requests.get') # Patching the requests.get method
- def test_has_creds_file_with_invalid_s3_link(self, mock_get):
- # Test when invalid S3 credentials file is provided
- mock_response = mock.Mock()
- mock_response.status_code = 404
- mock_get.return_value = mock_response
-
- # TODO this will be broken now that we don't use global loggers anymore
- with patch('mindsdb.utilities.log.logger') as mock_logger:
- result = self.handler._has_creds_file(self.credentials_file)
- # Assert that the requests.get method was called with the correct URL
- self.assertFalse(result)
- # Assert that the error message is logged
- mock_logger.error.assert_called_once_with("Failed to get credentials from S3", 404)
-
- def test_create_connection_with_mocked_token(self):
- with mock.patch('google.oauth2.credentials.Credentials.from_authorized_user_file') as mock_credentials:
- mock_credentials.return_value = Credentials('token.json')
- with mock.patch('os.path.isfile') as mock_isfile:
- mock_isfile.return_value = True
- result = self.handler.create_connection()
- self.assertIsNotNone(result)
-
- def test_create_connection_with_mocked_credentials_file(self):
- with mock.patch('google.oauth2.credentials.Credentials.from_authorized_user_file') as mock_credentials:
- mock_credentials.is_valid.return_value = False
- with mock.patch('os.path.isfile') as mock_isfile:
- mock_isfile.return_value = True
- result = self.handler.create_connection()
- self.assertIsNotNone(result)
-
- def test_create_connection_with_mocked_credentials_file_and_s3(self):
- with mock.patch('google.oauth2.credentials.Credentials.from_authorized_user_file') as mock_credentials:
- mock_credentials.is_valid.return_value = False
- with mock.patch('os.path.isfile') as mock_isfile:
- mock_isfile.return_value = True
- with mock.patch(
- 'mindsdb.integrations.handlers.gmail_handler.gmail_handler.GmailHandler._has_creds_file') \
- as mock_has_creds_file:
- mock_has_creds_file.return_value = True
- result = self.handler.create_connection()
- self.assertIsNotNone(result)
-
- def test_parse_parts_with_multipart_mime_type(self):
- email_parts = [
- {
- 'mimeType': 'multipart/mixed',
- 'parts': [
- {
- 'mimeType': 'multipart/alternative',
- 'parts': [
- {
- 'mimeType': 'text/plain',
- 'body': {
- 'data': 'VGhpcyBpcyB0aGUgcGxhaW4gdGV4dCBib2R5IG9mIHRoZSBlbWFpbC4='
- }
- },
- {
- 'mimeType': 'text/html',
- 'body': {
- 'data': 'PGh0bWw+CiAgICA8Ym9keT4KICAgICAgPHA+V'
- 'GhpcyBpcyB0aGUgSFRNTCBib2R5IG9mIHRoZSBlbWFpbC4'
- 'gPC9wPgogICAgPC9ib2R5PjwvaHRtbD4='
- }
- }
- ]
- },
- {
- 'mimeType': 'application/pdf',
- 'filename': 'example.pdf',
- 'body': {
- 'attachmentId': '<>'
- }
- }
- ]
- }
-
- ]
- attachments = []
- email_body = self.handler._parse_parts(email_parts, attachments)
- expected_body = "This is the plain text body of the email."
- expected_attachments = [
- {
- 'filename': 'example.pdf',
- 'mimeType': 'application/pdf',
- 'attachmentId': '<>'
- }
- ]
- self.assertEqual(email_body, expected_body)
- self.assertEqual(attachments, expected_attachments)
-
- def test_parse_parts_with_multipart_mime_type_and_no_parts(self):
- email_parts = [
- {
- 'mimeType': 'multipart/mixed',
- 'parts': []
- }
- ]
- attachments = []
- email_body = self.handler._parse_parts(email_parts, attachments)
- expected_body = ""
- expected_attachments = []
- self.assertEqual(email_body, expected_body)
- self.assertEqual(attachments, expected_attachments)
-
- def test_parse_parts_with_multiple_attachments(self):
- email_parts = [
- {
- 'mimeType': 'multipart/mixed',
- 'parts': [
- {
- 'mimeType': 'multipart/alternative',
- 'parts': [
- {
- 'mimeType': 'text/plain',
- 'body': {
- 'data': 'VGhpcyBpcyB0aGUgcGxhaW4gdGV4dCBib2R5IG9mIHRoZSBlbWFpbC4='
- }
- },
- {
- 'mimeType': 'text/html',
- 'body': {
- 'data': 'PGh0bWw+CiAgICA8Ym9keT4KICAgICAgPHA+'
- 'VGhpcyBpcyB0aGUgSFRNTCBib2R5IG9mIHRoZSBlbWFpb'
- 'C4gPC9wPgogICAgPC9ib2R5PjwvaHRtbD4='
- }
- }
- ]
- },
- {
- 'mimeType': 'application/pdf',
- 'filename': 'example.pdf',
- 'body': {
- 'attachmentId': '<>'
- }
- },
- {
- 'mimeType': 'application/pdf',
- 'filename': 'example2.pdf',
- 'body': {
- 'attachmentId': '<>'
- }
- }
- ]
- }
- ]
- attachments = []
- email_body = self.handler._parse_parts(email_parts, attachments)
- expected_body = "This is the plain text body of the email."
- expected_attachments = [
- {
- 'filename': 'example.pdf',
- 'mimeType': 'application/pdf',
- 'attachmentId': '<>'
- },
- {
- 'filename': 'example2.pdf',
- 'mimeType': 'application/pdf',
- 'attachmentId': '<>'
- }
- ]
- self.assertEqual(email_body, expected_body)
- self.assertEqual(attachments, expected_attachments)
-
-
-class EmailsTableTest(unittest.TestCase):
-
- def test_get_tables(self):
- handler = Mock(GmailHandler)
- tables = handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_get_columns_returns_all_columns(self):
- gmail_handler = Mock(GmailHandler)
- gmail_table = EmailsTable(gmail_handler)
- expected_columns = [
- 'id',
- 'message_id',
- 'thread_id',
- 'label_ids',
- 'sender',
- 'to',
- 'date',
- 'subject',
- 'snippet',
- 'history_id',
- 'size_estimate',
- 'body',
- 'attachments'
-
- ]
- self.assertListEqual(gmail_table.get_columns(), expected_columns)
-
- def test_delete_method(self):
- gmail_handler = Mock(GmailHandler)
- gmail_table = EmailsTable(gmail_handler)
- query = parse_sql('delete from gmail where id=1')
- gmail_table.delete(query)
- gmail_handler.call_gmail_api.assert_called_once_with('delete_message', {'id': 1})
-
- def test_update_method(self):
- gmail_handler = Mock(GmailHandler)
- gmail_table = EmailsTable(gmail_handler)
- query = parse_sql('update gmail set addLabel="test1",removeLabel = "test" where id=1')
- gmail_table.update(query)
- gmail_handler.call_gmail_api.assert_called_once_with('modify_message', {'id': 1,
- 'body': {'addLabelIds': ['test1'],
- 'removeLabelIds': ['test']}})
diff --git a/tests/unused/unit/handler_tests/test_instatus_handler.py b/tests/unused/unit/handler_tests/test_instatus_handler.py
deleted file mode 100644
index f42932f281d..00000000000
--- a/tests/unused/unit/handler_tests/test_instatus_handler.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.instatus_handler.instatus_handler import InstatusHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-import pandas as pd
-import os
-
-
-class InstatusHandlerTest(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.handler = InstatusHandler(name='mindsdb_instatus', connection_data={'api_key': os.environ.get('INSTATUS_API_KEY')})
-
- def setUp(self):
- self.pageId = self.handler.call_instatus_api(endpoint='/v2/pages')['id'][0]
- self.componentId = self.handler.call_instatus_api(endpoint=f'/v1/{self.pageId}/components')['id'][0]
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_call_instatus_api(self):
- self.assertIsInstance(self.handler.call_instatus_api(endpoint='/v2/pages'), pd.DataFrame)
-
- def test_2_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_3_get_columns(self):
- status_pages_columns = self.handler.get_columns(table_name='status_pages')
- components_columns = self.handler.get_columns(table_name='components')
- assert type(status_pages_columns) is not RESPONSE_TYPE.ERROR
- assert type(components_columns) is not RESPONSE_TYPE.ERROR
-
- def test_4_select_status_pages(self):
- query = '''SELECT *
- FROM mindsdb_instatus.status_pages'''
- self.assertTrue(self.handler.native_query(query))
-
- def test_5_select_status_pages_by_conditions(self):
- query = '''SELECT name, status, subdomain
- FROM mindsdb_instatus.status_pages
- WHERE id = "clo3xshsk1114842hkn377y3lrap"'''
- self.assertTrue(self.handler.native_query(query))
-
- def test_6_insert_status_pages(self):
- query = f'''INSERT INTO mindsdb_instatus.status_pages (email, name, subdomain, components, logoUrl) VALUES ('{os.environ.get('EMAIL_ID')}', 'mindsdb', 'somtirtha-roy', '["Website", "App", "API"]', 'https://instatus.com/sample.png')'''
- try:
- self.assertTrue(self.handler.native_query(query))
- except Exception as e:
- error_message = str(e)
- if "This subdomain is taken by another status page" in error_message:
- print("Subdomain is already taken. Choose a different one.")
-
- def test_7_update_status_pages(self):
- # update the row with the id obtained
- query = f'''UPDATE mindsdb_instatus.status_pages
- SET logoUrl = 'https://instatus.com/sample.png',
- faviconUrl = 'https://instatus.com/favicon-32x32.png',
- websiteUrl = 'https://instatus.com',
- language = 'en',
- useLargeHeader = true,
- brandColor = '#111',
- okColor = '#33B17E',
- disruptedColor = '#FF8C03',
- degradedColor = '#ECC94B',
- downColor = '#DC123D',
- noticeColor = '#70808F',
- unknownColor = '#DFE0E1',
- googleAnalytics = 'UA-00000000-1',
- subscribeBySms = true,
- smsService = 'twilio',
- twilioSid = 'YOUR_TWILIO_SID',
- twilioToken = 'YOUR_TWILIO_TOKEN',
- twilioSender = 'YOUR_TWILIO_SENDER',
- nexmoKey = null,
- nexmoSecret = null,
- nexmoSender = null,
- htmlInMeta = null,
- htmlAboveHeader = null,
- htmlBelowHeader = null,
- htmlAboveFooter = null,
- htmlBelowFooter = null,
- htmlBelowSummary = null,
- cssGlobal = null,
- launchDate = null,
- dateFormat = 'MMMMMM d, yyyy',
- dateFormatShort = 'MMM yyyy',
- timeFormat = 'p',
- private = false,
- useAllowList = false,
- translations = '{{
- "name": {{
- "fr": "nasa"
- }}
- }}'
- WHERE id = "{self.pageId}"'''
- self.assertTrue(self.handler.native_query(query))
-
- def test_8_select_components(self):
- query = f'''SELECT *
- FROM mindsdb_instatus.components
- WHERE page_id = '{self.pageId}';'''
- self.assertTrue(self.handler.native_query(query))
-
- def test_9_select_components_by_conditions(self):
- query = f'''SELECT *
- FROM mindsdb_instatus.components
- WHERE page_id = '{self.pageId}'
- AND component_id = '{self.componentId}';'''
- self.assertTrue(self.handler.native_query(query))
-
- def test_10_insert_components(self):
- query = f'''INSERT INTO mindsdb_instatus.components (page_id, name, description, status, order, showUptime, grouped, translations_name_in_fr, translations_desc_in_fr)
- VALUES (
- '{self.pageId}',
- 'Test component',
- 'Testing',
- 'OPERATIONAL',
- 6,
- true,
- false,
- "Composant de test",
- "En test"
- );'''
- self.assertTrue(self.handler.native_query(query))
-
- def test_11_update_components(self):
- query = f'''UPDATE mindsdb_instatus.components
- SET
- name = 'Test component 4',
- description = 'Test test test',
- status = 'OPERATIONAL',
- order = 6,
- showUptime = true,
- grouped = false,
- translations_name_in_fr = "Composant de test 4",
- translations_desc_in_fr = "Test test test"
- WHERE page_id = '{self.pageId}'
- AND component_id = '{self.componentId}';'''
- self.assertTrue(self.handler.native_query(query))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/unused/unit/handler_tests/test_intercom_handler.py b/tests/unused/unit/handler_tests/test_intercom_handler.py
deleted file mode 100644
index cee50735d13..00000000000
--- a/tests/unused/unit/handler_tests/test_intercom_handler.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import unittest
-from mindsdb.integrations.handlers.intercom_handler.intercom_handler import IntercomHandler
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-import pandas as pd
-import os
-
-
-class InstatusHandlerTest(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.handler = IntercomHandler(name="mindsdb_intercon", connection_data={'access_token': os.environ.get('INTERCOM_ACCESS_TOKEN')})
-
- def test_0_check_connection(self):
- assert self.handler.check_connection()
-
- def test_1_connect(self):
- assert self.handler.connect()
-
- def test_2_call_instatus_api(self):
- self.assertIsInstance(self.handler.call_intercom_api(endpoint='/articles'), pd.DataFrame)
-
- def test_3_get_tables(self):
- tables = self.handler.get_tables()
- assert tables.type is not RESPONSE_TYPE.ERROR
-
- def test_4_get_columns(self):
- columns = self.handler.get_columns(table_name='articles')
- assert type(columns) is not RESPONSE_TYPE.ERROR
-
- def test_5_select_articles(self):
- query = "SELECT * FROM articles"
- self.assertTrue(self.handler.native_query(query=query))
-
- def test_6_select_articles_by_condition(self):
- query = "SELECT * FROM articles WHERE id = '8553922'"
- self.assertTrue(self.handler.native_query(query=query))
-
- def test_7_insert_article(self):
- query = '''INSERT INTO myintercom.articles (title, description, body, author_id, state, parent_id, parent_type)
- VALUES ('Thanks for everything',
- 'Description of the Article',
- 'Body of the Article',
- 6840572,
- 'published',
- 6801839,
- 'collection'
- );'''
- self.assertTrue(self.handler.native_query(query=query))
-
- def test_8_update_article(self):
- df = pd.DataFrame(self.handler.call_intercom_api(endpoint='/articles', params={'page': 1, 'per_page': 1})['data'][0])
- _id = df['id'][0]
- query = f'''UPDATE myintercom.articles
- SET title = 'Christmas is here!',
- body = '